Icache integrated!

Merge branch 'icache-almost-working' into main
This commit is contained in:
Ross Thompson 2021-04-26 11:48:58 -05:00
commit 8e5409af66
57 changed files with 15085 additions and 447 deletions

6
.gitignore vendored
View File

@ -17,3 +17,9 @@ wlft*
/imperas-riscv-tests/ProgramMap.txt
/wally-pipelined/busybear-testgen/gdbcombined.txt
/wally-pipelined/busybear-testgen/first10.txt
*.o
*.d
testsBP/*/*/*.elf*
testsBP/*/OBJ/*
testsBP/*/*.a

26
testsBP/crt0/Makefile Normal file
View File

@ -0,0 +1,26 @@
TARGETDIR := bin
TARGET := $(TARGETDIR)/start
ROOT := ..
LIBRARY_DIRS :=
LIBRARY_FILES :=
MARCH :=-march=rv64ic
MABI :=-mabi=lp64
LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles
AFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64 -W
CFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64 -mcmodel=medany
AS=riscv64-unknown-elf-as
CC=riscv64-unknown-elf-gcc
AR=riscv64-unknown-elf-ar
all: libcrt0.a
%.o: %.s
${AS} ${AFLAGS} -c $< -o $@
libcrt0.a: start.o
${AR} -r $@ $^
clean:
rm -rf *.a *.o

213
testsBP/crt0/isr.s Normal file
View File

@ -0,0 +1,213 @@
.section .text
.global __trap_handler
.type __trap_handler, @function
__trap_handler:
# save the context of the cpu to the top of the current stack
addi sp, sp, -124
sw x1, 0x0(sp)
sw x2, 0x4(sp)
sw x3, 0x8(sp)
sw x4, 0xC(sp)
sw x5, 0x10(sp)
sw x6, 0x14(sp)
sw x7, 0x18(sp)
sw x8, 0x1C(sp)
sw x9, 0x20(sp)
sw x10, 0x24(sp)
sw x11, 0x28(sp)
sw x12, 0x2C(sp)
sw x13, 0x30(sp)
sw x14, 0x34(sp)
sw x15, 0x38(sp)
sw x16, 0x3C(sp)
sw x17, 0x40(sp)
sw x18, 0x44(sp)
sw x19, 0x48(sp)
sw x20, 0x4C(sp)
sw x21, 0x50(sp)
sw x22, 0x54(sp)
sw x23, 0x58(sp)
sw x24, 0x5C(sp)
sw x25, 0x60(sp)
sw x26, 0x64(sp)
sw x27, 0x68(sp)
sw x28, 0x6C(sp)
sw x29, 0x70(sp)
sw x30, 0x74(sp)
sw x31, 0x78(sp)
# figure out what caused the trap.
csrrw t0, mcause, x0
# mcause is {int, 31 bit exception code}
# for this implementation only the lowest 4 bits are used
srli t1, t0, 31 # interrupt flag
andi t2, t0, 0xF # 4 bit cause
slli t1, t1, 5 # shift int flag
or t1, t1, t2 # combine
slli t1, t1, 2 # multiply by 4
la t3, exception_table
add t4, t3, t1
lw t5, 0(t4)
jr t5, 0 # jump to specific ISR
# specific ISR is expected to set epc
restore_st:
# restore register from stack on exit.
lw x1, 0x0(sp)
lw x2, 0x4(sp)
lw x3, 0x8(sp)
lw x4, 0xC(sp)
lw x5, 0x10(sp)
lw x6, 0x14(sp)
lw x7, 0x18(sp)
lw x8, 0x1C(sp)
lw x9, 0x20(sp)
lw x10, 0x24(sp)
lw x11, 0x28(sp)
lw x12, 0x2C(sp)
lw x13, 0x30(sp)
lw x14, 0x34(sp)
lw x15, 0x38(sp)
lw x16, 0x3C(sp)
lw x17, 0x40(sp)
lw x18, 0x44(sp)
lw x19, 0x48(sp)
lw x20, 0x4C(sp)
lw x21, 0x50(sp)
lw x22, 0x54(sp)
lw x23, 0x58(sp)
lw x24, 0x5C(sp)
lw x25, 0x60(sp)
lw x26, 0x64(sp)
lw x27, 0x68(sp)
lw x28, 0x6C(sp)
lw x29, 0x70(sp)
lw x30, 0x74(sp)
lw x31, 0x78(sp)
addi sp, sp, 124
mret
.section .text
.type trap_instr_addr_misalign, @function
trap_instr_addr_misalign:
# fatal error, report error and halt
addi sp, sp, 4
sw ra, 0(sp)
jal fail
lw ra, 0(sp)
la t0, restore_st
jr t0, 0
.section .text
.type trap_m_ecall, @function
trap_m_ecall:
addi sp, sp, -4
sw ra, 0(sp)
# select which system call based on a7.
# for this example we will just define the following.
# not standard with linux or anything.
# 0: execute a call back function
# 1: decrease privilege by 1 (m=>s, s=>u, u=>u)
# 2: increase privilege by 1 (m=>m, s=>m, u=>s)
# check a7
li t0, 1
beq a7, t0, trap_m_decrease_privilege
li t0, 2
beq a7, t0, trap_m_increase_privilege
# call call back function if not zero
la t1, isr_m_ecall_cb_fp
lw t0, 0(t1)
beq t0, x0, trap_m_ecall_skip_cb
jalr ra, t0, 0
trap_m_ecall_skip_cb:
# modify the mepc
csrrw t0, mepc, x0
addi t0, t0, 4
csrrw x0, mepc, t0
lw ra, 0(sp)
addi sp, sp, 4
la t0, restore_st
jr t0, 0
trap_m_decrease_privilege:
# read the mstatus register
csrrw t0, mstatus, x0
# 11 => 01, and 01 => 00.
# this is accomplished by clearing bit 12 and taking the old
# bit 12 as the new bit 11.
li t3, 0x00001800
and t1, t0, t3 # isolates the bits 12 and 11.
# shift right by 1.
srli t2, t1, 1
and t2, t2, t3 # this will clear bit 10.
li t3, ~0x00001800
and t4, t0, t3
or t0, t2, t4
csrrw x0, mstatus, t0
j trap_m_ecall_skip_cb
trap_m_increase_privilege:
# read the mstatus register
csrrw t0, mstatus, x0
# 11 => 01, and 01 => 00.
# this is accomplished by setting bit 11 and taking the old
# bit 11 as the new bit 12.
li t3, 0x00000800
li t4, ~0x00000800
and t1, t0, t3
slli t2, t1, 1 # shift left by 1.
or t2, t2, t3 # bit 11 is always set.
and t1, t0, t5
or t0, t1, t2
csrrw x0, mstatus, t0
j trap_m_ecall_skip_cb
.data
exception_table:
.int trap_instr_addr_misalign
.int trap_instr_addr_misalign #trap_instr_access_fault
.int trap_instr_addr_misalign #trap_illegal_instr
.int trap_instr_addr_misalign #trap_breakpoint
.int trap_instr_addr_misalign #trap_load_addr_misalign
.int trap_instr_addr_misalign #trap_load_access_fault
.int trap_instr_addr_misalign #trap_store_addr_misalign
.int trap_instr_addr_misalign #trap_store_access_fault
.int trap_m_ecall
.int trap_m_ecall
.int restore_st
.int trap_m_ecall
.int trap_instr_addr_misalign #trap_instr_page_fault
.int trap_instr_addr_misalign #trap_load_page_fault
.int restore_st
.int trap_instr_addr_misalign #trap_store_page_fault
#.data
#interrupt_table:
.int trap_instr_addr_misalign #trap_u_software
.int trap_instr_addr_misalign #trap_s_software
.int restore_st
.int trap_instr_addr_misalign #trap_m_software
.int trap_instr_addr_misalign #trap_u_timer
.int trap_instr_addr_misalign #trap_s_timer
.int restore_st
.int trap_instr_addr_misalign #trap_m_timer
.int trap_instr_addr_misalign #trap_u_external
.int trap_instr_addr_misalign #trap_s_external
.int restore_st
.int trap_instr_addr_misalign #trap_m_external
.int restore_st
.int restore_st
.int restore_st
.int restore_st
.section .data
.global isr_m_ecall_cb_fp
isr_m_ecall_cb_fp:
.int 0

60
testsBP/crt0/start.s Normal file
View File

@ -0,0 +1,60 @@
.section .init
.global _start
.type _start, @function
_start:
# Initialize global pointer
.option push
.option norelax
1:auipc gp, %pcrel_hi(__global_pointer$)
addi gp, gp, %pcrel_lo(1b)
.option pop
li x1, 0
li x2, 0
li x4, 0
li x5, 0
li x6, 0
li x7, 0
li x8, 0
li x9, 0
li x10, 0
li x11, 0
li x12, 0
li x13, 0
li x14, 0
li x15, 0
li x16, 0
li x17, 0
li x18, 0
li x19, 0
li x20, 0
li x21, 0
li x22, 0
li x23, 0
li x24, 0
li x25, 0
li x26, 0
li x27, 0
li x28, 0
li x29, 0
li x30, 0
li x31, 0
# set the stack pointer to the top of memory
# 0x8000_0000 + 64K - 8 bytes
li sp, 0x007FFFF8
jal ra, main
jal ra, _halt
.section .text
.global _halt
.type _halt, @function
_halt:
li gp, 1
li a0, 0
ecall
j _halt

244
testsBP/linker.x Normal file
View File

@ -0,0 +1,244 @@
OUTPUT_FORMAT("elf64-littleriscv", "elf64-littleriscv",
"elf64-littleriscv")
OUTPUT_ARCH(riscv)
ENTRY(_start)
SEARCH_DIR("/opt/riscv/riscv64-unknown-elf/lib");
SECTIONS
{
/* Read-only sections, merged into text segment: */
/* init segment to ensure we get a consistent start routine*/
. = 0x0000000000000000;
. = ALIGN(0x0);
.init : {
*(.init)
}
_start_end = .;
PROVIDE (__executable_start = SEGMENT_START("text-segment", 0x0)); . = SEGMENT_START("text-segment", _start_end);
.interp : { *(.interp) }
.note.gnu.build-id : { *(.note.gnu.build-id) }
.hash : { *(.hash) }
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
.rela.dyn :
{
*(.rela.init)
*(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
*(.rela.fini)
*(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
*(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
*(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
*(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
*(.rela.ctors)
*(.rela.dtors)
*(.rela.got)
*(.rela.sdata .rela.sdata.* .rela.gnu.linkonce.s.*)
*(.rela.sbss .rela.sbss.* .rela.gnu.linkonce.sb.*)
*(.rela.sdata2 .rela.sdata2.* .rela.gnu.linkonce.s2.*)
*(.rela.sbss2 .rela.sbss2.* .rela.gnu.linkonce.sb2.*)
*(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
PROVIDE_HIDDEN (__rela_iplt_start = .);
*(.rela.iplt)
PROVIDE_HIDDEN (__rela_iplt_end = .);
}
.rela.plt :
{
*(.rela.plt)
}
.init :
{
KEEP (*(SORT_NONE(.init)))
}
.plt : { *(.plt) }
.iplt : { *(.iplt) }
.text :
{
*(.text.unlikely .text.*_unlikely .text.unlikely.*)
*(.text.exit .text.exit.*)
*(.text.startup .text.startup.*)
*(.text.hot .text.hot.*)
*(.text .stub .text.* .gnu.linkonce.t.*)
/* .gnu.warning sections are handled specially by elf64.em. */
*(.gnu.warning)
}
.fini :
{
KEEP (*(SORT_NONE(.fini)))
}
PROVIDE (__etext = .);
PROVIDE (_etext = .);
PROVIDE (etext = .);
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
.sdata2 :
{
*(.sdata2 .sdata2.* .gnu.linkonce.s2.*)
}
.sbss2 : { *(.sbss2 .sbss2.* .gnu.linkonce.sb2.*) }
.eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) }
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) *(.eh_frame.*) }
.gcc_except_table : ONLY_IF_RO { *(.gcc_except_table .gcc_except_table.*) }
.gnu_extab : ONLY_IF_RO { *(.gnu_extab*) }
/* These sections are generated by the Sun/Oracle C++ compiler. */
.exception_ranges : ONLY_IF_RO { *(.exception_ranges*) }
/* Adjust the address for the data segment. We want to adjust up to
the same address within the page on the next page up. */
. = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
/* Exception handling */
.eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) *(.eh_frame.*) }
.gnu_extab : ONLY_IF_RW { *(.gnu_extab) }
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
.exception_ranges : ONLY_IF_RW { *(.exception_ranges*) }
/* Thread Local Storage sections */
.tdata :
{
PROVIDE_HIDDEN (__tdata_start = .);
*(.tdata .tdata.* .gnu.linkonce.td.*)
}
.tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
.preinit_array :
{
PROVIDE_HIDDEN (__preinit_array_start = .);
KEEP (*(.preinit_array))
PROVIDE_HIDDEN (__preinit_array_end = .);
}
.init_array :
{
PROVIDE_HIDDEN (__init_array_start = .);
KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors))
PROVIDE_HIDDEN (__init_array_end = .);
}
.fini_array :
{
PROVIDE_HIDDEN (__fini_array_start = .);
KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors))
PROVIDE_HIDDEN (__fini_array_end = .);
}
.ctors :
{
/* gcc uses crtbegin.o to find the start of
the constructors, so we make sure it is
first. Because this is a wildcard, it
doesn't matter if the user does not
actually link against crtbegin.o; the
linker won't look for a file to match a
wildcard. The wildcard also means that it
doesn't matter which directory crtbegin.o
is in. */
KEEP (*crtbegin.o(.ctors))
KEEP (*crtbegin?.o(.ctors))
/* We don't want to include the .ctor section from
the crtend.o file until after the sorted ctors.
The .ctor section from the crtend file contains the
end of ctors marker and it must be last */
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors))
KEEP (*(SORT(.ctors.*)))
KEEP (*(.ctors))
}
.dtors :
{
KEEP (*crtbegin.o(.dtors))
KEEP (*crtbegin?.o(.dtors))
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors))
KEEP (*(SORT(.dtors.*)))
KEEP (*(.dtors))
}
.jcr : { KEEP (*(.jcr)) }
.data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
.dynamic : { *(.dynamic) }
. = DATA_SEGMENT_RELRO_END (0, .);
.data :
{
__DATA_BEGIN__ = .;
*(.data .data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
}
.data1 : { *(.data1) }
.got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
/* We want the small data sections together, so single-instruction offsets
can access them all, and initialized data all before uninitialized, so
we can shorten the on-disk segment size. */
.sdata :
{
__SDATA_BEGIN__ = .;
*(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata .srodata.*)
*(.sdata .sdata.* .gnu.linkonce.s.*)
}
_edata = .; PROVIDE (edata = .);
. = .;
__bss_start = .;
.sbss :
{
*(.dynsbss)
*(.sbss .sbss.* .gnu.linkonce.sb.*)
*(.scommon)
}
.bss :
{
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
*(COMMON)
/* Align here to ensure that the .bss section occupies space up to
_end. Align after .bss to ensure correct alignment even if the
.bss section disappears because there are no input sections.
FIXME: Why do we need it? When there is no .bss section, we do not
pad the .data section. */
. = ALIGN(. != 0 ? 64 / 8 : 1);
}
. = ALIGN(64 / 8);
. = SEGMENT_START("ldata-segment", .);
. = ALIGN(64 / 8);
__BSS_END__ = .;
__global_pointer$ = MIN(__SDATA_BEGIN__ + 0x800,
MAX(__DATA_BEGIN__ + 0x800, __BSS_END__ - 0x800));
_end = .; PROVIDE (end = .);
. = DATA_SEGMENT_END (.);
/* Stabs debugging sections. */
.stab 0 : { *(.stab) }
.stabstr 0 : { *(.stabstr) }
.stab.excl 0 : { *(.stab.excl) }
.stab.exclstr 0 : { *(.stab.exclstr) }
.stab.index 0 : { *(.stab.index) }
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
.gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) }
/* DWARF debug sections.
Symbols in the DWARF debugging sections are relative to the beginning
of the section so we begin them at 0. */
/* DWARF 1 */
.debug 0 : { *(.debug) }
.line 0 : { *(.line) }
/* GNU DWARF 1 extensions */
.debug_srcinfo 0 : { *(.debug_srcinfo) }
.debug_sfnames 0 : { *(.debug_sfnames) }
/* DWARF 1.1 and DWARF 2 */
.debug_aranges 0 : { *(.debug_aranges) }
.debug_pubnames 0 : { *(.debug_pubnames) }
/* DWARF 2 */
.debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
.debug_abbrev 0 : { *(.debug_abbrev) }
.debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) }
.debug_frame 0 : { *(.debug_frame) }
.debug_str 0 : { *(.debug_str) }
.debug_loc 0 : { *(.debug_loc) }
.debug_macinfo 0 : { *(.debug_macinfo) }
/* SGI/MIPS DWARF 2 extensions */
.debug_weaknames 0 : { *(.debug_weaknames) }
.debug_funcnames 0 : { *(.debug_funcnames) }
.debug_typenames 0 : { *(.debug_typenames) }
.debug_varnames 0 : { *(.debug_varnames) }
/* DWARF 3 */
.debug_pubtypes 0 : { *(.debug_pubtypes) }
.debug_ranges 0 : { *(.debug_ranges) }
/* DWARF Extension. */
.debug_macro 0 : { *(.debug_macro) }
.debug_addr 0 : { *(.debug_addr) }
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
/DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) }
}

93
testsBP/makefile.inc Normal file
View File

@ -0,0 +1,93 @@
CEXT := c
CPPEXT := cpp
AEXT := s
SEXT := S
SRCEXT := \([$(CEXT)$(AEXT)$(SEXT)]\|$(CPPEXT)\)
OBJEXT := o
DEPEXT := d
SRCDIR := .
BUILDDIR := OBJ
LINKER := ${ROOT}/linker.x
SOURCES ?= $(shell find $(SRCDIR) -type f -regex ".*\.$(SRCEXT)" | sort)
OBJECTS := $(SOURCES:.$(CEXT)=.$(OBJEXT))
OBJECTS := $(OBJECTS:.$(AEXT)=.$(OBJEXT))
OBJECTS := $(OBJECTS:.$(SEXT)=.$(OBJEXT))
OBJECTS := $(OBJECTS:.$(CPPEXT)=.$(OBJEXT))
OBJECTS := $(patsubst $(SRCDIR)/%,$(BUILDDIR)/%,$(OBJECTS))
#Default Make
all: directories $(TARGET).memfile
#Remake
remake: clean all
#Make the Directories
directories:
@mkdir -p $(TARGETDIR)
@mkdir -p $(BUILDDIR)
clean:
rm -rf $(BUILDDIR) $(TARGETDIR) *.memfile *.objdump
#Needed for building additional library projects
ifdef LIBRARY_DIRS
LIBS+=${LIBRARY_DIRS:%=-L%} ${LIBRARY_FILES:%=-l%}
INC+=${LIBRARY_DIRS:%=-I%}
${LIBRARY_DIRS}:
make -C $@ -j 1
.PHONY: $(LIBRARY_DIRS) $(TARGET)
endif
#Pull in dependency info for *existing* .o files
-include $(OBJECTS:.$(OBJEXT)=.$(DEPEXT))
#Link
$(TARGET): $(OBJECTS) $(LIBRARY_DIRS)
$(CC) $(LINK_FLAGS) -g -o $(TARGET) $(OBJECTS) ${LIBS} -T ${LINKER}
#Compile
$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(CEXT)
@mkdir -p $(dir $@)
$(CC) $(CFLAGS) $(INC) -c -o $@ $< > $(BUILDDIR)/$*.list
@$(CC) $(CFLAGS) $(INC) -MM $(SRCDIR)/$*.$(CEXT) > $(BUILDDIR)/$*.$(DEPEXT)
@cp -f $(BUILDDIR)/$*.$(DEPEXT) $(BUILDDIR)/$*.$(DEPEXT).tmp
@sed -e 's|.*:|$(BUILDDIR)/$*.$(OBJEXT):|' < $(BUILDDIR)/$*.$(DEPEXT).tmp > $(BUILDDIR)/$*.$(DEPEXT)
@sed -e 's/.*://' -e 's/\\$$//' < $(BUILDDIR)/$*.$(DEPEXT).tmp | fmt -1 | sed -e 's/^ *//' -e 's/$$/:/' >> $(BUILDDIR)/$*.$(DEPEXT)
@rm -f $(BUILDDIR)/$*.$(DEPEXT).tmp
# gcc won't output dependencies for assembly files for some reason
# most asm files don't have dependencies so the echo will work for now.
$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(AEXT)
@mkdir -p $(dir $@)
$(CC) $(CFLAGS) -c -o $@ $< > $(BUILDDIR)/$*.list
@echo $@: $< > $(BUILDDIR)/$*.$(DEPEXT)
$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(SEXT)
@mkdir -p $(dir $@)
$(CC) $(CFLAGS) $(INC) -c -o $@ $< > $(BUILDDIR)/$*.list
@echo $@: $< > $(BUILDDIR)/$*.$(DEPEXT)
# C++
$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(CPPEXT)
@mkdir -p $(dir $@)
$(CC) $(CFLAGS) $(INC) -c -o $@ $< > $(BUILDDIR)/$*.list
@$(CC) $(CFLAGS) $(INC) -MM $(SRCDIR)/$*.$(CPPEXT) > $(BUILDDIR)/$*.$(DEPEXT)
@cp -f $(BUILDDIR)/$*.$(DEPEXT) $(BUILDDIR)/$*.$(DEPEXT).tmp
@sed -e 's|.*:|$(BUILDDIR)/$*.$(OBJEXT):|' < $(BUILDDIR)/$*.$(DEPEXT).tmp > $(BUILDDIR)/$*.$(DEPEXT)
@sed -e 's/.*://' -e 's/\\$$//' < $(BUILDDIR)/$*.$(DEPEXT).tmp | fmt -1 | sed -e 's/^ *//' -e 's/$$/:/' >> $(BUILDDIR)/$*.$(DEPEXT)
@rm -f $(BUILDDIR)/$*.$(DEPEXT).tmp
# convert to hex
$(TARGET).memfile: $(TARGET)
@echo 'Making object dump file.'
@riscv64-unknown-elf-objdump -D $< > $<.objdump
@echo 'Making memory file'
exe2memfile0.pl $<
extractFunctionRadix.sh $<.objdump
cp $(TARGETDIR)/* ../../imperas-riscv-tests/work/rv64BP/

View File

@ -0,0 +1,3 @@
Matt wrote this using STL.
It is GPL'ed.

View File

@ -0,0 +1,19 @@
TARGETDIR := qsort
TARGET := $(TARGETDIR)/$(TARGETDIR).elf
ROOT := ..
LIBRARY_DIRS := ${ROOT}/crt0
LIBRARY_FILES := crt0
MARCH :=-march=rv64ic
MABI :=-mabi=lp64
LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map
CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align
CC=riscv64-unknown-elf-gcc
DA=riscv64-unknown-elf-objdump -d
include $(ROOT)/makefile.inc

File diff suppressed because it is too large Load Diff

19
testsBP/sieve/Makefile Normal file
View File

@ -0,0 +1,19 @@
TARGETDIR := sieve
TARGET := $(TARGETDIR)/$(TARGETDIR).elf
ROOT := ..
LIBRARY_DIRS := ${ROOT}/crt0
LIBRARY_FILES := crt0
MARCH :=-march=rv64ic
MABI :=-mabi=lp64
LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map
CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align
CC=riscv64-unknown-elf-gcc
DA=riscv64-unknown-elf-objdump -d
include $(ROOT)/makefile.inc

101
testsBP/sieve/sieve.c Normal file
View File

@ -0,0 +1,101 @@
/*
* Filename:
*
* sieve.c
*
* Description:
*
* The Sieve of Eratosthenes benchmark, from Byte Magazine
* early 1980s, when a PC would do well to run this in 10
* seconds. This version really does count prime numbers
* but omits the numbers 1, 3 and all even numbers. The
* expected count is 1899.
*
*/
#include <sys/time.h>
#include <stdio.h>
#define SIZE 8190
//#define SIZE 8388608
double time_diff(struct timeval x , struct timeval y);
int sieve () {
unsigned char flags [SIZE + 1];
int iter;
int count;
for (iter = 1; iter <= 10; iter++)
{
int i, prime, k;
count = 0;
for (i = 0; i <= SIZE; i++)
flags [i] = 1;
for (i = 0; i <= SIZE; i++)
{
if (flags [i])
{
prime = i + i + 3;
k = i + prime;
while (k <= SIZE)
{
flags [k] = 0;
k += prime;
}
count++;
}
}
}
return count;
}
int main () {
int ans;
//struct timeval before , after;
//gettimeofday(&before , NULL);
ans = sieve ();
//gettimeofday(&after , NULL);
if (ans != 1899)
printf ("Sieve result wrong, ans = %d, expected 1899", ans);
//printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) );
printf("Round 2\n");
//gettimeofday(&before , NULL);
ans = sieve ();
//gettimeofday(&after , NULL);
if (ans != 1899)
printf ("Sieve result wrong, ans = %d, expected 1899", ans);
//printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) );
return 0;
}
double time_diff(struct timeval x , struct timeval y)
{
double x_ms , y_ms , diff;
x_ms = (double)x.tv_sec*1000000 + (double)x.tv_usec;
y_ms = (double)y.tv_sec*1000000 + (double)y.tv_usec;
diff = (double)y_ms - (double)x_ms;
return diff;
}

18
testsBP/simple/Makefile Normal file
View File

@ -0,0 +1,18 @@
TARGETDIR := simple
TARGET := $(TARGETDIR)/$(TARGETDIR).elf
ROOT := ..
LIBRARY_DIRS := ${ROOT}/crt0
LIBRARY_FILES := crt0
MARCH :=-march=rv64ic
MABI :=-mabi=lp64
LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map
CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align
CC=riscv64-unknown-elf-gcc
DA=riscv64-unknown-elf-objdump -d
include $(ROOT)/makefile.inc

11
testsBP/simple/fail.s Normal file
View File

@ -0,0 +1,11 @@
# Ross Thompson
# March 17, 2021
# Oklahoma State University
.section .text
.global fail
.type fail, @function
fail:
li gp, 1
li a0, -1
ecall

8
testsBP/simple/header.h Normal file
View File

@ -0,0 +1,8 @@
#ifndef __header
#define __header
int fail();
int simple_csrbr_test();
int lbu_test();
int icache_spill_test();
#endif

19
testsBP/simple/lbu_test.s Normal file
View File

@ -0,0 +1,19 @@
.section .text
.global lbu_test
.type lbu_test, @function
lbu_test:
li t0, 0x80000
lbu t1, 0(t0)
pass:
li a0, 0
done:
ret
fail:
li a0, -1
j done

16
testsBP/simple/main.c Normal file
View File

@ -0,0 +1,16 @@
#include "header.h"
int main(){
int res = icache_spill_test();
if (res < 0) {
fail();
return 0;
}else {
if((res = lbu_test()) < 0) {
fail();
return 0;
}
res = simple_csrbr_test();
return 0;
}
}

61
testsBP/simple/sample.s Normal file
View File

@ -0,0 +1,61 @@
.section .text
.global simple_csrbr_test
.type simple_csrbr_test, @function
simple_csrbr_test:
# step 1 enable the performance counters
# by default the hardware enables all performance counters
# however we will eventually want to manually enable incase
# some other code disables them
# br count is counter 5
# br mp count is counter 4
li t0, 0x30
csrrc x0, 0x320, t0 # clear bits 4 and 5 of inhibit register.
# step 2 read performance counters into general purpose registers
csrrw t2, 0xB05, x0 # t2 = BR COUNT (perf count 5)
csrrw t3, 0xB04, x0 # t3 = BRMP COUNT (perf count 4)
# step 3 simple loop to show the counters are updated.
li t0, 0 # this is the loop counter
li t1, 100 # this is the loop end condition
# for(t1 = 0; t1 < t0; t1++);
loop:
addi t0, t0, 1
blt t0, t1, loop
loop_done:
# step 2 read performance counters into general purpose registers
csrrw t4, 0xB05, x0 # t4 = BR COUNT (perf count 5)
csrrw t5, 0xB04, x0 # t5 = BRMP COUNT (perf count 4)
sub t2, t4, t2 # this is the number of branch instructions committed.
sub t3, t5, t3 # this is the number of branch mispredictions committed.
# now check if the branch count equals 100 and if the branch
bne t4, t2, fail
li t5, 3
bne t3, t5, fail
pass:
li a0, 0
done:
li t0, 0x30
csrrs x0, 0x320, t0 # set bits 4 and 5
ret
fail:
li a0, -1
j done
.data
sample_data:
.int 0

View File

@ -0,0 +1,200 @@
#!/usr/bin/perl -w
# exe2memfile.pl
# David_Harris@hmc.edu 26 November 2020
# Converts an executable file to a series of 32-bit hex instructions
# to read into a Verilog simulation with $readmemh
use File::stat;
use IO::Handle;
if ($#ARGV == -1) {
die("Usage: $0 executable_file");
}
# array to hold contents of memory file
my @memfilebytes = (0)*16384*4;
my $maxaddress = 0;
STDOUT->autoflush(1);
# *** Ross Thompson I think there is a bug here needs to be +1
print ("Processing $#ARGV memfiles: \n");
my $frac = $#ARGV/10;
for(my $i=0; $i<=$#ARGV; $i++) {
if ($i < 10 || $i % $frac == 0) { print ("$i ") };
my $fname = $ARGV[$i];
# print "fname = $fname";
my $ofile = $fname.".objdump";
my $memfile = $fname.".memfile";
my $needsprocessing = 0;
if (!-e $memfile) { $needsprocessing = 1; } # create memfile if it doesn't exist
else {
my $osb = stat($ofile) || die("Can't stat $ofile");
my $msb = stat($memfile) || die("Can't stat $memfile");
my $otime = $osb->mtime;
my $mtime = $msb->mtime;
if ($otime > $mtime) { $needsprocessing = 1; } # is memfile out of date?
}
if ($needsprocessing == 1) {
open(FILE, $ofile) || die("Can't read $ofile");
my $mode = 0; # parse for code
my $section = "";
my $data = "";
my $address;
my $first = 0;
my $firstAddress;
# initialize to all zeros;
# *** need to fix the zeroing range. Not always 64K
for (my $i=0; $i < 65536*4; $i++) {
$memfilebytes[$i] = "00";
}
while(<FILE>) {
# objdump fill is divided into several .sections of which only some we want to actually process.
# In general we want everything except the .comment and .*attributes
if (/Disassembly of section (.*):/) {
$section = $1;
print ("setting section to $section\n");
} else {
# now check if the section is one we are interested in processing.
#if ($section ne ".comment" && $section ne ".riscv.attributes" && $section =~ /\.debug.*/) {
if ($section =~ "\.init|\.text|\..*data|\..*bss") {
# the structure is: possible space(s) hex number: possible space(s) hex number space(s) junk
# there are also lines we need to skip: possible space(s) hex number <string>:
if (/^\s*([0-9A-Fa-f]{1,16}):\s+([0-9A-Fa-f]+).*$/) {
$address = &fixadr($1);
if ($first == 0) {
$first = 1;
$firstAddress = $address;
}
$data = $2;
&emitData($address, $data);
# my $len = length($data);
# for (my $i=0; $i<$len/2; $i++) {
# $memfilebytes[$address+$i] = substr($data, $len-2-2*$i, 2);
# }
# print ("Addr $address $data\n");
# } elsif (/^\s*\.\.\./) {
# print ("Got ...\n");
# } else {
# print ("No match\n");
}
}
}
# # *** this mode stuff does not work if a section is missing or reordered.
# if ($mode == 0) { # Parse code
# # print("Examining $_\n");
# if (/^\s*(\S{1,16}):\s+(\S+)\s+/) {
# $address = &fixadr($1);
# my $instr = $2;
# my $len = length($instr);
# for (my $i=0; $i<$len/2; $i++) {
# $memfilebytes[$address+$i] = substr($instr, $len-2-2*$i, 2);
# }
# print ("address $address $instr\n");
# }
# if (/Disassembly of section .data:/) { $mode = 1;}
# } elsif ($mode == 1) { # Parse data segment
# if (/^\s*(\S{1,16}):\s+(.*)/) {
# $address = &fixadr($1);
# # print "addresss $address maxaddress $maxaddress\n";
# if ($address > $maxaddress) { $maxaddress = $address; }
# my $line = $2;
# # merge chunks with spaces
# # *** might need to change
# $line =~ s/(\S)\s(\S)/$1$2/g;
# # strip off comments
# $line =~ /^(\S*)/;
# $payload = $1;
# &emitData($address, $payload);
# }
# if (/Disassembly of section .comment:/) { $mode = 2; }
# } elsif ($mode == 2) { # parse the comment section
# if (/Disassembly of section .riscv.attributes:/) { $mode = 3; }
# }
}
close(FILE);
$maxaddress = $address + 32; # pad some zeros at the end
# print to memory file
# *** this is a problem
if ($fname =~ /rv32/) {
open(MEMFILE, ">$memfile") || die("Can't write $memfile");
for (my $i=$firstAddress; $i<= $maxaddress; $i = $i + 4) {
for ($j=3; $j>=0; $j--) {
no warnings 'uninitialized';
my $value = $memfilebytes[$i+$j];
if ($value eq ""){
print MEMFILE "00";
} else {
print MEMFILE "$memfilebytes[$i+$j]";
}
}
print MEMFILE "\n";
}
close(MEMFILE);
} else {
open(MEMFILE, ">$memfile") || die("Can't write $memfile");
for (my $i=$firstAddress; $i<= $maxaddress; $i = $i + 8) {
for ($j=7; $j>=0; $j--) {
no warnings 'uninitialized';
my $value = $memfilebytes[$i+$j];
if ($value eq ""){
print MEMFILE "00";
} else {
print MEMFILE "$memfilebytes[$i+$j]";
}
}
print MEMFILE "\n";
}
close(MEMFILE);
}
}
}
print("\n");
sub emitData {
# print the data portion of the ELF into a memroy file, including 0s for empty stuff
# deal with endianness
my $address = shift;
my $payload = shift;
# print("Emitting data. address = $address payload = $payload\n");
my $len = length($payload);
if ($len <= 8) {
# print word or halfword
for(my $i=0; $i<$len/2; $i++) {
my $adr = $address+$i;
my $b = substr($payload, $len-2-2*$i, 2);
$memfilebytes[$adr] = $b;
# print(" $adr $b\n");
}
} elsif ($len == 12) {
# weird case of three halfwords on line
&emitData($address, substr($payload, 0, 4));
&emitData($address+2, substr($payload, 4, 4));
&emitData($address+4, substr($payload, 8, 4));
} else {
&emitData($address, substr($payload, 0, 8));
&emitData($address+4, substr($payload, 8, $len-8));
}
}
sub fixadr {
# strip off leading 8 from address and convert to decimal
# if the leading 8 is not present don't remove.
my $adr = shift;
#print "addr $adr\n";
# start at 0
return hex($adr);
# start at 8
#if ($adr =~ s/^8/0/) { return hex($adr); }
#else { return hex($adr) }
}

View File

@ -107,3 +107,4 @@
`define TWO_BIT_PRELOAD "../config/busybear/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/busybear/BTBPredictor.txt"
`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0

View File

@ -108,3 +108,4 @@
`define TWO_BIT_PRELOAD "../config/coremark/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/coremark/BTBPredictor.txt"
`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0

View File

@ -105,3 +105,4 @@
`define TWO_BIT_PRELOAD "../config/coremark_bare/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/coremark_bare/BTBPredictor.txt"
`define BPTYPE "BPGSHARE"
`define TESTSBP 0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,101 @@
//////////////////////////////////////////
// wally-config.vh
//
// Written: David_Harris@hmc.edu 4 January 2021
// Modified: Brett Mathis
//
// Purpose: Specify which features are configured
// Macros to determine which modes are supported based on MISA
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// RV32 or RV64: XLEN = 32 or 64
`define XLEN 64
//`define MISA (32'h00000104)
`define MISA (32'h00000104 | 1<<5 | 1<<18 | 1 << 20)
`define A_SUPPORTED ((`MISA >> 0) % 2 == 1)
`define C_SUPPORTED ((`MISA >> 2) % 2 == 1)
`define D_SUPPORTED ((`MISA >> 3) % 2 == 1)
`define F_SUPPORTED ((`MISA >> 5) % 2 == 1)
`define M_SUPPORTED ((`MISA >> 12) % 2 == 1)
`define S_SUPPORTED ((`MISA >> 18) % 2 == 1)
`define U_SUPPORTED ((`MISA >> 20) % 2 == 1)
`define ZCSR_SUPPORTED 1
`define COUNTERS 31
`define ZCOUNTERS_SUPPORTED 1
// N-mode user-level interrupts are depricated per Andrew Waterman 1/13/21
//`define N_SUPPORTED ((MISA >> 13) % 2 == 1)
`define N_SUPPORTED 0
`define M_MODE (2'b11)
`define S_MODE (2'b01)
`define U_MODE (2'b00)
// Microarchitectural Features
`define UARCH_PIPELINED 1
`define UARCH_SUPERSCALR 0
`define UARCH_SINGLECYCLE 0
`define MEM_DCACHE 0
`define MEM_DTIM 1
`define MEM_ICACHE 0
`define MEM_VIRTMEM 0
// Address space
`define RESET_VECTOR 64'h0000000000000000
// Bus Interface width
`define AHBW 64
// Peripheral Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
`define BOOTTIMBASE 32'h00800000
`define BOOTTIMRANGE 32'h00003FFF
`define TIMBASE 32'h00000000
`define TIMRANGE 32'h007FFFFF
`define CLINTBASE 32'h02000000
`define CLINTRANGE 32'h0000FFFF
`define GPIOBASE 32'h10012000
`define GPIORANGE 32'h000000FF
`define UARTBASE 32'h10000000
`define UARTRANGE 32'h00000007
`define PLICBASE 32'h0C000000
`define PLICRANGE 32'h03FFFFFF
// Test modes
// Tie GPIO outputs back to inputs
`define GPIO_LOOPBACK_TEST 0
// Busybear special CSR config to match OVPSim
`define OVPSIM_CSR_CONFIG 0
// Hardware configuration
`define UART_PRESCALE 1
/* verilator lint_off STMTDLY */
/* verilator lint_off WIDTH */
/* verilator lint_off ASSIGNDLY */
/* verilator lint_off PINCONNECTEMPTY */
`define TWO_BIT_PRELOAD "../config/rv64icfd/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv64icfd/BTBPredictor.txt"
`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 1

View File

@ -0,0 +1,31 @@
//////////////////////////////////////////
// wally-constants.vh
//
// Written: tfleming@hmc.edu 4 March 2021
// Modified:
//
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
// These macros should not be changed, except in the event of an
// update to the architecture or particularly special circumstances.
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// Virtual Memory Constants (sv39)
`define VPN_BITS 27
`define PPN_BITS 44
`define PA_BITS 56

View File

@ -9,7 +9,7 @@ add wave -divider
#add wave /testbench/dut/hart/ebu/IReadF
add wave /testbench/dut/hart/DataStall
add wave /testbench/dut/hart/InstrStall
add wave /testbench/dut/hart/ICacheStallF
add wave /testbench/dut/hart/StallF
add wave /testbench/dut/hart/StallD
add wave /testbench/dut/hart/StallE
@ -24,16 +24,8 @@ add wave -divider
add wave -hex /testbench/dut/hart/ifu/PCF
add wave -hex /testbench/dut/hart/ifu/PCD
add wave -hex /testbench/dut/hart/ifu/InstrD
add wave /testbench/InstrDName
add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD
add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD
add wave -divider
add wave -hex /testbench/dut/hart/ifu/ic/InstrPAdrF
add wave /testbench/dut/hart/ifu/ic/DelayF
add wave /testbench/dut/hart/ifu/ic/DelaySideF
add wave /testbench/dut/hart/ifu/ic/DelayD
add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD
add wave -divider
add wave -hex /testbench/dut/hart/ifu/PCE
@ -64,7 +56,6 @@ add wave -hex /testbench/dut/hart/ebu/HRDATA
add wave -hex /testbench/dut/hart/ebu/HWRITE
add wave -hex /testbench/dut/hart/ebu/HWDATA
add wave -hex /testbench/dut/hart/ebu/CaptureDataM
add wave -hex /testbench/dut/hart/ebu/InstrStall
add wave -divider
add wave -hex /testbench/dut/uncore/dtim/*

View File

@ -0,0 +1,76 @@
add wave /testbench/clk
add wave /testbench/reset
add wave -divider
#add wave /testbench/dut/hart/ebu/IReadF
add wave /testbench/dut/hart/DataStall
add wave /testbench/dut/hart/ICacheStallF
add wave /testbench/dut/hart/StallF
add wave /testbench/dut/hart/StallD
add wave /testbench/dut/hart/StallE
add wave /testbench/dut/hart/StallM
add wave /testbench/dut/hart/StallW
add wave /testbench/dut/hart/FlushD
add wave /testbench/dut/hart/FlushE
add wave /testbench/dut/hart/FlushM
add wave /testbench/dut/hart/FlushW
add wave -divider
add wave -hex /testbench/dut/hart/ifu/PCF
add wave -hex /testbench/dut/hart/ifu/PCD
add wave -hex /testbench/dut/hart/ifu/InstrD
add wave /testbench/InstrDName
add wave -divider
add wave -hex /testbench/dut/hart/ifu/PCE
add wave -hex /testbench/dut/hart/ifu/InstrE
add wave /testbench/InstrEName
add wave -hex /testbench/dut/hart/ieu/dp/SrcAE
add wave -hex /testbench/dut/hart/ieu/dp/SrcBE
add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE
#add wave /testbench/dut/hart/ieu/dp/PCSrcE
add wave -divider
add wave -hex /testbench/dut/hart/ifu/PCM
add wave -hex /testbench/dut/hart/ifu/InstrM
add wave /testbench/InstrMName
add wave /testbench/dut/uncore/dtim/memwrite
add wave -hex /testbench/dut/uncore/HADDR
add wave -hex /testbench/dut/uncore/HWDATA
add wave -divider
add wave -hex /testbench/dut/hart/ebu/MemReadM
add wave -hex /testbench/dut/hart/ebu/InstrReadF
add wave -hex /testbench/dut/hart/ebu/BusState
add wave -hex /testbench/dut/hart/ebu/NextBusState
add wave -hex /testbench/dut/hart/ebu/HADDR
add wave -hex /testbench/dut/hart/ebu/HREADY
add wave -hex /testbench/dut/hart/ebu/HTRANS
add wave -hex /testbench/dut/hart/ebu/HRDATA
add wave -hex /testbench/dut/hart/ebu/HWRITE
add wave -hex /testbench/dut/hart/ebu/HWDATA
add wave -hex /testbench/dut/hart/ebu/ReadDataM
add wave -divider
add wave /testbench/dut/hart/ebu/CaptureDataM
add wave /testbench/dut/hart/ebu/CapturedDataAvailable
add wave /testbench/dut/hart/StallW
add wave -hex /testbench/dut/hart/ebu/CapturedData
add wave -hex /testbench/dut/hart/ebu/ReadDataWnext
add wave -hex /testbench/dut/hart/ebu/ReadDataW
add wave -hex /testbench/dut/hart/ifu/PCW
add wave -hex /testbench/dut/hart/ifu/InstrW
add wave /testbench/InstrWName
add wave /testbench/dut/hart/ieu/dp/RegWriteW
add wave -hex /testbench/dut/hart/ebu/ReadDataW
add wave -hex /testbench/dut/hart/ieu/dp/ResultW
add wave -hex /testbench/dut/hart/ieu/dp/RdW
add wave -divider
add wave -hex /testbench/dut/hart/dmem/*
add wave -hex /testbench/dut/hart/dmem/genblk1/*
add wave -divider
add wave -hex -r /testbench/*

View File

@ -10,7 +10,7 @@ add wave /testbench/reset
add wave -divider
#add wave /testbench/dut/hart/ebu/IReadF
add wave /testbench/dut/hart/DataStall
add wave /testbench/dut/hart/InstrStall
add wave /testbench/dut/hart/ICacheStallF
add wave /testbench/dut/hart/StallF
add wave /testbench/dut/hart/StallD
add wave /testbench/dut/hart/StallE
@ -27,11 +27,6 @@ add wave -hex /testbench/dut/hart/ifu/PCD
add wave -hex /testbench/dut/hart/ifu/InstrD
add wave /testbench/InstrDName
add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD
add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD
add wave /testbench/dut/hart/ifu/ic/DelayF
add wave /testbench/dut/hart/ifu/ic/DelaySideF
add wave /testbench/dut/hart/ifu/ic/DelayD
add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD
add wave -divider
add wave -hex /testbench/dut/hart/ifu/PCE
add wave -hex /testbench/dut/hart/ifu/InstrE

View File

@ -1,10 +1,11 @@
onerror {resume}
quietly virtual function -install /testbench/dut/hart/ifu/icache/cachemem -env /testbench/dut/hart/ifu/icache/cachemem { &{/testbench/dut/hart/ifu/icache/cachemem/OldReadPAdr[4], /testbench/dut/hart/ifu/icache/cachemem/OldReadPAdr[3], /testbench/dut/hart/ifu/icache/cachemem/OldReadPAdr[2], /testbench/dut/hart/ifu/icache/cachemem/OldReadPAdr[1], /testbench/dut/hart/ifu/icache/cachemem/OldReadPAdr[0] }} offset
quietly WaveActivateNextPane {} 0
add wave -noupdate /testbench/clk
add wave -noupdate /testbench/reset
add wave -noupdate /testbench/test
add wave -noupdate -radix ascii /testbench/memfilename
add wave -noupdate -expand -group {Execution Stage} /testbench/functionRadix/function_radix/FunctionName
add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE
add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE
@ -26,7 +27,6 @@ add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/CSR
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/RetM
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/TrapM
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/LoadStallD
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/InstrStall
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/DataStall
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/MulDivStallD
add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF
@ -39,11 +39,6 @@ add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbe
add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE
add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM
add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW
add wave -noupdate /testbench/dut/hart/hzu/StallFCause_Q
add wave -noupdate /testbench/dut/hart/hzu/StallDCause_Q
add wave -noupdate /testbench/dut/hart/hzu/StallECause_Q
add wave -noupdate /testbench/dut/hart/hzu/StallMCause_Q
add wave -noupdate /testbench/dut/hart/hzu/StallWCause_Q
add wave -noupdate -group Bpred -expand -group direction -divider Update
add wave -noupdate -group Bpred -expand -group direction /testbench/dut/hart/ifu/bpred/Predictor/DirPredictor/UpdatePC
add wave -noupdate -group Bpred -expand -group direction /testbench/dut/hart/ifu/bpred/Predictor/DirPredictor/UpdateEN
@ -62,37 +57,38 @@ add wave -noupdate -group Bpred -group BTB -divider Lookup
add wave -noupdate -group Bpred -group BTB /testbench/dut/hart/ifu/bpred/TargetPredictor/TargetPC
add wave -noupdate -group Bpred -group BTB /testbench/dut/hart/ifu/bpred/TargetPredictor/Valid
add wave -noupdate -group Bpred /testbench/dut/hart/ifu/bpred/BPPredWrongE
add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrFName
add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrD
add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrE
add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrM
add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNextF
add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCF
add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCPlus2or4F
add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredPCF
add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext0F
add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext1F
add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/SelBPPredF
add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredWrongE
add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PrivilegedChangePCM
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCNextF
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCF
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCPlus2or4F
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredPCF
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext0F
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext1F
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/SelBPPredF
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredWrongE
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PrivilegedChangePCM
add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ifu/InstrD
add wave -noupdate -group {Decode Stage} /testbench/InstrDName
add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/c/RegWriteD
add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/RdD
add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs1D
add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs2D
add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/rf
add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/a1
add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/a2
add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/a3
add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/rd1
add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/rd2
add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/we3
add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/wd3
add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ALUResultW
add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ReadDataW
add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW
add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW
add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW
add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rf
add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a1
add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a2
add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a3
add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rd1
add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rd2
add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/we3
add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/wd3
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ALUResultW
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ReadDataW
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/a
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/b
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol
@ -105,11 +101,21 @@ add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/zero
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/neg
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/lt
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/ltu
add wave -noupdate /testbench/InstrFName
add wave -noupdate -expand -group dcache /testbench/dut/hart/MemAdrM
add wave -noupdate -expand -group {dcache memory} /testbench/dut/hart/dmem/MemReadM
add wave -noupdate -expand -group {dcache memory} /testbench/dut/hart/dmem/MemWriteM
add wave -noupdate -expand -group {dcache memory} /testbench/dut/hart/dmem/MemAckW
add wave -noupdate -expand -group dcache -expand -group {cpu request} /testbench/dut/hart/dmem/MemRWM
add wave -noupdate -expand -group dcache -expand -group {cpu request} /testbench/dut/hart/dmem/AtomicM
add wave -noupdate -expand -group dcache -expand -group {cpu request} /testbench/dut/hart/MemAdrM
add wave -noupdate -expand -group dcache -expand -group {cpu request} /testbench/dut/hart/dmem/ReadDataW
add wave -noupdate -expand -group dcache -expand -group {cpu request} /testbench/dut/hart/WriteDataM
add wave -noupdate -expand -group dcache -color Gray90 /testbench/dut/hart/dmem/CurrState
add wave -noupdate -expand -group dcache /testbench/dut/hart/MemPAdrM
add wave -noupdate -expand -group dcache /testbench/dut/hart/WriteDataM
add wave -noupdate -expand -group dcache /testbench/dut/hart/dmem/MemRWM
add wave -noupdate -expand -group dcache /testbench/dut/hart/dmem/MemAccessM
add wave -noupdate -expand -group dcache /testbench/dut/hart/dmem/AtomicMaskedM
add wave -noupdate -expand -group dcache /testbench/dut/hart/dmem/MemAckW
add wave -noupdate -expand -group dcache /testbench/dut/hart/dmem/genblk1/lrM
add wave -noupdate -expand -group dcache /testbench/dut/hart/dmem/genblk1/scM
add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D
add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D
add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E
@ -128,6 +134,7 @@ add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/ALURe
add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/SrcAE
add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/SrcBE
add wave -noupdate /testbench/dut/hart/ieu/dp/ALUResultM
add wave -noupdate -expand -group PCS /testbench/dut/hart/ifu/PCNextF
add wave -noupdate -expand -group PCS /testbench/dut/hart/PCF
add wave -noupdate -expand -group PCS /testbench/dut/hart/ifu/PCD
add wave -noupdate -expand -group PCS /testbench/dut/hart/PCE
@ -148,35 +155,96 @@ add wave -noupdate -group {function radix debug} /testbench/functionRadix/functi
add wave -noupdate -group {function radix debug} /testbench/functionRadix/function_radix/FunctionAddr
add wave -noupdate -group {function radix debug} /testbench/functionRadix/function_radix/ProgramAddrIndex
add wave -noupdate -group {function radix debug} /testbench/functionRadix/function_radix/FunctionName
add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/InstrD
add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/SrcAE
add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/SrcBE
add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/Funct3E
add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/MulDivE
add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/W64E
add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/StallM
add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/StallW
add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/FlushM
add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/FlushW
add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/MulDivResultW
add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/genblk1/div/start
add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/DivDoneE
add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/DivBusyE
add wave -noupdate /testbench/dut/hart/mdu/genblk1/gclk
add wave -noupdate -expand -group divider /testbench/dut/hart/mdu/genblk1/div/fsm1/CURRENT_STATE
add wave -noupdate -expand -group divider /testbench/dut/hart/mdu/genblk1/div/N
add wave -noupdate -expand -group divider /testbench/dut/hart/mdu/genblk1/div/D
add wave -noupdate -expand -group divider /testbench/dut/hart/mdu/genblk1/div/Q
add wave -noupdate -expand -group divider /testbench/dut/hart/mdu/genblk1/div/rem0
add wave -noupdate /testbench/dut/hart/MulDivResultW
add wave -noupdate /testbench/dut/hart/mdu/genblk1/PrelimResultE
add wave -noupdate /testbench/dut/hart/mdu/Funct3E
add wave -noupdate /testbench/dut/hart/mdu/genblk1/QuotE
add wave -noupdate -group muldiv /testbench/dut/hart/mdu/InstrD
add wave -noupdate -group muldiv /testbench/dut/hart/mdu/SrcAE
add wave -noupdate -group muldiv /testbench/dut/hart/mdu/SrcBE
add wave -noupdate -group muldiv /testbench/dut/hart/mdu/Funct3E
add wave -noupdate -group muldiv /testbench/dut/hart/mdu/MulDivE
add wave -noupdate -group muldiv /testbench/dut/hart/mdu/W64E
add wave -noupdate -group muldiv /testbench/dut/hart/mdu/StallM
add wave -noupdate -group muldiv /testbench/dut/hart/mdu/StallW
add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushM
add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushW
add wave -noupdate -group muldiv /testbench/dut/hart/mdu/MulDivResultW
add wave -noupdate -group muldiv /testbench/dut/hart/mdu/genblk1/div/start
add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivDoneE
add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivBusyE
add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/fsm1/CURRENT_STATE
add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/N
add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/D
add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/Q
add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/rem0
add wave -noupdate -group icache -color Orange /testbench/dut/hart/ifu/icache/controller/CurrState
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/cachemem/OldReadPAdr
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/SavePC
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/UnalignedSelect
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PCMux
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn
add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/AHBByteLength
add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/AHBOFFETWIDTH
add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/BlockByteLength
add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/OFFSETWIDTH
add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/WORDSPERLINE
add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/LOGWPL
add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/LINESIZE
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrInF
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWritePAdr
add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValidBit
add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValid
add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/ReadTag
add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataTag
add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadAddr
add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadData
add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/ReadPAdr
add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteEnable
add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteLine
add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WritePAdr
add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteSet
add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteTag
add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData
add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF
add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/AlignedInstrRawD
add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/InstrRawD
add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCNextPF
add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF
add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF
add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPFinalF
add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/BusState
add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HCLK
add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HRDATA
add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HREADY
add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HRESP
add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HADDR
add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HWDATA
add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HWRITE
add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HSIZE
add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HBURST
add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HPROT
add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HTRANS
add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HMASTLOCK
add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HADDRD
add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HSIZED
add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HWRITED
add wave -noupdate /testbench/dut/hart/dmem/genblk1/scM
TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 2} {128433 ns} 0}
WaveRestoreCursors {{Cursor 2} {12215488 ns} 0} {{Cursor 4} {12211487 ns} 0}
quietly wave cursor active 1
configure wave -namecolwidth 250
configure wave -valuecolwidth 229
configure wave -valuecolwidth 513
configure wave -justifyvalue left
configure wave -signalnamewidth 1
configure wave -snapdistance 10
@ -189,4 +257,4 @@ configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ns
update
WaveRestoreZoom {128007 ns} {128663 ns}
WaveRestoreZoom {12215315 ns} {12215675 ns}

22
wally-pipelined/src/cache/cache-sram.sv vendored Normal file
View File

@ -0,0 +1,22 @@
// Depth is number of bits in one "word" of the memory, width is number of such words
module Sram1Read1Write #(parameter DEPTH=128, WIDTH=256) (
input logic clk,
// port 1 is read only
input logic [$clog2(WIDTH)-1:0] ReadAddr,
output logic [DEPTH-1:0] ReadData,
// port 2 is write only
input logic [$clog2(WIDTH)-1:0] WriteAddr,
input logic [DEPTH-1:0] WriteData,
input logic WriteEnable
);
logic [WIDTH-1:0][DEPTH-1:0] StoredData;
always_ff @(posedge clk) begin
ReadData <= StoredData[ReadAddr];
if (WriteEnable) begin
StoredData[WriteAddr] <= WriteData;
end
end
endmodule

View File

@ -4,8 +4,7 @@
// Written: jaallen@g.hmc.edu 2021-03-23
// Modified:
//
// Purpose: An implementation of a direct-mapped cache memory
// This cache is read-only, so "write"s to the memory are loading new data
// Purpose: An implementation of a direct-mapped cache memory, with read-only and write-through versions
//
// A component of the Wally configurable RISC-V project.
//
@ -26,10 +25,12 @@
`include "wally-config.vh"
module rodirectmappedmem #(parameter LINESIZE = 256, parameter NUMLINES = 512, parameter WORDSIZE = `XLEN) (
// Read-only direct-mapped memory
module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) (
// Pipeline stuff
input logic clk,
input logic reset,
input logic stall,
// If flush is high, invalidate the entire cache
input logic flush,
// Select which address to read (broken for efficiency's sake)
@ -44,50 +45,312 @@ module rodirectmappedmem #(parameter LINESIZE = 256, parameter NUMLINES = 512, p
output logic DataValid
);
localparam integer SETWIDTH = $clog2(NUMLINES);
localparam integer OFFSETWIDTH = $clog2(LINESIZE/8);
localparam integer TAGWIDTH = `XLEN-SETWIDTH-OFFSETWIDTH;
// Various compile-time constants
localparam integer WORDWIDTH = $clog2(WORDSIZE/8);
localparam integer OFFSETWIDTH = $clog2(LINESIZE/WORDSIZE);
localparam integer SETWIDTH = $clog2(NUMLINES);
localparam integer TAGWIDTH = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH;
logic [NUMLINES-1:0][WORDSIZE-1:0] LineOutputs;
logic [NUMLINES-1:0] ValidOutputs;
logic [NUMLINES-1:0][TAGWIDTH-1:0] TagOutputs;
logic [OFFSETWIDTH-1:0] WordSelect;
logic [`XLEN-1:0] ReadPAdr;
logic [SETWIDTH-1:0] ReadSet, WriteSet;
logic [TAGWIDTH-1:0] ReadTag, WriteTag;
localparam integer OFFSETBEGIN = WORDWIDTH;
localparam integer OFFSETEND = OFFSETBEGIN+OFFSETWIDTH-1;
localparam integer SETBEGIN = OFFSETEND+1;
localparam integer SETEND = SETBEGIN + SETWIDTH - 1;
localparam integer TAGBEGIN = SETEND + 1;
localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1;
// Swizzle bits to get the offset, set, and tag out of the read and write addresses
// Machinery to read from and write to the correct addresses in memory
logic [`XLEN-1:0] ReadPAdr;
logic [`XLEN-1:0] OldReadPAdr;
logic [OFFSETWIDTH-1:0] ReadOffset, WriteOffset;
logic [SETWIDTH-1:0] ReadSet, WriteSet;
logic [TAGWIDTH-1:0] ReadTag, WriteTag;
logic [LINESIZE-1:0] ReadLine;
logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed;
// Machinery to check if a given read is valid and is the desired value
logic [TAGWIDTH-1:0] DataTag;
logic [NUMLINES-1:0] ValidOut;
logic DataValidBit;
flopenr #(`XLEN) ReadPAdrFlop(clk, reset, ~stall, ReadPAdr, OldReadPAdr);
// Assign the read and write addresses in cache memory
always_comb begin
// Read address
WordSelect = ReadLowerAdr[OFFSETWIDTH-1:0];
ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN];
ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
ReadSet = ReadPAdr[SETWIDTH+OFFSETWIDTH-1:OFFSETWIDTH];
ReadTag = ReadPAdr[`XLEN-1:SETWIDTH+OFFSETWIDTH];
// Write address
WriteSet = WritePAdr[SETWIDTH+OFFSETWIDTH-1:OFFSETWIDTH];
WriteTag = WritePAdr[`XLEN-1:SETWIDTH+OFFSETWIDTH];
ReadSet = ReadPAdr[SETEND:SETBEGIN];
ReadTag = OldReadPAdr[TAGEND:TAGBEGIN];
WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN];
WriteSet = WritePAdr[SETEND:SETBEGIN];
WriteTag = WritePAdr[TAGEND:TAGBEGIN];
end
// Depth is number of bits in one "word" of the memory, width is number of such words
Sram1Read1Write #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem (
.*,
.ReadAddr(ReadSet),
.ReadData(ReadLine),
.WriteAddr(WriteSet),
.WriteData(WriteLine)
);
Sram1Read1Write #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags (
.*,
.ReadAddr(ReadSet),
.ReadData(DataTag),
.WriteAddr(WriteSet),
.WriteData(WriteTag)
);
// Pick the right bits coming out the read line
assign DataWord = ReadLineTransformed[ReadOffset];
genvar i;
generate
for (i=0; i < NUMLINES; i++) begin
rocacheline #(LINESIZE, TAGWIDTH, WORDSIZE) lines (
.*,
.WriteEnable(WriteEnable & (WriteSet == i)),
.WriteData(WriteLine),
.WriteTag(WriteTag),
.DataWord(LineOutputs[i]),
.DataTag(TagOutputs[i]),
.DataValid(ValidOutputs[i])
);
for (i=0; i < LINESIZE/WORDSIZE; i++) begin
assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE];
end
endgenerate
// Get the data and valid out of the lines
always_comb begin
DataWord = LineOutputs[ReadSet];
DataValid = ValidOutputs[ReadSet] & (TagOutputs[ReadSet] == ReadTag);
// Correctly handle the valid bits
always_ff @(posedge clk, posedge reset) begin
if (reset || flush) begin
ValidOut <= {NUMLINES{1'b0}};
end else begin
if (WriteEnable) begin
ValidOut[WriteSet] <= 1;
end
end
DataValidBit <= ValidOut[ReadSet];
end
assign DataValid = DataValidBit && (DataTag == ReadTag);
endmodule
module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) (
// Pipeline stuff
input logic clk,
input logic reset,
input logic re,
// If flush is high, invalidate the entire cache
input logic flush,
// Select which address to read (broken for efficiency's sake)
input logic [`XLEN-1:12] ReadUpperPAdr,
input logic [11:0] ReadLowerAdr,
// Write new data to the cache
input logic WriteEnable,
input logic [LINESIZE-1:0] WriteLine,
input logic [`XLEN-1:0] WritePAdr,
// Output the word, as well as if it is valid
output logic [31:0] DataWord, // *** was WORDSIZE-1
output logic DataValid
);
// Various compile-time constants
localparam integer WORDWIDTH = $clog2(WORDSIZE/8);
localparam integer OFFSETWIDTH = $clog2(LINESIZE/WORDSIZE);
localparam integer SETWIDTH = $clog2(NUMLINES);
localparam integer TAGWIDTH = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH;
localparam integer OFFSETBEGIN = WORDWIDTH;
localparam integer OFFSETEND = OFFSETBEGIN+OFFSETWIDTH-1;
localparam integer SETBEGIN = OFFSETEND+1;
localparam integer SETEND = SETBEGIN + SETWIDTH - 1;
localparam integer TAGBEGIN = SETEND + 1;
localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1;
// Machinery to read from and write to the correct addresses in memory
logic [`XLEN-1:0] ReadPAdr;
logic [`XLEN-1:0] OldReadPAdr;
logic [OFFSETWIDTH-1:0] ReadOffset, WriteOffset;
logic [SETWIDTH-1:0] ReadSet, WriteSet;
logic [TAGWIDTH-1:0] ReadTag, WriteTag;
logic [LINESIZE-1:0] ReadLine;
logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed;
// Machinery to check if a given read is valid and is the desired value
logic [TAGWIDTH-1:0] DataTag;
logic [NUMLINES-1:0] ValidOut;
logic DataValidBit;
flopenr #(`XLEN) ReadPAdrFlop(clk, reset, re, ReadPAdr, OldReadPAdr);
// Assign the read and write addresses in cache memory
always_comb begin
ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN];
ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
ReadSet = ReadPAdr[SETEND:SETBEGIN];
ReadTag = OldReadPAdr[TAGEND:TAGBEGIN];
WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN];
WriteSet = WritePAdr[SETEND:SETBEGIN];
WriteTag = WritePAdr[TAGEND:TAGBEGIN];
end
// Depth is number of bits in one "word" of the memory, width is number of such words
Sram1Read1Write #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem (
.*,
.ReadAddr(ReadSet),
.ReadData(ReadLine),
.WriteAddr(WriteSet),
.WriteData(WriteLine)
);
Sram1Read1Write #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags (
.*,
.ReadAddr(ReadSet),
.ReadData(DataTag),
.WriteAddr(WriteSet),
.WriteData(WriteTag)
);
// Pick the right bits coming out the read line
//assign DataWord = ReadLineTransformed[ReadOffset];
//logic [31:0] tempRD;
always_comb begin
case (OldReadPAdr[4:1])
0: DataWord = ReadLine[31:0];
1: DataWord = ReadLine[47:16];
2: DataWord = ReadLine[63:32];
3: DataWord = ReadLine[79:48];
4: DataWord = ReadLine[95:64];
5: DataWord = ReadLine[111:80];
6: DataWord = ReadLine[127:96];
7: DataWord = ReadLine[143:112];
8: DataWord = ReadLine[159:128];
9: DataWord = ReadLine[175:144];
10: DataWord = ReadLine[191:160];
11: DataWord = ReadLine[207:176];
12: DataWord = ReadLine[223:192];
13: DataWord = ReadLine[239:208];
14: DataWord = ReadLine[255:224];
15: DataWord = {16'b0, ReadLine[255:240]};
endcase
end
genvar i;
generate
for (i=0; i < LINESIZE/WORDSIZE; i++) begin
assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE];
end
endgenerate
// Correctly handle the valid bits
always_ff @(posedge clk, posedge reset) begin
if (reset || flush) begin
ValidOut <= {NUMLINES{1'b0}};
end else begin
if (WriteEnable) begin
ValidOut[WriteSet] <= 1;
end
end
DataValidBit <= ValidOut[ReadSet];
end
assign DataValid = DataValidBit && (DataTag == ReadTag);
endmodule
// Write-through direct-mapped memory
module wtdirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) (
// Pipeline stuff
input logic clk,
input logic reset,
input logic stall,
// If flush is high, invalidate the entire cache
input logic flush,
// Select which address to read (broken for efficiency's sake)
input logic [`XLEN-1:12] ReadUpperPAdr,
input logic [11:0] ReadLowerAdr,
// Load new data into the cache (from main memory)
input logic LoadEnable,
input logic [LINESIZE-1:0] LoadLine,
input logic [`XLEN-1:0] LoadPAdr,
// Write data to the cache (like from a store instruction)
input logic WriteEnable,
input logic [WORDSIZE-1:0] WriteWord,
input logic [`XLEN-1:0] WritePAdr,
input logic [1:0] WriteSize, // Specify size of the write (non-written bits should be preserved)
// Output the word, as well as if it is valid
output logic [WORDSIZE-1:0] DataWord,
output logic DataValid
);
// Various compile-time constants
localparam integer WORDWIDTH = $clog2(WORDSIZE/8);
localparam integer OFFSETWIDTH = $clog2(LINESIZE/WORDSIZE);
localparam integer SETWIDTH = $clog2(NUMLINES);
localparam integer TAGWIDTH = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH;
localparam integer OFFSETBEGIN = WORDWIDTH;
localparam integer OFFSETEND = OFFSETBEGIN+OFFSETWIDTH-1;
localparam integer SETBEGIN = OFFSETEND+1;
localparam integer SETEND = SETBEGIN + SETWIDTH - 1;
localparam integer TAGBEGIN = SETEND + 1;
localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1;
// Machinery to read from and write to the correct addresses in memory
logic [`XLEN-1:0] ReadPAdr;
logic [`XLEN-1:0] OldReadPAdr;
logic [OFFSETWIDTH-1:0] ReadOffset, LoadOffset;
logic [SETWIDTH-1:0] ReadSet, LoadSet;
logic [TAGWIDTH-1:0] ReadTag, LoadTag;
logic [LINESIZE-1:0] ReadLine;
logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed;
// Machinery to check if a given read is valid and is the desired value
logic [TAGWIDTH-1:0] DataTag;
logic [NUMLINES-1:0] ValidOut;
logic DataValidBit;
flopenr #(`XLEN) ReadPAdrFlop(clk, reset, ~stall, ReadPAdr, OldReadPAdr);
// Assign the read and write addresses in cache memory
always_comb begin
ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN];
ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
ReadSet = ReadPAdr[SETEND:SETBEGIN];
ReadTag = OldReadPAdr[TAGEND:TAGBEGIN];
LoadOffset = LoadPAdr[OFFSETEND:OFFSETBEGIN];
LoadSet = LoadPAdr[SETEND:SETBEGIN];
LoadTag = LoadPAdr[TAGEND:TAGBEGIN];
end
// Depth is number of bits in one "word" of the memory, width is number of such words
Sram1Read1Write #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem (
.*,
.ReadAddr(ReadSet),
.ReadData(ReadLine),
.WriteAddr(LoadSet),
.WriteData(LoadLine),
.WriteEnable(LoadEnable)
);
Sram1Read1Write #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags (
.*,
.ReadAddr(ReadSet),
.ReadData(DataTag),
.WriteAddr(LoadSet),
.WriteData(LoadTag),
.WriteEnable(LoadEnable)
);
// Pick the right bits coming out the read line
assign DataWord = ReadLineTransformed[ReadOffset];
genvar i;
generate
for (i=0; i < LINESIZE/WORDSIZE; i++) begin
assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE];
end
endgenerate
// Correctly handle the valid bits
always_ff @(posedge clk, posedge reset) begin
if (reset || flush) begin
ValidOut <= {NUMLINES{1'b0}};
end else begin
if (LoadEnable) begin
ValidOut[LoadSet] <= 1;
end
end
DataValidBit <= ValidOut[ReadSet];
end
assign DataValid = DataValidBit && (DataTag == ReadTag);
endmodule

View File

@ -1,68 +0,0 @@
///////////////////////////////////////////
// line.sv
//
// Written: jaallen@g.hmc.edu 2021-03-23
// Modified:
//
// Purpose: An implementation of a single cache line
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// A read-only cache line ("write"ing to this line is loading new data, not writing to memory
module rocacheline #(parameter LINESIZE = 256, parameter TAGSIZE = 32, parameter WORDSIZE = `XLEN) (
// Pipeline stuff
input logic clk,
input logic reset,
// If flush is high, invalidate this word
input logic flush,
// Select which word within the line
input logic [$clog2(LINESIZE/8)-1:0] WordSelect,
// Write new data to the line
input logic WriteEnable,
input logic [LINESIZE-1:0] WriteData,
input logic [TAGSIZE-1:0] WriteTag,
// Output the word, as well as the tag and if it is valid
output logic [WORDSIZE-1:0] DataWord,
output logic [TAGSIZE-1:0] DataTag,
output logic DataValid
);
localparam integer OFFSETSIZE = $clog2(LINESIZE/8);
localparam integer NUMWORDS = LINESIZE/WORDSIZE;
logic [NUMWORDS-1:0][WORDSIZE-1:0] DataLinesIn, DataLinesOut;
flopenr #(1) ValidBitFlop(clk, reset, WriteEnable | flush, ~flush, DataValid);
flopenr #(TAGSIZE) TagFlop(clk, reset, WriteEnable, WriteTag, DataTag);
genvar i;
generate
for (i=0; i < NUMWORDS; i++) begin
assign DataLinesIn[i] = WriteData[NUMWORDS*i+WORDSIZE-1:NUMWORDS*i];
flopenr #(LINESIZE) LineFlop(clk, reset, WriteEnable, DataLinesIn[i], DataLinesOut[i]);
end
endgenerate
always_comb begin
DataWord = DataLinesOut[WordSelect[OFFSETSIZE-1:$clog2(WORDSIZE)]];
end
endmodule

View File

@ -0,0 +1,184 @@
///////////////////////////////////////////
// dcache.sv
//
// Written: jaallen@g.hmc.edu 2021-04-15
// Modified:
//
// Purpose: Cache memory for the dmem so it can access memory less often, saving cycles
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
module dcache(
// Basic pipeline stuff
input logic clk, reset,
input logic StallW,
input logic FlushW,
// Upper bits of physical address
input logic [`XLEN-1:12] UpperPAdrM,
// Lower 12 bits of virtual address, since it's faster this way
input logic [11:0] LowerVAdrM,
// Write to the dcache
input logic [`XLEN-1:0] DCacheWriteDataM,
input logic DCacheReadM, DCacheWriteM,
// Data read in from the ebu unit
input logic [`XLEN-1:0] ReadDataW,
input logic MemAckW,
// Access requested from the ebu unit
output logic [`XLEN-1:0] MemPAdrM,
output logic MemReadM, MemWriteM,
// High if the dcache is requesting a stall
output logic DCacheStallW,
// The data that was requested from the cache
output logic [`XLEN-1:0] DCacheReadW
);
// Configuration parameters
// TODO Move these to a config file
localparam integer DCACHELINESIZE = 256;
localparam integer DCACHENUMLINES = 512;
// Input signals to cache memory
logic FlushMem;
logic [`XLEN-1:12] DCacheMemUpperPAdr;
logic [11:0] DCacheMemLowerAdr;
logic DCacheMemWriteEnable;
logic [DCACHELINESIZE-1:0] DCacheMemWriteData;
logic [`XLEN-1:0] DCacheMemWritePAdr;
logic EndFetchState;
// Output signals from cache memory
logic [`XLEN-1:0] DCacheMemReadData;
logic DCacheMemReadValid;
wtdirectmappedmem #(.LINESIZE(DCACHELINESIZE), .NUMLINES(DCACHENUMLINES), .WORDSIZE(`XLEN)) cachemem(
.*,
// Stall it if the pipeline is stalled, unless we're stalling it and we're ending our stall
.stall(StallW),
.flush(FlushMem),
.ReadUpperPAdr(DCacheMemUpperPAdr),
.ReadLowerAdr(DCacheMemLowerAdr),
.LoadEnable(DCacheMemWriteEnable),
.LoadLine(DCacheMemWriteData),
.LoadPAdr(DCacheMemWritePAdr),
.DataWord(DCacheMemReadData),
.DataValid(DCacheMemReadValid),
.WriteEnable(0),
.WriteWord(0),
.WritePAdr(0),
.WriteSize(2'b10)
);
dcachecontroller #(.LINESIZE(DCACHELINESIZE)) controller(.*);
// For now, assume no writes to executable memory
assign FlushMem = 1'b0;
endmodule
module dcachecontroller #(parameter LINESIZE = 256) (
// Inputs from pipeline
input logic clk, reset,
input logic StallW,
input logic FlushW,
// Input the address to read
// The upper bits of the physical pc
input logic [`XLEN-1:12] DCacheMemUpperPAdr,
// The lower bits of the virtual pc
input logic [11:0] DCacheMemLowerAdr,
// Signals to/from cache memory
// The read coming out of it
input logic [`XLEN-1:0] DCacheMemReadData,
input logic DCacheMemReadValid,
// Load data into the cache
output logic DCacheMemWriteEnable,
output logic [LINESIZE-1:0] DCacheMemWriteData,
output logic [`XLEN-1:0] DCacheMemWritePAdr,
// The read that was requested
output logic [31:0] DCacheReadW,
// Outputs to pipeline control stuff
output logic DCacheStallW, EndFetchState,
// Signals to/from ahblite interface
// A read containing the requested data
input logic [`XLEN-1:0] ReadDataW,
input logic MemAckW,
// The read we request from main memory
output logic [`XLEN-1:0] MemPAdrM,
output logic MemReadM, MemWriteM
);
// Cache fault signals
logic FaultStall;
// Handle happy path (data in cache)
always_comb begin
DCacheReadW = DCacheMemReadData;
end
// Handle cache faults
localparam integer WORDSPERLINE = LINESIZE/`XLEN;
localparam integer LOGWPL = $clog2(WORDSPERLINE);
localparam integer OFFSETWIDTH = $clog2(LINESIZE/8);
logic FetchState, BeginFetchState;
logic [LOGWPL:0] FetchWordNum, NextFetchWordNum;
logic [`XLEN-1:0] LineAlignedPCPF;
flopr #(1) FetchStateFlop(clk, reset, BeginFetchState | (FetchState & ~EndFetchState), FetchState);
flopr #(LOGWPL+1) FetchWordNumFlop(clk, reset, NextFetchWordNum, FetchWordNum);
genvar i;
generate
for (i=0; i < WORDSPERLINE; i++) begin
flopenr #(`XLEN) flop(clk, reset, FetchState & (i == FetchWordNum), ReadDataW, DCacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]);
end
endgenerate
// Enter the fetch state when we hit a cache fault
always_comb begin
BeginFetchState = ~DCacheMemReadValid & ~FetchState & (FetchWordNum == 0);
end
// Exit the fetch state once the cache line has been loaded
flopr #(1) EndFetchStateFlop(clk, reset, DCacheMemWriteEnable, EndFetchState);
// Machinery to request the correct addresses from main memory
always_comb begin
MemReadM = FetchState & ~EndFetchState & ~DCacheMemWriteEnable;
LineAlignedPCPF = {DCacheMemUpperPAdr, DCacheMemLowerAdr[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}};
MemPAdrM = LineAlignedPCPF + FetchWordNum*(`XLEN/8);
NextFetchWordNum = FetchState ? FetchWordNum+MemAckW : {LOGWPL+1{1'b0}};
end
// Write to cache memory when we have the line here
always_comb begin
DCacheMemWritePAdr = LineAlignedPCPF;
DCacheMemWriteEnable = FetchWordNum == {1'b1, {LOGWPL{1'b0}}} & FetchState & ~EndFetchState;
end
// Stall the pipeline while loading a new line from memory
always_comb begin
DCacheStallW = FetchState | ~DCacheMemReadValid;
end
endmodule

View File

@ -27,6 +27,7 @@
`include "wally-config.vh"
// *** Ross Thompson amo misalignment check?
module dmem (
input logic clk, reset,
input logic StallW, FlushW,
@ -40,6 +41,7 @@ module dmem (
input logic [1:0] AtomicM,
output logic [`XLEN-1:0] MemPAdrM,
output logic MemReadM, MemWriteM,
output logic [1:0] AtomicMaskedM,
output logic DataMisalignedM,
// Writeback Stage
input logic MemAckW,
@ -62,6 +64,14 @@ module dmem (
logic SquashSCM;
logic DTLBPageFaultM;
logic MemAccessM;
logic [1:0] CurrState, NextState;
localparam STATE_READY = 0;
localparam STATE_FETCH = 1;
localparam STATE_FETCH_AMO = 2;
localparam STATE_STALLED = 3;
tlb #(.ENTRY_BITS(3), .ITLB(0)) dtlb(.TLBAccessType(MemRWM), .VirtualAddress(MemAdrM),
.PageTableEntryWrite(PageTableEntryM), .PageTypeWrite(PageTypeM),
@ -85,8 +95,10 @@ module dmem (
// Squash unaligned data accesses and failed store conditionals
// *** this is also the place to squash if the cache is hit
assign MemReadM = MemRWM[1] & ~DataMisalignedM;
assign MemWriteM = MemRWM[0] & ~DataMisalignedM && ~SquashSCM;
assign MemReadM = MemRWM[1] & ~DataMisalignedM & CurrState != STATE_STALLED;
assign MemWriteM = MemRWM[0] & ~DataMisalignedM && ~SquashSCM & CurrState != STATE_STALLED;
assign AtomicMaskedM = CurrState != STATE_STALLED ? AtomicM : 2'b00 ;
assign MemAccessM = |MemRWM;
// Determine if address is valid
assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1];
@ -110,8 +122,8 @@ module dmem (
else if (scM || WriteAdrMatchM) ReservationValidM = 0; // clear valid on store to same address or any sc
else ReservationValidM = ReservationValidW; // otherwise don't change valid
end
flopenrc #(`XLEN-2) resadrreg(clk, reset, FlushW, ~StallW && lrM, MemPAdrM[`XLEN-1:2], ReservationPAdrW); // could drop clear on this one but not valid
flopenrc #(1) resvldreg(clk, reset, FlushW, ~StallW, ReservationValidM, ReservationValidW);
flopenrc #(`XLEN-2) resadrreg(clk, reset, FlushW, lrM, MemPAdrM[`XLEN-1:2], ReservationPAdrW); // could drop clear on this one but not valid
flopenrc #(1) resvldreg(clk, reset, FlushW, lrM, ReservationValidM, ReservationValidW);
flopenrc #(1) squashreg(clk, reset, FlushW, ~StallW, SquashSCM, SquashSCW);
end else begin // Atomic operations not supported
assign SquashSCM = 0;
@ -122,5 +134,33 @@ module dmem (
// Data stall
//assign DataStall = 0;
// Ross Thompson April 22, 2021
// for now we need to handle the issue where the data memory interface repeately
// requests data from memory rather than issuing a single request.
flopr #(2) stateReg(.clk(clk),
.reset(reset),
.d(NextState),
.q(CurrState));
always_comb begin
case (CurrState)
STATE_READY: if (MemRWM[1] & MemRWM[0]) NextState = STATE_FETCH_AMO; // *** should be some misalign check
else if (MemAccessM & ~DataMisalignedM) NextState = STATE_FETCH;
else NextState = STATE_READY;
STATE_FETCH_AMO: if (MemAckW) NextState = STATE_FETCH;
else NextState = STATE_FETCH_AMO;
STATE_FETCH: if (MemAckW & ~StallW) NextState = STATE_READY;
else if (MemAckW & StallW) NextState = STATE_STALLED;
else NextState = STATE_FETCH;
STATE_STALLED: if (~StallW) NextState = STATE_READY;
else NextState = STATE_STALLED;
default: NextState = STATE_READY;
endcase // case (CurrState)
end
endmodule

View File

@ -39,12 +39,13 @@ module ahblite (
input logic StallW, FlushW,
// Load control
input logic UnsignedLoadM,
input logic [1:0] AtomicM,
input logic [1:0] AtomicMaskedM,
input logic [6:0] Funct7M,
// Signals from Instruction Cache
input logic [`XLEN-1:0] InstrPAdrF, // *** rename these to match block diagram
input logic InstrReadF,
output logic [`XLEN-1:0] InstrRData,
output logic InstrAckF,
// Signals from Data Cache
input logic [`XLEN-1:0] MemPAdrM,
input logic MemReadM, MemWriteM,
@ -77,16 +78,17 @@ module ahblite (
output logic [3:0] HSIZED,
output logic HWRITED,
// Stalls
output logic InstrStall,/*InstrUpdate, */DataStall
output logic /*InstrUpdate, */DataStall,
output logic MemAckW
// *** add a chip-level ready signal as part of handshake
);
logic GrantData;
logic [31:0] AccessAddress;
logic [2:0] AccessSize, PTESize, ISize;
logic [`AHBW-1:0] HRDATAMasked, ReadDataM, ReadDataNewW, ReadDataOldW, WriteData;
logic [`AHBW-1:0] HRDATAMasked, ReadDataM, CapturedData, ReadDataWnext, WriteData;
logic IReady, DReady;
logic CaptureDataM;
logic CaptureDataM,CapturedDataAvailable;
// Describes type of access
logic Atomic, Execute, Write, Read;
@ -112,7 +114,7 @@ module ahblite (
always_comb
case (BusState)
IDLE: if (MMUTranslate) NextBusState = MMUTRANSLATE;
else if (AtomicM[1]) NextBusState = ATOMICREAD;
else if (AtomicMaskedM[1]) NextBusState = ATOMICREAD;
else if (MemReadM) NextBusState = MEMREAD; // Memory has priority over instructions
else if (MemWriteM) NextBusState = MEMWRITE;
else if (InstrReadF) NextBusState = INSTRREAD;
@ -185,16 +187,31 @@ module ahblite (
assign MMUReady = (BusState == MMUTRANSLATE && NextBusState == IDLE);
assign InstrRData = HRDATA;
assign InstrAckF = (BusState == INSTRREAD) && (NextBusState != INSTRREAD) || (BusState == INSTRREADC) && (NextBusState != INSTRREADC);
assign MemAckW = (BusState == MEMREAD) && (NextBusState != MEMREAD) || (BusState == MEMWRITE) && (NextBusState != MEMWRITE) ||
((BusState == ATOMICREAD) && (NextBusState != ATOMICREAD)) || ((BusState == ATOMICWRITE) && (NextBusState != ATOMICWRITE));
assign MMUReadPTE = HRDATA;
assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021
// Carefully decide when to update ReadDataW
// ReadDataMstored holds the most recent memory read.
// We need to wait until the pipeline actually advances before we can update the contents of ReadDataW
// (or else the W stage will accidentally get the M stage's data when the pipeline does advance).
assign CaptureDataM = ((BusState == MEMREAD) && (NextBusState != MEMREAD)) ||
((BusState == ATOMICREAD) && (NextBusState == ATOMICWRITE));
// We think this introduces an unnecessary cycle of latency in memory accesses
// *** can the following be simplified down to one register?
// *** examine more closely over summer?
flopenr #(`XLEN) ReadDataNewWReg(clk, reset, CaptureDataM, ReadDataM, ReadDataNewW);
flopenr #(`XLEN) ReadDataOldWReg(clk, reset, CaptureDataM, ReadDataNewW, ReadDataOldW);
assign ReadDataW = (BusState == INSTRREADC) ? ReadDataOldW : ReadDataNewW;
((BusState == ATOMICREAD) && (NextBusState != ATOMICREAD));
flopenr #(`XLEN) ReadDataNewWReg(clk, reset, CaptureDataM, ReadDataM, CapturedData);
always @(posedge HCLK, negedge HRESETn)
if (~HRESETn)
CapturedDataAvailable <= #1 1'b0;
else
CapturedDataAvailable <= #1 (StallW) ? (CaptureDataM | CapturedDataAvailable) : 1'b0;
always_comb
casez({StallW && (BusState != ATOMICREAD),CapturedDataAvailable})
2'b00: ReadDataWnext = ReadDataM;
2'b01: ReadDataWnext = CapturedData;
2'b1?: ReadDataWnext = ReadDataW;
endcase
flopr #(`XLEN) ReadDataOldWReg(clk, reset, ReadDataWnext, ReadDataW);
// Extract and sign-extend subwords if necessary
subwordread swr(.*);
@ -207,7 +224,7 @@ module ahblite (
// .result(AMOResult));
amoalu amoalu(.srca(ReadDataW), .srcb(WriteDataM), .funct(Funct7M), .width(MemSizeM),
.result(AMOResult));
mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, AtomicM[1], WriteData);
mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, AtomicMaskedM[1], WriteData);
end else
assign WriteData = WriteDataM;
endgenerate

View File

@ -83,4 +83,4 @@ module pmachecker (
assign SquashAHBAccess = InstrAccessFaultF || LoadAccessFaultM || StoreAccessFaultM;
endmodule
endmodule

View File

@ -31,7 +31,7 @@ module hazard(
// Detect hazards
input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM,
input logic LoadStallD, MulDivStallD, CSRRdStallD,
input logic InstrStall, DataStall, ICacheStallF,
input logic DataStall, ICacheStallF,
input logic DivBusyE,
// Stall & flush outputs
output logic StallF, StallD, StallE, StallM, StallW,
@ -57,13 +57,12 @@ module hazard(
assign BranchFlushDE = BPPredWrongE | RetM | TrapM;
assign StallFCause = CSRWritePendingDEM & ~(BranchFlushDE);
assign StallFCause = CSRWritePendingDEM & ~(BranchFlushDE);
assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD) & ~(BranchFlushDE); // stall in decode if instruction is a load/mul/csr dependent on previous
// assign StallDCause = LoadStallD | MulDivStallD | CSRRdStallD; // stall in decode if instruction is a load/mul/csr dependent on previous
assign StallECause = DivBusyE;
assign StallMCause = 0;
assign StallWCause = DataStall | InstrStall;
assign StallWCause = DataStall | ICacheStallF;
// Each stage stalls if the next stage is stalled or there is a cause to stall this stage.
assign StallF = StallD | StallFCause;

View File

@ -115,9 +115,6 @@ module datapath (
flopenrc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ~StallW, ALUResultM, ALUResultW);
flopenrc #(5) RdWEg(clk, reset, FlushW, ~StallW, RdM, RdW);
// *** something is not right here. Before the merge I found an issue with the jal instruction not writing
// the link address through the alu.
// not sure what changed.
// handle Store Conditional result if atomic extension supported
generate
if (`A_SUPPORTED)

View File

@ -33,20 +33,24 @@ module BTBPredictor
)
(input logic clk,
input logic reset,
input logic StallF, StallD, StallE, FlushF, FlushD, FlushE,
input logic [`XLEN-1:0] LookUpPC,
output logic [`XLEN-1:0] TargetPC,
output logic [3:0] InstrClass,
output logic [4:0] InstrClass,
output logic Valid,
// update
input logic UpdateEN,
input logic [`XLEN-1:0] UpdatePC,
input logic [`XLEN-1:0] UpdateTarget,
input logic [3:0] UpdateInstrClass
input logic [4:0] UpdateInstrClass,
input logic UpdateInvalid
);
localparam TotalDepth = 2 ** Depth;
logic [TotalDepth-1:0] ValidBits;
logic [Depth-1:0] LookUpPCIndex, UpdatePCIndex, LookUpPCIndexQ, UpdatePCIndexQ;
logic UpdateENQ;
// hashing function for indexing the PC
// We have Depth bits to index, but XLEN bits as the input.
@ -58,7 +62,7 @@ module BTBPredictor
flopenr #(Depth) UpdatePCIndexReg(.clk(clk),
.reset(reset),
.en(1'b1),
.en(~StallE),
.d(UpdatePCIndex),
.q(UpdatePCIndexQ));
@ -66,32 +70,53 @@ module BTBPredictor
always_ff @ (posedge clk) begin
if (reset) begin
ValidBits <= #1 {TotalDepth{1'b0}};
end else if (UpdateEN) begin
ValidBits[UpdatePCIndexQ] <= #1 1'b1;
end else
if (UpdateENQ) begin
ValidBits[UpdatePCIndexQ] <= #1 ~ UpdateInvalid;
end
end
assign Valid = ValidBits[LookUpPCIndexQ];
/* -----\/----- EXCLUDED -----\/-----
regfile2p1r1w #(10, 1) validMem(.clk(clk),
.reset(reset),
.RA1(LookUpPCIndexQ),
.RD1(Valid),
.REN1(1'b1),
.WA1(UpdatePCIndexQ),
.WD1(1'b1),
.WEN1(UpdateEN));
-----/\----- EXCLUDED -----/\----- */
flopenr #(1) UpdateENReg(.clk(clk),
.reset(reset),
.en(~StallF),
.d(UpdateEN),
.q(UpdateENQ));
flopenr #(Depth) LookupPCIndexReg(.clk(clk),
.reset(reset),
.en(1'b1),
.en(~StallF),
.d(LookUpPCIndex),
.q(LookUpPCIndexQ));
assign Valid = ValidBits[LookUpPCIndexQ];
// the BTB contains the target address.
// Another optimization may be using a PC relative address.
// *** need to add forwarding.
SRAM2P1R1W #(Depth, `XLEN+4) memory(.clk(clk),
SRAM2P1R1W #(Depth, `XLEN+5) memory(.clk(clk),
.reset(reset),
.RA1(LookUpPCIndex),
.RD1({{InstrClass, TargetPC}}),
.REN1(1'b1),
.REN1(~StallF),
.WA1(UpdatePCIndex),
.WD1({UpdateInstrClass, UpdateTarget}),
.WEN1(UpdateEN),
.BitWEN1({4'b0000, {`XLEN{1'b1}}})); // *** definitely not right.
.BitWEN1({5'h1F, {`XLEN{1'b1}}})); // *** definitely not right.
endmodule

View File

@ -42,7 +42,7 @@ module RASPredictor
logic CounterEn;
localparam Depth = $clog2(StackSize);
logic [StackSize-1:0] PtrD, PtrQ, PtrP1, PtrM1;
logic [Depth-1:0] PtrD, PtrQ, PtrP1, PtrM1;
logic [StackSize-1:0] [`XLEN-1:0] memory;
integer index;
@ -55,7 +55,7 @@ module RASPredictor
// may have to handle a push and an incr at the same time.
// *** what happens if jal is executing and there is a return being flushed in Decode?
flopenr #(StackSize) PTR(.clk(clk),
flopenr #(Depth) PTR(.clk(clk),
.reset(reset),
.en(CounterEn),
.d(PtrD),

View File

@ -47,20 +47,25 @@ module bpred
input logic [`XLEN-1:0] PCTargetE, // The branch destination if the branch is taken.
input logic [`XLEN-1:0] PCD, // The address the branch predictor took.
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
input logic [3:0] InstrClassE,
input logic [4:0] InstrClassE,
// Report branch prediction status
output logic BPPredWrongE
output logic BPPredWrongE,
output logic BPPredDirWrongE,
output logic BTBPredPCWrongE,
output logic RASPredPCWrongE,
output logic BPPredClassNonCFIWrongE
);
logic BTBValidF;
logic [1:0] BPPredF, BPPredD, BPPredE, UpdateBPPredE;
logic [3:0] BPInstrClassF, BPInstrClassD, BPInstrClassE;
logic [4:0] BPInstrClassF, BPInstrClassD, BPInstrClassE;
logic [`XLEN-1:0] BTBPredPCF, RASPCF;
logic TargetWrongE;
logic FallThroughWrongE;
logic PredictionDirWrongE;
logic PredictionPCWrongE;
logic PredictionInstrClassWrongE;
logic [`XLEN-1:0] CorrectPCE;
@ -74,7 +79,7 @@ module bpred
.Prediction(BPPredF),
// update
.UpdatePC(PCE),
.UpdateEN(InstrClassE[0]),
.UpdateEN(InstrClassE[0] & ~StallE),
.UpdatePrediction(UpdateBPPredE));
end else if (`BPTYPE == "BPGLOBAL") begin:Predictor
@ -86,7 +91,7 @@ module bpred
.Prediction(BPPredF),
// update
.UpdatePC(PCE),
.UpdateEN(InstrClassE[0]),
.UpdateEN(InstrClassE[0] & ~StallE),
.PCSrcE(PCSrcE),
.UpdatePrediction(UpdateBPPredE));
end else if (`BPTYPE == "BPGSHARE") begin:Predictor
@ -98,7 +103,20 @@ module bpred
.Prediction(BPPredF),
// update
.UpdatePC(PCE),
.UpdateEN(InstrClassE[0]),
.UpdateEN(InstrClassE[0] & ~StallE),
.PCSrcE(PCSrcE),
.UpdatePrediction(UpdateBPPredE));
end
else if (`BPTYPE == "BPLOCALPAg") begin:Predictor
localHistoryPredictor DirPredictor(.clk(clk),
.reset(reset),
.*, // Stalls and flushes
.LookUpPC(PCNextF),
.Prediction(BPPredF),
// update
.UpdatePC(PCE),
.UpdateEN(InstrClassE[0] & ~StallE),
.PCSrcE(PCSrcE),
.UpdatePrediction(UpdateBPPredE));
end
@ -132,16 +150,19 @@ module bpred
// Part 2 Branch target address prediction
// *** For now the BTB will house the direct and indirect targets
// *** getting to many false positivies from the BTB, we need a partial TAG to reduce this.
BTBPredictor TargetPredictor(.clk(clk),
.reset(reset),
.*, // Stalls and flushes
.LookUpPC(PCNextF),
.TargetPC(BTBPredPCF),
.InstrClass(BPInstrClassF),
.Valid(BTBValidF),
// update
.UpdateEN(InstrClassE[2] | InstrClassE[1] | InstrClassE[0]),
.UpdateEN((|InstrClassE | (PredictionInstrClassWrongE)) & ~StallE),
.UpdatePC(PCE),
.UpdateTarget(PCTargetE),
.UpdateInvalid(PredictionInstrClassWrongE),
.UpdateInstrClass(InstrClassE));
// need to forward when updating to the same address as reading.
@ -152,9 +173,9 @@ module bpred
// *** need to add the logic to restore RAS on flushes. We will use incr for this.
RASPredictor RASPredictor(.clk(clk),
.reset(reset),
.pop(BPInstrClassF[3]),
.pop(BPInstrClassF[3] & ~StallF),
.popPC(RASPCF),
.push(InstrClassE[3]),
.push(InstrClassE[4] & ~StallE),
.incr(1'b0),
.pushPC(PCLinkE));
@ -180,14 +201,14 @@ module bpred
.q(BPPredE));
// pipeline the class
flopenrc #(4) InstrClassRegD(.clk(clk),
flopenrc #(5) InstrClassRegD(.clk(clk),
.reset(reset),
.en(~StallD),
.clear(FlushD),
.d(BPInstrClassF),
.q(BPInstrClassD));
flopenrc #(4) InstrClassRegE(.clk(clk),
flopenrc #(5) InstrClassRegE(.clk(clk),
.reset(reset),
.en(~StallE),
.clear(FlushE),
@ -197,12 +218,40 @@ module bpred
// Check the prediction makes execution.
// first check if the target or fallthrough address matches what was predicted.
assign TargetWrongE = PCTargetE != PCD;
assign FallThroughWrongE = PCLinkE != PCD;
assign PredictionDirWrongE = (BPPredE[1] ^ PCSrcE) & InstrClassE[0];
assign PredictionPCWrongE = PCSrcE ? TargetWrongE : FallThroughWrongE;
assign BPPredWrongE = (PredictionPCWrongE | PredictionDirWrongE) & (|InstrClassE);
// If the target is taken check the target rather than fallthrough. The instruction needs to be a branch if PCSrcE is selected
// Remember the bpred can incorrectly predict a non cfi instruction as a branch taken. If the real instruction is non cfi
// it must have selected teh fall through.
assign PredictionPCWrongE = (PCSrcE & (|InstrClassE) ? TargetWrongE : FallThroughWrongE);
// The branch direction also need to checked.
// However if the direction is wrong then the pc will be wrong. This is only relavent to checking the
// accuracy of the direciton prediction.
assign BPPredDirWrongE = (BPPredE[1] ^ PCSrcE) & InstrClassE[0];
// Finally we need to check if the class is wrong. When the class is wrong the BTB needs to be updated.
// Also we want to track this in a performance counter.
assign PredictionInstrClassWrongE = InstrClassE != BPInstrClassE;
// We want to output to the instruction fetch if the PC fetched was wrong. If by chance the predictor was wrong about
// the direction or class, but correct about the target we don't have the flush the pipeline. However we still
// need this information to verify the accuracy of the predictors.
//assign BPPredWrongE = ((PredictionPCWrongE | BPPredDirWrongE) & (|InstrClassE)) | PredictionInstrClassWrongE;
assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE) | BPPredClassNonCFIWrongE;
// If we have a jump, jump register or jal or jalr and the PC is wrong we need to increment the performance counter.
assign BTBPredPCWrongE = (InstrClassE[4] | InstrClassE[2] | InstrClassE[1]) & PredictionPCWrongE;
// similar with RAS
assign RASPredPCWrongE = InstrClassE[3] & PredictionPCWrongE;
// Finally if the real instruction class is non CFI but the predictor said it was we need to count.
assign BPPredClassNonCFIWrongE = PredictionInstrClassWrongE & ~|InstrClassE;
// Update predictors
satCounter2 BPDirUpdate(.BrDir(PCSrcE),

View File

@ -42,22 +42,24 @@ module gsharePredictor
);
logic [k-1:0] GHRF, GHRD, GHRE;
logic [k-1:0] LookUpPCIndexD, LookUpPCIndexE;
logic [k-1:0] GHRF, GHRD, GHRE, GHRENext;
//logic [k-1:0] LookUpPCIndexD, LookUpPCIndexE;
logic [k-1:0] LookUpPCIndex, UpdatePCIndex;
logic [1:0] PredictionMemory;
logic DoForwarding, DoForwardingF;
logic [1:0] UpdatePredictionF;
assign GHRENext = {PCSrcE, GHRE[k-1:1]};
flopenr #(k) GlobalHistoryRegister(.clk(clk),
.reset(reset),
.en(UpdateEN),
.d({PCSrcE, GHRF[k-1:1] }),
.d(GHRENext),
.q(GHRF));
// for gshare xor the PC with the GHR
assign UpdatePCIndex = GHRE ^ UpdatePC[k:1];
assign UpdatePCIndex = GHRENext ^ UpdatePC[k:1];
assign LookUpPCIndex = GHRF ^ LookUpPC[k:1];
// Make Prediction by reading the correct address in the PHT and also update the new address in the PHT
// GHR referes to the address that the past k branches points to in the prediction stage
@ -66,7 +68,7 @@ module gsharePredictor
.reset(reset),
.RA1(LookUpPCIndex),
.RD1(PredictionMemory),
.REN1(1'b1),
.REN1(~StallF),
.WA1(UpdatePCIndex),
.WD1(UpdatePrediction),
.WEN1(UpdateEN),
@ -92,6 +94,7 @@ module gsharePredictor
assign Prediction = DoForwardingF ? UpdatePredictionF : PredictionMemory;
//pipeline for GHR
/* -----\/----- EXCLUDED -----\/-----
flopenrc #(k) LookUpDReg(.clk(clk),
.reset(reset),
.en(~StallD),
@ -105,5 +108,21 @@ module gsharePredictor
.clear(FlushE),
.d(LookUpPCIndexD),
.q(LookUpPCIndexE));
-----/\----- EXCLUDED -----/\----- */
flopenrc #(k) GHRRegD(.clk(clk),
.reset(reset),
.en(~StallD),
.clear(FlushD),
.d(GHRF),
.q(GHRD));
flopenrc #(k) GHRRegE(.clk(clk),
.reset(reset),
.en(~StallE),
.clear(FlushE),
.d(GHRD),
.q(GHRE));
endmodule

View File

@ -31,11 +31,12 @@ module icache(
input logic StallF, StallD,
input logic FlushD,
// Upper bits of physical address for PC
input logic [`XLEN-1:12] UpperPCPF,
input logic [`XLEN-1:12] UpperPCNextPF,
// Lower 12 bits of virtual PC address, since it's faster this way
input logic [11:0] LowerPCF,
input logic [11:0] LowerPCNextF,
// Data read in from the ebu unit
input logic [`XLEN-1:0] InstrInF,
input logic InstrAckF,
// Read requested from the ebu unit
output logic [`XLEN-1:0] InstrPAdrF,
output logic InstrReadF,
@ -44,95 +45,605 @@ module icache(
// High if the icache is requesting a stall
output logic ICacheStallF,
// The raw (not decompressed) instruction that was requested
// If the next instruction is compressed, the upper 16 bits may be anything
// If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros
output logic [31:0] InstrRawD
);
logic DelayF, DelaySideF, FlushDLastCyclen, DelayD;
logic [1:0] InstrDMuxChoice;
logic [15:0] MisalignedHalfInstrF, MisalignedHalfInstrD;
logic [31:0] InstrF, AlignedInstrD;
// Buffer the last read, for ease of accessing it again
logic LastReadDataValidF;
logic [`XLEN-1:0] LastReadDataF, LastReadAdrF, InDataF;
// Configuration parameters
// TODO Move these to a config file
localparam integer ICACHELINESIZE = 256;
localparam integer ICACHENUMLINES = 512;
// instruction for NOP
localparam [31:0] nop = 32'h00000013;
// Input signals to cache memory
logic FlushMem;
logic [`XLEN-1:12] ICacheMemReadUpperPAdr;
logic [11:0] ICacheMemReadLowerAdr;
logic ICacheMemWriteEnable;
logic [ICACHELINESIZE-1:0] ICacheMemWriteData;
logic [`XLEN-1:0] ICacheMemWritePAdr;
logic EndFetchState;
// Output signals from cache memory
logic [31:0] ICacheMemReadData;
logic ICacheMemReadValid;
logic ICacheReadEn;
rodirectmappedmemre #(.LINESIZE(ICACHELINESIZE), .NUMLINES(ICACHENUMLINES), .WORDSIZE(`XLEN))
cachemem(
.*,
// Stall it if the pipeline is stalled, unless we're stalling it and we're ending our stall
.re(ICacheReadEn),
.flush(FlushMem),
.ReadUpperPAdr(ICacheMemReadUpperPAdr),
.ReadLowerAdr(ICacheMemReadLowerAdr),
.WriteEnable(ICacheMemWriteEnable),
.WriteLine(ICacheMemWriteData),
.WritePAdr(ICacheMemWritePAdr),
.DataWord(ICacheMemReadData),
.DataValid(ICacheMemReadValid)
);
// Temporary change to bridge the new interface to old behaviors
logic [`XLEN-1:0] PCPF;
assign PCPF = {UpperPCPF, LowerPCF};
icachecontroller #(.LINESIZE(ICACHELINESIZE)) controller(.*);
// For now, assume no writes to executable memory
assign FlushMem = 1'b0;
endmodule
module icachecontroller #(parameter LINESIZE = 256) (
// Inputs from pipeline
input logic clk, reset,
input logic StallF, StallD,
input logic FlushD,
// Input the address to read
// The upper bits of the physical pc
input logic [`XLEN-1:12] UpperPCNextPF,
// The lower bits of the virtual pc
input logic [11:0] LowerPCNextF,
// Signals to/from cache memory
// The read coming out of it
input logic [31:0] ICacheMemReadData,
input logic ICacheMemReadValid,
// The address at which we want to search the cache memory
output logic [`XLEN-1:12] ICacheMemReadUpperPAdr,
output logic [11:0] ICacheMemReadLowerAdr,
output logic ICacheReadEn,
// Load data into the cache
output logic ICacheMemWriteEnable,
output logic [LINESIZE-1:0] ICacheMemWriteData,
output logic [`XLEN-1:0] ICacheMemWritePAdr,
// Outputs to rest of ifu
// High if the instruction in the fetch stage is compressed
output logic CompressedF,
// The instruction that was requested
// If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros
output logic [31:0] InstrRawD,
// Outputs to pipeline control stuff
output logic ICacheStallF, EndFetchState,
// Signals to/from ahblite interface
// A read containing the requested data
input logic [`XLEN-1:0] InstrInF,
input logic InstrAckF,
// The read we request from main memory
output logic [`XLEN-1:0] InstrPAdrF,
output logic InstrReadF
);
// FSM states
localparam STATE_READY = 0;
localparam STATE_HIT_SPILL = 1; // spill, block 0 hit
localparam STATE_HIT_SPILL_MISS_FETCH_WDV = 2; // block 1 miss, issue read to AHB and wait data.
localparam STATE_HIT_SPILL_MISS_FETCH_DONE = 3; // write data into SRAM/LUT
localparam STATE_HIT_SPILL_MERGE = 4; // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL.
// a challenge is the spill signal gets us out of the ready state and moves us to
// 1 of the 2 spill branches. However the original fsm design had us return to
// the ready state when the spill + hits/misses were fully resolved. The problem
// is the spill signal is based on PCPF so when we return to READY to check if the
// cache has a hit it still expresses spill. We can fix in 1 of two ways.
// 1. we can add 1 extra state at the end of each spill branch to returns the instruction
// to the CPU advancing the CPU and icache to the next instruction.
// 2. We can assert a signal which is delayed 1 cycle to suppress the spill when we get
// to the READY state.
// The first first option is more robust and increases the number of states by 2. The
// second option is seams like it should work, but I worry there is a hidden interaction
// between CPU stalling and that register.
// Picking option 1.
localparam STATE_HIT_SPILL_FINAL = 5; // this state replicates STATE_READY's replay of the
// spill access but does nto consider spill. It also does not do another operation.
localparam STATE_MISS_FETCH_WDV = 6; // aligned miss, issue read to AHB and wait for data.
localparam STATE_MISS_FETCH_DONE = 7; // write data into SRAM/LUT
localparam STATE_MISS_READ = 8; // read block 1 from SRAM/LUT
localparam STATE_MISS_SPILL_FETCH_WDV = 9; // spill, miss on block 0, issue read to AHB and wait
localparam STATE_MISS_SPILL_FETCH_DONE = 10; // write data into SRAM/LUT
localparam STATE_MISS_SPILL_READ1 = 11; // read block 0 from SRAM/LUT
localparam STATE_MISS_SPILL_2 = 12; // return to ready if hit or do second block update.
localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 13; // miss on block 1, issue read to AHB and wait
localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 14; // write data to SRAM/LUT
localparam STATE_MISS_SPILL_MERGE = 15; // read block 0 of CPU access,
localparam STATE_MISS_SPILL_FINAL = 16; // this state replicates STATE_READY's replay of the
// spill access but does nto consider spill. It also does not do another operation.
localparam STATE_INVALIDATE = 17; // *** not sure if invalidate or evict? invalidate by cache block or address?
localparam AHBByteLength = `XLEN / 8;
localparam AHBOFFETWIDTH = $clog2(AHBByteLength);
localparam BlockByteLength = LINESIZE / 8;
localparam OFFSETWIDTH = $clog2(BlockByteLength);
localparam WORDSPERLINE = LINESIZE/`XLEN;
localparam LOGWPL = $clog2(WORDSPERLINE);
logic [4:0] CurrState, NextState;
logic hit, spill;
logic SavePC;
logic [1:0] PCMux;
logic CntReset;
logic PreCntEn, CntEn;
logic spillSave;
logic UnalignedSelect;
logic FetchCountFlag;
localparam FetchCountThreshold = WORDSPERLINE - 1;
logic [LOGWPL:0] FetchCount, NextFetchCount;
logic [`XLEN-1:0] PCPreFinalF, PCPFinalF, PCSpillF, PCNextPF;
logic [`XLEN-1:OFFSETWIDTH] PCPTrunkF;
logic [31:0] FinalInstrRawF;
logic [15:0] SpillDataBlock0;
logic FlushDLastCyclen;
// Happy path signals
logic [31:0] AlignedInstrRawD;
//logic [31:0] AlignedInstrRawF, AlignedInstrRawD;
//logic FlushDLastCycleN;
//logic PCPMisalignedF;
localparam [31:0] NOP = 32'h13;
logic [`XLEN-1:0] PCPF;
logic reset_q;
// Misaligned signals
//logic [`XLEN:0] MisalignedInstrRawF;
//logic MisalignedStall;
// Cache fault signals
//logic FaultStall;
assign PCNextPF = {UpperPCNextPF, LowerPCNextF};
flopenl #(`XLEN) PCPFFlop(clk, reset, SavePC & ~StallF, PCPFinalF, `RESET_VECTOR, PCPF);
// on spill we want to get the first 2 bytes of the next cache block.
// the spill only occurs if the PCPF mod BlockByteLength == -2. Therefore we can
// simply add 2 to land on the next cache block.
assign PCSpillF = PCPF + 2'b10;
// now we have to select between these three PCs
assign PCPreFinalF = PCMux[0] | StallF ? PCPF : PCNextPF; // *** don't like the stallf
//assign PCPreFinalF = PCMux[0] ? PCPF : PCNextPF; // *** don't like the stallf
assign PCPFinalF = PCMux[1] ? PCSpillF : PCPreFinalF;
// truncate the offset from PCPF for memory address generation
assign PCPTrunkF = PCPFinalF[`XLEN-1:OFFSETWIDTH];
// Detect if the instruction is compressed
assign CompressedF = FinalInstrRawF[1:0] != 2'b11;
// Handle happy path (data in cache, reads aligned)
/* -----\/----- EXCLUDED -----\/-----
generate
if (`XLEN == 32) begin
assign AlignedInstrRawF = PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData;
//assign PCPMisalignedF = PCPF[1] && ~CompressedF;
end else begin
assign AlignedInstrRawF = PCPF[2]
? (PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData[63:32])
: (PCPF[1] ? ICacheMemReadData[47:16] : ICacheMemReadData[31:0]);
//assign PCPMisalignedF = PCPF[2] && PCPF[1] && ~CompressedF;
end
endgenerate
-----/\----- EXCLUDED -----/\----- */
//flopenr #(32) AlignedInstrRawDFlop(clk, reset, ~StallD, AlignedInstrRawF, AlignedInstrRawD);
//flopr #(1) FlushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCycleN | ~StallF), FlushDLastCycleN);
//mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCycleN, InstrRawD);
// Stall for faults or misaligned reads
/* -----\/----- EXCLUDED -----\/-----
always_comb begin
assign ICacheStallF = FaultStall | MisalignedStall;
end
-----/\----- EXCLUDED -----/\----- */
// Handle misaligned, noncompressed reads
/* -----\/----- EXCLUDED -----\/-----
logic MisalignedState, NextMisalignedState;
logic [15:0] MisalignedHalfInstrF;
logic [15:0] UpperHalfWord;
-----/\----- EXCLUDED -----/\----- */
/* -----\/----- EXCLUDED -----\/-----
flopenr #(16) MisalignedHalfInstrFlop(clk, reset, ~FaultStall & (PCPMisalignedF & MisalignedState), AlignedInstrRawF[15:0], MisalignedHalfInstrF);
flopenr #(1) MisalignedStateFlop(clk, reset, ~FaultStall, NextMisalignedState, MisalignedState);
-----/\----- EXCLUDED -----/\----- */
// When doing a misaligned read, swizzle the bits correctly
/* -----\/----- EXCLUDED -----\/-----
generate
if (`XLEN == 32) begin
assign UpperHalfWord = ICacheMemReadData[31:16];
end else begin
assign UpperHalfWord = ICacheMemReadData[63:48];
end
endgenerate
always_comb begin
if (MisalignedState) begin
assign MisalignedInstrRawF = {16'b0, UpperHalfWord};
end else begin
assign MisalignedInstrRawF = {ICacheMemReadData[15:0], MisalignedHalfInstrF};
end
end
-----/\----- EXCLUDED -----/\----- */
// Manage internal state and stall when necessary
/* -----\/----- EXCLUDED -----\/-----
always_comb begin
assign MisalignedStall = PCPMisalignedF & MisalignedState;
assign NextMisalignedState = ~PCPMisalignedF | ~MisalignedState;
end
-----/\----- EXCLUDED -----/\----- */
// Pick the correct address to read
/* -----\/----- EXCLUDED -----\/-----
generate
if (`XLEN == 32) begin
assign ICacheMemReadLowerAdr = {LowerPCNextF[11:2] + (PCPMisalignedF & ~MisalignedState), 2'b00};
end else begin
assign ICacheMemReadLowerAdr = {LowerPCNextF[11:3] + (PCPMisalignedF & ~MisalignedState), 3'b00};
end
endgenerate
-----/\----- EXCLUDED -----/\----- */
// TODO Handle reading instructions that cross page boundaries
//assign ICacheMemReadUpperPAdr = UpperPCNextPF;
// Handle cache faults
/* -----\/----- EXCLUDED -----\/-----
logic FetchState, BeginFetchState;
logic [LOGWPL:0] FetchWordNum, NextFetchWordNum;
logic [`XLEN-1:0] LineAlignedPCPF;
flopr #(1) FetchStateFlop(clk, reset, BeginFetchState | (FetchState & ~EndFetchState), FetchState);
flopr #(LOGWPL+1) FetchWordNumFlop(clk, reset, NextFetchWordNum, FetchWordNum);
// Enter the fetch state when we hit a cache fault
always_comb begin
BeginFetchState = ~ICacheMemReadValid & ~FetchState & (FetchWordNum == 0);
end
// Exit the fetch state once the cache line has been loaded
flopr #(1) EndFetchStateFlop(clk, reset, ICacheMemWriteEnable, EndFetchState);
// Machinery to request the correct addresses from main memory
always_comb begin
InstrReadF = FetchState & ~EndFetchState & ~ICacheMemWriteEnable; // next stage logic
LineAlignedPCPF = {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}}; // the fetch address for abh?
InstrPAdrF = LineAlignedPCPF + FetchWordNum*(`XLEN/8); // ?
NextFetchWordNum = FetchState ? FetchWordNum+InstrAckF : {LOGWPL+1{1'b0}}; // convert to enable
end
// Write to cache memory when we have the line here
always_comb begin
ICacheMemWritePAdr = LineAlignedPCPF;
ICacheMemWriteEnable = FetchWordNum == {1'b1, {LOGWPL{1'b0}}} & FetchState & ~EndFetchState;
end
// Stall the pipeline while loading a new line from memory
always_comb begin
FaultStall = FetchState | ~ICacheMemReadValid;
end
-----/\----- EXCLUDED -----/\----- */
// the FSM is always runing, do not stall.
flopr #(5) stateReg(.clk(clk),
.reset(reset),
.d(NextState),
.q(CurrState));
assign spill = PCPF[4:1] == 4'b1111 ? 1'b1 : 1'b0;
assign hit = ICacheMemReadValid; // note ICacheMemReadValid is hit.
assign FetchCountFlag = FetchCount == FetchCountThreshold;
// Next state logic
always_comb begin
UnalignedSelect = 1'b0;
CntReset = 1'b0;
PreCntEn = 1'b0;
//InstrReadF = 1'b0;
ICacheMemWriteEnable = 1'b0;
spillSave = 1'b0;
PCMux = 2'b00;
ICacheReadEn = 1'b0;
SavePC = 1'b0;
ICacheStallF = 1'b1;
case (CurrState)
STATE_READY: begin
PCMux = 2'b00;
ICacheReadEn = 1'b1;
if (hit & ~spill) begin
SavePC = 1'b1;
ICacheStallF = 1'b0;
NextState = STATE_READY;
end else if (hit & spill) begin
spillSave = 1'b1;
PCMux = 2'b10;
NextState = STATE_HIT_SPILL;
end else if (~hit & ~spill) begin
CntReset = 1'b1;
NextState = STATE_MISS_FETCH_WDV;
end else if (~hit & spill) begin
CntReset = 1'b1;
PCMux = 2'b10;
NextState = STATE_MISS_SPILL_FETCH_WDV;
end else begin
NextState = STATE_READY;
end
end
// branch 1, hit spill and 2, miss spill hit
STATE_HIT_SPILL: begin
PCMux = 2'b10;
UnalignedSelect = 1'b1;
ICacheReadEn = 1'b1;
if (hit) begin
NextState = STATE_HIT_SPILL_FINAL;
end else
CntReset = 1'b1;
NextState = STATE_HIT_SPILL_MISS_FETCH_WDV;
end
STATE_HIT_SPILL_MISS_FETCH_WDV: begin
PCMux = 2'b10;
//InstrReadF = 1'b1;
PreCntEn = 1'b1;
if (FetchCountFlag & InstrAckF) begin
NextState = STATE_HIT_SPILL_MISS_FETCH_DONE;
end else begin
NextState = STATE_HIT_SPILL_MISS_FETCH_WDV;
end
end
STATE_HIT_SPILL_MISS_FETCH_DONE: begin
PCMux = 2'b10;
ICacheMemWriteEnable = 1'b1;
NextState = STATE_HIT_SPILL_MERGE;
end
STATE_HIT_SPILL_MERGE: begin
PCMux = 2'b10;
UnalignedSelect = 1'b1;
ICacheReadEn = 1'b1;
NextState = STATE_HIT_SPILL_FINAL;
end
STATE_HIT_SPILL_FINAL: begin
ICacheReadEn = 1'b1;
PCMux = 2'b00;
UnalignedSelect = 1'b1;
SavePC = 1'b1;
NextState = STATE_READY;
ICacheStallF = 1'b0;
end
// branch 3 miss no spill
STATE_MISS_FETCH_WDV: begin
PCMux = 2'b01;
//InstrReadF = 1'b1;
PreCntEn = 1'b1;
if (FetchCountFlag & InstrAckF) begin
NextState = STATE_MISS_FETCH_DONE;
end else begin
NextState = STATE_MISS_FETCH_WDV;
end
end
STATE_MISS_FETCH_DONE: begin
PCMux = 2'b01;
ICacheMemWriteEnable = 1'b1;
NextState = STATE_MISS_READ;
end
STATE_MISS_READ: begin
PCMux = 2'b01;
ICacheReadEn = 1'b1;
NextState = STATE_READY;
end
// branch 4 miss spill hit, and 5 miss spill miss
STATE_MISS_SPILL_FETCH_WDV: begin
PCMux = 2'b01;
PreCntEn = 1'b1;
//InstrReadF = 1'b1;
if (FetchCountFlag & InstrAckF) begin
NextState = STATE_MISS_SPILL_FETCH_DONE;
end else begin
NextState = STATE_MISS_SPILL_FETCH_WDV;
end
end
STATE_MISS_SPILL_FETCH_DONE: begin
PCMux = 2'b01;
ICacheMemWriteEnable = 1'b1;
NextState = STATE_MISS_SPILL_READ1;
end
STATE_MISS_SPILL_READ1: begin // always be a hit as we just wrote that cache block.
PCMux = 2'b10; // there is a 1 cycle delay after setting the address before the date arrives.
spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm.
ICacheReadEn = 1'b1;
NextState = STATE_MISS_SPILL_2;
end
STATE_MISS_SPILL_2: begin
PCMux = 2'b10;
UnalignedSelect = 1'b1;
if (~hit) begin
CntReset = 1'b1;
NextState = STATE_MISS_SPILL_MISS_FETCH_WDV;
end else begin
NextState = STATE_MISS_SPILL_FINAL;
end
end
STATE_MISS_SPILL_MISS_FETCH_WDV: begin
PCMux = 2'b10;
PreCntEn = 1'b1;
//InstrReadF = 1'b1;
if (FetchCountFlag & InstrAckF) begin
NextState = STATE_MISS_SPILL_MISS_FETCH_DONE;
end else begin
NextState = STATE_MISS_SPILL_MISS_FETCH_WDV;
end
end
STATE_MISS_SPILL_MISS_FETCH_DONE: begin
PCMux = 2'b10;
ICacheMemWriteEnable = 1'b1;
NextState = STATE_MISS_SPILL_MERGE;
end
STATE_MISS_SPILL_MERGE: begin
PCMux = 2'b10;
UnalignedSelect = 1'b1;
ICacheReadEn = 1'b1;
NextState = STATE_MISS_SPILL_FINAL;
end
STATE_MISS_SPILL_FINAL: begin
ICacheReadEn = 1'b1;
PCMux = 2'b00;
UnalignedSelect = 1'b1;
SavePC = 1'b1;
ICacheStallF = 1'b0;
NextState = STATE_READY;
end
default: begin
PCMux = 2'b01;
NextState = STATE_READY;
end
// *** add in error handling and invalidate/evict
endcase
end
// fsm outputs
// stall CPU any time we are not in the ready state. any other state means the
// cache is either requesting data from the memory interface or handling a
// spill over two cycles.
// *** BUG this logic will need to change
//assign ICacheStallF = ((CurrState != STATE_READY) | ~hit | spill) | reset_q ? 1'b1 : 1'b0;
// save the PC anytime we are in the ready state. The saved value will be used as the PC may not be stable.
//assign SavePC = ((CurrState == STATE_READY) & hit) & ~spill ? 1'b1 : 1'b0;
assign CntEn = PreCntEn & InstrAckF;
assign InstrReadF = (CurrState == STATE_HIT_SPILL_MISS_FETCH_WDV) ||
(CurrState == STATE_MISS_FETCH_WDV) ||
(CurrState == STATE_MISS_SPILL_FETCH_WDV) ||
(CurrState == STATE_MISS_SPILL_MISS_FETCH_WDV);
// to compute the fetch address we need to add the bit shifted
// counter output to the address.
flopenr #(LOGWPL+1)
FetchCountReg(.clk(clk),
.reset(reset | CntReset),
.en(CntEn),
.d(NextFetchCount),
.q(FetchCount));
assign NextFetchCount = FetchCount + 1'b1;
// This part is confusing.
// we need to remove the offset bits (PCPTrunkF). Because the AHB interface is XLEN wide
// we need to address on that number of bits so the PC is extended to the right by AHBByteLength with zeros.
// fetch count is already aligned to AHBByteLength, but we need to extend back to the full address width with
// more zeros after the addition. This will be the number of offset bits less the AHBByteLength.
// *** now a bug need to mux between PCPF and PCPF+2
assign InstrPAdrF = {{PCPTrunkF, {{LOGWPL}{1'b0}}} + FetchCount, {{OFFSETWIDTH-LOGWPL}{1'b0}}};
// store read data from memory interface before writing into SRAM.
genvar i;
generate
for (i = 0; i < WORDSPERLINE; i++) begin
flopenr #(`XLEN) flop(.clk(clk),
.reset(reset),
.en(InstrAckF & (i == FetchCount)),
.d(InstrInF),
.q(ICacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]));
end
endgenerate
// what address is used to write the SRAM?
// spills require storing the first cache block so it can merged
// with the second
// can optimize size, for now just make it the size of the data
// leaving the cache memory.
flopenr #(16) SpillInstrReg(.clk(clk),
.en(spillSave),
.reset(reset),
.d(ICacheMemReadData[15:0]),
.q(SpillDataBlock0));
// use the not quite final PC to do the final selection.
logic [1:1] PCPreFinalF_q;
flopenr #(1) PCFReg(.clk(clk),
.reset(reset),
.en(~StallF),
.d(PCPreFinalF[1]),
.q(PCPreFinalF_q[1]));
assign FinalInstrRawF = spill ? {ICacheMemReadData[15:0], SpillDataBlock0} : ICacheMemReadData;
// There is a frustrating issue on the first access.
// The cache will not contain any valid data but will contain x's on
// reset. This makes FinalInstrRawF invalid. On the first cycle out of
// reset this register will pickup this x and it will propagate throughout
// the cpu causing simulation failure, most likely a trap for invalid instruction.
// Reset must be held 1 cycle longer to prevent this issue. additionally the
// reset should be to a NOP rather than 0.
// register reset
flop #(1) resetReg (.clk(clk),
.d(reset),
.q(reset_q));
flopenl #(32) AlignedInstrRawDFlop(clk, reset | reset_q, ~StallD, FinalInstrRawF, NOP, AlignedInstrRawD);
// cannot have this mux as it creates a combo loop.
// This flop doesn't stall if StallF is high because we should output a nop
// when FlushD happens, even if the pipeline is also stalled.
flopr #(1) flushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCyclen | ~StallF), FlushDLastCyclen);
mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCyclen, InstrRawD);
//assign InstrRawD = AlignedInstrRawD;
assign {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr} = PCPFinalF;
flopenr #(1) delayDFlop(clk, reset, ~StallF, DelayF & ~CompressedF, DelayD);
flopenrc#(1) delayStateFlop(clk, reset, FlushD, ~StallF, DelayF & ~DelaySideF, DelaySideF);
// This flop stores the first half of a misaligned instruction while waiting for the other half
flopenr #(16) halfInstrFlop(clk, reset, DelayF & ~StallF, MisalignedHalfInstrF, MisalignedHalfInstrD);
assign ICacheMemWritePAdr = PCPFinalF;
// This flop is here to simulate pulling data out of the cache, which is edge-triggered
flopenr #(32) instrFlop(clk, reset, ~StallF, InstrF, AlignedInstrD);
// These flops cache the previous read, to accelerate things
flopenr #(`XLEN) lastReadDataFlop(clk, reset, InstrReadF & ~StallF, InstrInF, LastReadDataF);
flopenr #(1) lastReadDataVFlop(clk, reset, InstrReadF & ~StallF, 1'b1, LastReadDataValidF);
flopenr #(`XLEN) lastReadAdrFlop(clk, reset, InstrReadF & ~StallF, InstrPAdrF, LastReadAdrF);
// Decide which address needs to be fetched and sent out over InstrPAdrF
// If the requested address fits inside one read from memory, we fetch that
// address, adjusted to the bit width. Otherwise, we request the lower word
// and then the upper word, in that order.
generate
if (`XLEN == 32) begin
assign InstrPAdrF = PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[31:2], 2'b00} : {PCPF[31:2], 2'b00}) : PCPF;
end else begin
assign InstrPAdrF = PCPF[2] ? (PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[63:3]+1, 3'b000} : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000};
end
endgenerate
// Read from memory if we don't have the address we want
always_comb if (LastReadDataValidF & (InstrPAdrF == LastReadAdrF)) begin
InstrReadF = 0;
end else begin
InstrReadF = 1;
end
// Pick from the memory input or from the previous read, as appropriate
mux2 #(`XLEN) inDataMux(LastReadDataF, InstrInF, InstrReadF, InDataF);
// If the instruction fits in one memory read, then we put the right bits
// into InstrF. Otherwise, we activate DelayF to signal the rest of the
// machinery to swizzle bits.
generate
if (`XLEN == 32) begin
assign InstrF = PCPF[1] ? {16'b0, InDataF[31:16]} : InDataF;
assign DelayF = PCPF[1];
assign MisalignedHalfInstrF = InDataF[31:16];
end else begin
assign InstrF = PCPF[2] ? (PCPF[1] ? {16'b0, InDataF[63:48]} : InDataF[63:32]) : (PCPF[1] ? InDataF[47:16] : InDataF[31:0]);
assign DelayF = PCPF[1] && PCPF[2];
assign MisalignedHalfInstrF = InDataF[63:48];
end
endgenerate
// We will likely need to stall later, but stalls are handled by the rest of the pipeline for now
assign ICacheStallF = 0;
// Detect if the instruction is compressed
assign CompressedF = InstrF[1:0] != 2'b11;
// Pick the correct output, depending on whether we have to assemble this
// instruction from two reads or not.
// Output the requested instruction (we don't need to worry if the read is
// incomplete, since the pipeline stalls for us when it isn't), or a NOP for
// the cycle when the first of two reads comes in.
always_comb if (~FlushDLastCyclen) begin
InstrDMuxChoice = 2'b10;
end else if (DelayD & (MisalignedHalfInstrD[1:0] != 2'b11)) begin
InstrDMuxChoice = 2'b11;
end else begin
InstrDMuxChoice = {1'b0, DelayD};
end
mux4 #(32) instrDMux (AlignedInstrD, {InstrInF[15:0], MisalignedHalfInstrD}, nop, {16'b0, MisalignedHalfInstrD}, InstrDMuxChoice, InstrRawD);
endmodule

View File

@ -27,11 +27,12 @@
`include "wally-config.vh"
module ifu (
input logic clk, reset,
input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushF, FlushD, FlushE, FlushM, FlushW,
input logic clk, reset,
input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushF, FlushD, FlushE, FlushM, FlushW,
// Fetch
input logic [`XLEN-1:0] InstrInF,
input logic InstrAckF,
output logic [`XLEN-1:0] PCF,
output logic [`XLEN-1:0] InstrPAdrF,
output logic InstrReadF,
@ -48,8 +49,11 @@ module ifu (
input logic [`XLEN-1:0] PrivilegedNextPCM,
output logic [31:0] InstrD, InstrM,
output logic [`XLEN-1:0] PCM,
output logic [3:0] InstrClassM,
output logic BPPredWrongM,
output logic [4:0] InstrClassM,
output logic BPPredDirWrongM,
output logic BTBPredPCWrongM,
output logic RASPredPCWrongM,
output logic BPPredClassNonCFIWrongM,
// Writeback
// output logic [`XLEN-1:0] PCLinkW,
// Faults
@ -72,10 +76,14 @@ module ifu (
logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM;
logic PrivilegedChangePCM;
logic IllegalCompInstrD;
logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkM, PCPF;
logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkM, PCNextPF, PCPF;
logic CompressedF;
logic [31:0] InstrRawD, InstrE, InstrW;
localparam [31:0] nop = 32'h00000013; // instruction for NOP
logic reset_q; // *** look at this later.
logic BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE;
tlb #(.ENTRY_BITS(3), .ITLB(1)) itlb(.TLBAccessType(2'b10), .VirtualAddress(PCF),
.PageTableEntryWrite(PageTableEntryF), .PageTypeWrite(PageTypeF),
@ -86,8 +94,8 @@ module ifu (
// branch predictor signals
logic SelBPPredF;
logic [`XLEN-1:0] BPPredPCF, PCCorrectE, PCNext0F, PCNext1F;
logic [3:0] InstrClassD, InstrClassE;
logic [`XLEN-1:0] BPPredPCF, PCCorrectE, PCNext0F, PCNext1F, PCNext2F, PCNext3F;
logic [4:0] InstrClassD, InstrClassE;
// *** put memory interface on here, InstrF becomes output
@ -96,10 +104,11 @@ module ifu (
// assign InstrReadF = 1; // *** & ICacheMissF; add later
// jarred 2021-03-14 Add instrution cache block to remove rd2
icache ic(
assign PCNextPF = PCNextF; // Temporary workaround until iTLB is live
icache icache(
.*,
.UpperPCPF(PCPF[`XLEN-1:12]),
.LowerPCF(PCF[11:0])
.UpperPCNextPF(PCNextPF[`XLEN-1:12]),
.LowerPCNextF(PCNextPF[11:0])
);
assign PrivilegedChangePCM = RetM | TrapM;
@ -118,7 +127,26 @@ module ifu (
mux2 #(`XLEN) pcmux2(.d0(PCNext1F),
.d1(PrivilegedNextPCM),
.s(PrivilegedChangePCM),
.y(UnalignedPCNextF));
.y(PCNext2F));
// *** try to remove this in the future as it can add a long path.
// StallF may arrive late.
/* -----\/----- EXCLUDED -----\/-----
mux2 #(`XLEN) pcmux3(.d0(PCNext2F),
.d1(PCF),
.s(StallF),
.y(PCNext3F));
-----/\----- EXCLUDED -----/\----- */
mux2 #(`XLEN) pcmux4(.d0(PCNext2F),
.d1(`RESET_VECTOR),
.s(reset_q),
.y(UnalignedPCNextF));
flop #(1) resetReg (.clk(clk),
.d(reset),
.q(reset_q));
assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment
flopenl #(`XLEN) pcreg(clk, reset, ~StallF & ~ICacheStallF, PCNextF, `RESET_VECTOR, PCF);
@ -129,7 +157,7 @@ module ifu (
.reset(reset),
.StallF(StallF),
.StallD(StallD),
.StallE(1'b0), // *** may need this eventually
.StallE(StallE),
.FlushF(FlushF),
.FlushD(FlushD),
.FlushE(FlushE),
@ -142,7 +170,11 @@ module ifu (
.PCD(PCD),
.PCLinkE(PCLinkE),
.InstrClassE(InstrClassE),
.BPPredWrongE(BPPredWrongE));
.BPPredWrongE(BPPredWrongE),
.BPPredDirWrongE(BPPredDirWrongE),
.BTBPredPCWrongE(BTBPredPCWrongE),
.RASPredPCWrongE(RASPredPCWrongE),
.BPPredClassNonCFIWrongE(BPPredClassNonCFIWrongE));
// The true correct target is PCTargetE if PCSrcE is 1 else it is the fall through PCLinkE.
assign PCCorrectE = PCSrcE ? PCTargetE : PCLinkE;
@ -167,8 +199,9 @@ module ifu (
// the branch predictor needs a compact decoding of the instruction class.
// *** consider adding in the alternate return address x5 for returns.
assign InstrClassD[3] = InstrD[6:0] == 7'h67 && InstrD[19:15] == 5'h01; // return
assign InstrClassD[2] = InstrD[6:0] == 7'h67 && InstrD[19:15] != 5'h01; // jump register, but not return
assign InstrClassD[4] = (InstrD[6:0] & 7'h77) == 7'h67 && (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or r5
assign InstrClassD[3] = InstrD[6:0] == 7'h67 && (InstrD[19:15] & 5'h1B) == 5'h01; // return must link to ra or r5
assign InstrClassD[2] = InstrD[6:0] == 7'h67 && (InstrD[19:15] & 5'h1B) != 5'h01; // jump register, but not return
assign InstrClassD[1] = InstrD[6:0] == 7'h6F; // jump
assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch
@ -195,26 +228,26 @@ module ifu (
flopenr #(`XLEN) PCMReg(clk, reset, ~StallM, PCE, PCM);
// flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); // *** probably not needed; delete later
flopenrc #(4) InstrClassRegE(.clk(clk),
flopenrc #(5) InstrClassRegE(.clk(clk),
.reset(reset),
.en(~StallE),
.clear(FlushE),
.d(InstrClassD),
.q(InstrClassE));
flopenrc #(4) InstrClassRegM(.clk(clk),
flopenrc #(5) InstrClassRegM(.clk(clk),
.reset(reset),
.en(~StallM),
.clear(FlushM),
.d(InstrClassE),
.q(InstrClassM));
flopenrc #(1) BPPredWrongRegM(.clk(clk),
flopenrc #(4) BPPredWrongRegM(.clk(clk),
.reset(reset),
.en(~StallM),
.clear(FlushM),
.d(BPPredWrongE),
.q(BPPredWrongM));
.d({BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE}),
.q({BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM}));
// seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL.
// either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or

View File

@ -33,13 +33,17 @@ module csr #(parameter
UIE_REGW = 12'b0
) (
input logic clk, reset,
input logic FlushW, StallW,
input logic FlushW, StallD, StallE, StallM, StallW,
input logic [31:0] InstrM,
input logic [`XLEN-1:0] PCM, SrcAM,
input logic CSRReadM, CSRWriteM, TrapM, MTrapM, STrapM, UTrapM, mretM, sretM, uretM,
input logic TimerIntM, ExtIntM, SwIntM,
input logic InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM,
input logic [3:0] InstrClassM,
input logic InstrValidW, FloatRegWriteW, LoadStallD,
input logic BPPredDirWrongM,
input logic BTBPredPCWrongM,
input logic RASPredPCWrongM,
input logic BPPredClassNonCFIWrongM,
input logic [4:0] InstrClassM,
input logic [1:0] NextPrivilegeModeM, PrivilegeModeW,
input logic [`XLEN-1:0] CauseM, NextFaultMtvalM,
output logic [1:0] STATUS_MPP,

View File

@ -30,15 +30,20 @@
`include "wally-config.vh"
module csrc (
input logic clk, reset,
input logic InstrValidW, LoadStallD, CSRMWriteM, BPPredWrongM,
input logic [3:0] InstrClassM,
input logic [11:0] CSRAdrM,
input logic [1:0] PrivilegeModeW,
input logic [`XLEN-1:0] CSRWriteValM,
input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW,
input logic clk, reset,
input logic StallD, StallE, StallM, StallW,
input logic InstrValidW, LoadStallD, CSRMWriteM,
input logic BPPredDirWrongM,
input logic BTBPredPCWrongM,
input logic RASPredPCWrongM,
input logic BPPredClassNonCFIWrongM,
input logic [4:0] InstrClassM,
input logic [11:0] CSRAdrM,
input logic [1:0] PrivilegeModeW,
input logic [`XLEN-1:0] CSRWriteValM,
input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW,
output logic [`XLEN-1:0] CSRCReadValM,
output logic IllegalCSRCAccessM);
output logic IllegalCSRCAccessM);
// create Counter arrays to store address of each counter
integer MHPMCOUNTER [`COUNTERS:0];
@ -65,11 +70,16 @@ module csrc (
logic [`COUNTERS:0] MCOUNTEN;
assign MCOUNTEN[0] = 1'b1;
assign MCOUNTEN[1] = 1'b0;
assign MCOUNTEN[2] = InstrValidW;
assign MCOUNTEN[3] = LoadStallD;
assign MCOUNTEN[4] = BPPredWrongM;
assign MCOUNTEN[5] = InstrClassM[0];
assign MCOUNTEN[`COUNTERS:6] = 0;
assign MCOUNTEN[2] = InstrValidW & ~StallW;
assign MCOUNTEN[3] = LoadStallD & ~StallD;
assign MCOUNTEN[4] = BPPredDirWrongM & ~StallM;
assign MCOUNTEN[5] = InstrClassM[0] & ~StallM;
assign MCOUNTEN[6] = BTBPredPCWrongM & ~StallM;
assign MCOUNTEN[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & ~StallM;
assign MCOUNTEN[8] = RASPredPCWrongM & ~StallM;
assign MCOUNTEN[9] = InstrClassM[3] & ~StallM;
assign MCOUNTEN[10] = BPPredClassNonCFIWrongM & ~StallM;
assign MCOUNTEN[`COUNTERS:11] = 0;
genvar j;
generate
@ -95,7 +105,7 @@ module csrc (
// Write / update counters
// Only the Machine mode versions of the counter CSRs are writable
if (`XLEN==64) begin // 64-bit counters
flopr #(64) HPMCOUNTERreg_j(clk, reset, NextHPMCOUNTERM[j], HPMCOUNTER_REGW[j]);
flopenr #(64) HPMCOUNTERreg_j(clk, reset, ~StallW, NextHPMCOUNTERM[j], HPMCOUNTER_REGW[j]);
end
else begin // 32-bit low and high counters
logic [`COUNTERS:0] WriteHPMCOUNTERHM;
@ -106,8 +116,8 @@ module csrc (
assign NextHPMCOUNTERHM[j] = WriteHPMCOUNTERHM[j] ? CSRWriteValM : HPMCOUNTERPlusM[j][63:32];
// Counter CSRs
flopr #(32) HPMCOUNTERreg_j(clk, reset, NextHPMCOUNTERM[j], HPMCOUNTER_REGW[j][31:0]);
flopr #(32) HPMCOUNTERHreg_j(clk, reset, NextHPMCOUNTERHM[j], HPMCOUNTER_REGW[j][63:32]);
flopenr #(32) HPMCOUNTERreg_j(clk, reset, ~StallW, NextHPMCOUNTERM[j], HPMCOUNTER_REGW[j][31:0]);
flopenr #(32) HPMCOUNTERHreg_j(clk, reset, ~StallW, NextHPMCOUNTERHM[j], HPMCOUNTER_REGW[j][63:32]);
end
end // end for

View File

@ -33,6 +33,7 @@ module csri #(parameter
SIE = 12'h104,
SIP = 12'h144) (
input logic clk, reset,
input logic StallW,
input logic CSRMWriteM, CSRSWriteM,
input logic [11:0] CSRAdrM,
input logic ExtIntM, TimerIntM, SwIntM,
@ -60,10 +61,10 @@ module csri #(parameter
end
// Interrupt Write Enables
assign WriteMIPM = CSRMWriteM && (CSRAdrM == MIP);
assign WriteMIEM = CSRMWriteM && (CSRAdrM == MIE);
assign WriteSIPM = CSRSWriteM && (CSRAdrM == SIP);
assign WriteSIEM = CSRSWriteM && (CSRAdrM == SIE);
assign WriteMIPM = CSRMWriteM && (CSRAdrM == MIP) && ~StallW;
assign WriteMIEM = CSRMWriteM && (CSRAdrM == MIE) && ~StallW;
assign WriteSIPM = CSRSWriteM && (CSRAdrM == SIP) && ~StallW;
assign WriteSIEM = CSRSWriteM && (CSRAdrM == SIE) && ~StallW;
// Interrupt Pending and Enable Registers
// MEIP, MTIP, MSIP are read-only

View File

@ -83,6 +83,7 @@ module csrm #(parameter
MIDELEG_MASK = {{(`XLEN-12){1'b0}}, 12'h222}
) (
input logic clk, reset,
input logic StallW,
input logic CSRMWriteM, MTrapM,
input logic [11:0] CSRAdrM,
input logic [`XLEN-1:0] NextEPCM, NextCauseM, NextMtvalM, MSTATUS_REGW,
@ -113,34 +114,34 @@ module csrm #(parameter
assign MISA_REGW = {(`XLEN == 32 ? 2'b01 : 2'b10), {(`XLEN-28){1'b0}}, MISA_26[25:0]};
// Write machine Mode CSRs
assign WriteMSTATUSM = CSRMWriteM && (CSRAdrM == MSTATUS);
assign WriteMTVECM = CSRMWriteM && (CSRAdrM == MTVEC);
assign WriteMEDELEGM = CSRMWriteM && (CSRAdrM == MEDELEG);
assign WriteMIDELEGM = CSRMWriteM && (CSRAdrM == MIDELEG);
assign WriteMSCRATCHM = CSRMWriteM && (CSRAdrM == MSCRATCH);
assign WriteMEPCM = MTrapM | (CSRMWriteM && (CSRAdrM == MEPC));
assign WriteMCAUSEM = MTrapM | (CSRMWriteM && (CSRAdrM == MCAUSE));
assign WriteMTVALM = MTrapM | (CSRMWriteM && (CSRAdrM == MTVAL));
assign WritePMPCFG0M = (CSRMWriteM && (CSRAdrM == PMPCFG0));
assign WritePMPCFG2M = (CSRMWriteM && (CSRAdrM == PMPCFG2));
assign WritePMPADDRM[0] = (CSRMWriteM && (CSRAdrM == PMPADDR0));
assign WritePMPADDRM[1] = (CSRMWriteM && (CSRAdrM == PMPADDR1));
assign WritePMPADDRM[2] = (CSRMWriteM && (CSRAdrM == PMPADDR2));
assign WritePMPADDRM[3] = (CSRMWriteM && (CSRAdrM == PMPADDR3));
assign WritePMPADDRM[4] = (CSRMWriteM && (CSRAdrM == PMPADDR4));
assign WritePMPADDRM[5] = (CSRMWriteM && (CSRAdrM == PMPADDR5));
assign WritePMPADDRM[6] = (CSRMWriteM && (CSRAdrM == PMPADDR6));
assign WritePMPADDRM[7] = (CSRMWriteM && (CSRAdrM == PMPADDR7));
assign WritePMPADDRM[8] = (CSRMWriteM && (CSRAdrM == PMPADDR8));
assign WritePMPADDRM[9] = (CSRMWriteM && (CSRAdrM == PMPADDR9));
assign WritePMPADDRM[10] = (CSRMWriteM && (CSRAdrM == PMPADDR10));
assign WritePMPADDRM[11] = (CSRMWriteM && (CSRAdrM == PMPADDR11));
assign WritePMPADDRM[12] = (CSRMWriteM && (CSRAdrM == PMPADDR12));
assign WritePMPADDRM[13] = (CSRMWriteM && (CSRAdrM == PMPADDR13));
assign WritePMPADDRM[14] = (CSRMWriteM && (CSRAdrM == PMPADDR14));
assign WritePMPADDRM[15] = (CSRMWriteM && (CSRAdrM == PMPADDR15));
assign WriteMCOUNTERENM = CSRMWriteM && (CSRAdrM == MCOUNTEREN);
assign WriteMCOUNTINHIBITM = CSRMWriteM && (CSRAdrM == MCOUNTINHIBIT);
assign WriteMSTATUSM = CSRMWriteM && (CSRAdrM == MSTATUS) && ~StallW;
assign WriteMTVECM = CSRMWriteM && (CSRAdrM == MTVEC) && ~StallW;
assign WriteMEDELEGM = CSRMWriteM && (CSRAdrM == MEDELEG) && ~StallW;
assign WriteMIDELEGM = CSRMWriteM && (CSRAdrM == MIDELEG) && ~StallW;
assign WriteMSCRATCHM = CSRMWriteM && (CSRAdrM == MSCRATCH) && ~StallW;
assign WriteMEPCM = MTrapM | (CSRMWriteM && (CSRAdrM == MEPC)) && ~StallW;
assign WriteMCAUSEM = MTrapM | (CSRMWriteM && (CSRAdrM == MCAUSE)) && ~StallW;
assign WriteMTVALM = MTrapM | (CSRMWriteM && (CSRAdrM == MTVAL)) && ~StallW;
assign WritePMPCFG0M = (CSRMWriteM && (CSRAdrM == PMPCFG0)) && ~StallW;
assign WritePMPCFG2M = (CSRMWriteM && (CSRAdrM == PMPCFG2)) && ~StallW;
assign WritePMPADDRM[0] = (CSRMWriteM && (CSRAdrM == PMPADDR0)) && ~StallW;
assign WritePMPADDRM[1] = (CSRMWriteM && (CSRAdrM == PMPADDR1)) && ~StallW;
assign WritePMPADDRM[2] = (CSRMWriteM && (CSRAdrM == PMPADDR2)) && ~StallW;
assign WritePMPADDRM[3] = (CSRMWriteM && (CSRAdrM == PMPADDR3)) && ~StallW;
assign WritePMPADDRM[4] = (CSRMWriteM && (CSRAdrM == PMPADDR4)) && ~StallW;
assign WritePMPADDRM[5] = (CSRMWriteM && (CSRAdrM == PMPADDR5)) && ~StallW;
assign WritePMPADDRM[6] = (CSRMWriteM && (CSRAdrM == PMPADDR6)) && ~StallW;
assign WritePMPADDRM[7] = (CSRMWriteM && (CSRAdrM == PMPADDR7)) && ~StallW;
assign WritePMPADDRM[8] = (CSRMWriteM && (CSRAdrM == PMPADDR8)) && ~StallW;
assign WritePMPADDRM[9] = (CSRMWriteM && (CSRAdrM == PMPADDR9)) && ~StallW;
assign WritePMPADDRM[10] = (CSRMWriteM && (CSRAdrM == PMPADDR10)) && ~StallW;
assign WritePMPADDRM[11] = (CSRMWriteM && (CSRAdrM == PMPADDR11)) && ~StallW;
assign WritePMPADDRM[12] = (CSRMWriteM && (CSRAdrM == PMPADDR12)) && ~StallW;
assign WritePMPADDRM[13] = (CSRMWriteM && (CSRAdrM == PMPADDR13)) && ~StallW;
assign WritePMPADDRM[14] = (CSRMWriteM && (CSRAdrM == PMPADDR14)) && ~StallW;
assign WritePMPADDRM[15] = (CSRMWriteM && (CSRAdrM == PMPADDR15)) && ~StallW;
assign WriteMCOUNTERENM = CSRMWriteM && (CSRAdrM == MCOUNTEREN) && ~StallW;
assign WriteMCOUNTINHIBITM = CSRMWriteM && (CSRAdrM == MCOUNTINHIBIT) && ~StallW;
assign IllegalCSRMWriteReadonlyM = CSRMWriteM && (CSRAdrM == MVENDORID || CSRAdrM == MARCHID || CSRAdrM == MIMPID || CSRAdrM == MHARTID);

View File

@ -36,6 +36,7 @@ module csrn #(parameter
UTVAL = 12'h043,
UIP = 12'h044) (
input logic clk, reset,
input logic StallW,
input logic CSRNWriteM, UTrapM,
input logic [11:0] CSRAdrM,
input logic [`XLEN-1:0] NextEPCM, NextCauseM, NextMtvalM, USTATUS_REGW,
@ -56,11 +57,11 @@ module csrn #(parameter
logic [`XLEN-1:0] USCRATCH_REGW, UCAUSE_REGW, UTVAL_REGW;
// Write enables
assign WriteUSTATUSM = CSRNWriteM && (CSRAdrM == USTATUS);
assign WriteUTVECM = CSRNWriteM && (CSRAdrM == UTVEC);
assign WriteUEPCM = UTrapM | (CSRNWriteM && (CSRAdrM == UEPC));
assign WriteUCAUSEM = UTrapM | (CSRNWriteM && (CSRAdrM == UCAUSE));
assign WriteUTVALM = UTrapM | (CSRNWriteM && (CSRAdrM == UTVAL));
assign WriteUSTATUSM = CSRNWriteM && (CSRAdrM == USTATUS) && ~StallW;
assign WriteUTVECM = CSRNWriteM && (CSRAdrM == UTVEC) && ~StallW;
assign WriteUEPCM = UTrapM | (CSRNWriteM && (CSRAdrM == UEPC)) && ~StallW;
assign WriteUCAUSEM = UTrapM | (CSRNWriteM && (CSRAdrM == UCAUSE)) && ~StallW;
assign WriteUTVALM = UTrapM | (CSRNWriteM && (CSRAdrM == UTVAL)) && ~StallW;
// CSRs
flopenl #(`XLEN) UTVECreg(clk, reset, WriteUTVECM, CSRWriteValM, `RESET_VECTOR, UTVEC_REGW);
@ -95,4 +96,4 @@ module csrn #(parameter
assign IllegalCSRNAccessM = 1;
end
endgenerate
endmodule
endmodule

View File

@ -47,6 +47,7 @@ module csrs #(parameter
SEDELEG_MASK = ~(ZERO | 3'b111 << 9)
) (
input logic clk, reset,
input logic StallW,
input logic CSRSWriteM, STrapM,
input logic [11:0] CSRAdrM,
input logic [`XLEN-1:0] NextEPCM, NextCauseM, NextMtvalM, SSTATUS_REGW,
@ -72,14 +73,14 @@ module csrs #(parameter
logic WriteSCAUSEM, WriteSTVALM, WriteSATPM, WriteSCOUNTERENM;
logic [`XLEN-1:0] SSCRATCH_REGW, SCAUSE_REGW, STVAL_REGW;
assign WriteSSTATUSM = CSRSWriteM && (CSRAdrM == SSTATUS);
assign WriteSTVECM = CSRSWriteM && (CSRAdrM == STVEC);
assign WriteSSCRATCHM = CSRSWriteM && (CSRAdrM == SSCRATCH);
assign WriteSEPCM = STrapM | (CSRSWriteM && (CSRAdrM == SEPC));
assign WriteSCAUSEM = STrapM | (CSRSWriteM && (CSRAdrM == SCAUSE));
assign WriteSTVALM = STrapM | (CSRSWriteM && (CSRAdrM == STVAL));
assign WriteSATPM = CSRSWriteM && (CSRAdrM == SATP);
assign WriteSCOUNTERENM = CSRSWriteM && (CSRAdrM == SCOUNTEREN);
assign WriteSSTATUSM = CSRSWriteM && (CSRAdrM == SSTATUS) && ~StallW;
assign WriteSTVECM = CSRSWriteM && (CSRAdrM == STVEC) && ~StallW;
assign WriteSSCRATCHM = CSRSWriteM && (CSRAdrM == SSCRATCH) && ~StallW;
assign WriteSEPCM = STrapM | (CSRSWriteM && (CSRAdrM == SEPC)) && ~StallW;
assign WriteSCAUSEM = STrapM | (CSRSWriteM && (CSRAdrM == SCAUSE)) && ~StallW;
assign WriteSTVALM = STrapM | (CSRSWriteM && (CSRAdrM == STVAL)) && ~StallW;
assign WriteSATPM = CSRSWriteM && (CSRAdrM == SATP) && ~StallW;
assign WriteSCOUNTERENM = CSRSWriteM && (CSRAdrM == SCOUNTEREN) && ~StallW;
// CSRs
flopenl #(`XLEN) STVECreg(clk, reset, WriteSTVECM, CSRWriteValM, ZERO, STVEC_REGW); //busybear: change reset to 0

View File

@ -32,6 +32,7 @@ module csru #(parameter
FRM = 12'h002,
FCSR = 12'h003) (
input logic clk, reset,
input logic StallW,
input logic CSRUWriteM,
input logic [11:0] CSRAdrM,
input logic [`XLEN-1:0] CSRWriteValM,
@ -50,9 +51,9 @@ module csru #(parameter
logic [4:0] NextFFLAGSM;
// Write enables
assign WriteFCSRM = CSRUWriteM && (CSRAdrM == FCSR);
assign WriteFFLAGSM = CSRUWriteM && (CSRAdrM == FFLAGS) | WriteFCSRM ;
assign WriteFRMM = CSRUWriteM && (CSRAdrM == FRM) | WriteFCSRM;
assign WriteFCSRM = CSRUWriteM && (CSRAdrM == FCSR) && ~StallW;
assign WriteFFLAGSM = (CSRUWriteM && (CSRAdrM == FFLAGS) | WriteFCSRM) && ~StallW;
assign WriteFRMM = (CSRUWriteM && (CSRAdrM == FRM) | WriteFCSRM) && ~StallW;
// Write Values
assign NextFRMM = WriteFCSRM ? CSRWriteValM[7:5] : CSRWriteValM[2:0];
@ -81,4 +82,4 @@ module csru #(parameter
assign IllegalCSRUAccessM = 1;
end
endgenerate
endmodule
endmodule

View File

@ -37,8 +37,12 @@ module privileged (
output logic [`XLEN-1:0] PrivilegedNextPCM,
output logic RetM, TrapM,
output logic ITLBFlushF, DTLBFlushM,
input logic InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM,
input logic [3:0] InstrClassM,
input logic InstrValidW, FloatRegWriteW, LoadStallD,
input logic BPPredDirWrongM,
input logic BTBPredPCWrongM,
input logic RASPredPCWrongM,
input logic BPPredClassNonCFIWrongM,
input logic [4:0] InstrClassM,
input logic PrivilegedM,
input logic ITLBInstrPageFaultF, DTLBLoadPageFaultM, DTLBStorePageFaultM,
input logic WalkerInstrPageFaultF, WalkerLoadPageFaultM, WalkerStorePageFaultM,

View File

@ -37,6 +37,9 @@ module dtim #(parameter BASE=0, RANGE = 65535) (
output logic HRESPTim, HREADYTim
);
localparam integer MemStartAddr = BASE>>(1+`XLEN/32);
localparam integer MemEndAddr = (RANGE+BASE)>>1+(`XLEN/32);
logic [`XLEN-1:0] RAM[BASE>>(1+`XLEN/32):(RANGE+BASE)>>1+(`XLEN/32)];
logic [31:0] HWADDR, A;
logic [`XLEN-1:0] HREADTim0;
@ -48,6 +51,7 @@ module dtim #(parameter BASE=0, RANGE = 65535) (
logic memread, memwrite;
logic [3:0] busycount;
assign initTrans = HREADY & HSELTim & (HTRANS != 2'b00);
// *** this seems like a weird way to use reset
@ -82,12 +86,22 @@ module dtim #(parameter BASE=0, RANGE = 65535) (
assign risingHREADYTim = HREADYTim & ~prevHREADYTim;
// Model memory read and write
/* -----\/----- EXCLUDED -----\/-----
integer index;
initial begin
for(index = MemStartAddr; index < MemEndAddr; index = index + 1) begin
RAM[index] <= {`XLEN{1'b0}};
end
end
-----/\----- EXCLUDED -----/\----- */
generate
if (`XLEN == 64) begin
always_ff @(posedge HCLK) begin
HWADDR <= #1 A;
HREADTim0 <= #1 RAM[A[31:3]];
if (memwrite && risingHREADYTim) RAM[HWADDR[31:3]] <= #1 HWDATA;
if (memwrite && risingHREADYTim) RAM[HWADDR[31:3]] <= #1 HWDATA;
end
end else begin
always_ff @(posedge HCLK) begin

View File

@ -124,17 +124,23 @@ module wallypipelinedhart (
// bus interface to dmem
logic MemReadM, MemWriteM;
logic [1:0] AtomicMaskedM;
logic [2:0] Funct3M;
logic [`XLEN-1:0] MemAdrM, MemPAdrM, WriteDataM;
logic [`XLEN-1:0] ReadDataW;
logic [`XLEN-1:0] InstrPAdrF;
logic [`XLEN-1:0] InstrRData;
logic InstrReadF;
logic DataStall, InstrStall;
logic InstrAckD, MemAckW;
logic DataStall;
logic InstrAckF, MemAckW;
logic BPPredWrongE, BPPredWrongM;
logic [3:0] InstrClassM;
logic BPPredWrongE;
logic BPPredDirWrongM;
logic BTBPredPCWrongM;
logic RASPredPCWrongM;
logic BPPredClassNonCFIWrongM;
logic [4:0] InstrClassM;
ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache

View File

@ -27,9 +27,10 @@
`include "wally-config.vh"
module function_radix(reset, ProgramAddrMapFile, ProgramLabelMapFile);
module FunctionName(reset, clk, ProgramAddrMapFile, ProgramLabelMapFile);
input logic reset;
input logic clk;
input string ProgramAddrMapFile;
input string ProgramLabelMapFile;
@ -38,13 +39,23 @@ module function_radix(reset, ProgramAddrMapFile, ProgramLabelMapFile);
string FunctionName;
logic [`XLEN-1:0] pc, FunctionAddr;
logic [`XLEN-1:0] PCF, PCD, PCE, FunctionAddr;
logic StallD, StallE, FlushD, FlushE;
integer ProgramAddrIndex;
// *** I should look into the system verilog objects instead of signal spy.
initial begin
$init_signal_spy("/testbench/dut/hart/PCE", "/testbench/functionRadix/function_radix/pc");
end
assign PCF = testbench.dut.hart.PCF;
assign StallD = testbench.dut.hart.StallD;
assign StallE = testbench.dut.hart.StallE;
assign FlushD = testbench.dut.hart.FlushD;
assign FlushE = testbench.dut.hart.FlushE;
// copy from ifu
// when the F and D stages are flushed we need to ensure the PCE is held so that the function name does not
// erroneously change.
flopenrc #(`XLEN) PCDReg(clk, reset, 1'b0, ~StallD, FlushE & FlushD ? PCE : PCF, PCD);
flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, FlushE ? PCE : PCD, PCE);
task automatic bin_search_min;
input logic [`XLEN-1:0] pc;
@ -74,7 +85,7 @@ module function_radix(reset, ProgramAddrMapFile, ProgramLabelMapFile);
end else if( array[mid] > pc) begin
right = mid -1;
end else begin
$display("Critical Error in function radix. PC, %x not found.", pc);
$display("Critical Error in FunctionName. PC, %x not found.", pc);
return;
//$stop();
end
@ -141,8 +152,8 @@ module function_radix(reset, ProgramAddrMapFile, ProgramLabelMapFile);
end
always @(pc) begin
bin_search_min(pc, ProgramAddrMapLineCount, ProgramAddrMapMemory, FunctionAddr, ProgramAddrIndex);
always @(PCE) begin
bin_search_min(PCE, ProgramAddrMapLineCount, ProgramAddrMapMemory, FunctionAddr, ProgramAddrIndex);
end

View File

@ -1,7 +1,21 @@
`include "wally-config.vh"
// *** Noah please check which way you need to fix the merge conflict.
// Ross was not sure.
// merge conflict main
module testbench();
/* -----\/----- EXCLUDED -----\/-----
// merge conflict icache-almost-ready
package ahbliteState;
typedef enum {IDLE, MEMREAD, MEMWRITE, INSTRREAD, INSTRREADC, ATOMICREAD, ATOMICWRITE, MMUTRANSLATE} statetype;
endpackage
module testbench_busybear();
-----/\----- EXCLUDED -----/\----- */
logic clk, reset;
logic [31:0] GPIOPinsIn;
logic [31:0] GPIOPinsOut, GPIOPinsEn;
@ -573,11 +587,11 @@ module testbench();
// Track names of instructions
string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
logic [31:0] InstrW;
instrNameDecTB dec(dut.hart.ifu.ic.InstrF, InstrFName);
instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE,
instrTrackerTB it(clk, reset,
dut.hart.ifu.icache.controller.FinalInstrRawF,
dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
dut.hart.ifu.InstrM, InstrW,
InstrDName, InstrEName, InstrMName, InstrWName);
dut.hart.ifu.InstrM, dut.hart.ifu.InstrW,
InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
// generate clock to sequence tests
always
@ -587,15 +601,14 @@ module testbench();
endmodule
module instrTrackerTB(
input logic clk, reset, FlushE,
input logic [31:0] InstrD,
input logic [31:0] InstrE, InstrM,
output logic [31:0] InstrW,
output string InstrDName, InstrEName, InstrMName, InstrWName);
input logic clk, reset,
input logic [31:0] InstrF,InstrD,InstrE,InstrM,InstrW,
output string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
// stage Instr to Writeback for visualization
//flopr #(32) InstrWReg(clk, reset, InstrM, InstrW);
instrNameDecTB fdec(InstrF, InstrFName);
instrNameDecTB ddec(InstrD, InstrDName);
instrNameDecTB edec(InstrE, InstrEName);
instrNameDecTB mdec(InstrM, InstrMName);

View File

@ -339,7 +339,9 @@ module testbench();
};
string testsBP64[] = '{
"rv64BP/reg-test", "10000"
"rv64BP/simple", "10000",
"rv64BP/qsort", "1000000",
"rv64BP/sieve", "1000000"
};
string tests64p[] = '{
@ -398,7 +400,7 @@ module testbench();
// pick tests based on modes supported
initial begin
if (`XLEN == 64) begin // RV64
if (TESTSBP) begin
if (`TESTSBP) begin
tests = {testsBP64,tests64p};
end if (TESTSPERIPH) begin
tests = tests64periph;
@ -448,23 +450,32 @@ module testbench();
// Track names of instructions
instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE,
dut.hart.ifu.ic.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
dut.hart.ifu.InstrM, InstrW, InstrFName, InstrDName,
InstrEName, InstrMName, InstrWName);
dut.hart.ifu.icache.controller.FinalInstrRawF,
dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
dut.hart.ifu.InstrM, dut.hart.ifu.InstrW,
InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
// initialize tests
localparam integer MemStartAddr = `TIMBASE>>(1+`XLEN/32);
localparam integer MemEndAddr = (`TIMRANGE+`TIMBASE)>>1+(`XLEN/32);
initial
begin
test = 0;
totalerrors = 0;
testadr = 0;
// fill memory with defined values to reduce Xs in simulation
// Quick note the memory will need to be initialized. The C library does not
// guarantee the initialized reads. For example a strcmp can read 6 byte
// strings, but uses a load double to read them in. If the last 2 bytes are
// not initialized the compare results in an 'x' which propagates through
// the design.
if (`XLEN == 32) meminit = 32'hFEDC0123;
else meminit = 64'hFEDCBA9876543210;
for (i=0; i<=65535; i = i+1) begin
//dut.imem.RAM[i] = meminit;
// dut.uncore.RAM[i] = meminit;
end
// *** broken because DTIM also drives RAM
/*for (i=MemStartAddr; i<MemEndAddr; i = i+1) begin
dut.uncore.dtim.RAM[i] = meminit;
end*/
// read test vectors into memory
memfilename = {"../../imperas-riscv-tests/work/", tests[test], ".elf.memfile"};
$readmemh(memfilename, dut.uncore.dtim.RAM);
@ -551,10 +562,11 @@ module testbench();
end // always @ (negedge clk)
// track the current function or global label
if (DEBUG == 1) begin : functionRadix
function_radix function_radix(.reset(reset),
.ProgramAddrMapFile(ProgramAddrMapFile),
.ProgramLabelMapFile(ProgramLabelMapFile));
if (DEBUG == 1) begin : FunctionName
FunctionName FunctionName(.reset(reset),
.clk(clk),
.ProgramAddrMapFile(ProgramAddrMapFile),
.ProgramLabelMapFile(ProgramLabelMapFile));
end
// initialize the branch predictor