This commit is contained in:
bbracker 2021-06-02 10:03:23 -04:00
commit a683dd7fde
39 changed files with 1807 additions and 1490 deletions

View File

@ -4,12 +4,12 @@ ROOT := ..
LIBRARY_DIRS := LIBRARY_DIRS :=
LIBRARY_FILES := LIBRARY_FILES :=
MARCH :=-march=rv64ic MARCH :=-march=rv64imfdc
MABI :=-mabi=lp64 MABI :=-mabi=lp64d
LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles
AFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64 -W AFLAGS =$(MARCH) $(MABI) -W
CFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64 -mcmodel=medany CFLAGS =$(MARCH) $(MABI) -mcmodel=medany -O2
AS=riscv64-unknown-elf-as AS=riscv64-unknown-elf-as
CC=riscv64-unknown-elf-gcc CC=riscv64-unknown-elf-gcc
AR=riscv64-unknown-elf-ar AR=riscv64-unknown-elf-ar
@ -19,7 +19,7 @@ all: libcrt0.a
%.o: %.s %.o: %.s
${AS} ${AFLAGS} -c $< -o $@ ${AS} ${AFLAGS} -c $< -o $@
libcrt0.a: start.o libcrt0.a: start.o pcnt_driver.o pre_main.o
${AR} -r $@ $^ ${AR} -r $@ $^
clean: clean:

View File

@ -43,11 +43,10 @@ _start:
# set the stack pointer to the top of memory # set the stack pointer to the top of memory - 8 bytes (pointer size)
# 0x8000_0000 + 64K - 8 bytes li sp, 0x07FFFFF8
li sp, 0x007FFFF8
jal ra, main jal ra, pre_main
jal ra, _halt jal ra, _halt
.section .text .section .text

View File

@ -8,7 +8,7 @@ MARCH :=-march=rv64ic
MABI :=-mabi=lp64 MABI :=-mabi=lp64
LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map
CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align -O2
CC=riscv64-unknown-elf-gcc CC=riscv64-unknown-elf-gcc
DA=riscv64-unknown-elf-objdump -d DA=riscv64-unknown-elf-objdump -d

View File

@ -8,7 +8,7 @@ MARCH :=-march=rv64ic
MABI :=-mabi=lp64 MABI :=-mabi=lp64
LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map
CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align -O2
CC=riscv64-unknown-elf-gcc CC=riscv64-unknown-elf-gcc
DA=riscv64-unknown-elf-objdump -d DA=riscv64-unknown-elf-objdump -d

View File

@ -66,21 +66,21 @@ int main () {
ans = sieve (); ans = sieve ();
//gettimeofday(&after , NULL); //gettimeofday(&after , NULL);
if (ans != 1899) /* /\* /\\* if (ans != 1899) *\\/ *\/ */
printf ("Sieve result wrong, ans = %d, expected 1899", ans); /* /\* /\\* printf ("Sieve result wrong, ans = %d, expected 1899", ans); *\\/ *\/ */
//printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); /* /\* //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); *\/ */
printf("Round 2\n"); /* /\* printf("Round 2\n"); *\/ */
//gettimeofday(&before , NULL); /* //gettimeofday(&before , NULL); */
ans = sieve (); /* ans = sieve (); */
//gettimeofday(&after , NULL); /* //gettimeofday(&after , NULL); */
if (ans != 1899) /* if (ans != 1899) */
printf ("Sieve result wrong, ans = %d, expected 1899", ans); /* printf ("Sieve result wrong, ans = %d, expected 1899", ans); */
//printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); /* //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); */
return 0; return 0;

View File

@ -8,7 +8,7 @@ MARCH :=-march=rv64ic
MABI :=-mabi=lp64 MABI :=-mabi=lp64
LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map
CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align -O2
CC=riscv64-unknown-elf-gcc CC=riscv64-unknown-elf-gcc
DA=riscv64-unknown-elf-objdump -d DA=riscv64-unknown-elf-objdump -d

View File

@ -5,4 +5,8 @@ int fail();
int simple_csrbr_test(); int simple_csrbr_test();
int lbu_test(); int lbu_test();
int icache_spill_test(); int icache_spill_test();
void global_hist_0_space_test();
void global_hist_1_space_test();
void global_hist_2_space_test();
void global_hist_3_space_test();
#endif #endif

View File

@ -2,6 +2,10 @@
int main(){ int main(){
//int res = icache_spill_test(); //int res = icache_spill_test();
global_hist_3_space_test();
global_hist_2_space_test();
global_hist_1_space_test();
global_hist_0_space_test();
int res = 1; int res = 1;
if (res < 0) { if (res < 0) {
fail(); fail();

View File

@ -2,11 +2,14 @@
// wally-constants.vh // wally-constants.vh
// //
// Written: tfleming@hmc.edu 4 March 2021 // Written: tfleming@hmc.edu 4 March 2021
// Modified: // Modified: Kmacsaigoren@hmc.edu 31 May 2021
// Added constants for checking sv mode and changed existing constants to accommodate
// both sv48 and sv39
// //
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. // Purpose: Specify constants necessary for different memory virtualization modes.
// These macros should not be changed, except in the event of an // These are specific to sv48, defined in section 4.5 of the privileged spec.
// update to the architecture or particularly special circumstances. // However, despite different constants for different modes, the hardware helps distinguish between
// each mode.
// //
// A component of the Wally configurable RISC-V project. // A component of the Wally configurable RISC-V project.
// //
@ -25,9 +28,16 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/////////////////////////////////////////// ///////////////////////////////////////////
// Virtual Memory Constants (sv39) // Virtual Memory Constants (sv48)
`define VPN_SEGMENT_BITS 9 `define VPN_SEGMENT_BITS 9
`define VPN_BITS 27 `define VPN_BITS 36
`define PPN_HIGH_SEGMENT_BITS 17
`define PPN_BITS 44 `define PPN_BITS 44
`define PPN_HIGH_SEGMENT_BITS 26 `define PA_BITS 56
`define PA_BITS 56 `define SVMODE_BITS 4
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8
`define SV48 9

View File

@ -2,11 +2,14 @@
// wally-constants.vh // wally-constants.vh
// //
// Written: tfleming@hmc.edu 4 March 2021 // Written: tfleming@hmc.edu 4 March 2021
// Modified: // Modified: Kmacsaigoren@hmc.edu 31 May 2021
// Added constants for checking sv mode and changed existing constants to accommodate
// both sv48 and sv39
// //
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. // Purpose: Specify constants necessary for different memory virtualization modes.
// These macros should not be changed, except in the event of an // These are specific to sv48, defined in section 4.5 of the privileged spec.
// update to the architecture or particularly special circumstances. // However, despite different constants for different modes, the hardware helps distinguish between
// each mode.
// //
// A component of the Wally configurable RISC-V project. // A component of the Wally configurable RISC-V project.
// //
@ -25,9 +28,16 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/////////////////////////////////////////// ///////////////////////////////////////////
// Virtual Memory Constants (sv39) // Virtual Memory Constants (sv48)
`define VPN_SEGMENT_BITS 9 `define VPN_SEGMENT_BITS 9
`define VPN_BITS 27 `define VPN_BITS 36
`define PPN_HIGH_SEGMENT_BITS 17
`define PPN_BITS 44 `define PPN_BITS 44
`define PPN_HIGH_SEGMENT_BITS 26 `define PA_BITS 56
`define PA_BITS 56 `define SVMODE_BITS 4
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8
`define SV48 9

View File

@ -2,11 +2,14 @@
// wally-constants.vh // wally-constants.vh
// //
// Written: tfleming@hmc.edu 4 March 2021 // Written: tfleming@hmc.edu 4 March 2021
// Modified: // Modified: Kmacsaigoren@hmc.edu 31 May 2021
// Added constants for checking sv mode and changed existing constants to accommodate
// both sv48 and sv39
// //
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. // Purpose: Specify constants necessary for different memory virtualization modes.
// These macros should not be changed, except in the event of an // These are specific to sv48, defined in section 4.5 of the privileged spec.
// update to the architecture or particularly special circumstances. // However, despite different constants for different modes, the hardware helps distinguish between
// each mode.
// //
// A component of the Wally configurable RISC-V project. // A component of the Wally configurable RISC-V project.
// //
@ -25,9 +28,16 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/////////////////////////////////////////// ///////////////////////////////////////////
// Virtual Memory Constants (sv39) // Virtual Memory Constants (sv48)
`define VPN_SEGMENT_BITS 9 `define VPN_SEGMENT_BITS 9
`define VPN_BITS 27 `define VPN_BITS 36
`define PPN_HIGH_SEGMENT_BITS 17
`define PPN_BITS 44 `define PPN_BITS 44
`define PPN_HIGH_SEGMENT_BITS 26 `define PA_BITS 56
`define PA_BITS 56 `define SVMODE_BITS 4
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8
`define SV48 9

View File

@ -2,11 +2,14 @@
// wally-constants.vh // wally-constants.vh
// //
// Written: tfleming@hmc.edu 4 March 2021 // Written: tfleming@hmc.edu 4 March 2021
// Modified: // Modified: Kmacsaigoren@hmc.edu 31 May 2021
// Added constants for checking sv mode and changed existing constants to accommodate
// both sv48 and sv39
// //
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. // Purpose: Specify constants necessary for different memory virtualization modes.
// These macros should not be changed, except in the event of an // These are specific to sv48, defined in section 4.5 of the privileged spec.
// update to the architecture or particularly special circumstances. // However, despite different constants for different modes, the hardware helps distinguish between
// each mode.
// //
// A component of the Wally configurable RISC-V project. // A component of the Wally configurable RISC-V project.
// //
@ -25,9 +28,16 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/////////////////////////////////////////// ///////////////////////////////////////////
// Virtual Memory Constants (sv39) // Virtual Memory Constants (sv48)
`define VPN_SEGMENT_BITS 9 `define VPN_SEGMENT_BITS 9
`define VPN_BITS 27 `define VPN_BITS 36
`define PPN_HIGH_SEGMENT_BITS 17
`define PPN_BITS 44 `define PPN_BITS 44
`define PPN_HIGH_SEGMENT_BITS 26 `define PA_BITS 56
`define PA_BITS 56 `define SVMODE_BITS 4
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8
`define SV48 9

View File

@ -2,7 +2,10 @@
// wally-constants.vh // wally-constants.vh
// //
// Written: tfleming@hmc.edu 4 March 2021 // Written: tfleming@hmc.edu 4 March 2021
// Modified: // Modified: kmacsaigoren@hmc.edu 31 May 2021
// added svmode constants. These aren't strictly necessary since we're just checking one bit,
// but they're here to stay consistent and to make sure we don't wind up
// in a "NO_TRANSLATE undefined" situation.
// //
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. // Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
// These macros should not be changed, except in the event of an // These macros should not be changed, except in the event of an
@ -31,3 +34,10 @@
`define PPN_BITS 22 `define PPN_BITS 22
`define PPN_HIGH_SEGMENT_BITS 12 `define PPN_HIGH_SEGMENT_BITS 12
`define PA_BITS 34 `define PA_BITS 34
`define SVMODE_BITS 1
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8 // These two are only here to stop
`define SV48 9 // the verilator from yelling at me

View File

@ -32,7 +32,7 @@
`define XLEN 64 `define XLEN 64
//`define MISA (32'h00000105) //`define MISA (32'h00000105)
`define MISA (32'h00000104 | 1<<5 | 1<<18 | 1 << 20 | 1 << 12 | 1 << 0) `define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0)
`define A_SUPPORTED ((`MISA >> 0) % 2 == 1) `define A_SUPPORTED ((`MISA >> 0) % 2 == 1)
`define C_SUPPORTED ((`MISA >> 2) % 2 == 1) `define C_SUPPORTED ((`MISA >> 2) % 2 == 1)
`define D_SUPPORTED ((`MISA >> 3) % 2 == 1) `define D_SUPPORTED ((`MISA >> 3) % 2 == 1)
@ -107,8 +107,9 @@
/* verilator lint_off ASSIGNDLY */ /* verilator lint_off ASSIGNDLY */
/* verilator lint_off PINCONNECTEMPTY */ /* verilator lint_off PINCONNECTEMPTY */
`define TWO_BIT_PRELOAD "../config/rv64icfd/twoBitPredictor.txt" `define TWO_BIT_PRELOAD "../config/rv64BP/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv64icfd/BTBPredictor.txt" `define BTB_PRELOAD "../config/rv64BP/BTBPredictor.txt"
`define BPRED_ENABLED 1 `define BPRED_ENABLED 1
`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE //`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE
`define BPTYPE "BPGSHARE" // BPTWOBIT or "BPGLOBAL" or BPLOCALPAg or BPGSHARE
`define TESTSBP 1 `define TESTSBP 1

View File

@ -2,11 +2,14 @@
// wally-constants.vh // wally-constants.vh
// //
// Written: tfleming@hmc.edu 4 March 2021 // Written: tfleming@hmc.edu 4 March 2021
// Modified: // Modified: Kmacsaigoren@hmc.edu 31 May 2021
// Added constants for checking sv mode and changed existing constants to accommodate
// both sv48 and sv39
// //
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. // Purpose: Specify constants necessary for different memory virtualization modes.
// These macros should not be changed, except in the event of an // These are specific to sv48, defined in section 4.5 of the privileged spec.
// update to the architecture or particularly special circumstances. // However, despite different constants for different modes, the hardware helps distinguish between
// each mode.
// //
// A component of the Wally configurable RISC-V project. // A component of the Wally configurable RISC-V project.
// //
@ -25,9 +28,16 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/////////////////////////////////////////// ///////////////////////////////////////////
// Virtual Memory Constants (sv39) // Virtual Memory Constants (sv48)
`define VPN_SEGMENT_BITS 9 `define VPN_SEGMENT_BITS 9
`define VPN_BITS 27 `define VPN_BITS 36
`define PPN_HIGH_SEGMENT_BITS 17
`define PPN_BITS 44 `define PPN_BITS 44
`define PPN_HIGH_SEGMENT_BITS 26 `define PA_BITS 56
`define PA_BITS 56 `define SVMODE_BITS 4
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8
`define SV48 9

View File

@ -31,7 +31,7 @@
`define XLEN 64 `define XLEN 64
// MISA RISC-V configuration per specification // MISA RISC-V configuration per specification
`define MISA (32'h00000104 | 0 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0) `define MISA (32'h00000104 | 0 << 5 | 0 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0)
`define A_SUPPORTED ((`MISA >> 0) % 2 == 1) `define A_SUPPORTED ((`MISA >> 0) % 2 == 1)
`define C_SUPPORTED ((`MISA >> 2) % 2 == 1) `define C_SUPPORTED ((`MISA >> 2) % 2 == 1)
`define D_SUPPORTED ((`MISA >> 3) % 2 == 1) `define D_SUPPORTED ((`MISA >> 3) % 2 == 1)

View File

@ -2,11 +2,14 @@
// wally-constants.vh // wally-constants.vh
// //
// Written: tfleming@hmc.edu 4 March 2021 // Written: tfleming@hmc.edu 4 March 2021
// Modified: // Modified: Kmacsaigoren@hmc.edu 31 May 2021
// Added constants for checking sv mode and changed existing constants to accommodate
// both sv48 and sv39
// //
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. // Purpose: Specify constants necessary for different memory virtualization modes.
// These macros should not be changed, except in the event of an // These are specific to sv48, defined in section 4.5 of the privileged spec.
// update to the architecture or particularly special circumstances. // However, despite different constants for different modes, the hardware helps distinguish between
// each mode.
// //
// A component of the Wally configurable RISC-V project. // A component of the Wally configurable RISC-V project.
// //
@ -25,9 +28,16 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/////////////////////////////////////////// ///////////////////////////////////////////
// Virtual Memory Constants (sv39) // Virtual Memory Constants (sv48)
`define VPN_SEGMENT_BITS 9 `define VPN_SEGMENT_BITS 9
`define VPN_BITS 27 `define VPN_BITS 36
`define PPN_HIGH_SEGMENT_BITS 17
`define PPN_BITS 44 `define PPN_BITS 44
`define PPN_HIGH_SEGMENT_BITS 26 `define PA_BITS 56
`define PA_BITS 56 `define SVMODE_BITS 4
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8
`define SV48 9

View File

@ -2,11 +2,14 @@
// wally-constants.vh // wally-constants.vh
// //
// Written: tfleming@hmc.edu 4 March 2021 // Written: tfleming@hmc.edu 4 March 2021
// Modified: // Modified: Kmacsaigoren@hmc.edu 31 May 2021
// Added constants for checking sv mode and changed existing constants to accommodate
// both sv48 and sv39
// //
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. // Purpose: Specify constants necessary for different memory virtualization modes.
// These macros should not be changed, except in the event of an // These are specific to sv48, defined in section 4.5 of the privileged spec.
// update to the architecture or particularly special circumstances. // However, despite different constants for different modes, the hardware helps distinguish between
// each mode.
// //
// A component of the Wally configurable RISC-V project. // A component of the Wally configurable RISC-V project.
// //
@ -25,9 +28,16 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/////////////////////////////////////////// ///////////////////////////////////////////
// Virtual Memory Constants (sv39) // Virtual Memory Constants (sv48)
`define VPN_SEGMENT_BITS 9 `define VPN_SEGMENT_BITS 9
`define VPN_BITS 27 `define VPN_BITS 36
`define PPN_HIGH_SEGMENT_BITS 17
`define PPN_BITS 44 `define PPN_BITS 44
`define PPN_HIGH_SEGMENT_BITS 26 `define PA_BITS 56
`define PA_BITS 56 `define SVMODE_BITS 4
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8
`define SV48 9

View File

@ -2,11 +2,14 @@
// wally-constants.vh // wally-constants.vh
// //
// Written: tfleming@hmc.edu 4 March 2021 // Written: tfleming@hmc.edu 4 March 2021
// Modified: // Modified: Kmacsaigoren@hmc.edu 31 May 2021
// Added constants for checking sv mode and changed existing constants to accommodate
// both sv48 and sv39
// //
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. // Purpose: Specify constants necessary for different memory virtualization modes.
// These macros should not be changed, except in the event of an // These are specific to sv48, defined in section 4.5 of the privileged spec.
// update to the architecture or particularly special circumstances. // However, despite different constants for different modes, the hardware helps distinguish between
// each mode.
// //
// A component of the Wally configurable RISC-V project. // A component of the Wally configurable RISC-V project.
// //
@ -25,9 +28,16 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/////////////////////////////////////////// ///////////////////////////////////////////
// Virtual Memory Constants (sv39) // Virtual Memory Constants (sv48)
`define VPN_SEGMENT_BITS 9 `define VPN_SEGMENT_BITS 9
`define VPN_BITS 27 `define VPN_BITS 36
`define PPN_HIGH_SEGMENT_BITS 17
`define PPN_BITS 44 `define PPN_BITS 44
`define PPN_HIGH_SEGMENT_BITS 26 `define PA_BITS 56
`define PA_BITS 56 `define SVMODE_BITS 4
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8
`define SV48 9

View File

@ -25,528 +25,455 @@
`include "wally-config.vh" `include "wally-config.vh"
module fpu ( module fpu (
input logic [2:0] FRM_REGW, // Rounding mode from CSR input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic reset, input logic reset,
//input logic clear, // *** not being used anywhere //input logic clear, // *** not being used anywhere
input logic clk, input logic clk,
input logic [31:0] InstrD, input logic [31:0] InstrD,
input logic [`XLEN-1:0] SrcAE, // Integer input being processed input logic [`XLEN-1:0] SrcAE, // Integer input being processed
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
input logic StallE, StallM, StallW, input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW, input logic FlushE, FlushM, FlushW,
input logic [`AHBW-1:0] HRDATA, input logic [`AHBW-1:0] HRDATA,
input logic RegWriteD, input logic RegWriteD,
output logic [4:0] SetFflagsM, output logic [4:0] SetFflagsM,
output logic [31:0] FSROutW, output logic [31:0] FSROutW,
output logic [1:0] FMemRWM, output logic [1:0] FMemRWM,
output logic FStallD, output logic FStallD,
output logic FWriteIntE, FWriteIntM, FWriteIntW, output logic FWriteIntE, FWriteIntM, FWriteIntW,
output logic [`XLEN-1:0] FWriteDataM, output logic [`XLEN-1:0] FWriteDataM,
output logic FDivSqrtDoneM, output logic FDivSqrtDoneM,
output logic IllegalFPUInstrD, output logic IllegalFPUInstrD,
output logic [`XLEN-1:0] FPUResultW); output logic [`XLEN-1:0] FPUResultW);
// control logic signal instantiation
logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM, FrmW; // FP rounding mode
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
//control logic signal instantiation logic FDivStartD, FDivStartE; // Start division
logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable logic FWriteIntD; // Write to integer register
logic [2:0] FrmD, FrmE, FrmM, FrmW; // FP rounding mode logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double logic [1:0] FMemRWD, FMemRWE; // Read and write enable for memory
logic FDivStartD, FDivStartE; // Start division logic [1:0] FForwardInput1D, FForwardInput1E; // Input1 forwarding mux control signal
logic FWriteIntD; // Write to integer register logic [1:0] FForwardInput2D, FForwardInput2E; // Input2 forwarding mux control signal
logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction logic FForwardInput3D, FForwardInput3E; // Input3 forwarding mux control signal
logic [1:0] FMemRWD, FMemRWE; // Read and write enable for memory logic FInput2UsedD; // Is input 2 used
logic [1:0] FForwardInput1D, FForwardInput1E; // Input1 forwarding mux control signal logic FInput3UsedD; // Is input 3 used
logic [1:0] FForwardInput2D, FForwardInput2E; // Input2 forwarding mux control signal logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
logic FForwardInput3D, FForwardInput3E; // Input3 forwarding mux control signal logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
logic FInput2UsedD; // Is input 2 used
logic FInput3UsedD; // Is input 3 used // regfile signals
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result logic [4:0] RdE, RdM, RdW; // ***Can take from ieu
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component logic [`XLEN-1:0] FWDM; // Write data for FP register
logic [`XLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register
// regfile signals logic [`XLEN-1:0] FRD1E, FRD2E, FRD3E;
logic [4:0] RdE, RdM, RdW; // ***Can take from ieu logic [`XLEN-1:0] FInput1E, FInput1M, FInput1tmpE;
logic [`XLEN-1:0] FWDM; // Write data for FP register logic [`XLEN-1:0] FInput2E, FInput2M;
logic [`XLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register logic [`XLEN-1:0] FInput3E, FInput3M;
logic [`XLEN-1:0] FRD1E, FRD2E, FRD3E; logic [`XLEN-1:0] FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions
logic [`XLEN-1:0] FInput1E, FInput1M, FInput1tmpE;
logic [`XLEN-1:0] FInput2E, FInput2M; // div/sqrt signals
logic [`XLEN-1:0] FInput3E, FInput3M; logic DivDenormM, DivDenormW;
logic [`XLEN-1:0] FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions logic DivOvEn, DivUnEn;
logic DivBusyM;
// div/sqrt signals logic [63:0] FDivResultM, FDivResultW;
logic DivDenormM, DivDenormW; logic [4:0] FDivFlagsM, FDivFlagsW;
logic DivOvEn, DivUnEn;
logic DivBusyM; // FMA signals
logic [63:0] FDivResultM, FDivResultW; logic [12:0] aligncntE, aligncntM;
logic [4:0] FDivFlagsM, FDivFlagsW; logic [105:0] rE, rM;
logic [105:0] sE, sM;
// FMA signals logic [163:0] tE, tM;
logic [12:0] aligncntE, aligncntM; logic [8:0] normcntE, normcntM;
logic [105:0] rE, rM; logic [12:0] aeE, aeM;
logic [105:0] sE, sM; logic bsE, bsM;
logic [163:0] tE, tM; logic killprodE, killprodM;
logic [8:0] normcntE, normcntM; logic prodofE, prodofM;
logic [12:0] aeE, aeM; logic xzeroE, xzeroM;
logic bsE, bsM; logic yzeroE, yzeroM;
logic killprodE, killprodM; logic zzeroE, zzeroM;
logic prodofE, prodofM; logic xdenormE, xdenormM;
logic xzeroE, xzeroM; logic ydenormE, ydenormM;
logic yzeroE, yzeroM; logic zdenormE, zdenormM;
logic zzeroE, zzeroM; logic xinfE, xinfM;
logic xdenormE, xdenormM; logic yinfE, yinfM;
logic ydenormE, ydenormM; logic zinfE, zinfM;
logic zdenormE, zdenormM; logic xnanE, xnanM;
logic xinfE, xinfM; logic ynanE, ynanM;
logic yinfE, yinfM; logic znanE, znanM;
logic zinfE, zinfM; logic nanE, nanM;
logic xnanE, xnanM; logic [8:0] sumshiftE, sumshiftM;
logic ynanE, ynanM; logic sumshiftzeroE, sumshiftzeroM;
logic znanE, znanM; logic prodinfE, prodinfM;
logic nanE, nanM; logic [63:0] FmaResultM, FmaResultW;
logic [8:0] sumshiftE, sumshiftM; logic [4:0] FmaFlagsM, FmaFlagsW;
logic sumshiftzeroE, sumshiftzeroM;
logic prodinfE, prodinfM; // add/cvt signals
logic [63:0] FmaResultM, FmaResultW; logic [63:0] AddSumE, AddSumTcE;
logic [4:0] FmaFlagsM, FmaFlagsW; logic [3:0] AddSelInvE;
logic [10:0] AddExpPostSumE;
// add/cvt signals logic AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE;
logic [63:0] AddSumE, AddSumTcE; logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE;
logic [3:0] AddSelInvE; logic AddConvertE;
logic [10:0] AddExpPostSumE; logic [63:0] AddFloat1E, AddFloat2E;
logic AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE; logic [11:0] AddExp1DenormE, AddExp2DenormE;
logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE; logic [10:0] AddExponentE;
logic AddConvertE; logic [2:0] AddRmE;
logic [63:0] AddFloat1E, AddFloat2E; logic [3:0] AddOpTypeE;
logic [11:0] AddExp1DenormE, AddExp2DenormE; logic AddPE, AddOvEnE, AddUnEnE;
logic [10:0] AddExponentE; logic AddDenormM;
logic [2:0] AddRmE; logic [63:0] AddSumM, AddSumTcM;
logic [3:0] AddOpTypeE; logic [3:0] AddSelInvM;
logic AddPE, AddOvEnE, AddUnEnE; logic [10:0] AddExpPostSumM;
logic AddDenormM; logic AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM;
logic [63:0] AddSumM, AddSumTcM; logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM;
logic [3:0] AddSelInvM; logic AddConvertM, AddSignM;
logic [10:0] AddExpPostSumM; logic [63:0] AddFloat1M, AddFloat2M;
logic AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM; logic [11:0] AddExp1DenormM, AddExp2DenormM;
logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM; logic [10:0] AddExponentM;
logic AddConvertM, AddSignM; logic [63:0] AddOp1M, AddOp2M;
logic [63:0] AddFloat1M, AddFloat2M; logic [2:0] AddRmM;
logic [11:0] AddExp1DenormM, AddExp2DenormM; logic [3:0] AddOpTypeM;
logic [10:0] AddExponentM; logic AddPM, AddOvEnM, AddUnEnM;
logic [63:0] AddOp1M, AddOp2M; logic [63:0] FAddResultM, FAddResultW;
logic [2:0] AddRmM; logic [4:0] FAddFlagsM, FAddFlagsW;
logic [3:0] AddOpTypeM;
logic AddPM, AddOvEnM, AddUnEnM; // cmp signals
logic [63:0] FAddResultM, FAddResultW; logic [7:0] WE, WM;
logic [4:0] FAddFlagsM, FAddFlagsW; logic [7:0] XE, XM;
logic ANaNE, ANaNM;
//cmp signals logic BNaNE, BNaNM;
logic [7:0] WE, WM; logic AzeroE, AzeroM;
logic [7:0] XE, XM; logic BzeroE, BzeroM;
logic ANaNE, ANaNM; logic CmpInvalidM, CmpInvalidW;
logic BNaNE, BNaNM; logic [1:0] CmpFCCM, CmpFCCW;
logic AzeroE, AzeroM; logic [63:0] FCmpResultM, FCmpResultW;
logic BzeroE, BzeroM;
logic CmpInvalidM, CmpInvalidW; // fsgn signals
logic [1:0] CmpFCCM, CmpFCCW; logic [63:0] SgnResultE, SgnResultM, SgnResultW;
logic [63:0] FCmpResultM, FCmpResultW; logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW;
// fsgn signals // instantiation of W stage regfile signals
logic [63:0] SgnResultE, SgnResultM, SgnResultW; logic [`XLEN-1:0] SrcAW;
logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW;
// classify signals
//instantiation of W stage regfile signals logic [63:0] ClassResultE, ClassResultM, ClassResultW;
logic [`XLEN-1:0] SrcAW;
// 64-bit FPU result
// classify signals logic [63:0] FPUResult64W, FPUResult64E;
logic [63:0] ClassResultE, ClassResultM, ClassResultW; logic [4:0] FPUFlagsW;
// other // pipeline control logic
logic [63:0] FPUResult64W, FPUResult64E; // 64-bit FPU result logic PipeEnableDE;
logic [4:0] FPUFlagsW; logic PipeEnableEM;
logic PipeEnableMW;
// pipeline control logic logic PipeClearDE;
logic PipeEnableDE; logic PipeClearEM;
logic PipeEnableEM; logic PipeClearMW;
logic PipeEnableMW;
logic PipeClearDE; // temporarily assign pipe clear and enable signals
logic PipeClearEM; // to never flush & always be running
logic PipeClearMW; localparam PipeClear = 1'b0;
localparam PipeEnable = 1'b1;
//temporarily assign pipe clear and enable signals always_comb begin
//to never flush & always be running PipeEnableDE = ~StallE;
localparam PipeClear = 1'b0; PipeEnableEM = ~StallM;
localparam PipeEnable = 1'b1; PipeEnableMW = ~StallW;
always_comb begin PipeClearDE = FlushE;
PipeClearEM = FlushM;
PipeEnableDE = ~StallE; PipeClearMW = FlushW;
PipeEnableEM = ~StallM; end
PipeEnableMW = ~StallW;
PipeClearDE = FlushE; //DECODE STAGE
PipeClearEM = FlushM;
PipeClearMW = FlushW; // Hazard unit for FPU
fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*);
end
// top-level controller for FPU
fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
// regfile instantiation
//DECODE STAGE
//Hazard unit for FPU
fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*);
//top-level controller for FPU
fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
//regfile instantiation
FPregfile fpregfile (clk, reset, FWriteEnW, FPregfile fpregfile (clk, reset, FWriteEnW,
InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
FPUResult64W, FPUResult64W,
FRD1D, FRD2D, FRD3D); FRD1D, FRD2D, FRD3D);
//*****************
// fpregfile D/E pipe registers
//*****************
flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, FRD3D, FRD3E);
//*****************
//***************** // other D/E pipe registers
//fpregfile D/E pipe registers //*****************
//***************** flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FWriteEnD, FWriteEnE);
flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, FRD1D, FRD1E); flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE);
flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, FRD2D, FRD2E); flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE);
flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, FRD3D, FRD3E); flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE);
flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE);
//***************** flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, FOpCtrlD, FOpCtrlE);
//other D/E pipe registers flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, FDivStartD, FDivStartE);
//***************** flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput1D, FForwardInput1E);
flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FWriteEnD, FWriteEnE); flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput2D, FForwardInput2E);
flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE); flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput3D, FForwardInput3E);
flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE); flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResult64W, FPUResult64E);
flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE); flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE);
flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE); flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E);
flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, FOpCtrlD, FOpCtrlE); flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE);
flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, FDivStartD, FDivStartE);
flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput1D, FForwardInput1E); //EXECUTION STAGE
flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput2D, FForwardInput2E);
flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput3D, FForwardInput3E); // input muxs for forwarding
flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResult64W, FPUResult64E); mux4 #(64) FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, SrcAM, FForwardInput1E, FInput1tmpE);
flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE); mux3 #(64) FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E);
flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E); mux2 #(64) FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E);
flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE); mux2 #(64) FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E);
fma1 fma1 (.*);
// first and only instance of floating-point divider
logic fpdivClk;
clockgater fpdivclkg(.E(FDivStartE),
.SE(DivBusyM),
.CLK(clk),
.ECLK(fpdivClk));
fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk));
//EXECUTION STAGE // first of two-stage instance of floating-point add/cvt unit
fpuaddcvt1 fpadd1 (.*);
// first of two-stage instance of floating-point comparator
// input muxs for forwarding fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]);
mux4 #(64) FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, SrcAM, FForwardInput1E, FInput1tmpE);
mux3 #(64) FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E); // first and only instance of floating-point sign converter
mux2 #(64) FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E); fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*);
mux2 #(64) FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E);
// first and only instance of floating-point classify unit
fma1 fma1 (.*); fpuclassify fpuclass (.*);
//first and only instance of floating-point divider //*****************
fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .*); //fpregfile D/E pipe registers
//*****************
//first of two-stage instance of floating-point add/cvt unit flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, FInput1E, FInput1M);
fpuaddcvt1 fpadd1 (.*); flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, FInput2E, FInput2M);
flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, FInput3E, FInput3M);
//first of two-stage instance of floating-point comparator
fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]); //*****************
// fma E/M pipe registers
//first and only instance of floating-point sign converter //*****************
fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*); flopenrc #(13) EMRegFma1(clk, reset, PipeClearEM, PipeEnableEM, aligncntE, aligncntM);
flopenrc #(106) EMRegFma2(clk, reset, PipeClearEM, PipeEnableEM, rE, rM);
//first and only instance of floating-point classify unit flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, sE, sM);
fpuclassify fpuclass (.*); flopenrc #(164) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, tE, tM);
flopenrc #(9) EMRegFma5(clk, reset, PipeClearEM, PipeEnableEM, normcntE, normcntM);
flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, aeE, aeM);
flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, bsE, bsM);
flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, killprodE, killprodM);
flopenrc #(1) EMRegFma9(clk, reset, PipeClearEM, PipeEnableEM, prodofE, prodofM);
flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM);
flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, yzeroE, yzeroM);
flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, zzeroE, zzeroM);
flopenrc #(1) EMRegFma13(clk, reset, PipeClearEM, PipeEnableEM, xdenormE, xdenormM);
flopenrc #(1) EMRegFma14(clk, reset, PipeClearEM, PipeEnableEM, ydenormE, ydenormM);
flopenrc #(1) EMRegFma15(clk, reset, PipeClearEM, PipeEnableEM, zdenormE, zdenormM);
flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM);
flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, yinfE, yinfM);
flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, zinfE, zinfM);
flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM);
flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, ynanE, ynanM);
//***************** flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, znanE, znanM);
//fpregfile D/E pipe registers flopenrc #(1) EMRegFma22(clk, reset, PipeClearEM, PipeEnableEM, nanE, nanM);
//***************** flopenrc #(9) EMRegFma23(clk, reset, PipeClearEM, PipeEnableEM, sumshiftE, sumshiftM);
flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, FInput1E, FInput1M); flopenrc #(1) EMRegFma24(clk, reset, PipeClearEM, PipeEnableEM, sumshiftzeroE, sumshiftzeroM);
flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, FInput2E, FInput2M); flopenrc #(1) EMRegFma25(clk, reset, PipeClearEM, PipeEnableEM, prodinfE, prodinfM);
flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, FInput3E, FInput3M);
//*****************
//***************** // fpadd E/M pipe registers
//fma E/M pipe registers //*****************
//***************** flopenrc #(64) EMRegAdd1(clk, reset, PipeClearEM, PipeEnableEM, AddSumE, AddSumM);
flopenrc #(13) EMRegFma1(clk, reset, PipeClearEM, PipeEnableEM, aligncntE, aligncntM); flopenrc #(64) EMRegAdd2(clk, reset, PipeClearEM, PipeEnableEM, AddSumTcE, AddSumTcM);
flopenrc #(106) EMRegFma2(clk, reset, PipeClearEM, PipeEnableEM, rE, rM); flopenrc #(4) EMRegAdd3(clk, reset, PipeClearEM, PipeEnableEM, AddSelInvE, AddSelInvM);
flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, sE, sM); flopenrc #(11) EMRegAdd4(clk, reset, PipeClearEM, PipeEnableEM, AddExpPostSumE, AddExpPostSumM);
flopenrc #(164) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, tE, tM); flopenrc #(1) EMRegAdd5(clk, reset, PipeClearEM, PipeEnableEM, AddCorrSignE, AddCorrSignM);
flopenrc #(9) EMRegFma5(clk, reset, PipeClearEM, PipeEnableEM, normcntE, normcntM); flopenrc #(1) EMRegAdd6(clk, reset, PipeClearEM, PipeEnableEM, AddOp1NormE, AddOp1NormM);
flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, aeE, aeM); flopenrc #(1) EMRegAdd7(clk, reset, PipeClearEM, PipeEnableEM, AddOp2NormE, AddOp2NormM);
flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, bsE, bsM); flopenrc #(1) EMRegAdd8(clk, reset, PipeClearEM, PipeEnableEM, AddOpANormE, AddOpANormM);
flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, killprodE, killprodM); flopenrc #(1) EMRegAdd9(clk, reset, PipeClearEM, PipeEnableEM, AddOpBNormE, AddOpBNormM);
flopenrc #(1) EMRegFma9(clk, reset, PipeClearEM, PipeEnableEM, prodofE, prodofM); flopenrc #(1) EMRegAdd10(clk, reset, PipeClearEM, PipeEnableEM, AddInvalidE, AddInvalidM);
flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM); flopenrc #(1) EMRegAdd11(clk, reset, PipeClearEM, PipeEnableEM, AddDenormInE, AddDenormInM);
flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, yzeroE, yzeroM); flopenrc #(1) EMRegAdd12(clk, reset, PipeClearEM, PipeEnableEM, AddConvertE, AddConvertM);
flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, zzeroE, zzeroM); flopenrc #(1) EMRegAdd13(clk, reset, PipeClearEM, PipeEnableEM, AddSwapE, AddSwapM);
flopenrc #(1) EMRegFma13(clk, reset, PipeClearEM, PipeEnableEM, xdenormE, xdenormM); flopenrc #(1) EMRegAdd14(clk, reset, PipeClearEM, PipeEnableEM, AddNormOvflowE, AddNormOvflowM);
flopenrc #(1) EMRegFma14(clk, reset, PipeClearEM, PipeEnableEM, ydenormE, ydenormM); flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignAM);
flopenrc #(1) EMRegFma15(clk, reset, PipeClearEM, PipeEnableEM, zdenormE, zdenormM); flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M);
flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM); flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M);
flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, yinfE, yinfM); flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM);
flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, zinfE, zinfM); flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM);
flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM); flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM);
flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, ynanE, ynanM); flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM);
flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, znanE, znanM); flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM);
flopenrc #(1) EMRegFma22(clk, reset, PipeClearEM, PipeEnableEM, nanE, nanM); flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM);
flopenrc #(9) EMRegFma23(clk, reset, PipeClearEM, PipeEnableEM, sumshiftE, sumshiftM); flopenrc #(1) EMRegAdd26(clk, reset, PipeClearEM, PipeEnableEM, AddOvEnE, AddOvEnM);
flopenrc #(1) EMRegFma24(clk, reset, PipeClearEM, PipeEnableEM, sumshiftzeroE, sumshiftzeroM); flopenrc #(1) EMRegAdd27(clk, reset, PipeClearEM, PipeEnableEM, AddUnEnE, AddUnEnM);
flopenrc #(1) EMRegFma25(clk, reset, PipeClearEM, PipeEnableEM, prodinfE, prodinfM);
//*****************
//***************** // fpcmp E/M pipe registers
//fpadd E/M pipe registers //*****************
//***************** flopenrc #(8) EMRegCmp1(clk, reset, PipeClearEM, PipeEnableEM, WE, WM);
flopenrc #(64) EMRegAdd1(clk, reset, PipeClearEM, PipeEnableEM, AddSumE, AddSumM); flopenrc #(8) EMRegCmp2(clk, reset, PipeClearEM, PipeEnableEM, XE, XM);
flopenrc #(64) EMRegAdd2(clk, reset, PipeClearEM, PipeEnableEM, AddSumTcE, AddSumTcM); flopenrc #(1) EMRegcmp3(clk, reset, PipeClearEM, PipeEnableEM, ANaNE, ANaNM);
flopenrc #(4) EMRegAdd3(clk, reset, PipeClearEM, PipeEnableEM, AddSelInvE, AddSelInvM); flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM);
flopenrc #(11) EMRegAdd4(clk, reset, PipeClearEM, PipeEnableEM, AddExpPostSumE, AddExpPostSumM); flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM);
flopenrc #(1) EMRegAdd5(clk, reset, PipeClearEM, PipeEnableEM, AddCorrSignE, AddCorrSignM); flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM);
flopenrc #(1) EMRegAdd6(clk, reset, PipeClearEM, PipeEnableEM, AddOp1NormE, AddOp1NormM);
flopenrc #(1) EMRegAdd7(clk, reset, PipeClearEM, PipeEnableEM, AddOp2NormE, AddOp2NormM); // put this in for the event we want to delay fsgn - will otherwise bypass
flopenrc #(1) EMRegAdd8(clk, reset, PipeClearEM, PipeEnableEM, AddOpANormE, AddOpANormM); //*****************
flopenrc #(1) EMRegAdd9(clk, reset, PipeClearEM, PipeEnableEM, AddOpBNormE, AddOpBNormM); // fpsgn E/M pipe registers
flopenrc #(1) EMRegAdd10(clk, reset, PipeClearEM, PipeEnableEM, AddInvalidE, AddInvalidM); //*****************
flopenrc #(1) EMRegAdd11(clk, reset, PipeClearEM, PipeEnableEM, AddDenormInE, AddDenormInM); flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM);
flopenrc #(1) EMRegAdd12(clk, reset, PipeClearEM, PipeEnableEM, AddConvertE, AddConvertM); flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM);
flopenrc #(1) EMRegAdd13(clk, reset, PipeClearEM, PipeEnableEM, AddSwapE, AddSwapM);
flopenrc #(1) EMRegAdd14(clk, reset, PipeClearEM, PipeEnableEM, AddNormOvflowE, AddNormOvflowM); //*****************
flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignAM); // other E/M pipe registers
flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M); //*****************
flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M); flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FWriteEnE, FWriteEnM);
flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM);
flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM);
flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM); flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM);
flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM); flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM);
flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM); flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM);
flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM); flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM);
flopenrc #(1) EMRegAdd26(clk, reset, PipeClearEM, PipeEnableEM, AddOvEnE, AddOvEnM); flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM);
flopenrc #(1) EMRegAdd27(clk, reset, PipeClearEM, PipeEnableEM, AddUnEnE, AddUnEnM);
//*****************
//***************** // fpuclassify E/M pipe registers
//fpcmp E/M pipe registers //*****************
//***************** flopenrc #(64) EMRegClass(clk, reset, PipeClearEM, PipeEnableEM, ClassResultE, ClassResultM);
flopenrc #(8) EMRegCmp1(clk, reset, PipeClearEM, PipeEnableEM, WE, WM);
flopenrc #(8) EMRegCmp2(clk, reset, PipeClearEM, PipeEnableEM, XE, XM); //BEGIN MEMORY STAGE
flopenrc #(1) EMRegcmp3(clk, reset, PipeClearEM, PipeEnableEM, ANaNE, ANaNM);
flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM); assign FWriteDataM = FInput1M;
flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM);
flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM); mux2 #(64) FLoadStoreResultMux(HRDATA, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM);
//put this in for the event we want to delay fsgn - will otherwise bypass fma2 fma2(.*);
//*****************
//fpsgn E/M pipe registers // second instance of two-stage floating-point add/cvt unit
//***************** fpuaddcvt2 fpadd2 (.*);
flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM);
flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM); // second instance of two-stage floating-point comparator
fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM),
//***************** .Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(FInput1M), .op2(FInput2M), .*);
//other E/M pipe registers
//***************** //*****************
flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FWriteEnE, FWriteEnM); // fma M/W pipe registers
flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM); //*****************
flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM); flopenrc #(64) MWRegFma1(clk, reset, PipeClearMW, PipeEnableMW, FmaResultM, FmaResultW);
flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM); flopenrc #(5) MWRegFma2(clk, reset, PipeClearMW, PipeEnableMW, FmaFlagsM, FmaFlagsW);
flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM);
flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM); //*****************
flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM); // fpdiv M/W pipe registers
flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM); //*****************
flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, FDivResultM, FDivResultW);
//***************** flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, FDivFlagsM, FDivFlagsW);
//fpuclassify E/M pipe registers flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW);
//*****************
flopenrc #(64) EMRegClass(clk, reset, PipeClearEM, PipeEnableEM, ClassResultE, ClassResultM); //*****************
// fpadd M/W pipe registers
//*****************
flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, FAddResultM, FAddResultW);
flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, FAddFlagsM, FAddFlagsW);
//*****************
// fpcmp M/W pipe registers
//*****************
//BEGIN MEMORY STAGE flopenrc #(1) MWRegCmp1(clk, reset, PipeClearMW, PipeEnableMW, CmpInvalidM, CmpInvalidW);
flopenrc #(2) MWRegCmp2(clk, reset, PipeClearMW, PipeEnableMW, CmpFCCM, CmpFCCW);
assign FWriteDataM = FInput1M; flopenrc #(64) MWRegCmp3(clk, reset, PipeClearMW, PipeEnableMW, FCmpResultM, FCmpResultW);
mux2 #(64) FLoadStoreResultMux(HRDATA, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM); //*****************
// fpsgn M/W pipe registers
fma2 fma2(.*); //*****************
flopenrc #(64) MWRegSgn1(clk, reset, PipeClearMW, PipeEnableMW, SgnResultM, SgnResultW);
//second instance of two-stage floating-point add/cvt unit flopenrc #(5) MWRegSgn2(clk, reset, PipeClearMW, PipeEnableMW, SgnFlagsM, SgnFlagsW);
fpuaddcvt2 fpadd2 (.*);
//*****************
//second instance of two-stage floating-point comparator // other M/W pipe registers
fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM), .Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(FInput1M), .op2(FInput2M), .*); //*****************
flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FWriteEnM, FWriteEnW);
flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW);
flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW);
flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW);
flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW);
flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW);
flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW);
//*****************
// fpuclassify M/W pipe registers
//*****************
//***************** flopenrc #(64) MWRegClass(clk, reset, PipeClearMW, PipeEnableMW, ClassResultM, ClassResultW);
//fma M/W pipe registers
//*****************
flopenrc #(64) MWRegFma1(clk, reset, PipeClearMW, PipeEnableMW, FmaResultM, FmaResultW);
flopenrc #(5) MWRegFma2(clk, reset, PipeClearMW, PipeEnableMW, FmaFlagsM, FmaFlagsW);
//*****************
//fpdiv M/W pipe registers
//*****************
flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, FDivResultM, FDivResultW);
flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, FDivFlagsM, FDivFlagsW);
flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW);
//*****************
//fpadd M/W pipe registers
//*****************
flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, FAddResultM, FAddResultW);
flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, FAddFlagsM, FAddFlagsW);
//*****************
//fpcmp M/W pipe registers
//*****************
flopenrc #(1) MWRegCmp1(clk, reset, PipeClearMW, PipeEnableMW, CmpInvalidM, CmpInvalidW);
flopenrc #(2) MWRegCmp2(clk, reset, PipeClearMW, PipeEnableMW, CmpFCCM, CmpFCCW);
flopenrc #(64) MWRegCmp3(clk, reset, PipeClearMW, PipeEnableMW, FCmpResultM, FCmpResultW);
//*****************
//fpsgn M/W pipe registers
//*****************
flopenrc #(64) MWRegSgn1(clk, reset, PipeClearMW, PipeEnableMW, SgnResultM, SgnResultW);
flopenrc #(5) MWRegSgn2(clk, reset, PipeClearMW, PipeEnableMW, SgnFlagsM, SgnFlagsW);
//*****************
//other M/W pipe registers
//*****************
flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FWriteEnM, FWriteEnW);
flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW);
flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW);
flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW);
flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW);
flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW);
flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW);
//*****************
//fpuclassify M/W pipe registers
//*****************
flopenrc #(64) MWRegClass(clk, reset, PipeClearMW, PipeEnableMW, ClassResultM, ClassResultW);
//######################################### //#########################################
//BEGIN WRITEBACK STAGE // BEGIN WRITEBACK STAGE
//######################################### //#########################################
always_comb begin always_comb begin
case (FResultSelW) case (FResultSelW)
// div/sqrt // div/sqrt
3'b000 : FPUFlagsW = FDivFlagsW; 3'b000 : FPUFlagsW = FDivFlagsW;
// cmp // cmp
3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0}; 3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0};
//fma/mult //fma/mult
3'b010 : FPUFlagsW = FmaFlagsW; 3'b010 : FPUFlagsW = FmaFlagsW;
// sgn inj // sgn inj
3'b011 : FPUFlagsW = SgnFlagsW; 3'b011 : FPUFlagsW = SgnFlagsW;
// add/sub/cnvt // add/sub/cnvt
3'b100 : FPUFlagsW = FAddFlagsW; 3'b100 : FPUFlagsW = FAddFlagsW;
// classify // classify
3'b101 : FPUFlagsW = 5'b0; 3'b101 : FPUFlagsW = 5'b0;
// output SrcAW // output SrcAW
3'b110 : FPUFlagsW = 5'b0; 3'b110 : FPUFlagsW = 5'b0;
// output FRD1 // output FRD1
3'b111 : FPUFlagsW = 5'b0; 3'b111 : FPUFlagsW = 5'b0;
default : FPUFlagsW = 5'bxxxxx; default : FPUFlagsW = 5'bxxxxx;
endcase endcase
end end
always_comb begin
always_comb begin case (FResultSelW)
case (FResultSelW) // div/sqrt
// div/sqrt 3'b000 : FPUResult64W = FDivResultW;
3'b000 : FPUResult64W = FDivResultW; // cmp
// cmp 3'b001 : FPUResult64W = FCmpResultW;
3'b001 : FPUResult64W = FCmpResultW; //fma/mult
//fma/mult 3'b010 : FPUResult64W = FmaResultW;
3'b010 : FPUResult64W = FmaResultW; // sgn inj
// sgn inj 3'b011 : FPUResult64W = SgnResultW;
3'b011 : FPUResult64W = SgnResultW; // add/sub/cnvt
// add/sub/cnvt 3'b100 : FPUResult64W = FAddResultW;
3'b100 : FPUResult64W = FAddResultW; // classify
// classify 3'b101 : FPUResult64W = ClassResultW;
3'b101 : FPUResult64W = ClassResultW; // output SrcAW
// output SrcAW 3'b110 : FPUResult64W = SrcAW;
3'b110 : FPUResult64W = SrcAW; // Load/Store/Move to FP-register
// Load/Store/Move to FP-register 3'b111 : FPUResult64W = FLoadStoreResultW;
3'b111 : FPUResult64W = FLoadStoreResultW; default : FPUResult64W = {64{1'bx}};
default : FPUResult64W = {64{1'bx}}; endcase
endcase end // always_comb
end
//interface between XLEN size datapath and double-precision sized // interface between XLEN size datapath and double-precision sized
//floating-point results // floating-point results
// //
//define offsets for LSB zero extension or truncation // define offsets for LSB zero extension or truncation
always_comb begin always_comb begin
// zero extension
//zero extension
FPUResultW = FPUResult64W[63:64-`XLEN]; FPUResultW = FPUResult64W[63:64-`XLEN];
SetFflagsM = FPUFlagsW; SetFflagsM = FPUFlagsW;
end
endmodule // fpu
end
endmodule

View File

@ -0,0 +1,46 @@
///////////////////////////////////////////
// clockgater.sv
//
// Written: Ross Thompson 9 January 2021
// Modified:
//
// Purpose: Clock gater model. Must use standard cell for synthesis.
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// clockgater: functional model of a latch-based clock gate.
//   E    - clock enable
//   SE   - second enable; ORed with E to form the gate condition
//   CLK  - free-running input clock
//   ECLK - gated clock: follows CLK while the latched enable is 1, else 0
module clockgater
  (input  logic E,
   input  logic SE,
   input  logic CLK,
   output logic ECLK);

  // VERY IMPORTANT.
  // This part functionally models a clock gater, but does not necessarily
  // meet the timing constraints a real standard cell would.
  // Do not use this in synthesis!

  logic enable_q;

  // Active-low transparent latch on the combined enable.
  // While CLK is low the latch follows E | SE; while CLK is high it holds.
  // The enable is therefore already settled when CLK rises and cannot change
  // during the high phase, so ECLK never shows a spurious or truncated pulse.
  // (The previous `always @(~CLK)` form only sampled the enable on clock
  // edges, letting a stale enable_q pass the start of a pulse before updating.)
  always_latch begin
    if (~CLK) enable_q <= E | SE;
  end

  assign ECLK = enable_q & CLK;
endmodule

View File

@ -0,0 +1,195 @@
///////////////////////////////////////////
// lzd.sv
//
// Written: James.Stine@okstate.edu 1 February 2021
// Modified:
//
// Purpose: Hierarchical leading zero detector (LZD)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
/* verilator lint_off DECLFILENAME */
// Original idea came from V. G. Oklobdzija, "An algorithmic and novel
// design of a leading zero detector circuit: comparison with logic
// synthesis," in IEEE Transactions on Very Large Scale Integration
// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi:
// 10.1109/92.273153.
// Modified to be more hierarchical
// lzd2: 2-bit leaf cell of the leading zero detector.
//   B - 2-bit input, B[1] is the more significant bit
//   V - valid: 1 when at least one bit of B is set
//   P - position: 1 when B[1] is clear and B[0] is set
module lzd2 (output logic       P,
             output logic       V,
             input  logic [1:0] B);

  // Any bit set -> a leading one exists in this pair.
  assign V = |B;

  // Upper bit clear and lower bit set.
  assign P = ~B[1] & B[0];

endmodule // lzd2
// lzd_hier: width-parameterized wrapper around the fixed-width leading zero
// detectors defined in this file.
//   WIDTH - input width; must be one of 4, 8, 16, 32, 64 or 128
//   B     - vector to examine
//   ZP    - position output of the selected detector ($clog2(WIDTH) bits)
//   ZV    - valid output of the selected detector
module lzd_hier #(parameter WIDTH=8)
  (input  logic [WIDTH-1:0]         B,
   output logic [$clog2(WIDTH)-1:0] ZP,
   output logic                     ZV);

  // Exactly one detector is instantiated, chosen at elaboration time.
  if (WIDTH == 128)
    lzd128 lz127 (ZP, ZV, B);
  else if (WIDTH == 64)
    lzd64 lz64 (ZP, ZV, B);
  else if (WIDTH == 32)
    lzd32 lz32 (ZP, ZV, B);
  else if (WIDTH == 16)
    lzd16 lz16 (ZP, ZV, B);
  else if (WIDTH == 8)
    lzd8 lz8 (ZP, ZV, B);
  else if (WIDTH == 4)
    lzd4 lz4 (ZP, ZV, B);
  else
    // Without this branch an unsupported WIDTH would instantiate nothing and
    // silently leave ZP and ZV undriven.
    $error("lzd_hier: unsupported WIDTH=%0d (expected 4, 8, 16, 32, 64 or 128)", WIDTH);

endmodule // lzd_hier
// lzd4: 4-bit leading zero detector built from two lzd2 cells.
//   B  - 4-bit input, B[3] is the most significant bit
//   ZV - valid: 1 when any bit of B is set
//   ZP - when ZV is 1, the number of zero bits above the most significant
//        set bit of B; not a meaningful count when ZV is 0
module lzd4 (ZP, ZV, B);
  input  logic [3:0] B;
  logic              ZPa;   // position from the lower pair B[1:0]
  logic              ZPb;   // position from the upper pair B[3:2]
  logic              ZVa;   // lower pair contains a set bit
  logic              ZVb;   // upper pair contains a set bit
  output logic [1:0] ZP;
  output logic       ZV;

  // The 2-bit cell defined in this file is lzd2 (the original instantiated
  // `lz2`, which this file does not define).
  lzd2 l1(ZPa, ZVa, B[1:0]);
  lzd2 l2(ZPb, ZVb, B[3:2]);

  // Take the upper pair's position if it holds a set bit, else the lower pair's.
  assign ZP[0:0] = ZVb ? ZPb : ZPa;
  // Upper pair all zero -> the leading one is in the lower pair (adds 2).
  assign ZP[1]   = ~ZVb;
  assign ZV      = ZVa | ZVb;
endmodule // lzd4
// lzd8: 8-bit leading zero detector built from two lzd4 halves.
//   B  - 8-bit input, B[7] is the most significant bit
//   ZV - valid: 1 when either half reports a set bit
//   ZP - {upper half empty, position from the selected half}
module lzd8 (ZP, ZV, B);
  input  logic [7:0] B;
  logic [1:0]        ZPa;   // position from the lower half B[3:0]
  logic [1:0]        ZPb;   // position from the upper half B[7:4]
  logic              ZVa;   // lower half contains a set bit
  logic              ZVb;   // upper half contains a set bit
  output logic [2:0] ZP;
  output logic       ZV;

  // Use this file's lzd4 (the original instantiated `lz4`, which this file
  // does not define).
  lzd4 l1(ZPa, ZVa, B[3:0]);
  lzd4 l2(ZPb, ZVb, B[7:4]);

  // Take the upper half's position if it holds a set bit, else the lower half's.
  assign ZP[1:0] = ZVb ? ZPb : ZPa;
  // Upper half all zero -> the leading one is in the lower half (adds 4).
  assign ZP[2]   = ~ZVb;
  assign ZV      = ZVa | ZVb;
endmodule // lzd8
// lzd16: 16-bit leading zero detector built from two lzd8 halves.
//   B  - 16-bit input, B[15] is the most significant bit
//   ZV - valid: 1 when either half reports a set bit
//   ZP - {upper half empty, position from the selected half}
module lzd16 (ZP, ZV, B);
  input  logic [15:0] B;
  logic [2:0]         ZPa;   // position from the lower half B[7:0]
  logic [2:0]         ZPb;   // position from the upper half B[15:8]
  logic               ZVa;   // lower half contains a set bit
  logic               ZVb;   // upper half contains a set bit
  output logic [3:0]  ZP;
  output logic        ZV;

  // Use this file's lzd8 (the original instantiated `lz8`, which this file
  // does not define).
  lzd8 l1(ZPa, ZVa, B[7:0]);
  lzd8 l2(ZPb, ZVb, B[15:8]);

  // Take the upper half's position if it holds a set bit, else the lower half's.
  assign ZP[2:0] = ZVb ? ZPb : ZPa;
  // Upper half all zero -> the leading one is in the lower half (adds 8).
  assign ZP[3]   = ~ZVb;
  assign ZV      = ZVa | ZVb;
endmodule // lzd16
// lzd32: 32-bit leading zero detector built from two lzd16 halves.
//   B  - 32-bit input, B[31] is the most significant bit
//   ZV - valid: 1 when either half reports a set bit
//   ZP - {upper half empty, position from the selected half}
module lzd32 (ZP, ZV, B);
  input  logic [31:0] B;
  logic [3:0]         ZPa;   // position from the lower half B[15:0]
  logic [3:0]         ZPb;   // position from the upper half B[31:16]
  logic               ZVa;   // lower half contains a set bit
  logic               ZVb;   // upper half contains a set bit
  output logic [4:0]  ZP;
  output logic        ZV;

  // Use this file's lzd16 (the original instantiated `lz16`, which this file
  // does not define).
  lzd16 l1(ZPa, ZVa, B[15:0]);
  lzd16 l2(ZPb, ZVb, B[31:16]);

  // Take the upper half's position if it holds a set bit, else the lower half's.
  assign ZP[3:0] = ZVb ? ZPb : ZPa;
  // Upper half all zero -> the leading one is in the lower half (adds 16).
  assign ZP[4]   = ~ZVb;
  assign ZV      = ZVa | ZVb;
endmodule // lzd32
// lzd64: 64-bit leading zero detector built from two lzd32 halves.
//   B  - 64-bit input, B[63] is the most significant bit
//   ZV - valid: 1 when either half reports a set bit
//   ZP - {upper half empty, position from the selected half}
module lzd64 (ZP, ZV, B);
  input  logic [63:0] B;
  logic [4:0]         ZPa;   // position from the lower half B[31:0]
  logic [4:0]         ZPb;   // position from the upper half B[63:32]
  logic               ZVa;   // lower half contains a set bit
  logic               ZVb;   // upper half contains a set bit
  output logic [5:0]  ZP;
  output logic        ZV;

  // Use this file's lzd32 (the original instantiated `lz32`, which this file
  // does not define).
  lzd32 l1(ZPa, ZVa, B[31:0]);
  lzd32 l2(ZPb, ZVb, B[63:32]);

  // Take the upper half's position if it holds a set bit, else the lower half's.
  assign ZP[4:0] = ZVb ? ZPb : ZPa;
  // Upper half all zero -> the leading one is in the lower half (adds 32).
  assign ZP[5]   = ~ZVb;
  assign ZV      = ZVa | ZVb;
endmodule // lzd64
// 128-bit leading-zero detector built from two lzd64 halves.
//   B  : input vector, B[127] is the most significant bit
//   ZV : high when any bit of B is set
//   ZP : ZP[6] is high when the upper half B[127:64] has no set bit;
//        ZP[5:0] is the position from the upper half when that half is
//        valid, otherwise the position from the lower half
module lzd128 (ZP, ZV, B);
   input  logic [127:0] B;
   logic [5:0]          ZPa;   // position from the lower half B[63:0]
   logic [5:0]          ZPb;   // position from the upper half B[127:64]
   logic                ZVa;   // lower half contains a set bit
   logic                ZVb;   // upper half contains a set bit
   output logic [6:0]   ZP;
   output logic         ZV;

   // Each half must be exactly 64 bits wide.  The previous slices
   // (B[64:0] and B[127:63]) were 65 bits; truncation to the 64-bit port
   // made the upper detector look at B[126:63], so B[127] was ignored.
   // Uses this file's own lzd64 cell (same behaviour as lz64).
   lzd64 l1(ZPa, ZVa, B[63:0]);
   lzd64 l2(ZPb, ZVb, B[127:64]);

   // The upper half wins whenever it holds a set bit.
   assign ZP[5:0] = ZVb ? ZPb : ZPa;
   assign ZP[6] = ~ZVb;
   assign ZV = ZVa | ZVb;
endmodule // lzd128
/* verilator lint_on DECLFILENAME */

View File

@ -0,0 +1,195 @@
///////////////////////////////////////////
// lzd.sv
//
// Written: James.Stine@okstate.edu 1 February 2021
// Modified:
//
// Purpose: Hierarchical leading-zero detectors (2 to 128 bits) for the integer divide instructions
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
/* verilator lint_off DECLFILENAME */
// Original idea came from V. G. Oklobdzija, "An algorithmic and novel
// design of a leading zero detector circuit: comparison with logic
// synthesis," in IEEE Transactions on Very Large Scale Integration
// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi:
// 10.1109/92.273153.
// Modified to be more hierarchical
// 2-bit leading-zero detector cell (leaf of the lz tree).
//   B : 2-bit input, B[1] is the more significant bit
//   V : valid flag, high when at least one bit of B is set
//   P : position bit, high only when B[1] is clear and B[0] is set
module lz2 (output logic       P,
            output logic       V,
            input  logic [1:0] B);

   // Reduction-OR: any set bit makes the result valid.
   assign V = |B;

   // De Morgan form of (B[0] & ~B[1]).
   assign P = ~(~B[0] | B[1]);

endmodule // lz2
// Width-parameterized wrapper around the fixed-width lz cells.
//   WIDTH : input width; only 4, 8, 16, 32, 64 and 128 select a detector
//   B     : input vector
//   ZP    : position output of the selected detector
//   ZV    : valid output of the selected detector
// For any other WIDTH no detector is instantiated and ZP/ZV are left undriven.
module lzd_hier #(parameter WIDTH=8)
   (input  logic [WIDTH-1:0]         B,
    output logic [$clog2(WIDTH)-1:0] ZP,
    output logic                     ZV);

   // Elaboration-time selection of the matching fixed-width detector.
   case (WIDTH)
     128: lz128 lzd127 (ZP, ZV, B);
     64:  lz64  lzd64  (ZP, ZV, B);
     32:  lz32  lzd32  (ZP, ZV, B);
     16:  lz16  lzd16  (ZP, ZV, B);
     8:   lz8   lzd8   (ZP, ZV, B);
     4:   lz4   lzd4   (ZP, ZV, B);
   endcase

endmodule // lzd_hier
// 4-bit leading-zero detector: two lz2 cells and a 2:1 select.
//   B  : input vector, B[3] is the most significant bit
//   ZV : high when any bit of B is set
//   ZP : {upper pair has no set bit, P of whichever pair is selected}
module lz4 (output logic [1:0] ZP,
            output logic       ZV,
            input  logic [3:0] B);

   logic lo_pos, lo_valid;   // results for the lower pair B[1:0]
   logic hi_pos, hi_valid;   // results for the upper pair B[3:2]

   lz2 l1 (.P(lo_pos), .V(lo_valid), .B(B[1:0]));
   lz2 l2 (.P(hi_pos), .V(hi_valid), .B(B[3:2]));

   // The upper pair takes priority whenever it holds a set bit.
   assign ZP = {~hi_valid, hi_valid ? hi_pos : lo_pos};
   assign ZV = hi_valid | lo_valid;

endmodule // lz4
// 8-bit leading-zero detector: two lz4 halves and a 2:1 select.
//   B  : input vector, B[7] is the most significant bit
//   ZV : high when any bit of B is set
//   ZP : {upper half has no set bit, position of whichever half is selected}
module lz8 (output logic [2:0] ZP,
            output logic       ZV,
            input  logic [7:0] B);

   logic [1:0] lo_pos, hi_pos;       // positions for B[3:0] and B[7:4]
   logic       lo_valid, hi_valid;   // halves containing a set bit

   lz4 l1 (.ZP(lo_pos), .ZV(lo_valid), .B(B[3:0]));
   lz4 l2 (.ZP(hi_pos), .ZV(hi_valid), .B(B[7:4]));

   // The upper half takes priority whenever it holds a set bit.
   assign ZP = {~hi_valid, hi_valid ? hi_pos : lo_pos};
   assign ZV = hi_valid | lo_valid;

endmodule // lz8
// 16-bit leading-zero detector: two lz8 halves and a 2:1 select.
//   B  : input vector, B[15] is the most significant bit
//   ZV : high when any bit of B is set
//   ZP : {upper half has no set bit, position of whichever half is selected}
module lz16 (output logic [3:0]  ZP,
             output logic        ZV,
             input  logic [15:0] B);

   logic [2:0] lo_pos, hi_pos;       // positions for B[7:0] and B[15:8]
   logic       lo_valid, hi_valid;   // halves containing a set bit

   lz8 l1 (.ZP(lo_pos), .ZV(lo_valid), .B(B[7:0]));
   lz8 l2 (.ZP(hi_pos), .ZV(hi_valid), .B(B[15:8]));

   // The upper half takes priority whenever it holds a set bit.
   assign ZP = {~hi_valid, hi_valid ? hi_pos : lo_pos};
   assign ZV = hi_valid | lo_valid;

endmodule // lz16
// 32-bit leading-zero detector: two lz16 halves and a 2:1 select.
//   B  : input vector, B[31] is the most significant bit
//   ZV : high when any bit of B is set
//   ZP : {upper half has no set bit, position of whichever half is selected}
module lz32 (output logic [4:0]  ZP,
             output logic        ZV,
             input  logic [31:0] B);

   logic [3:0] lo_pos, hi_pos;       // positions for B[15:0] and B[31:16]
   logic       lo_valid, hi_valid;   // halves containing a set bit

   lz16 l1 (.ZP(lo_pos), .ZV(lo_valid), .B(B[15:0]));
   lz16 l2 (.ZP(hi_pos), .ZV(hi_valid), .B(B[31:16]));

   // The upper half takes priority whenever it holds a set bit.
   assign ZP = {~hi_valid, hi_valid ? hi_pos : lo_pos};
   assign ZV = hi_valid | lo_valid;

endmodule // lz32
// 64-bit leading-zero detector: two lz32 halves and a 2:1 select.
//   B  : input vector, B[63] is the most significant bit
//   ZV : high when any bit of B is set
//   ZP : {upper half has no set bit, position of whichever half is selected}
module lz64 (output logic [5:0]  ZP,
             output logic        ZV,
             input  logic [63:0] B);

   logic [4:0] lo_pos, hi_pos;       // positions for B[31:0] and B[63:32]
   logic       lo_valid, hi_valid;   // halves containing a set bit

   lz32 l1 (.ZP(lo_pos), .ZV(lo_valid), .B(B[31:0]));
   lz32 l2 (.ZP(hi_pos), .ZV(hi_valid), .B(B[63:32]));

   // The upper half takes priority whenever it holds a set bit.
   assign ZP = {~hi_valid, hi_valid ? hi_pos : lo_pos};
   assign ZV = hi_valid | lo_valid;

endmodule // lz64
// 128-bit leading-zero detector built from two lz64 halves.
//   B  : input vector, B[127] is the most significant bit
//   ZV : high when any bit of B is set
//   ZP : ZP[6] is high when the upper half B[127:64] has no set bit;
//        ZP[5:0] is the position from the upper half when that half is
//        valid, otherwise the position from the lower half
module lz128 (ZP, ZV, B);
   input  logic [127:0] B;
   logic [5:0]          ZPa;   // position from the lower half B[63:0]
   logic [5:0]          ZPb;   // position from the upper half B[127:64]
   logic                ZVa;   // lower half contains a set bit
   logic                ZVb;   // upper half contains a set bit
   output logic [6:0]   ZP;
   output logic         ZV;

   // Each half must be exactly 64 bits wide.  The previous slices
   // (B[64:0] and B[127:63]) were 65 bits; truncation to the 64-bit port
   // made the upper detector look at B[126:63], so B[127] was ignored.
   lz64 l1(ZPa, ZVa, B[63:0]);
   lz64 l2(ZPb, ZVb, B[127:64]);

   // The upper half wins whenever it holds a set bit.
   assign ZP[5:0] = ZVb ? ZPb : ZPa;
   assign ZP[6] = ~ZVb;
   assign ZV = ZVa | ZVb;
endmodule // lz128
/* verilator lint_on DECLFILENAME */

View File

@ -0,0 +1,76 @@
///////////////////////////////////////////
// shifters.sv
//
// Written: James.Stine@okstate.edu 1 February 2021
// Modified:
//
// Purpose: Parameterized logical left/right shifters for the integer divide instructions
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
/* verilator lint_off DECLFILENAME */
/* verilator lint_off UNOPTFLAT */
// Logical right shifter built from $clog2(WIDTH) cascaded 2:1 mux stages.
//   WIDTH : data width
//   A     : value to shift
//   Shift : shift amount; stage i is controlled by Shift[$clog2(WIDTH)-i-1]
//   Z     : A shifted right with zeros filled in from the top
// Stage i shifts by 2**($clog2(WIDTH)-i-1), i.e. the weight of its select
// bit.  For power-of-two WIDTH this equals the former WIDTH/(2**(i+1));
// for other widths the former expression gave amounts that did not match
// the select-bit weights (and a zero-length replication in the last stage).
// NOTE(review): mux2 is defined elsewhere; the port order is assumed to be
// (d0, d1, select, y) - confirm against its definition.
module shift_right #(parameter WIDTH=8)
   (input  logic [WIDTH-1:0]         A,
    input  logic [$clog2(WIDTH)-1:0] Shift,
    output logic [WIDTH-1:0]         Z);

   logic [WIDTH-1:0] stage [$clog2(WIDTH):0];
   genvar            i;

   assign stage[0] = A;
   generate
      for (i=0;i<$clog2(WIDTH);i=i+1)
        begin : genbit
           // Shift distance handled by this stage.
           localparam int SHAMT = 2**($clog2(WIDTH)-i-1);
           mux2 #(WIDTH) mux_inst (stage[i],
                                   {{SHAMT{1'b0}}, stage[i][WIDTH-1:SHAMT]},
                                   Shift[$clog2(WIDTH)-i-1],
                                   stage[i+1]);
        end
   endgenerate
   assign Z = stage[$clog2(WIDTH)];
endmodule // shift_right
// Logical left shifter built from $clog2(WIDTH) cascaded 2:1 mux stages.
//   WIDTH : data width
//   A     : value to shift
//   Shift : shift amount; stage i is controlled by Shift[$clog2(WIDTH)-i-1]
//   Z     : A shifted left with zeros filled in from the bottom
// Stage i shifts by 2**($clog2(WIDTH)-i-1), i.e. the weight of its select
// bit.  For power-of-two WIDTH this equals the former WIDTH/(2**(i+1));
// for other widths the former expression gave amounts that did not match
// the select-bit weights (and a zero-length replication in the last stage).
// NOTE(review): mux2 is defined elsewhere; the port order is assumed to be
// (d0, d1, select, y) - confirm against its definition.
module shift_left #(parameter WIDTH=8)
   (input  logic [WIDTH-1:0]         A,
    input  logic [$clog2(WIDTH)-1:0] Shift,
    output logic [WIDTH-1:0]         Z);

   logic [WIDTH-1:0] stage [$clog2(WIDTH):0];
   genvar            i;

   assign stage[0] = A;
   generate
      for (i=0;i<$clog2(WIDTH);i=i+1)
        begin : genbit
           // Shift distance handled by this stage.
           localparam int SHAMT = 2**($clog2(WIDTH)-i-1);
           mux2 #(WIDTH) mux_inst (stage[i],
                                   {stage[i][WIDTH-1-SHAMT:0], {SHAMT{1'b0}}},
                                   Shift[$clog2(WIDTH)-i-1],
                                   stage[i+1]);
        end
   endgenerate
   assign Z = stage[$clog2(WIDTH)];
endmodule // shift_left
/* verilator lint_on DECLFILENAME */
/* verilator lint_on UNOPTFLAT */

View File

@ -30,7 +30,8 @@
module bpred module bpred
(input logic clk, reset, (input logic clk, reset,
input logic StallF, StallD, StallE, FlushF, FlushD, FlushE, input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushF, FlushD, FlushE, FlushM, FlushW,
// Fetch stage // Fetch stage
// the prediction // the prediction
input logic [`XLEN-1:0] PCNextF, // *** forgot to include this one on the I/O list input logic [`XLEN-1:0] PCNextF, // *** forgot to include this one on the I/O list
@ -88,25 +89,29 @@ module bpred
globalHistoryPredictor DirPredictor(.clk(clk), globalHistoryPredictor DirPredictor(.clk(clk),
.reset(reset), .reset(reset),
.*, // Stalls and flushes .*, // Stalls and flushes
.LookUpPC(PCNextF), .PCNextF(PCNextF),
.Prediction(BPPredF), .BPPredF(BPPredF),
// update // update
.UpdatePC(PCE), .InstrClassE(InstrClassE),
.UpdateEN(InstrClassE[0] & ~StallE), .BPInstrClassE(BPInstrClassE),
.BPPredDirWrongE(BPPredDirWrongE),
.PCE(PCE),
.PCSrcE(PCSrcE), .PCSrcE(PCSrcE),
.UpdatePrediction(UpdateBPPredE)); .UpdateBPPredE(UpdateBPPredE));
end else if (`BPTYPE == "BPGSHARE") begin:Predictor end else if (`BPTYPE == "BPGSHARE") begin:Predictor
gsharePredictor DirPredictor(.clk(clk), gsharePredictor DirPredictor(.clk(clk),
.reset(reset), .reset(reset),
.*, // Stalls and flushes .*, // Stalls and flushes
.LookUpPC(PCNextF), .PCNextF(PCNextF),
.Prediction(BPPredF), .BPPredF(BPPredF),
// update // update
.UpdatePC(PCE), .InstrClassE(InstrClassE),
.UpdateEN(InstrClassE[0] & ~StallE), .BPInstrClassE(BPInstrClassE),
.PCSrcE(PCSrcE), .BPPredDirWrongE(BPPredDirWrongE),
.UpdatePrediction(UpdateBPPredE)); .PCE(PCE),
.PCSrcE(PCSrcE),
.UpdateBPPredE(UpdateBPPredE));
end end
else if (`BPTYPE == "BPLOCALPAg") begin:Predictor else if (`BPTYPE == "BPLOCALPAg") begin:Predictor
@ -190,14 +195,14 @@ module bpred
flopenrc #(2) BPPredRegD(.clk(clk), flopenrc #(2) BPPredRegD(.clk(clk),
.reset(reset), .reset(reset),
.en(~StallD), .en(~StallD),
.clear(FlushD), .clear(1'b0),
.d(BPPredF), .d(BPPredF),
.q(BPPredD)); .q(BPPredD));
flopenrc #(2) BPPredRegE(.clk(clk), flopenrc #(2) BPPredRegE(.clk(clk),
.reset(reset), .reset(reset),
.en(~StallE), .en(~StallE),
.clear(FlushE), .clear(1'b0),
.d(BPPredD), .d(BPPredD),
.q(BPPredE)); .q(BPPredE));

View File

@ -32,76 +32,89 @@ module globalHistoryPredictor
) )
(input logic clk, (input logic clk,
input logic reset, input logic reset,
input logic StallF, StallD, StallE, FlushF, FlushD, FlushE, input logic StallF, StallD, StallE, FlushF, FlushD, FlushE,
input logic [`XLEN-1:0] LookUpPC, input logic [`XLEN-1:0] PCNextF,
output logic [1:0] Prediction, output logic [1:0] BPPredF,
// update // update
input logic [`XLEN-1:0] UpdatePC, input logic [4:0] InstrClassE,
input logic UpdateEN, PCSrcE, input logic [4:0] BPInstrClassE,
input logic [1:0] UpdatePrediction input logic [4:0] BPInstrClassD,
input logic [4:0] BPInstrClassF,
input logic BPPredDirWrongE,
input logic [`XLEN-1:0] PCE,
input logic PCSrcE,
input logic [1:0] UpdateBPPredE
); );
logic [k-1:0] GHRF, GHRFNext; logic [k+1:0] GHR, GHRNext;
assign GHRFNext = {PCSrcE, GHRF[k-1:1]}; logic [k-1:0] PHTUpdateAdr, PHTUpdateAdr0, PHTUpdateAdr1;
logic PHTUpdateEN;
logic BPClassWrongNonCFI;
logic BPClassWrongCFI;
logic BPClassRightNonCFI;
flopenr #(k) GlobalHistoryRegister(.clk(clk), logic [6:0] GHRMuxSel;
.reset(reset), logic GHRUpdateEN;
.en(UpdateEN), logic [k-1:0] GHRLookup;
.d(GHRFNext),
.q(GHRF));
assign BPClassRightNonCFI = ~BPInstrClassE[0] & ~InstrClassE[0];
assign BPClassWrongCFI = ~BPInstrClassE[0] & InstrClassE[0];
assign BPClassWrongNonCFI = BPInstrClassE[0] & ~InstrClassE[0];
assign BPClassRightBPWrong = BPInstrClassE[0] & InstrClassE[0] & BPPredDirWrongE;
assign BPClassRightBPRight = BPInstrClassE[0] & InstrClassE[0] & ~BPPredDirWrongE;
// GHR update selection, 1 hot encoded.
assign GHRMuxSel[0] = ~BPInstrClassF[0] & (BPClassRightNonCFI | BPClassRightBPRight);
assign GHRMuxSel[1] = BPClassWrongCFI & ~BPInstrClassD[0];
assign GHRMuxSel[2] = BPClassWrongNonCFI & ~BPInstrClassD[0];
assign GHRMuxSel[3] = (BPClassRightBPWrong & ~BPInstrClassD[0]) | (BPClassWrongCFI & BPInstrClassD[0]);
assign GHRMuxSel[4] = BPClassWrongNonCFI & BPInstrClassD[0];
assign GHRMuxSel[5] = InstrClassE[0] & BPClassRightBPWrong & BPInstrClassD[0];
assign GHRMuxSel[6] = BPInstrClassF[0] & (BPClassRightNonCFI | (InstrClassE[0] & BPClassRightBPRight));
assign GHRUpdateEN = (| GHRMuxSel[5:1] & ~StallE) | GHRMuxSel[6] & ~StallF;
// hoping this created a AND-OR mux.
always_comb begin
case (GHRMuxSel)
7'b000_0001: GHRNext = GHR[k-1+2:0]; // no change
7'b000_0010: GHRNext = {GHR[k-2+2:0], PCSrcE}; // branch update
7'b000_0100: GHRNext = {1'b0, GHR[k+1:1]}; // repair 1
7'b000_1000: GHRNext = {GHR[k-1+2:1], PCSrcE}; // branch update with mis prediction correction
7'b001_0000: GHRNext = {2'b00, GHR[k+1:2]}; // repair 2
7'b010_0000: GHRNext = {1'b0, GHR[k+1:2], PCSrcE}; // branch update + repair 1
7'b100_0000: GHRNext = {GHR[k-2+2:0], BPPredF[1]}; // speculative update
default: GHRNext = GHR[k-1+2:0];
endcase
end
logic [1:0] PredictionMemory; flopenr #(k+2) GlobalHistoryRegister(.clk(clk),
logic DoForwarding, DoForwardingF; .reset(reset),
logic [1:0] UpdatePredictionF; .en((GHRUpdateEN)),
.d(GHRNext),
.q(GHR));
// if actively updating the GHR at the time of prediction we want to use
// GHRNext as the lookup rather than GHR.
assign PHTUpdateAdr0 = InstrClassE[0] ? GHR[k:1] : GHR[k-1:0];
assign PHTUpdateAdr1 = InstrClassE[0] ? GHR[k+1:2] : GHR[k:1];
assign PHTUpdateAdr = BPInstrClassD[0] ? PHTUpdateAdr1 : PHTUpdateAdr0;
assign PHTUpdateEN = InstrClassE[0] & ~StallE;
assign GHRLookup = |GHRMuxSel[6:1] ? GHRNext[k-1:0] : GHR[k-1:0];
// Make Prediction by reading the correct address in the PHT and also update the new address in the PHT // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT
// GHR referes to the address that the past k branches points to in the prediction stage SRAM2P1R1W #(k, 2) PHT(.clk(clk),
// GHRE refers to the address that the past k branches points to in the exectution stage .reset(reset),
SRAM2P1R1W #(k, 2) PHT(.clk(clk), //.RA1(GHR[k-1:0]),
.reset(reset), .RA1(GHRLookup),
.RA1(GHRF), .RD1(BPPredF),
.RD1(PredictionMemory), .REN1(~StallF),
.REN1(~StallF), .WA1(PHTUpdateAdr),
.WA1(GHRFNext), .WD1(UpdateBPPredE),
.WD1(UpdatePrediction), .WEN1(PHTUpdateEN),
.WEN1(UpdateEN), .BitWEN1(2'b11));
.BitWEN1(2'b11));
// need to forward when updating to the same address as reading.
// first we compare to see if the update and lookup addreses are the same
assign DoForwarding = GHRF == GHRFNext;
// register the update value and the forwarding signal into the Fetch stage
// TODO: add stall logic ***
flopr #(1) DoForwardingReg(.clk(clk),
.reset(reset),
.d(DoForwarding),
.q(DoForwardingF));
flopr #(2) UpdatePredictionReg(.clk(clk),
.reset(reset),
.d(UpdatePrediction),
.q(UpdatePredictionF));
assign Prediction = DoForwardingF ? UpdatePredictionF : PredictionMemory;
//pipeline for GHR
/*flopenrc #(k) GHRDReg(.clk(clk),
.reset(reset),
.en(~StallD),
.clear(FlushD),
.d(GHRF),
.q(GHRD));
flopenrc #(k) GHREReg(.clk(clk),
.reset(reset),
.en(~StallE),
.clear(FlushE),
.d(GHRD),
.q(GHRE));
*/
endmodule endmodule

View File

@ -1,128 +0,0 @@
///////////////////////////////////////////
// gshare.sv
//
// Written: Shreya Sanghai
// Email: ssanghai@hmc.edu
// Created: March 16, 2021
// Modified:
//
// Purpose: Gshare predictor with parameterized global history register
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// Gshare direction predictor: a k-bit global history register (GHR) XORed
// with PC bits [k:1] indexes a 2-bit-wide pattern history table (PHT).
//   k                : number of history bits / PHT address bits
//   LookUpPC         : PC whose bits [k:1] form the read index
//   Prediction       : 2-bit value returned for the lookup
//   UpdatePC         : PC whose bits [k:1] form the write index
//   UpdateEN         : enables both the GHR shift and the PHT write
//   PCSrcE           : bit shifted into the top of the GHR on an update
//   UpdatePrediction : 2-bit value written into the PHT
// StallD, StallE and the Flush* inputs are not referenced by the active code.
module gsharePredictor
#(parameter int k = 10
)
(input logic clk,
input logic reset,
input logic StallF, StallD, StallE, FlushF, FlushD, FlushE,
input logic [`XLEN-1:0] LookUpPC,
output logic [1:0] Prediction,
// update
input logic [`XLEN-1:0] UpdatePC,
input logic UpdateEN, PCSrcE,
input logic [1:0] UpdatePrediction
);
logic [k-1:0] GHRF, GHRFNext;
//logic [k-1:0] LookUpPCIndexD, LookUpPCIndexE;
logic [k-1:0] LookUpPCIndex, UpdatePCIndex;
logic [1:0] PredictionMemory;
logic DoForwarding, DoForwardingF;
logic [1:0] UpdatePredictionF;
// next history: drop GHRF[0] and insert PCSrcE as the new most significant bit
assign GHRFNext = {PCSrcE, GHRF[k-1:1]};
// the history register only advances when UpdateEN is high
flopenr #(k) GlobalHistoryRegister(.clk(clk),
.reset(reset),
.en(UpdateEN),
.d(GHRFNext),
.q(GHRF));
// for gshare xor the PC with the GHR
// the write index uses the next history value, the read index the current one
assign UpdatePCIndex = GHRFNext ^ UpdatePC[k:1];
assign LookUpPCIndex = GHRF ^ LookUpPC[k:1];
// Make Prediction by reading the correct address in the PHT and also update the new address in the PHT
// GHR refers to the address that the past k branches points to in the prediction stage
// GHRE refers to the address that the past k branches points to in the execution stage
// NOTE(review): no GHRE signal exists in the active code; it only appears in the commented-out pipeline below
SRAM2P1R1W #(k, 2) PHT(.clk(clk),
.reset(reset),
.RA1(LookUpPCIndex),
.RD1(PredictionMemory),
.REN1(~StallF),
.WA1(UpdatePCIndex),
.WD1(UpdatePrediction),
.WEN1(UpdateEN),
.BitWEN1(2'b11));
// need to forward when updating to the same address as reading.
// first we compare to see if the update and lookup addresses are the same
// NOTE(review): the comparison is not qualified by UpdateEN - confirm this is intended
assign DoForwarding = LookUpPCIndex == UpdatePCIndex;
// register the update value and the forwarding signal into the Fetch stage
// TODO: add stall logic ***
flopr #(1) DoForwardingReg(.clk(clk),
.reset(reset),
.d(DoForwarding),
.q(DoForwardingF));
flopr #(2) UpdatePredictionReg(.clk(clk),
.reset(reset),
.d(UpdatePrediction),
.q(UpdatePredictionF));
// registered update value replaces the memory read when the registered address compare matched
assign Prediction = DoForwardingF ? UpdatePredictionF : PredictionMemory;
//pipeline for GHR
/* -----\/----- EXCLUDED -----\/-----
flopenrc #(k) LookUpDReg(.clk(clk),
.reset(reset),
.en(~StallD),
.clear(FlushD),
.d(LookUpPCIndex),
.q(LookUpPCIndexD));
flopenrc #(k) LookUpEReg(.clk(clk),
.reset(reset),
.en(~StallE),
.clear(FlushE),
.d(LookUpPCIndexD),
.q(LookUpPCIndexE));
-----/\----- EXCLUDED -----/\----- */
/* flopenrc #(k) GHRRegD(.clk(clk),
.reset(reset),
.en(~StallD),
.clear(FlushD),
.d(GHRF),
.q(GHRD));
flopenrc #(k) GHRRegE(.clk(clk),
.reset(reset),
.en(~StallE),
.clear(FlushE),
.d(GHRD),
.q(GHRE));
*/
endmodule

View File

@ -0,0 +1,120 @@
///////////////////////////////////////////
// globalHistoryPredictor.sv
//
// Written: Shreya Sanghai
// Email: ssanghai@hmc.edu
// Created: March 16, 2021
// Modified:
//
// Purpose: Gshare predictor with parameterized global history register
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// Gshare direction predictor with a speculatively updated global history
// register (GHR) and repair on misprediction.
//   k               : number of history bits used to index the PHT
//   PCNextF         : bits [k:1] are XORed with the history to form the read index
//   BPPredF         : 2-bit value read from the PHT; BPPredF[1] is the bit
//                     shifted into the GHR on a speculative update
//   InstrClassE     : bit 0 is used as the "control-flow instruction in E" flag
//   BPInstrClassE/D/F : bit 0 is used as the "predicted as control-flow
//                     instruction" flag for the E / D / F stages
//   BPPredDirWrongE : predicted direction was wrong for the instruction in E
//   PCE             : bits [k:1] are XORed with the history to form the write index
//   PCSrcE          : outcome bit inserted into the GHR on a branch update
//   UpdateBPPredE   : 2-bit value written into the PHT
// StallD and the Flush* inputs are not referenced in this module.
// The GHR holds k+2 bits so that up to two speculative entries can be
// dropped again by the "repair" selections below.
module gsharePredictor
  #(parameter int k = 10
    )
   (input logic             clk,
    input logic             reset,
    input logic             StallF, StallD, StallE, FlushF, FlushD, FlushE,
    input logic [`XLEN-1:0] PCNextF,
    output logic [1:0]      BPPredF,
    // update
    input logic [4:0]       InstrClassE,
    input logic [4:0]       BPInstrClassE,
    input logic [4:0]       BPInstrClassD,
    input logic [4:0]       BPInstrClassF,
    input logic             BPPredDirWrongE,
    input logic [`XLEN-1:0] PCE,
    input logic             PCSrcE,
    input logic [1:0]       UpdateBPPredE
    );

   logic [k+1:0] GHR, GHRNext;
   logic [k-1:0] PHTUpdateAdr, PHTUpdateAdr0, PHTUpdateAdr1;
   logic         PHTUpdateEN;
   logic         BPClassWrongNonCFI;
   logic         BPClassWrongCFI;
   logic         BPClassRightNonCFI;
   // These two were previously implicit nets; declare them so the module
   // also elaborates under `default_nettype none and lints cleanly.
   logic         BPClassRightBPWrong;
   logic         BPClassRightBPRight;
   logic [6:0]   GHRMuxSel;
   logic         GHRUpdateEN;
   logic [k-1:0] GHRLookup;

   // Classify the instruction in E: predicted class vs. actual class, and
   // for correctly classified CFIs whether the direction was also right.
   assign BPClassRightNonCFI = ~BPInstrClassE[0] & ~InstrClassE[0];
   assign BPClassWrongCFI = ~BPInstrClassE[0] & InstrClassE[0];
   assign BPClassWrongNonCFI = BPInstrClassE[0] & ~InstrClassE[0];
   assign BPClassRightBPWrong = BPInstrClassE[0] & InstrClassE[0] & BPPredDirWrongE;
   assign BPClassRightBPRight = BPInstrClassE[0] & InstrClassE[0] & ~BPPredDirWrongE;

   // GHR update selection, 1 hot encoded.
   assign GHRMuxSel[0] = ~BPInstrClassF[0] & (BPClassRightNonCFI | BPClassRightBPRight);
   assign GHRMuxSel[1] = BPClassWrongCFI & ~BPInstrClassD[0];
   assign GHRMuxSel[2] = BPClassWrongNonCFI & ~BPInstrClassD[0];
   assign GHRMuxSel[3] = (BPClassRightBPWrong & ~BPInstrClassD[0]) | (BPClassWrongCFI & BPInstrClassD[0]);
   assign GHRMuxSel[4] = BPClassWrongNonCFI & BPInstrClassD[0];
   assign GHRMuxSel[5] = InstrClassE[0] & BPClassRightBPWrong & BPInstrClassD[0];
   assign GHRMuxSel[6] = BPInstrClassF[0] & (BPClassRightNonCFI | (InstrClassE[0] & BPClassRightBPRight));

   // Selections 1-5 are gated by the E-stage stall, the speculative update
   // (selection 6) by the F-stage stall.  Parentheses spell out the
   // precedence the original expression already had.
   assign GHRUpdateEN = ((|GHRMuxSel[5:1]) & ~StallE) | (GHRMuxSel[6] & ~StallF);

   // hoping this created a AND-OR mux.
   always_comb begin
      case (GHRMuxSel)
        7'b000_0001: GHRNext = GHR[k-1+2:0]; // no change
        7'b000_0010: GHRNext = {GHR[k-2+2:0], PCSrcE}; // branch update
        7'b000_0100: GHRNext = {1'b0, GHR[k+1:1]}; // repair 1
        7'b000_1000: GHRNext = {GHR[k-1+2:1], PCSrcE}; // branch update with mis prediction correction
        7'b001_0000: GHRNext = {2'b00, GHR[k+1:2]}; // repair 2
        7'b010_0000: GHRNext = {1'b0, GHR[k+1:2], PCSrcE}; // branch update + repair 1
        7'b100_0000: GHRNext = {GHR[k-2+2:0], BPPredF[1]}; // speculative update
        default: GHRNext = GHR[k-1+2:0];
      endcase
   end

   flopenr #(k+2) GlobalHistoryRegister(.clk(clk),
                                        .reset(reset),
                                        .en((GHRUpdateEN)),
                                        .d(GHRNext),
                                        .q(GHR));

   // PHT write address: pick the k-bit history window offset by one bit
   // when the instruction in E is a CFI and by one more bit when
   // BPInstrClassD[0] is set.
   assign PHTUpdateAdr0 = InstrClassE[0] ? GHR[k:1] : GHR[k-1:0];
   assign PHTUpdateAdr1 = InstrClassE[0] ? GHR[k+1:2] : GHR[k:1];
   assign PHTUpdateAdr = BPInstrClassD[0] ? PHTUpdateAdr1 : PHTUpdateAdr0;
   assign PHTUpdateEN = InstrClassE[0] & ~StallE;

   // if actively updating the GHR at the time of prediction we want to use
   // GHRNext as the lookup rather than GHR.
   assign GHRLookup = (|GHRMuxSel[6:1]) ? GHRNext[k-1:0] : GHR[k-1:0];

   // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT
   // for gshare both the read and the write index are XORed with PC bits [k:1]
   SRAM2P1R1W #(k, 2) PHT(.clk(clk),
                          .reset(reset),
                          //.RA1(GHR[k-1:0]),
                          .RA1(GHRLookup ^ PCNextF[k:1]),
                          .RD1(BPPredF),
                          .REN1(~StallF),
                          .WA1(PHTUpdateAdr ^ PCE[k:1]),
                          .WD1(UpdateBPPredE),
                          .WEN1(PHTUpdateEN),
                          .BitWEN1(2'b11));

endmodule // gsharePredictor

View File

@ -154,15 +154,16 @@ module icachecontroller #(parameter LINESIZE = 256) (
localparam STATE_MISS_SPILL_FETCH_DONE = 10; // write data into SRAM/LUT localparam STATE_MISS_SPILL_FETCH_DONE = 10; // write data into SRAM/LUT
localparam STATE_MISS_SPILL_READ1 = 11; // read block 0 from SRAM/LUT localparam STATE_MISS_SPILL_READ1 = 11; // read block 0 from SRAM/LUT
localparam STATE_MISS_SPILL_2 = 12; // return to ready if hit or do second block update. localparam STATE_MISS_SPILL_2 = 12; // return to ready if hit or do second block update.
localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 13; // miss on block 1, issue read to AHB and wait localparam STATE_MISS_SPILL_2_START = 13; // return to ready if hit or do second block update.
localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 14; // write data to SRAM/LUT localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 14; // miss on block 1, issue read to AHB and wait
localparam STATE_MISS_SPILL_MERGE = 15; // read block 0 of CPU access, localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 15; // write data to SRAM/LUT
localparam STATE_MISS_SPILL_MERGE = 16; // read block 0 of CPU access,
localparam STATE_MISS_SPILL_FINAL = 16; // this state replicates STATE_READY's replay of the localparam STATE_MISS_SPILL_FINAL = 17; // this state replicates STATE_READY's replay of the
// spill access but does nto consider spill. It also does not do another operation. // spill access but does nto consider spill. It also does not do another operation.
localparam STATE_INVALIDATE = 17; // *** not sure if invalidate or evict? invalidate by cache block or address? localparam STATE_INVALIDATE = 18; // *** not sure if invalidate or evict? invalidate by cache block or address?
localparam AHBByteLength = `XLEN / 8; localparam AHBByteLength = `XLEN / 8;
localparam AHBOFFETWIDTH = $clog2(AHBByteLength); localparam AHBOFFETWIDTH = $clog2(AHBByteLength);
@ -380,11 +381,20 @@ module icachecontroller #(parameter LINESIZE = 256) (
PCMux = 2'b10; PCMux = 2'b10;
UnalignedSelect = 1'b1; UnalignedSelect = 1'b1;
spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm. spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm.
ICacheReadEn = 1'b1;
NextState = STATE_MISS_SPILL_2_START;
end
STATE_MISS_SPILL_2_START: begin
if (~hit) begin if (~hit) begin
CntReset = 1'b1; CntReset = 1'b1;
NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; NextState = STATE_MISS_SPILL_MISS_FETCH_WDV;
end else begin end else begin
NextState = STATE_MISS_SPILL_FINAL; NextState = STATE_READY;
ICacheReadEn = 1'b1;
PCMux = 2'b00;
UnalignedSelect = 1'b1;
SavePC = 1'b1;
ICacheStallF = 1'b0;
end end
end end
STATE_MISS_SPILL_MISS_FETCH_WDV: begin STATE_MISS_SPILL_MISS_FETCH_WDV: begin

View File

@ -154,14 +154,7 @@ module ifu (
generate generate
if (`BPRED_ENABLED == 1) begin : bpred if (`BPRED_ENABLED == 1) begin : bpred
// I am making the port connection explicit for now as I want to see them and they will be changing. // I am making the port connection explicit for now as I want to see them and they will be changing.
bpred bpred(.clk(clk), bpred bpred(.*,
.reset(reset),
.StallF(StallF),
.StallD(StallD),
.StallE(StallE),
.FlushF(FlushF),
.FlushD(FlushD),
.FlushE(FlushE),
.PCNextF(PCNextF), .PCNextF(PCNextF),
.BPPredPCF(BPPredPCF), .BPPredPCF(BPPredPCF),
.SelBPPredF(SelBPPredF), .SelBPPredF(SelBPPredF),

View File

@ -2,7 +2,9 @@
// cam_line.sv // cam_line.sv
// //
// Written: tfleming@hmc.edu & jtorrey@hmc.edu 6 April 2021 // Written: tfleming@hmc.edu & jtorrey@hmc.edu 6 April 2021
// Modified: // Modified: kmacsaigoren@hmc.edu 1 June 2021
// Implemented SV48 on top of SV39. This included adding SvMode input signal and the wally constants
// Mostly this was done to make the PageNumberMixer work.
// //
// Purpose: CAM line for the translation lookaside buffer (TLB) // Purpose: CAM line for the translation lookaside buffer (TLB)
// Determines whether a virtual address matches the stored key. // Determines whether a virtual address matches the stored key.
@ -24,12 +26,17 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/////////////////////////////////////////// ///////////////////////////////////////////
`include "wally-constants.vh"
module cam_line #(parameter KEY_BITS = 20, module cam_line #(parameter KEY_BITS = 20,
parameter HIGH_SEGMENT_BITS = 10) ( parameter HIGH_SEGMENT_BITS = 10) (
input clk, reset, input clk, reset,
// input to check which SvMode is running
input [`SVMODE_BITS-1:0] SvMode,
// The requested page number to compare against the key // The requested page number to compare against the key
input [KEY_BITS-1:0] VirtualPageNumber, input [KEY_BITS-1:0] VirtualPageNumber,
// Signals to write a new entry to this line // Signals to write a new entry to this line
input CAMLineWrite, input CAMLineWrite,
@ -38,10 +45,11 @@ module cam_line #(parameter KEY_BITS = 20,
// Flush this line (set valid to 0) // Flush this line (set valid to 0)
input TLBFlush, input TLBFlush,
// This entry is a key for a giga, mega, or kilopage. // This entry is a key for a tera, giga, mega, or kilopage.
// PageType == 2'b00 --> kilopage // PageType == 2'b00 --> kilopage
// PageType == 2'b01 --> megapage // PageType == 2'b01 --> megapage
// PageType == 2'b11 --> gigapage // PageType == 2'b10 --> gigapage
// PageType == 2'b11 --> terapage
output [1:0] PageType, // *** should this be the stored version or the always updated one? output [1:0] PageType, // *** should this be the stored version or the always updated one?
output Match output Match
); );
@ -67,9 +75,9 @@ module cam_line #(parameter KEY_BITS = 20,
flopenr #(KEY_BITS) keyflop(clk, reset, CAMLineWrite, VirtualPageNumber, Key); flopenr #(KEY_BITS) keyflop(clk, reset, CAMLineWrite, VirtualPageNumber, Key);
// Calculate the actual query key based on the input key and the page type. // Calculate the actual query key based on the input key and the page type.
// For example, a megapage in sv39 only cares about VPN2 and VPN1, so VPN0 // For example, a megapage in SV39 only cares about VPN2 and VPN1, so VPN0
// should automatically match. // should automatically match.
page_number_mixer #(KEY_BITS, HIGH_SEGMENT_BITS) mixer(VirtualPageNumber, Key, PageType, VirtualPageNumberQuery); page_number_mixer #(KEY_BITS, HIGH_SEGMENT_BITS) mixer(VirtualPageNumber, Key, PageType, SvMode, VirtualPageNumberQuery);
assign Match = ({1'b1, VirtualPageNumberQuery} == {Valid, Key}); assign Match = ({1'b1, VirtualPageNumberQuery} == {Valid, Key});

View File

@ -2,7 +2,11 @@
// page_number_mixer.sv // page_number_mixer.sv
// //
// Written: tfleming@hmc.edu & jtorrey@hmc.edu 6 April 2021 // Written: tfleming@hmc.edu & jtorrey@hmc.edu 6 April 2021
// Modified: // Modified: kmacsaigoren@hmc.edu 1 June 2021
// Implemented SV48 on top of SV39. This included adding a 3rd Segment to each of the pagenumbers,
// Ensuring that the BITS and HIGH_SEGMENT_BITS inputs were correct everywhere this module gets instantiated,
// Adding several muxes to decide the bit selection to turn pagenumbers into segments based on SV mode,
// Adding support for terapage/newgigapage encoding.
// //
// Purpose: Takes two page numbers and replaces segments of the first page // Purpose: Takes two page numbers and replaces segments of the first page
// number with segments from the second, based on the page type. // number with segments from the second, based on the page type.
@ -25,22 +29,29 @@
/////////////////////////////////////////// ///////////////////////////////////////////
`include "wally-config.vh" `include "wally-config.vh"
`include "wally-constants.vh"
module page_number_mixer #(parameter BITS = 20, module page_number_mixer #(parameter BITS = 20,
parameter HIGH_SEGMENT_BITS = 10) ( parameter HIGH_SEGMENT_BITS = 10) (
input [BITS-1:0] PageNumber, input [BITS-1:0] PageNumber,
input [BITS-1:0] MixPageNumber, input [BITS-1:0] MixPageNumber,
input [1:0] PageType, input [1:0] PageType,
output [BITS-1:0] PageNumberCombined input [`SVMODE_BITS-1:0] SvMode,
output [BITS-1:0] PageNumberCombined
); );
// The upper segment might have a different width than the lower segments.
// For example, an SV39 PTE has 26 bits for PPN2 and 9 bits for the other
// segments. This is outside the 'if XLEN' b/c the constant is already configured
// to the correct value for the XLEN in the relevant wally-constants.vh file.
localparam LOW_SEGMENT_BITS = `VPN_SEGMENT_BITS;
// *** each time this module is implemented, low segment bits is either
// `VPN_SEGMENT_BITS or `PPN_LOW_SEGMENT_BITS (if it existed)
// in every mode so far, these are the same, so it's left as it is above.
generate generate
// *** Just checking XLEN is not enough to support sv39 AND sv48.
if (`XLEN == 32) begin if (`XLEN == 32) begin
// The upper segment might have a different width than the lower segments.
// For example, an sv39 PTE has 26 bits for PPN2 and 9 bits for the other
// segments.
localparam LOW_SEGMENT_BITS = (BITS - HIGH_SEGMENT_BITS);
logic [HIGH_SEGMENT_BITS-1:0] Segment1, MixSegment1, Segment1Combined; logic [HIGH_SEGMENT_BITS-1:0] Segment1, MixSegment1, Segment1Combined;
logic [LOW_SEGMENT_BITS-1:0] Segment0, MixSegment0, Segment0Combined; logic [LOW_SEGMENT_BITS-1:0] Segment0, MixSegment0, Segment0Combined;
@ -58,28 +69,60 @@ module page_number_mixer #(parameter BITS = 20,
// Reswizzle segments of the combined page number // Reswizzle segments of the combined page number
assign PageNumberCombined = {Segment1Combined, Segment0Combined}; assign PageNumberCombined = {Segment1Combined, Segment0Combined};
end else begin end else begin
// The upper segment might have a different width than the lower segments.
// For example, an sv39 PTE has 26 bits for PPN2 and 9 bits for the other
// segments.
localparam LOW_SEGMENT_BITS = (BITS - HIGH_SEGMENT_BITS) / 2;
logic [HIGH_SEGMENT_BITS-1:0] Segment2, MixSegment2, Segment2Combined; // After segment 0 and 1 of the page number, the width of each segment is dependant on the SvMode.
// For this reason, each segment bus is the width of its widest value across each mode.
// When a smaller value needs to be loaded into a wider bus, it's loaded in the least significant bits
// and left padded with zeros. MAKE SURE that if a value is being padded with zeros here,
// that it's padded with zeros everywhere else in the MMU and beyond to avoid false misses in the TLB.
logic [HIGH_SEGMENT_BITS-1:0] Segment3, MixSegment3, Segment3Combined;
logic [HIGH_SEGMENT_BITS + LOW_SEGMENT_BITS-1:0] Segment2, MixSegment2, Segment2Combined;
logic [LOW_SEGMENT_BITS-1:0] Segment1, MixSegment1, Segment1Combined; logic [LOW_SEGMENT_BITS-1:0] Segment1, MixSegment1, Segment1Combined;
logic [LOW_SEGMENT_BITS-1:0] Segment0, MixSegment0, Segment0Combined; logic [LOW_SEGMENT_BITS-1:0] Segment0, MixSegment0, Segment0Combined;
// Unswizzle segments of the input page number // Unswizzle segments of the input page number
assign {Segment2, Segment1, Segment0} = PageNumber; // *** these muxes assume that only Sv48 and SV39 are implemented in rv64. for future SV57 and up,
assign {MixSegment2, MixSegment1, MixSegment0} = MixPageNumber; // there will have to be more muxes to select which value each segment gets.
// as a cool reminder: BITS is the width of the page number, virt or phys, coming into this module
// while high segment bits is the width of the highest segment of that page number.
// Note for future work: this module has to work with both VPNs and PPNs and due to their differing
// widths and the fact that the ppn has one longer segment at the top makes the muxes below very confusing.
// Potentially very annoying thing for future workers: the number of bits in a ppn is always 44 (for SV39 and SV48)
// but in SV57 and above, this might be a new longer length. In that case these selectors will most likely
// become even more complicated and confusing.
assign Segment3 = (SvMode == `SV48) ?
PageNumber[BITS-1:3*LOW_SEGMENT_BITS] : // take the top segment or not
{HIGH_SEGMENT_BITS{1'b0}}; // for virtual page numbers in SV39, both options should be zeros.
assign Segment2 = (SvMode == `SV48) ?
{{HIGH_SEGMENT_BITS{1'b0}}, PageNumber[3*LOW_SEGMENT_BITS-1:2*LOW_SEGMENT_BITS]} : // just take another low segment left padded with zeros.
PageNumber[BITS-1:2*LOW_SEGMENT_BITS]; // otherwise take the rest of the PageNumber
assign Segment1 = PageNumber[2*LOW_SEGMENT_BITS-1:LOW_SEGMENT_BITS];
assign Segment0 = PageNumber[LOW_SEGMENT_BITS-1:0];
assign MixSegment3 = (SvMode == `SV48) ?
MixPageNumber[BITS-1:3*LOW_SEGMENT_BITS] : // take the top segment or not
{HIGH_SEGMENT_BITS{1'b0}}; // for virtual page numbers in SV39, both options should be zeros.
assign MixSegment2 = (SvMode == `SV48) ?
{{HIGH_SEGMENT_BITS{1'b0}}, MixPageNumber[3*LOW_SEGMENT_BITS-1:2*LOW_SEGMENT_BITS]} : // just take another low segment left padded with zeros.
MixPageNumber[BITS-1:2*LOW_SEGMENT_BITS]; // otherwise take the rest of the PageNumber
assign MixSegment1 = MixPageNumber[2*LOW_SEGMENT_BITS-1:LOW_SEGMENT_BITS];
assign MixSegment0 = MixPageNumber[LOW_SEGMENT_BITS-1:0];
// Pass through the high segment // Pass through the high segment
assign Segment2Combined = Segment2; assign Segment3Combined = Segment3;
// Either pass through or zero out segments 1 and 0 based on the page type // Either pass through or zero out lower segments based on the page type
mux2 #(LOW_SEGMENT_BITS) segment1mux(Segment1, MixSegment1, PageType[1], Segment1Combined); assign Segment2Combined = (PageType[1] && PageType[0]) ? MixSegment2 : Segment2; // terapage (page == 11)
mux2 #(LOW_SEGMENT_BITS) segment0mux(Segment0, MixSegment0, PageType[0], Segment0Combined); assign Segment1Combined = (PageType[1]) ? MixSegment1 : Segment1; // gigapage and higher (page == 10 or 11)
assign Segment0Combined = (PageType[1] || PageType[0]) ? MixSegment0 : Segment0; // megapage and higher (page == 01 or 10 or 11)
// Reswizzle segments of the combined page number // Reswizzle segments of the combined page number
assign PageNumberCombined = {Segment2Combined, Segment1Combined, Segment0Combined}; assign PageNumberCombined = (SvMode == `SV48) ?
{Segment3Combined, Segment2Combined[LOW_SEGMENT_BITS-1:0], Segment1Combined, Segment0Combined} :
{Segment2Combined, Segment1Combined, Segment0Combined};
end end
endgenerate endgenerate
endmodule endmodule

View File

@ -2,7 +2,10 @@
// pagetablewalker.sv // pagetablewalker.sv
// //
// Written: tfleming@hmc.edu 2 March 2021 // Written: tfleming@hmc.edu 2 March 2021
// Modified: // Modified: kmacsaigoren@hmc.edu 1 June 2021
// implemented SV48 on top of SV39. This included adding a level of the FSM for the extra page number segment,
// adding support for terapage encoding, and for setting the TranslationPAdr using the new level,
// adding the internal SvMode signal
// //
// Purpose: Page Table Walker // Purpose: Page Table Walker
// Part of the Memory Management Unit (MMU) // Part of the Memory Management Unit (MMU)
@ -70,6 +73,7 @@ module pagetablewalker (
logic [`XLEN-1:0] SavedPTE, CurrentPTE; logic [`XLEN-1:0] SavedPTE, CurrentPTE;
logic [`PA_BITS-1:0] TranslationPAdr; logic [`PA_BITS-1:0] TranslationPAdr;
logic [`PPN_BITS-1:0] CurrentPPN; logic [`PPN_BITS-1:0] CurrentPPN;
logic [`SVMODE_BITS-1:0] SvMode;
logic MemStore; logic MemStore;
// PTE Control Bits // PTE Control Bits
@ -82,6 +86,8 @@ module pagetablewalker (
logic [`XLEN-1:0] PageTableEntry; logic [`XLEN-1:0] PageTableEntry;
logic [1:0] PageType; logic [1:0] PageType;
assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS];
assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0]; assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0];
assign MemStore = MemRWM[0]; assign MemStore = MemRWM[0];
@ -105,11 +111,12 @@ module pagetablewalker (
assign PageTypeF = PageType; assign PageTypeF = PageType;
assign PageTypeM = PageType; assign PageTypeM = PageType;
localparam IDLE = 3'h0; localparam LEVEL0 = 3'h0;
localparam LEVEL1 = 3'h1; localparam LEVEL1 = 3'h1;
localparam LEVEL0 = 3'h2; // space left for more levels
localparam LEAF = 3'h3; localparam LEAF = 3'h5;
localparam FAULT = 3'h4; localparam IDLE = 3'h6;
localparam FAULT = 3'h7;
logic [2:0] WalkerState, NextWalkerState; logic [2:0] WalkerState, NextWalkerState;
@ -208,18 +215,32 @@ module pagetablewalker (
assign MMUPAdr = TranslationPAdr[31:0]; assign MMUPAdr = TranslationPAdr[31:0];
end else begin end else begin
localparam LEVEL2 = 3'h5; localparam LEVEL2 = 3'h2;
localparam LEVEL3 = 3'h3;
logic [8:0] VPN2, VPN1, VPN0; logic [8:0] VPN3, VPN2, VPN1, VPN0;
logic GigapageMisaligned, BadGigapage; logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage;
flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState); flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
always_comb begin always_comb begin
case (WalkerState) case (WalkerState)
IDLE: if (MMUTranslate) NextWalkerState = LEVEL2; IDLE: if (MMUTranslate) NextWalkerState = LEVEL3;
else NextWalkerState = IDLE; else NextWalkerState = IDLE;
LEVEL3: if (SvMode != `SV48) NextWalkerState = LEVEL2;
// 3rd level used if SV48 is enabled.
else begin
if (~MMUReady) NextWalkerState = LEVEL3;
// *** <FUTURE WORK> According to the architecture, we should
// fault upon finding a superpage that is misaligned or has 0
// access bit. The following commented line of code is
// supposed to perform that check. However, it is untested.
else if (ValidPTE && LeafPTE && ~BadTerapage) NextWalkerState = LEAF;
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL2;
else NextWalkerState = FAULT;
end
LEVEL2: if (~MMUReady) NextWalkerState = LEVEL2; LEVEL2: if (~MMUReady) NextWalkerState = LEVEL2;
// *** <FUTURE WORK> According to the architecture, we should // *** <FUTURE WORK> According to the architecture, we should
// fault upon finding a superpage that is misaligned or has 0 // fault upon finding a superpage that is misaligned or has 0
@ -242,24 +263,29 @@ module pagetablewalker (
else if (ValidPTE && LeafPTE && ~AccessAlert) else if (ValidPTE && LeafPTE && ~AccessAlert)
NextWalkerState = LEAF; NextWalkerState = LEAF;
else NextWalkerState = FAULT; else NextWalkerState = FAULT;
LEAF: if (MMUTranslate) NextWalkerState = LEVEL2; LEAF: if (MMUTranslate) NextWalkerState = LEVEL3;
else NextWalkerState = IDLE; else NextWalkerState = IDLE;
FAULT: if (MMUTranslate) NextWalkerState = LEVEL2; FAULT: if (MMUTranslate) NextWalkerState = LEVEL3;
else NextWalkerState = IDLE; else NextWalkerState = IDLE;
// Default case should never happen, but is included for linter. // Default case should never happen, but is included for linter.
default: NextWalkerState = IDLE; default: NextWalkerState = IDLE;
endcase endcase
end end
// A terapage is a level 3 leaf page. This page must have zero PPN[2],
// zero PPN[1], and zero PPN[0]
assign TerapageMisaligned = |(CurrentPPN[26:0]);
// A gigapage is a Level 2 leaf page. This page must have zero PPN[1] and // A gigapage is a Level 2 leaf page. This page must have zero PPN[1] and
// zero PPN[0] // zero PPN[0]
assign GigapageMisaligned = |(CurrentPPN[17:0]); assign GigapageMisaligned = |(CurrentPPN[17:0]);
// A megapage is a Level 1 leaf page. This page must have zero PPN[0]. // A megapage is a Level 1 leaf page. This page must have zero PPN[0].
assign MegapageMisaligned = |(CurrentPPN[8:0]); assign MegapageMisaligned = |(CurrentPPN[8:0]);
assign BadTerapage = TerapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme
assign BadGigapage = GigapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme assign BadGigapage = GigapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme
assign BadMegapage = MegapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme assign BadMegapage = MegapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme
assign VPN3 = TranslationVAdr[47:39];
assign VPN2 = TranslationVAdr[38:30]; assign VPN2 = TranslationVAdr[38:30];
assign VPN1 = TranslationVAdr[29:21]; assign VPN1 = TranslationVAdr[29:21];
assign VPN0 = TranslationVAdr[20:12]; assign VPN0 = TranslationVAdr[20:12];
@ -282,8 +308,13 @@ module pagetablewalker (
IDLE: begin IDLE: begin
MMUStall = '0; MMUStall = '0;
end end
LEVEL3: begin
TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000};
// *** this is a huge breaking point. if we're going through level3 every time, even when sv48 is off,
// what should translationPAdr be when level3 is just off?
end
LEVEL2: begin LEVEL2: begin
TranslationPAdr = {BasePageTablePPN, VPN2, 3'b000}; TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000};
end end
LEVEL1: begin LEVEL1: begin
TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
@ -295,8 +326,9 @@ module pagetablewalker (
// Keep physical address alive to prevent HADDR dropping to 0 // Keep physical address alive to prevent HADDR dropping to 0
TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
PageTableEntry = CurrentPTE; PageTableEntry = CurrentPTE;
PageType = (WalkerState == LEVEL2) ? 2'b11 : PageType = (WalkerState == LEVEL3) ? 2'b11 :
((WalkerState == LEVEL1) ? 2'b01 : 2'b00); ((WalkerState == LEVEL2) ? 2'b10 :
((WalkerState == LEVEL1) ? 2'b01 : 2'b00));
DTLBWriteM = DTLBMissM; DTLBWriteM = DTLBMissM;
ITLBWriteF = ~DTLBMissM; // Prefer data over instructions ITLBWriteF = ~DTLBMissM; // Prefer data over instructions
end end

View File

@ -4,7 +4,11 @@
// Written: tfleming@hmc.edu & jtorrey@hmc.edu 7 April 2021 // Written: tfleming@hmc.edu & jtorrey@hmc.edu 7 April 2021
// Based on implementation from https://www.allaboutcircuits.com/ip-cores/communication-controller/priority-encoder/ // Based on implementation from https://www.allaboutcircuits.com/ip-cores/communication-controller/priority-encoder/
// *** Give proper LGPL attribution for above source // *** Give proper LGPL attribution for above source
// Modified: // Modified: Teo Ene 15 Apr 2021:
// Temporarily removed parameterized priority encoder for non-parameterized one
// To get synthesis working quickly
// Kmacsaigoren@hmc.edu 28 May 2021:
// Added working version of parameterized priority encoder.
// //
// Purpose: One-hot encoding to binary encoder // Purpose: One-hot encoding to binary encoder
// //
@ -27,51 +31,33 @@
`include "wally-config.vh" `include "wally-config.vh"
// Teo Ene 04/15:
// Temporarily removed paramterized priority encoder for non-parameterized one
// To get synthesis working quickly
module priority_encoder #(parameter BINARY_BITS = 3) ( module priority_encoder #(parameter BINARY_BITS = 3) (
input logic [7:0] one_hot, input logic [2**BINARY_BITS - 1:0] one_hot,
output logic [2:0] binary output logic [BINARY_BITS - 1:0] binary
); );
// localparam ONE_HOT_BITS = 2**BINARY_BITS; integer i;
/*
genvar i, j;
generate
for (i = 0; i < ONE_HOT_BITS; i++) begin
for (j = 0; j < BINARY_BITS; j++) begin
if (i[j]) begin
assign binary[j] = one_hot[i];
end
end
end
endgenerate
*/
/*
logic [BINARY_BITS-1:0] binary_comb;
always_comb begin always_comb begin
binary_comb = 0; binary = 0;
for (int i = 0; i < ONE_HOT_BITS; i++) for (i = 0; i < 2**BINARY_BITS; i++) begin
if (one_hot[i]) binary_comb = i; if (one_hot[i]) binary = i; // prioritizes the most significant bit
end
end end
// *** triple check synthesizability here
assign binary = binary_comb; // Ideally this mimics the following:
/*
always_comb begin
casex (one_hot)
1xx ... x: binary = 2**BINARY_BITS - 1;
01x ... x: binary = 2**BINARY_BITS - 2;
001 ... x: binary = 2**BINARY_BITS - 3;
{...}
00 ... 1xx: binary = 2;
00 ... 01x: binary = 1;
00 ... 001: binary = 0;
end
*/ */
always_comb
case (one_hot)
8'h1: binary=3'h0;
8'h2: binary=3'h1;
8'h4: binary=3'h2;
8'h8: binary=3'h3;
8'h10: binary=3'h4;
8'h20: binary=3'h5;
8'h40: binary=3'h6;
8'h80: binary=3'h7;
default: binary=3'h0; //should never happen
endcase
endmodule endmodule

View File

@ -2,7 +2,9 @@
// tlb.sv // tlb.sv
// //
// Written: jtorrey@hmc.edu 16 February 2021 // Written: jtorrey@hmc.edu 16 February 2021
// Modified: // Modified: kmacsaigoren@hmc.edu 1 June 2021
// Implemented SV48 on top of SV39. This included adding the SvMode signal,
// and using it to decide the translate signal and get the virtual page number
// //
// Purpose: Translation lookaside buffer // Purpose: Translation lookaside buffer
// Cache of virtural-to-physical address translations // Cache of virtural-to-physical address translations
@ -25,7 +27,7 @@
/////////////////////////////////////////// ///////////////////////////////////////////
/** /**
* sv32 specs * SV32 specs
* ---------- * ----------
* Virtual address [31:0] (32 bits) * Virtual address [31:0] (32 bits)
* [________________________________] * [________________________________]
@ -85,14 +87,11 @@ module tlb #(parameter ENTRY_BITS = 3,
output TLBPageFault output TLBPageFault
); );
logic SvMode;
logic Translate; logic Translate;
logic TLBAccess, ReadAccess, WriteAccess; logic TLBAccess, ReadAccess, WriteAccess;
// *** If we want to support multiple virtual memory modes (ie sv39 AND sv48), // Store current virtual memory mode (SV32, SV39, SV48, etc.)
// we could have some muxes that control which parameters are current. logic [`SVMODE_BITS-1:0] SvMode;
// Although then some of the signals are not big enough. But that's a problem
// for much later.
// Index (currently random) to write the next TLB entry // Index (currently random) to write the next TLB entry
logic [ENTRY_BITS-1:0] WriteIndex; logic [ENTRY_BITS-1:0] WriteIndex;
@ -116,17 +115,24 @@ module tlb #(parameter ENTRY_BITS = 3,
// Whether the virtual address has a match in the CAM // Whether the virtual address has a match in the CAM
logic CAMHit; logic CAMHit;
// Grab the sv bit from SATP // Grab the sv mode from SATP
assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS];
// The bus width is always the largest it could be for that XLEN. For example, the VPN bus will be 36 bits wide in rv64,
// even though the VPN itself could be 27 bits (SV39) or 36 bits (SV48) wide. When the value of the VPN is narrower
// than the bus, the extra bits on the left of the full value are padded with zeros.
generate generate
if (`XLEN == 32) begin if (`XLEN == 32) begin
assign SvMode = SATP_REGW[31]; // *** change to an enum somehow? assign VirtualPageNumber = VirtualAddress[`VPN_BITS+11:12];
end else begin end else begin
assign SvMode = SATP_REGW[63]; // currently just a boolean whether translation enabled assign VirtualPageNumber = (SvMode == `SV48) ?
VirtualAddress[`VPN_BITS+11:12] :
{{`VPN_SEGMENT_BITS{1'b0}}, VirtualAddress[3*`VPN_SEGMENT_BITS+11:12]};
end end
endgenerate endgenerate
// Whether translation should occur // Whether translation should occur
assign Translate = SvMode & (PrivilegeModeW != `M_MODE); assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE);
// Determine how the TLB is currently being used // Determine how the TLB is currently being used
// Note that we use ReadAccess for both loads and instruction fetches // Note that we use ReadAccess for both loads and instruction fetches
@ -134,7 +140,7 @@ module tlb #(parameter ENTRY_BITS = 3,
assign WriteAccess = TLBAccessType[0]; assign WriteAccess = TLBAccessType[0];
assign TLBAccess = ReadAccess || WriteAccess; assign TLBAccess = ReadAccess || WriteAccess;
assign VirtualPageNumber = VirtualAddress[`VPN_BITS+11:12];
assign PageOffset = VirtualAddress[11:0]; assign PageOffset = VirtualAddress[11:0];
// TLB entries are evicted according to the LRU algorithm // TLB entries are evicted according to the LRU algorithm
@ -188,9 +194,10 @@ module tlb #(parameter ENTRY_BITS = 3,
// page number. For 4 KB pages, the entire virtual page number is replaced. // page number. For 4 KB pages, the entire virtual page number is replaced.
// For superpages, some segments are considered offsets into a larger page. // For superpages, some segments are considered offsets into a larger page.
page_number_mixer #(`PPN_BITS, `PPN_HIGH_SEGMENT_BITS) page_number_mixer #(`PPN_BITS, `PPN_HIGH_SEGMENT_BITS)
physical_mixer(PhysicalPageNumber, physical_mixer(PhysicalPageNumber,
{{EXTRA_PHYSICAL_BITS{1'b0}}, VirtualPageNumber}, {{EXTRA_PHYSICAL_BITS{1'b0}}, VirtualPageNumber},
HitPageType, HitPageType,
SvMode,
PhysicalPageNumberMixed); PhysicalPageNumberMixed);
// Provide physical address only on TLBHits to cause catastrophic errors if // Provide physical address only on TLBHits to cause catastrophic errors if

View File

@ -2,7 +2,9 @@
// tlb_cam.sv // tlb_cam.sv
// //
// Written: jtorrey@hmc.edu 16 February 2021 // Written: jtorrey@hmc.edu 16 February 2021
// Modified: // Modified: kmacsaigoren@hmc.edu 1 June 2021
// Implemented SV48 on top of SV39. This included adding the SvMode signal input and wally constants
// Mostly this was to make the cam_lines work.
// //
// Purpose: Stores virtual page numbers with cached translations. // Purpose: Stores virtual page numbers with cached translations.
// Determines whether a given virtual page number is in the TLB. // Determines whether a given virtual page number is in the TLB.
@ -24,18 +26,21 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/////////////////////////////////////////// ///////////////////////////////////////////
`include "wally-constants.vh"
module tlb_cam #(parameter ENTRY_BITS = 3, module tlb_cam #(parameter ENTRY_BITS = 3,
parameter KEY_BITS = 20, parameter KEY_BITS = 20,
parameter HIGH_SEGMENT_BITS = 10) ( parameter HIGH_SEGMENT_BITS = 10) (
input clk, reset, input clk, reset,
input [KEY_BITS-1:0] VirtualPageNumber, input [KEY_BITS-1:0] VirtualPageNumber,
input [1:0] PageTypeWrite, input [1:0] PageTypeWrite,
input [ENTRY_BITS-1:0] WriteIndex, input [ENTRY_BITS-1:0] WriteIndex,
input TLBWrite, input [`SVMODE_BITS-1:0] SvMode,
input TLBFlush, input TLBWrite,
output [ENTRY_BITS-1:0] VPNIndex, input TLBFlush,
output [1:0] HitPageType, output [ENTRY_BITS-1:0] VPNIndex,
output CAMHit output [1:0] HitPageType,
output CAMHit
); );
localparam NENTRIES = 2**ENTRY_BITS; localparam NENTRIES = 2**ENTRY_BITS;

File diff suppressed because it is too large Load Diff

View File

@ -47,13 +47,13 @@ module muldiv (
logic [`XLEN-1:0] MulDivResultE, MulDivResultM; logic [`XLEN-1:0] MulDivResultE, MulDivResultM;
logic [`XLEN-1:0] PrelimResultE; logic [`XLEN-1:0] PrelimResultE;
logic [`XLEN-1:0] QuotE, RemE; logic [`XLEN-1:0] QuotE, RemE;
//logic [`XLEN-1:0] Q, R;
logic [`XLEN*2-1:0] ProdE; logic [`XLEN*2-1:0] ProdE;
logic enable_q; logic enable_q;
logic [2:0] Funct3E_Q; logic [2:0] Funct3E_Q;
logic div0error; logic div0error;
logic [`XLEN-1:0] N, D; logic [`XLEN-1:0] N, D;
logic [`XLEN-1:0] Num0, Den0;
logic gclk; logic gclk;
logic DivStartE; logic DivStartE;
@ -70,15 +70,25 @@ module muldiv (
end end
assign gclk = enable_q & clk; assign gclk = enable_q & clk;
// Handle sign extension for W-type instructions
if (`XLEN == 64) begin // RV64 has W-type instructions
assign Num0 = W64E ? {{32{SrcAE[31]&signedDivide}}, SrcAE[31:0]} : SrcAE;
assign Den0 = W64E ? {{32{SrcBE[31]&signedDivide}}, SrcBE[31:0]} : SrcBE;
end else begin // RV32 has no W-type instructions
assign Num0 = SrcAE;
assign Den0 = SrcBE;
end
// capture the Numerator/Denominator // capture the Numerator/Denominator
flopenrc #(`XLEN) reg_num (.d(SrcAE), .q(N), flopenrc #(`XLEN) reg_num (.d(Num0), .q(N),
.en(startDivideE), .clear(DivDoneE), .en(startDivideE), .clear(DivDoneE),
.reset(reset), .clk(~gclk)); .reset(reset), .clk(~gclk));
flopenrc #(`XLEN) reg_den (.d(SrcBE), .q(D), flopenrc #(`XLEN) reg_den (.d(Den0), .q(D),
.en(startDivideE), .clear(DivDoneE), .en(startDivideE), .clear(DivDoneE),
.reset(reset), .clk(~gclk)); .reset(reset), .clk(~gclk));
assign signedDivide = (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); assign signedDivide = (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]);
div div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide);
// Added for debugging of start signal for divide // Added for debugging of start signal for divide
assign startDivideE = MulDivE&DivStartE&~DivBusyE; assign startDivideE = MulDivE&DivStartE&~DivBusyE;
@ -93,7 +103,6 @@ module muldiv (
// Select result // Select result
always_comb always_comb
// case (DivDoneE ? Funct3E_Q : Funct3E)
case (Funct3E) case (Funct3E)
3'b000: PrelimResultE = ProdE[`XLEN-1:0]; 3'b000: PrelimResultE = ProdE[`XLEN-1:0];
3'b001: PrelimResultE = ProdE[`XLEN*2-1:`XLEN]; 3'b001: PrelimResultE = ProdE[`XLEN*2-1:`XLEN];

View File

@ -166,12 +166,12 @@ string tests32f[] = '{
"rv64m/I-MULW-01", "3000", "rv64m/I-MULW-01", "3000",
"rv64m/I-DIV-01", "3000", "rv64m/I-DIV-01", "3000",
"rv64m/I-DIVU-01", "3000", "rv64m/I-DIVU-01", "3000",
//"rv64m/I-DIVUW-01", "3000", "rv64m/I-DIVUW-01", "3000",
//"rv64m/I-DIVW-01", "3000", "rv64m/I-DIVW-01", "3000",
"rv64m/I-REM-01", "3000", "rv64m/I-REM-01", "3000",
"rv64m/I-REMU-01", "3000" "rv64m/I-REMU-01", "3000",
//"rv64m/I-REMUW-01", "3000", "rv64m/I-REMUW-01", "3000",
//"rv64m/I-REMW-01", "3000" "rv64m/I-REMW-01", "3000"
}; };
string tests64ic[] = '{ string tests64ic[] = '{
@ -320,11 +320,11 @@ string tests32f[] = '{
"rv32m/I-MUL-01", "2000", "rv32m/I-MUL-01", "2000",
"rv32m/I-MULH-01", "2000", "rv32m/I-MULH-01", "2000",
"rv32m/I-MULHSU-01", "2000", "rv32m/I-MULHSU-01", "2000",
"rv32m/I-MULHU-01", "2000" "rv32m/I-MULHU-01", "2000",
//"rv32m/I-DIV-01", "2000", "rv32m/I-DIV-01", "2000",
//"rv32m/I-DIVU-01", "2000", "rv32m/I-DIVU-01", "2000",
//"rv32m/I-REM-01", "2000", "rv32m/I-REM-01", "2000",
//"rv32m/I-REMU-01", "2000" "rv32m/I-REMU-01", "2000"
}; };
string tests32ic[] = '{ string tests32ic[] = '{
@ -439,8 +439,11 @@ string tests32f[] = '{
string testsBP64[] = '{ string testsBP64[] = '{
"rv64BP/simple", "10000", "rv64BP/simple", "10000",
"rv64BP/mmm", "1000000",
"rv64BP/linpack_bench", "1000000",
"rv64BP/sieve", "1000000",
"rv64BP/qsort", "1000000", "rv64BP/qsort", "1000000",
"rv64BP/sieve", "1000000" "rv64BP/dhrystone", "1000000"
}; };
string tests64p[] = '{ string tests64p[] = '{