diff --git a/testsBP/crt0/Makefile b/testsBP/crt0/Makefile index ab47384f..2af43a40 100644 --- a/testsBP/crt0/Makefile +++ b/testsBP/crt0/Makefile @@ -4,12 +4,12 @@ ROOT := .. LIBRARY_DIRS := LIBRARY_FILES := -MARCH :=-march=rv64ic -MABI :=-mabi=lp64 +MARCH :=-march=rv64imfdc +MABI :=-mabi=lp64d LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -AFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64 -W -CFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64 -mcmodel=medany +AFLAGS =$(MARCH) $(MABI) -W +CFLAGS =$(MARCH) $(MABI) -mcmodel=medany -O2 AS=riscv64-unknown-elf-as CC=riscv64-unknown-elf-gcc AR=riscv64-unknown-elf-ar @@ -19,7 +19,7 @@ all: libcrt0.a %.o: %.s ${AS} ${AFLAGS} -c $< -o $@ -libcrt0.a: start.o +libcrt0.a: start.o pcnt_driver.o pre_main.o ${AR} -r $@ $^ clean: diff --git a/testsBP/crt0/start.s b/testsBP/crt0/start.s index 19a240d8..731a61e3 100644 --- a/testsBP/crt0/start.s +++ b/testsBP/crt0/start.s @@ -43,11 +43,10 @@ _start: - # set the stack pointer to the top of memory - # 0x8000_0000 + 64K - 8 bytes - li sp, 0x007FFFF8 + # set the stack pointer to the top of memory - 8 bytes (pointer size) + li sp, 0x07FFFFF8 - jal ra, main + jal ra, pre_main jal ra, _halt .section .text diff --git a/testsBP/mibench_qsort/Makefile b/testsBP/mibench_qsort/Makefile index f4d36839..b1cf7b67 100644 --- a/testsBP/mibench_qsort/Makefile +++ b/testsBP/mibench_qsort/Makefile @@ -8,7 +8,7 @@ MARCH :=-march=rv64ic MABI :=-mabi=lp64 LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map -CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align +CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align -O2 CC=riscv64-unknown-elf-gcc DA=riscv64-unknown-elf-objdump -d diff --git a/testsBP/sieve/Makefile b/testsBP/sieve/Makefile index 1d38d123..9c884f48 100644 --- a/testsBP/sieve/Makefile +++ b/testsBP/sieve/Makefile @@ -8,7 +8,7 @@ MARCH :=-march=rv64ic MABI :=-mabi=lp64 LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map -CFLAGS 
=$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align +CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align -O2 CC=riscv64-unknown-elf-gcc DA=riscv64-unknown-elf-objdump -d diff --git a/testsBP/sieve/sieve.c b/testsBP/sieve/sieve.c index e8207404..f7d36d95 100644 --- a/testsBP/sieve/sieve.c +++ b/testsBP/sieve/sieve.c @@ -66,21 +66,21 @@ int main () { ans = sieve (); //gettimeofday(&after , NULL); - if (ans != 1899) - printf ("Sieve result wrong, ans = %d, expected 1899", ans); + /* /\* /\\* if (ans != 1899) *\\/ *\/ */ + /* /\* /\\* printf ("Sieve result wrong, ans = %d, expected 1899", ans); *\\/ *\/ */ - //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); + /* /\* //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); *\/ */ - printf("Round 2\n"); - //gettimeofday(&before , NULL); + /* /\* printf("Round 2\n"); *\/ */ + /* //gettimeofday(&before , NULL); */ - ans = sieve (); - //gettimeofday(&after , NULL); - if (ans != 1899) - printf ("Sieve result wrong, ans = %d, expected 1899", ans); + /* ans = sieve (); */ + /* //gettimeofday(&after , NULL); */ + /* if (ans != 1899) */ + /* printf ("Sieve result wrong, ans = %d, expected 1899", ans); */ - //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); + /* //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); */ return 0; diff --git a/testsBP/simple/Makefile b/testsBP/simple/Makefile index 450aacaa..4447f284 100644 --- a/testsBP/simple/Makefile +++ b/testsBP/simple/Makefile @@ -8,7 +8,7 @@ MARCH :=-march=rv64ic MABI :=-mabi=lp64 LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map -CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align +CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align -O2 CC=riscv64-unknown-elf-gcc DA=riscv64-unknown-elf-objdump -d diff --git a/testsBP/simple/header.h b/testsBP/simple/header.h index bfe014a4..aab8973f 100644 --- 
a/testsBP/simple/header.h +++ b/testsBP/simple/header.h @@ -5,4 +5,8 @@ int fail(); int simple_csrbr_test(); int lbu_test(); int icache_spill_test(); +void global_hist_0_space_test(); +void global_hist_1_space_test(); +void global_hist_2_space_test(); +void global_hist_3_space_test(); #endif diff --git a/testsBP/simple/main.c b/testsBP/simple/main.c index 0d14fcfb..564b474e 100644 --- a/testsBP/simple/main.c +++ b/testsBP/simple/main.c @@ -2,6 +2,10 @@ int main(){ //int res = icache_spill_test(); + global_hist_3_space_test(); + global_hist_2_space_test(); + global_hist_1_space_test(); + global_hist_0_space_test(); int res = 1; if (res < 0) { fail(); diff --git a/wally-pipelined/config/buildroot/wally-constants.vh b/wally-pipelined/config/buildroot/wally-constants.vh index 43d95863..cc6c27fc 100644 --- a/wally-pipelined/config/buildroot/wally-constants.vh +++ b/wally-pipelined/config/buildroot/wally-constants.vh @@ -2,11 +2,14 @@ // wally-constants.vh // // Written: tfleming@hmc.edu 4 March 2021 -// Modified: +// Modified: Kmacsaigoren@hmc.edu 31 May 2021 +// Added constants for checking sv mode and changed existing constants to accommodate +// both sv48 and sv39 // -// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. -// These macros should not be changed, except in the event of an -// update to the architecture or particularly special circumstances. +// Purpose: Specify constants necessary for different memory virtualization modes. +// These are specific to sv48, defined in section 4.5 of the privileged spec. +// However, despite different constants for different modes, the hardware helps distinguish between +// each mode. // // A component of the Wally configurable RISC-V project. // @@ -25,9 +28,16 @@ // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/////////////////////////////////////////// -// Virtual Memory Constants (sv39) +// Virtual Memory Constants (sv48) `define VPN_SEGMENT_BITS 9 -`define VPN_BITS 27 +`define VPN_BITS 36 +`define PPN_HIGH_SEGMENT_BITS 17 `define PPN_BITS 44 -`define PPN_HIGH_SEGMENT_BITS 26 -`define PA_BITS 56 +`define PA_BITS 56 +`define SVMODE_BITS 4 +// constants to check SATP_MODE against +// defined in Table 4.3 of the privileged spec +`define NO_TRANSLATE 0 +`define SV32 1 +`define SV39 8 +`define SV48 9 diff --git a/wally-pipelined/config/busybear/wally-constants.vh b/wally-pipelined/config/busybear/wally-constants.vh index 43d95863..cc6c27fc 100644 --- a/wally-pipelined/config/busybear/wally-constants.vh +++ b/wally-pipelined/config/busybear/wally-constants.vh @@ -2,11 +2,14 @@ // wally-constants.vh // // Written: tfleming@hmc.edu 4 March 2021 -// Modified: +// Modified: Kmacsaigoren@hmc.edu 31 May 2021 +// Added constants for checking sv mode and changed existing constants to accommodate +// both sv48 and sv39 // -// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. -// These macros should not be changed, except in the event of an -// update to the architecture or particularly special circumstances. +// Purpose: Specify constants necessary for different memory virtualization modes. +// These are specific to sv48, defined in section 4.5 of the privileged spec. +// However, despite different constants for different modes, the hardware helps distinguish between +// each mode. // // A component of the Wally configurable RISC-V project. // @@ -25,9 +28,16 @@ // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/////////////////////////////////////////// -// Virtual Memory Constants (sv39) +// Virtual Memory Constants (sv48) `define VPN_SEGMENT_BITS 9 -`define VPN_BITS 27 +`define VPN_BITS 36 +`define PPN_HIGH_SEGMENT_BITS 17 `define PPN_BITS 44 -`define PPN_HIGH_SEGMENT_BITS 26 -`define PA_BITS 56 +`define PA_BITS 56 +`define SVMODE_BITS 4 +// constants to check SATP_MODE against +// defined in Table 4.3 of the privileged spec +`define NO_TRANSLATE 0 +`define SV32 1 +`define SV39 8 +`define SV48 9 diff --git a/wally-pipelined/config/coremark/wally-constants.vh b/wally-pipelined/config/coremark/wally-constants.vh index 43d95863..cc6c27fc 100644 --- a/wally-pipelined/config/coremark/wally-constants.vh +++ b/wally-pipelined/config/coremark/wally-constants.vh @@ -2,11 +2,14 @@ // wally-constants.vh // // Written: tfleming@hmc.edu 4 March 2021 -// Modified: +// Modified: Kmacsaigoren@hmc.edu 31 May 2021 +// Added constants for checking sv mode and changed existing constants to accommodate +// both sv48 and sv39 // -// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. -// These macros should not be changed, except in the event of an -// update to the architecture or particularly special circumstances. +// Purpose: Specify constants necessary for different memory virtualization modes. +// These are specific to sv48, defined in section 4.5 of the privileged spec. +// However, despite different constants for different modes, the hardware helps distinguish between +// each mode. // // A component of the Wally configurable RISC-V project. // @@ -25,9 +28,16 @@ // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/////////////////////////////////////////// -// Virtual Memory Constants (sv39) +// Virtual Memory Constants (sv48) `define VPN_SEGMENT_BITS 9 -`define VPN_BITS 27 +`define VPN_BITS 36 +`define PPN_HIGH_SEGMENT_BITS 17 `define PPN_BITS 44 -`define PPN_HIGH_SEGMENT_BITS 26 -`define PA_BITS 56 +`define PA_BITS 56 +`define SVMODE_BITS 4 +// constants to check SATP_MODE against +// defined in Table 4.3 of the privileged spec +`define NO_TRANSLATE 0 +`define SV32 1 +`define SV39 8 +`define SV48 9 diff --git a/wally-pipelined/config/coremark_bare/wally-constants.vh b/wally-pipelined/config/coremark_bare/wally-constants.vh index 43d95863..cc6c27fc 100644 --- a/wally-pipelined/config/coremark_bare/wally-constants.vh +++ b/wally-pipelined/config/coremark_bare/wally-constants.vh @@ -2,11 +2,14 @@ // wally-constants.vh // // Written: tfleming@hmc.edu 4 March 2021 -// Modified: +// Modified: Kmacsaigoren@hmc.edu 31 May 2021 +// Added constants for checking sv mode and changed existing constants to accommodate +// both sv48 and sv39 // -// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. -// These macros should not be changed, except in the event of an -// update to the architecture or particularly special circumstances. +// Purpose: Specify constants necessary for different memory virtualization modes. +// These are specific to sv48, defined in section 4.5 of the privileged spec. +// However, despite different constants for different modes, the hardware helps distinguish between +// each mode. // // A component of the Wally configurable RISC-V project. // @@ -25,9 +28,16 @@ // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/////////////////////////////////////////// -// Virtual Memory Constants (sv39) +// Virtual Memory Constants (sv48) `define VPN_SEGMENT_BITS 9 -`define VPN_BITS 27 +`define VPN_BITS 36 +`define PPN_HIGH_SEGMENT_BITS 17 `define PPN_BITS 44 -`define PPN_HIGH_SEGMENT_BITS 26 -`define PA_BITS 56 +`define PA_BITS 56 +`define SVMODE_BITS 4 +// constants to check SATP_MODE against +// defined in Table 4.3 of the privileged spec +`define NO_TRANSLATE 0 +`define SV32 1 +`define SV39 8 +`define SV48 9 diff --git a/wally-pipelined/config/rv32ic/wally-constants.vh b/wally-pipelined/config/rv32ic/wally-constants.vh index ec4a48b4..f4c5ce9a 100644 --- a/wally-pipelined/config/rv32ic/wally-constants.vh +++ b/wally-pipelined/config/rv32ic/wally-constants.vh @@ -2,7 +2,10 @@ // wally-constants.vh // // Written: tfleming@hmc.edu 4 March 2021 -// Modified: +// Modified: kmacsaigoren@hmc.edu 31 May 2021 +// added svmode constants. These aren't strictly necessary since we're just checking one bit, +// but they're here to stay consistent and to make sure we don't wind up +// in a "NO_TRANSLATE undefined" situation. // // Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. 
// These macros should not be changed, except in the event of an @@ -31,3 +34,10 @@ `define PPN_BITS 22 `define PPN_HIGH_SEGMENT_BITS 12 `define PA_BITS 34 +`define SVMODE_BITS 1 +// constants to check SATP_MODE against +// defined in Table 4.3 of the privileged spec +`define NO_TRANSLATE 0 +`define SV32 1 +`define SV39 8 // These two are only here to stop +`define SV48 9 // the verilator from yelling at me diff --git a/wally-pipelined/config/rv64BP/wally-config.vh b/wally-pipelined/config/rv64BP/wally-config.vh index 17a8c284..f85e0c22 100644 --- a/wally-pipelined/config/rv64BP/wally-config.vh +++ b/wally-pipelined/config/rv64BP/wally-config.vh @@ -32,7 +32,7 @@ `define XLEN 64 //`define MISA (32'h00000105) -`define MISA (32'h00000104 | 1<<5 | 1<<18 | 1 << 20 | 1 << 12 | 1 << 0) +`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0) `define A_SUPPORTED ((`MISA >> 0) % 2 == 1) `define C_SUPPORTED ((`MISA >> 2) % 2 == 1) `define D_SUPPORTED ((`MISA >> 3) % 2 == 1) @@ -107,8 +107,9 @@ /* verilator lint_off ASSIGNDLY */ /* verilator lint_off PINCONNECTEMPTY */ -`define TWO_BIT_PRELOAD "../config/rv64icfd/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv64icfd/BTBPredictor.txt" +`define TWO_BIT_PRELOAD "../config/rv64BP/twoBitPredictor.txt" +`define BTB_PRELOAD "../config/rv64BP/BTBPredictor.txt" `define BPRED_ENABLED 1 -`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE +//`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE +`define BPTYPE "BPGSHARE" // BPTWOBIT or "BPGLOBAL" or BPLOCALPAg or BPGSHARE `define TESTSBP 1 diff --git a/wally-pipelined/config/rv64BP/wally-constants.vh b/wally-pipelined/config/rv64BP/wally-constants.vh index 43d95863..cc6c27fc 100644 --- a/wally-pipelined/config/rv64BP/wally-constants.vh +++ b/wally-pipelined/config/rv64BP/wally-constants.vh @@ -2,11 +2,14 @@ // wally-constants.vh // // Written: tfleming@hmc.edu 4 March 2021 -// Modified: +// Modified: Kmacsaigoren@hmc.edu 31 May 
2021 +// Added constants for checking sv mode and changed existing constants to accommodate +// both sv48 and sv39 // -// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. -// These macros should not be changed, except in the event of an -// update to the architecture or particularly special circumstances. +// Purpose: Specify constants necessary for different memory virtualization modes. +// These are specific to sv48, defined in section 4.5 of the privileged spec. +// However, despite different constants for different modes, the hardware helps distinguish between +// each mode. // // A component of the Wally configurable RISC-V project. // @@ -25,9 +28,16 @@ // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /////////////////////////////////////////// -// Virtual Memory Constants (sv39) +// Virtual Memory Constants (sv48) `define VPN_SEGMENT_BITS 9 -`define VPN_BITS 27 +`define VPN_BITS 36 +`define PPN_HIGH_SEGMENT_BITS 17 `define PPN_BITS 44 -`define PPN_HIGH_SEGMENT_BITS 26 -`define PA_BITS 56 +`define PA_BITS 56 +`define SVMODE_BITS 4 +// constants to check SATP_MODE against +// defined in Table 4.3 of the privileged spec +`define NO_TRANSLATE 0 +`define SV32 1 +`define SV39 8 +`define SV48 9 diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index 259e41ae..12d254ba 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -31,7 +31,7 @@ `define XLEN 64 // MISA RISC-V configuration per specification -`define MISA (32'h00000104 | 0 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0) +`define MISA (32'h00000104 | 0 << 5 | 0 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0) `define A_SUPPORTED ((`MISA >> 0) % 2 == 1) `define C_SUPPORTED ((`MISA >> 2) % 2 == 1) `define D_SUPPORTED ((`MISA >> 3) % 2 == 1) diff --git a/wally-pipelined/config/rv64ic/wally-constants.vh 
b/wally-pipelined/config/rv64ic/wally-constants.vh index 43d95863..cc6c27fc 100644 --- a/wally-pipelined/config/rv64ic/wally-constants.vh +++ b/wally-pipelined/config/rv64ic/wally-constants.vh @@ -2,11 +2,14 @@ // wally-constants.vh // // Written: tfleming@hmc.edu 4 March 2021 -// Modified: +// Modified: Kmacsaigoren@hmc.edu 31 May 2021 +// Added constants for checking sv mode and changed existing constants to accommodate +// both sv48 and sv39 // -// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. -// These macros should not be changed, except in the event of an -// update to the architecture or particularly special circumstances. +// Purpose: Specify constants necessary for different memory virtualization modes. +// These are specific to sv48, defined in section 4.5 of the privileged spec. +// However, despite different constants for different modes, the hardware helps distinguish between +// each mode. // // A component of the Wally configurable RISC-V project. // @@ -25,9 +28,16 @@ // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/////////////////////////////////////////// -// Virtual Memory Constants (sv39) +// Virtual Memory Constants (sv48) `define VPN_SEGMENT_BITS 9 -`define VPN_BITS 27 +`define VPN_BITS 36 +`define PPN_HIGH_SEGMENT_BITS 17 `define PPN_BITS 44 -`define PPN_HIGH_SEGMENT_BITS 26 -`define PA_BITS 56 +`define PA_BITS 56 +`define SVMODE_BITS 4 +// constants to check SATP_MODE against +// defined in Table 4.3 of the privileged spec +`define NO_TRANSLATE 0 +`define SV32 1 +`define SV39 8 +`define SV48 9 diff --git a/wally-pipelined/config/rv64icfd/wally-constants.vh b/wally-pipelined/config/rv64icfd/wally-constants.vh index 43d95863..cc6c27fc 100644 --- a/wally-pipelined/config/rv64icfd/wally-constants.vh +++ b/wally-pipelined/config/rv64icfd/wally-constants.vh @@ -2,11 +2,14 @@ // wally-constants.vh // // Written: tfleming@hmc.edu 4 March 2021 -// Modified: +// Modified: Kmacsaigoren@hmc.edu 31 May 2021 +// Added constants for checking sv mode and changed existing constants to accommodate +// both sv48 and sv39 // -// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. -// These macros should not be changed, except in the event of an -// update to the architecture or particularly special circumstances. +// Purpose: Specify constants necessary for different memory virtualization modes. +// These are specific to sv48, defined in section 4.5 of the privileged spec. +// However, despite different constants for different modes, the hardware helps distinguish between +// each mode. // // A component of the Wally configurable RISC-V project. // @@ -25,9 +28,16 @@ // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/////////////////////////////////////////// -// Virtual Memory Constants (sv39) +// Virtual Memory Constants (sv48) `define VPN_SEGMENT_BITS 9 -`define VPN_BITS 27 +`define VPN_BITS 36 +`define PPN_HIGH_SEGMENT_BITS 17 `define PPN_BITS 44 -`define PPN_HIGH_SEGMENT_BITS 26 -`define PA_BITS 56 +`define PA_BITS 56 +`define SVMODE_BITS 4 +// constants to check SATP_MODE against +// defined in Table 4.3 of the privileged spec +`define NO_TRANSLATE 0 +`define SV32 1 +`define SV39 8 +`define SV48 9 diff --git a/wally-pipelined/config/rv64imc/wally-constants.vh b/wally-pipelined/config/rv64imc/wally-constants.vh index 43d95863..cc6c27fc 100644 --- a/wally-pipelined/config/rv64imc/wally-constants.vh +++ b/wally-pipelined/config/rv64imc/wally-constants.vh @@ -2,11 +2,14 @@ // wally-constants.vh // // Written: tfleming@hmc.edu 4 March 2021 -// Modified: +// Modified: Kmacsaigoren@hmc.edu 31 May 2021 +// Added constants for checking sv mode and changed existing constants to accommodate +// both sv48 and sv39 // -// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture. -// These macros should not be changed, except in the event of an -// update to the architecture or particularly special circumstances. +// Purpose: Specify constants necessary for different memory virtualization modes. +// These are specific to sv48, defined in section 4.5 of the privileged spec. +// However, despite different constants for different modes, the hardware helps distinguish between +// each mode. // // A component of the Wally configurable RISC-V project. // @@ -25,9 +28,16 @@ // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/////////////////////////////////////////// -// Virtual Memory Constants (sv39) +// Virtual Memory Constants (sv48) `define VPN_SEGMENT_BITS 9 -`define VPN_BITS 27 +`define VPN_BITS 36 +`define PPN_HIGH_SEGMENT_BITS 17 `define PPN_BITS 44 -`define PPN_HIGH_SEGMENT_BITS 26 -`define PA_BITS 56 +`define PA_BITS 56 +`define SVMODE_BITS 4 +// constants to check SATP_MODE against +// defined in Table 4.3 of the privileged spec +`define NO_TRANSLATE 0 +`define SV32 1 +`define SV39 8 +`define SV48 9 diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index c876b313..e303f205 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -25,528 +25,455 @@ `include "wally-config.vh" module fpu ( - input logic [2:0] FRM_REGW, // Rounding mode from CSR - input logic reset, + input logic [2:0] FRM_REGW, // Rounding mode from CSR + input logic reset, //input logic clear, // *** not being used anywhere - input logic clk, - input logic [31:0] InstrD, - input logic [`XLEN-1:0] SrcAE, // Integer input being processed - input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg - input logic StallE, StallM, StallW, - input logic FlushE, FlushM, FlushW, - input logic [`AHBW-1:0] HRDATA, - input logic RegWriteD, - output logic [4:0] SetFflagsM, - output logic [31:0] FSROutW, - output logic [1:0] FMemRWM, - output logic FStallD, - output logic FWriteIntE, FWriteIntM, FWriteIntW, + input logic clk, + input logic [31:0] InstrD, + input logic [`XLEN-1:0] SrcAE, // Integer input being processed + input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg + input logic StallE, StallM, StallW, + input logic FlushE, FlushM, FlushW, + input logic [`AHBW-1:0] HRDATA, + input logic RegWriteD, + output logic [4:0] SetFflagsM, + output logic [31:0] FSROutW, + output logic [1:0] FMemRWM, + output logic FStallD, + output logic FWriteIntE, FWriteIntM, FWriteIntW, output logic [`XLEN-1:0] FWriteDataM, - output logic 
FDivSqrtDoneM, - output logic IllegalFPUInstrD, + output logic FDivSqrtDoneM, + output logic IllegalFPUInstrD, output logic [`XLEN-1:0] FPUResultW); - - - - - //control logic signal instantiation - logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable - logic [2:0] FrmD, FrmE, FrmM, FrmW; // FP rounding mode - logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double - logic FDivStartD, FDivStartE; // Start division - logic FWriteIntD; // Write to integer register - logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction - logic [1:0] FMemRWD, FMemRWE; // Read and write enable for memory - logic [1:0] FForwardInput1D, FForwardInput1E; // Input1 forwarding mux control signal - logic [1:0] FForwardInput2D, FForwardInput2E; // Input2 forwarding mux control signal - logic FForwardInput3D, FForwardInput3E; // Input3 forwarding mux control signal - logic FInput2UsedD; // Is input 2 used - logic FInput3UsedD; // Is input 3 used - logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result - logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component - - // regfile signals - logic [4:0] RdE, RdM, RdW; // ***Can take from ieu - logic [`XLEN-1:0] FWDM; // Write data for FP register - logic [`XLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - logic [`XLEN-1:0] FRD1E, FRD2E, FRD3E; - logic [`XLEN-1:0] FInput1E, FInput1M, FInput1tmpE; - logic [`XLEN-1:0] FInput2E, FInput2M; - logic [`XLEN-1:0] FInput3E, FInput3M; - logic [`XLEN-1:0] FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions - - // div/sqrt signals - logic DivDenormM, DivDenormW; - logic DivOvEn, DivUnEn; - logic DivBusyM; - logic [63:0] FDivResultM, FDivResultW; - logic [4:0] FDivFlagsM, FDivFlagsW; - - // FMA signals - logic [12:0] aligncntE, aligncntM; - logic [105:0] rE, rM; - logic [105:0] sE, sM; - logic [163:0] tE, tM; - logic [8:0] 
normcntE, normcntM; - logic [12:0] aeE, aeM; - logic bsE, bsM; - logic killprodE, killprodM; - logic prodofE, prodofM; - logic xzeroE, xzeroM; - logic yzeroE, yzeroM; - logic zzeroE, zzeroM; - logic xdenormE, xdenormM; - logic ydenormE, ydenormM; - logic zdenormE, zdenormM; - logic xinfE, xinfM; - logic yinfE, yinfM; - logic zinfE, zinfM; - logic xnanE, xnanM; - logic ynanE, ynanM; - logic znanE, znanM; - logic nanE, nanM; - logic [8:0] sumshiftE, sumshiftM; - logic sumshiftzeroE, sumshiftzeroM; - logic prodinfE, prodinfM; - logic [63:0] FmaResultM, FmaResultW; - logic [4:0] FmaFlagsM, FmaFlagsW; - - // add/cvt signals - logic [63:0] AddSumE, AddSumTcE; - logic [3:0] AddSelInvE; - logic [10:0] AddExpPostSumE; - logic AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE; - logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE; - logic AddConvertE; - logic [63:0] AddFloat1E, AddFloat2E; - logic [11:0] AddExp1DenormE, AddExp2DenormE; - logic [10:0] AddExponentE; - logic [2:0] AddRmE; - logic [3:0] AddOpTypeE; - logic AddPE, AddOvEnE, AddUnEnE; - logic AddDenormM; - logic [63:0] AddSumM, AddSumTcM; - logic [3:0] AddSelInvM; - logic [10:0] AddExpPostSumM; - logic AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM; - logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM; - logic AddConvertM, AddSignM; - logic [63:0] AddFloat1M, AddFloat2M; - logic [11:0] AddExp1DenormM, AddExp2DenormM; - logic [10:0] AddExponentM; - logic [63:0] AddOp1M, AddOp2M; - logic [2:0] AddRmM; - logic [3:0] AddOpTypeM; - logic AddPM, AddOvEnM, AddUnEnM; - logic [63:0] FAddResultM, FAddResultW; - logic [4:0] FAddFlagsM, FAddFlagsW; - - //cmp signals - logic [7:0] WE, WM; - logic [7:0] XE, XM; - logic ANaNE, ANaNM; - logic BNaNE, BNaNM; - logic AzeroE, AzeroM; - logic BzeroE, BzeroM; - logic CmpInvalidM, CmpInvalidW; - logic [1:0] CmpFCCM, CmpFCCW; - logic [63:0] FCmpResultM, FCmpResultW; - - // fsgn signals - logic [63:0] SgnResultE, 
SgnResultM, SgnResultW; - logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW; - - //instantiation of W stage regfile signals - logic [`XLEN-1:0] SrcAW; - - // classify signals - logic [63:0] ClassResultE, ClassResultM, ClassResultW; - - // other - logic [63:0] FPUResult64W, FPUResult64E; // 64-bit FPU result - logic [4:0] FPUFlagsW; - - // pipeline control logic - logic PipeEnableDE; - logic PipeEnableEM; - logic PipeEnableMW; - logic PipeClearDE; - logic PipeClearEM; - logic PipeClearMW; - - //temporarily assign pipe clear and enable signals - //to never flush & always be running - localparam PipeClear = 1'b0; - localparam PipeEnable = 1'b1; - always_comb begin - - PipeEnableDE = ~StallE; - PipeEnableEM = ~StallM; - PipeEnableMW = ~StallW; - PipeClearDE = FlushE; - PipeClearEM = FlushM; - PipeClearMW = FlushW; - - end - - - - - - - - - - - - - - //DECODE STAGE - - //Hazard unit for FPU - fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*); - - //top-level controller for FPU - fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*); - - - //regfile instantiation + // control logic signal instantiation + logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable + logic [2:0] FrmD, FrmE, FrmM, FrmW; // FP rounding mode + logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double + logic FDivStartD, FDivStartE; // Start division + logic FWriteIntD; // Write to integer register + logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction + logic [1:0] FMemRWD, FMemRWE; // Read and write enable for memory + logic [1:0] FForwardInput1D, FForwardInput1E; // Input1 forwarding mux control signal + logic [1:0] FForwardInput2D, FForwardInput2E; // Input2 forwarding mux control signal + logic FForwardInput3D, FForwardInput3E; // Input3 forwarding mux control signal + logic FInput2UsedD; // Is input 2 used + logic FInput3UsedD; // Is input 3 used + 
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result + logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component + + // regfile signals + logic [4:0] RdE, RdM, RdW; // ***Can take from ieu + logic [`XLEN-1:0] FWDM; // Write data for FP register + logic [`XLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register + logic [`XLEN-1:0] FRD1E, FRD2E, FRD3E; + logic [`XLEN-1:0] FInput1E, FInput1M, FInput1tmpE; + logic [`XLEN-1:0] FInput2E, FInput2M; + logic [`XLEN-1:0] FInput3E, FInput3M; + logic [`XLEN-1:0] FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions + + // div/sqrt signals + logic DivDenormM, DivDenormW; + logic DivOvEn, DivUnEn; + logic DivBusyM; + logic [63:0] FDivResultM, FDivResultW; + logic [4:0] FDivFlagsM, FDivFlagsW; + + // FMA signals + logic [12:0] aligncntE, aligncntM; + logic [105:0] rE, rM; + logic [105:0] sE, sM; + logic [163:0] tE, tM; + logic [8:0] normcntE, normcntM; + logic [12:0] aeE, aeM; + logic bsE, bsM; + logic killprodE, killprodM; + logic prodofE, prodofM; + logic xzeroE, xzeroM; + logic yzeroE, yzeroM; + logic zzeroE, zzeroM; + logic xdenormE, xdenormM; + logic ydenormE, ydenormM; + logic zdenormE, zdenormM; + logic xinfE, xinfM; + logic yinfE, yinfM; + logic zinfE, zinfM; + logic xnanE, xnanM; + logic ynanE, ynanM; + logic znanE, znanM; + logic nanE, nanM; + logic [8:0] sumshiftE, sumshiftM; + logic sumshiftzeroE, sumshiftzeroM; + logic prodinfE, prodinfM; + logic [63:0] FmaResultM, FmaResultW; + logic [4:0] FmaFlagsM, FmaFlagsW; + + // add/cvt signals + logic [63:0] AddSumE, AddSumTcE; + logic [3:0] AddSelInvE; + logic [10:0] AddExpPostSumE; + logic AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE; + logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE; + logic AddConvertE; + logic [63:0] AddFloat1E, AddFloat2E; + logic [11:0] AddExp1DenormE, AddExp2DenormE; + logic [10:0] AddExponentE; + 
logic [2:0] AddRmE; + logic [3:0] AddOpTypeE; + logic AddPE, AddOvEnE, AddUnEnE; + logic AddDenormM; + logic [63:0] AddSumM, AddSumTcM; + logic [3:0] AddSelInvM; + logic [10:0] AddExpPostSumM; + logic AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM; + logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM; + logic AddConvertM, AddSignM; + logic [63:0] AddFloat1M, AddFloat2M; + logic [11:0] AddExp1DenormM, AddExp2DenormM; + logic [10:0] AddExponentM; + logic [63:0] AddOp1M, AddOp2M; + logic [2:0] AddRmM; + logic [3:0] AddOpTypeM; + logic AddPM, AddOvEnM, AddUnEnM; + logic [63:0] FAddResultM, FAddResultW; + logic [4:0] FAddFlagsM, FAddFlagsW; + + // cmp signals + logic [7:0] WE, WM; + logic [7:0] XE, XM; + logic ANaNE, ANaNM; + logic BNaNE, BNaNM; + logic AzeroE, AzeroM; + logic BzeroE, BzeroM; + logic CmpInvalidM, CmpInvalidW; + logic [1:0] CmpFCCM, CmpFCCW; + logic [63:0] FCmpResultM, FCmpResultW; + + // fsgn signals + logic [63:0] SgnResultE, SgnResultM, SgnResultW; + logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW; + + // instantiation of W stage regfile signals + logic [`XLEN-1:0] SrcAW; + + // classify signals + logic [63:0] ClassResultE, ClassResultM, ClassResultW; + + // 64-bit FPU result + logic [63:0] FPUResult64W, FPUResult64E; + logic [4:0] FPUFlagsW; + + // pipeline control logic + logic PipeEnableDE; + logic PipeEnableEM; + logic PipeEnableMW; + logic PipeClearDE; + logic PipeClearEM; + logic PipeClearMW; + + // temporarily assign pipe clear and enable signals + // to never flush & always be running + localparam PipeClear = 1'b0; + localparam PipeEnable = 1'b1; + always_comb begin + PipeEnableDE = ~StallE; + PipeEnableEM = ~StallM; + PipeEnableMW = ~StallW; + PipeClearDE = FlushE; + PipeClearEM = FlushM; + PipeClearMW = FlushW; + end + + //DECODE STAGE + + // Hazard unit for FPU + fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*); + + // top-level controller for FPU + fctrl ctrl 
(.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*); + + // regfile instantiation FPregfile fpregfile (clk, reset, FWriteEnW, InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, FPUResult64W, FRD1D, FRD2D, FRD3D); - - - - - - - - - - //***************** - //fpregfile D/E pipe registers - //***************** - flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, FRD1D, FRD1E); - flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, FRD2D, FRD2E); - flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, FRD3D, FRD3E); - - //***************** - //other D/E pipe registers - //***************** - flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FWriteEnD, FWriteEnE); - flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE); - flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE); - flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE); - flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE); - flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, FOpCtrlD, FOpCtrlE); - flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, FDivStartD, FDivStartE); - flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput1D, FForwardInput1E); - flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput2D, FForwardInput2E); - flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput3D, FForwardInput3E); - flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResult64W, FPUResult64E); - flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE); - flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E); - flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE); - - - - - - - - - - - - - - //EXECUTION STAGE - - - - // input muxs for forwarding - mux4 #(64) 
FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, SrcAM, FForwardInput1E, FInput1tmpE); - mux3 #(64) FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E); - mux2 #(64) FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E); - mux2 #(64) FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E); - - fma1 fma1 (.*); - - //first and only instance of floating-point divider - fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .*); - - //first of two-stage instance of floating-point add/cvt unit - fpuaddcvt1 fpadd1 (.*); - - //first of two-stage instance of floating-point comparator - fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]); - - //first and only instance of floating-point sign converter - fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*); - - //first and only instance of floating-point classify unit - fpuclassify fpuclass (.*); - - - - - - - - - - - - - - - - - //***************** - //fpregfile D/E pipe registers - //***************** - flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, FInput1E, FInput1M); - flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, FInput2E, FInput2M); - flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, FInput3E, FInput3M); - - //***************** - //fma E/M pipe registers - //***************** - flopenrc #(13) EMRegFma1(clk, reset, PipeClearEM, PipeEnableEM, aligncntE, aligncntM); - flopenrc #(106) EMRegFma2(clk, reset, PipeClearEM, PipeEnableEM, rE, rM); - flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, sE, sM); - flopenrc #(164) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, tE, tM); - flopenrc #(9) EMRegFma5(clk, reset, PipeClearEM, PipeEnableEM, normcntE, normcntM); - flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, aeE, aeM); - flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, bsE, bsM); - flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, killprodE, killprodM); - flopenrc #(1) 
EMRegFma9(clk, reset, PipeClearEM, PipeEnableEM, prodofE, prodofM); - flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM); - flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, yzeroE, yzeroM); - flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, zzeroE, zzeroM); - flopenrc #(1) EMRegFma13(clk, reset, PipeClearEM, PipeEnableEM, xdenormE, xdenormM); - flopenrc #(1) EMRegFma14(clk, reset, PipeClearEM, PipeEnableEM, ydenormE, ydenormM); - flopenrc #(1) EMRegFma15(clk, reset, PipeClearEM, PipeEnableEM, zdenormE, zdenormM); - flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM); - flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, yinfE, yinfM); - flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, zinfE, zinfM); - flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM); - flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, ynanE, ynanM); - flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, znanE, znanM); - flopenrc #(1) EMRegFma22(clk, reset, PipeClearEM, PipeEnableEM, nanE, nanM); - flopenrc #(9) EMRegFma23(clk, reset, PipeClearEM, PipeEnableEM, sumshiftE, sumshiftM); - flopenrc #(1) EMRegFma24(clk, reset, PipeClearEM, PipeEnableEM, sumshiftzeroE, sumshiftzeroM); - flopenrc #(1) EMRegFma25(clk, reset, PipeClearEM, PipeEnableEM, prodinfE, prodinfM); - - //***************** - //fpadd E/M pipe registers - //***************** - flopenrc #(64) EMRegAdd1(clk, reset, PipeClearEM, PipeEnableEM, AddSumE, AddSumM); - flopenrc #(64) EMRegAdd2(clk, reset, PipeClearEM, PipeEnableEM, AddSumTcE, AddSumTcM); - flopenrc #(4) EMRegAdd3(clk, reset, PipeClearEM, PipeEnableEM, AddSelInvE, AddSelInvM); - flopenrc #(11) EMRegAdd4(clk, reset, PipeClearEM, PipeEnableEM, AddExpPostSumE, AddExpPostSumM); - flopenrc #(1) EMRegAdd5(clk, reset, PipeClearEM, PipeEnableEM, AddCorrSignE, AddCorrSignM); - flopenrc #(1) EMRegAdd6(clk, reset, PipeClearEM, 
PipeEnableEM, AddOp1NormE, AddOp1NormM); - flopenrc #(1) EMRegAdd7(clk, reset, PipeClearEM, PipeEnableEM, AddOp2NormE, AddOp2NormM); - flopenrc #(1) EMRegAdd8(clk, reset, PipeClearEM, PipeEnableEM, AddOpANormE, AddOpANormM); - flopenrc #(1) EMRegAdd9(clk, reset, PipeClearEM, PipeEnableEM, AddOpBNormE, AddOpBNormM); - flopenrc #(1) EMRegAdd10(clk, reset, PipeClearEM, PipeEnableEM, AddInvalidE, AddInvalidM); - flopenrc #(1) EMRegAdd11(clk, reset, PipeClearEM, PipeEnableEM, AddDenormInE, AddDenormInM); - flopenrc #(1) EMRegAdd12(clk, reset, PipeClearEM, PipeEnableEM, AddConvertE, AddConvertM); - flopenrc #(1) EMRegAdd13(clk, reset, PipeClearEM, PipeEnableEM, AddSwapE, AddSwapM); - flopenrc #(1) EMRegAdd14(clk, reset, PipeClearEM, PipeEnableEM, AddNormOvflowE, AddNormOvflowM); - flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignAM); - flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M); - flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M); - flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); - flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); - flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM); - flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM); - flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM); - flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM); - flopenrc #(1) EMRegAdd26(clk, reset, PipeClearEM, PipeEnableEM, AddOvEnE, AddOvEnM); - flopenrc #(1) EMRegAdd27(clk, reset, PipeClearEM, PipeEnableEM, AddUnEnE, AddUnEnM); - - //***************** - //fpcmp E/M pipe registers - //***************** - flopenrc #(8) EMRegCmp1(clk, reset, PipeClearEM, PipeEnableEM, WE, WM); - flopenrc #(8) EMRegCmp2(clk, reset, PipeClearEM, PipeEnableEM, XE, 
XM); - flopenrc #(1) EMRegcmp3(clk, reset, PipeClearEM, PipeEnableEM, ANaNE, ANaNM); - flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM); - flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM); - flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM); - - //put this in for the event we want to delay fsgn - will otherwise bypass - //***************** - //fpsgn E/M pipe registers - //***************** - flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM); - flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM); - - //***************** - //other E/M pipe registers - //***************** - flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FWriteEnE, FWriteEnM); - flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM); - flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM); - flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM); - flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM); - flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM); - flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM); - flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM); - - //***************** - //fpuclassify E/M pipe registers - //***************** - flopenrc #(64) EMRegClass(clk, reset, PipeClearEM, PipeEnableEM, ClassResultE, ClassResultM); - - - - - - - - - //BEGIN MEMORY STAGE - - assign FWriteDataM = FInput1M; - - mux2 #(64) FLoadStoreResultMux(HRDATA, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM); - - fma2 fma2(.*); - - //second instance of two-stage floating-point add/cvt unit - fpuaddcvt2 fpadd2 (.*); - - //second instance of two-stage floating-point comparator - fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM), 
.Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(FInput1M), .op2(FInput2M), .*); - - - - - - - - - - - - //***************** - //fma M/W pipe registers - //***************** - flopenrc #(64) MWRegFma1(clk, reset, PipeClearMW, PipeEnableMW, FmaResultM, FmaResultW); - flopenrc #(5) MWRegFma2(clk, reset, PipeClearMW, PipeEnableMW, FmaFlagsM, FmaFlagsW); - - //***************** - //fpdiv M/W pipe registers - //***************** - flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, FDivResultM, FDivResultW); - flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, FDivFlagsM, FDivFlagsW); - flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW); - - //***************** - //fpadd M/W pipe registers - //***************** - flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, FAddResultM, FAddResultW); - flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, FAddFlagsM, FAddFlagsW); - - //***************** - //fpcmp M/W pipe registers - //***************** - flopenrc #(1) MWRegCmp1(clk, reset, PipeClearMW, PipeEnableMW, CmpInvalidM, CmpInvalidW); - flopenrc #(2) MWRegCmp2(clk, reset, PipeClearMW, PipeEnableMW, CmpFCCM, CmpFCCW); - flopenrc #(64) MWRegCmp3(clk, reset, PipeClearMW, PipeEnableMW, FCmpResultM, FCmpResultW); - - //***************** - //fpsgn M/W pipe registers - //***************** - flopenrc #(64) MWRegSgn1(clk, reset, PipeClearMW, PipeEnableMW, SgnResultM, SgnResultW); - flopenrc #(5) MWRegSgn2(clk, reset, PipeClearMW, PipeEnableMW, SgnFlagsM, SgnFlagsW); - - //***************** - //other M/W pipe registers - //***************** - flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FWriteEnM, FWriteEnW); - flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW); - flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW); - flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW); - flopenrc #(`XLEN) MWReg5(clk, 
reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW); - flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW); - flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW); - - - //***************** - //fpuclassify M/W pipe registers - //***************** - flopenrc #(64) MWRegClass(clk, reset, PipeClearMW, PipeEnableMW, ClassResultM, ClassResultW); - - - - - - - + + //***************** + // fpregfile D/E pipe registers + //***************** + flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, FRD1D, FRD1E); + flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, FRD2D, FRD2E); + flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, FRD3D, FRD3E); + + //***************** + // other D/E pipe registers + //***************** + flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FWriteEnD, FWriteEnE); + flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE); + flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE); + flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE); + flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE); + flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, FOpCtrlD, FOpCtrlE); + flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, FDivStartD, FDivStartE); + flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput1D, FForwardInput1E); + flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput2D, FForwardInput2E); + flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput3D, FForwardInput3E); + flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResult64W, FPUResult64E); + flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE); + flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E); + flopenrc #(2) DEReg17(clk, 
reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE);
+
+   //EXECUTION STAGE
+
+   // input muxes for forwarding
+   mux4 #(64) FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, SrcAM, FForwardInput1E, FInput1tmpE);
+   mux3 #(64) FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E);
+   mux2 #(64) FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E);
+   mux2 #(64) FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E);
+
+   fma1 fma1 (.*);
+
+   // first and only instance of floating-point divider; it runs on fpdivClk, which the gater enables while FDivStartE or DivBusyM is asserted
+   logic fpdivClk;
+
+   clockgater fpdivclkg(.E(FDivStartE),
+                        .SE(DivBusyM),
+                        .CLK(clk),
+                        .ECLK(fpdivClk));
+
+   // the explicit .clk connection takes precedence over the wildcard; .* connects every remaining fpdiv port by name
+   fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .*);
+
+   // first of two-stage instance of floating-point add/cvt unit
+   fpuaddcvt1 fpadd1 (.*);
+
+   // first of two-stage instance of floating-point comparator
+   fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]);
+
+   // first and only instance of floating-point sign converter
+   fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*);
+
+   // first and only instance of floating-point classify unit
+   fpuclassify fpuclass (.*);
+
+   //*****************
+   // fpregfile E/M pipe registers
+   //*****************
+   flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, FInput1E, FInput1M);
+   flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, FInput2E, FInput2M);
+   flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, FInput3E, FInput3M);
+
+   //*****************
+   // fma E/M pipe registers
+   //*****************
+   flopenrc #(13) EMRegFma1(clk, reset, PipeClearEM, PipeEnableEM, aligncntE, aligncntM);
+   flopenrc #(106) EMRegFma2(clk, reset, PipeClearEM, PipeEnableEM, rE, rM);
+   flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, sE, sM);
+   flopenrc #(164) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, tE, tM);
+   flopenrc #(9) EMRegFma5(clk, reset, PipeClearEM, PipeEnableEM, normcntE, normcntM);
+   flopenrc #(13) EMRegFma6(clk,
reset, PipeClearEM, PipeEnableEM, aeE, aeM); + flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, bsE, bsM); + flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, killprodE, killprodM); + flopenrc #(1) EMRegFma9(clk, reset, PipeClearEM, PipeEnableEM, prodofE, prodofM); + flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM); + flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, yzeroE, yzeroM); + flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, zzeroE, zzeroM); + flopenrc #(1) EMRegFma13(clk, reset, PipeClearEM, PipeEnableEM, xdenormE, xdenormM); + flopenrc #(1) EMRegFma14(clk, reset, PipeClearEM, PipeEnableEM, ydenormE, ydenormM); + flopenrc #(1) EMRegFma15(clk, reset, PipeClearEM, PipeEnableEM, zdenormE, zdenormM); + flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM); + flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, yinfE, yinfM); + flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, zinfE, zinfM); + flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM); + flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, ynanE, ynanM); + flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, znanE, znanM); + flopenrc #(1) EMRegFma22(clk, reset, PipeClearEM, PipeEnableEM, nanE, nanM); + flopenrc #(9) EMRegFma23(clk, reset, PipeClearEM, PipeEnableEM, sumshiftE, sumshiftM); + flopenrc #(1) EMRegFma24(clk, reset, PipeClearEM, PipeEnableEM, sumshiftzeroE, sumshiftzeroM); + flopenrc #(1) EMRegFma25(clk, reset, PipeClearEM, PipeEnableEM, prodinfE, prodinfM); + + //***************** + // fpadd E/M pipe registers + //***************** + flopenrc #(64) EMRegAdd1(clk, reset, PipeClearEM, PipeEnableEM, AddSumE, AddSumM); + flopenrc #(64) EMRegAdd2(clk, reset, PipeClearEM, PipeEnableEM, AddSumTcE, AddSumTcM); + flopenrc #(4) EMRegAdd3(clk, reset, PipeClearEM, PipeEnableEM, AddSelInvE, AddSelInvM); + flopenrc #(11) 
EMRegAdd4(clk, reset, PipeClearEM, PipeEnableEM, AddExpPostSumE, AddExpPostSumM); + flopenrc #(1) EMRegAdd5(clk, reset, PipeClearEM, PipeEnableEM, AddCorrSignE, AddCorrSignM); + flopenrc #(1) EMRegAdd6(clk, reset, PipeClearEM, PipeEnableEM, AddOp1NormE, AddOp1NormM); + flopenrc #(1) EMRegAdd7(clk, reset, PipeClearEM, PipeEnableEM, AddOp2NormE, AddOp2NormM); + flopenrc #(1) EMRegAdd8(clk, reset, PipeClearEM, PipeEnableEM, AddOpANormE, AddOpANormM); + flopenrc #(1) EMRegAdd9(clk, reset, PipeClearEM, PipeEnableEM, AddOpBNormE, AddOpBNormM); + flopenrc #(1) EMRegAdd10(clk, reset, PipeClearEM, PipeEnableEM, AddInvalidE, AddInvalidM); + flopenrc #(1) EMRegAdd11(clk, reset, PipeClearEM, PipeEnableEM, AddDenormInE, AddDenormInM); + flopenrc #(1) EMRegAdd12(clk, reset, PipeClearEM, PipeEnableEM, AddConvertE, AddConvertM); + flopenrc #(1) EMRegAdd13(clk, reset, PipeClearEM, PipeEnableEM, AddSwapE, AddSwapM); + flopenrc #(1) EMRegAdd14(clk, reset, PipeClearEM, PipeEnableEM, AddNormOvflowE, AddNormOvflowM); + flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignAM); + flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M); + flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M); + flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); + flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); + flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM); + flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM); + flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM); + flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM); + flopenrc #(1) EMRegAdd26(clk, reset, PipeClearEM, PipeEnableEM, AddOvEnE, AddOvEnM); + flopenrc #(1) EMRegAdd27(clk, reset, PipeClearEM, PipeEnableEM, AddUnEnE, 
AddUnEnM); + + //***************** + // fpcmp E/M pipe registers + //***************** + flopenrc #(8) EMRegCmp1(clk, reset, PipeClearEM, PipeEnableEM, WE, WM); + flopenrc #(8) EMRegCmp2(clk, reset, PipeClearEM, PipeEnableEM, XE, XM); + flopenrc #(1) EMRegcmp3(clk, reset, PipeClearEM, PipeEnableEM, ANaNE, ANaNM); + flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM); + flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM); + flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM); + + // put this in for the event we want to delay fsgn - will otherwise bypass + //***************** + // fpsgn E/M pipe registers + //***************** + flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM); + flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM); + + //***************** + // other E/M pipe registers + //***************** + flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FWriteEnE, FWriteEnM); + flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM); + flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM); + flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM); + flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM); + flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM); + flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM); + flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM); + + //***************** + // fpuclassify E/M pipe registers + //***************** + flopenrc #(64) EMRegClass(clk, reset, PipeClearEM, PipeEnableEM, ClassResultE, ClassResultM); + + //BEGIN MEMORY STAGE + + assign FWriteDataM = FInput1M; + + mux2 #(64) FLoadStoreResultMux(HRDATA, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM); + + fma2 fma2(.*); + + // second instance of two-stage 
floating-point add/cvt unit + fpuaddcvt2 fpadd2 (.*); + + // second instance of two-stage floating-point comparator + fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM), + .Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(FInput1M), .op2(FInput2M), .*); + + //***************** + // fma M/W pipe registers + //***************** + flopenrc #(64) MWRegFma1(clk, reset, PipeClearMW, PipeEnableMW, FmaResultM, FmaResultW); + flopenrc #(5) MWRegFma2(clk, reset, PipeClearMW, PipeEnableMW, FmaFlagsM, FmaFlagsW); + + //***************** + // fpdiv M/W pipe registers + //***************** + flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, FDivResultM, FDivResultW); + flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, FDivFlagsM, FDivFlagsW); + flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW); + + //***************** + // fpadd M/W pipe registers + //***************** + flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, FAddResultM, FAddResultW); + flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, FAddFlagsM, FAddFlagsW); + + //***************** + // fpcmp M/W pipe registers + //***************** + flopenrc #(1) MWRegCmp1(clk, reset, PipeClearMW, PipeEnableMW, CmpInvalidM, CmpInvalidW); + flopenrc #(2) MWRegCmp2(clk, reset, PipeClearMW, PipeEnableMW, CmpFCCM, CmpFCCW); + flopenrc #(64) MWRegCmp3(clk, reset, PipeClearMW, PipeEnableMW, FCmpResultM, FCmpResultW); + + //***************** + // fpsgn M/W pipe registers + //***************** + flopenrc #(64) MWRegSgn1(clk, reset, PipeClearMW, PipeEnableMW, SgnResultM, SgnResultW); + flopenrc #(5) MWRegSgn2(clk, reset, PipeClearMW, PipeEnableMW, SgnFlagsM, SgnFlagsW); + + //***************** + // other M/W pipe registers + //***************** + flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FWriteEnM, FWriteEnW); + flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, 
FResultSelM, FResultSelW); + flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW); + flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW); + flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW); + flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW); + flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW); + + //***************** + // fpuclassify M/W pipe registers + //***************** + flopenrc #(64) MWRegClass(clk, reset, PipeClearMW, PipeEnableMW, ClassResultM, ClassResultW); //######################################### - //BEGIN WRITEBACK STAGE + // BEGIN WRITEBACK STAGE //######################################### - - always_comb begin - case (FResultSelW) - // div/sqrt - 3'b000 : FPUFlagsW = FDivFlagsW; - // cmp - 3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0}; - //fma/mult - 3'b010 : FPUFlagsW = FmaFlagsW; - // sgn inj - 3'b011 : FPUFlagsW = SgnFlagsW; - // add/sub/cnvt - 3'b100 : FPUFlagsW = FAddFlagsW; - // classify - 3'b101 : FPUFlagsW = 5'b0; - // output SrcAW - 3'b110 : FPUFlagsW = 5'b0; - // output FRD1 - 3'b111 : FPUFlagsW = 5'b0; - default : FPUFlagsW = 5'bxxxxx; - endcase - end - - - always_comb begin - case (FResultSelW) - // div/sqrt - 3'b000 : FPUResult64W = FDivResultW; - // cmp - 3'b001 : FPUResult64W = FCmpResultW; - //fma/mult - 3'b010 : FPUResult64W = FmaResultW; - // sgn inj - 3'b011 : FPUResult64W = SgnResultW; - // add/sub/cnvt - 3'b100 : FPUResult64W = FAddResultW; - // classify - 3'b101 : FPUResult64W = ClassResultW; - // output SrcAW - 3'b110 : FPUResult64W = SrcAW; - // Load/Store/Move to FP-register - 3'b111 : FPUResult64W = FLoadStoreResultW; - default : FPUResult64W = {64{1'bx}}; - endcase - end - //interface between XLEN size datapath and double-precision sized - //floating-point results - // - //define offsets for LSB zero extension or truncation - always_comb begin - - //zero extension + + 
always_comb begin + case (FResultSelW) + // div/sqrt + 3'b000 : FPUFlagsW = FDivFlagsW; + // cmp + 3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0}; + //fma/mult + 3'b010 : FPUFlagsW = FmaFlagsW; + // sgn inj + 3'b011 : FPUFlagsW = SgnFlagsW; + // add/sub/cnvt + 3'b100 : FPUFlagsW = FAddFlagsW; + // classify + 3'b101 : FPUFlagsW = 5'b0; + // output SrcAW + 3'b110 : FPUFlagsW = 5'b0; + // output FRD1 + 3'b111 : FPUFlagsW = 5'b0; + default : FPUFlagsW = 5'bxxxxx; + endcase + end + + always_comb begin + case (FResultSelW) + // div/sqrt + 3'b000 : FPUResult64W = FDivResultW; + // cmp + 3'b001 : FPUResult64W = FCmpResultW; + //fma/mult + 3'b010 : FPUResult64W = FmaResultW; + // sgn inj + 3'b011 : FPUResult64W = SgnResultW; + // add/sub/cnvt + 3'b100 : FPUResult64W = FAddResultW; + // classify + 3'b101 : FPUResult64W = ClassResultW; + // output SrcAW + 3'b110 : FPUResult64W = SrcAW; + // Load/Store/Move to FP-register + 3'b111 : FPUResult64W = FLoadStoreResultW; + default : FPUResult64W = {64{1'bx}}; + endcase + end // always_comb + + // interface between XLEN size datapath and double-precision sized + // floating-point results + // + // define offsets for LSB zero extension or truncation + always_comb begin + // zero extension FPUResultW = FPUResult64W[63:64-`XLEN]; - SetFflagsM = FPUFlagsW; + SetFflagsM = FPUFlagsW; + end + +endmodule // fpu - end -endmodule diff --git a/wally-pipelined/src/generic/clockgater.sv b/wally-pipelined/src/generic/clockgater.sv new file mode 100644 index 00000000..c06a1cbd --- /dev/null +++ b/wally-pipelined/src/generic/clockgater.sv @@ -0,0 +1,46 @@ +/////////////////////////////////////////// +// clockgater.sv +// +// Written: Ross Thompson 9 January 2021 +// Modified: +// +// Purpose: Clock gater model. Must use standard cell for synthesis. +// +// A component of the Wally configurable RISC-V project. 
+//
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module clockgater
+  (input  logic E,     // clock enable
+   input  logic SE,    // second enable, ORed with E
+   input  logic CLK,   // ungated input clock
+   output logic ECLK); // gated clock: CLK ANDed with the latched enable
+
+  // VERY IMPORTANT.
+  // This part functionally models a clock gater, but does not necessarily meet the timing constraints a real standard cell would.
+  // Do not use this in synthesis!
+
+  logic enable_q;
+
+  // Enable latch: follows E | SE while CLK is low and holds while CLK is high, so ECLK only changes on CLK edges.
+  always_latch begin
+    if (~CLK) enable_q <= E | SE;
+  end
+  assign ECLK = enable_q & CLK;
+
+endmodule
diff --git a/wally-pipelined/src/generic/lzd.sv b/wally-pipelined/src/generic/lzd.sv
new file mode 100755
index 00000000..98642c15
--- /dev/null
+++ b/wally-pipelined/src/generic/lzd.sv
@@ -0,0 +1,195 @@
+///////////////////////////////////////////
+// lzd.sv
+//
+// Written: James.Stine@okstate.edu 1 February 2021
+// Modified:
+//
+// Purpose: Hierarchical leading zero detector
+//
+// A component of the Wally configurable RISC-V project.
+//
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+/* verilator lint_off DECLFILENAME */
+
+// Original idea came from V. G. 
Oklobdzija, "An algorithmic and novel
+// design of a leading zero detector circuit: comparison with logic
+// synthesis," in IEEE Transactions on Very Large Scale Integration
+// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi:
+// 10.1109/92.273153.
+
+// Modified to be more hierarchical
+
+module lzd2 (P, V, B);
+  // 2-bit base case: V = some bit of B is set; P = leading-zero count (1 only when B == 2'b01)
+  input logic [1:0] B;
+
+  output logic P;
+  output logic V;
+
+  assign V = B[0] | B[1];
+  assign P = B[0] & ~B[1];
+
+endmodule // lzd2
+
+module lzd_hier #(parameter WIDTH=8)
+  (input logic [WIDTH-1:0] B,
+   output logic [$clog2(WIDTH)-1:0] ZP,
+   output logic ZV);
+  // Width-selecting wrapper: only WIDTH = 4, 8, 16, 32, 64 or 128 instantiates a detector
+  if (WIDTH == 128)
+    lzd128 lz128 (ZP, ZV, B);
+  else if (WIDTH == 64)
+    lzd64 lz64 (ZP, ZV, B);
+  else if (WIDTH == 32)
+    lzd32 lz32 (ZP, ZV, B);
+  else if (WIDTH == 16)
+    lzd16 lz16 (ZP, ZV, B);
+  else if (WIDTH == 8)
+    lzd8 lz8 (ZP, ZV, B);
+  else if (WIDTH == 4)
+    lzd4 lz4 (ZP, ZV, B);
+
+endmodule // lzd_hier
+
+module lzd4 (ZP, ZV, B);
+  // ZV = B is non-zero; ZP = number of leading zeros in B (meaningful only when ZV is 1)
+  input logic [3:0] B;
+
+  logic ZPa;
+  logic ZPb;
+  logic ZVa;
+  logic ZVb;
+
+  output logic [1:0] ZP;
+  output logic ZV;
+
+  lzd2 l1(ZPa, ZVa, B[1:0]);
+  lzd2 l2(ZPb, ZVb, B[3:2]);
+
+  assign ZP[0:0] = ZVb ? ZPb : ZPa;
+  assign ZP[1] = ~ZVb;
+  assign ZV = ZVa | ZVb;
+
+endmodule // lzd4
+
+module lzd8 (ZP, ZV, B);
+  // Upper half (b) wins when it holds a 1; otherwise the MSB of ZP is set and the lower half's count is used
+  input logic [7:0] B;
+
+  logic [1:0] ZPa;
+  logic [1:0] ZPb;
+  logic ZVa;
+  logic ZVb;
+
+  output logic [2:0] ZP;
+  output logic ZV;
+
+  lzd4 l1(ZPa, ZVa, B[3:0]);
+  lzd4 l2(ZPb, ZVb, B[7:4]);
+
+  assign ZP[1:0] = ZVb ? ZPb : ZPa;
+  assign ZP[2] = ~ZVb;
+  assign ZV = ZVa | ZVb;
+
+endmodule // lzd8
+
+module lzd16 (ZP, ZV, B);
+  // Same combining step as lzd8, built from two lzd8 halves
+  input logic [15:0] B;
+
+  logic [2:0] ZPa;
+  logic [2:0] ZPb;
+  logic ZVa;
+  logic ZVb;
+
+  output logic [3:0] ZP;
+  output logic ZV;
+
+  lzd8 l1(ZPa, ZVa, B[7:0]);
+  lzd8 l2(ZPb, ZVb, B[15:8]);
+
+  assign ZP[2:0] = ZVb ? ZPb : ZPa;
+  assign ZP[3] = ~ZVb;
+  assign ZV = ZVa | ZVb;
+
+endmodule // lzd16
+
+module lzd32 (ZP, ZV, B);
+  // Same combining step as lzd8, built from two lzd16 halves
+  input logic [31:0] B;
+
+  logic [3:0] ZPa;
+  logic [3:0] ZPb;
+  logic ZVa;
+  logic ZVb;
+
+  output logic [4:0] ZP;
+  output logic ZV;
+
+  lzd16 l1(ZPa, ZVa, B[15:0]);
+  lzd16 l2(ZPb, ZVb, B[31:16]);
+
+  assign ZP[3:0] = ZVb ? ZPb : ZPa;
+  assign ZP[4] = ~ZVb;
+  assign ZV = ZVa | ZVb;
+
+endmodule // lzd32
+
+module lzd64 (ZP, ZV, B);
+  // Same combining step as lzd8, built from two lzd32 halves
+  input logic [63:0] B;
+
+  logic [4:0] ZPa;
+  logic [4:0] ZPb;
+  logic ZVa;
+  logic ZVb;
+
+  output logic [5:0] ZP;
+  output logic ZV;
+
+  lzd32 l1(ZPa, ZVa, B[31:0]);
+  lzd32 l2(ZPb, ZVb, B[63:32]);
+
+  assign ZP[4:0] = ZVb ? ZPb : ZPa;
+  assign ZP[5] = ~ZVb;
+  assign ZV = ZVa | ZVb;
+
+endmodule // lzd64
+
+module lzd128 (ZP, ZV, B);
+  // Same combining step as lzd8, built from two non-overlapping 64-bit halves
+  input logic [127:0] B;
+
+  logic [5:0] ZPa;
+  logic [5:0] ZPb;
+  logic ZVa;
+  logic ZVb;
+
+  output logic [6:0] ZP;
+  output logic ZV;
+
+  lzd64 l1(ZPa, ZVa, B[63:0]);
+  lzd64 l2(ZPb, ZVb, B[127:64]);
+
+  assign ZP[5:0] = ZVb ? ZPb : ZPa;
+  assign ZP[6] = ~ZVb;
+  assign ZV = ZVa | ZVb;
+
+endmodule // lzd128
+
+/* verilator lint_on DECLFILENAME */
diff --git a/wally-pipelined/src/generic/lzd.sv~ b/wally-pipelined/src/generic/lzd.sv~
new file mode 100755
index 00000000..bfffe5e5
--- /dev/null
+++ b/wally-pipelined/src/generic/lzd.sv~
@@ -0,0 +1,195 @@
+///////////////////////////////////////////
+// lzd.sv
+//
+// Written: James.Stine@okstate.edu 1 February 2021
+// Modified:
+//
+// Purpose: Integer Divide instructions
+//
+// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" +/* verilator lint_off DECLFILENAME */ + +// Original idea came from V. G. Oklobdzija, "An algorithmic and novel +// design of a leading zero detector circuit: comparison with logic +// synthesis," in IEEE Transactions on Very Large Scale Integration +// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi: +// 10.1109/92.273153. 
+
+// Modified to be more hierarchical
+// lz2: 2-bit leaf cell. V is set when either bit of B is set; P is set only for B == 2'b01 (one leading zero).
+module lz2 (P, V, B);
+
+   input logic [1:0] B;
+
+   output logic      P;
+   output logic      V;
+
+   assign V = B[0] | B[1];
+   assign P = B[0] & ~B[1];
+
+endmodule // lz2
+// lzd_hier: width-selected wrapper. Only WIDTH = 4, 8, 16, 32, 64 or 128 instantiates a detector; any other WIDTH leaves ZP and ZV undriven.
+module lzd_hier #(parameter WIDTH=8)
+   (input logic [WIDTH-1:0]          B,
+    output logic [$clog2(WIDTH)-1:0] ZP,
+    output logic                     ZV);
+
+   if (WIDTH == 128)
+     lz128 lzd127 (ZP, ZV, B);
+   else if (WIDTH == 64)
+     lz64 lzd64 (ZP, ZV, B);
+   else if (WIDTH == 32)
+     lz32 lzd32 (ZP, ZV, B);
+   else if (WIDTH == 16)
+     lz16 lzd16 (ZP, ZV, B);
+   else if (WIDTH == 8)
+     lz8 lzd8 (ZP, ZV, B);
+   else if (WIDTH == 4)
+     lz4 lzd4 (ZP, ZV, B);
+
+endmodule // lzd_hier
+// lz4: ZV = B has a set bit. When ZV is 1, ZP is the number of zero bits above the most significant set bit of B.
+module lz4 (ZP, ZV, B);
+
+   input logic [3:0]  B;
+
+   logic              ZPa;
+   logic              ZPb;
+   logic              ZVa;
+   logic              ZVb;
+
+   output logic [1:0] ZP;
+   output logic       ZV;
+   // l1 examines the low half, l2 the high half.
+   lz2 l1(ZPa, ZVa, B[1:0]);
+   lz2 l2(ZPb, ZVb, B[3:2]);
+   // If the high half has a set bit its count is used; otherwise the low-half count is used and the top bit of ZP is set.
+   assign ZP[0:0] = ZVb ? ZPb : ZPa;
+   assign ZP[1] = ~ZVb;
+   assign ZV = ZVa | ZVb;
+
+endmodule
+// lz8: same combining scheme as lz4, built from two lz4 halves.
+module lz8 (ZP, ZV, B);
+
+   input logic [7:0]  B;
+
+   logic [1:0]        ZPa;
+   logic [1:0]        ZPb;
+   logic              ZVa;
+   logic              ZVb;
+
+   output logic [2:0] ZP;
+   output logic       ZV;
+
+   lz4 l1(ZPa, ZVa, B[3:0]);
+   lz4 l2(ZPb, ZVb, B[7:4]);
+
+   assign ZP[1:0] = ZVb ? ZPb : ZPa;
+   assign ZP[2] = ~ZVb;
+   assign ZV = ZVa | ZVb;
+
+endmodule
+// lz16: same combining scheme, built from two lz8 halves.
+module lz16 (ZP, ZV, B);
+
+   input logic [15:0] B;
+
+   logic [2:0]        ZPa;
+   logic [2:0]        ZPb;
+   logic              ZVa;
+   logic              ZVb;
+
+   output logic [3:0] ZP;
+   output logic       ZV;
+
+   lz8 l1(ZPa, ZVa, B[7:0]);
+   lz8 l2(ZPb, ZVb, B[15:8]);
+
+   assign ZP[2:0] = ZVb ? ZPb : ZPa;
+   assign ZP[3] = ~ZVb;
+   assign ZV = ZVa | ZVb;
+
+endmodule // lz16
+// lz32: same combining scheme, built from two lz16 halves.
+module lz32 (ZP, ZV, B);
+
+   input logic [31:0] B;
+
+   logic [3:0]        ZPa;
+   logic [3:0]        ZPb;
+   logic              ZVa;
+   logic              ZVb;
+
+   output logic [4:0] ZP;
+   output logic       ZV;
+
+   lz16 l1(ZPa, ZVa, B[15:0]);
+   lz16 l2(ZPb, ZVb, B[31:16]);
+
+   assign ZP[3:0] = ZVb ? ZPb : ZPa;
+   assign ZP[4] = ~ZVb;
+   assign ZV = ZVa | ZVb;
+
+endmodule // lz32
+// lz64: same combining scheme, built from two lz32 halves.
+module lz64 (ZP, ZV, B);
+
+   input logic [63:0] B;
+
+   logic [4:0]        ZPa;
+   logic [4:0]        ZPb;
+   logic              ZVa;
+   logic              ZVb;
+
+   output logic [5:0] ZP;
+   output logic       ZV;
+
+   lz32 l1(ZPa, ZVa, B[31:0]);
+   lz32 l2(ZPb, ZVb, B[63:32]);
+
+   assign ZP[4:0] = ZVb ? ZPb : ZPa;
+   assign ZP[5] = ~ZVb;
+   assign ZV = ZVa | ZVb;
+
+endmodule // lz64
+// lz128: same combining scheme, built from two lz64 halves.
+module lz128 (ZP, ZV, B);
+
+   input logic [127:0] B;
+
+   logic [5:0]         ZPa;
+   logic [5:0]         ZPb;
+   logic               ZVa;
+   logic               ZVb;
+
+   output logic [6:0]  ZP;
+   output logic        ZV;
+   // The halves are two disjoint 64-bit slices, matching the 64-bit B port of lz64.
+   lz64 l1(ZPa, ZVa, B[63:0]);
+   lz64 l2(ZPb, ZVb, B[127:64]);
+
+   assign ZP[5:0] = ZVb ? ZPb : ZPa;
+   assign ZP[6] = ~ZVb;
+   assign ZV = ZVa | ZVb;
+
+endmodule // lz128
+
+/* verilator lint_on DECLFILENAME */
diff --git a/wally-pipelined/src/generic/shift.sv b/wally-pipelined/src/generic/shift.sv new file mode 100755 index 00000000..88152588 --- /dev/null +++ b/wally-pipelined/src/generic/shift.sv @@ -0,0 +1,76 @@ +/////////////////////////////////////////// +// shifters.sv +// +// Written: James.Stine@okstate.edu 1 February 2021 +// Modified: +// +// Purpose: Integer Divide instructions +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+/* verilator lint_off DECLFILENAME */
+/* verilator lint_off UNOPTFLAT */
+// shift_right: zero-filling right shift of A, built as $clog2(WIDTH) cascaded mux stages. Z is the output of the last stage.
+module shift_right #(parameter WIDTH=8)
+   (input logic [WIDTH-1:0]         A,
+    input logic [$clog2(WIDTH)-1:0] Shift,
+    output logic [WIDTH-1:0]        Z);
+   // Stage i offers a shift by WIDTH/2**(i+1); these distances equal the binary weights of Shift only when WIDTH is a power of two.
+   logic [WIDTH-1:0]                stage [$clog2(WIDTH):0];
+   // assumes mux2 ports are (d0, d1, select, y), so a set Shift bit picks the shifted operand - TODO confirm against mux2
+   genvar                           i;
+
+   assign stage[0] = A;
+   generate
+      for (i=0;i<$clog2(WIDTH);i=i+1)
+        begin : genbit
+           mux2 #(WIDTH) mux_inst (stage[i],
+                                   {{(WIDTH/(2**(i+1))){1'b0}}, stage[i][WIDTH-1:WIDTH/(2**(i+1))]},
+                                   Shift[$clog2(WIDTH)-i-1],
+                                   stage[i+1]);
+        end
+   endgenerate
+   assign Z = stage[$clog2(WIDTH)];
+
+endmodule // shift_right
+// shift_left: zero-filling left shift of A, using the same stage structure as shift_right (stage i offers a shift by WIDTH/2**(i+1)).
+module shift_left #(parameter WIDTH=8)
+   (input logic [WIDTH-1:0]         A,
+    input logic [$clog2(WIDTH)-1:0] Shift,
+    output logic [WIDTH-1:0]        Z);
+   // As in shift_right, the stage distances match Shift's binary weights only when WIDTH is a power of two.
+   logic [WIDTH-1:0]                stage [$clog2(WIDTH):0];
+   genvar                           i;
+
+   assign stage[0] = A;
+   generate
+      for (i=0;i<$clog2(WIDTH);i=i+1)
+        begin : genbit
+           mux2 #(WIDTH) mux_inst (stage[i],
+                                   {stage[i][WIDTH-1-WIDTH/(2**(i+1)):0], {(WIDTH/(2**(i+1))){1'b0}}},
+                                   Shift[$clog2(WIDTH)-i-1],
+                                   stage[i+1]);
+        end
+   endgenerate
+   assign Z = stage[$clog2(WIDTH)];
+
+endmodule // shift_left
+
+/* verilator lint_on DECLFILENAME */
+/* verilator lint_on UNOPTFLAT */
diff --git a/wally-pipelined/src/ifu/bpred.sv b/wally-pipelined/src/ifu/bpred.sv index de0f8143..92471c57 100644 --- a/wally-pipelined/src/ifu/bpred.sv +++ b/wally-pipelined/src/ifu/bpred.sv @@ -30,7 +30,8 @@ module bpred (input logic 
clk, reset, - input logic StallF, StallD, StallE, FlushF, FlushD, FlushE, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushF, FlushD, FlushE, FlushM, FlushW, // Fetch stage // the prediction input logic [`XLEN-1:0] PCNextF, // *** forgot to include this one on the I/O list @@ -88,25 +89,29 @@ module bpred globalHistoryPredictor DirPredictor(.clk(clk), .reset(reset), .*, // Stalls and flushes - .LookUpPC(PCNextF), - .Prediction(BPPredF), + .PCNextF(PCNextF), + .BPPredF(BPPredF), // update - .UpdatePC(PCE), - .UpdateEN(InstrClassE[0] & ~StallE), + .InstrClassE(InstrClassE), + .BPInstrClassE(BPInstrClassE), + .BPPredDirWrongE(BPPredDirWrongE), + .PCE(PCE), .PCSrcE(PCSrcE), - .UpdatePrediction(UpdateBPPredE)); + .UpdateBPPredE(UpdateBPPredE)); end else if (`BPTYPE == "BPGSHARE") begin:Predictor gsharePredictor DirPredictor(.clk(clk), - .reset(reset), - .*, // Stalls and flushes - .LookUpPC(PCNextF), - .Prediction(BPPredF), - // update - .UpdatePC(PCE), - .UpdateEN(InstrClassE[0] & ~StallE), - .PCSrcE(PCSrcE), - .UpdatePrediction(UpdateBPPredE)); + .reset(reset), + .*, // Stalls and flushes + .PCNextF(PCNextF), + .BPPredF(BPPredF), + // update + .InstrClassE(InstrClassE), + .BPInstrClassE(BPInstrClassE), + .BPPredDirWrongE(BPPredDirWrongE), + .PCE(PCE), + .PCSrcE(PCSrcE), + .UpdateBPPredE(UpdateBPPredE)); end else if (`BPTYPE == "BPLOCALPAg") begin:Predictor @@ -190,14 +195,14 @@ module bpred flopenrc #(2) BPPredRegD(.clk(clk), .reset(reset), .en(~StallD), - .clear(FlushD), + .clear(1'b0), .d(BPPredF), .q(BPPredD)); flopenrc #(2) BPPredRegE(.clk(clk), .reset(reset), .en(~StallE), - .clear(FlushE), + .clear(1'b0), .d(BPPredD), .q(BPPredE)); diff --git a/wally-pipelined/src/ifu/globalHistoryPredictor.sv b/wally-pipelined/src/ifu/globalHistoryPredictor.sv index 087458df..516de633 100644 --- a/wally-pipelined/src/ifu/globalHistoryPredictor.sv +++ b/wally-pipelined/src/ifu/globalHistoryPredictor.sv @@ -32,76 +32,89 @@ module globalHistoryPredictor ) 
(input logic clk, input logic reset, - input logic StallF, StallD, StallE, FlushF, FlushD, FlushE, - input logic [`XLEN-1:0] LookUpPC, - output logic [1:0] Prediction, + input logic StallF, StallD, StallE, FlushF, FlushD, FlushE, + input logic [`XLEN-1:0] PCNextF, + output logic [1:0] BPPredF, // update - input logic [`XLEN-1:0] UpdatePC, - input logic UpdateEN, PCSrcE, - input logic [1:0] UpdatePrediction - + input logic [4:0] InstrClassE, + input logic [4:0] BPInstrClassE, + input logic [4:0] BPInstrClassD, + input logic [4:0] BPInstrClassF, + input logic BPPredDirWrongE, + + input logic [`XLEN-1:0] PCE, + input logic PCSrcE, + input logic [1:0] UpdateBPPredE + ); - logic [k-1:0] GHRF, GHRFNext; - assign GHRFNext = {PCSrcE, GHRF[k-1:1]}; + logic [k+1:0] GHR, GHRNext; + logic [k-1:0] PHTUpdateAdr, PHTUpdateAdr0, PHTUpdateAdr1; + logic PHTUpdateEN; + logic BPClassWrongNonCFI; + logic BPClassWrongCFI; + logic BPClassRightNonCFI; - flopenr #(k) GlobalHistoryRegister(.clk(clk), - .reset(reset), - .en(UpdateEN), - .d(GHRFNext), - .q(GHRF)); + logic [6:0] GHRMuxSel; + logic GHRUpdateEN; + logic [k-1:0] GHRLookup; + assign BPClassRightNonCFI = ~BPInstrClassE[0] & ~InstrClassE[0]; + assign BPClassWrongCFI = ~BPInstrClassE[0] & InstrClassE[0]; + assign BPClassWrongNonCFI = BPInstrClassE[0] & ~InstrClassE[0]; + assign BPClassRightBPWrong = BPInstrClassE[0] & InstrClassE[0] & BPPredDirWrongE; + assign BPClassRightBPRight = BPInstrClassE[0] & InstrClassE[0] & ~BPPredDirWrongE; + + + // GHR update selection, 1 hot encoded. 
+ assign GHRMuxSel[0] = ~BPInstrClassF[0] & (BPClassRightNonCFI | BPClassRightBPRight); + assign GHRMuxSel[1] = BPClassWrongCFI & ~BPInstrClassD[0]; + assign GHRMuxSel[2] = BPClassWrongNonCFI & ~BPInstrClassD[0]; + assign GHRMuxSel[3] = (BPClassRightBPWrong & ~BPInstrClassD[0]) | (BPClassWrongCFI & BPInstrClassD[0]); + assign GHRMuxSel[4] = BPClassWrongNonCFI & BPInstrClassD[0]; + assign GHRMuxSel[5] = InstrClassE[0] & BPClassRightBPWrong & BPInstrClassD[0]; + assign GHRMuxSel[6] = BPInstrClassF[0] & (BPClassRightNonCFI | (InstrClassE[0] & BPClassRightBPRight)); + assign GHRUpdateEN = (| GHRMuxSel[5:1] & ~StallE) | GHRMuxSel[6] & ~StallF; + // hoping this created a AND-OR mux. + always_comb begin + case (GHRMuxSel) + 7'b000_0001: GHRNext = GHR[k-1+2:0]; // no change + 7'b000_0010: GHRNext = {GHR[k-2+2:0], PCSrcE}; // branch update + 7'b000_0100: GHRNext = {1'b0, GHR[k+1:1]}; // repair 1 + 7'b000_1000: GHRNext = {GHR[k-1+2:1], PCSrcE}; // branch update with mis prediction correction + 7'b001_0000: GHRNext = {2'b00, GHR[k+1:2]}; // repair 2 + 7'b010_0000: GHRNext = {1'b0, GHR[k+1:2], PCSrcE}; // branch update + repair 1 + 7'b100_0000: GHRNext = {GHR[k-2+2:0], BPPredF[1]}; // speculative update + default: GHRNext = GHR[k-1+2:0]; + endcase + end - logic [1:0] PredictionMemory; - logic DoForwarding, DoForwardingF; - logic [1:0] UpdatePredictionF; - + flopenr #(k+2) GlobalHistoryRegister(.clk(clk), + .reset(reset), + .en((GHRUpdateEN)), + .d(GHRNext), + .q(GHR)); + // if actively updating the GHR at the time of prediction we want to us + // GHRNext as the lookup rather than GHR. + + assign PHTUpdateAdr0 = InstrClassE[0] ? GHR[k:1] : GHR[k-1:0]; + assign PHTUpdateAdr1 = InstrClassE[0] ? GHR[k+1:2] : GHR[k:1]; + assign PHTUpdateAdr = BPInstrClassD[0] ? PHTUpdateAdr1 : PHTUpdateAdr0; + assign PHTUpdateEN = InstrClassE[0] & ~StallE; + + assign GHRLookup = |GHRMuxSel[6:1] ? 
GHRNext[k-1:0] : GHR[k-1:0]; + // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT - // GHR referes to the address that the past k branches points to in the prediction stage - // GHRE refers to the address that the past k branches points to in the exectution stage - SRAM2P1R1W #(k, 2) PHT(.clk(clk), - .reset(reset), - .RA1(GHRF), - .RD1(PredictionMemory), - .REN1(~StallF), - .WA1(GHRFNext), - .WD1(UpdatePrediction), - .WEN1(UpdateEN), - .BitWEN1(2'b11)); + SRAM2P1R1W #(k, 2) PHT(.clk(clk), + .reset(reset), + //.RA1(GHR[k-1:0]), + .RA1(GHRLookup), + .RD1(BPPredF), + .REN1(~StallF), + .WA1(PHTUpdateAdr), + .WD1(UpdateBPPredE), + .WEN1(PHTUpdateEN), + .BitWEN1(2'b11)); - - // need to forward when updating to the same address as reading. - // first we compare to see if the update and lookup addreses are the same - assign DoForwarding = GHRF == GHRFNext; - - // register the update value and the forwarding signal into the Fetch stage - // TODO: add stall logic *** - flopr #(1) DoForwardingReg(.clk(clk), - .reset(reset), - .d(DoForwarding), - .q(DoForwardingF)); - - flopr #(2) UpdatePredictionReg(.clk(clk), - .reset(reset), - .d(UpdatePrediction), - .q(UpdatePredictionF)); - - assign Prediction = DoForwardingF ? 
UpdatePredictionF : PredictionMemory; - - //pipeline for GHR - /*flopenrc #(k) GHRDReg(.clk(clk), - .reset(reset), - .en(~StallD), - .clear(FlushD), - .d(GHRF), - .q(GHRD)); - - flopenrc #(k) GHREReg(.clk(clk), - .reset(reset), - .en(~StallE), - .clear(FlushE), - .d(GHRD), - .q(GHRE)); -*/ endmodule diff --git a/wally-pipelined/src/ifu/gshare.sv b/wally-pipelined/src/ifu/gshare.sv deleted file mode 100644 index 4d31e519..00000000 --- a/wally-pipelined/src/ifu/gshare.sv +++ /dev/null @@ -1,128 +0,0 @@ -/////////////////////////////////////////// -// gshare.sv -// -// Written: Shreya Sanghai -// Email: ssanghai@hmc.edu -// Created: March 16, 2021 -// Modified: -// -// Purpose: Gshare predictor with parameterized global history register -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-/////////////////////////////////////////// - -`include "wally-config.vh" - -module gsharePredictor - #(parameter int k = 10 - ) - (input logic clk, - input logic reset, - input logic StallF, StallD, StallE, FlushF, FlushD, FlushE, - input logic [`XLEN-1:0] LookUpPC, - output logic [1:0] Prediction, - // update - input logic [`XLEN-1:0] UpdatePC, - input logic UpdateEN, PCSrcE, - input logic [1:0] UpdatePrediction - - ); - - logic [k-1:0] GHRF, GHRFNext; - //logic [k-1:0] LookUpPCIndexD, LookUpPCIndexE; - logic [k-1:0] LookUpPCIndex, UpdatePCIndex; - logic [1:0] PredictionMemory; - logic DoForwarding, DoForwardingF; - logic [1:0] UpdatePredictionF; - - assign GHRFNext = {PCSrcE, GHRF[k-1:1]}; - - flopenr #(k) GlobalHistoryRegister(.clk(clk), - .reset(reset), - .en(UpdateEN), - .d(GHRFNext), - .q(GHRF)); - - - // for gshare xor the PC with the GHR - assign UpdatePCIndex = GHRFNext ^ UpdatePC[k:1]; - assign LookUpPCIndex = GHRF ^ LookUpPC[k:1]; - // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT - // GHR referes to the address that the past k branches points to in the prediction stage - // GHRE refers to the address that the past k branches points to in the exectution stage - SRAM2P1R1W #(k, 2) PHT(.clk(clk), - .reset(reset), - .RA1(LookUpPCIndex), - .RD1(PredictionMemory), - .REN1(~StallF), - .WA1(UpdatePCIndex), - .WD1(UpdatePrediction), - .WEN1(UpdateEN), - .BitWEN1(2'b11)); - - - // need to forward when updating to the same address as reading. 
- // first we compare to see if the update and lookup addreses are the same - assign DoForwarding = LookUpPCIndex == UpdatePCIndex; - - // register the update value and the forwarding signal into the Fetch stage - // TODO: add stall logic *** - flopr #(1) DoForwardingReg(.clk(clk), - .reset(reset), - .d(DoForwarding), - .q(DoForwardingF)); - - flopr #(2) UpdatePredictionReg(.clk(clk), - .reset(reset), - .d(UpdatePrediction), - .q(UpdatePredictionF)); - - assign Prediction = DoForwardingF ? UpdatePredictionF : PredictionMemory; - - //pipeline for GHR -/* -----\/----- EXCLUDED -----\/----- - flopenrc #(k) LookUpDReg(.clk(clk), - .reset(reset), - .en(~StallD), - .clear(FlushD), - .d(LookUpPCIndex), - .q(LookUpPCIndexD)); - - flopenrc #(k) LookUpEReg(.clk(clk), - .reset(reset), - .en(~StallE), - .clear(FlushE), - .d(LookUpPCIndexD), - .q(LookUpPCIndexE)); - -----/\----- EXCLUDED -----/\----- */ - -/* flopenrc #(k) GHRRegD(.clk(clk), - .reset(reset), - .en(~StallD), - .clear(FlushD), - .d(GHRF), - .q(GHRD)); - - flopenrc #(k) GHRRegE(.clk(clk), - .reset(reset), - .en(~StallE), - .clear(FlushE), - .d(GHRD), - .q(GHRE)); - -*/ -endmodule diff --git a/wally-pipelined/src/ifu/gsharePredictor.sv b/wally-pipelined/src/ifu/gsharePredictor.sv new file mode 100644 index 00000000..b4a60827 --- /dev/null +++ b/wally-pipelined/src/ifu/gsharePredictor.sv @@ -0,0 +1,120 @@ +/////////////////////////////////////////// +// globalHistoryPredictor.sv +// +// Written: Shreya Sanghai +// Email: ssanghai@hmc.edu +// Created: March 16, 2021 +// Modified: +// +// Purpose: Gshare predictor with parameterized global history register +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+// gsharePredictor: reads a 2-bit prediction from a 2**k-entry PHT indexed by (global history XOR PC bits [k:1]) and writes updates from the E stage.
+module gsharePredictor
+  #(parameter int k = 10
+    )
+   (input logic             clk,
+    input logic             reset,
+    input logic             StallF, StallD, StallE, FlushF, FlushD, FlushE,
+    input logic [`XLEN-1:0] PCNextF,
+    output logic [1:0]      BPPredF,
+    // update
+    input logic [4:0]       InstrClassE,
+    input logic [4:0]       BPInstrClassE,
+    input logic [4:0]       BPInstrClassD,
+    input logic [4:0]       BPInstrClassF,
+    input logic             BPPredDirWrongE,
+
+    input logic [`XLEN-1:0] PCE,
+    input logic             PCSrcE,
+    input logic [1:0]       UpdateBPPredE
+
+    );
+   logic [k+1:0]            GHR, GHRNext;
+   logic [k-1:0]            PHTUpdateAdr, PHTUpdateAdr0, PHTUpdateAdr1;
+   logic                    PHTUpdateEN;
+   logic                    BPClassWrongNonCFI;
+   logic                    BPClassWrongCFI;
+   logic                    BPClassRightNonCFI;
+   logic                    BPClassRightBPWrong, BPClassRightBPRight; // declared explicitly rather than left as implicit nets
+   logic [6:0]              GHRMuxSel;
+   logic                    GHRUpdateEN;
+   logic [k-1:0]            GHRLookup;
+   // Compare bit 0 of the predicted class (BPInstrClassE) with bit 0 of the actual class (InstrClassE) for the instruction in E.
+   assign BPClassRightNonCFI = ~BPInstrClassE[0] & ~InstrClassE[0];
+   assign BPClassWrongCFI = ~BPInstrClassE[0] & InstrClassE[0];
+   assign BPClassWrongNonCFI = BPInstrClassE[0] & ~InstrClassE[0];
+   assign BPClassRightBPWrong = BPInstrClassE[0] & InstrClassE[0] & BPPredDirWrongE;
+   assign BPClassRightBPRight = BPInstrClassE[0] & InstrClassE[0] & ~BPPredDirWrongE;
+
+   // Bits 1-5 select repairs/updates driven from E; bit 6 selects the speculative shift-in of BPPredF[1]; bit 0 selects hold.
+   // GHR update selection, 1 hot encoded.
+   assign GHRMuxSel[0] = ~BPInstrClassF[0] & (BPClassRightNonCFI | BPClassRightBPRight);
+   assign GHRMuxSel[1] = BPClassWrongCFI & ~BPInstrClassD[0];
+   assign GHRMuxSel[2] = BPClassWrongNonCFI & ~BPInstrClassD[0];
+   assign GHRMuxSel[3] = (BPClassRightBPWrong & ~BPInstrClassD[0]) | (BPClassWrongCFI & BPInstrClassD[0]);
+   assign GHRMuxSel[4] = BPClassWrongNonCFI & BPInstrClassD[0];
+   assign GHRMuxSel[5] = InstrClassE[0] & BPClassRightBPWrong & BPInstrClassD[0];
+   assign GHRMuxSel[6] = BPInstrClassF[0] & (BPClassRightNonCFI | (InstrClassE[0] & BPClassRightBPRight));
+   assign GHRUpdateEN = (| GHRMuxSel[5:1] & ~StallE) | GHRMuxSel[6] & ~StallF;
+
+   // hoping this creates an AND-OR mux.
+   always_comb begin
+      case (GHRMuxSel)
+        7'b000_0001: GHRNext = GHR[k-1+2:0]; // no change
+        7'b000_0010: GHRNext = {GHR[k-2+2:0], PCSrcE}; // branch update
+        7'b000_0100: GHRNext = {1'b0, GHR[k+1:1]}; // repair 1
+        7'b000_1000: GHRNext = {GHR[k-1+2:1], PCSrcE}; // branch update with mis prediction correction
+        7'b001_0000: GHRNext = {2'b00, GHR[k+1:2]}; // repair 2
+        7'b010_0000: GHRNext = {1'b0, GHR[k+1:2], PCSrcE}; // branch update + repair 1
+        7'b100_0000: GHRNext = {GHR[k-2+2:0], BPPredF[1]}; // speculative update
+        default: GHRNext = GHR[k-1+2:0];
+      endcase
+   end
+
+   flopenr #(k+2) GlobalHistoryRegister(.clk(clk),
+                                        .reset(reset),
+                                        .en((GHRUpdateEN)),
+                                        .d(GHRNext),
+                                        .q(GHR));
+
+   // if actively updating the GHR at the time of prediction we want to use
+   // GHRNext as the lookup rather than GHR.
+   // The PHT write address is a k-bit window of GHR whose offset (0, 1 or 2 bits) is chosen by InstrClassE[0] and BPInstrClassD[0].
+   assign PHTUpdateAdr0 = InstrClassE[0] ? GHR[k:1] : GHR[k-1:0];
+   assign PHTUpdateAdr1 = InstrClassE[0] ? GHR[k+1:2] : GHR[k:1];
+   assign PHTUpdateAdr = BPInstrClassD[0] ? PHTUpdateAdr1 : PHTUpdateAdr0;
+   assign PHTUpdateEN = InstrClassE[0] & ~StallE;
+
+   assign GHRLookup = |GHRMuxSel[6:1] ? GHRNext[k-1:0] : GHR[k-1:0];
+
+   // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT
+   SRAM2P1R1W #(k, 2) PHT(.clk(clk),
+                          .reset(reset),
+                          //.RA1(GHR[k-1:0]),
+                          .RA1(GHRLookup ^ PCNextF[k:1]),
+                          .RD1(BPPredF),
+                          .REN1(~StallF),
+                          .WA1(PHTUpdateAdr ^ PCE[k:1]),
+                          .WD1(UpdateBPPredE),
+                          .WEN1(PHTUpdateEN),
+                          .BitWEN1(2'b11));
+
+endmodule // gsharePredictor
diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index 9e30a083..4f51edd7 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -154,15 +154,16 @@ module icachecontroller #(parameter LINESIZE = 256) ( localparam STATE_MISS_SPILL_FETCH_DONE = 10; // write data into SRAM/LUT localparam STATE_MISS_SPILL_READ1 = 11; // read block 0 from SRAM/LUT localparam STATE_MISS_SPILL_2 = 12; // return to ready if hit or do second block update. - localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 13; // miss on block 1, issue read to AHB and wait - localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 14; // write data to SRAM/LUT - localparam STATE_MISS_SPILL_MERGE = 15; // read block 0 of CPU access, + localparam STATE_MISS_SPILL_2_START = 13; // return to ready if hit or do second block update. + localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 14; // miss on block 1, issue read to AHB and wait + localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 15; // write data to SRAM/LUT + localparam STATE_MISS_SPILL_MERGE = 16; // read block 0 of CPU access, - localparam STATE_MISS_SPILL_FINAL = 16; // this state replicates STATE_READY's replay of the + localparam STATE_MISS_SPILL_FINAL = 17; // this state replicates STATE_READY's replay of the // spill access but does nto consider spill. It also does not do another operation. - localparam STATE_INVALIDATE = 17; // *** not sure if invalidate or evict? + localparam STATE_INVALIDATE = 18; // *** not sure if invalidate or evict? 
invalidate by cache block or address? localparam AHBByteLength = `XLEN / 8; localparam AHBOFFETWIDTH = $clog2(AHBByteLength); @@ -380,11 +381,20 @@ module icachecontroller #(parameter LINESIZE = 256) ( PCMux = 2'b10; UnalignedSelect = 1'b1; spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm. + ICacheReadEn = 1'b1; + NextState = STATE_MISS_SPILL_2_START; + end + STATE_MISS_SPILL_2_START: begin if (~hit) begin CntReset = 1'b1; NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; end else begin - NextState = STATE_MISS_SPILL_FINAL; + NextState = STATE_READY; + ICacheReadEn = 1'b1; + PCMux = 2'b00; + UnalignedSelect = 1'b1; + SavePC = 1'b1; + ICacheStallF = 1'b0; end end STATE_MISS_SPILL_MISS_FETCH_WDV: begin diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 28f7597e..e0507b63 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -154,14 +154,7 @@ module ifu ( generate if (`BPRED_ENABLED == 1) begin : bpred // I am making the port connection explicit for now as I want to see them and they will be changing. - bpred bpred(.clk(clk), - .reset(reset), - .StallF(StallF), - .StallD(StallD), - .StallE(StallE), - .FlushF(FlushF), - .FlushD(FlushD), - .FlushE(FlushE), + bpred bpred(.*, .PCNextF(PCNextF), .BPPredPCF(BPPredPCF), .SelBPPredF(SelBPPredF), diff --git a/wally-pipelined/src/mmu/cam_line.sv b/wally-pipelined/src/mmu/cam_line.sv index b7577573..6bab0b60 100644 --- a/wally-pipelined/src/mmu/cam_line.sv +++ b/wally-pipelined/src/mmu/cam_line.sv @@ -2,7 +2,9 @@ // cam_line.sv // // Written: tfleming@hmc.edu & jtorrey@hmc.edu 6 April 2021 -// Modified: +// Modified: kmacsaigoren@hmc.edu 1 June 2021 +// Implemented SV48 on top of SV39. This included adding SvMode input signal and the wally constants +// Mostly this was done to make the PageNumberMixer work. // // Purpose: CAM line for the translation lookaside buffer (TLB) // Determines whether a virtual address matches the stored key. 
@@ -24,12 +26,17 @@ // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /////////////////////////////////////////// +`include "wally-constants.vh" + module cam_line #(parameter KEY_BITS = 20, parameter HIGH_SEGMENT_BITS = 10) ( input clk, reset, + // input to check which SvMode is running + input [`SVMODE_BITS-1:0] SvMode, + // The requested page number to compare against the key - input [KEY_BITS-1:0] VirtualPageNumber, + input [KEY_BITS-1:0] VirtualPageNumber, // Signals to write a new entry to this line input CAMLineWrite, @@ -38,10 +45,11 @@ module cam_line #(parameter KEY_BITS = 20, // Flush this line (set valid to 0) input TLBFlush, - // This entry is a key for a giga, mega, or kilopage. + // This entry is a key for a tera, giga, mega, or kilopage. // PageType == 2'b00 --> kilopage // PageType == 2'b01 --> megapage - // PageType == 2'b11 --> gigapage + // PageType == 2'b10 --> gigapage + // PageType == 2'b11 --> terapage output [1:0] PageType, // *** should this be the stored version or the always updated one? output Match ); @@ -67,9 +75,9 @@ module cam_line #(parameter KEY_BITS = 20, flopenr #(KEY_BITS) keyflop(clk, reset, CAMLineWrite, VirtualPageNumber, Key); // Calculate the actual query key based on the input key and the page type. - // For example, a megapage in sv39 only cares about VPN2 and VPN1, so VPN0 + // For example, a megapage in SV39 only cares about VPN2 and VPN1, so VPN0 // should automatically match. 
- page_number_mixer #(KEY_BITS, HIGH_SEGMENT_BITS) mixer(VirtualPageNumber, Key, PageType, VirtualPageNumberQuery); + page_number_mixer #(KEY_BITS, HIGH_SEGMENT_BITS) mixer(VirtualPageNumber, Key, PageType, SvMode, VirtualPageNumberQuery); assign Match = ({1'b1, VirtualPageNumberQuery} == {Valid, Key}); diff --git a/wally-pipelined/src/mmu/page_number_mixer.sv b/wally-pipelined/src/mmu/page_number_mixer.sv index 57b8e4b7..03851018 100644 --- a/wally-pipelined/src/mmu/page_number_mixer.sv +++ b/wally-pipelined/src/mmu/page_number_mixer.sv @@ -2,7 +2,11 @@ // page_number_mixer.sv // // Written: tfleming@hmc.edu & jtorrey@hmc.edu 6 April 2021 -// Modified: +// Modified: kmacsaigoren@hmc.edu 1 June 2021 +// Implemented SV48 on top of SV39. This included adding a 3rd Segment to each of the pagenumbers, +// Ensuring that the BITS and HIGH_SEGMENT_BITS inputs were correct everywhere this module gets instantiated, +// Adding several muxes to decide the bit selection to turn pagenumbers into segments based on SV mode, +// Adding support for terapage/newgigapage encoding. // // Purpose: Takes two page numbers and replaces segments of the first page // number with segments from the second, based on the page type. @@ -25,22 +29,29 @@ /////////////////////////////////////////// `include "wally-config.vh" +`include "wally-constants.vh" module page_number_mixer #(parameter BITS = 20, parameter HIGH_SEGMENT_BITS = 10) ( - input [BITS-1:0] PageNumber, - input [BITS-1:0] MixPageNumber, - input [1:0] PageType, - output [BITS-1:0] PageNumberCombined + input [BITS-1:0] PageNumber, + input [BITS-1:0] MixPageNumber, + input [1:0] PageType, + input [`SVMODE_BITS-1:0] SvMode, + + output [BITS-1:0] PageNumberCombined ); + // The upper segment might have a different width than the lower segments. + // For example, an SV39 PTE has 26 bits for PPN2 and 9 bits for the other + // segments. 
This is outside the 'if XLEN' b/c the constant is already configured + // to the correct value for the XLEN in the relevant wally-constants.vh file. + localparam LOW_SEGMENT_BITS = `VPN_SEGMENT_BITS; + // *** each time this module is implemented, low segment bits is either + // `VPN_SEGMENT_BITS or `PPN_LOW_SEGMENT_BITS (if it existed) + // in every mode so far, these are the same, so it's left as it is above. + generate - // *** Just checking XLEN is not enough to support sv39 AND sv48. if (`XLEN == 32) begin - // The upper segment might have a different width than the lower segments. - // For example, an sv39 PTE has 26 bits for PPN2 and 9 bits for the other - // segments. - localparam LOW_SEGMENT_BITS = (BITS - HIGH_SEGMENT_BITS); logic [HIGH_SEGMENT_BITS-1:0] Segment1, MixSegment1, Segment1Combined; logic [LOW_SEGMENT_BITS-1:0] Segment0, MixSegment0, Segment0Combined; @@ -58,28 +69,60 @@ module page_number_mixer #(parameter BITS = 20, // Reswizzle segments of the combined page number assign PageNumberCombined = {Segment1Combined, Segment0Combined}; end else begin - // The upper segment might have a different width than the lower segments. - // For example, an sv39 PTE has 26 bits for PPN2 and 9 bits for the other - // segments. - localparam LOW_SEGMENT_BITS = (BITS - HIGH_SEGMENT_BITS) / 2; - logic [HIGH_SEGMENT_BITS-1:0] Segment2, MixSegment2, Segment2Combined; + // After segment 0 and 1 of the page number, the width of each segment is dependent on the SvMode. + // For this reason, each segment bus is the width of its widest value across each mode + // when a smaller value needs to be loaded in to a wider bus, it's loaded in the least significant bits + // and left padded with zeros. MAKE SURE that if a value is being padded with zeros here, + // that it's padded with zeros everywhere else in the MMU and beyond to avoid false misses in the TLB. 
+ logic [HIGH_SEGMENT_BITS-1:0] Segment3, MixSegment3, Segment3Combined; + logic [HIGH_SEGMENT_BITS + LOW_SEGMENT_BITS-1:0] Segment2, MixSegment2, Segment2Combined; logic [LOW_SEGMENT_BITS-1:0] Segment1, MixSegment1, Segment1Combined; logic [LOW_SEGMENT_BITS-1:0] Segment0, MixSegment0, Segment0Combined; + // Unswizzle segments of the input page number - assign {Segment2, Segment1, Segment0} = PageNumber; - assign {MixSegment2, MixSegment1, MixSegment0} = MixPageNumber; + // *** these muxes assume that only Sv48 and SV39 are implemented in rv64. for future SV57 and up, + // there will have to be more muxes to select which value each segment gets. + // as a cool reminder: BITS is the width of the page number, virt or phys, coming into this module + // while high segment bits is the width of the highest segment of that page number. + // Note for future work: this module has to work with both VPNs and PPNs and due to their differing + // widths and the fact that the ppn has one longer segment at the top makes the muxes below very confusing. + // Potentially very annoying thing for future workers: the number of bits in a ppn is always 44 (for SV39 and48) + // but in SV57 and above, this might be a new longer length. In that case these selectors will most likely + // become even more complicated and confusing. + assign Segment3 = (SvMode == `SV48) ? + PageNumber[BITS-1:3*LOW_SEGMENT_BITS] : // take the top segment or not + {HIGH_SEGMENT_BITS{1'b0}}; // for virtual page numbers in SV39, both options should be zeros. + assign Segment2 = (SvMode == `SV48) ? + {{HIGH_SEGMENT_BITS{1'b0}}, PageNumber[3*LOW_SEGMENT_BITS-1:2*LOW_SEGMENT_BITS]} : // just take another low segment left padded with zeros. + PageNumber[BITS-1:2*LOW_SEGMENT_BITS]; // otherwise take the rest of the PageNumber + assign Segment1 = PageNumber[2*LOW_SEGMENT_BITS-1:LOW_SEGMENT_BITS]; + assign Segment0 = PageNumber[LOW_SEGMENT_BITS-1:0]; + + + assign MixSegment3 = (SvMode == `SV48) ? 
+ MixPageNumber[BITS-1:3*LOW_SEGMENT_BITS] : // take the top segment or not + {HIGH_SEGMENT_BITS{1'b0}}; // for virtual page numbers in SV39, both options should be zeros. + assign MixSegment2 = (SvMode == `SV48) ? + {{HIGH_SEGMENT_BITS{1'b0}}, MixPageNumber[3*LOW_SEGMENT_BITS-1:2*LOW_SEGMENT_BITS]} : // just take another low segment left padded with zeros. + MixPageNumber[BITS-1:2*LOW_SEGMENT_BITS]; // otherwise take the rest of the PageNumber + assign MixSegment1 = MixPageNumber[2*LOW_SEGMENT_BITS-1:LOW_SEGMENT_BITS]; + assign MixSegment0 = MixPageNumber[LOW_SEGMENT_BITS-1:0]; + // Pass through the high segment - assign Segment2Combined = Segment2; + assign Segment3Combined = Segment3; - // Either pass through or zero out segments 1 and 0 based on the page type - mux2 #(LOW_SEGMENT_BITS) segment1mux(Segment1, MixSegment1, PageType[1], Segment1Combined); - mux2 #(LOW_SEGMENT_BITS) segment0mux(Segment0, MixSegment0, PageType[0], Segment0Combined); + // Either pass through or zero out lower segments based on the page type + assign Segment2Combined = (PageType[1] && PageType[0]) ? MixSegment2 : Segment2; // terapage (page == 11) + assign Segment1Combined = (PageType[1]) ? MixSegment1 : Segment1; // gigapage and higher (page == 10 or 11) + assign Segment0Combined = (PageType[1] || PageType[0]) ? MixSegment0 : Segment0; // megapage and higher (page == 01 or 10 or 11) // Reswizzle segments of the combined page number - assign PageNumberCombined = {Segment2Combined, Segment1Combined, Segment0Combined}; + assign PageNumberCombined = (SvMode == `SV48) ? 
+ {Segment3Combined, Segment2Combined[LOW_SEGMENT_BITS-1:0], Segment1Combined, Segment0Combined} : + {Segment2Combined, Segment1Combined, Segment0Combined}; end endgenerate endmodule diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index f2aada44..b0e4fe8e 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -2,7 +2,10 @@ // pagetablewalker.sv // // Written: tfleming@hmc.edu 2 March 2021 -// Modified: +// Modified: kmacsaigoren@hmc.edu 1 June 2021 +// implemented SV48 on top of SV39. This included, adding a level of the FSM for the extra page number segment +// adding support for terapage encoding, and for setting the TranslationPAdr using the new level, +// adding the internal SvMode signal // // Purpose: Page Table Walker // Part of the Memory Management Unit (MMU) @@ -70,6 +73,7 @@ module pagetablewalker ( logic [`XLEN-1:0] SavedPTE, CurrentPTE; logic [`PA_BITS-1:0] TranslationPAdr; logic [`PPN_BITS-1:0] CurrentPPN; + logic [`SVMODE_BITS-1:0] SvMode; logic MemStore; // PTE Control Bits @@ -82,6 +86,8 @@ module pagetablewalker ( logic [`XLEN-1:0] PageTableEntry; logic [1:0] PageType; + assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; + assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0]; assign MemStore = MemRWM[0]; @@ -105,11 +111,12 @@ module pagetablewalker ( assign PageTypeF = PageType; assign PageTypeM = PageType; - localparam IDLE = 3'h0; + localparam LEVEL0 = 3'h0; localparam LEVEL1 = 3'h1; - localparam LEVEL0 = 3'h2; - localparam LEAF = 3'h3; - localparam FAULT = 3'h4; + // space left for more levels + localparam LEAF = 3'h5; + localparam IDLE = 3'h6; + localparam FAULT = 3'h7; logic [2:0] WalkerState, NextWalkerState; @@ -208,18 +215,32 @@ module pagetablewalker ( assign MMUPAdr = TranslationPAdr[31:0]; end else begin - localparam LEVEL2 = 3'h5; + localparam LEVEL2 = 3'h2; + localparam LEVEL3 = 3'h3; - logic [8:0] VPN2, VPN1, VPN0; + logic 
[8:0] VPN3, VPN2, VPN1, VPN0; - logic GigapageMisaligned, BadGigapage; + logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage; flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState); always_comb begin case (WalkerState) - IDLE: if (MMUTranslate) NextWalkerState = LEVEL2; + IDLE: if (MMUTranslate) NextWalkerState = LEVEL3; else NextWalkerState = IDLE; + LEVEL3: if (SvMode != `SV48) NextWalkerState = LEVEL2; + // 3rd level used if SV48 is enabled. + else begin + if (~MMUReady) NextWalkerState = LEVEL3; + // *** According to the architecture, we should + // fault upon finding a superpage that is misaligned or has 0 + // access bit. The following commented line of code is + // supposed to perform that check. However, it is untested. + else if (ValidPTE && LeafPTE && ~BadTerapage) NextWalkerState = LEAF; + // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. + else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL2; + else NextWalkerState = FAULT; + end LEVEL2: if (~MMUReady) NextWalkerState = LEVEL2; // *** According to the architecture, we should // fault upon finding a superpage that is misaligned or has 0 @@ -242,24 +263,29 @@ module pagetablewalker ( else if (ValidPTE && LeafPTE && ~AccessAlert) NextWalkerState = LEAF; else NextWalkerState = FAULT; - LEAF: if (MMUTranslate) NextWalkerState = LEVEL2; + LEAF: if (MMUTranslate) NextWalkerState = LEVEL3; else NextWalkerState = IDLE; - FAULT: if (MMUTranslate) NextWalkerState = LEVEL2; + FAULT: if (MMUTranslate) NextWalkerState = LEVEL3; else NextWalkerState = IDLE; // Default case should never happen, but is included for linter. default: NextWalkerState = IDLE; endcase end + // A terapage is a level 3 leaf page. This page must have zero PPN[2], + // zero PPN[1], and zero PPN[0] + assign TerapageMisaligned = |(CurrentPPN[26:0]); // A gigapage is a Level 2 leaf page. 
This page must have zero PPN[1] and // zero PPN[0] assign GigapageMisaligned = |(CurrentPPN[17:0]); // A megapage is a Level 1 leaf page. This page must have zero PPN[0]. assign MegapageMisaligned = |(CurrentPPN[8:0]); + assign BadTerapage = TerapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme assign BadGigapage = GigapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme assign BadMegapage = MegapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme + assign VPN3 = TranslationVAdr[47:39]; assign VPN2 = TranslationVAdr[38:30]; assign VPN1 = TranslationVAdr[29:21]; assign VPN0 = TranslationVAdr[20:12]; @@ -282,8 +308,13 @@ module pagetablewalker ( IDLE: begin MMUStall = '0; end + LEVEL3: begin + TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; + // *** this is a huge breaking point. if we're going through level3 every time, even when sv48 is off, + // what should translationPAdr be when level3 is just off? + end LEVEL2: begin - TranslationPAdr = {BasePageTablePPN, VPN2, 3'b000}; + TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; end LEVEL1: begin TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; @@ -295,8 +326,9 @@ module pagetablewalker ( // Keep physical address alive to prevent HADDR dropping to 0 TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; PageTableEntry = CurrentPTE; - PageType = (WalkerState == LEVEL2) ? 2'b11 : - ((WalkerState == LEVEL1) ? 2'b01 : 2'b00); + PageType = (WalkerState == LEVEL3) ? 2'b11 : + ((WalkerState == LEVEL2) ? 2'b10 : + ((WalkerState == LEVEL1) ? 
2'b01 : 2'b00)); DTLBWriteM = DTLBMissM; ITLBWriteF = ~DTLBMissM; // Prefer data over instructions end diff --git a/wally-pipelined/src/mmu/priority_encoder.sv b/wally-pipelined/src/mmu/priority_encoder.sv index e4a62ce1..dade2e83 100644 --- a/wally-pipelined/src/mmu/priority_encoder.sv +++ b/wally-pipelined/src/mmu/priority_encoder.sv @@ -4,7 +4,11 @@ // Written: tfleming@hmc.edu & jtorrey@hmc.edu 7 April 2021 // Based on implementation from https://www.allaboutcircuits.com/ip-cores/communication-controller/priority-encoder/ // *** Give proper LGPL attribution for above source -// Modified: +// Modified: Teo Ene 15 Apr 2021: +// Temporarily removed paramterized priority encoder for non-parameterized one +// To get synthesis working quickly +// Kmacsaigoren@hmc.edu 28 May 2021: +// Added working version of parameterized priority encoder. // // Purpose: One-hot encoding to binary encoder // @@ -27,51 +31,33 @@ `include "wally-config.vh" -// Teo Ene 04/15: -// Temporarily removed paramterized priority encoder for non-parameterized one -// To get synthesis working quickly module priority_encoder #(parameter BINARY_BITS = 3) ( - input logic [7:0] one_hot, - output logic [2:0] binary + input logic [2**BINARY_BITS - 1:0] one_hot, + output logic [BINARY_BITS - 1:0] binary ); - // localparam ONE_HOT_BITS = 2**BINARY_BITS; - - /* - genvar i, j; - generate - for (i = 0; i < ONE_HOT_BITS; i++) begin - for (j = 0; j < BINARY_BITS; j++) begin - if (i[j]) begin - assign binary[j] = one_hot[i]; - end - end - end - endgenerate - */ - - /* - logic [BINARY_BITS-1:0] binary_comb; - + integer i; always_comb begin - binary_comb = 0; - for (int i = 0; i < ONE_HOT_BITS; i++) - if (one_hot[i]) binary_comb = i; + binary = 0; + for (i = 0; i < 2**BINARY_BITS; i++) begin + if (one_hot[i]) binary = i; // prioritizes the most significant bit + end end + // *** triple check synthesizability here - assign binary = binary_comb; + // Ideally this mimics the following: + /* + always_comb begin + 
casex (one_hot) + 1xx ... x: binary = BINARY_BITS - 1; + 01x ... x: binary = BINARY_BITS - 2; + 001 ... x: binary = BINARY_BITS - 3; + + {...} + + 00 ... 1xx: binary = 2; + 00 ... 01x: binary = 1; + 00 ... 001: binary = 0; + end */ - always_comb - case (one_hot) - 8'h1: binary=3'h0; - 8'h2: binary=3'h1; - 8'h4: binary=3'h2; - 8'h8: binary=3'h3; - 8'h10: binary=3'h4; - 8'h20: binary=3'h5; - 8'h40: binary=3'h6; - 8'h80: binary=3'h7; - default: binary=3'h0; //should never happen - endcase - endmodule diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index 7ed594e4..1828c98e 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -2,7 +2,9 @@ // tlb.sv // // Written: jtorrey@hmc.edu 16 February 2021 -// Modified: +// Modified: kmacsaigoren@hmc.edu 1 June 2021 +// Implemented SV48 on top of SV39. This included adding the SvMode signal, +// and using it to decide the translate signal and get the virtual page number // // Purpose: Translation lookaside buffer // Cache of virtural-to-physical address translations @@ -25,7 +27,7 @@ /////////////////////////////////////////// /** - * sv32 specs + * SV32 specs * ---------- * Virtual address [31:0] (32 bits) * [________________________________] @@ -85,14 +87,11 @@ module tlb #(parameter ENTRY_BITS = 3, output TLBPageFault ); - logic SvMode; logic Translate; logic TLBAccess, ReadAccess, WriteAccess; - // *** If we want to support multiple virtual memory modes (ie sv39 AND sv48), - // we could have some muxes that control which parameters are current. - // Although then some of the signals are not big enough. But that's a problem - // for much later. + // Store current virtual memory mode (SV32, SV39, SV48, ect...) 
+ logic [`SVMODE_BITS-1:0] SvMode; // Index (currently random) to write the next TLB entry logic [ENTRY_BITS-1:0] WriteIndex; @@ -116,17 +115,24 @@ module tlb #(parameter ENTRY_BITS = 3, // Whether the virtual address has a match in the CAM logic CAMHit; - // Grab the sv bit from SATP + // Grab the sv mode from SATP + assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; + + // The bus width is always the largest it could be for that XLEN. For example, vpn will be 36 bits wide in rv64 + // this, even though it could be 27 bits (SV39) or 36 bits (SV48) wide. When the value of VPN is narrower, + // is shorter, the extra bits are used as padded zeros on the left of the full value. generate if (`XLEN == 32) begin - assign SvMode = SATP_REGW[31]; // *** change to an enum somehow? + assign VirtualPageNumber = VirtualAddress[`VPN_BITS+11:12]; end else begin - assign SvMode = SATP_REGW[63]; // currently just a boolean whether translation enabled + assign VirtualPageNumber = (SvMode == `SV48) ? + VirtualAddress[`VPN_BITS+11:12] : + {{`VPN_SEGMENT_BITS{1'b0}}, VirtualAddress[3*`VPN_SEGMENT_BITS+11:12]}; end endgenerate // Whether translation should occur - assign Translate = SvMode & (PrivilegeModeW != `M_MODE); + assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE); // Determine how the TLB is currently being used // Note that we use ReadAccess for both loads and instruction fetches @@ -134,7 +140,7 @@ module tlb #(parameter ENTRY_BITS = 3, assign WriteAccess = TLBAccessType[0]; assign TLBAccess = ReadAccess || WriteAccess; - assign VirtualPageNumber = VirtualAddress[`VPN_BITS+11:12]; + assign PageOffset = VirtualAddress[11:0]; // TLB entries are evicted according to the LRU algorithm @@ -188,9 +194,10 @@ module tlb #(parameter ENTRY_BITS = 3, // page number. For 4 KB pages, the entire virtual page number is replaced. // For superpages, some segments are considered offsets into a larger page. 
page_number_mixer #(`PPN_BITS, `PPN_HIGH_SEGMENT_BITS) - physical_mixer(PhysicalPageNumber, + physical_mixer(PhysicalPageNumber, {{EXTRA_PHYSICAL_BITS{1'b0}}, VirtualPageNumber}, HitPageType, + SvMode, PhysicalPageNumberMixed); // Provide physical address only on TLBHits to cause catastrophic errors if diff --git a/wally-pipelined/src/mmu/tlb_cam.sv b/wally-pipelined/src/mmu/tlb_cam.sv index 330bb382..78d9ff8d 100644 --- a/wally-pipelined/src/mmu/tlb_cam.sv +++ b/wally-pipelined/src/mmu/tlb_cam.sv @@ -2,7 +2,9 @@ // tlb_cam.sv // // Written: jtorrey@hmc.edu 16 February 2021 -// Modified: +// Modified: kmacsaigoren@hmc.edu 1 June 2021 +// Implemented SV48 on top of SV39. This included adding the SvMode signal input and wally constants +// Mostly this was to make the cam_lines work. // // Purpose: Stores virtual page numbers with cached translations. // Determines whether a given virtual page number is in the TLB. @@ -24,18 +26,21 @@ // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/////////////////////////////////////////// +`include "wally-constants.vh" + module tlb_cam #(parameter ENTRY_BITS = 3, parameter KEY_BITS = 20, parameter HIGH_SEGMENT_BITS = 10) ( - input clk, reset, - input [KEY_BITS-1:0] VirtualPageNumber, - input [1:0] PageTypeWrite, - input [ENTRY_BITS-1:0] WriteIndex, - input TLBWrite, - input TLBFlush, - output [ENTRY_BITS-1:0] VPNIndex, - output [1:0] HitPageType, - output CAMHit + input clk, reset, + input [KEY_BITS-1:0] VirtualPageNumber, + input [1:0] PageTypeWrite, + input [ENTRY_BITS-1:0] WriteIndex, + input [`SVMODE_BITS-1:0] SvMode, + input TLBWrite, + input TLBFlush, + output [ENTRY_BITS-1:0] VPNIndex, + output [1:0] HitPageType, + output CAMHit ); localparam NENTRIES = 2**ENTRY_BITS; diff --git a/wally-pipelined/src/muldiv/div.sv b/wally-pipelined/src/muldiv/div.sv index db830ca3..10af5eee 100755 --- a/wally-pipelined/src/muldiv/div.sv +++ b/wally-pipelined/src/muldiv/div.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// mul.sv +// divide4x64.sv // // Written: James.Stine@okstate.edu 1 February 2021 // Modified: @@ -29,60 +29,55 @@ /* verilator lint_off COMBDLY */ /* verilator lint_off IMPLICIT */ -`include "wally-config.vh" +module intdiv #(parameter WIDTH=64) + (Qf, remf, done, divBusy, div0, N, D, clk, reset, start, S); -module div (Qf, remf, done, divBusy, div0, N, D, clk, reset, start, S); - - input logic [63:0] N, D; - input logic clk; - input logic reset; - input logic start; - input logic S; + input logic [WIDTH-1:0] N, D; + input logic clk; + input logic reset; + input logic start; + input logic S; + + output logic [WIDTH-1:0] Qf; + output logic [WIDTH-1:0] remf; + output logic div0; + output logic done; + output logic divBusy; + + logic enable; + logic state0; + logic V; + logic [$clog2(WIDTH):0] Num; + logic [$clog2(WIDTH)-1:0] P, NumIter, RemShift; + logic [WIDTH-1:0] op1, op2, op1shift, Rem5; + logic [WIDTH:0] Qd, Rd, Qd2, Rd2; + logic [WIDTH-1:0] Q, rem0; + logic [3:0] quotient; + 
logic otfzero; + logic shiftResult; + logic enablev, state0v, donev, oftzerov, divBusyv, ulp; + + logic [WIDTH-1:0] twoD; + logic [WIDTH-1:0] twoN; + logic SignD; + logic SignN; + logic [WIDTH-1:0] QT, remT; + logic D_NegOne; + logic Max_N; - output logic [63:0] Qf; - output logic [63:0] remf; - output logic div0; - output logic done; - output logic divBusy; - - logic divdone; - logic enable; - logic state0; - logic V; - logic [7:0] Num; - logic [5:0] P, NumIter, RemShift; - logic [63:0] op1, op2, op1shift, Rem5; - logic [64:0] Qd, Rd, Qd2, Rd2; - logic [63:0] Q, rem0; - logic [3:0] quotient; - logic otfzero; - logic shiftResult; - logic enablev, state0v, donev, divdonev, oftzerov, divBusyv, ulp; - - logic [63:0] twoD; - logic [63:0] twoN; - logic SignD; - logic SignN; - logic [63:0] QT, remT; - logic D_NegOne; - logic Max_N; // Check if negative (two's complement) // If so, convert to positive - adder #(64) cpa1 ((D ^ {64{D[63]&S}}), {63'h0, D[63]&S}, twoD); - adder #(64) cpa2 ((N ^ {64{N[63]&S}}), {63'h0, N[63]&S}, twoN); - assign SignD = D[63]; - assign SignN = N[63]; + adder #(WIDTH) cpa1 ((D ^ {WIDTH{D[WIDTH-1]&S}}), {{WIDTH-1{1'b0}}, D[WIDTH-1]&S}, twoD); + adder #(WIDTH) cpa2 ((N ^ {WIDTH{N[WIDTH-1]&S}}), {{WIDTH-1{1'b0}}, N[WIDTH-1]&S}, twoN); + assign SignD = D[WIDTH-1]; + assign SignN = N[WIDTH-1]; // Max N and D = -1 (Overflow) - assign Max_N = (~|N[62:0]) & N[63]; + assign Max_N = (~|N[WIDTH-2:0]) & N[WIDTH-1]; assign D_NegOne = &D; - + // Divider goes the distance to 37 cycles - // (thanks the evil divisor for D = 0x1) - // but could theoretically be stopped when - // divdone is asserted. The enable signal - // turns off register storage thus invalidating - // any future cycles. + // (thanks to the evil divisor for D = 0x1) // Shift D, if needed (for integer) // needed to allow qst to be in range for integer @@ -92,32 +87,31 @@ module div (Qf, remf, done, divBusy, div0, N, D, clk, reset, start, S); // is 0 and thus a divide by 0 exception. 
This div0 // exception is given to FSM to tell the operation to // quit gracefully. - - lz64 p1 (P, V, twoD); - shifter_l64 p2 (op2, twoD, P); - assign op1 = twoN; + lzd_hier #(WIDTH) p1 (.ZP(P), .ZV(V), .B(twoD)); + shift_left #(WIDTH) p2 (twoD, P, op2); + assign op1 = twoN; assign div0 = ~V; - // #iter: N = m+v+s = m+(s+2) = m+2+s (mod k = 0) + // #iter: N = m+v+s = m+2+s (mod k = 0) // v = 2 since \rho < 1 (add 4 to make sure its a ceil) - adder #(8) cpa3 ({2'b0, P}, - {5'h0, shiftResult, ~shiftResult, 1'b0}, - Num); + // k = 2 (r = 2^k) + adder #($clog2(WIDTH)+1) cpa3 ({1'b0, P}, + {{$clog2(WIDTH)+1-3{1'b0}}, shiftResult, ~shiftResult, 1'b0}, + Num); // Determine whether need to add just Q/Rem assign shiftResult = P[0]; // div by 2 (ceil) - assign NumIter = Num[6:1]; + assign NumIter = Num[$clog2(WIDTH):1]; assign RemShift = P; // FSM to control integer divider // assume inputs are postive edge and // datapath (divider) is negative edge - fsm64 fsm1 (enablev, state0v, donev, divdonev, otfzerov, divBusyv, - start, div0, NumIter, ~clk, reset); + fsm64 #($clog2(WIDTH)) fsm1 (enablev, state0v, donev, otfzerov, divBusyv, + start, div0, NumIter, ~clk, reset); flopr #(1) rega (~clk, reset, donev, done); - flopr #(1) regb (~clk, reset, divdonev, divdone); flopr #(1) regc (~clk, reset, otfzerov, otfzero); flopr #(1) regd (~clk, reset, enablev, enable); flopr #(1) rege (~clk, reset, state0v, state0); @@ -129,65 +123,66 @@ module div (Qf, remf, done, divBusy, div0, N, D, clk, reset, start, S); // integer bit and m fractional bits), this is achieved by // shifting N right by v+s so that (m+v+s) mod k = 0. And, // the quotient has to be aligned to the integer position. 
- - divide4x64 p3 (Qd, Rd, quotient, op1, op2, clk, reset, state0, - enable, otfzero, shiftResult); + divide4 #(WIDTH) p3 (Qd, Rd, quotient, op1, op2, clk, reset, state0, + enable, otfzero, shiftResult); // Storage registers to hold contents stable - flopenr #(65) reg3 (clk, reset, enable, Rd, Rd2); - flopenr #(65) reg4 (clk, reset, enable, Qd, Qd2); + flopenr #(WIDTH+1) reg3 (clk, reset, enable, Rd, Rd2); + flopenr #(WIDTH+1) reg4 (clk, reset, enable, Qd, Qd2); // Probably not needed - just assigns results - assign Q = Qd2[63:0]; - assign Rem5 = Rd2[64:1]; + assign Q = Qd2[WIDTH-1:0]; + assign Rem5 = Rd2[WIDTH:1]; // Adjust remainder by m (no need to adjust by - // n ln(r) - shifter_r64 p4 (rem0, Rem5, RemShift); + shift_right #(WIDTH) p4 (Rem5, RemShift, rem0); // Adjust Q/Rem for Signed assign tcQ = (SignN ^ SignD) & S; assign tcR = SignN & S; - // Signed Divide + + // When Dividend (N) and/or Divisor (D) are negative (first bit is '1'): // - When N and D are negative: Remainder is negative (undergoes a two's complement). // - When N is negative: Quotient and Remainder are both negative (undergo a two's complement). // - When D is negative: Quotient is negative (undergoes a two's complement). 
- adder #(64) cpa4 ((rem0 ^ {64{tcR}}), {63'h0, tcR}, remT); - adder #(64) cpa5 ((Q ^ {64{tcQ}}), {63'h0, tcQ}, QT); + adder #(WIDTH) cpa4 ((rem0 ^ {WIDTH{tcR}}), {{WIDTH-1{1'b0}}, tcR}, remT); + adder #(WIDTH) cpa5 ((Q ^ {WIDTH{tcQ}}), {{WIDTH-1{1'b0}}, tcQ}, QT); // RISC-V has exceptions for divide by 0 and overflow (see Table 6.1 of spec) - exception_int exc (QT, remT, N, S, div0, Max_N, D_NegOne, Qf, remf); - + exception_int #(WIDTH) exc (QT, remT, N, S, div0, Max_N, D_NegOne, Qf, remf); + endmodule // int32div -module divide4x64 (Q, rem0, quotient, op1, op2, clk, reset, state0, - enable, otfzero, shiftResult); +// Division by Recurrence (r=4) +module divide4 #(parameter WIDTH=64) + (Q, rem0, quotient, op1, op2, clk, reset, state0, + enable, otfzero, shiftResult); - input logic [63:0] op1, op2; - input logic clk, state0; - input logic reset; - input logic enable; - input logic otfzero; - input logic shiftResult; + input logic [WIDTH-1:0] op1, op2; + input logic clk, state0; + input logic reset; + input logic enable; + input logic otfzero; + input logic shiftResult; - output logic [64:0] rem0; - output logic [64:0] Q; - output logic [3:0] quotient; + output logic [WIDTH:0] rem0; + output logic [WIDTH:0] Q; + output logic [3:0] quotient; - logic [67:0] Sum, Carry; - logic [64:0] Qstar; - logic [64:0] QMstar; - logic [7:0] qtotal; - logic [67:0] SumN, CarryN, SumN2, CarryN2; - logic [67:0] divi1, divi2, divi1c, divi2c, dive1; - logic [67:0] mdivi_temp, mdivi; - logic zero; - logic [1:0] qsel; - logic [1:0] Qin, QMin; - logic CshiftQ, CshiftQM; - logic [67:0] rem1, rem2, rem3; - logic [67:0] SumR, CarryR; - logic [64:0] Qt; + logic [WIDTH+3:0] Sum, Carry; + logic [WIDTH:0] Qstar; + logic [WIDTH:0] QMstar; + logic [7:0] qtotal; + logic [WIDTH+3:0] SumN, CarryN, SumN2, CarryN2; + logic [WIDTH+3:0] divi1, divi2, divi1c, divi2c, dive1; + logic [WIDTH+3:0] mdivi_temp, mdivi; + logic zero; + logic [1:0] qsel; + logic [1:0] Qin, QMin; + logic CshiftQ, CshiftQM; + logic 
[WIDTH+3:0] rem1, rem2, rem3; + logic [WIDTH+3:0] SumR, CarryR; + logic [WIDTH:0] Qt; // Create one's complement values of Divisor (for q*D) assign divi1 = {3'h0, op2, 1'b0}; @@ -195,46 +190,47 @@ module divide4x64 (Q, rem0, quotient, op1, op2, clk, reset, state0, assign divi1c = ~divi1; assign divi2c = ~divi2; // Shift x1 if not mod k - mux2 #(68) mx1 ({3'b000, op1, 1'b0}, {4'h0, op1}, shiftResult, dive1); + mux2 #(WIDTH+4) mx1 ({3'b000, op1, 1'b0}, {4'h0, op1}, shiftResult, dive1); // I I I . F F F F F ... (Robertson Criteria - \rho * qmax * D) - mux2 #(68) mx2 ({CarryN2[65:0], 2'h0}, 68'h0, state0, CarryN); - mux2 #(68) mx3 ({SumN2[65:0], 2'h0}, dive1, state0, SumN); + mux2 #(WIDTH+4) mx2 ({CarryN2[WIDTH+1:0], 2'h0}, {WIDTH+4{1'b0}}, state0, CarryN); + mux2 #(WIDTH+4) mx3 ({SumN2[WIDTH+1:0], 2'h0}, dive1, state0, SumN); // Simplify QST - adder #(8) cpa1 (SumN[67:60], CarryN[67:60], qtotal); + adder #(8) cpa1 (SumN[WIDTH+3:WIDTH-4], CarryN[WIDTH+3:WIDTH-4], qtotal); // q = {+2, +1, -1, -2} else q = 0 - qst4 pd1 (qtotal[7:1], divi1[63:61], quotient); + qst4 pd1 (qtotal[7:1], divi1[WIDTH-1:WIDTH-3], quotient); assign ulp = quotient[2]|quotient[3]; assign zero = ~(quotient[3]|quotient[2]|quotient[1]|quotient[0]); // Map to binary encoding assign qsel[1] = quotient[3]|quotient[2]; assign qsel[0] = quotient[3]|quotient[1]; - mux4 #(68) mx4 (divi2, divi1, divi1c, divi2c, qsel, mdivi_temp); - mux2 #(68) mx5 (mdivi_temp, 68'h0, zero, mdivi); - csa #(68) csa1 (mdivi, SumN, {CarryN[67:1], ulp}, Sum, Carry); + mux4 #(WIDTH+4) mx4 (divi2, divi1, divi1c, divi2c, qsel, mdivi_temp); + mux2 #(WIDTH+4) mx5 (mdivi_temp, {WIDTH+4{1'b0}}, zero, mdivi); + csa #(WIDTH+4) csa1 (mdivi, SumN, {CarryN[WIDTH+3:1], ulp}, Sum, Carry); // regs : save CSA - flopenr #(68) reg1 (clk, reset, enable, Sum, SumN2); - flopenr #(68) reg2 (clk, reset, enable, Carry, CarryN2); + flopenr #(WIDTH+4) reg1 (clk, reset, enable, Sum, SumN2); + flopenr #(WIDTH+4) reg2 (clk, reset, enable, Carry, CarryN2); // 
OTF ls_control otf1 (quotient, Qin, QMin, CshiftQ, CshiftQM); - otf #(65) otf2 (Qin, QMin, CshiftQ, CshiftQM, clk, - otfzero, enable, Qstar, QMstar); + otf #(WIDTH+1) otf2 (Qin, QMin, CshiftQ, CshiftQM, clk, + otfzero, enable, Qstar, QMstar); // Correction and generation of Remainder - adder #(68) cpa2 (SumN2[67:0], CarryN2[67:0], rem1); + adder #(WIDTH+4) cpa2 (SumN2[WIDTH+3:0], CarryN2[WIDTH+3:0], rem1); // Add back +D as correction - csa #(68) csa2 (CarryN2[67:0], SumN2[67:0], divi1, SumR, CarryR); - adder #(68) cpa3 (SumR, CarryR, rem2); + csa #(WIDTH+4) csa2 (CarryN2[WIDTH+3:0], SumN2[WIDTH+3:0], divi1, SumR, CarryR); + adder #(WIDTH+4) cpa3 (SumR, CarryR, rem2); // Choose remainder (Rem or Rem+D) - mux2 #(68) mx6 (rem1, rem2, rem1[67], rem3); + mux2 #(WIDTH+4) mx6 (rem1, rem2, rem1[WIDTH+3], rem3); // Choose correct Q or QM - mux2 #(65) mx7 (Qstar, QMstar, rem1[67], Qt); + mux2 #(WIDTH+1) mx7 (Qstar, QMstar, rem1[WIDTH+3], Qt); // Final results - assign rem0 = rem3[64:0]; + assign rem0 = rem3[WIDTH:0]; assign Q = Qt; endmodule // divide4x64 +// Load/Control for OTFC module ls_control (quot, Qin, QMin, CshiftQ, CshiftQM); input logic [3:0] quot; @@ -255,8 +251,7 @@ module ls_control (quot, Qin, QMin, CshiftQ, CshiftQM); endmodule -// On-the-fly Conversion per Ercegovac/Lang - +// On-the-fly Conversion (OTFC) module otf #(parameter WIDTH=8) (Qin, QMin, CshiftQ, CshiftQM, clk, reset, enable, R2Q, R1Q); @@ -309,10 +304,9 @@ module csa #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, c, fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]); end endgenerate - //assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0}; // trmimmed excess bit dh 5/3/21 - assign carry = {carry_temp[WIDTH-1:1], 1'b0}; + assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0}; -endmodule // adder +endmodule // csa module eqcmp #(parameter WIDTH = 8) (input logic [WIDTH-1:0] a, b, @@ -322,6 +316,7 @@ module eqcmp #(parameter WIDTH = 8) endmodule // eqcmp +// QST for r=4 module qst4 (input logic 
[6:0] s, input logic [2:0] d, output logic [3:0] q); @@ -368,8 +363,6 @@ module qst4 (input logic [6:0] s, input logic [2:0] d, endmodule // qst4 -// LZD - module lz2 (P, V, B0, B1); input logic B0; @@ -497,27 +490,24 @@ module lz64 (ZP, ZV, B); endmodule // lz64 // FSM Control for Integer Divider +module fsm64 #(parameter WIDTH=6) + (en, state0, done, otfzero, divBusy, start, error, NumIter, clk, reset); -module fsm64 (en, state0, done, divdone, otfzero, divBusy, - start, error, NumIter, clk, reset); - - input logic [5:0] NumIter; - input logic clk; - input logic reset; - input logic start; - input logic error; + input logic [WIDTH-1:0] NumIter; + input logic clk; + input logic reset; + input logic start; + input logic error; - output logic done; - output logic en; - output logic state0; - output logic divdone; - output logic otfzero; - output logic divBusy; + output logic done; + output logic en; + output logic state0; + output logic otfzero; + output logic divBusy; - logic LT, EQ; - logic Divide0; - logic [5:0] CURRENT_STATE; - logic [5:0] NEXT_STATE; + logic LT, EQ; + logic [5:0] CURRENT_STATE; + logic [5:0] NEXT_STATE; parameter [5:0] S0=6'd0, S1=6'd1, S2=6'd2, @@ -542,12 +532,8 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, CURRENT_STATE<=NEXT_STATE; end - // Going to cheat and hard code number of states - // needed into FSM instead of using a counter - // FIXME: could counter be better - // Cheated and made 8 - let synthesis do its magic - magcompare8 comp1 (LT, EQ, {2'h0, CURRENT_STATE}, {2'h0, NumIter}); + magcompare8 comp1 (LT, EQ, {2'h0, CURRENT_STATE}, {{8-WIDTH{1'b0}}, NumIter}); always @(CURRENT_STATE or start) begin @@ -560,7 +546,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; divBusy = 1'b0; state0 = 1'b0; - divdone = 1'b0; done = 1'b0; NEXT_STATE <= S0; end @@ -568,30 +553,21 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, begin otfzero = 1'b0; en = 1'b1; - divBusy = 1'b1; + divBusy = 1'b1; 
state0 = 1'b1; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; done = 1'b0; - divdone = 1'b0; NEXT_STATE <= S1; end end S1: begin - otfzero = 1'b0; - divBusy = 1'b1; + otfzero = 1'b0; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S2; end else @@ -599,8 +575,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S2; + NEXT_STATE <= S36; end end // case: S1 S2: @@ -612,10 +587,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S3; end // if (LT|EQ) else @@ -623,8 +594,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S3; + NEXT_STATE <= S36; end end // case: S2 S3: @@ -636,10 +606,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S4; end else @@ -647,8 +613,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S4; + NEXT_STATE <= S36; end end // case: S3 S4: @@ -660,10 +625,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S5; end else @@ -671,8 +632,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S5; + NEXT_STATE <= S36; end end // case: S4 S5: @@ -684,10 +644,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S6; end // if (LT|EQ) else @@ -695,8 +651,7 @@ module 
fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S6; + NEXT_STATE <= S36; end end // case: S5 S6: @@ -708,10 +663,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S7; end // if (LT|EQ) else @@ -719,8 +670,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S7; + NEXT_STATE <= S36; end end // case: S6 S7: @@ -732,10 +682,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S8; end // if (LT|EQ) else @@ -743,8 +689,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S8; + NEXT_STATE <= S36; end end // case: S7 S8: @@ -756,10 +701,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S9; end // if (LT|EQ) else @@ -767,8 +708,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S9; + NEXT_STATE <= S36; end end // case: S8 S9: @@ -780,10 +720,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S10; end // if (LT|EQ) else @@ -791,8 +727,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S10; + NEXT_STATE <= S36; end end // case: S9 S10: @@ -804,10 +739,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 
1'b0; NEXT_STATE <= S11; end // if (LT|EQ) else @@ -815,8 +746,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S11; + NEXT_STATE <= S36; end end // case: S10 S11: @@ -828,10 +758,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S12; end // if (LT|EQ) else @@ -839,8 +765,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S12; + NEXT_STATE <= S36; end end // case: S11 S12: @@ -852,10 +777,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S13; end // if (LT|EQ) else @@ -863,8 +784,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S13; + NEXT_STATE <= S36; end end // case: S12 S13: @@ -876,10 +796,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S14; end // if (LT|EQ) else @@ -887,23 +803,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S14; + NEXT_STATE <= S36; end end // case: S13 S14: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S15; end // if (LT|EQ) else @@ -911,23 +822,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S15; + NEXT_STATE <= S36; end end // case: S14 S15: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin 
en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S16; end // if (LT|EQ) else @@ -935,23 +841,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S16; + NEXT_STATE <= S36; end end // case: S15 S16: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S17; end // if (LT|EQ) else @@ -959,23 +860,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S17; + NEXT_STATE <= S36; end end // case: S16 S17: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S18; end // if (LT|EQ) else @@ -983,23 +879,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S18; + NEXT_STATE <= S36; end end // case: S17 S18: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S19; end // if (LT|EQ) else @@ -1007,23 +898,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S19; + NEXT_STATE <= S36; end end // case: S18 S19: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S20; end // if (LT|EQ) else @@ -1031,23 +917,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S20; + NEXT_STATE <= S36; end end // case: S19 
S20: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S21; end // if (LT|EQ) else @@ -1055,23 +936,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S21; + NEXT_STATE <= S36; end end // case: S20 S21: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S22; end // if (LT|EQ) else @@ -1079,23 +955,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S22; + NEXT_STATE <= S36; end end // case: S21 S22: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S23; end // if (LT|EQ) else @@ -1103,23 +974,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S23; + NEXT_STATE <= S36; end end // case: S22 S23: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S24; end // if (LT|EQ) else @@ -1127,23 +993,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S24; + NEXT_STATE <= S36; end end // case: S23 S24: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S25; end // if (LT|EQ) else @@ -1151,23 +1012,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; 
- divdone = 1'b0; - NEXT_STATE <= S25; + NEXT_STATE <= S36; end end // case: S24 S25: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S26; end // if (LT|EQ) else @@ -1175,23 +1031,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S26; + NEXT_STATE <= S36; end end // case: S25 S26: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S27; end // if (LT|EQ) else @@ -1199,23 +1050,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S27; + NEXT_STATE <= S36; end end // case: S26 S27: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S28; end // if (LT|EQ) else @@ -1223,23 +1069,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S28; + NEXT_STATE <= S36; end end // case: S27 S28: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S29; end // if (LT|EQ) else @@ -1247,23 +1088,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S29; + NEXT_STATE <= S36; end end // case: S28 S29: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S30; end // if (LT|EQ) else @@ -1271,23 +1107,18 @@ module fsm64 
(en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S30; + NEXT_STATE <= S36; end end // case: S29 S30: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S31; end // if (LT|EQ) else @@ -1295,8 +1126,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S31; + NEXT_STATE <= S36; end end // case: S30 S31: @@ -1308,10 +1138,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S32; end // if (LT|EQ) else @@ -1319,8 +1145,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S32; + NEXT_STATE <= S36; end end // case: S31 S32: @@ -1332,10 +1157,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S33; end // if (LT|EQ) else @@ -1343,8 +1164,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S33; + NEXT_STATE <= S36; end end // case: S32 S33: @@ -1356,10 +1176,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S34; end // if (LT|EQ) else @@ -1367,23 +1183,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S34; + NEXT_STATE <= S36; end end // case: S33 S34: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - 
else - divdone = 1'b0; NEXT_STATE <= S35; end // if (LT|EQ) else @@ -1391,8 +1202,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S35; + NEXT_STATE <= S36; end end // case: S34 S35: @@ -1404,10 +1214,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S36; end // if (LT|EQ) else @@ -1415,7 +1221,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; NEXT_STATE <= S36; end end // case: S35 @@ -1427,12 +1232,10 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, done = 1'b1; if (EQ) begin - divdone = 1'b1; en = 1'b1; end else begin - divdone = 1'b0; en = 1'b0; end NEXT_STATE <= S0; @@ -1440,11 +1243,10 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, default: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b0; en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; NEXT_STATE <= S0; end endcase // case(CURRENT_STATE) @@ -1505,166 +1307,39 @@ module magcompare8 (LT, EQ, A, B); endmodule // magcompare8 -module shifter_l64 (Z, A, Shift); +// RISC-V Exception Logic for Divide by 0 and Overflow (Signed Integer Divide) +module exception_int #(parameter WIDTH=8) + (Q, rem, op1, S, div0, Max_N, D_NegOne, Qf, remf); - input logic [63:0] A; - input logic [5:0] Shift; + input logic [WIDTH-1:0] Q; + input logic [WIDTH-1:0] rem; + input logic [WIDTH-1:0] op1; + input logic S; + input logic div0; + input logic Max_N; + input logic D_NegOne; - logic [63:0] stage1; - logic [63:0] stage2; - logic [63:0] stage3; - logic [63:0] stage4; - logic [63:0] stage5; - - output logic [63:0] Z; - - mux2 #(64) mx01(A, {A[31:0], 32'h0}, Shift[5], stage1); - mux2 #(64) mx02(stage1, {stage1[47:0], 16'h0}, Shift[4], stage2); - mux2 #(64) mx03(stage2, {stage2[55:0], 8'h0}, Shift[3], stage3); - mux2 #(64) 
mx04(stage3, {stage3[59:0], 4'h0}, Shift[2], stage4); - mux2 #(64) mx05(stage4, {stage4[61:0], 2'h0}, Shift[1], stage5); - mux2 #(64) mx06(stage5, {stage5[62:0], 1'h0}, Shift[0], Z); + output logic [WIDTH-1:0] Qf; + output logic [WIDTH-1:0] remf; -endmodule // shifter_l64 - -module shifter_r64 (Z, A, Shift); - - input logic [63:0] A; - input logic [5:0] Shift; - - logic [63:0] stage1; - logic [63:0] stage2; - logic [63:0] stage3; - logic [63:0] stage4; - logic [63:0] stage5; - - output logic [63:0] Z; - - mux2 #(64) mx01(A, {32'h0, A[63:32]}, Shift[5], stage1); - mux2 #(64) mx02(stage1, {16'h0, stage1[63:16]}, Shift[4], stage2); - mux2 #(64) mx03(stage2, {8'h0, stage2[63:8]}, Shift[3], stage3); - mux2 #(64) mx04(stage3, {4'h0, stage3[63:4]}, Shift[2], stage4); - mux2 #(64) mx05(stage4, {2'h0, stage4[63:2]}, Shift[1], stage5); - mux2 #(64) mx06(stage5, {1'h0, stage5[63:1]}, Shift[0], Z); - -endmodule // shifter_r64 - -module shifter_l32 (Z, A, Shift); - - input logic [31:0] A; - input logic [4:0] Shift; - - logic [31:0] stage1; - logic [31:0] stage2; - logic [31:0] stage3; - logic [31:0] stage4; - - output logic [31:0] Z; - - mux2 #(32) mx01(A, {A[15:0], 16'h0}, Shift[4], stage1); - mux2 #(32) mx02(stage1, {stage1[23:0], 8'h0}, Shift[3], stage2); - mux2 #(32) mx03(stage2, {stage2[27:0], 4'h0}, Shift[2], stage3); - mux2 #(32) mx04(stage3, {stage3[29:0], 2'h0}, Shift[1], stage4); - mux2 #(32) mx05(stage4, {stage4[30:0], 1'h0}, Shift[0], Z); - -endmodule // shifter_l32 - -module shifter_r32 (Z, A, Shift); - - input logic [31:0] A; - input logic [4:0] Shift; - - logic [31:0] stage1; - logic [31:0] stage2; - logic [31:0] stage3; - logic [31:0] stage4; - - output logic [31:0] Z; - - mux2 #(32) mx01(A, {16'h0, A[31:16]}, Shift[4], stage1); - mux2 #(32) mx02(stage1, {8'h0, stage1[31:8]}, Shift[3], stage2); - mux2 #(32) mx03(stage2, {4'h0, stage2[31:4]}, Shift[2], stage3); - mux2 #(32) mx04(stage3, {2'h0, stage3[31:2]}, Shift[1], stage4); - mux2 #(32) mx05(stage4, {1'h0, 
stage4[31:1]}, Shift[0], Z); - -endmodule // shifter_r32 - -module shift_right #(parameter WIDTH=8) - (input logic [`XLEN-1:0] A, - input logic [$clog2(`XLEN)-1:0] Shift, - output logic [`XLEN-1:0] Z); - - logic [`XLEN-1:0] stage [$clog2(`XLEN):0]; - genvar i; - - assign stage[0] = A; - generate - for (i=0;i<$clog2(`XLEN);i=i+1) - begin : genbit - mux2 #(`XLEN) mux_inst (stage[i], - {{(`XLEN/(2**(i+1))){1'b0}}, stage[i][`XLEN-1:`XLEN/(2**(i+1))]}, - Shift[$clog2(`XLEN)-i-1], - stage[i+1]); - end - endgenerate - assign Z = stage[$clog2(`XLEN)]; - -endmodule // shift_right - -module shift_left #(parameter WIDTH=8) - (input logic [`XLEN-1:0] A, - input logic [$clog2(`XLEN)-1:0] Shift, - output logic [`XLEN-1:0] Z); - - logic [`XLEN-1:0] stage [$clog2(`XLEN):0]; - genvar i; - - assign stage[0] = A; - generate - for (i=0;i<$clog2(`XLEN);i=i+1) - begin : genbit - mux2 #(`XLEN) mux_inst (stage[i], - {stage[i][`XLEN-1-`XLEN/(2**(i+1)):0], {(`XLEN/(2**(i+1))){1'b0}}}, - Shift[$clog2(`XLEN)-i-1], - stage[i+1]); - end - endgenerate - assign Z = stage[$clog2(`XLEN)]; - -endmodule // shift_right - -module exception_int (Q, rem, op1, S, div0, Max_N, D_NegOne, Qf, remf); - - input logic [63:0] Q; - input logic [63:0] rem; - input logic [63:0] op1; - input logic S; - input logic div0; - input logic Max_N; - input logic D_NegOne; - - output logic [63:0] Qf; - output logic [63:0] remf; - - // Needs to be optimized always_comb case ({div0, S, Max_N, D_NegOne}) 4'b0000 : Qf = Q; 4'b0001 : Qf = Q; - 4'b0010 : Qf = Q; - 4'b0011 : Qf = Q; + 4'b0010 : Qf = Q; + 4'b0011 : Qf = Q; 4'b0100 : Qf = Q; - 4'b0101 : Qf = Q; + 4'b0101 : Qf = Q; 4'b0110 : Qf = Q; - 4'b0111 : Qf = {1'b1, 31'h0}; - 4'b1000 : Qf = {64{1'b1}}; - 4'b1001 : Qf = {64{1'b1}}; - 4'b1010 : Qf = {64{1'b1}}; - 4'b1011 : Qf = {64{1'b1}}; - 4'b1100 : Qf = {64{1'b1}}; - 4'b1101 : Qf = {64{1'b1}}; - 4'b1110 : Qf = {64{1'b1}}; - 4'b1111 : Qf = {64{1'b1}}; + 4'b0111 : Qf = {1'b1, {WIDTH-1{1'h0}}}; + 4'b1000 : Qf = {WIDTH{1'b1}}; + 
4'b1001 : Qf = {WIDTH{1'b1}}; + 4'b1010 : Qf = {WIDTH{1'b1}}; + 4'b1011 : Qf = {WIDTH{1'b1}}; + 4'b1100 : Qf = {WIDTH{1'b1}}; + 4'b1101 : Qf = {WIDTH{1'b1}}; + 4'b1110 : Qf = {WIDTH{1'b1}}; + 4'b1111 : Qf = {WIDTH{1'b1}}; default: Qf = Q; endcase @@ -1672,18 +1347,18 @@ module exception_int (Q, rem, op1, S, div0, Max_N, D_NegOne, Qf, remf); case ({div0, S, Max_N, D_NegOne}) 4'b0000 : remf = rem; 4'b0001 : remf = rem; - 4'b0010 : remf = rem; + 4'b0010 : remf = rem; 4'b0011 : remf = rem; 4'b0100 : remf = rem; 4'b0101 : remf = rem; 4'b0110 : remf = rem; - 4'b0111 : remf = 64'h0; + 4'b0111 : remf = {WIDTH{1'h0}}; 4'b1000 : remf = op1; 4'b1001 : remf = op1; 4'b1010 : remf = op1; 4'b1011 : remf = op1; 4'b1100 : remf = op1; - 4'b1101 : remf = op1; + 4'b1101 : remf = op1; 4'b1110 : remf = op1; 4'b1111 : remf = op1; default: remf = rem; @@ -1693,4 +1368,3 @@ endmodule // exception_int /* verilator lint_on COMBDLY */ /* verilator lint_on IMPLICIT */ - diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 17c4aac5..e10b0c55 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -47,13 +47,13 @@ module muldiv ( logic [`XLEN-1:0] MulDivResultE, MulDivResultM; logic [`XLEN-1:0] PrelimResultE; logic [`XLEN-1:0] QuotE, RemE; - //logic [`XLEN-1:0] Q, R; logic [`XLEN*2-1:0] ProdE; logic enable_q; logic [2:0] Funct3E_Q; logic div0error; logic [`XLEN-1:0] N, D; + logic [`XLEN-1:0] Num0, Den0; logic gclk; logic DivStartE; @@ -70,15 +70,25 @@ module muldiv ( end assign gclk = enable_q & clk; + // Handle sign extension for W-type instructions + if (`XLEN == 64) begin // RV64 has W-type instructions + assign Num0 = W64E ? {{32{SrcAE[31]&signedDivide}}, SrcAE[31:0]} : SrcAE; + assign Den0 = W64E ? 
{{32{SrcBE[31]&signedDivide}}, SrcBE[31:0]} : SrcBE; + end else begin // RV32 has no W-type instructions + assign Num0 = SrcAE; + assign Den0 = SrcBE; + end + // capture the Numerator/Denominator - flopenrc #(`XLEN) reg_num (.d(SrcAE), .q(N), + flopenrc #(`XLEN) reg_num (.d(Num0), .q(N), .en(startDivideE), .clear(DivDoneE), .reset(reset), .clk(~gclk)); - flopenrc #(`XLEN) reg_den (.d(SrcBE), .q(D), + flopenrc #(`XLEN) reg_den (.d(Den0), .q(D), .en(startDivideE), .clear(DivDoneE), - .reset(reset), .clk(~gclk)); + .reset(reset), .clk(~gclk)); + assign signedDivide = (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); - div div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); + intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); // Added for debugging of start signal for divide assign startDivideE = MulDivE&DivStartE&~DivBusyE; @@ -93,7 +103,6 @@ module muldiv ( // Select result always_comb - // case (DivDoneE ? 
Funct3E_Q : Funct3E) case (Funct3E) 3'b000: PrelimResultE = ProdE[`XLEN-1:0]; 3'b001: PrelimResultE = ProdE[`XLEN*2-1:`XLEN]; diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index ea693900..dabc6d12 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -166,12 +166,12 @@ string tests32f[] = '{ "rv64m/I-MULW-01", "3000", "rv64m/I-DIV-01", "3000", "rv64m/I-DIVU-01", "3000", - //"rv64m/I-DIVUW-01", "3000", - //"rv64m/I-DIVW-01", "3000", + "rv64m/I-DIVUW-01", "3000", + "rv64m/I-DIVW-01", "3000", "rv64m/I-REM-01", "3000", - "rv64m/I-REMU-01", "3000" - //"rv64m/I-REMUW-01", "3000", - //"rv64m/I-REMW-01", "3000" + "rv64m/I-REMU-01", "3000", + "rv64m/I-REMUW-01", "3000", + "rv64m/I-REMW-01", "3000" }; string tests64ic[] = '{ @@ -320,11 +320,11 @@ string tests32f[] = '{ "rv32m/I-MUL-01", "2000", "rv32m/I-MULH-01", "2000", "rv32m/I-MULHSU-01", "2000", - "rv32m/I-MULHU-01", "2000" - //"rv32m/I-DIV-01", "2000", - //"rv32m/I-DIVU-01", "2000", - //"rv32m/I-REM-01", "2000", - //"rv32m/I-REMU-01", "2000" + "rv32m/I-MULHU-01", "2000", + "rv32m/I-DIV-01", "2000", + "rv32m/I-DIVU-01", "2000", + "rv32m/I-REM-01", "2000", + "rv32m/I-REMU-01", "2000" }; string tests32ic[] = '{ @@ -439,8 +439,11 @@ string tests32f[] = '{ string testsBP64[] = '{ "rv64BP/simple", "10000", + "rv64BP/mmm", "1000000", + "rv64BP/linpack_bench", "1000000", + "rv64BP/sieve", "1000000", "rv64BP/qsort", "1000000", - "rv64BP/sieve", "1000000" + "rv64BP/dhrystone", "1000000" }; string tests64p[] = '{