This commit is contained in:
bbracker 2021-06-02 10:03:23 -04:00
commit a683dd7fde
39 changed files with 1807 additions and 1490 deletions

View File

@ -4,12 +4,12 @@ ROOT := ..
LIBRARY_DIRS :=
LIBRARY_FILES :=
MARCH :=-march=rv64ic
MABI :=-mabi=lp64
MARCH :=-march=rv64imfdc
MABI :=-mabi=lp64d
LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles
AFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64 -W
CFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64 -mcmodel=medany
AFLAGS =$(MARCH) $(MABI) -W
CFLAGS =$(MARCH) $(MABI) -mcmodel=medany -O2
AS=riscv64-unknown-elf-as
CC=riscv64-unknown-elf-gcc
AR=riscv64-unknown-elf-ar
@ -19,7 +19,7 @@ all: libcrt0.a
%.o: %.s
${AS} ${AFLAGS} -c $< -o $@
libcrt0.a: start.o
libcrt0.a: start.o pcnt_driver.o pre_main.o
${AR} -r $@ $^
clean:

View File

@ -43,11 +43,10 @@ _start:
# set the stack pointer to the top of memory
# 0x8000_0000 + 64K - 8 bytes
li sp, 0x007FFFF8
# set the stack pointer to the top of memory - 8 bytes (pointer size)
li sp, 0x07FFFFF8
jal ra, main
jal ra, pre_main
jal ra, _halt
.section .text

View File

@ -8,7 +8,7 @@ MARCH :=-march=rv64ic
MABI :=-mabi=lp64
LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map
CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align
CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align -O2
CC=riscv64-unknown-elf-gcc
DA=riscv64-unknown-elf-objdump -d

View File

@ -8,7 +8,7 @@ MARCH :=-march=rv64ic
MABI :=-mabi=lp64
LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map
CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align
CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align -O2
CC=riscv64-unknown-elf-gcc
DA=riscv64-unknown-elf-objdump -d

View File

@ -66,21 +66,21 @@ int main () {
ans = sieve ();
//gettimeofday(&after , NULL);
if (ans != 1899)
printf ("Sieve result wrong, ans = %d, expected 1899", ans);
/* /\* /\\* if (ans != 1899) *\\/ *\/ */
/* /\* /\\* printf ("Sieve result wrong, ans = %d, expected 1899", ans); *\\/ *\/ */
//printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) );
/* /\* //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); *\/ */
printf("Round 2\n");
//gettimeofday(&before , NULL);
/* /\* printf("Round 2\n"); *\/ */
/* //gettimeofday(&before , NULL); */
ans = sieve ();
//gettimeofday(&after , NULL);
if (ans != 1899)
printf ("Sieve result wrong, ans = %d, expected 1899", ans);
/* ans = sieve (); */
/* //gettimeofday(&after , NULL); */
/* if (ans != 1899) */
/* printf ("Sieve result wrong, ans = %d, expected 1899", ans); */
//printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) );
/* //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); */
return 0;

View File

@ -8,7 +8,7 @@ MARCH :=-march=rv64ic
MABI :=-mabi=lp64
LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map
CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align
CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align -O2
CC=riscv64-unknown-elf-gcc
DA=riscv64-unknown-elf-objdump -d

View File

@ -5,4 +5,8 @@ int fail();
int simple_csrbr_test();
int lbu_test();
int icache_spill_test();
void global_hist_0_space_test();
void global_hist_1_space_test();
void global_hist_2_space_test();
void global_hist_3_space_test();
#endif

View File

@ -2,6 +2,10 @@
int main(){
//int res = icache_spill_test();
global_hist_3_space_test();
global_hist_2_space_test();
global_hist_1_space_test();
global_hist_0_space_test();
int res = 1;
if (res < 0) {
fail();

View File

@ -2,11 +2,14 @@
// wally-constants.vh
//
// Written: tfleming@hmc.edu 4 March 2021
// Modified:
// Modified: Kmacsaigoren@hmc.edu 31 May 2021
// Added constants for checking sv mode and changed existing constants to accommodate
// both sv48 and sv39
//
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
// These macros should not be changed, except in the event of an
// update to the architecture or particularly special circumstances.
// Purpose: Specify constants necessary for different memory virtualization modes.
// These are specific to sv48, defined in section 4.5 of the privileged spec.
// However, despite different constants for different modes, the hardware helps distinguish between
// each mode.
//
// A component of the Wally configurable RISC-V project.
//
@ -25,9 +28,16 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// Virtual Memory Constants (sv39)
// Virtual Memory Constants (sv48)
`define VPN_SEGMENT_BITS 9
`define VPN_BITS 27
`define VPN_BITS 36
`define PPN_HIGH_SEGMENT_BITS 17
`define PPN_BITS 44
`define PPN_HIGH_SEGMENT_BITS 26
`define PA_BITS 56
`define PA_BITS 56
`define SVMODE_BITS 4
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8
`define SV48 9

View File

@ -2,11 +2,14 @@
// wally-constants.vh
//
// Written: tfleming@hmc.edu 4 March 2021
// Modified:
// Modified: Kmacsaigoren@hmc.edu 31 May 2021
// Added constants for checking sv mode and changed existing constants to accommodate
// both sv48 and sv39
//
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
// These macros should not be changed, except in the event of an
// update to the architecture or particularly special circumstances.
// Purpose: Specify constants necessary for different memory virtualization modes.
// These are specific to sv48, defined in section 4.5 of the privileged spec.
// However, despite different constants for different modes, the hardware helps distinguish between
// each mode.
//
// A component of the Wally configurable RISC-V project.
//
@ -25,9 +28,16 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// Virtual Memory Constants (sv39)
// Virtual Memory Constants (sv48)
`define VPN_SEGMENT_BITS 9
`define VPN_BITS 27
`define VPN_BITS 36
`define PPN_HIGH_SEGMENT_BITS 17
`define PPN_BITS 44
`define PPN_HIGH_SEGMENT_BITS 26
`define PA_BITS 56
`define PA_BITS 56
`define SVMODE_BITS 4
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8
`define SV48 9

View File

@ -2,11 +2,14 @@
// wally-constants.vh
//
// Written: tfleming@hmc.edu 4 March 2021
// Modified:
// Modified: Kmacsaigoren@hmc.edu 31 May 2021
// Added constants for checking sv mode and changed existing constants to accommodate
// both sv48 and sv39
//
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
// These macros should not be changed, except in the event of an
// update to the architecture or particularly special circumstances.
// Purpose: Specify constants necessary for different memory virtualization modes.
// These are specific to sv48, defined in section 4.5 of the privileged spec.
// However, despite different constants for different modes, the hardware helps distinguish between
// each mode.
//
// A component of the Wally configurable RISC-V project.
//
@ -25,9 +28,16 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// Virtual Memory Constants (sv39)
// Virtual Memory Constants (sv48)
`define VPN_SEGMENT_BITS 9
`define VPN_BITS 27
`define VPN_BITS 36
`define PPN_HIGH_SEGMENT_BITS 17
`define PPN_BITS 44
`define PPN_HIGH_SEGMENT_BITS 26
`define PA_BITS 56
`define PA_BITS 56
`define SVMODE_BITS 4
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8
`define SV48 9

View File

@ -2,11 +2,14 @@
// wally-constants.vh
//
// Written: tfleming@hmc.edu 4 March 2021
// Modified:
// Modified: Kmacsaigoren@hmc.edu 31 May 2021
// Added constants for checking sv mode and changed existing constants to accommodate
// both sv48 and sv39
//
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
// These macros should not be changed, except in the event of an
// update to the architecture or particularly special circumstances.
// Purpose: Specify constants necessary for different memory virtualization modes.
// These are specific to sv48, defined in section 4.5 of the privileged spec.
// However, despite different constants for different modes, the hardware helps distinguish between
// each mode.
//
// A component of the Wally configurable RISC-V project.
//
@ -25,9 +28,16 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// Virtual Memory Constants (sv39)
// Virtual Memory Constants (sv48)
`define VPN_SEGMENT_BITS 9
`define VPN_BITS 27
`define VPN_BITS 36
`define PPN_HIGH_SEGMENT_BITS 17
`define PPN_BITS 44
`define PPN_HIGH_SEGMENT_BITS 26
`define PA_BITS 56
`define PA_BITS 56
`define SVMODE_BITS 4
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8
`define SV48 9

View File

@ -2,7 +2,10 @@
// wally-constants.vh
//
// Written: tfleming@hmc.edu 4 March 2021
// Modified:
// Modified: kmacsaigoren@hmc.edu 31 May 2021
// added svmode constants. These aren't strictly necessary since we're just checking one bit,
// but they're here to stay consistent and to make sure we don't wind up
// in a "NO_TRANSLATE undefined" situation.
//
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
// These macros should not be changed, except in the event of an
@ -31,3 +34,10 @@
`define PPN_BITS 22
`define PPN_HIGH_SEGMENT_BITS 12
`define PA_BITS 34
`define SVMODE_BITS 1
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8 // These two are only here to stop
`define SV48 9 // the verilator from yelling at me

View File

@ -32,7 +32,7 @@
`define XLEN 64
//`define MISA (32'h00000105)
`define MISA (32'h00000104 | 1<<5 | 1<<18 | 1 << 20 | 1 << 12 | 1 << 0)
`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0)
`define A_SUPPORTED ((`MISA >> 0) % 2 == 1)
`define C_SUPPORTED ((`MISA >> 2) % 2 == 1)
`define D_SUPPORTED ((`MISA >> 3) % 2 == 1)
@ -107,8 +107,9 @@
/* verilator lint_off ASSIGNDLY */
/* verilator lint_off PINCONNECTEMPTY */
`define TWO_BIT_PRELOAD "../config/rv64icfd/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv64icfd/BTBPredictor.txt"
`define TWO_BIT_PRELOAD "../config/rv64BP/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv64BP/BTBPredictor.txt"
`define BPRED_ENABLED 1
`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE
//`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE
`define BPTYPE "BPGSHARE" // BPTWOBIT or "BPGLOBAL" or BPLOCALPAg or BPGSHARE
`define TESTSBP 1

View File

@ -2,11 +2,14 @@
// wally-constants.vh
//
// Written: tfleming@hmc.edu 4 March 2021
// Modified:
// Modified: Kmacsaigoren@hmc.edu 31 May 2021
// Added constants for checking sv mode and changed existing constants to accommodate
// both sv48 and sv39
//
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
// These macros should not be changed, except in the event of an
// update to the architecture or particularly special circumstances.
// Purpose: Specify constants necessary for different memory virtualization modes.
// These are specific to sv48, defined in section 4.5 of the privileged spec.
// However, despite different constants for different modes, the hardware helps distinguish between
// each mode.
//
// A component of the Wally configurable RISC-V project.
//
@ -25,9 +28,16 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// Virtual Memory Constants (sv39)
// Virtual Memory Constants (sv48)
`define VPN_SEGMENT_BITS 9
`define VPN_BITS 27
`define VPN_BITS 36
`define PPN_HIGH_SEGMENT_BITS 17
`define PPN_BITS 44
`define PPN_HIGH_SEGMENT_BITS 26
`define PA_BITS 56
`define PA_BITS 56
`define SVMODE_BITS 4
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8
`define SV48 9

View File

@ -31,7 +31,7 @@
`define XLEN 64
// MISA RISC-V configuration per specification
`define MISA (32'h00000104 | 0 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0)
`define MISA (32'h00000104 | 0 << 5 | 0 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0)
`define A_SUPPORTED ((`MISA >> 0) % 2 == 1)
`define C_SUPPORTED ((`MISA >> 2) % 2 == 1)
`define D_SUPPORTED ((`MISA >> 3) % 2 == 1)

View File

@ -2,11 +2,14 @@
// wally-constants.vh
//
// Written: tfleming@hmc.edu 4 March 2021
// Modified:
// Modified: Kmacsaigoren@hmc.edu 31 May 2021
// Added constants for checking sv mode and changed existing constants to accommodate
// both sv48 and sv39
//
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
// These macros should not be changed, except in the event of an
// update to the architecture or particularly special circumstances.
// Purpose: Specify constants necessary for different memory virtualization modes.
// These are specific to sv48, defined in section 4.5 of the privileged spec.
// However, despite different constants for different modes, the hardware helps distinguish between
// each mode.
//
// A component of the Wally configurable RISC-V project.
//
@ -25,9 +28,16 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// Virtual Memory Constants (sv39)
// Virtual Memory Constants (sv48)
`define VPN_SEGMENT_BITS 9
`define VPN_BITS 27
`define VPN_BITS 36
`define PPN_HIGH_SEGMENT_BITS 17
`define PPN_BITS 44
`define PPN_HIGH_SEGMENT_BITS 26
`define PA_BITS 56
`define PA_BITS 56
`define SVMODE_BITS 4
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8
`define SV48 9

View File

@ -2,11 +2,14 @@
// wally-constants.vh
//
// Written: tfleming@hmc.edu 4 March 2021
// Modified:
// Modified: Kmacsaigoren@hmc.edu 31 May 2021
// Added constants for checking sv mode and changed existing constants to accommodate
// both sv48 and sv39
//
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
// These macros should not be changed, except in the event of an
// update to the architecture or particularly special circumstances.
// Purpose: Specify constants necessary for different memory virtualization modes.
// These are specific to sv48, defined in section 4.5 of the privileged spec.
// However, despite different constants for different modes, the hardware helps distinguish between
// each mode.
//
// A component of the Wally configurable RISC-V project.
//
@ -25,9 +28,16 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// Virtual Memory Constants (sv39)
// Virtual Memory Constants (sv48)
`define VPN_SEGMENT_BITS 9
`define VPN_BITS 27
`define VPN_BITS 36
`define PPN_HIGH_SEGMENT_BITS 17
`define PPN_BITS 44
`define PPN_HIGH_SEGMENT_BITS 26
`define PA_BITS 56
`define PA_BITS 56
`define SVMODE_BITS 4
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8
`define SV48 9

View File

@ -2,11 +2,14 @@
// wally-constants.vh
//
// Written: tfleming@hmc.edu 4 March 2021
// Modified:
// Modified: Kmacsaigoren@hmc.edu 31 May 2021
// Added constants for checking sv mode and changed existing constants to accommodate
// both sv48 and sv39
//
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
// These macros should not be changed, except in the event of an
// update to the architecture or particularly special circumstances.
// Purpose: Specify constants necessary for different memory virtualization modes.
// These are specific to sv48, defined in section 4.5 of the privileged spec.
// However, despite different constants for different modes, the hardware helps distinguish between
// each mode.
//
// A component of the Wally configurable RISC-V project.
//
@ -25,9 +28,16 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// Virtual Memory Constants (sv39)
// Virtual Memory Constants (sv48)
`define VPN_SEGMENT_BITS 9
`define VPN_BITS 27
`define VPN_BITS 36
`define PPN_HIGH_SEGMENT_BITS 17
`define PPN_BITS 44
`define PPN_HIGH_SEGMENT_BITS 26
`define PA_BITS 56
`define PA_BITS 56
`define SVMODE_BITS 4
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
`define NO_TRANSLATE 0
`define SV32 1
`define SV39 8
`define SV48 9

View File

@ -25,528 +25,455 @@
`include "wally-config.vh"
module fpu (
input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic reset,
//input logic clear, // *** not being used anywhere
input logic clk,
input logic [31:0] InstrD,
input logic [`XLEN-1:0] SrcAE, // Integer input being processed
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW,
input logic [`AHBW-1:0] HRDATA,
input logic RegWriteD,
output logic [4:0] SetFflagsM,
output logic [31:0] FSROutW,
output logic [1:0] FMemRWM,
output logic FStallD,
output logic FWriteIntE, FWriteIntM, FWriteIntW,
input logic clk,
input logic [31:0] InstrD,
input logic [`XLEN-1:0] SrcAE, // Integer input being processed
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW,
input logic [`AHBW-1:0] HRDATA,
input logic RegWriteD,
output logic [4:0] SetFflagsM,
output logic [31:0] FSROutW,
output logic [1:0] FMemRWM,
output logic FStallD,
output logic FWriteIntE, FWriteIntM, FWriteIntW,
output logic [`XLEN-1:0] FWriteDataM,
output logic FDivSqrtDoneM,
output logic IllegalFPUInstrD,
output logic FDivSqrtDoneM,
output logic IllegalFPUInstrD,
output logic [`XLEN-1:0] FPUResultW);
// control logic signal instantiation
logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM, FrmW; // FP rounding mode
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division
logic FWriteIntD; // Write to integer register
logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction
logic [1:0] FMemRWD, FMemRWE; // Read and write enable for memory
logic [1:0] FForwardInput1D, FForwardInput1E; // Input1 forwarding mux control signal
logic [1:0] FForwardInput2D, FForwardInput2E; // Input2 forwarding mux control signal
logic FForwardInput3D, FForwardInput3E; // Input3 forwarding mux control signal
logic FInput2UsedD; // Is input 2 used
logic FInput3UsedD; // Is input 3 used
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which operation to do in each component
// regfile signals
logic [4:0] RdE, RdM, RdW; // ***Can take from ieu
logic [`XLEN-1:0] FWDM; // Write data for FP register
logic [`XLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register
logic [`XLEN-1:0] FRD1E, FRD2E, FRD3E;
logic [`XLEN-1:0] FInput1E, FInput1M, FInput1tmpE;
logic [`XLEN-1:0] FInput2E, FInput2M;
logic [`XLEN-1:0] FInput3E, FInput3M;
logic [`XLEN-1:0] FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions
// div/sqrt signals
logic DivDenormM, DivDenormW;
logic DivOvEn, DivUnEn;
logic DivBusyM;
logic [63:0] FDivResultM, FDivResultW;
logic [4:0] FDivFlagsM, FDivFlagsW;
// FMA signals
logic [12:0] aligncntE, aligncntM;
logic [105:0] rE, rM;
logic [105:0] sE, sM;
logic [163:0] tE, tM;
logic [8:0] normcntE, normcntM;
logic [12:0] aeE, aeM;
logic bsE, bsM;
logic killprodE, killprodM;
logic prodofE, prodofM;
logic xzeroE, xzeroM;
logic yzeroE, yzeroM;
logic zzeroE, zzeroM;
logic xdenormE, xdenormM;
logic ydenormE, ydenormM;
logic zdenormE, zdenormM;
logic xinfE, xinfM;
logic yinfE, yinfM;
logic zinfE, zinfM;
logic xnanE, xnanM;
logic ynanE, ynanM;
logic znanE, znanM;
logic nanE, nanM;
logic [8:0] sumshiftE, sumshiftM;
logic sumshiftzeroE, sumshiftzeroM;
logic prodinfE, prodinfM;
logic [63:0] FmaResultM, FmaResultW;
logic [4:0] FmaFlagsM, FmaFlagsW;
//control logic signal instantiation
logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM, FrmW; // FP rounding mode
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division
logic FWriteIntD; // Write to integer register
logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction
logic [1:0] FMemRWD, FMemRWE; // Read and write enable for memory
logic [1:0] FForwardInput1D, FForwardInput1E; // Input1 forwarding mux control signal
logic [1:0] FForwardInput2D, FForwardInput2E; // Input2 forwarding mux control signal
logic FForwardInput3D, FForwardInput3E; // Input3 forwarding mux control signal
logic FInput2UsedD; // Is input 2 used
logic FInput3UsedD; // Is input 3 used
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which operation to do in each component
// add/cvt signals
logic [63:0] AddSumE, AddSumTcE;
logic [3:0] AddSelInvE;
logic [10:0] AddExpPostSumE;
logic AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE;
logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE;
logic AddConvertE;
logic [63:0] AddFloat1E, AddFloat2E;
logic [11:0] AddExp1DenormE, AddExp2DenormE;
logic [10:0] AddExponentE;
logic [2:0] AddRmE;
logic [3:0] AddOpTypeE;
logic AddPE, AddOvEnE, AddUnEnE;
logic AddDenormM;
logic [63:0] AddSumM, AddSumTcM;
logic [3:0] AddSelInvM;
logic [10:0] AddExpPostSumM;
logic AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM;
logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM;
logic AddConvertM, AddSignM;
logic [63:0] AddFloat1M, AddFloat2M;
logic [11:0] AddExp1DenormM, AddExp2DenormM;
logic [10:0] AddExponentM;
logic [63:0] AddOp1M, AddOp2M;
logic [2:0] AddRmM;
logic [3:0] AddOpTypeM;
logic AddPM, AddOvEnM, AddUnEnM;
logic [63:0] FAddResultM, FAddResultW;
logic [4:0] FAddFlagsM, FAddFlagsW;
// regfile signals
logic [4:0] RdE, RdM, RdW; // ***Can take from ieu
logic [`XLEN-1:0] FWDM; // Write data for FP register
logic [`XLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register
logic [`XLEN-1:0] FRD1E, FRD2E, FRD3E;
logic [`XLEN-1:0] FInput1E, FInput1M, FInput1tmpE;
logic [`XLEN-1:0] FInput2E, FInput2M;
logic [`XLEN-1:0] FInput3E, FInput3M;
logic [`XLEN-1:0] FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions
// cmp signals
logic [7:0] WE, WM;
logic [7:0] XE, XM;
logic ANaNE, ANaNM;
logic BNaNE, BNaNM;
logic AzeroE, AzeroM;
logic BzeroE, BzeroM;
logic CmpInvalidM, CmpInvalidW;
logic [1:0] CmpFCCM, CmpFCCW;
logic [63:0] FCmpResultM, FCmpResultW;
// div/sqrt signals
logic DivDenormM, DivDenormW;
logic DivOvEn, DivUnEn;
logic DivBusyM;
logic [63:0] FDivResultM, FDivResultW;
logic [4:0] FDivFlagsM, FDivFlagsW;
// fsgn signals
logic [63:0] SgnResultE, SgnResultM, SgnResultW;
logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW;
// FMA signals
logic [12:0] aligncntE, aligncntM;
logic [105:0] rE, rM;
logic [105:0] sE, sM;
logic [163:0] tE, tM;
logic [8:0] normcntE, normcntM;
logic [12:0] aeE, aeM;
logic bsE, bsM;
logic killprodE, killprodM;
logic prodofE, prodofM;
logic xzeroE, xzeroM;
logic yzeroE, yzeroM;
logic zzeroE, zzeroM;
logic xdenormE, xdenormM;
logic ydenormE, ydenormM;
logic zdenormE, zdenormM;
logic xinfE, xinfM;
logic yinfE, yinfM;
logic zinfE, zinfM;
logic xnanE, xnanM;
logic ynanE, ynanM;
logic znanE, znanM;
logic nanE, nanM;
logic [8:0] sumshiftE, sumshiftM;
logic sumshiftzeroE, sumshiftzeroM;
logic prodinfE, prodinfM;
logic [63:0] FmaResultM, FmaResultW;
logic [4:0] FmaFlagsM, FmaFlagsW;
// instantiation of W stage regfile signals
logic [`XLEN-1:0] SrcAW;
// add/cvt signals
logic [63:0] AddSumE, AddSumTcE;
logic [3:0] AddSelInvE;
logic [10:0] AddExpPostSumE;
logic AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE;
logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE;
logic AddConvertE;
logic [63:0] AddFloat1E, AddFloat2E;
logic [11:0] AddExp1DenormE, AddExp2DenormE;
logic [10:0] AddExponentE;
logic [2:0] AddRmE;
logic [3:0] AddOpTypeE;
logic AddPE, AddOvEnE, AddUnEnE;
logic AddDenormM;
logic [63:0] AddSumM, AddSumTcM;
logic [3:0] AddSelInvM;
logic [10:0] AddExpPostSumM;
logic AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM;
logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM;
logic AddConvertM, AddSignM;
logic [63:0] AddFloat1M, AddFloat2M;
logic [11:0] AddExp1DenormM, AddExp2DenormM;
logic [10:0] AddExponentM;
logic [63:0] AddOp1M, AddOp2M;
logic [2:0] AddRmM;
logic [3:0] AddOpTypeM;
logic AddPM, AddOvEnM, AddUnEnM;
logic [63:0] FAddResultM, FAddResultW;
logic [4:0] FAddFlagsM, FAddFlagsW;
// classify signals
logic [63:0] ClassResultE, ClassResultM, ClassResultW;
//cmp signals
logic [7:0] WE, WM;
logic [7:0] XE, XM;
logic ANaNE, ANaNM;
logic BNaNE, BNaNM;
logic AzeroE, AzeroM;
logic BzeroE, BzeroM;
logic CmpInvalidM, CmpInvalidW;
logic [1:0] CmpFCCM, CmpFCCW;
logic [63:0] FCmpResultM, FCmpResultW;
// 64-bit FPU result
logic [63:0] FPUResult64W, FPUResult64E;
logic [4:0] FPUFlagsW;
// fsgn signals
logic [63:0] SgnResultE, SgnResultM, SgnResultW;
logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW;
// pipeline control logic
logic PipeEnableDE;
logic PipeEnableEM;
logic PipeEnableMW;
logic PipeClearDE;
logic PipeClearEM;
logic PipeClearMW;
//instantiation of W stage regfile signals
logic [`XLEN-1:0] SrcAW;
// temporarily assign pipe clear and enable signals
// to never flush & always be running
localparam PipeClear = 1'b0;
localparam PipeEnable = 1'b1;
always_comb begin
PipeEnableDE = ~StallE;
PipeEnableEM = ~StallM;
PipeEnableMW = ~StallW;
PipeClearDE = FlushE;
PipeClearEM = FlushM;
PipeClearMW = FlushW;
end
// classify signals
logic [63:0] ClassResultE, ClassResultM, ClassResultW;
//DECODE STAGE
// other
logic [63:0] FPUResult64W, FPUResult64E; // 64-bit FPU result
logic [4:0] FPUFlagsW;
// Hazard unit for FPU
fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*);
// pipeline control logic
logic PipeEnableDE;
logic PipeEnableEM;
logic PipeEnableMW;
logic PipeClearDE;
logic PipeClearEM;
logic PipeClearMW;
// top-level controller for FPU
fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
//temporarily assign pipe clear and enable signals
//to never flush & always be running
localparam PipeClear = 1'b0;
localparam PipeEnable = 1'b1;
always_comb begin
PipeEnableDE = ~StallE;
PipeEnableEM = ~StallM;
PipeEnableMW = ~StallW;
PipeClearDE = FlushE;
PipeClearEM = FlushM;
PipeClearMW = FlushW;
end
//DECODE STAGE
//Hazard unit for FPU
fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*);
//top-level controller for FPU
fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
//regfile instantiation
// regfile instantiation
FPregfile fpregfile (clk, reset, FWriteEnW,
InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
FPUResult64W,
FRD1D, FRD2D, FRD3D);
//*****************
//fpregfile D/E pipe registers
//*****************
flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, FRD3D, FRD3E);
//*****************
//other D/E pipe registers
//*****************
flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FWriteEnD, FWriteEnE);
flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE);
flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE);
flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE);
flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE);
flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, FOpCtrlD, FOpCtrlE);
flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, FDivStartD, FDivStartE);
flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput1D, FForwardInput1E);
flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput2D, FForwardInput2E);
flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput3D, FForwardInput3E);
flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResult64W, FPUResult64E);
flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE);
flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E);
flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE);
//EXECUTION STAGE
// input muxs for forwarding
mux4 #(64) FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, SrcAM, FForwardInput1E, FInput1tmpE);
mux3 #(64) FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E);
mux2 #(64) FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E);
mux2 #(64) FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E);
fma1 fma1 (.*);
//first and only instance of floating-point divider
fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .*);
//first of two-stage instance of floating-point add/cvt unit
fpuaddcvt1 fpadd1 (.*);
//first of two-stage instance of floating-point comparator
fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]);
//first and only instance of floating-point sign converter
fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*);
//first and only instance of floating-point classify unit
fpuclassify fpuclass (.*);
//*****************
//fpregfile E/M pipe registers
//*****************
flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, FInput1E, FInput1M);
flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, FInput2E, FInput2M);
flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, FInput3E, FInput3M);
//*****************
//fma E/M pipe registers
//*****************
flopenrc #(13) EMRegFma1(clk, reset, PipeClearEM, PipeEnableEM, aligncntE, aligncntM);
flopenrc #(106) EMRegFma2(clk, reset, PipeClearEM, PipeEnableEM, rE, rM);
flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, sE, sM);
flopenrc #(164) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, tE, tM);
flopenrc #(9) EMRegFma5(clk, reset, PipeClearEM, PipeEnableEM, normcntE, normcntM);
flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, aeE, aeM);
flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, bsE, bsM);
flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, killprodE, killprodM);
flopenrc #(1) EMRegFma9(clk, reset, PipeClearEM, PipeEnableEM, prodofE, prodofM);
flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM);
flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, yzeroE, yzeroM);
flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, zzeroE, zzeroM);
flopenrc #(1) EMRegFma13(clk, reset, PipeClearEM, PipeEnableEM, xdenormE, xdenormM);
flopenrc #(1) EMRegFma14(clk, reset, PipeClearEM, PipeEnableEM, ydenormE, ydenormM);
flopenrc #(1) EMRegFma15(clk, reset, PipeClearEM, PipeEnableEM, zdenormE, zdenormM);
flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM);
flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, yinfE, yinfM);
flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, zinfE, zinfM);
flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM);
flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, ynanE, ynanM);
flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, znanE, znanM);
flopenrc #(1) EMRegFma22(clk, reset, PipeClearEM, PipeEnableEM, nanE, nanM);
flopenrc #(9) EMRegFma23(clk, reset, PipeClearEM, PipeEnableEM, sumshiftE, sumshiftM);
flopenrc #(1) EMRegFma24(clk, reset, PipeClearEM, PipeEnableEM, sumshiftzeroE, sumshiftzeroM);
flopenrc #(1) EMRegFma25(clk, reset, PipeClearEM, PipeEnableEM, prodinfE, prodinfM);
//*****************
//fpadd E/M pipe registers
//*****************
flopenrc #(64) EMRegAdd1(clk, reset, PipeClearEM, PipeEnableEM, AddSumE, AddSumM);
flopenrc #(64) EMRegAdd2(clk, reset, PipeClearEM, PipeEnableEM, AddSumTcE, AddSumTcM);
flopenrc #(4) EMRegAdd3(clk, reset, PipeClearEM, PipeEnableEM, AddSelInvE, AddSelInvM);
flopenrc #(11) EMRegAdd4(clk, reset, PipeClearEM, PipeEnableEM, AddExpPostSumE, AddExpPostSumM);
flopenrc #(1) EMRegAdd5(clk, reset, PipeClearEM, PipeEnableEM, AddCorrSignE, AddCorrSignM);
flopenrc #(1) EMRegAdd6(clk, reset, PipeClearEM, PipeEnableEM, AddOp1NormE, AddOp1NormM);
flopenrc #(1) EMRegAdd7(clk, reset, PipeClearEM, PipeEnableEM, AddOp2NormE, AddOp2NormM);
flopenrc #(1) EMRegAdd8(clk, reset, PipeClearEM, PipeEnableEM, AddOpANormE, AddOpANormM);
flopenrc #(1) EMRegAdd9(clk, reset, PipeClearEM, PipeEnableEM, AddOpBNormE, AddOpBNormM);
flopenrc #(1) EMRegAdd10(clk, reset, PipeClearEM, PipeEnableEM, AddInvalidE, AddInvalidM);
flopenrc #(1) EMRegAdd11(clk, reset, PipeClearEM, PipeEnableEM, AddDenormInE, AddDenormInM);
flopenrc #(1) EMRegAdd12(clk, reset, PipeClearEM, PipeEnableEM, AddConvertE, AddConvertM);
flopenrc #(1) EMRegAdd13(clk, reset, PipeClearEM, PipeEnableEM, AddSwapE, AddSwapM);
flopenrc #(1) EMRegAdd14(clk, reset, PipeClearEM, PipeEnableEM, AddNormOvflowE, AddNormOvflowM);
flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignAM);
flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M);
flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M);
flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM);
flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM);
flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM);
flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM);
flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM);
flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM);
flopenrc #(1) EMRegAdd26(clk, reset, PipeClearEM, PipeEnableEM, AddOvEnE, AddOvEnM);
flopenrc #(1) EMRegAdd27(clk, reset, PipeClearEM, PipeEnableEM, AddUnEnE, AddUnEnM);
//*****************
//fpcmp E/M pipe registers
//*****************
flopenrc #(8) EMRegCmp1(clk, reset, PipeClearEM, PipeEnableEM, WE, WM);
flopenrc #(8) EMRegCmp2(clk, reset, PipeClearEM, PipeEnableEM, XE, XM);
flopenrc #(1) EMRegcmp3(clk, reset, PipeClearEM, PipeEnableEM, ANaNE, ANaNM);
flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM);
flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM);
flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM);
//put this in for the event we want to delay fsgn - will otherwise bypass
//*****************
//fpsgn E/M pipe registers
//*****************
flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM);
flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM);
//*****************
//other E/M pipe registers
//*****************
flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FWriteEnE, FWriteEnM);
flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM);
flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM);
flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM);
flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM);
flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM);
flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM);
flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM);
//*****************
//fpuclassify E/M pipe registers
//*****************
flopenrc #(64) EMRegClass(clk, reset, PipeClearEM, PipeEnableEM, ClassResultE, ClassResultM);
//BEGIN MEMORY STAGE
assign FWriteDataM = FInput1M;
mux2 #(64) FLoadStoreResultMux(HRDATA, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM);
fma2 fma2(.*);
//second instance of two-stage floating-point add/cvt unit
fpuaddcvt2 fpadd2 (.*);
//second instance of two-stage floating-point comparator
fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM), .Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(FInput1M), .op2(FInput2M), .*);
//*****************
//fma M/W pipe registers
//*****************
flopenrc #(64) MWRegFma1(clk, reset, PipeClearMW, PipeEnableMW, FmaResultM, FmaResultW);
flopenrc #(5) MWRegFma2(clk, reset, PipeClearMW, PipeEnableMW, FmaFlagsM, FmaFlagsW);
//*****************
//fpdiv M/W pipe registers
//*****************
flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, FDivResultM, FDivResultW);
flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, FDivFlagsM, FDivFlagsW);
flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW);
//*****************
//fpadd M/W pipe registers
//*****************
flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, FAddResultM, FAddResultW);
flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, FAddFlagsM, FAddFlagsW);
//*****************
//fpcmp M/W pipe registers
//*****************
flopenrc #(1) MWRegCmp1(clk, reset, PipeClearMW, PipeEnableMW, CmpInvalidM, CmpInvalidW);
flopenrc #(2) MWRegCmp2(clk, reset, PipeClearMW, PipeEnableMW, CmpFCCM, CmpFCCW);
flopenrc #(64) MWRegCmp3(clk, reset, PipeClearMW, PipeEnableMW, FCmpResultM, FCmpResultW);
//*****************
//fpsgn M/W pipe registers
//*****************
flopenrc #(64) MWRegSgn1(clk, reset, PipeClearMW, PipeEnableMW, SgnResultM, SgnResultW);
flopenrc #(5) MWRegSgn2(clk, reset, PipeClearMW, PipeEnableMW, SgnFlagsM, SgnFlagsW);
//*****************
//other M/W pipe registers
//*****************
flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FWriteEnM, FWriteEnW);
flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW);
flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW);
flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW);
flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW);
flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW);
flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW);
//*****************
//fpuclassify M/W pipe registers
//*****************
flopenrc #(64) MWRegClass(clk, reset, PipeClearMW, PipeEnableMW, ClassResultM, ClassResultW);
//*****************
// fpregfile D/E pipe registers
//*****************
flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, FRD3D, FRD3E);
//*****************
// other D/E pipe registers
//*****************
flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FWriteEnD, FWriteEnE);
flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE);
flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE);
flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE);
flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE);
flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, FOpCtrlD, FOpCtrlE);
flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, FDivStartD, FDivStartE);
flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput1D, FForwardInput1E);
flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput2D, FForwardInput2E);
flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput3D, FForwardInput3E);
flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResult64W, FPUResult64E);
flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE);
flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E);
flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE);
//EXECUTION STAGE
// input muxs for forwarding
mux4 #(64) FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, SrcAM, FForwardInput1E, FInput1tmpE);
mux3 #(64) FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E);
mux2 #(64) FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E);
mux2 #(64) FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E);
fma1 fma1 (.*);
// first and only instance of floating-point divider
logic fpdivClk;
clockgater fpdivclkg(.E(FDivStartE),
.SE(DivBusyM),
.CLK(clk),
.ECLK(fpdivClk));
fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk));
// first of two-stage instance of floating-point add/cvt unit
fpuaddcvt1 fpadd1 (.*);
// first of two-stage instance of floating-point comparator
fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]);
// first and only instance of floating-point sign converter
fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*);
// first and only instance of floating-point classify unit
fpuclassify fpuclass (.*);
//*****************
//fpregfile D/E pipe registers
//*****************
flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, FInput1E, FInput1M);
flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, FInput2E, FInput2M);
flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, FInput3E, FInput3M);
//*****************
// fma E/M pipe registers
//*****************
flopenrc #(13) EMRegFma1(clk, reset, PipeClearEM, PipeEnableEM, aligncntE, aligncntM);
flopenrc #(106) EMRegFma2(clk, reset, PipeClearEM, PipeEnableEM, rE, rM);
flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, sE, sM);
flopenrc #(164) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, tE, tM);
flopenrc #(9) EMRegFma5(clk, reset, PipeClearEM, PipeEnableEM, normcntE, normcntM);
flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, aeE, aeM);
flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, bsE, bsM);
flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, killprodE, killprodM);
flopenrc #(1) EMRegFma9(clk, reset, PipeClearEM, PipeEnableEM, prodofE, prodofM);
flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM);
flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, yzeroE, yzeroM);
flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, zzeroE, zzeroM);
flopenrc #(1) EMRegFma13(clk, reset, PipeClearEM, PipeEnableEM, xdenormE, xdenormM);
flopenrc #(1) EMRegFma14(clk, reset, PipeClearEM, PipeEnableEM, ydenormE, ydenormM);
flopenrc #(1) EMRegFma15(clk, reset, PipeClearEM, PipeEnableEM, zdenormE, zdenormM);
flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM);
flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, yinfE, yinfM);
flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, zinfE, zinfM);
flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM);
flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, ynanE, ynanM);
flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, znanE, znanM);
flopenrc #(1) EMRegFma22(clk, reset, PipeClearEM, PipeEnableEM, nanE, nanM);
flopenrc #(9) EMRegFma23(clk, reset, PipeClearEM, PipeEnableEM, sumshiftE, sumshiftM);
flopenrc #(1) EMRegFma24(clk, reset, PipeClearEM, PipeEnableEM, sumshiftzeroE, sumshiftzeroM);
flopenrc #(1) EMRegFma25(clk, reset, PipeClearEM, PipeEnableEM, prodinfE, prodinfM);
//*****************
// fpadd E/M pipe registers
//*****************
flopenrc #(64) EMRegAdd1(clk, reset, PipeClearEM, PipeEnableEM, AddSumE, AddSumM);
flopenrc #(64) EMRegAdd2(clk, reset, PipeClearEM, PipeEnableEM, AddSumTcE, AddSumTcM);
flopenrc #(4) EMRegAdd3(clk, reset, PipeClearEM, PipeEnableEM, AddSelInvE, AddSelInvM);
flopenrc #(11) EMRegAdd4(clk, reset, PipeClearEM, PipeEnableEM, AddExpPostSumE, AddExpPostSumM);
flopenrc #(1) EMRegAdd5(clk, reset, PipeClearEM, PipeEnableEM, AddCorrSignE, AddCorrSignM);
flopenrc #(1) EMRegAdd6(clk, reset, PipeClearEM, PipeEnableEM, AddOp1NormE, AddOp1NormM);
flopenrc #(1) EMRegAdd7(clk, reset, PipeClearEM, PipeEnableEM, AddOp2NormE, AddOp2NormM);
flopenrc #(1) EMRegAdd8(clk, reset, PipeClearEM, PipeEnableEM, AddOpANormE, AddOpANormM);
flopenrc #(1) EMRegAdd9(clk, reset, PipeClearEM, PipeEnableEM, AddOpBNormE, AddOpBNormM);
flopenrc #(1) EMRegAdd10(clk, reset, PipeClearEM, PipeEnableEM, AddInvalidE, AddInvalidM);
flopenrc #(1) EMRegAdd11(clk, reset, PipeClearEM, PipeEnableEM, AddDenormInE, AddDenormInM);
flopenrc #(1) EMRegAdd12(clk, reset, PipeClearEM, PipeEnableEM, AddConvertE, AddConvertM);
flopenrc #(1) EMRegAdd13(clk, reset, PipeClearEM, PipeEnableEM, AddSwapE, AddSwapM);
flopenrc #(1) EMRegAdd14(clk, reset, PipeClearEM, PipeEnableEM, AddNormOvflowE, AddNormOvflowM);
flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignAM);
flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M);
flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M);
flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM);
flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM);
flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM);
flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM);
flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM);
flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM);
flopenrc #(1) EMRegAdd26(clk, reset, PipeClearEM, PipeEnableEM, AddOvEnE, AddOvEnM);
flopenrc #(1) EMRegAdd27(clk, reset, PipeClearEM, PipeEnableEM, AddUnEnE, AddUnEnM);
//*****************
// fpcmp E/M pipe registers
//*****************
flopenrc #(8) EMRegCmp1(clk, reset, PipeClearEM, PipeEnableEM, WE, WM);
flopenrc #(8) EMRegCmp2(clk, reset, PipeClearEM, PipeEnableEM, XE, XM);
flopenrc #(1) EMRegcmp3(clk, reset, PipeClearEM, PipeEnableEM, ANaNE, ANaNM);
flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM);
flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM);
flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM);
// put this in for the event we want to delay fsgn - will otherwise bypass
//*****************
// fpsgn E/M pipe registers
//*****************
flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM);
flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM);
//*****************
// other E/M pipe registers
//*****************
flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FWriteEnE, FWriteEnM);
flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM);
flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM);
flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM);
flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM);
flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM);
flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM);
flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM);
//*****************
// fpuclassify E/M pipe registers
//*****************
flopenrc #(64) EMRegClass(clk, reset, PipeClearEM, PipeEnableEM, ClassResultE, ClassResultM);
//BEGIN MEMORY STAGE
assign FWriteDataM = FInput1M;
mux2 #(64) FLoadStoreResultMux(HRDATA, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM);
fma2 fma2(.*);
// second instance of two-stage floating-point add/cvt unit
fpuaddcvt2 fpadd2 (.*);
// second instance of two-stage floating-point comparator
fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM),
.Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(FInput1M), .op2(FInput2M), .*);
//*****************
// fma M/W pipe registers
//*****************
flopenrc #(64) MWRegFma1(clk, reset, PipeClearMW, PipeEnableMW, FmaResultM, FmaResultW);
flopenrc #(5) MWRegFma2(clk, reset, PipeClearMW, PipeEnableMW, FmaFlagsM, FmaFlagsW);
//*****************
// fpdiv M/W pipe registers
//*****************
flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, FDivResultM, FDivResultW);
flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, FDivFlagsM, FDivFlagsW);
flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW);
//*****************
// fpadd M/W pipe registers
//*****************
flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, FAddResultM, FAddResultW);
flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, FAddFlagsM, FAddFlagsW);
//*****************
// fpcmp M/W pipe registers
//*****************
flopenrc #(1) MWRegCmp1(clk, reset, PipeClearMW, PipeEnableMW, CmpInvalidM, CmpInvalidW);
flopenrc #(2) MWRegCmp2(clk, reset, PipeClearMW, PipeEnableMW, CmpFCCM, CmpFCCW);
flopenrc #(64) MWRegCmp3(clk, reset, PipeClearMW, PipeEnableMW, FCmpResultM, FCmpResultW);
//*****************
// fpsgn M/W pipe registers
//*****************
flopenrc #(64) MWRegSgn1(clk, reset, PipeClearMW, PipeEnableMW, SgnResultM, SgnResultW);
flopenrc #(5) MWRegSgn2(clk, reset, PipeClearMW, PipeEnableMW, SgnFlagsM, SgnFlagsW);
//*****************
// other M/W pipe registers
//*****************
flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FWriteEnM, FWriteEnW);
flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW);
flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW);
flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW);
flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW);
flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW);
flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW);
//*****************
// fpuclassify M/W pipe registers
//*****************
flopenrc #(64) MWRegClass(clk, reset, PipeClearMW, PipeEnableMW, ClassResultM, ClassResultW);
//#########################################
//BEGIN WRITEBACK STAGE
// BEGIN WRITEBACK STAGE
//#########################################
always_comb begin
case (FResultSelW)
// div/sqrt
3'b000 : FPUFlagsW = FDivFlagsW;
// cmp
3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0};
//fma/mult
3'b010 : FPUFlagsW = FmaFlagsW;
// sgn inj
3'b011 : FPUFlagsW = SgnFlagsW;
// add/sub/cnvt
3'b100 : FPUFlagsW = FAddFlagsW;
// classify
3'b101 : FPUFlagsW = 5'b0;
// output SrcAW
3'b110 : FPUFlagsW = 5'b0;
// output FRD1
3'b111 : FPUFlagsW = 5'b0;
default : FPUFlagsW = 5'bxxxxx;
endcase
end
always_comb begin
case (FResultSelW)
// div/sqrt
3'b000 : FPUFlagsW = FDivFlagsW;
// cmp
3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0};
//fma/mult
3'b010 : FPUFlagsW = FmaFlagsW;
// sgn inj
3'b011 : FPUFlagsW = SgnFlagsW;
// add/sub/cnvt
3'b100 : FPUFlagsW = FAddFlagsW;
// classify
3'b101 : FPUFlagsW = 5'b0;
// output SrcAW
3'b110 : FPUFlagsW = 5'b0;
// output FRD1
3'b111 : FPUFlagsW = 5'b0;
default : FPUFlagsW = 5'bxxxxx;
endcase
end
always_comb begin
case (FResultSelW)
// div/sqrt
3'b000 : FPUResult64W = FDivResultW;
// cmp
3'b001 : FPUResult64W = FCmpResultW;
//fma/mult
3'b010 : FPUResult64W = FmaResultW;
// sgn inj
3'b011 : FPUResult64W = SgnResultW;
// add/sub/cnvt
3'b100 : FPUResult64W = FAddResultW;
// classify
3'b101 : FPUResult64W = ClassResultW;
// output SrcAW
3'b110 : FPUResult64W = SrcAW;
// Load/Store/Move to FP-register
3'b111 : FPUResult64W = FLoadStoreResultW;
default : FPUResult64W = {64{1'bx}};
endcase
end // always_comb
always_comb begin
case (FResultSelW)
// div/sqrt
3'b000 : FPUResult64W = FDivResultW;
// cmp
3'b001 : FPUResult64W = FCmpResultW;
//fma/mult
3'b010 : FPUResult64W = FmaResultW;
// sgn inj
3'b011 : FPUResult64W = SgnResultW;
// add/sub/cnvt
3'b100 : FPUResult64W = FAddResultW;
// classify
3'b101 : FPUResult64W = ClassResultW;
// output SrcAW
3'b110 : FPUResult64W = SrcAW;
// Load/Store/Move to FP-register
3'b111 : FPUResult64W = FLoadStoreResultW;
default : FPUResult64W = {64{1'bx}};
endcase
end
//interface between XLEN size datapath and double-precision sized
//floating-point results
//
//define offsets for LSB zero extension or truncation
always_comb begin
//zero extension
// interface between XLEN size datapath and double-precision sized
// floating-point results
//
// define offsets for LSB zero extension or truncation
always_comb begin
// zero extension
FPUResultW = FPUResult64W[63:64-`XLEN];
SetFflagsM = FPUFlagsW;
end
endmodule // fpu
end
endmodule

View File

@ -0,0 +1,46 @@
///////////////////////////////////////////
// clockgater.sv
//
// Written: Ross Thompson 9 January 2021
// Modified:
//
// Purpose: Clock gater model. Must use standard cell for synthesis.
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// Behavioral model of a clock-gating cell.
//   E    - enable
//   SE   - second enable, ORed with E
//   CLK  - ungated input clock
//   ECLK - gated clock: CLK ANDed with the latched enable
module clockgater
  (input  logic E,
   input  logic SE,
   input  logic CLK,
   output logic ECLK);

  // VERY IMPORTANT.
  // This part functionally models a clock gater, but does not necessarily meet
  // the timing constraints a real standard cell would.
  // Do not use this in synthesis!
  logic enable_q;

  // Low-transparent latch: the enable is tracked only while CLK is low and is
  // held for the whole high phase, so ECLK cannot glitch or be truncated when
  // E or SE changes while CLK is high.  (An event control on ~CLK would fire
  // on both clock edges and resample the enable at the rising edge, racing
  // with the AND gate below.)
  always_latch begin
    if (~CLK) begin
      enable_q <= E | SE;
    end
  end

  assign ECLK = enable_q & CLK;
endmodule

View File

@ -0,0 +1,195 @@
///////////////////////////////////////////
// lzd.sv
//
// Written: James.Stine@okstate.edu 1 February 2021
// Modified:
//
// Purpose: Hierarchical leading zero detectors (Integer Divide instructions)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
/* verilator lint_off DECLFILENAME */
// Original idea came from V. G. Oklobdzija, "An algorithmic and novel
// design of a leading zero detector circuit: comparison with logic
// synthesis," in IEEE Transactions on Very Large Scale Integration
// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi:
// 10.1109/92.273153.
// Modified to be more hierarchical
// 2-bit leading zero detector leaf cell.
//   B - input bits, B[1] is the most significant
//   V - 1 when at least one bit of B is set
//   P - 1 when B == 2'b01, i.e. exactly one leading zero
module lzd2 (output logic       P,
             output logic       V,
             input  logic [1:0] B);

  assign P = ~B[1] & B[0];
  assign V = |B;

endmodule // lzd2
// Width-parameterized wrapper that selects the fixed-width leading zero
// detector matching WIDTH.
//   B  - input vector
//   ZP - position output of the selected detector
//   ZV - valid output of the selected detector
// Only WIDTH = 4, 8, 16, 32, 64 and 128 are handled; any other value
// instantiates nothing and leaves ZP and ZV undriven.
module lzd_hier #(parameter WIDTH=8)
  (input  logic [WIDTH-1:0]         B,
   output logic [$clog2(WIDTH)-1:0] ZP,
   output logic                     ZV);

  case (WIDTH)
    128: lzd128 lz127 (ZP, ZV, B);
    64:  lzd64  lz64  (ZP, ZV, B);
    32:  lzd32  lz32  (ZP, ZV, B);
    16:  lzd16  lz16  (ZP, ZV, B);
    8:   lzd8   lz8   (ZP, ZV, B);
    4:   lzd4   lz4   (ZP, ZV, B);
  endcase

endmodule // lzd_hier
// 4-bit leading zero detector built from two lzd2 leaf cells.
//   B  - input vector, B[3] is the most significant bit
//   ZV - 1 when any bit of B is set
//   ZP - number of leading zeros in B (meaningful when ZV is 1)
module lzd4 (ZP, ZV, B);
  input  logic [3:0] B;
  output logic [1:0] ZP;
  output logic       ZV;

  logic ZPa;   // position from the low half  B[1:0]
  logic ZPb;   // position from the high half B[3:2]
  logic ZVa;   // low half contains a 1
  logic ZVb;   // high half contains a 1

  // Use the lzd2 leaf defined in this file (the lz2 name is not defined here).
  lzd2 l1(ZPa, ZVa, B[1:0]);
  lzd2 l2(ZPb, ZVb, B[3:2]);

  // If the high half holds a 1 its position wins; otherwise the whole high
  // half is zero, so the top position bit is set and the low half decides.
  assign ZP[0:0] = ZVb ? ZPb : ZPa;
  assign ZP[1]   = ~ZVb;
  assign ZV      = ZVa | ZVb;
endmodule // lzd4
// 8-bit leading zero detector built from two lzd4 blocks.
//   B  - input vector, B[7] is the most significant bit
//   ZV - 1 when any bit of B is set
//   ZP - number of leading zeros in B (meaningful when ZV is 1)
module lzd8 (ZP, ZV, B);
  input  logic [7:0] B;
  output logic [2:0] ZP;
  output logic       ZV;

  logic [1:0] ZPa;   // position from the low half  B[3:0]
  logic [1:0] ZPb;   // position from the high half B[7:4]
  logic       ZVa;   // low half contains a 1
  logic       ZVb;   // high half contains a 1

  // Use the lzd4 module defined in this file (the lz4 name is not defined here).
  lzd4 l1(ZPa, ZVa, B[3:0]);
  lzd4 l2(ZPb, ZVb, B[7:4]);

  // High half wins when it holds a 1; otherwise set the top position bit and
  // take the low half's position.
  assign ZP[1:0] = ZVb ? ZPb : ZPa;
  assign ZP[2]   = ~ZVb;
  assign ZV      = ZVa | ZVb;
endmodule // lzd8
// 16-bit leading zero detector built from two lzd8 blocks.
//   B  - input vector, B[15] is the most significant bit
//   ZV - 1 when any bit of B is set
//   ZP - number of leading zeros in B (meaningful when ZV is 1)
module lzd16 (ZP, ZV, B);
  input  logic [15:0] B;
  output logic [3:0]  ZP;
  output logic        ZV;

  logic [2:0] ZPa;   // position from the low half  B[7:0]
  logic [2:0] ZPb;   // position from the high half B[15:8]
  logic       ZVa;   // low half contains a 1
  logic       ZVb;   // high half contains a 1

  // Use the lzd8 module defined in this file (the lz8 name is not defined here).
  lzd8 l1(ZPa, ZVa, B[7:0]);
  lzd8 l2(ZPb, ZVb, B[15:8]);

  // High half wins when it holds a 1; otherwise set the top position bit and
  // take the low half's position.
  assign ZP[2:0] = ZVb ? ZPb : ZPa;
  assign ZP[3]   = ~ZVb;
  assign ZV      = ZVa | ZVb;
endmodule // lzd16
// 32-bit leading zero detector built from two lzd16 blocks.
//   B  - input vector, B[31] is the most significant bit
//   ZV - 1 when any bit of B is set
//   ZP - number of leading zeros in B (meaningful when ZV is 1)
module lzd32 (ZP, ZV, B);
  input  logic [31:0] B;
  output logic [4:0]  ZP;
  output logic        ZV;

  logic [3:0] ZPa;   // position from the low half  B[15:0]
  logic [3:0] ZPb;   // position from the high half B[31:16]
  logic       ZVa;   // low half contains a 1
  logic       ZVb;   // high half contains a 1

  // Use the lzd16 module defined in this file (the lz16 name is not defined here).
  lzd16 l1(ZPa, ZVa, B[15:0]);
  lzd16 l2(ZPb, ZVb, B[31:16]);

  // High half wins when it holds a 1; otherwise set the top position bit and
  // take the low half's position.
  assign ZP[3:0] = ZVb ? ZPb : ZPa;
  assign ZP[4]   = ~ZVb;
  assign ZV      = ZVa | ZVb;
endmodule // lzd32
// 64-bit leading zero detector built from two lzd32 blocks.
//   B  - input vector, B[63] is the most significant bit
//   ZV - 1 when any bit of B is set
//   ZP - number of leading zeros in B (meaningful when ZV is 1)
module lzd64 (ZP, ZV, B);
  input  logic [63:0] B;
  output logic [5:0]  ZP;
  output logic        ZV;

  logic [4:0] ZPa;   // position from the low half  B[31:0]
  logic [4:0] ZPb;   // position from the high half B[63:32]
  logic       ZVa;   // low half contains a 1
  logic       ZVb;   // high half contains a 1

  // Use the lzd32 module defined in this file (the lz32 name is not defined here).
  lzd32 l1(ZPa, ZVa, B[31:0]);
  lzd32 l2(ZPb, ZVb, B[63:32]);

  // High half wins when it holds a 1; otherwise set the top position bit and
  // take the low half's position.
  assign ZP[4:0] = ZVb ? ZPb : ZPa;
  assign ZP[5]   = ~ZVb;
  assign ZV      = ZVa | ZVb;
endmodule // lzd64
// 128-bit leading zero detector built from two lzd64 blocks.
//   B  - input vector, B[127] is the most significant bit
//   ZV - 1 when any bit of B is set
//   ZP - number of leading zeros in B (meaningful when ZV is 1)
module lzd128 (ZP, ZV, B);
  input  logic [127:0] B;
  output logic [6:0]   ZP;
  output logic         ZV;

  logic [5:0] ZPa;   // position from the low half  B[63:0]
  logic [5:0] ZPb;   // position from the high half B[127:64]
  logic       ZVa;   // low half contains a 1
  logic       ZVb;   // high half contains a 1

  // Each half must be exactly 64 bits and the halves must not overlap.
  // The previous 65-bit slices B[64:0] / B[127:63] mismatched the 64-bit
  // ports, so the sub-detectors were fed the wrong bits.
  // Use the lzd64 module defined in this file (the lz64 name is not defined here).
  lzd64 l1(ZPa, ZVa, B[63:0]);
  lzd64 l2(ZPb, ZVb, B[127:64]);

  // High half wins when it holds a 1; otherwise set the top position bit and
  // take the low half's position.
  assign ZP[5:0] = ZVb ? ZPb : ZPa;
  assign ZP[6]   = ~ZVb;
  assign ZV      = ZVa | ZVb;
endmodule // lzd128
/* verilator lint_on DECLFILENAME */

View File

@ -0,0 +1,195 @@
///////////////////////////////////////////
// lzd.sv
//
// Written: James.Stine@okstate.edu 1 February 2021
// Modified:
//
// Purpose: Hierarchical leading zero detectors (Integer Divide instructions)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
/* verilator lint_off DECLFILENAME */
// Original idea came from V. G. Oklobdzija, "An algorithmic and novel
// design of a leading zero detector circuit: comparison with logic
// synthesis," in IEEE Transactions on Very Large Scale Integration
// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi:
// 10.1109/92.273153.
// Modified to be more hierarchical
// 2-bit leading zero detector leaf cell.
//   B - input bits, B[1] is the most significant
//   V - 1 when at least one bit of B is set
//   P - 1 when B == 2'b01, i.e. exactly one leading zero
module lz2 (output logic       P,
            output logic       V,
            input  logic [1:0] B);

  assign P = ~B[1] & B[0];
  assign V = |B;

endmodule // lz2
// Width-parameterized wrapper that selects the fixed-width leading zero
// detector matching WIDTH.
//   B  - input vector
//   ZP - position output of the selected detector
//   ZV - valid output of the selected detector
// Only WIDTH = 4, 8, 16, 32, 64 and 128 are handled; any other value
// instantiates nothing and leaves ZP and ZV undriven.
module lzd_hier #(parameter WIDTH=8)
  (input  logic [WIDTH-1:0]         B,
   output logic [$clog2(WIDTH)-1:0] ZP,
   output logic                     ZV);

  case (WIDTH)
    128: lz128 lzd127 (ZP, ZV, B);
    64:  lz64  lzd64  (ZP, ZV, B);
    32:  lz32  lzd32  (ZP, ZV, B);
    16:  lz16  lzd16  (ZP, ZV, B);
    8:   lz8   lzd8   (ZP, ZV, B);
    4:   lz4   lzd4   (ZP, ZV, B);
  endcase

endmodule // lzd_hier
// 4-bit leading zero detector composed of two lz2 cells.
//   B  - input vector, B[3] is the most significant bit
//   ZV - 1 when any bit of B is set
//   ZP - number of leading zeros in B (meaningful when ZV is 1)
module lz4 (output logic [1:0] ZP,
            output logic       ZV,
            input  logic [3:0] B);

  logic loPos, hiPos;       // positions reported by the low / high half
  logic loValid, hiValid;   // each half contains at least one 1

  lz2 l1(loPos, loValid, B[1:0]);
  lz2 l2(hiPos, hiValid, B[3:2]);

  assign ZV = hiValid | loValid;
  // MSB: high half is all zero.  LSB: position from whichever half decides.
  assign ZP = {~hiValid, hiValid ? hiPos : loPos};

endmodule // lz4
// 8-bit leading zero detector composed of two lz4 blocks.
//   B  - input vector, B[7] is the most significant bit
//   ZV - 1 when any bit of B is set
//   ZP - number of leading zeros in B (meaningful when ZV is 1)
module lz8 (output logic [2:0] ZP,
            output logic       ZV,
            input  logic [7:0] B);

  logic [1:0] loPos, hiPos;       // positions reported by the low / high half
  logic       loValid, hiValid;   // each half contains at least one 1

  lz4 l1(loPos, loValid, B[3:0]);
  lz4 l2(hiPos, hiValid, B[7:4]);

  assign ZV = hiValid | loValid;
  // MSB: high half is all zero.  Low bits: position from the deciding half.
  assign ZP = {~hiValid, hiValid ? hiPos : loPos};

endmodule // lz8
// 16-bit leading zero detector composed of two lz8 blocks.
//   B  - input vector, B[15] is the most significant bit
//   ZV - 1 when any bit of B is set
//   ZP - number of leading zeros in B (meaningful when ZV is 1)
module lz16 (output logic [3:0]  ZP,
             output logic        ZV,
             input  logic [15:0] B);

  logic [2:0] loPos, hiPos;       // positions reported by the low / high half
  logic       loValid, hiValid;   // each half contains at least one 1

  lz8 l1(loPos, loValid, B[7:0]);
  lz8 l2(hiPos, hiValid, B[15:8]);

  assign ZV = hiValid | loValid;
  // MSB: high half is all zero.  Low bits: position from the deciding half.
  assign ZP = {~hiValid, hiValid ? hiPos : loPos};

endmodule // lz16
// 32-bit leading zero detector composed of two lz16 blocks.
//   B  - input vector, B[31] is the most significant bit
//   ZV - 1 when any bit of B is set
//   ZP - number of leading zeros in B (meaningful when ZV is 1)
module lz32 (output logic [4:0]  ZP,
             output logic        ZV,
             input  logic [31:0] B);

  logic [3:0] loPos, hiPos;       // positions reported by the low / high half
  logic       loValid, hiValid;   // each half contains at least one 1

  lz16 l1(loPos, loValid, B[15:0]);
  lz16 l2(hiPos, hiValid, B[31:16]);

  assign ZV = hiValid | loValid;
  // MSB: high half is all zero.  Low bits: position from the deciding half.
  assign ZP = {~hiValid, hiValid ? hiPos : loPos};

endmodule // lz32
// 64-bit leading zero detector composed of two lz32 blocks.
//   B  - input vector, B[63] is the most significant bit
//   ZV - 1 when any bit of B is set
//   ZP - number of leading zeros in B (meaningful when ZV is 1)
module lz64 (output logic [5:0]  ZP,
             output logic        ZV,
             input  logic [63:0] B);

  logic [4:0] loPos, hiPos;       // positions reported by the low / high half
  logic       loValid, hiValid;   // each half contains at least one 1

  lz32 l1(loPos, loValid, B[31:0]);
  lz32 l2(hiPos, hiValid, B[63:32]);

  assign ZV = hiValid | loValid;
  // MSB: high half is all zero.  Low bits: position from the deciding half.
  assign ZP = {~hiValid, hiValid ? hiPos : loPos};

endmodule // lz64
// 128-bit leading zero detector built from two lz64 blocks.
//   B  - input vector, B[127] is the most significant bit
//   ZV - 1 when any bit of B is set
//   ZP - number of leading zeros in B (meaningful when ZV is 1)
module lz128 (ZP, ZV, B);
  input  logic [127:0] B;
  output logic [6:0]   ZP;
  output logic         ZV;

  logic [5:0] ZPa;   // position from the low half  B[63:0]
  logic [5:0] ZPb;   // position from the high half B[127:64]
  logic       ZVa;   // low half contains a 1
  logic       ZVb;   // high half contains a 1

  // Each half must be exactly 64 bits and the halves must not overlap.
  // The previous 65-bit slices B[64:0] / B[127:63] mismatched the 64-bit
  // ports, so the sub-detectors were fed the wrong bits.
  lz64 l1(ZPa, ZVa, B[63:0]);
  lz64 l2(ZPb, ZVb, B[127:64]);

  // High half wins when it holds a 1; otherwise set the top position bit and
  // take the low half's position.
  assign ZP[5:0] = ZVb ? ZPb : ZPa;
  assign ZP[6]   = ~ZVb;
  assign ZV      = ZVa | ZVb;
endmodule // lz128
/* verilator lint_on DECLFILENAME */

View File

@ -0,0 +1,76 @@
///////////////////////////////////////////
// shifters.sv
//
// Written: James.Stine@okstate.edu 1 February 2021
// Modified:
//
// Purpose: Left and right shifters (Integer Divide instructions)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
/* verilator lint_off DECLFILENAME */
/* verilator lint_off UNOPTFLAT */
// shift_right: combinational right shifter that fills vacated bits with 0.
//   A     : value to shift
//   Shift : shift amount
//   Z     : A shifted right by Shift
// Built as a cascade of $clog2(WIDTH) mux2 stages. Stage i either passes its
// input through or shifts it right by WIDTH/2**(i+1), selected by
// Shift[$clog2(WIDTH)-i-1] (the MSB of Shift controls the largest step).
// NOTE(review): the per-stage amounts WIDTH/2, WIDTH/4, ..., only match the
// binary weights of Shift when WIDTH is a power of two.
// NOTE(review): mux2 is assumed to be (d0, d1, sel, y) -- confirm against
// its definition.
module shift_right #(parameter WIDTH=8)
   (input  logic [WIDTH-1:0]         A,
    input  logic [$clog2(WIDTH)-1:0] Shift,
    output logic [WIDTH-1:0]         Z);

   // stage[0] is the input; stage[i+1] is the output of mux stage i.
   logic [WIDTH-1:0] stage [$clog2(WIDTH):0];
   genvar 	     i;

   assign stage[0] = A;
   generate
      for (i=0;i<$clog2(WIDTH);i=i+1)
	begin : genbit
	   // Number of bit positions this stage shifts by when selected.
	   localparam SHAMT = WIDTH/(2**(i+1));
	   mux2 #(WIDTH) mux_inst (stage[i],
				   {{SHAMT{1'b0}}, stage[i][WIDTH-1:SHAMT]},
				   Shift[$clog2(WIDTH)-i-1],
				   stage[i+1]);
	end
   endgenerate
   assign Z = stage[$clog2(WIDTH)];
endmodule // shift_right
// shift_left: combinational left shifter that fills vacated bits with 0.
//   A     : value to shift
//   Shift : shift amount
//   Z     : A shifted left by Shift
// Built as a cascade of $clog2(WIDTH) mux2 stages. Stage i either passes its
// input through or shifts it left by WIDTH/2**(i+1), selected by
// Shift[$clog2(WIDTH)-i-1] (the MSB of Shift controls the largest step).
module shift_left #(parameter WIDTH=8)
   (input  logic [WIDTH-1:0]         A,
    input  logic [$clog2(WIDTH)-1:0] Shift,
    output logic [WIDTH-1:0]         Z);

   // stage[0] is the input; stage[i+1] is the output of mux stage i.
   logic [WIDTH-1:0] stage [$clog2(WIDTH):0];
   genvar 	     i;

   assign stage[0] = A;
   assign Z        = stage[$clog2(WIDTH)];

   generate
      for (i=0;i<$clog2(WIDTH);i=i+1)
	begin : genbit
	   // Number of bit positions this stage shifts by when selected.
	   localparam SHAMT = WIDTH/(2**(i+1));
	   mux2 #(WIDTH) mux_inst (stage[i],
				   {stage[i][WIDTH-1-SHAMT:0], {SHAMT{1'b0}}},
				   Shift[$clog2(WIDTH)-i-1],
				   stage[i+1]);
	end
   endgenerate
endmodule // shift_left
/* verilator lint_on DECLFILENAME */
/* verilator lint_on UNOPTFLAT */

View File

@ -30,7 +30,8 @@
module bpred
(input logic clk, reset,
input logic StallF, StallD, StallE, FlushF, FlushD, FlushE,
input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushF, FlushD, FlushE, FlushM, FlushW,
// Fetch stage
// the prediction
input logic [`XLEN-1:0] PCNextF, // *** forgot to include this one on the I/O list
@ -88,25 +89,29 @@ module bpred
globalHistoryPredictor DirPredictor(.clk(clk),
.reset(reset),
.*, // Stalls and flushes
.LookUpPC(PCNextF),
.Prediction(BPPredF),
.PCNextF(PCNextF),
.BPPredF(BPPredF),
// update
.UpdatePC(PCE),
.UpdateEN(InstrClassE[0] & ~StallE),
.InstrClassE(InstrClassE),
.BPInstrClassE(BPInstrClassE),
.BPPredDirWrongE(BPPredDirWrongE),
.PCE(PCE),
.PCSrcE(PCSrcE),
.UpdatePrediction(UpdateBPPredE));
.UpdateBPPredE(UpdateBPPredE));
end else if (`BPTYPE == "BPGSHARE") begin:Predictor
gsharePredictor DirPredictor(.clk(clk),
.reset(reset),
.*, // Stalls and flushes
.LookUpPC(PCNextF),
.Prediction(BPPredF),
// update
.UpdatePC(PCE),
.UpdateEN(InstrClassE[0] & ~StallE),
.PCSrcE(PCSrcE),
.UpdatePrediction(UpdateBPPredE));
.reset(reset),
.*, // Stalls and flushes
.PCNextF(PCNextF),
.BPPredF(BPPredF),
// update
.InstrClassE(InstrClassE),
.BPInstrClassE(BPInstrClassE),
.BPPredDirWrongE(BPPredDirWrongE),
.PCE(PCE),
.PCSrcE(PCSrcE),
.UpdateBPPredE(UpdateBPPredE));
end
else if (`BPTYPE == "BPLOCALPAg") begin:Predictor
@ -190,14 +195,14 @@ module bpred
flopenrc #(2) BPPredRegD(.clk(clk),
.reset(reset),
.en(~StallD),
.clear(FlushD),
.clear(1'b0),
.d(BPPredF),
.q(BPPredD));
flopenrc #(2) BPPredRegE(.clk(clk),
.reset(reset),
.en(~StallE),
.clear(FlushE),
.clear(1'b0),
.d(BPPredD),
.q(BPPredE));

View File

@ -32,76 +32,89 @@ module globalHistoryPredictor
)
(input logic clk,
input logic reset,
input logic StallF, StallD, StallE, FlushF, FlushD, FlushE,
input logic [`XLEN-1:0] LookUpPC,
output logic [1:0] Prediction,
input logic StallF, StallD, StallE, FlushF, FlushD, FlushE,
input logic [`XLEN-1:0] PCNextF,
output logic [1:0] BPPredF,
// update
input logic [`XLEN-1:0] UpdatePC,
input logic UpdateEN, PCSrcE,
input logic [1:0] UpdatePrediction
input logic [4:0] InstrClassE,
input logic [4:0] BPInstrClassE,
input logic [4:0] BPInstrClassD,
input logic [4:0] BPInstrClassF,
input logic BPPredDirWrongE,
input logic [`XLEN-1:0] PCE,
input logic PCSrcE,
input logic [1:0] UpdateBPPredE
);
logic [k-1:0] GHRF, GHRFNext;
assign GHRFNext = {PCSrcE, GHRF[k-1:1]};
logic [k+1:0] GHR, GHRNext;
logic [k-1:0] PHTUpdateAdr, PHTUpdateAdr0, PHTUpdateAdr1;
logic PHTUpdateEN;
logic BPClassWrongNonCFI;
logic BPClassWrongCFI;
logic BPClassRightNonCFI;
flopenr #(k) GlobalHistoryRegister(.clk(clk),
.reset(reset),
.en(UpdateEN),
.d(GHRFNext),
.q(GHRF));
logic [6:0] GHRMuxSel;
logic GHRUpdateEN;
logic [k-1:0] GHRLookup;
assign BPClassRightNonCFI = ~BPInstrClassE[0] & ~InstrClassE[0];
assign BPClassWrongCFI = ~BPInstrClassE[0] & InstrClassE[0];
assign BPClassWrongNonCFI = BPInstrClassE[0] & ~InstrClassE[0];
assign BPClassRightBPWrong = BPInstrClassE[0] & InstrClassE[0] & BPPredDirWrongE;
assign BPClassRightBPRight = BPInstrClassE[0] & InstrClassE[0] & ~BPPredDirWrongE;
// GHR update selection, 1 hot encoded.
assign GHRMuxSel[0] = ~BPInstrClassF[0] & (BPClassRightNonCFI | BPClassRightBPRight);
assign GHRMuxSel[1] = BPClassWrongCFI & ~BPInstrClassD[0];
assign GHRMuxSel[2] = BPClassWrongNonCFI & ~BPInstrClassD[0];
assign GHRMuxSel[3] = (BPClassRightBPWrong & ~BPInstrClassD[0]) | (BPClassWrongCFI & BPInstrClassD[0]);
assign GHRMuxSel[4] = BPClassWrongNonCFI & BPInstrClassD[0];
assign GHRMuxSel[5] = InstrClassE[0] & BPClassRightBPWrong & BPInstrClassD[0];
assign GHRMuxSel[6] = BPInstrClassF[0] & (BPClassRightNonCFI | (InstrClassE[0] & BPClassRightBPRight));
assign GHRUpdateEN = (| GHRMuxSel[5:1] & ~StallE) | GHRMuxSel[6] & ~StallF;
logic [1:0] PredictionMemory;
logic DoForwarding, DoForwardingF;
logic [1:0] UpdatePredictionF;
// hoping this created a AND-OR mux.
always_comb begin
case (GHRMuxSel)
7'b000_0001: GHRNext = GHR[k-1+2:0]; // no change
7'b000_0010: GHRNext = {GHR[k-2+2:0], PCSrcE}; // branch update
7'b000_0100: GHRNext = {1'b0, GHR[k+1:1]}; // repair 1
7'b000_1000: GHRNext = {GHR[k-1+2:1], PCSrcE}; // branch update with mis prediction correction
7'b001_0000: GHRNext = {2'b00, GHR[k+1:2]}; // repair 2
7'b010_0000: GHRNext = {1'b0, GHR[k+1:2], PCSrcE}; // branch update + repair 1
7'b100_0000: GHRNext = {GHR[k-2+2:0], BPPredF[1]}; // speculative update
default: GHRNext = GHR[k-1+2:0];
endcase
end
flopenr #(k+2) GlobalHistoryRegister(.clk(clk),
.reset(reset),
.en((GHRUpdateEN)),
.d(GHRNext),
.q(GHR));
// if actively updating the GHR at the time of prediction we want to use
// GHRNext as the lookup rather than GHR.
assign PHTUpdateAdr0 = InstrClassE[0] ? GHR[k:1] : GHR[k-1:0];
assign PHTUpdateAdr1 = InstrClassE[0] ? GHR[k+1:2] : GHR[k:1];
assign PHTUpdateAdr = BPInstrClassD[0] ? PHTUpdateAdr1 : PHTUpdateAdr0;
assign PHTUpdateEN = InstrClassE[0] & ~StallE;
assign GHRLookup = |GHRMuxSel[6:1] ? GHRNext[k-1:0] : GHR[k-1:0];
// Make Prediction by reading the correct address in the PHT and also update the new address in the PHT
// GHR referes to the address that the past k branches points to in the prediction stage
// GHRE refers to the address that the past k branches points to in the exectution stage
SRAM2P1R1W #(k, 2) PHT(.clk(clk),
.reset(reset),
.RA1(GHRF),
.RD1(PredictionMemory),
.REN1(~StallF),
.WA1(GHRFNext),
.WD1(UpdatePrediction),
.WEN1(UpdateEN),
.BitWEN1(2'b11));
SRAM2P1R1W #(k, 2) PHT(.clk(clk),
.reset(reset),
//.RA1(GHR[k-1:0]),
.RA1(GHRLookup),
.RD1(BPPredF),
.REN1(~StallF),
.WA1(PHTUpdateAdr),
.WD1(UpdateBPPredE),
.WEN1(PHTUpdateEN),
.BitWEN1(2'b11));
// need to forward when updating to the same address as reading.
// first we compare to see if the update and lookup addreses are the same
assign DoForwarding = GHRF == GHRFNext;
// register the update value and the forwarding signal into the Fetch stage
// TODO: add stall logic ***
flopr #(1) DoForwardingReg(.clk(clk),
.reset(reset),
.d(DoForwarding),
.q(DoForwardingF));
flopr #(2) UpdatePredictionReg(.clk(clk),
.reset(reset),
.d(UpdatePrediction),
.q(UpdatePredictionF));
assign Prediction = DoForwardingF ? UpdatePredictionF : PredictionMemory;
//pipeline for GHR
/*flopenrc #(k) GHRDReg(.clk(clk),
.reset(reset),
.en(~StallD),
.clear(FlushD),
.d(GHRF),
.q(GHRD));
flopenrc #(k) GHREReg(.clk(clk),
.reset(reset),
.en(~StallE),
.clear(FlushE),
.d(GHRD),
.q(GHRE));
*/
endmodule

View File

@ -1,128 +0,0 @@
///////////////////////////////////////////
// gshare.sv
//
// Written: Shreya Sanghai
// Email: ssanghai@hmc.edu
// Created: March 16, 2021
// Modified:
//
// Purpose: Gshare predictor with parameterized global history register
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// gsharePredictor (original version): direction predictor that indexes a
// table of 2-bit entries (PHT) with a k-bit global history register XORed
// with PC bits [k:1].
//   LookUpPC / Prediction : PC used to form the read index, and the 2-bit
//                           value returned for it
//   UpdatePC / UpdateEN / PCSrcE / UpdatePrediction :
//                           update port; when UpdateEN is set the history
//                           register shifts in PCSrcE and the PHT is written
//                           with UpdatePrediction
// StallD, StallE and the Flush* inputs are only referenced by the
// commented-out pipeline registers at the bottom of the module.
module gsharePredictor
#(parameter int k = 10
)
(input logic clk,
input logic reset,
input logic StallF, StallD, StallE, FlushF, FlushD, FlushE,
input logic [`XLEN-1:0] LookUpPC,
output logic [1:0] Prediction,
// update
input logic [`XLEN-1:0] UpdatePC,
input logic UpdateEN, PCSrcE,
input logic [1:0] UpdatePrediction
);
// current global history and its next value
logic [k-1:0] GHRF, GHRFNext;
//logic [k-1:0] LookUpPCIndexD, LookUpPCIndexE;
// PHT read and write addresses
logic [k-1:0] LookUpPCIndex, UpdatePCIndex;
// raw PHT read data, before forwarding
logic [1:0] PredictionMemory;
// forwarding decision (combinational) and its registered copy
logic DoForwarding, DoForwardingF;
// registered copy of the value being written to the PHT
logic [1:0] UpdatePredictionF;
// shift the history right by one, inserting the newest outcome at the MSB
assign GHRFNext = {PCSrcE, GHRF[k-1:1]};
flopenr #(k) GlobalHistoryRegister(.clk(clk),
.reset(reset),
.en(UpdateEN),
.d(GHRFNext),
.q(GHRF));
// for gshare xor the PC with the GHR
// the write index uses the next history value, the read index the current one
assign UpdatePCIndex = GHRFNext ^ UpdatePC[k:1];
assign LookUpPCIndex = GHRF ^ LookUpPC[k:1];
// Make Prediction by reading the correct address in the PHT and also update the new address in the PHT
// GHR refers to the address that the past k branches points to in the prediction stage
// GHRE refers to the address that the past k branches points to in the execution stage
SRAM2P1R1W #(k, 2) PHT(.clk(clk),
.reset(reset),
.RA1(LookUpPCIndex),
.RD1(PredictionMemory),
.REN1(~StallF),
.WA1(UpdatePCIndex),
.WD1(UpdatePrediction),
.WEN1(UpdateEN),
.BitWEN1(2'b11));
// need to forward when updating to the same address as reading.
// first we compare to see if the update and lookup addresses are the same
// NOTE(review): the comparison is not qualified by UpdateEN -- confirm intended
assign DoForwarding = LookUpPCIndex == UpdatePCIndex;
// register the update value and the forwarding signal into the Fetch stage
// TODO: add stall logic ***
flopr #(1) DoForwardingReg(.clk(clk),
.reset(reset),
.d(DoForwarding),
.q(DoForwardingF));
flopr #(2) UpdatePredictionReg(.clk(clk),
.reset(reset),
.d(UpdatePrediction),
.q(UpdatePredictionF));
// select the registered update value instead of the PHT read data when the
// previous cycle's read and write indices matched
assign Prediction = DoForwardingF ? UpdatePredictionF : PredictionMemory;
//pipeline for GHR
/* -----\/----- EXCLUDED -----\/-----
flopenrc #(k) LookUpDReg(.clk(clk),
.reset(reset),
.en(~StallD),
.clear(FlushD),
.d(LookUpPCIndex),
.q(LookUpPCIndexD));
flopenrc #(k) LookUpEReg(.clk(clk),
.reset(reset),
.en(~StallE),
.clear(FlushE),
.d(LookUpPCIndexD),
.q(LookUpPCIndexE));
-----/\----- EXCLUDED -----/\----- */
/* flopenrc #(k) GHRRegD(.clk(clk),
.reset(reset),
.en(~StallD),
.clear(FlushD),
.d(GHRF),
.q(GHRD));
flopenrc #(k) GHRRegE(.clk(clk),
.reset(reset),
.en(~StallE),
.clear(FlushE),
.d(GHRD),
.q(GHRE));
*/
endmodule

View File

@ -0,0 +1,120 @@
///////////////////////////////////////////
// gsharePredictor.sv
//
// Written: Shreya Sanghai
// Email: ssanghai@hmc.edu
// Created: March 16, 2021
// Modified:
//
// Purpose: Gshare predictor with parameterized global history register
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// gsharePredictor: direction predictor that indexes a table of 2-bit
// entries (PHT) with global branch history XORed with PC bits [k:1].
// The history register is k+2 bits wide; it is updated according to
// GHRMuxSel (speculative shift-in of BPPredF[1], shift-in of PCSrcE, or
// "repair" shifts in the opposite direction) as selected by the E-stage
// class/direction checks below.
//   PCNextF / BPPredF      : PC used to form the PHT read index, and the
//                            2-bit PHT entry read for it
//   InstrClassE[0]         : qualifies the PHT write and the "Right/Wrong"
//                            class comparisons
//   BPInstrClass{F,D,E}[0] : predicted-class bits for the F, D and E stages
//   BPPredDirWrongE        : direction misprediction flag from E stage
//   PCE / PCSrcE           : PC used for the PHT write index / outcome bit
//                            shifted into the history
//   UpdateBPPredE          : 2-bit value written into the PHT
// Only bit 0 of the class buses is used. StallD and the Flush* inputs are
// not used inside this module.
module gsharePredictor
  #(parameter int k = 10
    )
  (input logic             clk,
   input logic 		   reset,
   input logic 		   StallF, StallD, StallE, FlushF, FlushD, FlushE,
   input logic [`XLEN-1:0] PCNextF,
   output logic [1:0] 	   BPPredF,
   // update
   input logic [4:0] 	   InstrClassE,
   input logic [4:0] 	   BPInstrClassE,
   input logic [4:0] 	   BPInstrClassD,
   input logic [4:0] 	   BPInstrClassF,
   input logic 		   BPPredDirWrongE,
   input logic [`XLEN-1:0] PCE,
   input logic 		   PCSrcE,
   input logic [1:0] 	   UpdateBPPredE
   );

  logic [k+1:0] 	   GHR, GHRNext;
  logic [k-1:0] 	   PHTUpdateAdr, PHTUpdateAdr0, PHTUpdateAdr1;
  logic 		   PHTUpdateEN;
  logic 		   BPClassWrongNonCFI;
  logic 		   BPClassWrongCFI;
  logic 		   BPClassRightNonCFI;
  // Declared explicitly: these were previously implicit nets, which fail
  // under `default_nettype none and trigger lint warnings.
  logic 		   BPClassRightBPWrong;
  logic 		   BPClassRightBPRight;

  logic [6:0] 		   GHRMuxSel;
  logic 		   GHRUpdateEN;
  logic [k-1:0] 	   GHRLookup;

  // Compare the predicted class bit (BPInstrClassE[0]) with the actual one
  // (InstrClassE[0]); when both are set, split on the direction check.
  assign BPClassRightNonCFI  = ~BPInstrClassE[0] & ~InstrClassE[0];
  assign BPClassWrongCFI     = ~BPInstrClassE[0] &  InstrClassE[0];
  assign BPClassWrongNonCFI  =  BPInstrClassE[0] & ~InstrClassE[0];
  assign BPClassRightBPWrong =  BPInstrClassE[0] &  InstrClassE[0] &  BPPredDirWrongE;
  assign BPClassRightBPRight =  BPInstrClassE[0] &  InstrClassE[0] & ~BPPredDirWrongE;

  // GHR update selection, intended to be 1 hot encoded.
  assign GHRMuxSel[0] = ~BPInstrClassF[0] & (BPClassRightNonCFI | BPClassRightBPRight);
  assign GHRMuxSel[1] = BPClassWrongCFI & ~BPInstrClassD[0];
  assign GHRMuxSel[2] = BPClassWrongNonCFI & ~BPInstrClassD[0];
  assign GHRMuxSel[3] = (BPClassRightBPWrong & ~BPInstrClassD[0]) | (BPClassWrongCFI & BPInstrClassD[0]);
  assign GHRMuxSel[4] = BPClassWrongNonCFI & BPInstrClassD[0];
  assign GHRMuxSel[5] = InstrClassE[0] & BPClassRightBPWrong & BPInstrClassD[0];
  assign GHRMuxSel[6] = BPInstrClassF[0] & (BPClassRightNonCFI | (InstrClassE[0] & BPClassRightBPRight));
  // Selections 5:1 are gated by the E-stage stall, the speculative update
  // (selection 6) by the F-stage stall.
  assign GHRUpdateEN = (| GHRMuxSel[5:1] & ~StallE) | GHRMuxSel[6] & ~StallF;

  // hoping this created a AND-OR mux.
  always_comb begin
    case (GHRMuxSel)
      7'b000_0001: GHRNext = GHR[k-1+2:0]; // no change
      7'b000_0010: GHRNext = {GHR[k-2+2:0], PCSrcE}; // branch update
      7'b000_0100: GHRNext = {1'b0, GHR[k+1:1]}; // repair 1
      7'b000_1000: GHRNext = {GHR[k-1+2:1], PCSrcE}; // branch update with mis prediction correction
      7'b001_0000: GHRNext = {2'b00, GHR[k+1:2]}; // repair 2
      7'b010_0000: GHRNext = {1'b0, GHR[k+1:2], PCSrcE}; // branch update + repair 1
      7'b100_0000: GHRNext = {GHR[k-2+2:0], BPPredF[1]}; // speculative update
      default:     GHRNext = GHR[k-1+2:0]; // any non-one-hot select: hold
    endcase
  end

  flopenr #(k+2) GlobalHistoryRegister(.clk(clk),
				       .reset(reset),
				       .en((GHRUpdateEN)),
				       .d(GHRNext),
				       .q(GHR));

  // PHT write address: pick a k-bit window of the GHR, offset by one
  // position for each of InstrClassE[0] and BPInstrClassD[0] that is set.
  assign PHTUpdateAdr0 = InstrClassE[0] ? GHR[k:1] : GHR[k-1:0];
  assign PHTUpdateAdr1 = InstrClassE[0] ? GHR[k+1:2] : GHR[k:1];
  assign PHTUpdateAdr  = BPInstrClassD[0] ? PHTUpdateAdr1 : PHTUpdateAdr0;
  assign PHTUpdateEN   = InstrClassE[0] & ~StallE;

  // if actively updating the GHR at the time of prediction we want to use
  // GHRNext as the lookup rather than GHR.
  assign GHRLookup = |GHRMuxSel[6:1] ? GHRNext[k-1:0] : GHR[k-1:0];

  // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT
  // Both addresses are the selected history bits XORed with PC bits [k:1].
  SRAM2P1R1W #(k, 2) PHT(.clk(clk),
			 .reset(reset),
			 //.RA1(GHR[k-1:0]),
			 .RA1(GHRLookup ^ PCNextF[k:1]),
			 .RD1(BPPredF),
			 .REN1(~StallF),
			 .WA1(PHTUpdateAdr ^ PCE[k:1]),
			 .WD1(UpdateBPPredE),
			 .WEN1(PHTUpdateEN),
			 .BitWEN1(2'b11));

endmodule // gsharePredictor

View File

@ -154,15 +154,16 @@ module icachecontroller #(parameter LINESIZE = 256) (
localparam STATE_MISS_SPILL_FETCH_DONE = 10; // write data into SRAM/LUT
localparam STATE_MISS_SPILL_READ1 = 11; // read block 0 from SRAM/LUT
localparam STATE_MISS_SPILL_2 = 12; // return to ready if hit or do second block update.
localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 13; // miss on block 1, issue read to AHB and wait
localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 14; // write data to SRAM/LUT
localparam STATE_MISS_SPILL_MERGE = 15; // read block 0 of CPU access,
localparam STATE_MISS_SPILL_2_START = 13; // return to ready if hit or do second block update.
localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 14; // miss on block 1, issue read to AHB and wait
localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 15; // write data to SRAM/LUT
localparam STATE_MISS_SPILL_MERGE = 16; // read block 0 of CPU access,
localparam STATE_MISS_SPILL_FINAL = 16; // this state replicates STATE_READY's replay of the
localparam STATE_MISS_SPILL_FINAL = 17; // this state replicates STATE_READY's replay of the
// spill access but does not consider spill. It also does not do another operation.
localparam STATE_INVALIDATE = 17; // *** not sure if invalidate or evict? invalidate by cache block or address?
localparam STATE_INVALIDATE = 18; // *** not sure if invalidate or evict? invalidate by cache block or address?
localparam AHBByteLength = `XLEN / 8;
localparam AHBOFFETWIDTH = $clog2(AHBByteLength);
@ -380,11 +381,20 @@ module icachecontroller #(parameter LINESIZE = 256) (
PCMux = 2'b10;
UnalignedSelect = 1'b1;
spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm.
ICacheReadEn = 1'b1;
NextState = STATE_MISS_SPILL_2_START;
end
STATE_MISS_SPILL_2_START: begin
if (~hit) begin
CntReset = 1'b1;
NextState = STATE_MISS_SPILL_MISS_FETCH_WDV;
end else begin
NextState = STATE_MISS_SPILL_FINAL;
NextState = STATE_READY;
ICacheReadEn = 1'b1;
PCMux = 2'b00;
UnalignedSelect = 1'b1;
SavePC = 1'b1;
ICacheStallF = 1'b0;
end
end
STATE_MISS_SPILL_MISS_FETCH_WDV: begin

View File

@ -154,14 +154,7 @@ module ifu (
generate
if (`BPRED_ENABLED == 1) begin : bpred
// I am making the port connection explicit for now as I want to see them and they will be changing.
bpred bpred(.clk(clk),
.reset(reset),
.StallF(StallF),
.StallD(StallD),
.StallE(StallE),
.FlushF(FlushF),
.FlushD(FlushD),
.FlushE(FlushE),
bpred bpred(.*,
.PCNextF(PCNextF),
.BPPredPCF(BPPredPCF),
.SelBPPredF(SelBPPredF),

View File

@ -2,7 +2,9 @@
// cam_line.sv
//
// Written: tfleming@hmc.edu & jtorrey@hmc.edu 6 April 2021
// Modified:
// Modified: kmacsaigoren@hmc.edu 1 June 2021
// Implemented SV48 on top of SV39. This included adding SvMode input signal and the wally constants
// Mostly this was done to make the PageNumberMixer work.
//
// Purpose: CAM line for the translation lookaside buffer (TLB)
// Determines whether a virtual address matches the stored key.
@ -24,12 +26,17 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-constants.vh"
module cam_line #(parameter KEY_BITS = 20,
parameter HIGH_SEGMENT_BITS = 10) (
input clk, reset,
// input to check which SvMode is running
input [`SVMODE_BITS-1:0] SvMode,
// The requested page number to compare against the key
input [KEY_BITS-1:0] VirtualPageNumber,
input [KEY_BITS-1:0] VirtualPageNumber,
// Signals to write a new entry to this line
input CAMLineWrite,
@ -38,10 +45,11 @@ module cam_line #(parameter KEY_BITS = 20,
// Flush this line (set valid to 0)
input TLBFlush,
// This entry is a key for a giga, mega, or kilopage.
// This entry is a key for a tera, giga, mega, or kilopage.
// PageType == 2'b00 --> kilopage
// PageType == 2'b01 --> megapage
// PageType == 2'b11 --> gigapage
// PageType == 2'b10 --> gigapage
// PageType == 2'b11 --> terapage
output [1:0] PageType, // *** should this be the stored version or the always updated one?
output Match
);
@ -67,9 +75,9 @@ module cam_line #(parameter KEY_BITS = 20,
flopenr #(KEY_BITS) keyflop(clk, reset, CAMLineWrite, VirtualPageNumber, Key);
// Calculate the actual query key based on the input key and the page type.
// For example, a megapage in sv39 only cares about VPN2 and VPN1, so VPN0
// For example, a megapage in SV39 only cares about VPN2 and VPN1, so VPN0
// should automatically match.
page_number_mixer #(KEY_BITS, HIGH_SEGMENT_BITS) mixer(VirtualPageNumber, Key, PageType, VirtualPageNumberQuery);
page_number_mixer #(KEY_BITS, HIGH_SEGMENT_BITS) mixer(VirtualPageNumber, Key, PageType, SvMode, VirtualPageNumberQuery);
assign Match = ({1'b1, VirtualPageNumberQuery} == {Valid, Key});

View File

@ -2,7 +2,11 @@
// page_number_mixer.sv
//
// Written: tfleming@hmc.edu & jtorrey@hmc.edu 6 April 2021
// Modified:
// Modified: kmacsaigoren@hmc.edu 1 June 2021
// Implemented SV48 on top of SV39. This included adding a 3rd Segment to each of the pagenumbers,
// Ensuring that the BITS and HIGH_SEGMENT_BITS inputs were correct everywhere this module gets instantiated,
// Adding several muxes to decide the bit selection to turn pagenumbers into segments based on SV mode,
// Adding support for terapage/newgigapage encoding.
//
// Purpose: Takes two page numbers and replaces segments of the first page
// number with segments from the second, based on the page type.
@ -25,22 +29,29 @@
///////////////////////////////////////////
`include "wally-config.vh"
`include "wally-constants.vh"
module page_number_mixer #(parameter BITS = 20,
parameter HIGH_SEGMENT_BITS = 10) (
input [BITS-1:0] PageNumber,
input [BITS-1:0] MixPageNumber,
input [1:0] PageType,
output [BITS-1:0] PageNumberCombined
input [BITS-1:0] PageNumber,
input [BITS-1:0] MixPageNumber,
input [1:0] PageType,
input [`SVMODE_BITS-1:0] SvMode,
output [BITS-1:0] PageNumberCombined
);
// The upper segment might have a different width than the lower segments.
// For example, an SV39 PTE has 26 bits for PPN2 and 9 bits for the other
// segments. This is outside the 'if XLEN' b/c the constant is already configured
// to the correct value for the XLEN in the relevant wally-constants.vh file.
localparam LOW_SEGMENT_BITS = `VPN_SEGMENT_BITS;
// *** each time this module is implemented, low segment bits is either
// `VPN_SEGMENT_BITS or `PPN_LOW_SEGMENT_BITS (if it existed)
// in every mode so far, these are the same, so it's left as it is above.
generate
// *** Just checking XLEN is not enough to support sv39 AND sv48.
if (`XLEN == 32) begin
// The upper segment might have a different width than the lower segments.
// For example, an sv39 PTE has 26 bits for PPN2 and 9 bits for the other
// segments.
localparam LOW_SEGMENT_BITS = (BITS - HIGH_SEGMENT_BITS);
logic [HIGH_SEGMENT_BITS-1:0] Segment1, MixSegment1, Segment1Combined;
logic [LOW_SEGMENT_BITS-1:0] Segment0, MixSegment0, Segment0Combined;
@ -58,28 +69,60 @@ module page_number_mixer #(parameter BITS = 20,
// Reswizzle segments of the combined page number
assign PageNumberCombined = {Segment1Combined, Segment0Combined};
end else begin
// The upper segment might have a different width than the lower segments.
// For example, an sv39 PTE has 26 bits for PPN2 and 9 bits for the other
// segments.
localparam LOW_SEGMENT_BITS = (BITS - HIGH_SEGMENT_BITS) / 2;
logic [HIGH_SEGMENT_BITS-1:0] Segment2, MixSegment2, Segment2Combined;
// After segment 0 and 1 of the page number, the width of each segment is dependant on the SvMode.
// For this reason, each segment bus is the width of its widest value across each mode
// when a smaller value needs to be loaded in to a wider bus, it's loaded in the least significant bits
// and left padded with zeros. MAKE SURE that if a value is being padded with zeros here,
// that it's padded with zeros everywhere else in the MMU and beyond to avoid false misses in the TLB.
logic [HIGH_SEGMENT_BITS-1:0] Segment3, MixSegment3, Segment3Combined;
logic [HIGH_SEGMENT_BITS + LOW_SEGMENT_BITS-1:0] Segment2, MixSegment2, Segment2Combined;
logic [LOW_SEGMENT_BITS-1:0] Segment1, MixSegment1, Segment1Combined;
logic [LOW_SEGMENT_BITS-1:0] Segment0, MixSegment0, Segment0Combined;
// Unswizzle segments of the input page number
assign {Segment2, Segment1, Segment0} = PageNumber;
assign {MixSegment2, MixSegment1, MixSegment0} = MixPageNumber;
// *** these muxes assume that only Sv48 and SV39 are implemented in rv64. for future SV57 and up,
// there will have to be more muxes to select which value each segment gets.
// as a cool reminder: BITS is the width of the page number, virt or phys, coming into this module
// while high segment bits is the width of the highest segment of that page number.
// Note for future work: this module has to work with both VPNs and PPNs and due to their differing
// widths and the fact that the ppn has one longer segment at the top makes the muxes below very confusing.
// Potentially very annoying thing for future workers: the number of bits in a ppn is always 44 (for SV39 and SV48)
// but in SV57 and above, this might be a new longer length. In that case these selectors will most likely
// become even more complicated and confusing.
assign Segment3 = (SvMode == `SV48) ?
PageNumber[BITS-1:3*LOW_SEGMENT_BITS] : // take the top segment or not
{HIGH_SEGMENT_BITS{1'b0}}; // for virtual page numbers in SV39, both options should be zeros.
assign Segment2 = (SvMode == `SV48) ?
{{HIGH_SEGMENT_BITS{1'b0}}, PageNumber[3*LOW_SEGMENT_BITS-1:2*LOW_SEGMENT_BITS]} : // just take another low segment left padded with zeros.
PageNumber[BITS-1:2*LOW_SEGMENT_BITS]; // otherwise take the rest of the PageNumber
assign Segment1 = PageNumber[2*LOW_SEGMENT_BITS-1:LOW_SEGMENT_BITS];
assign Segment0 = PageNumber[LOW_SEGMENT_BITS-1:0];
assign MixSegment3 = (SvMode == `SV48) ?
MixPageNumber[BITS-1:3*LOW_SEGMENT_BITS] : // take the top segment or not
{HIGH_SEGMENT_BITS{1'b0}}; // for virtual page numbers in SV39, both options should be zeros.
assign MixSegment2 = (SvMode == `SV48) ?
{{HIGH_SEGMENT_BITS{1'b0}}, MixPageNumber[3*LOW_SEGMENT_BITS-1:2*LOW_SEGMENT_BITS]} : // just take another low segment left padded with zeros.
MixPageNumber[BITS-1:2*LOW_SEGMENT_BITS]; // otherwise take the rest of the PageNumber
assign MixSegment1 = MixPageNumber[2*LOW_SEGMENT_BITS-1:LOW_SEGMENT_BITS];
assign MixSegment0 = MixPageNumber[LOW_SEGMENT_BITS-1:0];
// Pass through the high segment
assign Segment2Combined = Segment2;
assign Segment3Combined = Segment3;
// Either pass through or zero out segments 1 and 0 based on the page type
mux2 #(LOW_SEGMENT_BITS) segment1mux(Segment1, MixSegment1, PageType[1], Segment1Combined);
mux2 #(LOW_SEGMENT_BITS) segment0mux(Segment0, MixSegment0, PageType[0], Segment0Combined);
// Either pass through or zero out lower segments based on the page type
assign Segment2Combined = (PageType[1] && PageType[0]) ? MixSegment2 : Segment2; // terapage (page == 11)
assign Segment1Combined = (PageType[1]) ? MixSegment1 : Segment1; // gigapage and higher (page == 10 or 11)
assign Segment0Combined = (PageType[1] || PageType[0]) ? MixSegment0 : Segment0; // megapage and higher (page == 01 or 10 or 11)
// Reswizzle segments of the combined page number
assign PageNumberCombined = {Segment2Combined, Segment1Combined, Segment0Combined};
assign PageNumberCombined = (SvMode == `SV48) ?
{Segment3Combined, Segment2Combined[LOW_SEGMENT_BITS-1:0], Segment1Combined, Segment0Combined} :
{Segment2Combined, Segment1Combined, Segment0Combined};
end
endgenerate
endmodule

View File

@ -2,7 +2,10 @@
// pagetablewalker.sv
//
// Written: tfleming@hmc.edu 2 March 2021
// Modified:
// Modified: kmacsaigoren@hmc.edu 1 June 2021
// implemented SV48 on top of SV39. This included, adding a level of the FSM for the extra page number segment
// adding support for terapage encoding, and for setting the TranslationPAdr using the new level,
// adding the internal SvMode signal
//
// Purpose: Page Table Walker
// Part of the Memory Management Unit (MMU)
@ -70,6 +73,7 @@ module pagetablewalker (
logic [`XLEN-1:0] SavedPTE, CurrentPTE;
logic [`PA_BITS-1:0] TranslationPAdr;
logic [`PPN_BITS-1:0] CurrentPPN;
logic [`SVMODE_BITS-1:0] SvMode;
logic MemStore;
// PTE Control Bits
@ -82,6 +86,8 @@ module pagetablewalker (
logic [`XLEN-1:0] PageTableEntry;
logic [1:0] PageType;
assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS];
assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0];
assign MemStore = MemRWM[0];
@ -105,11 +111,12 @@ module pagetablewalker (
assign PageTypeF = PageType;
assign PageTypeM = PageType;
localparam IDLE = 3'h0;
localparam LEVEL0 = 3'h0;
localparam LEVEL1 = 3'h1;
localparam LEVEL0 = 3'h2;
localparam LEAF = 3'h3;
localparam FAULT = 3'h4;
// space left for more levels
localparam LEAF = 3'h5;
localparam IDLE = 3'h6;
localparam FAULT = 3'h7;
logic [2:0] WalkerState, NextWalkerState;
@ -208,18 +215,32 @@ module pagetablewalker (
assign MMUPAdr = TranslationPAdr[31:0];
end else begin
localparam LEVEL2 = 3'h5;
localparam LEVEL2 = 3'h2;
localparam LEVEL3 = 3'h3;
logic [8:0] VPN2, VPN1, VPN0;
logic [8:0] VPN3, VPN2, VPN1, VPN0;
logic GigapageMisaligned, BadGigapage;
logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage;
flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
always_comb begin
case (WalkerState)
IDLE: if (MMUTranslate) NextWalkerState = LEVEL2;
IDLE: if (MMUTranslate) NextWalkerState = LEVEL3;
else NextWalkerState = IDLE;
LEVEL3: if (SvMode != `SV48) NextWalkerState = LEVEL2;
// 3rd level used if SV48 is enabled.
else begin
if (~MMUReady) NextWalkerState = LEVEL3;
// *** <FUTURE WORK> According to the architecture, we should
// fault upon finding a superpage that is misaligned or has 0
// access bit. The following commented line of code is
// supposed to perform that check. However, it is untested.
else if (ValidPTE && LeafPTE && ~BadTerapage) NextWalkerState = LEAF;
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL2;
else NextWalkerState = FAULT;
end
LEVEL2: if (~MMUReady) NextWalkerState = LEVEL2;
// *** <FUTURE WORK> According to the architecture, we should
// fault upon finding a superpage that is misaligned or has 0
@ -242,24 +263,29 @@ module pagetablewalker (
else if (ValidPTE && LeafPTE && ~AccessAlert)
NextWalkerState = LEAF;
else NextWalkerState = FAULT;
LEAF: if (MMUTranslate) NextWalkerState = LEVEL2;
LEAF: if (MMUTranslate) NextWalkerState = LEVEL3;
else NextWalkerState = IDLE;
FAULT: if (MMUTranslate) NextWalkerState = LEVEL2;
FAULT: if (MMUTranslate) NextWalkerState = LEVEL3;
else NextWalkerState = IDLE;
// Default case should never happen, but is included for linter.
default: NextWalkerState = IDLE;
endcase
end
// A terapage is a level 3 leaf page. This page must have zero PPN[2],
// zero PPN[1], and zero PPN[0]
assign TerapageMisaligned = |(CurrentPPN[26:0]);
// A gigapage is a Level 2 leaf page. This page must have zero PPN[1] and
// zero PPN[0]
assign GigapageMisaligned = |(CurrentPPN[17:0]);
// A megapage is a Level 1 leaf page. This page must have zero PPN[0].
assign MegapageMisaligned = |(CurrentPPN[8:0]);
assign BadTerapage = TerapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme
assign BadGigapage = GigapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme
assign BadMegapage = MegapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme
assign VPN3 = TranslationVAdr[47:39];
assign VPN2 = TranslationVAdr[38:30];
assign VPN1 = TranslationVAdr[29:21];
assign VPN0 = TranslationVAdr[20:12];
@ -282,8 +308,13 @@ module pagetablewalker (
IDLE: begin
MMUStall = '0;
end
LEVEL3: begin
TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000};
// *** This is a potential breaking point: the walker passes through LEVEL3 on every translation,
// even when SV48 is off. What should TranslationPAdr be in LEVEL3 when SV48 is disabled?
end
LEVEL2: begin
TranslationPAdr = {BasePageTablePPN, VPN2, 3'b000};
TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000};
end
LEVEL1: begin
TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
@ -295,8 +326,9 @@ module pagetablewalker (
// Keep physical address alive to prevent HADDR dropping to 0
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
PageTableEntry = CurrentPTE;
PageType = (WalkerState == LEVEL2) ? 2'b11 :
((WalkerState == LEVEL1) ? 2'b01 : 2'b00);
PageType = (WalkerState == LEVEL3) ? 2'b11 :
((WalkerState == LEVEL2) ? 2'b10 :
((WalkerState == LEVEL1) ? 2'b01 : 2'b00));
DTLBWriteM = DTLBMissM;
ITLBWriteF = ~DTLBMissM; // Prefer data over instructions
end

View File

@ -4,7 +4,11 @@
// Written: tfleming@hmc.edu & jtorrey@hmc.edu 7 April 2021
// Based on implementation from https://www.allaboutcircuits.com/ip-cores/communication-controller/priority-encoder/
// *** Give proper LGPL attribution for above source
// Modified:
// Modified: Teo Ene 15 Apr 2021:
// Temporarily removed parameterized priority encoder for non-parameterized one
// To get synthesis working quickly
// Kmacsaigoren@hmc.edu 28 May 2021:
// Added working version of parameterized priority encoder.
//
// Purpose: One-hot encoding to binary encoder
//
@ -27,51 +31,33 @@
`include "wally-config.vh"
// Teo Ene 04/15:
// Temporarily removed parameterized priority encoder for non-parameterized one
// To get synthesis working quickly
module priority_encoder #(parameter BINARY_BITS = 3) (
input logic [7:0] one_hot,
output logic [2:0] binary
input logic [2**BINARY_BITS - 1:0] one_hot,
output logic [BINARY_BITS - 1:0] binary
);
// localparam ONE_HOT_BITS = 2**BINARY_BITS;
/*
genvar i, j;
generate
for (i = 0; i < ONE_HOT_BITS; i++) begin
for (j = 0; j < BINARY_BITS; j++) begin
if (i[j]) begin
assign binary[j] = one_hot[i];
end
end
end
endgenerate
*/
/*
logic [BINARY_BITS-1:0] binary_comb;
integer i;
always_comb begin
binary_comb = 0;
for (int i = 0; i < ONE_HOT_BITS; i++)
if (one_hot[i]) binary_comb = i;
binary = 0;
for (i = 0; i < 2**BINARY_BITS; i++) begin
if (one_hot[i]) binary = i; // prioritizes the most significant bit
end
end
// *** triple check synthesizability here
assign binary = binary_comb;
// Ideally this mimics the following:
/*
always_comb begin
casex (one_hot)
1xx ... x: binary = 2**BINARY_BITS - 1;
01x ... x: binary = 2**BINARY_BITS - 2;
001 ... x: binary = 2**BINARY_BITS - 3;
{...}
00 ... 1xx: binary = 2;
00 ... 01x: binary = 1;
00 ... 001: binary = 0;
end
*/
always_comb
case (one_hot)
8'h1: binary=3'h0;
8'h2: binary=3'h1;
8'h4: binary=3'h2;
8'h8: binary=3'h3;
8'h10: binary=3'h4;
8'h20: binary=3'h5;
8'h40: binary=3'h6;
8'h80: binary=3'h7;
default: binary=3'h0; //should never happen
endcase
endmodule

View File

@ -2,7 +2,9 @@
// tlb.sv
//
// Written: jtorrey@hmc.edu 16 February 2021
// Modified:
// Modified: kmacsaigoren@hmc.edu 1 June 2021
// Implemented SV48 on top of SV39. This included adding the SvMode signal,
// and using it to decide the translate signal and get the virtual page number
//
// Purpose: Translation lookaside buffer
// Cache of virtual-to-physical address translations
@ -25,7 +27,7 @@
///////////////////////////////////////////
/**
* sv32 specs
* SV32 specs
* ----------
* Virtual address [31:0] (32 bits)
* [________________________________]
@ -85,14 +87,11 @@ module tlb #(parameter ENTRY_BITS = 3,
output TLBPageFault
);
logic SvMode;
logic Translate;
logic TLBAccess, ReadAccess, WriteAccess;
// *** If we want to support multiple virtual memory modes (ie sv39 AND sv48),
// we could have some muxes that control which parameters are current.
// Although then some of the signals are not big enough. But that's a problem
// for much later.
// Store current virtual memory mode (SV32, SV39, SV48, etc.)
logic [`SVMODE_BITS-1:0] SvMode;
// Index (currently random) to write the next TLB entry
logic [ENTRY_BITS-1:0] WriteIndex;
@ -116,17 +115,24 @@ module tlb #(parameter ENTRY_BITS = 3,
// Whether the virtual address has a match in the CAM
logic CAMHit;
// Grab the sv bit from SATP
// Grab the sv mode from SATP
assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS];
// The bus width is always the largest it could be for that XLEN. For example, the VPN bus is 36 bits wide
// in rv64, even though the VPN itself could be 27 bits (SV39) or 36 bits (SV48) wide. When the VPN is
// narrower than the bus, the unused upper bits are padded with zeros on the left of the full value.
generate
if (`XLEN == 32) begin
assign SvMode = SATP_REGW[31]; // *** change to an enum somehow?
assign VirtualPageNumber = VirtualAddress[`VPN_BITS+11:12];
end else begin
assign SvMode = SATP_REGW[63]; // currently just a boolean whether translation enabled
assign VirtualPageNumber = (SvMode == `SV48) ?
VirtualAddress[`VPN_BITS+11:12] :
{{`VPN_SEGMENT_BITS{1'b0}}, VirtualAddress[3*`VPN_SEGMENT_BITS+11:12]};
end
endgenerate
// Whether translation should occur
assign Translate = SvMode & (PrivilegeModeW != `M_MODE);
assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE);
// Determine how the TLB is currently being used
// Note that we use ReadAccess for both loads and instruction fetches
@ -134,7 +140,7 @@ module tlb #(parameter ENTRY_BITS = 3,
assign WriteAccess = TLBAccessType[0];
assign TLBAccess = ReadAccess || WriteAccess;
assign VirtualPageNumber = VirtualAddress[`VPN_BITS+11:12];
assign PageOffset = VirtualAddress[11:0];
// TLB entries are evicted according to the LRU algorithm
@ -191,6 +197,7 @@ module tlb #(parameter ENTRY_BITS = 3,
physical_mixer(PhysicalPageNumber,
{{EXTRA_PHYSICAL_BITS{1'b0}}, VirtualPageNumber},
HitPageType,
SvMode,
PhysicalPageNumberMixed);
// Provide physical address only on TLBHits to cause catastrophic errors if

View File

@ -2,7 +2,9 @@
// tlb_cam.sv
//
// Written: jtorrey@hmc.edu 16 February 2021
// Modified:
// Modified: kmacsaigoren@hmc.edu 1 June 2021
// Implemented SV48 on top of SV39. This included adding the SvMode signal input and wally constants
// Mostly this was to make the cam_lines work.
//
// Purpose: Stores virtual page numbers with cached translations.
// Determines whether a given virtual page number is in the TLB.
@ -24,18 +26,21 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-constants.vh"
module tlb_cam #(parameter ENTRY_BITS = 3,
parameter KEY_BITS = 20,
parameter HIGH_SEGMENT_BITS = 10) (
input clk, reset,
input [KEY_BITS-1:0] VirtualPageNumber,
input [1:0] PageTypeWrite,
input [ENTRY_BITS-1:0] WriteIndex,
input TLBWrite,
input TLBFlush,
output [ENTRY_BITS-1:0] VPNIndex,
output [1:0] HitPageType,
output CAMHit
input clk, reset,
input [KEY_BITS-1:0] VirtualPageNumber,
input [1:0] PageTypeWrite,
input [ENTRY_BITS-1:0] WriteIndex,
input [`SVMODE_BITS-1:0] SvMode,
input TLBWrite,
input TLBFlush,
output [ENTRY_BITS-1:0] VPNIndex,
output [1:0] HitPageType,
output CAMHit
);
localparam NENTRIES = 2**ENTRY_BITS;

File diff suppressed because it is too large Load Diff

View File

@ -47,13 +47,13 @@ module muldiv (
logic [`XLEN-1:0] MulDivResultE, MulDivResultM;
logic [`XLEN-1:0] PrelimResultE;
logic [`XLEN-1:0] QuotE, RemE;
//logic [`XLEN-1:0] Q, R;
logic [`XLEN*2-1:0] ProdE;
logic enable_q;
logic [2:0] Funct3E_Q;
logic div0error;
logic [`XLEN-1:0] N, D;
logic [`XLEN-1:0] Num0, Den0;
logic gclk;
logic DivStartE;
@ -70,15 +70,25 @@ module muldiv (
end
assign gclk = enable_q & clk;
// Handle sign extension for W-type instructions
if (`XLEN == 64) begin // RV64 has W-type instructions
assign Num0 = W64E ? {{32{SrcAE[31]&signedDivide}}, SrcAE[31:0]} : SrcAE;
assign Den0 = W64E ? {{32{SrcBE[31]&signedDivide}}, SrcBE[31:0]} : SrcBE;
end else begin // RV32 has no W-type instructions
assign Num0 = SrcAE;
assign Den0 = SrcBE;
end
// capture the Numerator/Denominator
flopenrc #(`XLEN) reg_num (.d(SrcAE), .q(N),
flopenrc #(`XLEN) reg_num (.d(Num0), .q(N),
.en(startDivideE), .clear(DivDoneE),
.reset(reset), .clk(~gclk));
flopenrc #(`XLEN) reg_den (.d(SrcBE), .q(D),
flopenrc #(`XLEN) reg_den (.d(Den0), .q(D),
.en(startDivideE), .clear(DivDoneE),
.reset(reset), .clk(~gclk));
assign signedDivide = (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]);
div div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide);
intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide);
// Added for debugging of start signal for divide
assign startDivideE = MulDivE&DivStartE&~DivBusyE;
@ -93,7 +103,6 @@ module muldiv (
// Select result
always_comb
// case (DivDoneE ? Funct3E_Q : Funct3E)
case (Funct3E)
3'b000: PrelimResultE = ProdE[`XLEN-1:0];
3'b001: PrelimResultE = ProdE[`XLEN*2-1:`XLEN];

View File

@ -166,12 +166,12 @@ string tests32f[] = '{
"rv64m/I-MULW-01", "3000",
"rv64m/I-DIV-01", "3000",
"rv64m/I-DIVU-01", "3000",
//"rv64m/I-DIVUW-01", "3000",
//"rv64m/I-DIVW-01", "3000",
"rv64m/I-DIVUW-01", "3000",
"rv64m/I-DIVW-01", "3000",
"rv64m/I-REM-01", "3000",
"rv64m/I-REMU-01", "3000"
//"rv64m/I-REMUW-01", "3000",
//"rv64m/I-REMW-01", "3000"
"rv64m/I-REMU-01", "3000",
"rv64m/I-REMUW-01", "3000",
"rv64m/I-REMW-01", "3000"
};
string tests64ic[] = '{
@ -320,11 +320,11 @@ string tests32f[] = '{
"rv32m/I-MUL-01", "2000",
"rv32m/I-MULH-01", "2000",
"rv32m/I-MULHSU-01", "2000",
"rv32m/I-MULHU-01", "2000"
//"rv32m/I-DIV-01", "2000",
//"rv32m/I-DIVU-01", "2000",
//"rv32m/I-REM-01", "2000",
//"rv32m/I-REMU-01", "2000"
"rv32m/I-MULHU-01", "2000",
"rv32m/I-DIV-01", "2000",
"rv32m/I-DIVU-01", "2000",
"rv32m/I-REM-01", "2000",
"rv32m/I-REMU-01", "2000"
};
string tests32ic[] = '{
@ -439,8 +439,11 @@ string tests32f[] = '{
string testsBP64[] = '{
"rv64BP/simple", "10000",
"rv64BP/mmm", "1000000",
"rv64BP/linpack_bench", "1000000",
"rv64BP/sieve", "1000000",
"rv64BP/qsort", "1000000",
"rv64BP/sieve", "1000000"
"rv64BP/dhrystone", "1000000"
};
string tests64p[] = '{