From fec40a1b75f8a929760baa22ee8becabbb84850a Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 25 May 2021 14:26:22 -0500 Subject: [PATCH 1/7] fixed bug with icache miss spill fsm branch. --- .../src/ifu/globalHistoryPredictor.sv | 78 +++++-------------- wally-pipelined/src/ifu/icache.sv | 22 ++++-- 2 files changed, 34 insertions(+), 66 deletions(-) diff --git a/wally-pipelined/src/ifu/globalHistoryPredictor.sv b/wally-pipelined/src/ifu/globalHistoryPredictor.sv index 087458df3..b2357ecce 100644 --- a/wally-pipelined/src/ifu/globalHistoryPredictor.sv +++ b/wally-pipelined/src/ifu/globalHistoryPredictor.sv @@ -32,76 +32,34 @@ module globalHistoryPredictor ) (input logic clk, input logic reset, - input logic StallF, StallD, StallE, FlushF, FlushD, FlushE, + input logic StallF, StallD, StallE, FlushF, FlushD, FlushE, input logic [`XLEN-1:0] LookUpPC, output logic [1:0] Prediction, // update input logic [`XLEN-1:0] UpdatePC, input logic UpdateEN, PCSrcE, input logic [1:0] UpdatePrediction - + ); - logic [k-1:0] GHRF, GHRFNext; - assign GHRFNext = {PCSrcE, GHRF[k-1:1]}; + logic [k-1:0] GHRF, GHRFNext; + assign GHRFNext = {PCSrcE, GHRF[k-1:1]}; - flopenr #(k) GlobalHistoryRegister(.clk(clk), - .reset(reset), - .en(UpdateEN), - .d(GHRFNext), - .q(GHRF)); - - - - logic [1:0] PredictionMemory; - logic DoForwarding, DoForwardingF; - logic [1:0] UpdatePredictionF; - + flopenr #(k) GlobalHistoryRegister(.clk(clk), + .reset(reset), + .en(UpdateEN), + .d(GHRFNext), + .q(GHRF)); // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT - // GHR referes to the address that the past k branches points to in the prediction stage - // GHRE refers to the address that the past k branches points to in the exectution stage - SRAM2P1R1W #(k, 2) PHT(.clk(clk), - .reset(reset), - .RA1(GHRF), - .RD1(PredictionMemory), - .REN1(~StallF), - .WA1(GHRFNext), - .WD1(UpdatePrediction), - .WEN1(UpdateEN), - .BitWEN1(2'b11)); + SRAM2P1R1W #(k, 2) 
PHT(.clk(clk), + .reset(reset), + .RA1(GHRF), + .RD1(Prediction), + .REN1(~StallF), + .WA1(GHRF), + .WD1(UpdatePrediction), + .WEN1(UpdateEN), + .BitWEN1(2'b11)); - // need to forward when updating to the same address as reading. - // first we compare to see if the update and lookup addreses are the same - assign DoForwarding = GHRF == GHRFNext; - - // register the update value and the forwarding signal into the Fetch stage - // TODO: add stall logic *** - flopr #(1) DoForwardingReg(.clk(clk), - .reset(reset), - .d(DoForwarding), - .q(DoForwardingF)); - - flopr #(2) UpdatePredictionReg(.clk(clk), - .reset(reset), - .d(UpdatePrediction), - .q(UpdatePredictionF)); - - assign Prediction = DoForwardingF ? UpdatePredictionF : PredictionMemory; - - //pipeline for GHR - /*flopenrc #(k) GHRDReg(.clk(clk), - .reset(reset), - .en(~StallD), - .clear(FlushD), - .d(GHRF), - .q(GHRD)); - - flopenrc #(k) GHREReg(.clk(clk), - .reset(reset), - .en(~StallE), - .clear(FlushE), - .d(GHRD), - .q(GHRE)); -*/ endmodule diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index 9e30a083a..4f51edd79 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -154,15 +154,16 @@ module icachecontroller #(parameter LINESIZE = 256) ( localparam STATE_MISS_SPILL_FETCH_DONE = 10; // write data into SRAM/LUT localparam STATE_MISS_SPILL_READ1 = 11; // read block 0 from SRAM/LUT localparam STATE_MISS_SPILL_2 = 12; // return to ready if hit or do second block update. - localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 13; // miss on block 1, issue read to AHB and wait - localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 14; // write data to SRAM/LUT - localparam STATE_MISS_SPILL_MERGE = 15; // read block 0 of CPU access, + localparam STATE_MISS_SPILL_2_START = 13; // return to ready if hit or do second block update. 
+ localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 14; // miss on block 1, issue read to AHB and wait + localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 15; // write data to SRAM/LUT + localparam STATE_MISS_SPILL_MERGE = 16; // read block 0 of CPU access, - localparam STATE_MISS_SPILL_FINAL = 16; // this state replicates STATE_READY's replay of the + localparam STATE_MISS_SPILL_FINAL = 17; // this state replicates STATE_READY's replay of the // spill access but does nto consider spill. It also does not do another operation. - localparam STATE_INVALIDATE = 17; // *** not sure if invalidate or evict? invalidate by cache block or address? + localparam STATE_INVALIDATE = 18; // *** not sure if invalidate or evict? invalidate by cache block or address? localparam AHBByteLength = `XLEN / 8; localparam AHBOFFETWIDTH = $clog2(AHBByteLength); @@ -380,11 +381,20 @@ module icachecontroller #(parameter LINESIZE = 256) ( PCMux = 2'b10; UnalignedSelect = 1'b1; spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm. + ICacheReadEn = 1'b1; + NextState = STATE_MISS_SPILL_2_START; + end + STATE_MISS_SPILL_2_START: begin if (~hit) begin CntReset = 1'b1; NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; end else begin - NextState = STATE_MISS_SPILL_FINAL; + NextState = STATE_READY; + ICacheReadEn = 1'b1; + PCMux = 2'b00; + UnalignedSelect = 1'b1; + SavePC = 1'b1; + ICacheStallF = 1'b0; end end STATE_MISS_SPILL_MISS_FETCH_WDV: begin From 7e84c3f51481d788f0714ce6f128a4eff881290b Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 27 May 2021 11:48:29 -0500 Subject: [PATCH 2/7] Updated benchmarking code. 
--- testsBP/crt0/Makefile | 4 ++-- testsBP/crt0/start.s | 7 +++---- testsBP/mibench_qsort/Makefile | 2 +- testsBP/sieve/Makefile | 2 +- testsBP/sieve/sieve.c | 20 ++++++++++---------- testsBP/simple/Makefile | 2 +- testsBP/simple/header.h | 1 + testsBP/simple/main.c | 1 + 8 files changed, 20 insertions(+), 19 deletions(-) diff --git a/testsBP/crt0/Makefile b/testsBP/crt0/Makefile index ab47384fc..b42e86cb8 100644 --- a/testsBP/crt0/Makefile +++ b/testsBP/crt0/Makefile @@ -9,7 +9,7 @@ MABI :=-mabi=lp64 LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles AFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64 -W -CFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64 -mcmodel=medany +CFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64 -mcmodel=medany -O2 AS=riscv64-unknown-elf-as CC=riscv64-unknown-elf-gcc AR=riscv64-unknown-elf-ar @@ -19,7 +19,7 @@ all: libcrt0.a %.o: %.s ${AS} ${AFLAGS} -c $< -o $@ -libcrt0.a: start.o +libcrt0.a: start.o pcnt_driver.o pre_main.o ${AR} -r $@ $^ clean: diff --git a/testsBP/crt0/start.s b/testsBP/crt0/start.s index 19a240d87..731a61e34 100644 --- a/testsBP/crt0/start.s +++ b/testsBP/crt0/start.s @@ -43,11 +43,10 @@ _start: - # set the stack pointer to the top of memory - # 0x8000_0000 + 64K - 8 bytes - li sp, 0x007FFFF8 + # set the stack pointer to the top of memory - 8 bytes (pointer size) + li sp, 0x07FFFFF8 - jal ra, main + jal ra, pre_main jal ra, _halt .section .text diff --git a/testsBP/mibench_qsort/Makefile b/testsBP/mibench_qsort/Makefile index f4d368392..b1cf7b679 100644 --- a/testsBP/mibench_qsort/Makefile +++ b/testsBP/mibench_qsort/Makefile @@ -8,7 +8,7 @@ MARCH :=-march=rv64ic MABI :=-mabi=lp64 LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map -CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align +CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align -O2 CC=riscv64-unknown-elf-gcc DA=riscv64-unknown-elf-objdump -d diff --git a/testsBP/sieve/Makefile b/testsBP/sieve/Makefile index 
1d38d123d..9c884f48a 100644 --- a/testsBP/sieve/Makefile +++ b/testsBP/sieve/Makefile @@ -8,7 +8,7 @@ MARCH :=-march=rv64ic MABI :=-mabi=lp64 LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map -CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align +CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align -O2 CC=riscv64-unknown-elf-gcc DA=riscv64-unknown-elf-objdump -d diff --git a/testsBP/sieve/sieve.c b/testsBP/sieve/sieve.c index e82074045..f7d36d957 100644 --- a/testsBP/sieve/sieve.c +++ b/testsBP/sieve/sieve.c @@ -66,21 +66,21 @@ int main () { ans = sieve (); //gettimeofday(&after , NULL); - if (ans != 1899) - printf ("Sieve result wrong, ans = %d, expected 1899", ans); + /* /\* /\\* if (ans != 1899) *\\/ *\/ */ + /* /\* /\\* printf ("Sieve result wrong, ans = %d, expected 1899", ans); *\\/ *\/ */ - //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); + /* /\* //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); *\/ */ - printf("Round 2\n"); - //gettimeofday(&before , NULL); + /* /\* printf("Round 2\n"); *\/ */ + /* //gettimeofday(&before , NULL); */ - ans = sieve (); - //gettimeofday(&after , NULL); - if (ans != 1899) - printf ("Sieve result wrong, ans = %d, expected 1899", ans); + /* ans = sieve (); */ + /* //gettimeofday(&after , NULL); */ + /* if (ans != 1899) */ + /* printf ("Sieve result wrong, ans = %d, expected 1899", ans); */ - //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); + /* //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); */ return 0; diff --git a/testsBP/simple/Makefile b/testsBP/simple/Makefile index 450aacaa4..4447f2843 100644 --- a/testsBP/simple/Makefile +++ b/testsBP/simple/Makefile @@ -8,7 +8,7 @@ MARCH :=-march=rv64ic MABI :=-mabi=lp64 LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map -CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align +CFLAGS =$(MARCH) 
$(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align -O2 CC=riscv64-unknown-elf-gcc DA=riscv64-unknown-elf-objdump -d diff --git a/testsBP/simple/header.h b/testsBP/simple/header.h index bfe014a4b..6def656f8 100644 --- a/testsBP/simple/header.h +++ b/testsBP/simple/header.h @@ -5,4 +5,5 @@ int fail(); int simple_csrbr_test(); int lbu_test(); int icache_spill_test(); +void global_hist_test(); #endif diff --git a/testsBP/simple/main.c b/testsBP/simple/main.c index 0d14fcfb8..036a351d1 100644 --- a/testsBP/simple/main.c +++ b/testsBP/simple/main.c @@ -2,6 +2,7 @@ int main(){ //int res = icache_spill_test(); + global_hist_test(); int res = 1; if (res < 0) { fail(); From 8a035104ac47678fc1de4fc1110511c5334233ae Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 27 May 2021 23:06:28 -0500 Subject: [PATCH 3/7] It's a bit sloppy, but the global history predictor is working correctly now. There were two major bugs with the predictor. First, the update mechanism was completely wrong. The PHT is updated with the GHR that was used to look up the prediction. PHT[GHR] = Sat2(PHT[GHR], branch outcome). Second, the GHR needs to be updated speculatively as the branch is predicted. This is important so that back-to-back branches' GHRs are not the same. They must be different to avoid aliasing. Speculation of the GHR update allows them to be different. On misprediction the GHR must be reverted. This implementation is a bit sloppy with names and how the GHR recovery is performed. Updates to follow. 
--- wally-pipelined/config/rv64BP/wally-config.vh | 3 +- wally-pipelined/src/ifu/bpred.sv | 9 ++-- .../src/ifu/globalHistoryPredictor.sv | 47 +++++++++++++++++-- wally-pipelined/src/ifu/ifu.sv | 9 +--- 4 files changed, 51 insertions(+), 17 deletions(-) diff --git a/wally-pipelined/config/rv64BP/wally-config.vh b/wally-pipelined/config/rv64BP/wally-config.vh index 17a8c284a..fd482bfde 100644 --- a/wally-pipelined/config/rv64BP/wally-config.vh +++ b/wally-pipelined/config/rv64BP/wally-config.vh @@ -110,5 +110,6 @@ `define TWO_BIT_PRELOAD "../config/rv64icfd/twoBitPredictor.txt" `define BTB_PRELOAD "../config/rv64icfd/BTBPredictor.txt" `define BPRED_ENABLED 1 -`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE +//`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE +`define BPTYPE "BPGLOBAL" // BPTWOBIT or "BPGSHARE" or BPLOCALPAg or BPGSHARE `define TESTSBP 1 diff --git a/wally-pipelined/src/ifu/bpred.sv b/wally-pipelined/src/ifu/bpred.sv index de0f8143b..c5b4dde48 100644 --- a/wally-pipelined/src/ifu/bpred.sv +++ b/wally-pipelined/src/ifu/bpred.sv @@ -30,7 +30,8 @@ module bpred (input logic clk, reset, - input logic StallF, StallD, StallE, FlushF, FlushD, FlushE, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushF, FlushD, FlushE, FlushM, FlushW, // Fetch stage // the prediction input logic [`XLEN-1:0] PCNextF, // *** forgot to include this one on the I/O list @@ -93,6 +94,8 @@ module bpred // update .UpdatePC(PCE), .UpdateEN(InstrClassE[0] & ~StallE), + .SpeculativeUpdateEn(BPInstrClassF[0] & ~StallF), + .BPPredDirWrongE(BPPredDirWrongE), .PCSrcE(PCSrcE), .UpdatePrediction(UpdateBPPredE)); end else if (`BPTYPE == "BPGSHARE") begin:Predictor @@ -190,14 +193,14 @@ module bpred flopenrc #(2) BPPredRegD(.clk(clk), .reset(reset), .en(~StallD), - .clear(FlushD), + .clear(1'b0), .d(BPPredF), .q(BPPredD)); flopenrc #(2) BPPredRegE(.clk(clk), .reset(reset), .en(~StallE), - .clear(FlushE), + .clear(1'b0), .d(BPPredD), .q(BPPredE)); 
diff --git a/wally-pipelined/src/ifu/globalHistoryPredictor.sv b/wally-pipelined/src/ifu/globalHistoryPredictor.sv index b2357ecce..fadbf004b 100644 --- a/wally-pipelined/src/ifu/globalHistoryPredictor.sv +++ b/wally-pipelined/src/ifu/globalHistoryPredictor.sv @@ -37,29 +37,66 @@ module globalHistoryPredictor output logic [1:0] Prediction, // update input logic [`XLEN-1:0] UpdatePC, - input logic UpdateEN, PCSrcE, + input logic UpdateEN, PCSrcE, + input logic SpeculativeUpdateEn, BPPredDirWrongE, input logic [1:0] UpdatePrediction ); - logic [k-1:0] GHRF, GHRFNext; - assign GHRFNext = {PCSrcE, GHRF[k-1:1]}; + logic [k-1:0] GHRF, GHRFNext, GHRD, GHRE, GHRLookup; + + logic FlushedD, FlushedE; + + + // if the prediction is wrong we need to restore the ghr. + assign GHRFNext = BPPredDirWrongE ? {PCSrcE, GHRE[k-1:1]} : + {Prediction[1], GHRF[k-1:1]}; flopenr #(k) GlobalHistoryRegister(.clk(clk), .reset(reset), - .en(UpdateEN), + .en((UpdateEN & BPPredDirWrongE) | (SpeculativeUpdateEn)), .d(GHRFNext), .q(GHRF)); + // if actively updating the GHR at the time of prediction we want to us + // GHRFNext as the lookup rather than GHRF. + + assign GHRLookup = UpdateEN ? 
GHRFNext : GHRF; + // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT SRAM2P1R1W #(k, 2) PHT(.clk(clk), .reset(reset), .RA1(GHRF), .RD1(Prediction), .REN1(~StallF), - .WA1(GHRF), + .WA1(GHRE), .WD1(UpdatePrediction), .WEN1(UpdateEN), .BitWEN1(2'b11)); + flopenr #(k) GlobalHistoryRegisterD(.clk(clk), + .reset(reset), + .en(~StallD & ~FlushedE), + .d(GHRF), + .q(GHRD)); + + flopenr #(k) GlobalHistoryRegisterE(.clk(clk), + .reset(reset), + .en(~StallE & ~ FlushedE), + .d(GHRD), + .q(GHRE)); + + + flopenr #(1) flushedDReg(.clk(clk), + .reset(reset), + .en(~StallD), + .d(FlushD), + .q(FlushedD)); + + flopenr #(1) flushedEReg(.clk(clk), + .reset(reset), + .en(~StallE), + .d(FlushE | FlushedD), + .q(FlushedE)); + endmodule diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 994288bd8..0922f7877 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -153,14 +153,7 @@ module ifu ( generate if (`BPRED_ENABLED == 1) begin : bpred // I am making the port connection explicit for now as I want to see them and they will be changing. - bpred bpred(.clk(clk), - .reset(reset), - .StallF(StallF), - .StallD(StallD), - .StallE(StallE), - .FlushF(FlushF), - .FlushD(FlushD), - .FlushE(FlushE), + bpred bpred(.*, .PCNextF(PCNextF), .BPPredPCF(BPPredPCF), .SelBPPredF(SelBPPredF), From f6c88666cfc8dbeebfd34db85b5282636c361690 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 31 May 2021 16:11:12 -0500 Subject: [PATCH 4/7] may have fixed the global branch history predictor. The solution required a completed rewrite and understanding of how the GHR needs to be speculatively updated and repaired. 
--- testsBP/simple/header.h | 4 +- testsBP/simple/main.c | 4 +- wally-pipelined/src/ifu/bpred.sv | 11 ++- .../src/ifu/globalHistoryPredictor.sv | 98 +++++++++++++++---- wally-pipelined/src/ifu/gshare.sv | 41 +++++++- 5 files changed, 128 insertions(+), 30 deletions(-) diff --git a/testsBP/simple/header.h b/testsBP/simple/header.h index 6def656f8..f3a62da30 100644 --- a/testsBP/simple/header.h +++ b/testsBP/simple/header.h @@ -5,5 +5,7 @@ int fail(); int simple_csrbr_test(); int lbu_test(); int icache_spill_test(); -void global_hist_test(); +void global_hist_1_space_test(); +void global_hist_2_space_test(); +void global_hist_3_space_test(); #endif diff --git a/testsBP/simple/main.c b/testsBP/simple/main.c index 036a351d1..7bf6b4751 100644 --- a/testsBP/simple/main.c +++ b/testsBP/simple/main.c @@ -2,7 +2,9 @@ int main(){ //int res = icache_spill_test(); - global_hist_test(); + global_hist_3_space_test(); + global_hist_2_space_test(); + global_hist_1_space_test(); int res = 1; if (res < 0) { fail(); diff --git a/wally-pipelined/src/ifu/bpred.sv b/wally-pipelined/src/ifu/bpred.sv index c5b4dde48..9beaa959a 100644 --- a/wally-pipelined/src/ifu/bpred.sv +++ b/wally-pipelined/src/ifu/bpred.sv @@ -90,12 +90,13 @@ module bpred .reset(reset), .*, // Stalls and flushes .LookUpPC(PCNextF), - .Prediction(BPPredF), + .BPPredF(BPPredF), // update - .UpdatePC(PCE), - .UpdateEN(InstrClassE[0] & ~StallE), - .SpeculativeUpdateEn(BPInstrClassF[0] & ~StallF), + .BPPredD(BPPredD), + .InstrClassE(InstrClassE), + .BPInstrClassE(BPInstrClassE), .BPPredDirWrongE(BPPredDirWrongE), + .UpdatePC(PCE), .PCSrcE(PCSrcE), .UpdatePrediction(UpdateBPPredE)); end else if (`BPTYPE == "BPGSHARE") begin:Predictor @@ -108,6 +109,8 @@ module bpred // update .UpdatePC(PCE), .UpdateEN(InstrClassE[0] & ~StallE), + .SpeculativeUpdateEn(BPInstrClassF[0] & ~StallF), + .BPPredDirWrongE(BPPredDirWrongE), .PCSrcE(PCSrcE), .UpdatePrediction(UpdateBPPredE)); end diff --git 
a/wally-pipelined/src/ifu/globalHistoryPredictor.sv b/wally-pipelined/src/ifu/globalHistoryPredictor.sv index fadbf004b..b2ac19911 100644 --- a/wally-pipelined/src/ifu/globalHistoryPredictor.sv +++ b/wally-pipelined/src/ifu/globalHistoryPredictor.sv @@ -34,49 +34,108 @@ module globalHistoryPredictor input logic reset, input logic StallF, StallD, StallE, FlushF, FlushD, FlushE, input logic [`XLEN-1:0] LookUpPC, - output logic [1:0] Prediction, + output logic [1:0] BPPredF, // update + input logic [1:0] BPPredD, + input logic [4:0] InstrClassE, + input logic [4:0] BPInstrClassE, + input logic [4:0] BPInstrClassD, + input logic [4:0] BPInstrClassF, + input logic BPPredDirWrongE, + input logic [`XLEN-1:0] UpdatePC, - input logic UpdateEN, PCSrcE, - input logic SpeculativeUpdateEn, BPPredDirWrongE, + input logic PCSrcE, input logic [1:0] UpdatePrediction ); - logic [k-1:0] GHRF, GHRFNext, GHRD, GHRE, GHRLookup; + logic [k+1:0] GHR, GHRNext; + logic [k-1:0] PHTUpdateAdr, PHTUpdateAdr0, PHTUpdateAdr1; + logic PHTUpdateEN; + logic BPClassWrongNonCFI; + logic BPClassWrongCFI; + logic BPClassRightNonCFI; + + +/* -----\/----- EXCLUDED -----\/----- + logic [k-1:0] GHRD, GHRE, GHRLookup; logic FlushedD, FlushedE; + -----/\----- EXCLUDED -----/\----- */ + + + logic [6:0] GHRMuxSel; + logic GHRUpdateEN; + + assign BPClassRightNonCFI = ~BPInstrClassE[0] & ~InstrClassE[0]; + assign BPClassWrongCFI = ~BPInstrClassE[0] & InstrClassE[0]; + assign BPClassWrongNonCFI = BPInstrClassE[0] & ~InstrClassE[0]; + assign BPClassRightBPWrong = BPInstrClassE[0] & InstrClassE[0] & BPPredDirWrongE; + assign BPClassRightBPRight = BPInstrClassE[0] & InstrClassE[0] & ~BPPredDirWrongE; + + // GHR update selection, 1 hot encoded. + assign GHRMuxSel[0] = ~BPInstrClassF[0] & (BPClassRightNonCFI | BPClassRightBPRight); - // if the prediction is wrong we need to restore the ghr. - assign GHRFNext = BPPredDirWrongE ? 
{PCSrcE, GHRE[k-1:1]} : - {Prediction[1], GHRF[k-1:1]}; + assign GHRMuxSel[1] = BPClassWrongCFI & ~BPInstrClassD[0]; + assign GHRMuxSel[3] = (BPClassRightBPWrong & ~BPInstrClassD[0]) | (BPClassWrongCFI & BPInstrClassD[0]); - flopenr #(k) GlobalHistoryRegister(.clk(clk), - .reset(reset), - .en((UpdateEN & BPPredDirWrongE) | (SpeculativeUpdateEn)), - .d(GHRFNext), - .q(GHRF)); + + assign GHRMuxSel[2] = BPClassWrongNonCFI & ~BPInstrClassD[0]; + + + + assign GHRMuxSel[4] = BPClassWrongNonCFI & BPInstrClassD[0]; + assign GHRMuxSel[5] = InstrClassE[0] & BPClassRightBPWrong & BPInstrClassD[0]; + assign GHRMuxSel[6] = BPInstrClassF[0] & (BPClassRightNonCFI | (InstrClassE[0] & BPClassRightBPRight)); + assign GHRUpdateEN = (| GHRMuxSel[5:1] & ~StallE) | GHRMuxSel[6] & ~StallF; + + // hoping this created a AND-OR mux. + always_comb begin + case (GHRMuxSel) + 7'b000_0001: GHRNext = GHR[k-1+2:0]; // no change + 7'b000_0010: GHRNext = {GHR[k-2+2:0], PCSrcE}; // branch update + 7'b000_0100: GHRNext = {1'b0, GHR[k+1:1]}; // repair 1 + 7'b000_1000: GHRNext = {GHR[k-1+2:1], PCSrcE}; // branch update with mis prediction correction + 7'b001_0000: GHRNext = {2'b00, GHR[k+1:2]}; // repair 2 + 7'b010_0000: GHRNext = {1'b0, GHR[k+1:2], PCSrcE}; // branch update + repair 1 + 7'b100_0000: GHRNext = {GHR[k-2+2:0], BPPredF[1]}; // speculative update + //7'b100_0000: GHRNext = {k+1{1'bx}}; // speculative update + default: GHRNext = GHR[k-1+2:0]; + endcase + end + + flopenr #(k+2) GlobalHistoryRegister(.clk(clk), + .reset(reset), + .en((GHRUpdateEN)), + .d(GHRNext), + .q(GHR)); // if actively updating the GHR at the time of prediction we want to us - // GHRFNext as the lookup rather than GHRF. + // GHRNext as the lookup rather than GHR. - assign GHRLookup = UpdateEN ? GHRFNext : GHRF; + //assign GHRLookup = GHRUpdateEN ? GHRNext : GHR; + assign PHTUpdateAdr0 = InstrClassE[0] ? GHR[k:1] : GHR[k-1:0]; + assign PHTUpdateAdr1 = InstrClassE[0] ? 
GHR[k+1:2] : GHR[k:1]; + assign PHTUpdateAdr = BPInstrClassD[0] ? PHTUpdateAdr1 : PHTUpdateAdr0; + assign PHTUpdateEN = InstrClassE[0] & ~StallE; + // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT SRAM2P1R1W #(k, 2) PHT(.clk(clk), .reset(reset), - .RA1(GHRF), - .RD1(Prediction), + .RA1(GHR[k-1:0]), + .RD1(BPPredF), .REN1(~StallF), - .WA1(GHRE), + .WA1(PHTUpdateAdr), .WD1(UpdatePrediction), - .WEN1(UpdateEN), + .WEN1(PHTUpdateEN), .BitWEN1(2'b11)); +/* -----\/----- EXCLUDED -----\/----- flopenr #(k) GlobalHistoryRegisterD(.clk(clk), .reset(reset), .en(~StallD & ~FlushedE), - .d(GHRF), + .d(GHR), .q(GHRD)); flopenr #(k) GlobalHistoryRegisterE(.clk(clk), @@ -97,6 +156,7 @@ module globalHistoryPredictor .en(~StallE), .d(FlushE | FlushedD), .q(FlushedE)); + -----/\----- EXCLUDED -----/\----- */ endmodule diff --git a/wally-pipelined/src/ifu/gshare.sv b/wally-pipelined/src/ifu/gshare.sv index 4d31e519b..3cc73be80 100644 --- a/wally-pipelined/src/ifu/gshare.sv +++ b/wally-pipelined/src/ifu/gshare.sv @@ -38,28 +38,32 @@ module gsharePredictor // update input logic [`XLEN-1:0] UpdatePC, input logic UpdateEN, PCSrcE, + input logic SpeculativeUpdateEn, BPPredDirWrongE, input logic [1:0] UpdatePrediction ); - logic [k-1:0] GHRF, GHRFNext; + logic [k-1:0] GHRF, GHRFNext, GHRD, GHRE; //logic [k-1:0] LookUpPCIndexD, LookUpPCIndexE; logic [k-1:0] LookUpPCIndex, UpdatePCIndex; logic [1:0] PredictionMemory; logic DoForwarding, DoForwardingF; logic [1:0] UpdatePredictionF; + logic FlushedD, FlushedE; - assign GHRFNext = {PCSrcE, GHRF[k-1:1]}; + // if the prediction is wrong we need to restore the ghr. + assign GHRFNext = BPPredDirWrongE ? 
{PCSrcE, GHRE[k-1:1]} : + {Prediction[1], GHRF[k-1:1]}; flopenr #(k) GlobalHistoryRegister(.clk(clk), .reset(reset), - .en(UpdateEN), + .en((UpdateEN & BPPredDirWrongE) | (SpeculativeUpdateEn)), .d(GHRFNext), .q(GHRF)); // for gshare xor the PC with the GHR - assign UpdatePCIndex = GHRFNext ^ UpdatePC[k:1]; + assign UpdatePCIndex = GHRE ^ UpdatePC[k:1]; assign LookUpPCIndex = GHRF ^ LookUpPC[k:1]; // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT // GHR referes to the address that the past k branches points to in the prediction stage @@ -67,7 +71,7 @@ module gsharePredictor SRAM2P1R1W #(k, 2) PHT(.clk(clk), .reset(reset), .RA1(LookUpPCIndex), - .RD1(PredictionMemory), + .RD1(Prediction), .REN1(~StallF), .WA1(UpdatePCIndex), .WD1(UpdatePrediction), @@ -75,6 +79,32 @@ module gsharePredictor .BitWEN1(2'b11)); + flopenr #(k) GlobalHistoryRegisterD(.clk(clk), + .reset(reset), + .en(~StallD & ~FlushedE), + .d(GHRF), + .q(GHRD)); + + flopenr #(k) GlobalHistoryRegisterE(.clk(clk), + .reset(reset), + .en(~StallE & ~ FlushedE), + .d(GHRD), + .q(GHRE)); + + + flopenr #(1) flushedDReg(.clk(clk), + .reset(reset), + .en(~StallD), + .d(FlushD), + .q(FlushedD)); + + flopenr #(1) flushedEReg(.clk(clk), + .reset(reset), + .en(~StallE), + .d(FlushE | FlushedD), + .q(FlushedE)); + +/* -----\/----- EXCLUDED -----\/----- // need to forward when updating to the same address as reading. // first we compare to see if the update and lookup addreses are the same assign DoForwarding = LookUpPCIndex == UpdatePCIndex; @@ -92,6 +122,7 @@ module gsharePredictor .q(UpdatePredictionF)); assign Prediction = DoForwardingF ? UpdatePredictionF : PredictionMemory; + -----/\----- EXCLUDED -----/\----- */ //pipeline for GHR /* -----\/----- EXCLUDED -----\/----- From ddbdd0d5a27d485537994516a0671225c3cb7219 Mon Sep 17 00:00:00 2001 From: "James E. 
Stine" Date: Mon, 31 May 2021 23:27:42 -0400 Subject: [PATCH 5/7] Modify muldiv.sv to handle W instructions for 64-bits --- wally-pipelined/src/muldiv/div.sv | 1 - wally-pipelined/src/muldiv/muldiv.sv | 17 ++++++++++++++--- wally-pipelined/testbench/testbench-imperas.sv | 10 +++++----- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/wally-pipelined/src/muldiv/div.sv b/wally-pipelined/src/muldiv/div.sv index 8b4e0463a..10af5eee4 100755 --- a/wally-pipelined/src/muldiv/div.sv +++ b/wally-pipelined/src/muldiv/div.sv @@ -87,7 +87,6 @@ module intdiv #(parameter WIDTH=64) // is 0 and thus a divide by 0 exception. This div0 // exception is given to FSM to tell the operation to // quit gracefully. - lzd_hier #(WIDTH) p1 (.ZP(P), .ZV(V), .B(twoD)); shift_left #(WIDTH) p2 (twoD, P, op2); assign op1 = twoN; diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index ccabe341a..0c26a5df8 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -53,6 +53,7 @@ module muldiv ( logic [2:0] Funct3E_Q; logic div0error; logic [`XLEN-1:0] N, D; + logic [`XLEN-1:0] Num0, Den0; logic gclk; logic DivStartE; @@ -69,13 +70,23 @@ module muldiv ( end assign gclk = enable_q & clk; + // Handle sign extension for W-type instructions + if (`XLEN == 64) begin // RV64 has W-type instructions + assign Num0 = W64E ? {{32{SrcAE[31]&signedDivide}}, SrcAE[31:0]} : SrcAE; + assign Den0 = W64E ? 
{{32{SrcBE[31]&signedDivide}}, SrcBE[31:0]} : SrcBE; + end else begin // RV32 has no W-type instructions + assign Num0 = SrcAE; + assign Den0 = SrcBE; // denominator is the second operand (SrcBE), not SrcAE + end + // capture the Numerator/Denominator - flopenrc #(`XLEN) reg_num (.d(SrcAE), .q(N), + flopenrc #(`XLEN) reg_num (.d(Num0), .q(N), .en(startDivideE), .clear(DivDoneE), .reset(reset), .clk(~gclk)); - flopenrc #(`XLEN) reg_den (.d(SrcBE), .q(D), + flopenrc #(`XLEN) reg_den (.d(Den0), .q(D), .en(startDivideE), .clear(DivDoneE), - .reset(reset), .clk(~gclk)); + .reset(reset), .clk(~gclk)); + assign signedDivide = (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index ea6939004..6d8f1049f 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -166,12 +166,12 @@ string tests32f[] = '{ "rv64m/I-MULW-01", "3000", "rv64m/I-DIV-01", "3000", "rv64m/I-DIVU-01", "3000", - //"rv64m/I-DIVUW-01", "3000", - //"rv64m/I-DIVW-01", "3000", + "rv64m/I-DIVUW-01", "3000", + "rv64m/I-DIVW-01", "3000", "rv64m/I-REM-01", "3000", - "rv64m/I-REMU-01", "3000" - //"rv64m/I-REMUW-01", "3000", - //"rv64m/I-REMW-01", "3000" + "rv64m/I-REMU-01", "3000", + "rv64m/I-REMUW-01", "3000", + "rv64m/I-REMW-01", "3000" }; string tests64ic[] = '{ From 857f59ab5c51e146d1cdf121443297f7ac079246 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 1 Jun 2021 10:57:43 -0500 Subject: [PATCH 6/7] Now have global history working correctly. 
--- testsBP/crt0/Makefile | 8 +- testsBP/simple/header.h | 1 + testsBP/simple/main.c | 3 +- wally-pipelined/config/rv64BP/wally-config.vh | 6 +- wally-pipelined/src/ifu/bpred.sv | 29 ++-- .../src/ifu/globalHistoryPredictor.sv | 62 ++----- wally-pipelined/src/ifu/gshare.sv | 159 ------------------ .../testbench/testbench-imperas.sv | 5 +- 8 files changed, 38 insertions(+), 235 deletions(-) delete mode 100644 wally-pipelined/src/ifu/gshare.sv diff --git a/testsBP/crt0/Makefile b/testsBP/crt0/Makefile index b42e86cb8..2af43a408 100644 --- a/testsBP/crt0/Makefile +++ b/testsBP/crt0/Makefile @@ -4,12 +4,12 @@ ROOT := .. LIBRARY_DIRS := LIBRARY_FILES := -MARCH :=-march=rv64ic -MABI :=-mabi=lp64 +MARCH :=-march=rv64imfdc +MABI :=-mabi=lp64d LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -AFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64 -W -CFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64 -mcmodel=medany -O2 +AFLAGS =$(MARCH) $(MABI) -W +CFLAGS =$(MARCH) $(MABI) -mcmodel=medany -O2 AS=riscv64-unknown-elf-as CC=riscv64-unknown-elf-gcc AR=riscv64-unknown-elf-ar diff --git a/testsBP/simple/header.h b/testsBP/simple/header.h index f3a62da30..aab8973fd 100644 --- a/testsBP/simple/header.h +++ b/testsBP/simple/header.h @@ -5,6 +5,7 @@ int fail(); int simple_csrbr_test(); int lbu_test(); int icache_spill_test(); +void global_hist_0_space_test(); void global_hist_1_space_test(); void global_hist_2_space_test(); void global_hist_3_space_test(); diff --git a/testsBP/simple/main.c b/testsBP/simple/main.c index 7bf6b4751..564b474e1 100644 --- a/testsBP/simple/main.c +++ b/testsBP/simple/main.c @@ -4,7 +4,8 @@ int main(){ //int res = icache_spill_test(); global_hist_3_space_test(); global_hist_2_space_test(); - global_hist_1_space_test(); + global_hist_1_space_test(); + global_hist_0_space_test(); int res = 1; if (res < 0) { fail(); diff --git a/wally-pipelined/config/rv64BP/wally-config.vh b/wally-pipelined/config/rv64BP/wally-config.vh index fd482bfde..a9dbb1bda 100644 --- 
a/wally-pipelined/config/rv64BP/wally-config.vh +++ b/wally-pipelined/config/rv64BP/wally-config.vh @@ -32,7 +32,7 @@ `define XLEN 64 //`define MISA (32'h00000105) -`define MISA (32'h00000104 | 1<<5 | 1<<18 | 1 << 20 | 1 << 12 | 1 << 0) +`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0) `define A_SUPPORTED ((`MISA >> 0) % 2 == 1) `define C_SUPPORTED ((`MISA >> 2) % 2 == 1) `define D_SUPPORTED ((`MISA >> 3) % 2 == 1) @@ -107,8 +107,8 @@ /* verilator lint_off ASSIGNDLY */ /* verilator lint_off PINCONNECTEMPTY */ -`define TWO_BIT_PRELOAD "../config/rv64icfd/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv64icfd/BTBPredictor.txt" +`define TWO_BIT_PRELOAD "../config/rv64BP/twoBitPredictor.txt" +`define BTB_PRELOAD "../config/rv64BP/BTBPredictor.txt" `define BPRED_ENABLED 1 //`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE `define BPTYPE "BPGLOBAL" // BPTWOBIT or "BPGSHARE" or BPLOCALPAg or BPGSHARE diff --git a/wally-pipelined/src/ifu/bpred.sv b/wally-pipelined/src/ifu/bpred.sv index 9beaa959a..92471c574 100644 --- a/wally-pipelined/src/ifu/bpred.sv +++ b/wally-pipelined/src/ifu/bpred.sv @@ -89,30 +89,29 @@ module bpred globalHistoryPredictor DirPredictor(.clk(clk), .reset(reset), .*, // Stalls and flushes - .LookUpPC(PCNextF), + .PCNextF(PCNextF), .BPPredF(BPPredF), // update - .BPPredD(BPPredD), .InstrClassE(InstrClassE), .BPInstrClassE(BPInstrClassE), .BPPredDirWrongE(BPPredDirWrongE), - .UpdatePC(PCE), + .PCE(PCE), .PCSrcE(PCSrcE), - .UpdatePrediction(UpdateBPPredE)); + .UpdateBPPredE(UpdateBPPredE)); end else if (`BPTYPE == "BPGSHARE") begin:Predictor gsharePredictor DirPredictor(.clk(clk), - .reset(reset), - .*, // Stalls and flushes - .LookUpPC(PCNextF), - .Prediction(BPPredF), - // update - .UpdatePC(PCE), - .UpdateEN(InstrClassE[0] & ~StallE), - .SpeculativeUpdateEn(BPInstrClassF[0] & ~StallF), - .BPPredDirWrongE(BPPredDirWrongE), - .PCSrcE(PCSrcE), - .UpdatePrediction(UpdateBPPredE)); + .reset(reset), + 
.*, // Stalls and flushes + .PCNextF(PCNextF), + .BPPredF(BPPredF), + // update + .InstrClassE(InstrClassE), + .BPInstrClassE(BPInstrClassE), + .BPPredDirWrongE(BPPredDirWrongE), + .PCE(PCE), + .PCSrcE(PCSrcE), + .UpdateBPPredE(UpdateBPPredE)); end else if (`BPTYPE == "BPLOCALPAg") begin:Predictor diff --git a/wally-pipelined/src/ifu/globalHistoryPredictor.sv b/wally-pipelined/src/ifu/globalHistoryPredictor.sv index b2ac19911..516de633e 100644 --- a/wally-pipelined/src/ifu/globalHistoryPredictor.sv +++ b/wally-pipelined/src/ifu/globalHistoryPredictor.sv @@ -33,19 +33,18 @@ module globalHistoryPredictor (input logic clk, input logic reset, input logic StallF, StallD, StallE, FlushF, FlushD, FlushE, - input logic [`XLEN-1:0] LookUpPC, + input logic [`XLEN-1:0] PCNextF, output logic [1:0] BPPredF, // update - input logic [1:0] BPPredD, input logic [4:0] InstrClassE, input logic [4:0] BPInstrClassE, input logic [4:0] BPInstrClassD, input logic [4:0] BPInstrClassF, input logic BPPredDirWrongE, - input logic [`XLEN-1:0] UpdatePC, + input logic [`XLEN-1:0] PCE, input logic PCSrcE, - input logic [1:0] UpdatePrediction + input logic [1:0] UpdateBPPredE ); logic [k+1:0] GHR, GHRNext; @@ -54,17 +53,10 @@ module globalHistoryPredictor logic BPClassWrongNonCFI; logic BPClassWrongCFI; logic BPClassRightNonCFI; - - -/* -----\/----- EXCLUDED -----\/----- - logic [k-1:0] GHRD, GHRE, GHRLookup; - - logic FlushedD, FlushedE; - -----/\----- EXCLUDED -----/\----- */ - logic [6:0] GHRMuxSel; logic GHRUpdateEN; + logic [k-1:0] GHRLookup; assign BPClassRightNonCFI = ~BPInstrClassE[0] & ~InstrClassE[0]; assign BPClassWrongCFI = ~BPInstrClassE[0] & InstrClassE[0]; @@ -75,15 +67,9 @@ module globalHistoryPredictor // GHR update selection, 1 hot encoded. 
assign GHRMuxSel[0] = ~BPInstrClassF[0] & (BPClassRightNonCFI | BPClassRightBPRight); - assign GHRMuxSel[1] = BPClassWrongCFI & ~BPInstrClassD[0]; - assign GHRMuxSel[3] = (BPClassRightBPWrong & ~BPInstrClassD[0]) | (BPClassWrongCFI & BPInstrClassD[0]); - - assign GHRMuxSel[2] = BPClassWrongNonCFI & ~BPInstrClassD[0]; - - - + assign GHRMuxSel[3] = (BPClassRightBPWrong & ~BPInstrClassD[0]) | (BPClassWrongCFI & BPInstrClassD[0]); assign GHRMuxSel[4] = BPClassWrongNonCFI & BPInstrClassD[0]; assign GHRMuxSel[5] = InstrClassE[0] & BPClassRightBPWrong & BPInstrClassD[0]; assign GHRMuxSel[6] = BPInstrClassF[0] & (BPClassRightNonCFI | (InstrClassE[0] & BPClassRightBPRight)); @@ -99,7 +85,6 @@ module globalHistoryPredictor 7'b001_0000: GHRNext = {2'b00, GHR[k+1:2]}; // repair 2 7'b010_0000: GHRNext = {1'b0, GHR[k+1:2], PCSrcE}; // branch update + repair 1 7'b100_0000: GHRNext = {GHR[k-2+2:0], BPPredF[1]}; // speculative update - //7'b100_0000: GHRNext = {k+1{1'bx}}; // speculative update default: GHRNext = GHR[k-1+2:0]; endcase end @@ -113,50 +98,23 @@ module globalHistoryPredictor // if actively updating the GHR at the time of prediction we want to us // GHRNext as the lookup rather than GHR. - //assign GHRLookup = GHRUpdateEN ? GHRNext : GHR; - assign PHTUpdateAdr0 = InstrClassE[0] ? GHR[k:1] : GHR[k-1:0]; assign PHTUpdateAdr1 = InstrClassE[0] ? GHR[k+1:2] : GHR[k:1]; assign PHTUpdateAdr = BPInstrClassD[0] ? PHTUpdateAdr1 : PHTUpdateAdr0; assign PHTUpdateEN = InstrClassE[0] & ~StallE; + + assign GHRLookup = |GHRMuxSel[6:1] ? 
GHRNext[k-1:0] : GHR[k-1:0]; // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT SRAM2P1R1W #(k, 2) PHT(.clk(clk), .reset(reset), - .RA1(GHR[k-1:0]), + //.RA1(GHR[k-1:0]), + .RA1(GHRLookup), .RD1(BPPredF), .REN1(~StallF), .WA1(PHTUpdateAdr), - .WD1(UpdatePrediction), + .WD1(UpdateBPPredE), .WEN1(PHTUpdateEN), .BitWEN1(2'b11)); -/* -----\/----- EXCLUDED -----\/----- - flopenr #(k) GlobalHistoryRegisterD(.clk(clk), - .reset(reset), - .en(~StallD & ~FlushedE), - .d(GHR), - .q(GHRD)); - - flopenr #(k) GlobalHistoryRegisterE(.clk(clk), - .reset(reset), - .en(~StallE & ~ FlushedE), - .d(GHRD), - .q(GHRE)); - - - flopenr #(1) flushedDReg(.clk(clk), - .reset(reset), - .en(~StallD), - .d(FlushD), - .q(FlushedD)); - - flopenr #(1) flushedEReg(.clk(clk), - .reset(reset), - .en(~StallE), - .d(FlushE | FlushedD), - .q(FlushedE)); - -----/\----- EXCLUDED -----/\----- */ - - endmodule diff --git a/wally-pipelined/src/ifu/gshare.sv b/wally-pipelined/src/ifu/gshare.sv deleted file mode 100644 index 3cc73be80..000000000 --- a/wally-pipelined/src/ifu/gshare.sv +++ /dev/null @@ -1,159 +0,0 @@ -/////////////////////////////////////////// -// gshare.sv -// -// Written: Shreya Sanghai -// Email: ssanghai@hmc.edu -// Created: March 16, 2021 -// Modified: -// -// Purpose: Gshare predictor with parameterized global history register -// -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - -`include "wally-config.vh" - -module gsharePredictor - #(parameter int k = 10 - ) - (input logic clk, - input logic reset, - input logic StallF, StallD, StallE, FlushF, FlushD, FlushE, - input logic [`XLEN-1:0] LookUpPC, - output logic [1:0] Prediction, - // update - input logic [`XLEN-1:0] UpdatePC, - input logic UpdateEN, PCSrcE, - input logic SpeculativeUpdateEn, BPPredDirWrongE, - input logic [1:0] UpdatePrediction - - ); - - logic [k-1:0] GHRF, GHRFNext, GHRD, GHRE; - //logic [k-1:0] LookUpPCIndexD, LookUpPCIndexE; - logic [k-1:0] LookUpPCIndex, UpdatePCIndex; - logic [1:0] PredictionMemory; - logic DoForwarding, DoForwardingF; - logic [1:0] UpdatePredictionF; - logic FlushedD, FlushedE; - - // if the prediction is wrong we need to restore the ghr. - assign GHRFNext = BPPredDirWrongE ? 
{PCSrcE, GHRE[k-1:1]} : - {Prediction[1], GHRF[k-1:1]}; - - flopenr #(k) GlobalHistoryRegister(.clk(clk), - .reset(reset), - .en((UpdateEN & BPPredDirWrongE) | (SpeculativeUpdateEn)), - .d(GHRFNext), - .q(GHRF)); - - - // for gshare xor the PC with the GHR - assign UpdatePCIndex = GHRE ^ UpdatePC[k:1]; - assign LookUpPCIndex = GHRF ^ LookUpPC[k:1]; - // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT - // GHR referes to the address that the past k branches points to in the prediction stage - // GHRE refers to the address that the past k branches points to in the exectution stage - SRAM2P1R1W #(k, 2) PHT(.clk(clk), - .reset(reset), - .RA1(LookUpPCIndex), - .RD1(Prediction), - .REN1(~StallF), - .WA1(UpdatePCIndex), - .WD1(UpdatePrediction), - .WEN1(UpdateEN), - .BitWEN1(2'b11)); - - - flopenr #(k) GlobalHistoryRegisterD(.clk(clk), - .reset(reset), - .en(~StallD & ~FlushedE), - .d(GHRF), - .q(GHRD)); - - flopenr #(k) GlobalHistoryRegisterE(.clk(clk), - .reset(reset), - .en(~StallE & ~ FlushedE), - .d(GHRD), - .q(GHRE)); - - - flopenr #(1) flushedDReg(.clk(clk), - .reset(reset), - .en(~StallD), - .d(FlushD), - .q(FlushedD)); - - flopenr #(1) flushedEReg(.clk(clk), - .reset(reset), - .en(~StallE), - .d(FlushE | FlushedD), - .q(FlushedE)); - -/* -----\/----- EXCLUDED -----\/----- - // need to forward when updating to the same address as reading. - // first we compare to see if the update and lookup addreses are the same - assign DoForwarding = LookUpPCIndex == UpdatePCIndex; - - // register the update value and the forwarding signal into the Fetch stage - // TODO: add stall logic *** - flopr #(1) DoForwardingReg(.clk(clk), - .reset(reset), - .d(DoForwarding), - .q(DoForwardingF)); - - flopr #(2) UpdatePredictionReg(.clk(clk), - .reset(reset), - .d(UpdatePrediction), - .q(UpdatePredictionF)); - - assign Prediction = DoForwardingF ? 
UpdatePredictionF : PredictionMemory; - -----/\----- EXCLUDED -----/\----- */ - - //pipeline for GHR -/* -----\/----- EXCLUDED -----\/----- - flopenrc #(k) LookUpDReg(.clk(clk), - .reset(reset), - .en(~StallD), - .clear(FlushD), - .d(LookUpPCIndex), - .q(LookUpPCIndexD)); - - flopenrc #(k) LookUpEReg(.clk(clk), - .reset(reset), - .en(~StallE), - .clear(FlushE), - .d(LookUpPCIndexD), - .q(LookUpPCIndexE)); - -----/\----- EXCLUDED -----/\----- */ - -/* flopenrc #(k) GHRRegD(.clk(clk), - .reset(reset), - .en(~StallD), - .clear(FlushD), - .d(GHRF), - .q(GHRD)); - - flopenrc #(k) GHRRegE(.clk(clk), - .reset(reset), - .en(~StallE), - .clear(FlushE), - .d(GHRD), - .q(GHRE)); - -*/ -endmodule diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index ddee23a1e..bb8ffbd4b 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -438,8 +438,11 @@ string tests32f[] = '{ string testsBP64[] = '{ "rv64BP/simple", "10000", + "rv64BP/mmm", "1000000", + "rv64BP/linpack_bench", "1000000", + "rv64BP/sieve", "1000000", "rv64BP/qsort", "1000000", - "rv64BP/sieve", "1000000" + "rv64BP/dhrystone", "1000000" }; string tests64p[] = '{ From ab509614bb36a1db60b8017f0df1521bf9688858 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 1 Jun 2021 12:14:58 -0500 Subject: [PATCH 7/7] Changed to bp config to use gshare. 
--- wally-pipelined/config/rv64BP/wally-config.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/config/rv64BP/wally-config.vh b/wally-pipelined/config/rv64BP/wally-config.vh index a9dbb1bda..f85e0c228 100644 --- a/wally-pipelined/config/rv64BP/wally-config.vh +++ b/wally-pipelined/config/rv64BP/wally-config.vh @@ -111,5 +111,5 @@ `define BTB_PRELOAD "../config/rv64BP/BTBPredictor.txt" `define BPRED_ENABLED 1 //`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE -`define BPTYPE "BPGLOBAL" // BPTWOBIT or "BPGSHARE" or BPLOCALPAg or BPGSHARE +`define BPTYPE "BPGSHARE" // BPTWOBIT or "BPGLOBAL" or BPLOCALPAg or BPGSHARE `define TESTSBP 1