diff --git a/testsBP/crt0/Makefile b/testsBP/crt0/Makefile index ab47384fc..2af43a408 100644 --- a/testsBP/crt0/Makefile +++ b/testsBP/crt0/Makefile @@ -4,12 +4,12 @@ ROOT := .. LIBRARY_DIRS := LIBRARY_FILES := -MARCH :=-march=rv64ic -MABI :=-mabi=lp64 +MARCH :=-march=rv64imfdc +MABI :=-mabi=lp64d LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -AFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64 -W -CFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64 -mcmodel=medany +AFLAGS =$(MARCH) $(MABI) -W +CFLAGS =$(MARCH) $(MABI) -mcmodel=medany -O2 AS=riscv64-unknown-elf-as CC=riscv64-unknown-elf-gcc AR=riscv64-unknown-elf-ar @@ -19,7 +19,7 @@ all: libcrt0.a %.o: %.s ${AS} ${AFLAGS} -c $< -o $@ -libcrt0.a: start.o +libcrt0.a: start.o pcnt_driver.o pre_main.o ${AR} -r $@ $^ clean: diff --git a/testsBP/crt0/start.s b/testsBP/crt0/start.s index 19a240d87..731a61e34 100644 --- a/testsBP/crt0/start.s +++ b/testsBP/crt0/start.s @@ -43,11 +43,10 @@ _start: - # set the stack pointer to the top of memory - # 0x8000_0000 + 64K - 8 bytes - li sp, 0x007FFFF8 + # set the stack pointer to the top of memory - 8 bytes (pointer size) + li sp, 0x07FFFFF8 - jal ra, main + jal ra, pre_main jal ra, _halt .section .text diff --git a/testsBP/mibench_qsort/Makefile b/testsBP/mibench_qsort/Makefile index f4d368392..b1cf7b679 100644 --- a/testsBP/mibench_qsort/Makefile +++ b/testsBP/mibench_qsort/Makefile @@ -8,7 +8,7 @@ MARCH :=-march=rv64ic MABI :=-mabi=lp64 LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map -CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align +CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align -O2 CC=riscv64-unknown-elf-gcc DA=riscv64-unknown-elf-objdump -d diff --git a/testsBP/sieve/Makefile b/testsBP/sieve/Makefile index 1d38d123d..9c884f48a 100644 --- a/testsBP/sieve/Makefile +++ b/testsBP/sieve/Makefile @@ -8,7 +8,7 @@ MARCH :=-march=rv64ic MABI :=-mabi=lp64 LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map -CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align +CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align -O2 CC=riscv64-unknown-elf-gcc DA=riscv64-unknown-elf-objdump -d diff --git a/testsBP/sieve/sieve.c b/testsBP/sieve/sieve.c index e82074045..f7d36d957 100644 --- a/testsBP/sieve/sieve.c +++ b/testsBP/sieve/sieve.c @@ -66,21 +66,21 @@ int main () { ans = sieve (); //gettimeofday(&after , NULL); - if (ans != 1899) - printf ("Sieve result wrong, ans = %d, expected 1899", ans); + /* /\* /\\* if (ans != 1899) *\\/ *\/ */ + /* /\* /\\* printf ("Sieve result wrong, ans = %d, expected 1899", ans); *\\/ *\/ */ - //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); + /* /\* //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); *\/ */ - printf("Round 2\n"); - //gettimeofday(&before , NULL); + /* /\* printf("Round 2\n"); *\/ */ + /* //gettimeofday(&before , NULL); */ - ans = sieve (); - //gettimeofday(&after , NULL); - if (ans != 1899) - printf ("Sieve result wrong, ans = %d, expected 1899", ans); + /* ans = sieve (); */ + /* //gettimeofday(&after , NULL); */ + /* if (ans != 1899) */ + /* printf ("Sieve result wrong, ans = %d, expected 1899", ans); */ - //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); + /* //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); */ return 0; diff --git a/testsBP/simple/Makefile b/testsBP/simple/Makefile index 450aacaa4..4447f2843 100644 --- a/testsBP/simple/Makefile +++ b/testsBP/simple/Makefile @@ -8,7 +8,7 @@ MARCH :=-march=rv64ic MABI :=-mabi=lp64 LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map -CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align +CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany -mstrict-align -O2 CC=riscv64-unknown-elf-gcc DA=riscv64-unknown-elf-objdump -d diff --git a/testsBP/simple/header.h b/testsBP/simple/header.h index bfe014a4b..aab8973fd 100644 --- a/testsBP/simple/header.h +++ b/testsBP/simple/header.h @@ -5,4 +5,8 @@ int fail(); int simple_csrbr_test(); int lbu_test(); int icache_spill_test(); +void global_hist_0_space_test(); +void global_hist_1_space_test(); +void global_hist_2_space_test(); +void global_hist_3_space_test(); #endif diff --git a/testsBP/simple/main.c b/testsBP/simple/main.c index 0d14fcfb8..564b474e1 100644 --- a/testsBP/simple/main.c +++ b/testsBP/simple/main.c @@ -2,6 +2,10 @@ int main(){ //int res = icache_spill_test(); + global_hist_3_space_test(); + global_hist_2_space_test(); + global_hist_1_space_test(); + global_hist_0_space_test(); int res = 1; if (res < 0) { fail(); diff --git a/wally-pipelined/config/rv64BP/wally-config.vh b/wally-pipelined/config/rv64BP/wally-config.vh index 17a8c284a..f85e0c228 100644 --- a/wally-pipelined/config/rv64BP/wally-config.vh +++ b/wally-pipelined/config/rv64BP/wally-config.vh @@ -32,7 +32,7 @@ `define XLEN 64 //`define MISA (32'h00000105) -`define MISA (32'h00000104 | 1<<5 | 1<<18 | 1 << 20 | 1 << 12 | 1 << 0) +`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0) `define A_SUPPORTED ((`MISA >> 0) % 2 == 1) `define C_SUPPORTED ((`MISA >> 2) % 2 == 1) `define D_SUPPORTED ((`MISA >> 3) % 2 == 1) @@ -107,8 +107,9 @@ /* verilator lint_off ASSIGNDLY */ /* verilator lint_off PINCONNECTEMPTY */ -`define TWO_BIT_PRELOAD "../config/rv64icfd/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv64icfd/BTBPredictor.txt" +`define TWO_BIT_PRELOAD "../config/rv64BP/twoBitPredictor.txt" +`define BTB_PRELOAD "../config/rv64BP/BTBPredictor.txt" `define BPRED_ENABLED 1 -`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE +//`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE +`define BPTYPE "BPGSHARE" // BPTWOBIT or "BPGLOBAL" or BPLOCALPAg or BPGSHARE `define TESTSBP 1 diff --git a/wally-pipelined/src/ifu/bpred.sv b/wally-pipelined/src/ifu/bpred.sv index de0f8143b..92471c574 100644 --- a/wally-pipelined/src/ifu/bpred.sv +++ b/wally-pipelined/src/ifu/bpred.sv @@ -30,7 +30,8 @@ module bpred (input logic clk, reset, - input logic StallF, StallD, StallE, FlushF, FlushD, FlushE, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushF, FlushD, FlushE, FlushM, FlushW, // Fetch stage // the prediction input logic [`XLEN-1:0] PCNextF, // *** forgot to include this one on the I/O list @@ -88,25 +89,29 @@ module bpred globalHistoryPredictor DirPredictor(.clk(clk), .reset(reset), .*, // Stalls and flushes - .LookUpPC(PCNextF), - .Prediction(BPPredF), + .PCNextF(PCNextF), + .BPPredF(BPPredF), // update - .UpdatePC(PCE), - .UpdateEN(InstrClassE[0] & ~StallE), + .InstrClassE(InstrClassE), + .BPInstrClassE(BPInstrClassE), + .BPPredDirWrongE(BPPredDirWrongE), + .PCE(PCE), .PCSrcE(PCSrcE), - .UpdatePrediction(UpdateBPPredE)); + .UpdateBPPredE(UpdateBPPredE)); end else if (`BPTYPE == "BPGSHARE") begin:Predictor gsharePredictor DirPredictor(.clk(clk), - .reset(reset), - .*, // Stalls and flushes - .LookUpPC(PCNextF), - .Prediction(BPPredF), - // update - .UpdatePC(PCE), - .UpdateEN(InstrClassE[0] & ~StallE), - .PCSrcE(PCSrcE), - .UpdatePrediction(UpdateBPPredE)); + .reset(reset), + .*, // Stalls and flushes + .PCNextF(PCNextF), + .BPPredF(BPPredF), + // update + .InstrClassE(InstrClassE), + .BPInstrClassE(BPInstrClassE), + .BPPredDirWrongE(BPPredDirWrongE), + .PCE(PCE), + .PCSrcE(PCSrcE), + .UpdateBPPredE(UpdateBPPredE)); end else if (`BPTYPE == "BPLOCALPAg") begin:Predictor @@ -190,14 +195,14 @@ module bpred flopenrc #(2) BPPredRegD(.clk(clk), .reset(reset), .en(~StallD), - .clear(FlushD), + .clear(1'b0), .d(BPPredF), .q(BPPredD)); flopenrc #(2) BPPredRegE(.clk(clk), .reset(reset), .en(~StallE), - .clear(FlushE), + .clear(1'b0), .d(BPPredD), .q(BPPredE)); diff --git a/wally-pipelined/src/ifu/globalHistoryPredictor.sv b/wally-pipelined/src/ifu/globalHistoryPredictor.sv index 087458df3..516de633e 100644 --- a/wally-pipelined/src/ifu/globalHistoryPredictor.sv +++ b/wally-pipelined/src/ifu/globalHistoryPredictor.sv @@ -32,76 +32,89 @@ module globalHistoryPredictor ) (input logic clk, input logic reset, - input logic StallF, StallD, StallE, FlushF, FlushD, FlushE, - input logic [`XLEN-1:0] LookUpPC, - output logic [1:0] Prediction, + input logic StallF, StallD, StallE, FlushF, FlushD, FlushE, + input logic [`XLEN-1:0] PCNextF, + output logic [1:0] BPPredF, // update - input logic [`XLEN-1:0] UpdatePC, - input logic UpdateEN, PCSrcE, - input logic [1:0] UpdatePrediction - + input logic [4:0] InstrClassE, + input logic [4:0] BPInstrClassE, + input logic [4:0] BPInstrClassD, + input logic [4:0] BPInstrClassF, + input logic BPPredDirWrongE, + + input logic [`XLEN-1:0] PCE, + input logic PCSrcE, + input logic [1:0] UpdateBPPredE + ); - logic [k-1:0] GHRF, GHRFNext; - assign GHRFNext = {PCSrcE, GHRF[k-1:1]}; + logic [k+1:0] GHR, GHRNext; + logic [k-1:0] PHTUpdateAdr, PHTUpdateAdr0, PHTUpdateAdr1; + logic PHTUpdateEN; + logic BPClassWrongNonCFI; + logic BPClassWrongCFI; + logic BPClassRightNonCFI; - flopenr #(k) GlobalHistoryRegister(.clk(clk), - .reset(reset), - .en(UpdateEN), - .d(GHRFNext), - .q(GHRF)); + logic [6:0] GHRMuxSel; + logic GHRUpdateEN; + logic [k-1:0] GHRLookup; + assign BPClassRightNonCFI = ~BPInstrClassE[0] & ~InstrClassE[0]; + assign BPClassWrongCFI = ~BPInstrClassE[0] & InstrClassE[0]; + assign BPClassWrongNonCFI = BPInstrClassE[0] & ~InstrClassE[0]; + assign BPClassRightBPWrong = BPInstrClassE[0] & InstrClassE[0] & BPPredDirWrongE; + assign BPClassRightBPRight = BPInstrClassE[0] & InstrClassE[0] & ~BPPredDirWrongE; + + + // GHR update selection, 1 hot encoded. + assign GHRMuxSel[0] = ~BPInstrClassF[0] & (BPClassRightNonCFI | BPClassRightBPRight); + assign GHRMuxSel[1] = BPClassWrongCFI & ~BPInstrClassD[0]; + assign GHRMuxSel[2] = BPClassWrongNonCFI & ~BPInstrClassD[0]; + assign GHRMuxSel[3] = (BPClassRightBPWrong & ~BPInstrClassD[0]) | (BPClassWrongCFI & BPInstrClassD[0]); + assign GHRMuxSel[4] = BPClassWrongNonCFI & BPInstrClassD[0]; + assign GHRMuxSel[5] = InstrClassE[0] & BPClassRightBPWrong & BPInstrClassD[0]; + assign GHRMuxSel[6] = BPInstrClassF[0] & (BPClassRightNonCFI | (InstrClassE[0] & BPClassRightBPRight)); + assign GHRUpdateEN = (| GHRMuxSel[5:1] & ~StallE) | GHRMuxSel[6] & ~StallF; + // hoping this created a AND-OR mux. + always_comb begin + case (GHRMuxSel) + 7'b000_0001: GHRNext = GHR[k-1+2:0]; // no change + 7'b000_0010: GHRNext = {GHR[k-2+2:0], PCSrcE}; // branch update + 7'b000_0100: GHRNext = {1'b0, GHR[k+1:1]}; // repair 1 + 7'b000_1000: GHRNext = {GHR[k-1+2:1], PCSrcE}; // branch update with mis prediction correction + 7'b001_0000: GHRNext = {2'b00, GHR[k+1:2]}; // repair 2 + 7'b010_0000: GHRNext = {1'b0, GHR[k+1:2], PCSrcE}; // branch update + repair 1 + 7'b100_0000: GHRNext = {GHR[k-2+2:0], BPPredF[1]}; // speculative update + default: GHRNext = GHR[k-1+2:0]; + endcase + end - logic [1:0] PredictionMemory; - logic DoForwarding, DoForwardingF; - logic [1:0] UpdatePredictionF; - + flopenr #(k+2) GlobalHistoryRegister(.clk(clk), + .reset(reset), + .en((GHRUpdateEN)), + .d(GHRNext), + .q(GHR)); + // if actively updating the GHR at the time of prediction we want to us + // GHRNext as the lookup rather than GHR. + + assign PHTUpdateAdr0 = InstrClassE[0] ? GHR[k:1] : GHR[k-1:0]; + assign PHTUpdateAdr1 = InstrClassE[0] ? GHR[k+1:2] : GHR[k:1]; + assign PHTUpdateAdr = BPInstrClassD[0] ? PHTUpdateAdr1 : PHTUpdateAdr0; + assign PHTUpdateEN = InstrClassE[0] & ~StallE; + + assign GHRLookup = |GHRMuxSel[6:1] ? GHRNext[k-1:0] : GHR[k-1:0]; + // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT - // GHR referes to the address that the past k branches points to in the prediction stage - // GHRE refers to the address that the past k branches points to in the exectution stage - SRAM2P1R1W #(k, 2) PHT(.clk(clk), - .reset(reset), - .RA1(GHRF), - .RD1(PredictionMemory), - .REN1(~StallF), - .WA1(GHRFNext), - .WD1(UpdatePrediction), - .WEN1(UpdateEN), - .BitWEN1(2'b11)); + SRAM2P1R1W #(k, 2) PHT(.clk(clk), + .reset(reset), + //.RA1(GHR[k-1:0]), + .RA1(GHRLookup), + .RD1(BPPredF), + .REN1(~StallF), + .WA1(PHTUpdateAdr), + .WD1(UpdateBPPredE), + .WEN1(PHTUpdateEN), + .BitWEN1(2'b11)); - - // need to forward when updating to the same address as reading. - // first we compare to see if the update and lookup addreses are the same - assign DoForwarding = GHRF == GHRFNext; - - // register the update value and the forwarding signal into the Fetch stage - // TODO: add stall logic *** - flopr #(1) DoForwardingReg(.clk(clk), - .reset(reset), - .d(DoForwarding), - .q(DoForwardingF)); - - flopr #(2) UpdatePredictionReg(.clk(clk), - .reset(reset), - .d(UpdatePrediction), - .q(UpdatePredictionF)); - - assign Prediction = DoForwardingF ? UpdatePredictionF : PredictionMemory; - - //pipeline for GHR - /*flopenrc #(k) GHRDReg(.clk(clk), - .reset(reset), - .en(~StallD), - .clear(FlushD), - .d(GHRF), - .q(GHRD)); - - flopenrc #(k) GHREReg(.clk(clk), - .reset(reset), - .en(~StallE), - .clear(FlushE), - .d(GHRD), - .q(GHRE)); -*/ endmodule diff --git a/wally-pipelined/src/ifu/gshare.sv b/wally-pipelined/src/ifu/gshare.sv deleted file mode 100644 index 4d31e519b..000000000 --- a/wally-pipelined/src/ifu/gshare.sv +++ /dev/null @@ -1,128 +0,0 @@ -/////////////////////////////////////////// -// gshare.sv -// -// Written: Shreya Sanghai -// Email: ssanghai@hmc.edu -// Created: March 16, 2021 -// Modified: -// -// Purpose: Gshare predictor with parameterized global history register -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - -`include "wally-config.vh" - -module gsharePredictor - #(parameter int k = 10 - ) - (input logic clk, - input logic reset, - input logic StallF, StallD, StallE, FlushF, FlushD, FlushE, - input logic [`XLEN-1:0] LookUpPC, - output logic [1:0] Prediction, - // update - input logic [`XLEN-1:0] UpdatePC, - input logic UpdateEN, PCSrcE, - input logic [1:0] UpdatePrediction - - ); - - logic [k-1:0] GHRF, GHRFNext; - //logic [k-1:0] LookUpPCIndexD, LookUpPCIndexE; - logic [k-1:0] LookUpPCIndex, UpdatePCIndex; - logic [1:0] PredictionMemory; - logic DoForwarding, DoForwardingF; - logic [1:0] UpdatePredictionF; - - assign GHRFNext = {PCSrcE, GHRF[k-1:1]}; - - flopenr #(k) GlobalHistoryRegister(.clk(clk), - .reset(reset), - .en(UpdateEN), - .d(GHRFNext), - .q(GHRF)); - - - // for gshare xor the PC with the GHR - assign UpdatePCIndex = GHRFNext ^ UpdatePC[k:1]; - assign LookUpPCIndex = GHRF ^ LookUpPC[k:1]; - // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT - // GHR referes to the address that the past k branches points to in the prediction stage - // GHRE refers to the address that the past k branches points to in the exectution stage - SRAM2P1R1W #(k, 2) PHT(.clk(clk), - .reset(reset), - .RA1(LookUpPCIndex), - .RD1(PredictionMemory), - .REN1(~StallF), - .WA1(UpdatePCIndex), - .WD1(UpdatePrediction), - .WEN1(UpdateEN), - .BitWEN1(2'b11)); - - - // need to forward when updating to the same address as reading. - // first we compare to see if the update and lookup addreses are the same - assign DoForwarding = LookUpPCIndex == UpdatePCIndex; - - // register the update value and the forwarding signal into the Fetch stage - // TODO: add stall logic *** - flopr #(1) DoForwardingReg(.clk(clk), - .reset(reset), - .d(DoForwarding), - .q(DoForwardingF)); - - flopr #(2) UpdatePredictionReg(.clk(clk), - .reset(reset), - .d(UpdatePrediction), - .q(UpdatePredictionF)); - - assign Prediction = DoForwardingF ? UpdatePredictionF : PredictionMemory; - - //pipeline for GHR -/* -----\/----- EXCLUDED -----\/----- - flopenrc #(k) LookUpDReg(.clk(clk), - .reset(reset), - .en(~StallD), - .clear(FlushD), - .d(LookUpPCIndex), - .q(LookUpPCIndexD)); - - flopenrc #(k) LookUpEReg(.clk(clk), - .reset(reset), - .en(~StallE), - .clear(FlushE), - .d(LookUpPCIndexD), - .q(LookUpPCIndexE)); - -----/\----- EXCLUDED -----/\----- */ - -/* flopenrc #(k) GHRRegD(.clk(clk), - .reset(reset), - .en(~StallD), - .clear(FlushD), - .d(GHRF), - .q(GHRD)); - - flopenrc #(k) GHRRegE(.clk(clk), - .reset(reset), - .en(~StallE), - .clear(FlushE), - .d(GHRD), - .q(GHRE)); - -*/ -endmodule diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index 9e30a083a..4f51edd79 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -154,15 +154,16 @@ module icachecontroller #(parameter LINESIZE = 256) ( localparam STATE_MISS_SPILL_FETCH_DONE = 10; // write data into SRAM/LUT localparam STATE_MISS_SPILL_READ1 = 11; // read block 0 from SRAM/LUT localparam STATE_MISS_SPILL_2 = 12; // return to ready if hit or do second block update. - localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 13; // miss on block 1, issue read to AHB and wait - localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 14; // write data to SRAM/LUT - localparam STATE_MISS_SPILL_MERGE = 15; // read block 0 of CPU access, + localparam STATE_MISS_SPILL_2_START = 13; // return to ready if hit or do second block update. + localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 14; // miss on block 1, issue read to AHB and wait + localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 15; // write data to SRAM/LUT + localparam STATE_MISS_SPILL_MERGE = 16; // read block 0 of CPU access, - localparam STATE_MISS_SPILL_FINAL = 16; // this state replicates STATE_READY's replay of the + localparam STATE_MISS_SPILL_FINAL = 17; // this state replicates STATE_READY's replay of the // spill access but does nto consider spill. It also does not do another operation. - localparam STATE_INVALIDATE = 17; // *** not sure if invalidate or evict? invalidate by cache block or address? + localparam STATE_INVALIDATE = 18; // *** not sure if invalidate or evict? invalidate by cache block or address? localparam AHBByteLength = `XLEN / 8; localparam AHBOFFETWIDTH = $clog2(AHBByteLength); @@ -380,11 +381,20 @@ module icachecontroller #(parameter LINESIZE = 256) ( PCMux = 2'b10; UnalignedSelect = 1'b1; spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm. + ICacheReadEn = 1'b1; + NextState = STATE_MISS_SPILL_2_START; + end + STATE_MISS_SPILL_2_START: begin if (~hit) begin CntReset = 1'b1; NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; end else begin - NextState = STATE_MISS_SPILL_FINAL; + NextState = STATE_READY; + ICacheReadEn = 1'b1; + PCMux = 2'b00; + UnalignedSelect = 1'b1; + SavePC = 1'b1; + ICacheStallF = 1'b0; end end STATE_MISS_SPILL_MISS_FETCH_WDV: begin diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 28f7597e3..e0507b63d 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -154,14 +154,7 @@ module ifu ( generate if (`BPRED_ENABLED == 1) begin : bpred // I am making the port connection explicit for now as I want to see them and they will be changing. - bpred bpred(.clk(clk), - .reset(reset), - .StallF(StallF), - .StallD(StallD), - .StallE(StallE), - .FlushF(FlushF), - .FlushD(FlushD), - .FlushE(FlushE), + bpred bpred(.*, .PCNextF(PCNextF), .BPPredPCF(BPPredPCF), .SelBPPredF(SelBPPredF), diff --git a/wally-pipelined/src/muldiv/div.sv b/wally-pipelined/src/muldiv/div.sv index 8b4e0463a..10af5eee4 100755 --- a/wally-pipelined/src/muldiv/div.sv +++ b/wally-pipelined/src/muldiv/div.sv @@ -87,7 +87,6 @@ module intdiv #(parameter WIDTH=64) // is 0 and thus a divide by 0 exception. This div0 // exception is given to FSM to tell the operation to // quit gracefully. - lzd_hier #(WIDTH) p1 (.ZP(P), .ZV(V), .B(twoD)); shift_left #(WIDTH) p2 (twoD, P, op2); assign op1 = twoN; diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index ccabe341a..0c26a5df8 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -53,6 +53,7 @@ module muldiv ( logic [2:0] Funct3E_Q; logic div0error; logic [`XLEN-1:0] N, D; + logic [`XLEN-1:0] Num0, Den0; logic gclk; logic DivStartE; @@ -69,13 +70,23 @@ module muldiv ( end assign gclk = enable_q & clk; + // Handle sign extension for W-type instructions + if (`XLEN == 64) begin // RV64 has W-type instructions + assign Num0 = W64E ? {{32{SrcAE[31]&signedDivide}}, SrcAE[31:0]} : SrcAE; + assign Den0 = W64E ? {{32{SrcBE[31]&signedDivide}}, SrcBE[31:0]} : SrcBE; + end else begin // RV32 has no W-type instructions + assign Num0 = SrcAE; + assign Den0 = SrcAE; + end + // capture the Numerator/Denominator - flopenrc #(`XLEN) reg_num (.d(SrcAE), .q(N), + flopenrc #(`XLEN) reg_num (.d(Num0), .q(N), .en(startDivideE), .clear(DivDoneE), .reset(reset), .clk(~gclk)); - flopenrc #(`XLEN) reg_den (.d(SrcBE), .q(D), + flopenrc #(`XLEN) reg_den (.d(Den0), .q(D), .en(startDivideE), .clear(DivDoneE), - .reset(reset), .clk(~gclk)); + .reset(reset), .clk(~gclk)); + assign signedDivide = (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index ea6939004..c60aa40db 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -166,12 +166,12 @@ string tests32f[] = '{ "rv64m/I-MULW-01", "3000", "rv64m/I-DIV-01", "3000", "rv64m/I-DIVU-01", "3000", - //"rv64m/I-DIVUW-01", "3000", - //"rv64m/I-DIVW-01", "3000", + "rv64m/I-DIVUW-01", "3000", + "rv64m/I-DIVW-01", "3000", "rv64m/I-REM-01", "3000", - "rv64m/I-REMU-01", "3000" - //"rv64m/I-REMUW-01", "3000", - //"rv64m/I-REMW-01", "3000" + "rv64m/I-REMU-01", "3000", + "rv64m/I-REMUW-01", "3000", + "rv64m/I-REMW-01", "3000" }; string tests64ic[] = '{ @@ -439,8 +439,11 @@ string tests32f[] = '{ string testsBP64[] = '{ "rv64BP/simple", "10000", + "rv64BP/mmm", "1000000", + "rv64BP/linpack_bench", "1000000", + "rv64BP/sieve", "1000000", "rv64BP/qsort", "1000000", - "rv64BP/sieve", "1000000" + "rv64BP/dhrystone", "1000000" }; string tests64p[] = '{