From 7592a0dacb5757a7960ddc3faff0ea23575f684c Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 26 Feb 2021 20:12:27 -0600 Subject: [PATCH] Shreya and I found a bug with the execution of JAL and JALR instructions. The link was only set in the writeback stage. Once the branch predictor started correctly predicting JAL(R)s, the ALU and forwarding logic need to have the PCLinkE at the execution stage in case an instruction in the next two clocks needs the data. --- wally-pipelined/regression/wave.do | 2 +- wally-pipelined/src/ieu/controller.sv | 3 ++- wally-pipelined/src/ieu/datapath.sv | 10 ++++++++-- wally-pipelined/src/ieu/ieu.sv | 2 ++ wally-pipelined/src/ifu/bpred.sv | 5 +++-- wally-pipelined/src/ifu/ifu.sv | 5 +++-- wally-pipelined/src/wally/wallypipelinedhart.sv | 2 +- wally-pipelined/testbench/testbench-imperas.sv | 2 +- 8 files changed, 21 insertions(+), 10 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 3601be0f..636d4095 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -115,7 +115,7 @@ add wave -noupdate -expand -group {alu execution stage} /testbench/dut/hart/ieu/ add wave -noupdate -expand -group {alu execution stage} /testbench/dut/hart/ieu/dp/SrcBE add wave -noupdate /testbench/dut/hart/ieu/dp/ALUResultM TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 2} {231033 ns} 0} {{Cursor 3} {1276133 ns} 0} +WaveRestoreCursors {{Cursor 2} {231033 ns} 0} {{Cursor 3} {1276117 ns} 0} quietly wave cursor active 2 configure wave -namecolwidth 250 configure wave -valuecolwidth 518 diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index 5a62f014..0edb40fe 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -44,6 +44,7 @@ module controller( output logic ALUSrcAE, ALUSrcBE, output logic TargetSrcE, output logic MemReadE, // for Hazard Unit + output logic JumpE, // Memory stage 
control signals input logic FlushM, input logic DataMisalignedM, @@ -64,7 +65,7 @@ module controller( logic RegWriteD, RegWriteE; logic [1:0] ResultSrcD, ResultSrcE, ResultSrcM; logic [1:0] MemRWD, MemRWE; - logic JumpD, JumpE; + logic JumpD; logic BranchD, BranchE; logic [1:0] ALUOpD; logic [4:0] ALUControlD; diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index bb02bad5..5499e57f 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -38,7 +38,9 @@ module datapath ( input logic [4:0] ALUControlE, input logic ALUSrcAE, ALUSrcBE, input logic TargetSrcE, + input logic JumpE, input logic [`XLEN-1:0] PCE, + input logic [`XLEN-1:0] PCLinkE, output logic [2:0] FlagsE, output logic [`XLEN-1:0] PCTargetE, // Memory stage signals @@ -67,7 +69,7 @@ module datapath ( // Execute stage signals logic [`XLEN-1:0] RD1E, RD2E; logic [`XLEN-1:0] ExtImmE; - logic [`XLEN-1:0] PreSrcAE, SrcAE, SrcBE; + logic [`XLEN-1:0] PreSrcAE, SrcAE, SrcBE, SrcAE2, SrcBE2; logic [`XLEN-1:0] ALUResultE; logic [`XLEN-1:0] WriteDataE; logic [`XLEN-1:0] TargetBaseE; @@ -95,8 +97,10 @@ module datapath ( mux3 #(`XLEN) faemux(RD1E, ResultW, ALUResultM, ForwardAE, PreSrcAE); mux3 #(`XLEN) fbemux(RD2E, ResultW, ALUResultM, ForwardBE, WriteDataE); mux2 #(`XLEN) srcamux(PreSrcAE, PCE, ALUSrcAE, SrcAE); + mux2 #(`XLEN) srcamux2(SrcAE, PCLinkE, JumpE, SrcAE2); mux2 #(`XLEN) srcbmux(WriteDataE, ExtImmE, ALUSrcBE, SrcBE); - alu #(`XLEN) alu(SrcAE, SrcBE, ALUControlE, ALUResultE, FlagsE); + mux2 #(`XLEN) srcbmux2(SrcBE, {`XLEN{1'b0}}, JumpE, SrcBE2); // *** May be able to remove this mux. 
+ alu #(`XLEN) alu(SrcAE2, SrcBE2, ALUControlE, ALUResultE, FlagsE); mux2 #(`XLEN) targetsrcmux(PCE, SrcAE, TargetSrcE, TargetBaseE); assign PCTargetE = ExtImmE + TargetBaseE; @@ -111,5 +115,7 @@ module datapath ( floprc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ALUResultM, ALUResultW); floprc #(5) RdWEg(clk, reset, FlushW, RdM, RdW); + // This mux4:1 no longer needs to include PCLinkW. This is set correctly in the execution stage. + // *** need to look at how the decoder is coded to fix. mux4 #(`XLEN) resultmux(ALUResultW, ReadDataW, PCLinkW, CSRReadValW, ResultSrcW, ResultW); endmodule diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index 7ed4bdff..2e3dbc85 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -33,6 +33,7 @@ module ieu ( output logic IllegalBaseInstrFaultD, // Execute Stage interface input logic [`XLEN-1:0] PCE, + input logic [`XLEN-1:0] PCLinkE, output logic [`XLEN-1:0] PCTargetE, // Memory stage interface input logic DataMisalignedM, @@ -68,6 +69,7 @@ module ieu ( logic [1:0] ForwardAE, ForwardBE; logic RegWriteM, RegWriteW; logic MemReadE; + logic JumpE; controller c(.OpD(InstrD[6:0]), .Funct3D(InstrD[14:12]), .Funct7b5D(InstrD[30]), .*); datapath dp(.*); diff --git a/wally-pipelined/src/ifu/bpred.sv b/wally-pipelined/src/ifu/bpred.sv index c589035c..e6ed30b3 100644 --- a/wally-pipelined/src/ifu/bpred.sv +++ b/wally-pipelined/src/ifu/bpred.sv @@ -78,6 +78,7 @@ module bpred // Part 2 branch direction prediction twoBitPredictor DirPredictor(.clk(clk), + .reset(reset), .LookUpPC(PCNextF), .Prediction(BPPredF), // update @@ -110,8 +111,8 @@ module bpred .UpdateTarget(PCTargetE)); // need to forward when updating to the same address as reading. - assign CorrectPCE = PCSrcE ? PCTargetE : PCLinkE; - assign TargetPC = (PCE == PCNextF) ? CorrectPCE : BTBPredPCF; + //assign CorrectPCE = PCSrcE ? PCTargetE : PCLinkE; + //assign TargetPC = (PCE == PCNextF) ? 
CorrectPCE : BTBPredPCF; // Part 4 RAS // *** need to add the logic to restore RAS on flushes. We will use incr for this. diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 5de133f2..317a1da2 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -36,10 +36,11 @@ module ifu ( // Decode output logic InstrStall, // Execute + output logic [`XLEN-1:0] PCLinkE, input logic PCSrcE, input logic [`XLEN-1:0] PCTargetE, output logic [`XLEN-1:0] PCE, - output logic BPPredWrongE, + output logic BPPredWrongE, // Mem input logic RetM, TrapM, input logic [`XLEN-1:0] PrivilegedNextPCM, @@ -58,7 +59,7 @@ module ifu ( logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM; logic StallExceptResolveBranchesF, PrivilegedChangePCM; logic IllegalCompInstrD; - logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkE, PCLinkM; + logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkM; logic CompressedF; logic [31:0] InstrRawD, InstrE; logic [31:0] nop = 32'h00000013; // instruction for NOP diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 2edeb902..9cce4559 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -57,7 +57,7 @@ module wallypipelinedhart ( logic [`XLEN-1:0] SrcAM; // logic [31:0] InstrF; logic [31:0] InstrD, InstrM; - logic [`XLEN-1:0] PCE, PCM, PCLinkW; + logic [`XLEN-1:0] PCE, PCM, PCLinkE, PCLinkW; logic [`XLEN-1:0] PCTargetE; logic [`XLEN-1:0] CSRReadValW; logic [`XLEN-1:0] PrivilegedNextPCM; diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 38b9a6d7..343791ae 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -408,7 +408,7 @@ string tests32i[] = { end // always @ (negedge clk) // track the 
current function or label - function_rfunction_radix function_radix(); + //function_rfunction_radix function_radix(); endmodule