From aa5300389f862aa8c7ee80a5c6526e94b6645857 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 10 Jan 2023 18:12:48 -0600 Subject: [PATCH 01/18] Optimized gshare. --- pipelined/src/ifu/optgshare.sv | 225 +++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 pipelined/src/ifu/optgshare.sv diff --git a/pipelined/src/ifu/optgshare.sv b/pipelined/src/ifu/optgshare.sv new file mode 100644 index 000000000..d911ec03e --- /dev/null +++ b/pipelined/src/ifu/optgshare.sv @@ -0,0 +1,225 @@ +/////////////////////////////////////////// +// gsharePredictor.sv +// +// Written: Shreya Sanghai +// Email: ssanghai@hmc.edu +// Created: March 16, 2021 +// Modified: +// +// Purpose: Global History Branch predictor with parameterized global history register +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module optgshare + #(parameter int k = 10 + ) + (input logic clk, + input logic reset, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, +// input logic [`XLEN-1:0] LookUpPC, + output logic [1:0] DirPredictionF, + output logic DirPredictionWrongE, + // update + input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, + input logic BranchInstrF, BranchInstrD, BranchInstrE, BranchInstrM, BranchInstrW, + input logic PCSrcE + ); + + logic MatchF, MatchD, MatchE, MatchM, MatchW; + logic MatchNextX, MatchXF; + + logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE; + logic [1:0] NewDirPredictionF, NewDirPredictionD, NewDirPredictionE, NewDirPredictionM, NewDirPredictionW; + + logic [k-1:0] GHRF; + logic [k:0] GHRD, OldGHRE, GHRE, GHRM, GHRW; + logic [k-1:0] GHRNextF; + logic [k:0] GHRNextD, GHRNextE, GHRNextM, GHRNextW; + logic [k-1:0] IndexNextF, IndexF; + logic [k-1:0] IndexD, IndexE, IndexM, IndexW; + + logic PCSrcM, PCSrcW; + logic [`XLEN-1:0] PCW; + + logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF; + + logic [k+4:0] GHRNext, GHR; + logic GHRUpdateEn; + + assign GHRUpdateEn = BranchInstrF | (DirPredictionWrongE & BranchInstrE) | + FlushD | FlushE | FlushM | FlushW; + + // it doesn't work this way. Instead we need to see how many branch instructions are flushed. + // then shift over by that amount. 
+ logic RemoveBrW, RemoveBrM, RemoveBrE, RemoveBrD, RemoveBrF, RemoveBrNextF; + + assign RemoveBrW = '0; + assign RemoveBrM = BranchInstrM & FlushW; + assign RemoveBrE = BranchInstrE & FlushM; + assign RemoveBrD = BranchInstrD & FlushE; + assign RemoveBrF = BranchInstrF & FlushD; + assign RemoveBrNextF = BranchInstrF & FlushD; + + always_comb begin + casez ({BranchInstrF, DirPredictionWrongE, RemoveBrF, RemoveBrD, RemoveBrE, RemoveBrM}) + 6'b00_0000: GHRNext = GHR; // no change + 6'b00_0001: GHRNext = {GHR[k+4:k+1], GHR[k-1:0], 1'b0}; // RemoveBrM + 6'b0?_0010: GHRNext = {GHR[k+4:k+2], GHR[k:0], 1'b0}; // RemoveBrE + 6'b0?_0011: GHRNext = {GHR[k+4:k+2], GHR[k-1:0], 2'b0}; // RemoveBrE, RemoveBrM + + 6'b00_0100: GHRNext = {GHR[k+4:k+2], GHR[k-1:0], 2'b0}; // RemoveBrD + 6'b00_0101: GHRNext = {GHR[k+4:k+3], GHR[k+1:0], 1'b0}; // RemoveBrD, RemoveBrM + 6'b0?_0110: GHRNext = {GHR[k+4:k+3], GHR[k+1], GHR[k-1:0], 2'b0}; // RemoveBrD, RemoveBrE + 6'b0?_0111: GHRNext = {GHR[k+4:k+3], GHR[k-1:0], 3'b0}; // RemoveBrD, RemoveBrE, RemoveBrM + + 6'b?0_1000: GHRNext = {GHR[k+2:0], 2'b0}; // RemoveBrF, + 6'b?0_1001: GHRNext = {GHR[k+2:k+1], GHR[k-1:0], 3'b0}; // RemoveBrF, RemoveBrM + 6'b??_1010: GHRNext = {GHR[k+2], GHR[k:0], 3'b0}; // RemoveBrF, RemoveBrE + 6'b??_1011: GHRNext = {GHR[k+2], GHR[k-1:0], 4'b0}; // RemoveBrF, RemoveBrE, RemoveBrM + + 6'b?0_1100: GHRNext = {GHR[k+1:0], 3'b0}; // RemoveBrF, RemoveBrD + 6'b?0_1101: GHRNext = {GHR[k+1], GHR[k-1:0], 4'b0}; // RemoveBrF, RemoveBrD, RemoveBrM + 6'b??_1110: GHRNext = {GHR[k:0], 4'b0}; // RemoveBrF, RemoveBrD, RemoveBrE + 6'b??_1111: GHRNext = {GHR[k-1:0], 5'b0}; // RemoveBrF, RemoveBrD, RemoveBrE, RemoveBrM + + 6'b?1_0000: GHRNext = {PCSrcE, GHR[k+3:0]}; // Miss prediction, no branches to flushes + 6'b?1_0001: GHRNext = {PCSrcE, GHR[k+3:k], GHR[k-1:1], 1'b0}; // Miss prediction, branch in Memory stage dropped + + 6'b?1_1100: GHRNext = {PCSrcE, GHR[k+1:0], 2'b00}; // Miss prediction, cannot have RemoveBrE + 6'b?1_1101: 
GHRNext = {PCSrcE, GHR[k+1], GHR[k-1:0], 3'b0}; // Miss prediction, cannot have RemoveBrE + 6'b10_0000: GHRNext = {DirPredictionF[1], GHR[k+4:1]}; + 6'b10_0001: GHRNext = {DirPredictionF[1], GHR[k+4:k+1], GHR[k-1:1], 1'b0}; + 6'b10_0010: GHRNext = {DirPredictionF[1], GHR[k+4:k+2], GHR[k:1], 1'b0}; + 6'b10_0011: GHRNext = {DirPredictionF[1], GHR[k+4:k+2], GHR[k-1:1], 2'b0}; + 6'b10_0100: GHRNext = {DirPredictionF[1], GHR[k+4:k+3], GHR[k+1:1], 1'b0}; + 6'b10_0101: GHRNext = {DirPredictionF[1], GHR[k+4:k+3], GHR[k+1], GHR[k-1:1], 2'b0}; + 6'b10_0110: GHRNext = {DirPredictionF[1], GHR[k+4:k+3], GHR[k], GHR[k-1:1], 2'b0}; + 6'b10_0111: GHRNext = {DirPredictionF[1], GHR[k+4:k+3], GHR[k-1:1], 3'b0}; + + default: GHRNext = GHR; + endcase + end + + flopenr #(k+5) GHRReg(clk, reset, GHRUpdateEn, GHRNext, GHR); + logic [k-1:0] GHRNextF_temp, GHRF_temp; + logic [k:0] GHRD_temp, GHRE_temp, GHRM_temp, GHRW_temp; + logic GHRFExtra_temp; + + // these are also in the ieu controller. should create inputs. + logic InstrValidF, InstrValidD, InstrValidE, InstrValidM, InstrValidW; + flopenrc #(1) InstrValidFReg(clk, reset, FlushD, ~StallF, 1'b1, InstrValidF); + flopenrc #(1) InstrValidDReg(clk, reset, FlushD, ~StallD, InstrValidF, InstrValidD); + flopenrc #(1) InstrValidEReg(clk, reset, FlushE, ~StallE, InstrValidD, InstrValidE); + flopenrc #(1) InstrValidMReg(clk, reset, FlushM, ~StallM, InstrValidE, InstrValidM); + flopenrc #(1) InstrValidWReg(clk, reset, FlushW, ~StallW, InstrValidM, InstrValidW); + + + assign GHRNextF_temp = GHRNext[k+4:5]; + assign GHRF_temp = InstrValidF ? GHR[k+3:4] : GHRNextF_temp; + assign GHRFExtra_temp = InstrValidF ? 1'b0 : GHR[k+4]; + assign GHRD_temp = InstrValidD ? GHR[k+3:3] : {GHRFExtra_temp, GHRF_temp}; + assign GHRE_temp = InstrValidE ? GHR[k+2:2] : GHRD_temp; + assign GHRM_temp = InstrValidM ? GHR[k+1:1] : GHRE_temp; + assign GHRW_temp = InstrValidW ? 
GHR[k:0] : GHRM_temp; + + + assign IndexNextF = GHRNextF ^ {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]}; + assign IndexF = GHRF ^ {PCF[k+1] ^ PCF[1], PCF[k:2]}; + assign IndexD = GHRD[k-1:0] ^ {PCD[k+1] ^ PCD[1], PCD[k:2]}; + assign IndexE = GHRE[k-1:0] ^ {PCE[k+1] ^ PCE[1], PCE[k:2]}; + assign IndexM = GHRM[k-1:0] ^ {PCM[k+1] ^ PCM[1], PCM[k:2]}; + assign IndexW = GHRW[k-1:0] ^ {PCW[k+1] ^ PCW[1], PCW[k:2]}; + + ram2p1r1wbefix #(2**k, 2) PHT(.clk(clk), + .ce1(~StallF | reset), .ce2(~StallW & ~FlushW), + .ra1(IndexNextF), + .rd1(TableDirPredictionF), + .wa2(IndexW), + .wd2(NewDirPredictionW), + .we2(BranchInstrW & ~StallW & ~FlushW), + .bwe2(1'b1)); + + // if there are non-flushed branches in the pipeline we need to forward the prediction from that stage to the NextF demi stage + // and then register for use in the Fetch stage. + assign MatchF = BranchInstrF & ~FlushD & (IndexNextF == IndexF); + assign MatchD = BranchInstrD & ~FlushE & (IndexNextF == IndexD); + assign MatchE = BranchInstrE & ~FlushM & (IndexNextF == IndexE); + assign MatchM = BranchInstrM & ~FlushW & (IndexNextF == IndexM); + assign MatchW = BranchInstrW & (IndexNextF == IndexW); + assign MatchNextX = MatchF | MatchD | MatchE | MatchM | MatchW; + + flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); + + assign ForwardNewDirPrediction = MatchF ? NewDirPredictionF : + MatchD ? NewDirPredictionD : + MatchE ? NewDirPredictionE : + MatchM ? NewDirPredictionM : + NewDirPredictionW; + + flopenr #(2) ForwardDirPredicitonReg(clk, reset, ~StallF, ForwardNewDirPrediction, ForwardDirPredictionF); + + assign DirPredictionF = MatchXF ? 
ForwardDirPredictionF : TableDirPredictionF; + + // DirPrediction pipeline + flopenr #(2) PredictionRegD(clk, reset, ~StallD, DirPredictionF, DirPredictionD); + flopenr #(2) PredictionRegE(clk, reset, ~StallE, DirPredictionD, DirPredictionE); + + // New prediction pipeline + satCounter2 BPDirUpdateF(.BrDir(DirPredictionF[1]), .OldState(DirPredictionF), .NewState(NewDirPredictionF)); + flopenr #(2) NewPredDReg(clk, reset, ~StallD, NewDirPredictionF, NewDirPredictionD); + satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); + flopenr #(2) NewPredMReg(clk, reset, ~StallM, NewDirPredictionE, NewDirPredictionM); + flopenr #(2) NewPredWReg(clk, reset, ~StallW, NewDirPredictionM, NewDirPredictionW); + + // PCSrc pipeline + flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM); + flopenrc #(1) PCSrcWReg(clk, reset, FlushW, ~StallW, PCSrcM, PCSrcW); + + // GHR pipeline + assign GHRNextF = FlushD ? GHRNextD[k:1] : + BranchInstrF ? {DirPredictionF[1], GHRF[k-1:1]} : + GHRF; + + flopenr #(k) GHRFReg(clk, reset, (~StallF) | FlushD, GHRNextF, GHRF); + + assign GHRNextD = FlushD ? GHRNextE : {DirPredictionF[1], GHRF}; + flopenr #(k+1) GHRDReg(clk, reset, (~StallD) | FlushD, GHRNextD, GHRD); + + assign GHRNextE = FlushE ? GHRNextM : GHRD; + flopenr #(k+1) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, OldGHRE); + assign GHRE = BranchInstrE ? {PCSrcE, OldGHRE[k-1:0]} : OldGHRE; + + assign GHRNextM = FlushM ? GHRNextW : GHRE; + flopenr #(k+1) GHRMReg(clk, reset, (~StallM) | FlushM, GHRNextM, GHRM); + + assign GHRNextW = FlushW ? 
GHRW : GHRM; + flopenr #(k+1) GHRWReg(clk, reset, (BranchInstrM & ~StallW) | FlushW, GHRNextW, GHRW); + + assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchInstrE; + + flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); + +endmodule From f59e1d03fcd3729bcb6c57c0cb384d80859b218a Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 12 Jan 2023 10:09:34 -0600 Subject: [PATCH 02/18] Added instruction logger. --- pipelined/testbench/testbench.sv | 195 ++++++- pipelined/testbench/testbench_imperas.sv | 709 +++++++++++++++++++++++ 2 files changed, 903 insertions(+), 1 deletion(-) create mode 100644 pipelined/testbench/testbench_imperas.sv diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 8683af69c..dcbebbf46 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -509,7 +509,53 @@ logic [3:0] dummy; $stop; end end - +/* -----\/----- EXCLUDED -----\/----- + + // rvvi tracer + localparam int ILEN = `XLEN; // Instruction length in bits + localparam int XLEN = `XLEN; // GPR length in bits + localparam int FLEN = `FLEN; // FPR length in bits + localparam int VLEN = 0; // Vector register size in bits + localparam int NHART = 1; // Number of harts reported + localparam int RETIRE = 1; // Number of instructions that can retire during valid event + + logic TraceClk; // Interface clock + + logic valid [(NHART-1):0][(RETIRE-1):0]; // Retired instruction + logic [63:0] order [(NHART-1):0][(RETIRE-1):0]; // Unique instruction order count (no gaps or reuse) + logic [(ILEN-1):0] insn [(NHART-1):0][(RETIRE-1):0]; // Instruction bit pattern + logic trap [(NHART-1):0][(RETIRE-1):0]; // Trapped instruction + logic halt [(NHART-1):0][(RETIRE-1):0]; // Halted instruction + logic intr [(NHART-1):0][(RETIRE-1):0]; // (RVFI Legacy) Flag first instruction of trap handler + logic [1:0] mode [(NHART-1):0][(RETIRE-1):0]; // Privilege mode of operation + logic [1:0] ixl [(NHART-1):0][(RETIRE-1):0]; // XLEN mode 
32/64 bit + + logic [(XLEN-1):0] pc_rdata [(NHART-1):0][(RETIRE-1):0]; // PC of insn + logic [(XLEN-1):0] pc_wdata [(NHART-1):0][(RETIRE-1):0]; // PC of next instruction + + // X Registers + logic [31:0][(XLEN-1):0] x_wdata [(NHART-1):0][(RETIRE-1):0]; // X data value + logic [31:0] x_wb [(NHART-1):0][(RETIRE-1):0]; // X data writeback (change) flag + + // F Registers + logic [31:0][(FLEN-1):0] f_wdata [(NHART-1):0][(RETIRE-1):0]; // F data value + logic [31:0] f_wb [(NHART-1):0][(RETIRE-1):0]; // F data writeback (change) flag + + // V Registers + logic [31:0][(VLEN-1):0] v_wdata [(NHART-1):0][(RETIRE-1):0]; // V data value + logic [31:0] v_wb [(NHART-1):0][(RETIRE-1):0]; // V data writeback (change) flag + + // Control & State Registers + logic [4095:0][(XLEN-1):0] csr [(NHART-1):0][(RETIRE-1):0]; // Full CSR Address range + logic [4095:0] csr_wb [(NHART-1):0][(RETIRE-1):0]; // CSR writeback (change) flag + + logic lrsc_cancel[(NHART-1):0][(RETIRE-1):0]; // Implementation defined + rvviTrace #(`XLEN, `XLEN, `FLEN, 0, 1, 1) rvviTrace(.clk(TraceClk), .valid, .order, .insn, .trap, .halt, .intr, + .mode, .ixl, .pc_rdata, .pc_wdata, .x_wdata, .x_wb, .f_wdata, .f_wb, .v_wdata, .v_wb, + .csr, .csr_wb, .lrsc_cancel); + -----/\----- EXCLUDED -----/\----- */ + + rvviTrace rvviTrace(); endmodule @@ -692,3 +738,150 @@ task automatic updateProgramAddrLabelArray; $fclose(ProgramLabelMapFP); $fclose(ProgramAddrMapFP); endtask + +`define NUM_REGS 32 +`define NUM_CSRS 4096 + +module rvviTrace(); + + // wally specific signals + logic reset; + + logic [`XLEN-1:0] PCM, PCW; + logic [`XLEN-1:0] InstrRawD, InstrRawE, InstrRawM, InstrRawW; + logic InstrValidM, InstrValidW; + logic StallE, StallM, StallW; + logic FlushE, FlushM, FlushW; + + // tracer signals + logic clk; + logic valid; + logic [`XLEN-1:0] insn; + logic [`XLEN-1:0 ] pc_rdata; + + assign clk = testbench.dut.clk; + assign InstrValidM = testbench.dut.core.ieu.InstrValidM; + assign InstrRawD = 
testbench.dut.core.ifu.InstrRawD; + assign PCM = testbench.dut.core.ifu.PCM; + assign reset = testbench.reset; + assign StallE = testbench.dut.core.StallE; + assign StallM = testbench.dut.core.StallM; + assign StallW = testbench.dut.core.StallW; + assign FlushE = testbench.dut.core.FlushE; + assign FlushM = testbench.dut.core.FlushM; + assign FlushW = testbench.dut.core.FlushW; + + // pipeline to writeback stage + flopenrc #(`XLEN) InstrRawEReg (clk, reset, FlushE, ~StallE, InstrRawD, InstrRawE); + flopenrc #(`XLEN) InstrRawMReg (clk, reset, FlushM, ~StallM, InstrRawE, InstrRawM); + flopenrc #(`XLEN) InstrRawWReg (clk, reset, FlushW, ~StallW, InstrRawM, InstrRawW); + flopenrc #(`XLEN) PCWReg (clk, reset, FlushW, ~StallW, PCM, PCW); + flopenrc #(1) InstrValidMReg (clk, reset, FlushW, ~StallW, InstrValidM, InstrValidW); + + assign valid = InstrValidW; + assign insn = InstrRawW; + assign pc_rdata = PCW; + + always_ff @(posedge clk) begin + if(valid) begin + $display("PC = %x, insn = %x", pc_rdata, insn); + end + end + + +endmodule + +/* -----\/----- EXCLUDED -----\/----- +module rvviTrace #( + parameter int ILEN = `XLEN, // Instruction length in bits + parameter int XLEN = `XLEN, // GPR length in bits + parameter int FLEN = `FLEN, // FPR length in bits + parameter int VLEN = 0, // Vector register size in bits + parameter int NHART = 1, // Number of harts reported + parameter int RETIRE = 1 // Number of instructions that can retire during valid event + )( + // + // RISCV output signals + // + output logic clk, // Interface clock + + output logic valid [(NHART-1):0][(RETIRE-1):0], // Retired instruction + output logic [63:0] order [(NHART-1):0][(RETIRE-1):0], // Unique instruction order count (no gaps or reuse) + output logic [(ILEN-1):0] insn [(NHART-1):0][(RETIRE-1):0], // Instruction bit pattern + output logic trap [(NHART-1):0][(RETIRE-1):0], // Trapped instruction + output logic halt [(NHART-1):0][(RETIRE-1):0], // Halted instruction + output logic intr 
[(NHART-1):0][(RETIRE-1):0], // (RVFI Legacy) Flag first instruction of trap handler + output logic [1:0] mode [(NHART-1):0][(RETIRE-1):0], // Privilege mode of operation + output logic [1:0] ixl [(NHART-1):0][(RETIRE-1):0], // XLEN mode 32/64 bit + + output logic [(XLEN-1):0] pc_rdata [(NHART-1):0][(RETIRE-1):0], // PC of insn + output logic [(XLEN-1):0] pc_wdata [(NHART-1):0][(RETIRE-1):0], // PC of next instruction + + // X Registers + output logic [31:0][(XLEN-1):0] x_wdata [(NHART-1):0][(RETIRE-1):0], // X data value + output logic [31:0] x_wb [(NHART-1):0][(RETIRE-1):0], // X data writeback (change) flag + + // F Registers + output logic [31:0][(FLEN-1):0] f_wdata [(NHART-1):0][(RETIRE-1):0], // F data value + output logic [31:0] f_wb [(NHART-1):0][(RETIRE-1):0], // F data writeback (change) flag + + // V Registers + output logic [31:0][(VLEN-1):0] v_wdata [(NHART-1):0][(RETIRE-1):0], // V data value + output logic [31:0] v_wb [(NHART-1):0][(RETIRE-1):0], // V data writeback (change) flag + + // Control & State Registers + output logic [4095:0][(XLEN-1):0] csr [(NHART-1):0][(RETIRE-1):0], // Full CSR Address range + output logic [4095:0] csr_wb [(NHART-1):0][(RETIRE-1):0], // CSR writeback (change) flag + + output logic lrsc_cancel[(NHART-1):0][(RETIRE-1):0] // Implementation defined cancel + ); + + + assign clk = dut.clk; + // *** need to pipeline to writeback stage. 
+ assign valid = dut.core.ieu.InstrValidM; + assign insn = dut.core.ifu.InstrM; + assign pc_rdata = dut.core.ifu.PCM; + + always_ff @(posedge clk) begin + if(valid) begin + $display("PC = %d, insn = %d", pc_rdata, insn); + end + end + + + // + // Synchronization of NETs + // + wire clkD; + assign #1 clkD = clk; + + string name[$]; + int value[$]; + longint tslot[$]; + int nets[string]; + + function automatic void net_push(input string vname, input int vvalue); + longint vslot = $time; + name.push_front(vname); + value.push_front(vvalue); + tslot.push_front(vslot); + endfunction + + function automatic int net_pop(output string vname, output int vvalue, output longint vslot); + int ok; + string msg; + if (name.size() > 0) begin + vname = name.pop_back(); + vvalue = value.pop_back(); + vslot = tslot.pop_back(); + nets[vname] = vvalue; + ok = 1; + end else begin + ok = 0; + end + return ok; + endfunction + +endmodule + -----/\----- EXCLUDED -----/\----- */ diff --git a/pipelined/testbench/testbench_imperas.sv b/pipelined/testbench/testbench_imperas.sv new file mode 100644 index 000000000..372257c5c --- /dev/null +++ b/pipelined/testbench/testbench_imperas.sv @@ -0,0 +1,709 @@ +/////////////////////////////////////////// +// testbench.sv +// +// Written: David_Harris@hmc.edu 9 January 2021 +// Modified: +// +// Purpose: Wally Testbench and helper modules +// Applies test programs from the riscv-arch-test and Imperas suites +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" +`include "tests.vh" + +`define PrintHPMCounters 0 +`define BPRED_LOGGER 0 + +module testbench_imperas; + parameter DEBUG=0; + parameter TEST="none"; + + logic clk; + logic reset_ext, reset; + + parameter SIGNATURESIZE = 5000000; + + int test, i, errors, totalerrors; + logic [31:0] sig32[0:SIGNATURESIZE]; + logic [`XLEN-1:0] signature[0:SIGNATURESIZE]; + logic [`XLEN-1:0] testadr, testadrNoBase; + string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName; + logic [31:0] InstrW; + +string tests[]; +logic [3:0] dummy; + + logic [`AHBW-1:0] HRDATAEXT; + logic HREADYEXT, HRESPEXT; + logic [`PA_BITS-1:0] HADDR; + logic [`AHBW-1:0] HWDATA; + logic [`XLEN/8-1:0] HWSTRB; + logic HWRITE; + logic [2:0] HSIZE; + logic [2:0] HBURST; + logic [3:0] HPROT; + logic [1:0] HTRANS; + logic HMASTLOCK; + logic HCLK, HRESETn; + logic [`XLEN-1:0] PCW; + + string ProgramAddrMapFile, ProgramLabelMapFile; + integer ProgramAddrLabelArray [string] = '{ "begin_signature" : 0, "tohost" : 0 }; + + logic DCacheFlushDone, DCacheFlushStart; + logic riscofTest; + + flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW); + flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW); + + // check assertions for a legal configuration + riscvassertions riscvassertions(); + + // pick tests based on modes supported + initial begin + $display("TEST is %s", TEST); + //tests = '{}; + if (`XLEN == 64) begin // RV64 + case (TEST) + "arch64i": tests = arch64i; + "arch64priv": tests = arch64priv; + "arch64c": if (`C_SUPPORTED) + if (`ZICSR_SUPPORTED) tests = {arch64c, arch64cpriv}; + else tests = {arch64c}; + "arch64m": if (`M_SUPPORTED) tests = arch64m; + "arch64f": if (`F_SUPPORTED) tests = arch64f; + "arch64d": if (`D_SUPPORTED) tests = arch64d; + "imperas64i": tests = imperas64i; + "imperas64f": if (`F_SUPPORTED) 
tests = imperas64f; + "imperas64d": if (`D_SUPPORTED) tests = imperas64d; + "imperas64m": if (`M_SUPPORTED) tests = imperas64m; + "wally64a": if (`A_SUPPORTED) tests = wally64a; + "imperas64c": if (`C_SUPPORTED) tests = imperas64c; + else tests = imperas64iNOc; + "custom": tests = custom; + "wally64i": tests = wally64i; + "wally64priv": tests = wally64priv; + "wally64periph": tests = wally64periph; + "coremark": tests = coremark; + "fpga": tests = fpga; + "ahb" : tests = ahb; + endcase + end else begin // RV32 + case (TEST) + "arch32i": tests = arch32i; + "arch32priv": tests = arch32priv; + "arch32c": if (`C_SUPPORTED) + if (`ZICSR_SUPPORTED) tests = {arch32c, arch32cpriv}; + else tests = {arch32c}; + "arch32m": if (`M_SUPPORTED) tests = arch32m; + "arch32f": if (`F_SUPPORTED) tests = arch32f; + "arch32d": if (`D_SUPPORTED) tests = arch32d; + "imperas32i": tests = imperas32i; + "imperas32f": if (`F_SUPPORTED) tests = imperas32f; + "imperas32m": if (`M_SUPPORTED) tests = imperas32m; + "wally32a": if (`A_SUPPORTED) tests = wally32a; + "imperas32c": if (`C_SUPPORTED) tests = imperas32c; + else tests = imperas32iNOc; + "wally32i": tests = wally32i; + "wally32e": tests = wally32e; + "wally32priv": tests = wally32priv; + "wally32periph": tests = wally32periph; + "embench": tests = embench; + "coremark": tests = coremark; + endcase + end + if (tests.size() == 0) begin + $display("TEST %s not supported in this configuration", TEST); + $stop; + end + end + + string signame, memfilename, pathname, objdumpfilename, adrstr, outputfile; + integer outputFilePointer; + + logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn; + logic UARTSin, UARTSout; + + logic SDCCLK; + logic SDCCmdIn; + logic SDCCmdOut; + logic SDCCmdOE; + logic [3:0] SDCDatIn; + tri1 [3:0] SDCDat; + tri1 SDCCmd; + + logic HREADY; + logic HSELEXT; + + logic InitializingMemories; + integer ResetCount, ResetThreshold; + logic InReset; + + // instantiate device to be tested + assign GPIOPinsIn = 0; + assign UARTSin = 
1; + + if(`EXT_MEM_SUPPORTED) begin + ram_ahb #(.BASE(`EXT_MEM_BASE), .RANGE(`EXT_MEM_RANGE)) + ram (.HCLK, .HRESETn, .HADDR, .HWRITE, .HTRANS, .HWDATA, .HSELRam(HSELEXT), + .HREADRam(HRDATAEXT), .HREADYRam(HREADYEXT), .HRESPRam(HRESPEXT), .HREADY, + .HWSTRB); + end else begin + assign HREADYEXT = 1; + assign HRESPEXT = 0; + assign HRDATAEXT = 0; + end + + if(`FPGA) begin : sdcard + sdModel sdcard + (.sdClk(SDCCLK), + .cmd(SDCCmd), + .dat(SDCDat)); + + assign SDCCmd = SDCCmdOE ? SDCCmdOut : 1'bz; + assign SDCCmdIn = SDCCmd; + assign SDCDatIn = SDCDat; + end else begin + assign SDCCmd = '0; + assign SDCDat = '0; + end + + wallypipelinedsoc dut(.clk, .reset_ext, .reset, .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT, + .HCLK, .HRESETn, .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST, .HPROT, + .HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn, + .UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK); + + // Track names of instructions + instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE, + dut.core.ifu.FinalInstrRawF[31:0], + dut.core.ifu.InstrD, dut.core.ifu.InstrE, + dut.core.ifu.InstrM, InstrW, + InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); + + // initialize tests + localparam integer MemStartAddr = 0; + localparam integer MemEndAddr = `UNCORE_RAM_RANGE>>1+(`XLEN/32); + + initial + begin + ResetCount = 0; + ResetThreshold = 2; + InReset = 1; + test = 1; + totalerrors = 0; + testadr = 0; + testadrNoBase = 0; + // riscof tests have a different signature, tests[0] == "1" refers to RiscvArchTests and tests[0] == "2" refers to WallyRiscvArchTests + riscofTest = tests[0] == "1" | tests[0] == "2"; + // fill memory with defined values to reduce Xs in simulation + // Quick note the memory will need to be initialized. The C library does not + // guarantee the initialized reads. For example a strcmp can read 6 byte + // strings, but uses a load double to read them in. 
If the last 2 bytes are + // not initialized the compare results in an 'x' which propagates through + // the design. + if (TEST == "coremark") + for (i=MemStartAddr; i XLEN because AHB bus width is XLEN"); + assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported"); + assert (`FLEN<=`XLEN | `DCACHE | `DTIM_SUPPORTED) else $error("Wally does not support FLEN > XLEN unleses data cache or DTIM is supported"); + assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (!`DCACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); + assert (`DCACHE_LINELENINBITS >= 128 | (!`DCACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); + assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size"); + assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (!`ICACHE) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); + assert (`ICACHE_LINELENINBITS >= 32 | (!`ICACHE)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); + assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size"); + assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (!`DCACHE)) else $error("DCACHE_LINELENINBITS must be a power of 2"); + assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (!`DCACHE)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); + assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (!`ICACHE)) else $error("ICACHE_LINELENINBITS must be a power of 2"); + assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (!`ICACHE)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); + assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | 
`VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2"); + assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2"); + assert (`UNCORE_RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if UNCORE_RAM_RANGE is less than 56'h07FFFFFF"); + assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported."); + assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported"); + assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported"); + assert (`VIRTMEM_SUPPORTED == 0 | (`DTIM_SUPPORTED == 0 & `IROM_SUPPORTED == 0)) else $error("Can't simultaneously have virtual memory and DTIM_SUPPORTED/IROM_SUPPORTED because local memories don't translate addresses"); + assert (`DCACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); + assert (`ICACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); + assert ((`DCACHE == 0 & `ICACHE == 0) | `BUS) else $error("Dcache and Icache requires DBUS."); + assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (!`DCACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1"); + assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words"); + assert (`DCACHE | `A_SUPPORTED == 0) else $error("Atomic extension (A) requires cache on Wally."); + assert (`IDIV_ON_FPU == 0 | `F_SUPPORTED) else $error("IDIV on FPU needs F_SUPPORTED"); + end + + // *** DH 8/23/ +endmodule + + +/* verilator lint_on STMTDLY */ +/* verilator lint_on WIDTH */ + +module DCacheFlushFSM + (input logic clk, + input logic reset, + input logic start, + output logic done); + + genvar adr; + + logic [`XLEN-1:0] 
ShadowRAM[`UNCORE_RAM_BASE>>(1+`XLEN/32):(`UNCORE_RAM_RANGE+`UNCORE_RAM_BASE)>>1+(`XLEN/32)]; + + if(`DCACHE) begin + localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES; + localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS; + localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN; + localparam integer linelen = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN; + localparam integer sramlen = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].SRAMLEN; + localparam integer cachesramwords = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].NUMSRAM; + +//testbench.dut.core.lsu.bus.dcache.dcache.CacheWays.NUMSRAM; + localparam integer numwords = sramlen/`XLEN; + localparam integer lognumlines = $clog2(numlines); + localparam integer loglinebytelen = $clog2(linebytelen); + localparam integer lognumways = $clog2(numways); + localparam integer tagstart = lognumlines + loglinebytelen; + + + + genvar index, way, cacheWord; + logic [sramlen-1:0] CacheData [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + logic [sramlen-1:0] cacheline; + logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + logic CacheValid [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + logic CacheDirty [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + for(index = 0; index < numlines; index++) begin + for(way = 0; way < numways; way++) begin + for(cacheWord = 0; cacheWord < cachesramwords; cacheWord++) begin + copyShadow #(.tagstart(tagstart), + .loglinebytelen(loglinebytelen), .sramlen(sramlen)) + copyShadow(.clk, + .start, + .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.RAM[index][`PA_BITS-1-tagstart:0]), + .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]), + .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]), + 
// these dirty bit selections would be needed if dirty is moved inside the tag array. + //.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].dirty.DirtyMem.RAM[index]), + //.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.RAM[index][`PA_BITS+tagstart]), + .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.RAM[index]), + .index(index), + .cacheWord(cacheWord), + .CacheData(CacheData[way][index][cacheWord]), + .CacheAdr(CacheAdr[way][index][cacheWord]), + .CacheTag(CacheTag[way][index][cacheWord]), + .CacheValid(CacheValid[way][index][cacheWord]), + .CacheDirty(CacheDirty[way][index][cacheWord])); + end + end + end + + integer i, j, k, l; + + always @(posedge clk) begin + if (start) begin #1 + #1 + for(i = 0; i < numlines; i++) begin + for(j = 0; j < numways; j++) begin + for(l = 0; l < cachesramwords; l++) begin + if (CacheValid[j][i][l] & CacheDirty[j][i][l]) begin + for(k = 0; k < numwords; k++) begin + //cacheline = CacheData[j][i][0]; + // does not work with modelsim + // # ** Error: ../testbench/testbench.sv(483): Range must be bounded by constant expressions. 
+ // see https://verificationacademy.com/forums/systemverilog/range-must-be-bounded-constant-expressions + //ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = cacheline[`XLEN*(k+1)-1:`XLEN*k]; + ShadowRAM[(CacheAdr[j][i][l] >> $clog2(`XLEN/8)) + k] = CacheData[j][i][l][`XLEN*k +: `XLEN]; + end + end + end + end + end + end + end + end + flop #(1) doneReg(.clk, .d(start), .q(done)); +endmodule + +module copyShadow + #(parameter tagstart, loglinebytelen, sramlen) + (input logic clk, + input logic start, + input logic [`PA_BITS-1:tagstart] tag, + input logic valid, dirty, + input logic [sramlen-1:0] data, + input logic [32-1:0] index, + input logic [32-1:0] cacheWord, + output logic [sramlen-1:0] CacheData, + output logic [`PA_BITS-1:0] CacheAdr, + output logic [`XLEN-1:0] CacheTag, + output logic CacheValid, + output logic CacheDirty); + + + always_ff @(posedge clk) begin + if(start) begin + CacheTag = tag; + CacheValid = valid; + CacheDirty = dirty; + CacheData = data; + CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(sramlen/8)); + end + end + +endmodule + +task automatic updateProgramAddrLabelArray; + input string ProgramAddrMapFile, ProgramLabelMapFile; + inout integer ProgramAddrLabelArray [string]; + // Gets the memory location of begin_signature + integer ProgramLabelMapFP, ProgramAddrMapFP; + ProgramLabelMapFP = $fopen(ProgramLabelMapFile, "r"); + ProgramAddrMapFP = $fopen(ProgramAddrMapFile, "r"); + + if (ProgramLabelMapFP & ProgramAddrMapFP) begin // check we found both files + while (!$feof(ProgramLabelMapFP)) begin + string label, adrstr; + integer returncode; + returncode = $fscanf(ProgramLabelMapFP, "%s\n", label); + returncode = $fscanf(ProgramAddrMapFP, "%s\n", adrstr); + if (ProgramAddrLabelArray.exists(label)) + ProgramAddrLabelArray[label] = adrstr.atohex(); + end + end + $fclose(ProgramLabelMapFP); + $fclose(ProgramAddrMapFP); +endtask + +`define NUM_REGS 32 +`define NUM_CSRS 4096 + +module rvviTrace(); + + // wally 
specific signals + logic reset; + + logic [`XLEN-1:0] PCM, PCW; + logic [`XLEN-1:0] InstrRawD, InstrRawE, InstrRawM, InstrRawW; + logic InstrValidM, InstrValidW; + logic StallE, StallM, StallW; + logic FlushE, FlushM, FlushW; + + // tracer signals + logic clk; + logic valid; + logic [`XLEN-1:0] insn; + logic [`XLEN-1:0 ] pc_rdata; + + assign clk = testbench.dut.clk; + assign InstrValidM = testbench.dut.core.ieu.InstrValidM; + assign InstrRawD = testbench.dut.core.ifu.InstrRawD; + assign PCM = testbench.dut.core.ifu.PCM; + assign reset = testbench.reset; + assign StallE = testbench.dut.core.StallE; + assign StallM = testbench.dut.core.StallM; + assign StallW = testbench.dut.core.StallW; + assign FlushE = testbench.dut.core.FlushE; + assign FlushM = testbench.dut.core.FlushM; + assign FlushW = testbench.dut.core.FlushW; + + // pipeline to writeback stage + flopenrc #(`XLEN) InstrRawEReg (clk, reset, FlushE, ~StallE, InstrRawD, InstrRawE); + flopenrc #(`XLEN) InstrRawMReg (clk, reset, FlushM, ~StallM, InstrRawE, InstrRawM); + flopenrc #(`XLEN) InstrRawWReg (clk, reset, FlushW, ~StallW, InstrRawM, InstrRawW); + flopenrc #(`XLEN) PCWReg (clk, reset, FlushW, ~StallW, PCM, PCW); + flopenrc #(1) InstrValidMReg (clk, reset, FlushW, ~StallW, InstrValidM, InstrValidW); + + assign valid = InstrValidW; + assign insn = InstrRawW; + assign pc_rdata = PCW; + + always_ff @(posedge clk) begin + if(valid) begin + $display("PC = %x, insn = %x", pc_rdata, insn); + end + end + + +endmodule + From 8ee80c5d5432ad936c4f87397c459960450d7d1d Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 12 Jan 2023 12:07:07 -0600 Subject: [PATCH 03/18] Created separate imperas testbench. Resolved logger issue with the duplicated instructions after commit. 
--- .../regression/wally-pipelined-imperas.do | 45 +++++++++++++++++++ pipelined/testbench/testbench_imperas.sv | 6 +-- 2 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 pipelined/regression/wally-pipelined-imperas.do diff --git a/pipelined/regression/wally-pipelined-imperas.do b/pipelined/regression/wally-pipelined-imperas.do new file mode 100644 index 000000000..e0d5070a3 --- /dev/null +++ b/pipelined/regression/wally-pipelined-imperas.do @@ -0,0 +1,45 @@ +# wally-pipelined.do +# +# Modification by Oklahoma State University & Harvey Mudd College +# Use with Testbench +# James Stine, 2008; David Harris 2021 +# Go Cowboys!!!!!! +# +# Takes 1:10 to run RV64IC tests using gui + +# run with vsim -do "do wally-pipelined.do rv64ic riscvarchtest-64m" + +# Use this wally-pipelined.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do wally-pipelined.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do wally-pipelined.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +# suppress spurious warnngs about +# "Extra checking for conflicts with always_comb done at vopt time" +# because vsim will run vopt + +# start and run simulation +# remove +acc flag for faster sim during regressions if there is no need to access internal signals + # *** modelsim won't take `PA_BITS, but will take other defines for the lengths of DTIM_RANGE and IROM_LEN. For now just live with the warnings. 
+vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench_imperas.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063 +vopt +acc work.testbench -G TEST=$2 -G DEBUG=1 -o workopt +vsim workopt +nowarn3829 -fatal 7 +view wave +#-- display input and output signals as hexidecimal values +add log -recursive /* +do wave.do + +run -all +noview ../testbench/testbench_imperas.sv +view wave diff --git a/pipelined/testbench/testbench_imperas.sv b/pipelined/testbench/testbench_imperas.sv index 372257c5c..929f16b22 100644 --- a/pipelined/testbench/testbench_imperas.sv +++ b/pipelined/testbench/testbench_imperas.sv @@ -32,10 +32,8 @@ `include "wally-config.vh" `include "tests.vh" -`define PrintHPMCounters 0 -`define BPRED_LOGGER 0 -module testbench_imperas; +module testbench; parameter DEBUG=0; parameter TEST="none"; @@ -694,7 +692,7 @@ module rvviTrace(); flopenrc #(`XLEN) PCWReg (clk, reset, FlushW, ~StallW, PCM, PCW); flopenrc #(1) InstrValidMReg (clk, reset, FlushW, ~StallW, InstrValidM, InstrValidW); - assign valid = InstrValidW; + assign valid = InstrValidW & ~StallW & ~FlushW; assign insn = InstrRawW; assign pc_rdata = PCW; From 5112ffcbc95b44cda6a17ba2f6d836d04acb85e5 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 12 Jan 2023 12:45:44 -0600 Subject: [PATCH 04/18] Stripped out all signature checking. Removed multiple tests loop. Only runs 1 test now. 
--- pipelined/testbench/testbench_imperas.sv | 349 +++-------------------- 1 file changed, 41 insertions(+), 308 deletions(-) diff --git a/pipelined/testbench/testbench_imperas.sv b/pipelined/testbench/testbench_imperas.sv index 929f16b22..0b40242b3 100644 --- a/pipelined/testbench/testbench_imperas.sv +++ b/pipelined/testbench/testbench_imperas.sv @@ -30,7 +30,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" -`include "tests.vh" module testbench; @@ -40,17 +39,12 @@ module testbench; logic clk; logic reset_ext, reset; - parameter SIGNATURESIZE = 5000000; - int test, i, errors, totalerrors; - logic [31:0] sig32[0:SIGNATURESIZE]; - logic [`XLEN-1:0] signature[0:SIGNATURESIZE]; logic [`XLEN-1:0] testadr, testadrNoBase; string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName; logic [31:0] InstrW; -string tests[]; -logic [3:0] dummy; + logic [3:0] dummy; logic [`AHBW-1:0] HRDATAEXT; logic HREADYEXT, HRESPEXT; @@ -68,77 +62,9 @@ logic [3:0] dummy; string ProgramAddrMapFile, ProgramLabelMapFile; integer ProgramAddrLabelArray [string] = '{ "begin_signature" : 0, "tohost" : 0 }; - logic DCacheFlushDone, DCacheFlushStart; - logic riscofTest; - - flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW); - flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW); - - // check assertions for a legal configuration - riscvassertions riscvassertions(); - - // pick tests based on modes supported - initial begin - $display("TEST is %s", TEST); - //tests = '{}; - if (`XLEN == 64) begin // RV64 - case (TEST) - "arch64i": tests = arch64i; - "arch64priv": tests = arch64priv; - "arch64c": if (`C_SUPPORTED) - if (`ZICSR_SUPPORTED) tests = {arch64c, arch64cpriv}; - else tests = {arch64c}; - "arch64m": if (`M_SUPPORTED) tests = arch64m; - "arch64f": if (`F_SUPPORTED) tests = arch64f; - "arch64d": if (`D_SUPPORTED) tests = arch64d; - 
"imperas64i": tests = imperas64i; - "imperas64f": if (`F_SUPPORTED) tests = imperas64f; - "imperas64d": if (`D_SUPPORTED) tests = imperas64d; - "imperas64m": if (`M_SUPPORTED) tests = imperas64m; - "wally64a": if (`A_SUPPORTED) tests = wally64a; - "imperas64c": if (`C_SUPPORTED) tests = imperas64c; - else tests = imperas64iNOc; - "custom": tests = custom; - "wally64i": tests = wally64i; - "wally64priv": tests = wally64priv; - "wally64periph": tests = wally64periph; - "coremark": tests = coremark; - "fpga": tests = fpga; - "ahb" : tests = ahb; - endcase - end else begin // RV32 - case (TEST) - "arch32i": tests = arch32i; - "arch32priv": tests = arch32priv; - "arch32c": if (`C_SUPPORTED) - if (`ZICSR_SUPPORTED) tests = {arch32c, arch32cpriv}; - else tests = {arch32c}; - "arch32m": if (`M_SUPPORTED) tests = arch32m; - "arch32f": if (`F_SUPPORTED) tests = arch32f; - "arch32d": if (`D_SUPPORTED) tests = arch32d; - "imperas32i": tests = imperas32i; - "imperas32f": if (`F_SUPPORTED) tests = imperas32f; - "imperas32m": if (`M_SUPPORTED) tests = imperas32m; - "wally32a": if (`A_SUPPORTED) tests = wally32a; - "imperas32c": if (`C_SUPPORTED) tests = imperas32c; - else tests = imperas32iNOc; - "wally32i": tests = wally32i; - "wally32e": tests = wally32e; - "wally32priv": tests = wally32priv; - "wally32periph": tests = wally32periph; - "embench": tests = embench; - "coremark": tests = coremark; - endcase - end - if (tests.size() == 0) begin - $display("TEST %s not supported in this configuration", TEST); - $stop; - end - end - - string signame, memfilename, pathname, objdumpfilename, adrstr, outputfile; - integer outputFilePointer; + string testName; + string memfilename, pathname, adrstr; logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn; logic UARTSin, UARTSout; @@ -158,6 +84,41 @@ logic [3:0] dummy; integer ResetCount, ResetThreshold; logic InReset; + // Imperas look here. 
+ initial + begin + ResetCount = 0; + ResetThreshold = 2; + InReset = 1; + testadr = 0; + testadrNoBase = 0; + + testName = "rv64i_m/I/src/add-01.S"; + + pathname = "../../tests/riscof/work/riscv-arch-test/"; + memfilename = {pathname, testName, "/ref/ref.elf.memfile"}; + if (`BUS) $readmemh(memfilename, dut.uncore.uncore.ram.ram.memory.RAM); + else $error("Imperas test bench requires BUS."); + + ProgramAddrMapFile = {pathname, testName, "/ref/ref.elf.objdump.addr"}; + ProgramLabelMapFile = {pathname, testName, "/ref/ref.elf.objdump.lab"}; + + // declare memory labels that interest us, the updateProgramAddrLabelArray task will find the addr of each label and fill the array + // to expand, add more elements to this array and initialize them to zero (also initilaize them to zero at the start of the next test) + updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray); + $display("Read memfile %s", memfilename); + end + + rvviTrace rvviTrace(); + + + flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW); + flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW); + + // check assertions for a legal configuration + riscvassertions riscvassertions(); + + // instantiate device to be tested assign GPIOPinsIn = 0; assign UARTSin = 1; @@ -200,65 +161,6 @@ logic [3:0] dummy; InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); // initialize tests - localparam integer MemStartAddr = 0; - localparam integer MemEndAddr = `UNCORE_RAM_RANGE>>1+(`XLEN/32); - - initial - begin - ResetCount = 0; - ResetThreshold = 2; - InReset = 1; - test = 1; - totalerrors = 0; - testadr = 0; - testadrNoBase = 0; - // riscof tests have a different signature, tests[0] == "1" refers to RiscvArchTests and tests[0] == "2" refers to WallyRiscvArchTests - riscofTest = tests[0] == "1" | tests[0] == "2"; - // fill memory with defined values to reduce Xs in simulation - // Quick note the memory will 
need to be initialized. The C library does not - // guarantee the initialized reads. For example a strcmp can read 6 byte - // strings, but uses a load double to read them in. If the last 2 bytes are - // not initialized the compare results in an 'x' which propagates through - // the design. - if (TEST == "coremark") - for (i=MemStartAddr; i Date: Thu, 12 Jan 2023 14:46:31 -0600 Subject: [PATCH 06/18] rvvi trace is coming alone nicely. --- pipelined/testbench/common/rvvitrace.sv | 151 +++++++++++++++++++++++ pipelined/testbench/testbench_imperas.sv | 4 - 2 files changed, 151 insertions(+), 4 deletions(-) create mode 100644 pipelined/testbench/common/rvvitrace.sv diff --git a/pipelined/testbench/common/rvvitrace.sv b/pipelined/testbench/common/rvvitrace.sv new file mode 100644 index 000000000..3cc76be7f --- /dev/null +++ b/pipelined/testbench/common/rvvitrace.sv @@ -0,0 +1,151 @@ +`include "wally-config.vh" + +`define NUM_REGS 32 +`define NUM_CSRS 4096 + +module rvviTrace #( + parameter int ILEN = `XLEN, // Instruction length in bits + parameter int XLEN = `XLEN, // GPR length in bits + parameter int FLEN = `FLEN, // FPR length in bits + parameter int VLEN = 0, // Vector register size in bits + parameter int NHART = 1, // Number of harts reported + parameter int RETIRE = 1) // Number of instructions that can retire during valid event + (); + + localparam NUMREGS = `E_SUPPORTED ? 
16 : 32; + + // wally specific signals + logic reset; + + logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW; + logic [`XLEN-1:0] InstrRawD, InstrRawE, InstrRawM, InstrRawW; + logic InstrValidM, InstrValidW; + logic StallE, StallM, StallW; + logic FlushD, FlushE, FlushM, FlushW; + logic TrapM, TrapW; + logic HaltM, HaltW; + logic [1:0] PrivilegeModeW; + logic [`XLEN-1:0] rf[NUMREGS]; + logic [NUMREGS-1:0] rf_wb; + logic [4:0] rf_a3; + logic rf_we3; + logic [`XLEN-1:0] frf[32]; + logic [31:0] frf_wb; + logic [4:0] frf_a4; + logic frf_we4; + + + + // tracer signals + logic clk; + logic valid; + logic [ILEN-1:0] insn [(NHART-1):0][(RETIRE-1):0]; + logic [(XLEN-1):0] pc_rdata [(NHART-1):0][(RETIRE-1):0]; + logic [(XLEN-1):0] pc_wdata [(NHART-1):0][(RETIRE-1):0]; + logic trap [(NHART-1):0][(RETIRE-1):0]; + logic halt [(NHART-1):0][(RETIRE-1):0]; + logic intr [(NHART-1):0][(RETIRE-1):0]; + logic [1:0] mode [(NHART-1):0][(RETIRE-1):0]; + logic [1:0] ixl [(NHART-1):0][(RETIRE-1):0]; + logic [31:0][(XLEN-1):0] x_wdata [(NHART-1):0][(RETIRE-1):0]; + logic [31:0] x_wb [(NHART-1):0][(RETIRE-1):0]; + logic [31:0][(XLEN-1):0] f_wdata [(NHART-1):0][(RETIRE-1):0]; + logic [31:0] f_wb [(NHART-1):0][(RETIRE-1):0]; + + assign clk = testbench.dut.clk; +// assign InstrValidF = testbench.dut.core.ieu.InstrValidF; // not needed yet + assign InstrValidD = testbench.dut.core.ieu.c.InstrValidD; + assign InstrValidE = testbench.dut.core.ieu.c.InstrValidE; + assign InstrValidM = testbench.dut.core.ieu.InstrValidM; + assign InstrRawD = testbench.dut.core.ifu.InstrRawD; + assign PCNextF = testbench.dut.core.ifu.PCNextF; + assign PCF = testbench.dut.core.ifu.PCF; + assign PCD = testbench.dut.core.ifu.PCD; + assign PCE = testbench.dut.core.ifu.PCE; + assign PCM = testbench.dut.core.ifu.PCM; + assign reset = testbench.reset; + assign StallE = testbench.dut.core.StallE; + assign StallM = testbench.dut.core.StallM; + assign StallW = testbench.dut.core.StallW; + assign FlushD = 
testbench.dut.core.FlushD; + assign FlushE = testbench.dut.core.FlushE; + assign FlushM = testbench.dut.core.FlushM; + assign FlushW = testbench.dut.core.FlushW; + assign TrapM = testbench.dut.core.TrapM; + assign HaltM = testbench.DCacheFlushStart; + assign STATUS_SXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_SXL; + assign STATUS_UXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_UXL; + + genvar index; + assign rf[0] = '0; + for(index = 1; index < NUMREGS; index += 1) + assign rf[index] = testbench.dut.core.ieu.dp.regf.rf[index]; + + assign rf_a3 = testbench.dut.core.ieu.dp.regf.a3; + assign rf_we3 = testbench.dut.core.ieu.dp.regf.we3; + + always_comb begin + rf_wb = '0; + if(rf_we3) + rf_wb[rf_a3] = 1'b1; + end + + for(index = 0; index < NUMREGS; index += 1) + assign frf[index] = testbench.dut.core.fpu.fpu.fregfile.rf[index]; + + assign frf_a4 = testbench.dut.core.fpu.fpu.fregfile.a4; + assign frf_we4 = testbench.dut.core.fpu.fpu.fregfile.we4; + + always_comb begin + frf_wb = '0; + if(frf_we4) + frf_wb[frf_a4] = 1'b1; + end + + // pipeline to writeback stage + flopenrc #(`XLEN) InstrRawEReg (clk, reset, FlushE, ~StallE, InstrRawD, InstrRawE); + flopenrc #(`XLEN) InstrRawMReg (clk, reset, FlushM, ~StallM, InstrRawE, InstrRawM); + flopenrc #(`XLEN) InstrRawWReg (clk, reset, FlushW, ~StallW, InstrRawM, InstrRawW); + flopenrc #(`XLEN) PCWReg (clk, reset, FlushW, ~StallW, PCM, PCW); + flopenrc #(1) InstrValidMReg (clk, reset, FlushW, ~StallW, InstrValidM, InstrValidW); + flopenrc #(1) TrapWReg (clk, reset, 1'b0, ~StallW, TrapM, TrapW); + flopenrc #(1) HaltWReg (clk, reset, 1'b0, ~StallW, HaltM, HaltW); + + // Initially connecting the writeback stage signals, but may need to use M stage + // and gate on ~FlushW. + + assign valid = InstrValidW & ~StallW & ~FlushW; + assign insn[0][0] = InstrRawW; + assign pc_rdata[0][0] = PCW; + assign trap[0][0] = TrapW; + assign halt[0][0] = HaltW; + assign intr[0][0] = '0; // *** first retired instruction of trap handler. 
Not sure how i'm going to get this yet. + assign mode[0][0] = PrivilegeModeW; + assign ixl[0][0] = PrivilegeModeW == 2'b11 ? `XLEN : + PrivilegeModeW == 2'b01 ? STATUS_SXL : STATUS_UXL; + assign pc_wdata[0][0] = ~FlushW ? PCM : + ~FlushM ? PCE : + ~FlushE ? PCD : + ~FlushD ? PCF : PCNextF; + + for(index = 0; index < NUMREGS; index += 1) begin + assign x_wdata[index][0][0] = rf[index]; + assign x_wb[index][0][0] = rf_wb[index]; + assign f_wdata[index][0][0] = frf[index]; + assign f_wb[index][0][0] = frf_wb[index]; + end + + + + + always_ff @(posedge clk) begin + if(valid) begin + $display("PC = %08x, insn = %08x, trap = %1d, halt = %1d", pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0]); + end + if(HaltW) $stop(); + end + + + +endmodule + diff --git a/pipelined/testbench/testbench_imperas.sv b/pipelined/testbench/testbench_imperas.sv index d55cc6759..e8f014b8a 100644 --- a/pipelined/testbench/testbench_imperas.sv +++ b/pipelined/testbench/testbench_imperas.sv @@ -181,10 +181,6 @@ module testbench; InReset = 0; ResetCount = 0; end - end else begin - if(DCacheFlushStart) begin - $stop; - end end end // always @ (negedge clk) From f3443e2ecae02dc8bbec158ea39fdd95c147dc5f Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 12 Jan 2023 16:09:30 -0600 Subject: [PATCH 07/18] Added support to print the gprs. 
--- pipelined/testbench/common/rvvitrace.sv | 113 ++++++++++++++---------- 1 file changed, 65 insertions(+), 48 deletions(-) diff --git a/pipelined/testbench/common/rvvitrace.sv b/pipelined/testbench/common/rvvitrace.sv index 3cc76be7f..1bf79c247 100644 --- a/pipelined/testbench/common/rvvitrace.sv +++ b/pipelined/testbench/common/rvvitrace.sv @@ -3,57 +3,62 @@ `define NUM_REGS 32 `define NUM_CSRS 4096 +`define PRINT_PC_INSTR 1 +`define PRINT_MOST 1 +`define PRINT_ALL 0 + module rvviTrace #( - parameter int ILEN = `XLEN, // Instruction length in bits - parameter int XLEN = `XLEN, // GPR length in bits - parameter int FLEN = `FLEN, // FPR length in bits - parameter int VLEN = 0, // Vector register size in bits - parameter int NHART = 1, // Number of harts reported - parameter int RETIRE = 1) // Number of instructions that can retire during valid event + parameter int ILEN = `XLEN, // Instruction length in bits + parameter int XLEN = `XLEN, // GPR length in bits + parameter int FLEN = `FLEN, // FPR length in bits + parameter int VLEN = 0, // Vector register size in bits + parameter int NHART = 1, // Number of harts reported + parameter int RETIRE = 1) // Number of instructions that can retire during valid event (); localparam NUMREGS = `E_SUPPORTED ? 
16 : 32; // wally specific signals - logic reset; + logic reset; - logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW; - logic [`XLEN-1:0] InstrRawD, InstrRawE, InstrRawM, InstrRawW; - logic InstrValidM, InstrValidW; - logic StallE, StallM, StallW; - logic FlushD, FlushE, FlushM, FlushW; - logic TrapM, TrapW; - logic HaltM, HaltW; - logic [1:0] PrivilegeModeW; - logic [`XLEN-1:0] rf[NUMREGS]; - logic [NUMREGS-1:0] rf_wb; - logic [4:0] rf_a3; - logic rf_we3; - logic [`XLEN-1:0] frf[32]; - logic [31:0] frf_wb; - logic [4:0] frf_a4; - logic frf_we4; + logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW; + logic [`XLEN-1:0] InstrRawD, InstrRawE, InstrRawM, InstrRawW; + logic InstrValidM, InstrValidW; + logic StallE, StallM, StallW; + logic FlushD, FlushE, FlushM, FlushW; + logic TrapM, TrapW; + logic HaltM, HaltW; + logic [1:0] PrivilegeModeW; + logic [`XLEN-1:0] rf[NUMREGS]; + logic [NUMREGS-1:0] rf_wb; + logic [4:0] rf_a3; + logic rf_we3; + logic [`XLEN-1:0] frf[32]; + logic [`NUM_REGS-1:0] frf_wb; + logic [4:0] frf_a4; + logic frf_we4; // tracer signals - logic clk; - logic valid; - logic [ILEN-1:0] insn [(NHART-1):0][(RETIRE-1):0]; - logic [(XLEN-1):0] pc_rdata [(NHART-1):0][(RETIRE-1):0]; - logic [(XLEN-1):0] pc_wdata [(NHART-1):0][(RETIRE-1):0]; - logic trap [(NHART-1):0][(RETIRE-1):0]; - logic halt [(NHART-1):0][(RETIRE-1):0]; - logic intr [(NHART-1):0][(RETIRE-1):0]; - logic [1:0] mode [(NHART-1):0][(RETIRE-1):0]; - logic [1:0] ixl [(NHART-1):0][(RETIRE-1):0]; - logic [31:0][(XLEN-1):0] x_wdata [(NHART-1):0][(RETIRE-1):0]; - logic [31:0] x_wb [(NHART-1):0][(RETIRE-1):0]; - logic [31:0][(XLEN-1):0] f_wdata [(NHART-1):0][(RETIRE-1):0]; - logic [31:0] f_wb [(NHART-1):0][(RETIRE-1):0]; + logic clk; + logic valid; + logic [63:0] order [(NHART-1):0][(RETIRE-1):0]; + logic [ILEN-1:0] insn [(NHART-1):0][(RETIRE-1):0]; + logic [(XLEN-1):0] pc_rdata [(NHART-1):0][(RETIRE-1):0]; + logic [(XLEN-1):0] pc_wdata [(NHART-1):0][(RETIRE-1):0]; + logic trap 
[(NHART-1):0][(RETIRE-1):0]; + logic halt [(NHART-1):0][(RETIRE-1):0]; + logic intr [(NHART-1):0][(RETIRE-1):0]; + logic [1:0] mode [(NHART-1):0][(RETIRE-1):0]; + logic [1:0] ixl [(NHART-1):0][(RETIRE-1):0]; + logic [`NUM_REGS-1:0][(XLEN-1):0] x_wdata [(NHART-1):0][(RETIRE-1):0]; + logic [`NUM_REGS-1:0] x_wb [(NHART-1):0][(RETIRE-1):0]; + logic [`NUM_REGS-1:0][(XLEN-1):0] f_wdata [(NHART-1):0][(RETIRE-1):0]; + logic [`NUM_REGS-1:0] f_wb [(NHART-1):0][(RETIRE-1):0]; assign clk = testbench.dut.clk; -// assign InstrValidF = testbench.dut.core.ieu.InstrValidF; // not needed yet + // assign InstrValidF = testbench.dut.core.ieu.InstrValidF; // not needed yet assign InstrValidD = testbench.dut.core.ieu.c.InstrValidD; assign InstrValidE = testbench.dut.core.ieu.c.InstrValidE; assign InstrValidM = testbench.dut.core.ieu.InstrValidM; @@ -73,10 +78,11 @@ module rvviTrace #( assign FlushW = testbench.dut.core.FlushW; assign TrapM = testbench.dut.core.TrapM; assign HaltM = testbench.DCacheFlushStart; + assign PrivilegeModeW = testbench.dut.core.priv.priv.privmode.PrivilegeModeW; assign STATUS_SXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_SXL; assign STATUS_UXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_UXL; - genvar index; + genvar index; assign rf[0] = '0; for(index = 1; index < NUMREGS; index += 1) assign rf[index] = testbench.dut.core.ieu.dp.regf.rf[index]; @@ -121,26 +127,37 @@ module rvviTrace #( assign halt[0][0] = HaltW; assign intr[0][0] = '0; // *** first retired instruction of trap handler. Not sure how i'm going to get this yet. assign mode[0][0] = PrivilegeModeW; - assign ixl[0][0] = PrivilegeModeW == 2'b11 ? `XLEN : + assign ixl[0][0] = PrivilegeModeW == 2'b11 ? 2'b10 : PrivilegeModeW == 2'b01 ? STATUS_SXL : STATUS_UXL; assign pc_wdata[0][0] = ~FlushW ? PCM : ~FlushM ? PCE : ~FlushE ? PCD : ~FlushD ? 
PCF : PCNextF; - for(index = 0; index < NUMREGS; index += 1) begin - assign x_wdata[index][0][0] = rf[index]; - assign x_wb[index][0][0] = rf_wb[index]; - assign f_wdata[index][0][0] = frf[index]; - assign f_wb[index][0][0] = frf_wb[index]; + for(index = 0; index < `NUM_REGS; index += 1) begin + assign x_wdata[0][0][index] = rf[index]; + assign x_wb[0][0][index] = rf_wb[index]; + assign f_wdata[0][0][index] = frf[index]; + assign f_wb[0][0][index] = frf_wb[index]; end - - + integer index2; always_ff @(posedge clk) begin if(valid) begin - $display("PC = %08x, insn = %08x, trap = %1d, halt = %1d", pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0]); + if(`PRINT_PC_INSTR & !(`PRINT_ALL | `PRINT_MOST)) + $display("PC = %08x, insn = %08x", pc_rdata[0][0], insn[0][0]); + else if(`PRINT_MOST & !`PRINT_ALL) + $display("PC = %08x, insn = %08x, trap = %1d, halt = %1d, mode = %1x, ixl = %1x, pc_wdata = %08x, x%02d = %08x", pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0], rf_a3, x_wdata[0][0][rf_a3]); + else if(`PRINT_ALL) begin + $display("PC = %08x, insn = %08x, trap = %1d, halt = %1d, mode = %1x, ixl = %1x, pc_wdata = %08x", pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0]); + for(index2 = 0; index2 < `NUM_REGS; index2 += 1) begin + $display("x%02d = %08x", index2, x_wdata[0][0][index2]); + end + for(index2 = 0; index2 < `NUM_REGS; index2 += 1) begin + $display("f%02d = %08x", index2, f_wdata[0][0][index2]); + end + end end if(HaltW) $stop(); end From 8981739310ebf9bd951f34c13d21e2211d0a034f Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 12 Jan 2023 16:35:19 -0600 Subject: [PATCH 08/18] added machine csr to logger. 
--- pipelined/testbench/common/rvvitrace.sv | 28 ++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/pipelined/testbench/common/rvvitrace.sv b/pipelined/testbench/common/rvvitrace.sv index 1bf79c247..873824136 100644 --- a/pipelined/testbench/common/rvvitrace.sv +++ b/pipelined/testbench/common/rvvitrace.sv @@ -37,8 +37,8 @@ module rvviTrace #( logic [`NUM_REGS-1:0] frf_wb; logic [4:0] frf_a4; logic frf_we4; - - + logic [`XLEN-1:0] CSRArray [logic[4095:0]]; + // tracer signals logic clk; @@ -82,6 +82,28 @@ module rvviTrace #( assign STATUS_SXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_SXL; assign STATUS_UXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_UXL; + assign MSTATUS = testbench.dut.core.priv.priv.csr.csrm.MSTATUS_REGW; // 300 + assign MSTATUSH = testbench.dut.core.priv.priv.csr.csrm.MSTATUSH_REGW; // 310 + assign MTVEC = testbench.dut.core.priv.priv.csr.csrm.MTVEC_REGW; // 305 + assign MEPC_REGW = testbench.dut.core.priv.priv.csr.csrm.MEPC_REGW; // 341 + assign MCOUNTEREN_REGW = testbench.dut.core.priv.priv.csr.csrm.MCOUNTEREN_REGW; // 306 + assign MCOUNTINHIBIT_REGW = testbench.dut.core.priv.priv.csr.csrm.MCOUNTINHIBIT_REGW; // 320 + assign MEDELEG_REGW = testbench.dut.core.priv.priv.csr.csrm.MEDELEG_REGW; // 302 + assign MIDELEG_REGW = testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW; // 303 + assign MIP_REGW = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW; // 344 + assign MIE_REGW = testbench.dut.core.priv.priv.csr.csrm.MIE_REGW; // 304 + assign MISA_REGW = testbench.dut.core.priv.priv.csr.csrm.MISA_REGW; // 301 + assign MHARTID_REGW = testbench.dut.core.priv.priv.csr.csrm.MHARTID_REGW; // F14 + assign MSCRATCH_REGW = testbench.dut.core.priv.priv.csr.csrm.MSCRATCH_REGW; // 340 + assign MCAUSE_REGW = testbench.dut.core.priv.priv.csr.csrm.MCAUSE_REGW; // 342 + assign MTVAL_REGW = testbench.dut.core.priv.priv.csr.csrm.MTVAL_REGW; // 343 + assign MVENDORID = '0; // F11 + assign MARCHID = '0; // F12 + assign 
MIMPID = `XLEN'h100; // F13 + assign MCONFIGPTR = '0; // F15 + assign MTINST = '0; // 34A + + genvar index; assign rf[0] = '0; for(index = 1; index < NUMREGS; index += 1) @@ -148,7 +170,7 @@ module rvviTrace #( if(`PRINT_PC_INSTR & !(`PRINT_ALL | `PRINT_MOST)) $display("PC = %08x, insn = %08x", pc_rdata[0][0], insn[0][0]); else if(`PRINT_MOST & !`PRINT_ALL) - $display("PC = %08x, insn = %08x, trap = %1d, halt = %1d, mode = %1x, ixl = %1x, pc_wdata = %08x, x%02d = %08x", pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0], rf_a3, x_wdata[0][0][rf_a3]); + $display("PC = %08x, insn = %08x, trap = %1d, halt = %1d, mode = %1x, ixl = %1x, pc_wdata = %08x, x%02d = %016x, f%02d = %016x", pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0], rf_a3, x_wdata[0][0][rf_a3], frf_a4, f_wdata[0][0][frf_a4]); else if(`PRINT_ALL) begin $display("PC = %08x, insn = %08x, trap = %1d, halt = %1d, mode = %1x, ixl = %1x, pc_wdata = %08x", pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0]); for(index2 = 0; index2 < `NUM_REGS; index2 += 1) begin From 6500321aaffcde67a0496f1cafd7b6d78dd1f378 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 12 Jan 2023 16:51:51 -0600 Subject: [PATCH 09/18] Added M CSRs to the CSRArray. 
--- pipelined/testbench/common/rvvitrace.sv | 38 +++++++++++++++++++------ 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/pipelined/testbench/common/rvvitrace.sv b/pipelined/testbench/common/rvvitrace.sv index 873824136..17f760544 100644 --- a/pipelined/testbench/common/rvvitrace.sv +++ b/pipelined/testbench/common/rvvitrace.sv @@ -37,7 +37,7 @@ module rvviTrace #( logic [`NUM_REGS-1:0] frf_wb; logic [4:0] frf_a4; logic frf_we4; - logic [`XLEN-1:0] CSRArray [logic[4095:0]]; + logic [`XLEN-1:0] CSRArray [logic[11:0]]; // tracer signals @@ -82,9 +82,9 @@ module rvviTrace #( assign STATUS_SXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_SXL; assign STATUS_UXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_UXL; - assign MSTATUS = testbench.dut.core.priv.priv.csr.csrm.MSTATUS_REGW; // 300 - assign MSTATUSH = testbench.dut.core.priv.priv.csr.csrm.MSTATUSH_REGW; // 310 - assign MTVEC = testbench.dut.core.priv.priv.csr.csrm.MTVEC_REGW; // 305 + assign MSTATUS = testbench.dut.core.priv.priv.csr.csrm.MSTATUS_REGW; // 300 + assign MSTATUSH = testbench.dut.core.priv.priv.csr.csrm.MSTATUSH_REGW; // 310 + assign MTVEC = testbench.dut.core.priv.priv.csr.csrm.MTVEC_REGW; // 305 assign MEPC_REGW = testbench.dut.core.priv.priv.csr.csrm.MEPC_REGW; // 341 assign MCOUNTEREN_REGW = testbench.dut.core.priv.priv.csr.csrm.MCOUNTEREN_REGW; // 306 assign MCOUNTINHIBIT_REGW = testbench.dut.core.priv.priv.csr.csrm.MCOUNTINHIBIT_REGW; // 320 @@ -103,6 +103,28 @@ module rvviTrace #( assign MCONFIGPTR = '0; // F15 assign MTINST = '0; // 34A + always_comb begin + CSRArray[12'h300] = MSTATUS; + CSRArray[12'h310] = MSTATUSH; + CSRArray[12'h305] = MTVEC; + CSRArray[12'h341] = MEPC_REGW; + CSRArray[12'h306] = MCOUNTEREN_REGW; + CSRArray[12'h320] = MCOUNTINHIBIT_REGW; + CSRArray[12'h302] = MEDELEG_REGW; + CSRArray[12'h303] = MIDELEG_REGW; + CSRArray[12'h344] = MIP_REGW; + CSRArray[12'h304] = MIE_REGW; + CSRArray[12'h301] = MISA_REGW; + CSRArray[12'hF14] = MHARTID_REGW; + 
CSRArray[12'h340] = MSCRATCH_REGW; + CSRArray[12'h342] = MCAUSE_REGW; + CSRArray[12'h343] = MTVAL_REGW; + CSRArray[12'hF11] = MVENDORID; + CSRArray[12'hF12] = MARCHID; + CSRArray[12'hF13] = MIMPID; + CSRArray[12'hF15] = MCONFIGPTR; + CSRArray[12'h34A] = MTINST; + end genvar index; assign rf[0] = '0; @@ -113,9 +135,9 @@ module rvviTrace #( assign rf_we3 = testbench.dut.core.ieu.dp.regf.we3; always_comb begin - rf_wb = '0; + rf_wb <= '0; if(rf_we3) - rf_wb[rf_a3] = 1'b1; + rf_wb[rf_a3] <= 1'b1; end for(index = 0; index < NUMREGS; index += 1) @@ -125,9 +147,9 @@ module rvviTrace #( assign frf_we4 = testbench.dut.core.fpu.fpu.fregfile.we4; always_comb begin - frf_wb = '0; + frf_wb <= '0; if(frf_we4) - frf_wb[frf_a4] = 1'b1; + frf_wb[frf_a4] <= 1'b1; end // pipeline to writeback stage From 59b135d895bb4ccad772303e322fdab992152689 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 12 Jan 2023 17:04:41 -0600 Subject: [PATCH 10/18] Added supervisor mode registers to tracer. --- pipelined/testbench/common/rvvitrace.sv | 73 +++++++++++-------------- 1 file changed, 32 insertions(+), 41 deletions(-) diff --git a/pipelined/testbench/common/rvvitrace.sv b/pipelined/testbench/common/rvvitrace.sv index 17f760544..b71a7ce7e 100644 --- a/pipelined/testbench/common/rvvitrace.sv +++ b/pipelined/testbench/common/rvvitrace.sv @@ -82,48 +82,39 @@ module rvviTrace #( assign STATUS_SXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_SXL; assign STATUS_UXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_UXL; - assign MSTATUS = testbench.dut.core.priv.priv.csr.csrm.MSTATUS_REGW; // 300 - assign MSTATUSH = testbench.dut.core.priv.priv.csr.csrm.MSTATUSH_REGW; // 310 - assign MTVEC = testbench.dut.core.priv.priv.csr.csrm.MTVEC_REGW; // 305 - assign MEPC_REGW = testbench.dut.core.priv.priv.csr.csrm.MEPC_REGW; // 341 - assign MCOUNTEREN_REGW = testbench.dut.core.priv.priv.csr.csrm.MCOUNTEREN_REGW; // 306 - assign MCOUNTINHIBIT_REGW = testbench.dut.core.priv.priv.csr.csrm.MCOUNTINHIBIT_REGW; 
// 320 - assign MEDELEG_REGW = testbench.dut.core.priv.priv.csr.csrm.MEDELEG_REGW; // 302 - assign MIDELEG_REGW = testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW; // 303 - assign MIP_REGW = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW; // 344 - assign MIE_REGW = testbench.dut.core.priv.priv.csr.csrm.MIE_REGW; // 304 - assign MISA_REGW = testbench.dut.core.priv.priv.csr.csrm.MISA_REGW; // 301 - assign MHARTID_REGW = testbench.dut.core.priv.priv.csr.csrm.MHARTID_REGW; // F14 - assign MSCRATCH_REGW = testbench.dut.core.priv.priv.csr.csrm.MSCRATCH_REGW; // 340 - assign MCAUSE_REGW = testbench.dut.core.priv.priv.csr.csrm.MCAUSE_REGW; // 342 - assign MTVAL_REGW = testbench.dut.core.priv.priv.csr.csrm.MTVAL_REGW; // 343 - assign MVENDORID = '0; // F11 - assign MARCHID = '0; // F12 - assign MIMPID = `XLEN'h100; // F13 - assign MCONFIGPTR = '0; // F15 - assign MTINST = '0; // 34A - always_comb begin - CSRArray[12'h300] = MSTATUS; - CSRArray[12'h310] = MSTATUSH; - CSRArray[12'h305] = MTVEC; - CSRArray[12'h341] = MEPC_REGW; - CSRArray[12'h306] = MCOUNTEREN_REGW; - CSRArray[12'h320] = MCOUNTINHIBIT_REGW; - CSRArray[12'h302] = MEDELEG_REGW; - CSRArray[12'h303] = MIDELEG_REGW; - CSRArray[12'h344] = MIP_REGW; - CSRArray[12'h304] = MIE_REGW; - CSRArray[12'h301] = MISA_REGW; - CSRArray[12'hF14] = MHARTID_REGW; - CSRArray[12'h340] = MSCRATCH_REGW; - CSRArray[12'h342] = MCAUSE_REGW; - CSRArray[12'h343] = MTVAL_REGW; - CSRArray[12'hF11] = MVENDORID; - CSRArray[12'hF12] = MARCHID; - CSRArray[12'hF13] = MIMPID; - CSRArray[12'hF15] = MCONFIGPTR; - CSRArray[12'h34A] = MTINST; + // machine mode CSRs + CSRArray[12'h300] = testbench.dut.core.priv.priv.csr.csrm.MSTATUS_REGW; + CSRArray[12'h310] = testbench.dut.core.priv.priv.csr.csrm.MSTATUSH_REGW; + CSRArray[12'h305] = testbench.dut.core.priv.priv.csr.csrm.MTVEC_REGW; + CSRArray[12'h341] = testbench.dut.core.priv.priv.csr.csrm.MEPC_REGW; + CSRArray[12'h306] = testbench.dut.core.priv.priv.csr.csrm.MCOUNTEREN_REGW; + CSRArray[12'h320] = 
testbench.dut.core.priv.priv.csr.csrm.MCOUNTINHIBIT_REGW; + CSRArray[12'h302] = testbench.dut.core.priv.priv.csr.csrm.MEDELEG_REGW; + CSRArray[12'h303] = testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW; + CSRArray[12'h344] = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW; + CSRArray[12'h304] = testbench.dut.core.priv.priv.csr.csrm.MIE_REGW; + CSRArray[12'h301] = testbench.dut.core.priv.priv.csr.csrm.MISA_REGW; + CSRArray[12'hF14] = testbench.dut.core.priv.priv.csr.csrm.MHARTID_REGW; + CSRArray[12'h340] = testbench.dut.core.priv.priv.csr.csrm.MSCRATCH_REGW; + CSRArray[12'h342] = testbench.dut.core.priv.priv.csr.csrm.MCAUSE_REGW; + CSRArray[12'h343] = testbench.dut.core.priv.priv.csr.csrm.MTVAL_REGW; + CSRArray[12'hF11] = 0; + CSRArray[12'hF12] = 0; + CSRArray[12'hF13] = `XLEN'h100; + CSRArray[12'hF15] = 0; + CSRArray[12'h34A] = 0; + + CSRArray[12'h100] = testbench.dut.core.priv.priv.csr.csrs.SSTATUS_REGW; + CSRArray[12'h104] = testbench.dut.core.priv.priv.csr.csrm.MIE_REGW & 12'h222; + CSRArray[12'h105] = testbench.dut.core.priv.priv.csr.csrs.STVEC_REGW; + CSRArray[12'h141] = testbench.dut.core.priv.priv.csr.csrs.SEPC_REGW; + CSRArray[12'h106] = testbench.dut.core.priv.priv.csr.csrs.SCOUNTEREN_REGW; + CSRArray[12'h180] = testbench.dut.core.priv.priv.csr.csrs.SATP_REGW; + CSRArray[12'h140] = testbench.dut.core.priv.priv.csr.csrs.csrs.SSCRATCH_REGW; + CSRArray[12'h143] = testbench.dut.core.priv.priv.csr.csrs.csrs.STVAL_REGW; + CSRArray[12'h142] = testbench.dut.core.priv.priv.csr.csrs.csrs.SCAUSE_REGW; + CSRArray[12'h144] = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW & & 12'h222 & testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW; end genvar index; From 14ecaabbf6cda509e181138da8a93d341773ff7b Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 12 Jan 2023 18:43:39 -0600 Subject: [PATCH 11/18] Nearly complete RVVI tracer. Missing PMP registers and performance counters other than MCYCLE and MINSTRET. 
--- pipelined/testbench/common/rvvitrace.sv | 66 ++++++++++++++++++++---- pipelined/testbench/testbench_imperas.sv | 8 ++- 2 files changed, 63 insertions(+), 11 deletions(-) diff --git a/pipelined/testbench/common/rvvitrace.sv b/pipelined/testbench/common/rvvitrace.sv index b71a7ce7e..cc564d382 100644 --- a/pipelined/testbench/common/rvvitrace.sv +++ b/pipelined/testbench/common/rvvitrace.sv @@ -27,6 +27,7 @@ module rvviTrace #( logic StallE, StallM, StallW; logic FlushD, FlushE, FlushM, FlushW; logic TrapM, TrapW; + logic IntrF, IntrD, IntrE, IntrM, IntrW; logic HaltM, HaltW; logic [1:0] PrivilegeModeW; logic [`XLEN-1:0] rf[NUMREGS]; @@ -38,24 +39,28 @@ module rvviTrace #( logic [4:0] frf_a4; logic frf_we4; logic [`XLEN-1:0] CSRArray [logic[11:0]]; - + logic CSRWriteM, CSRWriteW; + logic [11:0] CSRAdrM, CSRAdrW; // tracer signals logic clk; logic valid; logic [63:0] order [(NHART-1):0][(RETIRE-1):0]; logic [ILEN-1:0] insn [(NHART-1):0][(RETIRE-1):0]; + logic intr [(NHART-1):0][(RETIRE-1):0]; logic [(XLEN-1):0] pc_rdata [(NHART-1):0][(RETIRE-1):0]; logic [(XLEN-1):0] pc_wdata [(NHART-1):0][(RETIRE-1):0]; logic trap [(NHART-1):0][(RETIRE-1):0]; logic halt [(NHART-1):0][(RETIRE-1):0]; - logic intr [(NHART-1):0][(RETIRE-1):0]; logic [1:0] mode [(NHART-1):0][(RETIRE-1):0]; logic [1:0] ixl [(NHART-1):0][(RETIRE-1):0]; logic [`NUM_REGS-1:0][(XLEN-1):0] x_wdata [(NHART-1):0][(RETIRE-1):0]; logic [`NUM_REGS-1:0] x_wb [(NHART-1):0][(RETIRE-1):0]; logic [`NUM_REGS-1:0][(XLEN-1):0] f_wdata [(NHART-1):0][(RETIRE-1):0]; logic [`NUM_REGS-1:0] f_wb [(NHART-1):0][(RETIRE-1):0]; + logic [4095:0][(XLEN-1):0] csr [(NHART-1):0][(RETIRE-1):0]; + logic [4095:0] csr_wb [(NHART-1):0][(RETIRE-1):0]; + logic lrsc_cancel[(NHART-1):0][(RETIRE-1):0]; assign clk = testbench.dut.clk; // assign InstrValidF = testbench.dut.core.ieu.InstrValidF; // not needed yet @@ -83,7 +88,8 @@ module rvviTrace #( assign STATUS_UXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_UXL; always_comb begin - // 
machine mode CSRs + // machine CSRs + // *** missing PMP and performance counters. CSRArray[12'h300] = testbench.dut.core.priv.priv.csr.csrm.MSTATUS_REGW; CSRArray[12'h310] = testbench.dut.core.priv.priv.csr.csrm.MSTATUSH_REGW; CSRArray[12'h305] = testbench.dut.core.priv.priv.csr.csrm.MTVEC_REGW; @@ -104,7 +110,10 @@ module rvviTrace #( CSRArray[12'hF13] = `XLEN'h100; CSRArray[12'hF15] = 0; CSRArray[12'h34A] = 0; - + // MCYCLE and MINSTRET + CSRArray[12'hB00] = testbench.dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[0]; + CSRArray[12'hB02] = testbench.dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[2]; + // supervisor CSRs CSRArray[12'h100] = testbench.dut.core.priv.priv.csr.csrs.SSTATUS_REGW; CSRArray[12'h104] = testbench.dut.core.priv.priv.csr.csrm.MIE_REGW & 12'h222; CSRArray[12'h105] = testbench.dut.core.priv.priv.csr.csrs.STVEC_REGW; @@ -115,6 +124,10 @@ module rvviTrace #( CSRArray[12'h143] = testbench.dut.core.priv.priv.csr.csrs.csrs.STVAL_REGW; CSRArray[12'h142] = testbench.dut.core.priv.priv.csr.csrs.csrs.SCAUSE_REGW; CSRArray[12'h144] = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW & & 12'h222 & testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW; + // user CSRs + CSRArray[12'h001] = testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW; + CSRArray[12'h002] = testbench.dut.core.priv.priv.csr.csru.FRM_REGW; + CSRArray[12'h003] = {testbench.dut.core.priv.priv.csr.csru.FRM_REGW, testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW}; end genvar index; @@ -143,6 +156,9 @@ module rvviTrace #( frf_wb[frf_a4] <= 1'b1; end + assign CSRAdrM = testbench.dut.core.priv.priv.csr.CSRAdrM; + assign CSRWriteM = testbench.dut.core.priv.priv.csr.CSRWriteM; + // pipeline to writeback stage flopenrc #(`XLEN) InstrRawEReg (clk, reset, FlushE, ~StallE, InstrRawD, InstrRawE); flopenrc #(`XLEN) InstrRawMReg (clk, reset, FlushM, ~StallM, InstrRawE, InstrRawM); @@ -152,15 +168,25 @@ module rvviTrace #( flopenrc #(1) TrapWReg (clk, reset, 1'b0, ~StallW, TrapM, 
TrapW); flopenrc #(1) HaltWReg (clk, reset, 1'b0, ~StallW, HaltM, HaltW); + flopenrc #(1) IntrFReg (clk, reset, 1'b0, ~StallF, TrapM, IntrF); + flopenrc #(1) IntrDReg (clk, reset, FlushD, ~StallD, IntrF, IntrD); + flopenrc #(1) IntrEReg (clk, reset, FlushE, ~StallE, IntrD, IntrE); + flopenrc #(1) IntrMReg (clk, reset, FlushM, ~StallM, IntrE, IntrM); + flopenrc #(1) IntrWReg (clk, reset, FlushW, ~StallW, IntrM, IntrW); + + flopenrc #(12) CSRAdrWReg (clk, reset, FlushW, ~StallW, CSRAdrM, CSRAdrW); + flopenrc #(1) CSRWriteWReg (clk, reset, FlushW, ~StallW, CSRWriteM, CSRWriteW); + // Initially connecting the writeback stage signals, but may need to use M stage // and gate on ~FlushW. assign valid = InstrValidW & ~StallW & ~FlushW; + assign order[0][0] = CSRArray[12'hB02]; assign insn[0][0] = InstrRawW; assign pc_rdata[0][0] = PCW; assign trap[0][0] = TrapW; assign halt[0][0] = HaltW; - assign intr[0][0] = '0; // *** first retired instruction of trap handler. Not sure how i'm going to get this yet. + assign intr[0][0] = IntrW; assign mode[0][0] = PrivilegeModeW; assign ixl[0][0] = PrivilegeModeW == 2'b11 ? 2'b10 : PrivilegeModeW == 2'b01 ? STATUS_SXL : STATUS_UXL; @@ -176,16 +202,38 @@ module rvviTrace #( assign f_wb[0][0][index] = frf_wb[index]; end + always_comb begin + csr_wb[0][0] <= '0; + if(CSRWriteW) + csr_wb[0][0][CSRAdrW] <= 1'b1; + end + + integer index3; + + always_comb begin + for(index3 = 0; index3 < `NUM_CSRS; index3 += 1) begin + if(CSRArray.exists(index3)) + csr[0][0][index3] = CSRArray[index3]; + else + csr[0][0][index3] = '0; + end + end + + // *** implementation only cancel? so sc does not clear? 
+ assign lrsc_cancel[0][0] = '0; + integer index2; - + always_ff @(posedge clk) begin if(valid) begin if(`PRINT_PC_INSTR & !(`PRINT_ALL | `PRINT_MOST)) - $display("PC = %08x, insn = %08x", pc_rdata[0][0], insn[0][0]); + $display("order = %08d, PC = %08x, insn = %08x", order[0][0], pc_rdata[0][0], insn[0][0]); else if(`PRINT_MOST & !`PRINT_ALL) - $display("PC = %08x, insn = %08x, trap = %1d, halt = %1d, mode = %1x, ixl = %1x, pc_wdata = %08x, x%02d = %016x, f%02d = %016x", pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0], rf_a3, x_wdata[0][0][rf_a3], frf_a4, f_wdata[0][0][frf_a4]); + $display("order = %08d, PC = %010x, insn = %08x, trap = %1d, halt = %1d, intr = %1d, mode = %1x, ixl = %1x, pc_wdata = %010x, x%02d = %016x, f%02d = %016x, csr%03x = %016x", + order[0][0], pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], intr[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0], rf_a3, x_wdata[0][0][rf_a3], frf_a4, f_wdata[0][0][frf_a4], CSRAdrW, csr[0][0][CSRAdrW]); else if(`PRINT_ALL) begin - $display("PC = %08x, insn = %08x, trap = %1d, halt = %1d, mode = %1x, ixl = %1x, pc_wdata = %08x", pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0]); + $display("order = %08d, PC = %08x, insn = %08x, trap = %1d, halt = %1d, intr = %1d, mode = %1x, ixl = %1x, pc_wdata = %08x", + order[0][0], pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], intr[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0]); for(index2 = 0; index2 < `NUM_REGS; index2 += 1) begin $display("x%02d = %08x", index2, x_wdata[0][0][index2]); end diff --git a/pipelined/testbench/testbench_imperas.sv b/pipelined/testbench/testbench_imperas.sv index e8f014b8a..6e84e879b 100644 --- a/pipelined/testbench/testbench_imperas.sv +++ b/pipelined/testbench/testbench_imperas.sv @@ -92,9 +92,13 @@ module testbench; testadr = 0; testadrNoBase = 0; - testName = "rv64i_m/I/src/add-01.S"; + //testName = "rv64i_m/I/src/add-01.S"; + testName = 
"rv64i_m/privilege/src/WALLY-mmu-sv48-01.S"; + - pathname = "../../tests/riscof/work/riscv-arch-test/"; + //pathname = "../../tests/riscof/work/riscv-arch-test/"; + pathname = "../../tests/riscof/work/wally-riscv-arch-test/"; + memfilename = {pathname, testName, "/ref/ref.elf.memfile"}; if (`BUS) $readmemh(memfilename, dut.uncore.uncore.ram.ram.memory.RAM); else $error("Imperas test bench requires BUS."); From f7dacb59f9bd29c929cd9151bb11a51ff4bd6f96 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 13 Jan 2023 12:32:39 -0600 Subject: [PATCH 12/18] Possible minor enhancement to gshare. --- pipelined/src/ifu/speculativegshare.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelined/src/ifu/speculativegshare.sv b/pipelined/src/ifu/speculativegshare.sv index 943a1b785..11513ac15 100644 --- a/pipelined/src/ifu/speculativegshare.sv +++ b/pipelined/src/ifu/speculativegshare.sv @@ -104,7 +104,8 @@ module speculativegshare flopenr #(2) PredictionRegE(clk, reset, ~StallE, DirPredictionD, DirPredictionE); // New prediction pipeline - satCounter2 BPDirUpdateF(.BrDir(DirPredictionF[1]), .OldState(DirPredictionF), .NewState(NewDirPredictionF)); + assign NewDirPredictionF = {DirPredictionF[1], DirPredictionF[1]}; + flopenr #(2) NewPredDReg(clk, reset, ~StallD, NewDirPredictionF, NewDirPredictionD); satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); flopenr #(2) NewPredMReg(clk, reset, ~StallM, NewDirPredictionE, NewDirPredictionM); From b26cec1ef4dd021c223fd6b67c69d3aa70729fc3 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 13 Jan 2023 12:39:29 -0600 Subject: [PATCH 13/18] Possible optimization of gshare. I don't believe the Writeback stage ghr is needed. 
--- pipelined/testbench/testbench.sv | 141 ------------------------------- 1 file changed, 141 deletions(-) diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index dcbebbf46..fcf4fca87 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -509,53 +509,6 @@ logic [3:0] dummy; $stop; end end -/* -----\/----- EXCLUDED -----\/----- - - // rvvi tracer - localparam int ILEN = `XLEN; // Instruction length in bits - localparam int XLEN = `XLEN; // GPR length in bits - localparam int FLEN = `FLEN; // FPR length in bits - localparam int VLEN = 0; // Vector register size in bits - localparam int NHART = 1; // Number of harts reported - localparam int RETIRE = 1; // Number of instructions that can retire during valid event - - logic TraceClk; // Interface clock - - logic valid [(NHART-1):0][(RETIRE-1):0]; // Retired instruction - logic [63:0] order [(NHART-1):0][(RETIRE-1):0]; // Unique instruction order count (no gaps or reuse) - logic [(ILEN-1):0] insn [(NHART-1):0][(RETIRE-1):0]; // Instruction bit pattern - logic trap [(NHART-1):0][(RETIRE-1):0]; // Trapped instruction - logic halt [(NHART-1):0][(RETIRE-1):0]; // Halted instruction - logic intr [(NHART-1):0][(RETIRE-1):0]; // (RVFI Legacy) Flag first instruction of trap handler - logic [1:0] mode [(NHART-1):0][(RETIRE-1):0]; // Privilege mode of operation - logic [1:0] ixl [(NHART-1):0][(RETIRE-1):0]; // XLEN mode 32/64 bit - - logic [(XLEN-1):0] pc_rdata [(NHART-1):0][(RETIRE-1):0]; // PC of insn - logic [(XLEN-1):0] pc_wdata [(NHART-1):0][(RETIRE-1):0]; // PC of next instruction - - // X Registers - logic [31:0][(XLEN-1):0] x_wdata [(NHART-1):0][(RETIRE-1):0]; // X data value - logic [31:0] x_wb [(NHART-1):0][(RETIRE-1):0]; // X data writeback (change) flag - - // F Registers - logic [31:0][(FLEN-1):0] f_wdata [(NHART-1):0][(RETIRE-1):0]; // F data value - logic [31:0] f_wb [(NHART-1):0][(RETIRE-1):0]; // F data writeback (change) flag - - // V Registers - 
logic [31:0][(VLEN-1):0] v_wdata [(NHART-1):0][(RETIRE-1):0]; // V data value - logic [31:0] v_wb [(NHART-1):0][(RETIRE-1):0]; // V data writeback (change) flag - - // Control & State Registers - logic [4095:0][(XLEN-1):0] csr [(NHART-1):0][(RETIRE-1):0]; // Full CSR Address range - logic [4095:0] csr_wb [(NHART-1):0][(RETIRE-1):0]; // CSR writeback (change) flag - - logic lrsc_cancel[(NHART-1):0][(RETIRE-1):0]; // Implementation defined - rvviTrace #(`XLEN, `XLEN, `FLEN, 0, 1, 1) rvviTrace(.clk(TraceClk), .valid, .order, .insn, .trap, .halt, .intr, - .mode, .ixl, .pc_rdata, .pc_wdata, .x_wdata, .x_wb, .f_wdata, .f_wb, .v_wdata, .v_wb, - .csr, .csr_wb, .lrsc_cancel); - -----/\----- EXCLUDED -----/\----- */ - - rvviTrace rvviTrace(); endmodule @@ -791,97 +744,3 @@ module rvviTrace(); endmodule -/* -----\/----- EXCLUDED -----\/----- -module rvviTrace #( - parameter int ILEN = `XLEN, // Instruction length in bits - parameter int XLEN = `XLEN, // GPR length in bits - parameter int FLEN = `FLEN, // FPR length in bits - parameter int VLEN = 0, // Vector register size in bits - parameter int NHART = 1, // Number of harts reported - parameter int RETIRE = 1 // Number of instructions that can retire during valid event - )( - // - // RISCV output signals - // - output logic clk, // Interface clock - - output logic valid [(NHART-1):0][(RETIRE-1):0], // Retired instruction - output logic [63:0] order [(NHART-1):0][(RETIRE-1):0], // Unique instruction order count (no gaps or reuse) - output logic [(ILEN-1):0] insn [(NHART-1):0][(RETIRE-1):0], // Instruction bit pattern - output logic trap [(NHART-1):0][(RETIRE-1):0], // Trapped instruction - output logic halt [(NHART-1):0][(RETIRE-1):0], // Halted instruction - output logic intr [(NHART-1):0][(RETIRE-1):0], // (RVFI Legacy) Flag first instruction of trap handler - output logic [1:0] mode [(NHART-1):0][(RETIRE-1):0], // Privilege mode of operation - output logic [1:0] ixl [(NHART-1):0][(RETIRE-1):0], // XLEN mode 32/64 bit - - 
output logic [(XLEN-1):0] pc_rdata [(NHART-1):0][(RETIRE-1):0], // PC of insn - output logic [(XLEN-1):0] pc_wdata [(NHART-1):0][(RETIRE-1):0], // PC of next instruction - - // X Registers - output logic [31:0][(XLEN-1):0] x_wdata [(NHART-1):0][(RETIRE-1):0], // X data value - output logic [31:0] x_wb [(NHART-1):0][(RETIRE-1):0], // X data writeback (change) flag - - // F Registers - output logic [31:0][(FLEN-1):0] f_wdata [(NHART-1):0][(RETIRE-1):0], // F data value - output logic [31:0] f_wb [(NHART-1):0][(RETIRE-1):0], // F data writeback (change) flag - - // V Registers - output logic [31:0][(VLEN-1):0] v_wdata [(NHART-1):0][(RETIRE-1):0], // V data value - output logic [31:0] v_wb [(NHART-1):0][(RETIRE-1):0], // V data writeback (change) flag - - // Control & State Registers - output logic [4095:0][(XLEN-1):0] csr [(NHART-1):0][(RETIRE-1):0], // Full CSR Address range - output logic [4095:0] csr_wb [(NHART-1):0][(RETIRE-1):0], // CSR writeback (change) flag - - output logic lrsc_cancel[(NHART-1):0][(RETIRE-1):0] // Implementation defined cancel - ); - - - assign clk = dut.clk; - // *** need to pipeline to writeback stage. 
- assign valid = dut.core.ieu.InstrValidM; - assign insn = dut.core.ifu.InstrM; - assign pc_rdata = dut.core.ifu.PCM; - - always_ff @(posedge clk) begin - if(valid) begin - $display("PC = %d, insn = %d", pc_rdata, insn); - end - end - - - // - // Synchronization of NETs - // - wire clkD; - assign #1 clkD = clk; - - string name[$]; - int value[$]; - longint tslot[$]; - int nets[string]; - - function automatic void net_push(input string vname, input int vvalue); - longint vslot = $time; - name.push_front(vname); - value.push_front(vvalue); - tslot.push_front(vslot); - endfunction - - function automatic int net_pop(output string vname, output int vvalue, output longint vslot); - int ok; - string msg; - if (name.size() > 0) begin - vname = name.pop_back(); - vvalue = value.pop_back(); - vslot = tslot.pop_back(); - nets[vname] = vvalue; - ok = 1; - end else begin - ok = 0; - end - return ok; - endfunction - -endmodule - -----/\----- EXCLUDED -----/\----- */ From 37481fce77d6b4aeac41add9065d629e71dbd1bf Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 13 Jan 2023 12:57:18 -0600 Subject: [PATCH 14/18] More branch predictor cleanup. Found small bug. The decode stage was using the predicted instruction class rather than the decoded instruction class. 
--- pipelined/src/ifu/bpred.sv | 26 +++++++++++++------------- pipelined/src/ifu/speculativegshare.sv | 24 ++++++++++-------------- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/pipelined/src/ifu/bpred.sv b/pipelined/src/ifu/bpred.sv index 246a52428..b05007167 100644 --- a/pipelined/src/ifu/bpred.sv +++ b/pipelined/src/ifu/bpred.sv @@ -65,7 +65,7 @@ module bpred ( logic BTBValidF; logic [1:0] DirPredictionF; - logic [4:0] BPInstrClassF, BPInstrClassD, BPInstrClassE; + logic [4:0] PredInstrClassF, PredInstrClassD, PredInstrClassE; logic [`XLEN-1:0] BTBPredPCF, RASPCF; logic TargetWrongE; logic FallThroughWrongE; @@ -95,7 +95,7 @@ module bpred ( end else if (`BPTYPE == "BPSPECULATIVEGLOBAL") begin:Predictor speculativeglobalhistory #(10) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrF(BPInstrClassF[0]), .BranchInstrD(BPInstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), + .BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .BranchInstrW(InstrClassW[0]), .PCSrcE); end else if (`BPTYPE == "BPGSHARE") begin:Predictor @@ -106,7 +106,7 @@ module bpred ( end else if (`BPTYPE == "BPSPECULATIVEGSHARE") begin:Predictor speculativegshare DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrF(BPInstrClassF[0]), .BranchInstrD(BPInstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), + .BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .BranchInstrW(InstrClassW[0]), .PCSrcE); end else if (`BPTYPE == "BPLOCALPAg") begin:Predictor @@ -129,10 +129,10 @@ module bpred ( // 1) A 
direction (1 = Taken, 0 = Not Taken) // 2) Any information which is necessary for the predictor to build its next state. // For a 2 bit table this is the prediction count. - assign SelBPPredF = ((BPInstrClassF[0] & DirPredictionF[1] & BTBValidF) | - BPInstrClassF[3] | - (BPInstrClassF[2] & BTBValidF) | - BPInstrClassF[1] & BTBValidF) ; + assign SelBPPredF = ((PredInstrClassF[0] & DirPredictionF[1] & BTBValidF) | + PredInstrClassF[3] | + (PredInstrClassF[2] & BTBValidF) | + PredInstrClassF[1] & BTBValidF) ; // Part 2 Branch target address prediction // *** For now the BTB will house the direct and indirect targets @@ -143,7 +143,7 @@ module bpred ( .*, // Stalls and flushes .LookUpPC(PCNextF), .TargetPC(BTBPredPCF), - .InstrClass(BPInstrClassF), + .InstrClass(PredInstrClassF), .Valid(BTBValidF), // update .UpdateEN((|InstrClassE | (PredictionInstrClassWrongE)) & ~StallE), @@ -156,13 +156,13 @@ module bpred ( // *** need to add the logic to restore RAS on flushes. We will use incr for this. RASPredictor RASPredictor(.clk(clk), .reset(reset), - .pop(BPInstrClassF[3] & ~StallF), + .pop(PredInstrClassF[3] & ~StallF), .popPC(RASPCF), .push(InstrClassE[4] & ~StallE), .incr(1'b0), .pushPC(PCLinkE)); - assign BPPredPCF = BPInstrClassF[3] ? RASPCF : BTBPredPCF; + assign BPPredPCF = PredInstrClassF[3] ? RASPCF : BTBPredPCF; // the branch predictor needs a compact decoding of the instruction class. // *** consider adding in the alternate return address x5 for returns. 
@@ -182,8 +182,8 @@ module bpred ( {DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM}); // pipeline the class - flopenrc #(5) BPInstrClassRegD(clk, reset, FlushD, ~StallD, BPInstrClassF, BPInstrClassD); - flopenrc #(5) BPInstrClassRegE(clk, reset, FlushE, ~StallE, BPInstrClassD, BPInstrClassE); + flopenrc #(5) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); + flopenrc #(5) PredInstrClassRegE(clk, reset, FlushE, ~StallE, PredInstrClassD, PredInstrClassE); // Check the prediction // first check if the target or fallthrough address matches what was predicted. @@ -201,7 +201,7 @@ module bpred ( // Finally we need to check if the class is wrong. When the class is wrong the BTB needs to be updated. // Also we want to track this in a performance counter. - assign PredictionInstrClassWrongE = InstrClassE != BPInstrClassE; + assign PredictionInstrClassWrongE = InstrClassE != PredInstrClassE; // We want to output to the instruction fetch if the PC fetched was wrong. If by chance the predictor was wrong about // the direction or class, but correct about the target we don't have the flush the pipeline. 
However we still diff --git a/pipelined/src/ifu/speculativegshare.sv b/pipelined/src/ifu/speculativegshare.sv index 11513ac15..eaa21c2f3 100644 --- a/pipelined/src/ifu/speculativegshare.sv +++ b/pipelined/src/ifu/speculativegshare.sv @@ -44,18 +44,18 @@ module speculativegshare input logic PCSrcE ); - logic MatchF, MatchD, MatchE, MatchM, MatchW; + logic MatchF, MatchD, MatchE, MatchM; logic MatchNextX, MatchXF; logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE; - logic [1:0] NewDirPredictionF, NewDirPredictionD, NewDirPredictionE, NewDirPredictionM, NewDirPredictionW; + logic [1:0] NewDirPredictionF, NewDirPredictionD, NewDirPredictionE, NewDirPredictionM; logic [k-1:0] GHRF; logic [k:0] GHRD, OldGHRE, GHRE, GHRM, GHRW; logic [k-1:0] GHRNextF; logic [k:0] GHRNextD, GHRNextE, GHRNextM, GHRNextW; logic [k-1:0] IndexNextF, IndexF; - logic [k-1:0] IndexD, IndexE, IndexM, IndexW; + logic [k-1:0] IndexD, IndexE, IndexM; logic PCSrcM, PCSrcW; logic [`XLEN-1:0] PCW; @@ -67,34 +67,31 @@ module speculativegshare assign IndexD = GHRD[k-1:0] ^ {PCD[k+1] ^ PCD[1], PCD[k:2]}; assign IndexE = GHRE[k-1:0] ^ {PCE[k+1] ^ PCE[1], PCE[k:2]}; assign IndexM = GHRM[k-1:0] ^ {PCM[k+1] ^ PCM[1], PCM[k:2]}; - assign IndexW = GHRW[k-1:0] ^ {PCW[k+1] ^ PCW[1], PCW[k:2]}; ram2p1r1wbefix #(2**k, 2) PHT(.clk(clk), .ce1(~StallF | reset), .ce2(~StallW & ~FlushW), .ra1(IndexNextF), .rd1(TableDirPredictionF), - .wa2(IndexW), - .wd2(NewDirPredictionW), - .we2(BranchInstrW & ~StallW & ~FlushW), + .wa2(IndexM), + .wd2(NewDirPredictionM), + .we2(BranchInstrM & ~StallW & ~FlushW), .bwe2(1'b1)); // if there are non-flushed branches in the pipeline we need to forward the prediction from that stage to the NextF demi stage - // and then register for use in the Fetch stage. + // and then register for use in the Fetch stage. 
assign MatchF = BranchInstrF & ~FlushD & (IndexNextF == IndexF); assign MatchD = BranchInstrD & ~FlushE & (IndexNextF == IndexD); assign MatchE = BranchInstrE & ~FlushM & (IndexNextF == IndexE); assign MatchM = BranchInstrM & ~FlushW & (IndexNextF == IndexM); - assign MatchW = BranchInstrW & (IndexNextF == IndexW); - assign MatchNextX = MatchF | MatchD | MatchE | MatchM | MatchW; + assign MatchNextX = MatchF | MatchD | MatchE | MatchM; flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); assign ForwardNewDirPrediction = MatchF ? NewDirPredictionF : MatchD ? NewDirPredictionD : MatchE ? NewDirPredictionE : - MatchM ? NewDirPredictionM : - NewDirPredictionW; - + NewDirPredictionM; + flopenr #(2) ForwardDirPredicitonReg(clk, reset, ~StallF, ForwardNewDirPrediction, ForwardDirPredictionF); assign DirPredictionF = MatchXF ? ForwardDirPredictionF : TableDirPredictionF; @@ -109,7 +106,6 @@ module speculativegshare flopenr #(2) NewPredDReg(clk, reset, ~StallD, NewDirPredictionF, NewDirPredictionD); satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); flopenr #(2) NewPredMReg(clk, reset, ~StallM, NewDirPredictionE, NewDirPredictionM); - flopenr #(2) NewPredWReg(clk, reset, ~StallW, NewDirPredictionM, NewDirPredictionW); // PCSrc pipeline flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM); From 8e3e8591a662c22fe58097ebfb38c4507ae93407 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 13 Jan 2023 15:19:53 -0600 Subject: [PATCH 15/18] Removed 1 bit from instruction classification. 
--- pipelined/src/ifu/BTBPredictor.sv | 8 ++--- pipelined/src/ifu/bpred.sv | 43 +++++++++++------------ pipelined/src/ifu/ifu.sv | 2 +- pipelined/src/privileged/csr.sv | 2 +- pipelined/src/privileged/csrc.sv | 6 ++-- pipelined/src/privileged/privileged.sv | 2 +- pipelined/src/wally/wallypipelinedcore.sv | 2 +- 7 files changed, 32 insertions(+), 33 deletions(-) diff --git a/pipelined/src/ifu/BTBPredictor.sv b/pipelined/src/ifu/BTBPredictor.sv index d15dae6ce..9e46858e2 100644 --- a/pipelined/src/ifu/BTBPredictor.sv +++ b/pipelined/src/ifu/BTBPredictor.sv @@ -37,13 +37,13 @@ module BTBPredictor input logic StallF, StallE, input logic [`XLEN-1:0] LookUpPC, output logic [`XLEN-1:0] TargetPC, - output logic [4:0] InstrClass, + output logic [3:0] InstrClass, output logic Valid, // update input logic UpdateEN, input logic [`XLEN-1:0] UpdatePC, input logic [`XLEN-1:0] UpdateTarget, - input logic [4:0] UpdateInstrClass, + input logic [3:0] UpdateInstrClass, input logic UpdateInvalid ); @@ -99,7 +99,7 @@ module BTBPredictor // *** need to add forwarding. // *** optimize for byte write enables - ram2p1r1wb #(Depth, `XLEN+5) memory(.clk(clk), + ram2p1r1wb #(Depth, `XLEN+4) memory(.clk(clk), .reset(reset), .ra1(LookUpPCIndex), .rd1({{InstrClass, TargetPC}}), @@ -107,7 +107,7 @@ module BTBPredictor .wa2(UpdatePCIndex), .wd2({UpdateInstrClass, UpdateTarget}), .wen2(UpdateEN), - .bwe2({5'h1F, {`XLEN{1'b1}}})); // *** definitely not right. + .bwe2({4'hF, {`XLEN{1'b1}}})); // *** definitely not right. endmodule diff --git a/pipelined/src/ifu/bpred.sv b/pipelined/src/ifu/bpred.sv index b05007167..7784d813c 100644 --- a/pipelined/src/ifu/bpred.sv +++ b/pipelined/src/ifu/bpred.sv @@ -52,7 +52,7 @@ module bpred ( input logic PCSrcE, // Executation stage branch is taken input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. 
(AKA Fall through address) - output logic [4:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br + output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br // Report branch prediction status output logic BPPredWrongE, // Prediction is wrong. @@ -65,13 +65,13 @@ module bpred ( logic BTBValidF; logic [1:0] DirPredictionF; - logic [4:0] PredInstrClassF, PredInstrClassD, PredInstrClassE; + logic [3:0] PredInstrClassF, PredInstrClassD, PredInstrClassE; logic [`XLEN-1:0] BTBPredPCF, RASPCF; logic TargetWrongE; logic FallThroughWrongE; logic PredictionPCWrongE; logic PredictionInstrClassWrongE; - logic [4:0] InstrClassD, InstrClassE, InstrClassW; + logic [3:0] InstrClassD, InstrClassE, InstrClassW; logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE; logic SelBPPredF; @@ -129,10 +129,9 @@ module bpred ( // 1) A direction (1 = Taken, 0 = Not Taken) // 2) Any information which is necessary for the predictor to build its next state. // For a 2 bit table this is the prediction count. - assign SelBPPredF = ((PredInstrClassF[0] & DirPredictionF[1] & BTBValidF) | - PredInstrClassF[3] | - (PredInstrClassF[2] & BTBValidF) | - PredInstrClassF[1] & BTBValidF) ; + assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1] & BTBValidF) | + PredInstrClassF[2] | + (PredInstrClassF[1] & BTBValidF) ; // Part 2 Branch target address prediction // *** For now the BTB will house the direct and indirect targets @@ -154,26 +153,26 @@ module bpred ( // Part 3 RAS // *** need to add the logic to restore RAS on flushes. We will use incr for this. + // *** needs to include flushX RASPredictor RASPredictor(.clk(clk), .reset(reset), - .pop(PredInstrClassF[3] & ~StallF), + .pop(PredInstrClassF[2] & ~StallF), .popPC(RASPCF), - .push(InstrClassE[4] & ~StallE), + .push(InstrClassE[3] & ~StallE), .incr(1'b0), .pushPC(PCLinkE)); - assign BPPredPCF = PredInstrClassF[3] ? 
RASPCF : BTBPredPCF; + assign BPPredPCF = PredInstrClassF[2] ? RASPCF : BTBPredPCF; // the branch predictor needs a compact decoding of the instruction class. - // *** consider adding in the alternate return address x5 for returns. - assign InstrClassD[4] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or r5 - assign InstrClassD[3] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5 - assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01; // jump register, but not return - assign InstrClassD[1] = InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01; // jump, RD != x1 or x5 + assign InstrClassD[3] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 + assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5 + assign InstrClassD[1] = (InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01) | // jump register, but not return + (InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01); // jump, RD != x1 or x5 assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch - flopenrc #(5) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE); - flopenrc #(5) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM); - flopenrc #(5) InstrClassRegW(clk, reset, FlushW, ~StallW, InstrClassM, InstrClassW); + flopenrc #(4) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE); + flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM); + flopenrc #(4) InstrClassRegW(clk, reset, FlushW, ~StallW, InstrClassM, InstrClassW); flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); // branch predictor @@ -182,8 +181,8 @@ module bpred ( {DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM, 
PredictionInstrClassWrongM}); // pipeline the class - flopenrc #(5) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); - flopenrc #(5) PredInstrClassRegE(clk, reset, FlushE, ~StallE, PredInstrClassD, PredInstrClassE); + flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); + flopenrc #(4) PredInstrClassRegE(clk, reset, FlushE, ~StallE, PredInstrClassD, PredInstrClassE); // Check the prediction // first check if the target or fallthrough address matches what was predicted. @@ -209,9 +208,9 @@ module bpred ( assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE) | BPPredClassNonCFIWrongE; // If we have a jump, jump register or jal or jalr and the PC is wrong we need to increment the performance counter. - assign BTBPredPCWrongE = (InstrClassE[4] | InstrClassE[2] | InstrClassE[1]) & PredictionPCWrongE; + assign BTBPredPCWrongE = (InstrClassE[3] | InstrClassE[1]) & PredictionPCWrongE; // similar with RAS - assign RASPredPCWrongE = InstrClassE[3] & PredictionPCWrongE; + assign RASPredPCWrongE = InstrClassE[2] & PredictionPCWrongE; // Finally if the real instruction class is non CFI but the predictor said it was we need to count. 
assign BPPredClassNonCFIWrongE = PredictionInstrClassWrongE & ~|InstrClassE; diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 9798a7f20..04b880a6c 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -56,7 +56,7 @@ module ifu ( output logic [31:0] InstrD, InstrM, output logic [`XLEN-1:0] PCM, // branch predictor - output logic [4:0] InstrClassM, + output logic [3:0] InstrClassM, output logic DirPredictionWrongM, output logic BTBPredPCWrongM, output logic RASPredPCWrongM, diff --git a/pipelined/src/privileged/csr.sv b/pipelined/src/privileged/csr.sv index 52f277f3f..9fe9d2e03 100644 --- a/pipelined/src/privileged/csr.sv +++ b/pipelined/src/privileged/csr.sv @@ -45,7 +45,7 @@ module csr #(parameter input logic BTBPredPCWrongM, input logic RASPredPCWrongM, input logic PredictionInstrClassWrongM, - input logic [4:0] InstrClassM, + input logic [3:0] InstrClassM, input logic DCacheMiss, input logic DCacheAccess, input logic ICacheMiss, diff --git a/pipelined/src/privileged/csrc.sv b/pipelined/src/privileged/csrc.sv index 6b2497d4a..220242f78 100644 --- a/pipelined/src/privileged/csrc.sv +++ b/pipelined/src/privileged/csrc.sv @@ -45,7 +45,7 @@ module csrc #(parameter input logic BTBPredPCWrongM, input logic RASPredPCWrongM, input logic PredictionInstrClassWrongM, - input logic [4:0] InstrClassM, + input logic [3:0] InstrClassM, input logic DCacheMiss, input logic DCacheAccess, input logic ICacheMiss, @@ -85,9 +85,9 @@ module csrc #(parameter assign CounterEvent[4] = DirPredictionWrongM & InstrValidNotFlushedM; assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; - assign CounterEvent[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & InstrValidNotFlushedM; + assign CounterEvent[7] = (InstrClassM[3] | InstrClassM[1]) & InstrValidNotFlushedM; assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; - assign CounterEvent[9] = InstrClassM[3] & 
InstrValidNotFlushedM; + assign CounterEvent[9] = InstrClassM[2] & InstrValidNotFlushedM; assign CounterEvent[10] = PredictionInstrClassWrongM & InstrValidNotFlushedM; assign CounterEvent[11] = DCacheAccess; assign CounterEvent[12] = DCacheMiss; diff --git a/pipelined/src/privileged/privileged.sv b/pipelined/src/privileged/privileged.sv index 73ecfada5..89c777303 100644 --- a/pipelined/src/privileged/privileged.sv +++ b/pipelined/src/privileged/privileged.sv @@ -45,7 +45,7 @@ module privileged ( input logic BTBPredPCWrongM, input logic RASPredPCWrongM, input logic PredictionInstrClassWrongM, - input logic [4:0] InstrClassM, + input logic [3:0] InstrClassM, input logic DCacheMiss, input logic DCacheAccess, input logic ICacheMiss, diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index 6954a9da9..369906443 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -146,7 +146,7 @@ module wallypipelinedcore ( logic BTBPredPCWrongM; logic RASPredPCWrongM; logic PredictionInstrClassWrongM; - logic [4:0] InstrClassM; + logic [3:0] InstrClassM; logic InstrAccessFaultF, HPTWInstrAccessFaultM; logic [2:0] LSUHSIZE; logic [2:0] LSUHBURST; From 53c8042276bcc2bb759d18eba007854c431d7b67 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 13 Jan 2023 15:56:10 -0600 Subject: [PATCH 16/18] Signal renames for ras. 
--- pipelined/src/ifu/RAsPredictor.sv | 23 +++++++++++++---------- pipelined/src/ifu/bpred.sv | 8 ++++---- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/pipelined/src/ifu/RAsPredictor.sv b/pipelined/src/ifu/RAsPredictor.sv index 2fb98417f..3d82342fc 100644 --- a/pipelined/src/ifu/RAsPredictor.sv +++ b/pipelined/src/ifu/RAsPredictor.sv @@ -33,13 +33,16 @@ module RASPredictor ) (input logic clk, input logic reset, - input logic pop, - output logic [`XLEN-1:0] popPC, - input logic push, + input logic PopF, + output logic [`XLEN-1:0] RASPCF, + input logic PushE, input logic incr, - input logic [`XLEN-1:0] pushPC + input logic [`XLEN-1:0] PCLinkE ); + // *** need to update so it either doesn't push until the memory stage + // or need to repair flushed push. + // *** need to repair popped and then flushed returns. logic CounterEn; localparam Depth = $clog2(StackSize); @@ -47,13 +50,13 @@ module RASPredictor logic [StackSize-1:0] [`XLEN-1:0] memory; integer index; - assign CounterEn = pop | push | incr; + assign CounterEn = PopF | PushE | incr; - assign PtrD = pop ? PtrM1 : PtrP1; + assign PtrD = PopF ? PtrM1 : PtrP1; assign PtrM1 = PtrQ - 1'b1; assign PtrP1 = PtrQ + 1'b1; - // may have to handle a push and an incr at the same time. + // may have to handle a PushE and an incr at the same time. // *** what happens if jal is executing and there is a return being flushed in Decode? flopenr #(Depth) PTR(.clk(clk), @@ -67,12 +70,12 @@ module RASPredictor if(reset) begin for(index=0; index Date: Fri, 13 Jan 2023 18:05:47 -0600 Subject: [PATCH 17/18] Partial fix to RAS prediction accuracy.
--- pipelined/src/ifu/RAsPredictor.sv | 16 +++++++++------- pipelined/src/ifu/bpred.sv | 8 +++++++- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/pipelined/src/ifu/RAsPredictor.sv b/pipelined/src/ifu/RAsPredictor.sv index 3d82342fc..c71ff2966 100644 --- a/pipelined/src/ifu/RAsPredictor.sv +++ b/pipelined/src/ifu/RAsPredictor.sv @@ -32,12 +32,14 @@ module RASPredictor #(parameter int StackSize = 16 ) (input logic clk, - input logic reset, - input logic PopF, + input logic reset, + input logic PopF, output logic [`XLEN-1:0] RASPCF, - input logic PushE, - input logic incr, - input logic [`XLEN-1:0] PCLinkE + input logic [3:0] WrongPredInstrClassD, + input logic [3:0] InstrClassD, + input logic PushE, + input logic incr, + input logic [`XLEN-1:0] PCLinkE ); // *** need to update so it either doesn't push until the memory stage @@ -50,9 +52,9 @@ module RASPredictor logic [StackSize-1:0] [`XLEN-1:0] memory; integer index; - assign CounterEn = PopF | PushE | incr; + assign CounterEn = PopF | PushE | incr | WrongPredInstrClassD[2]; - assign PtrD = PopF ? PtrM1 : PtrP1; + assign PtrD = PopF | InstrClassD[2] ? PtrM1 : PtrP1; assign PtrM1 = PtrQ - 1'b1; assign PtrP1 = PtrQ + 1'b1; diff --git a/pipelined/src/ifu/bpred.sv b/pipelined/src/ifu/bpred.sv index 53bd4bafd..38efe8fd5 100644 --- a/pipelined/src/ifu/bpred.sv +++ b/pipelined/src/ifu/bpred.sv @@ -79,7 +79,8 @@ module bpred ( logic BPPredWrongM; logic [`XLEN-1:0] PCNext0F; logic [`XLEN-1:0] PCCorrectE; - + logic [3:0] WrongPredInstrClassD; + // Part 1 branch direction prediction // look into the 2 port Sram model. something is wrong. 
if (`BPTYPE == "BPTWOBIT") begin:Predictor @@ -157,6 +158,8 @@ module bpred ( RASPredictor RASPredictor(.clk(clk), .reset(reset), .PopF(PredInstrClassF[2] & ~StallF), + .WrongPredInstrClassD, + .InstrClassD, .RASPCF, .PushE(InstrClassE[3] & ~StallE), .incr(1'b0), @@ -213,6 +216,9 @@ module bpred ( assign RASPredPCWrongE = InstrClassE[2] & PredictionPCWrongE; // Finally if the real instruction class is non CFI but the predictor said it was we need to count. assign BPPredClassNonCFIWrongE = PredictionInstrClassWrongE & ~|InstrClassE; + + // branch class prediction wrong. + assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD; // Selects the BP or PC+2/4. From 77756e12ebcdb2a307181776449f77d68e8ab9f6 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 13 Jan 2023 18:50:01 -0600 Subject: [PATCH 18/18] Possible improvement to gshare. --- pipelined/src/ifu/bpred.sv | 2 +- pipelined/src/ifu/speculativegshare.sv | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pipelined/src/ifu/bpred.sv b/pipelined/src/ifu/bpred.sv index 38efe8fd5..219d9b4e0 100644 --- a/pipelined/src/ifu/bpred.sv +++ b/pipelined/src/ifu/bpred.sv @@ -108,7 +108,7 @@ module bpred ( speculativegshare DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, .BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), - .BranchInstrW(InstrClassW[0]), .PCSrcE); + .BranchInstrW(InstrClassW[0]), .WrongPredInstrClassD, .PCSrcE); end else if (`BPTYPE == "BPLOCALPAg") begin:Predictor // *** Fix me diff --git a/pipelined/src/ifu/speculativegshare.sv b/pipelined/src/ifu/speculativegshare.sv index eaa21c2f3..36c9086b2 100644 --- a/pipelined/src/ifu/speculativegshare.sv +++ b/pipelined/src/ifu/speculativegshare.sv @@ -40,7 +40,8 @@ module speculativegshare output logic DirPredictionWrongE, 
// update input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, - input logic BranchInstrF, BranchInstrD, BranchInstrE, BranchInstrM, BranchInstrW, + input logic BranchInstrF, BranchInstrD, BranchInstrE, BranchInstrM, BranchInstrW, + input logic [3:0] WrongPredInstrClassD, input logic PCSrcE ); @@ -53,7 +54,8 @@ module speculativegshare logic [k-1:0] GHRF; logic [k:0] GHRD, OldGHRE, GHRE, GHRM, GHRW; logic [k-1:0] GHRNextF; - logic [k:0] GHRNextD, GHRNextE, GHRNextM, GHRNextW; + logic [k:-1] GHRNextD, OldGHRD; + logic [k:0] GHRNextE, GHRNextM, GHRNextW; logic [k-1:0] IndexNextF, IndexF; logic [k-1:0] IndexD, IndexE, IndexM; @@ -118,8 +120,11 @@ module speculativegshare flopenr #(k) GHRFReg(clk, reset, (~StallF) | FlushD, GHRNextF, GHRF); - assign GHRNextD = FlushD ? GHRNextE : {DirPredictionF[1], GHRF}; - flopenr #(k+1) GHRDReg(clk, reset, (~StallD) | FlushD, GHRNextD, GHRD); + assign GHRNextD = FlushD ? {GHRNextE, GHRNextE[0]} : {DirPredictionF[1], GHRF, GHRF[0]}; + flopenr #(k+2) GHRDReg(clk, reset, (~StallD) | FlushD, GHRNextD, OldGHRD); + assign GHRD = WrongPredInstrClassD[0] & BranchInstrD ? {DirPredictionD[1], OldGHRD[k:1]} : // shift right + WrongPredInstrClassD[0] & ~BranchInstrD ? OldGHRD[k-2:-1] : // shift left + OldGHRD; assign GHRNextE = FlushE ? GHRNextM : GHRD; flopenr #(k+1) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, OldGHRE);