diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py index 0156dc9f..7b695d34 100755 --- a/bin/parseHPMC.py +++ b/bin/parseHPMC.py @@ -247,13 +247,25 @@ if(sys.argv[1] == '-b'): currPercent.append(percent) dct[PredType] = (currSize, currPercent) print(dct) + fig, axes = plt.subplots() + marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x'} + colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue'} for cat in dct: (x, y) = dct[cat] - plt.scatter(x, y, label='k') - plt.plot(x, y) - plt.ylabel('Prediction Accuracy') - plt.xlabel('Size (b or k)') - plt.legend(loc='upper left') + x=[int(2**int(v)/4) for v in x] + print(x, y) + axes.plot(x,y, color=colors[cat]) + axes.scatter(x,y, label=cat, marker=marker[cat], color=colors[cat]) + #plt.scatter(x, y, label=cat) + #plt.plot(x, y) + #axes.set_xticks([4, 6, 8, 10, 12, 14]) + axes.legend(loc='upper left') + axes.set_xscale("log") + axes.set_ylabel('Prediction Accuracy') + axes.set_xlabel('Size (bytes)') + axes.set_xticks([16, 64, 256, 1024, 4096, 16384]) + axes.set_xticklabels([16, 64, 256, 1024, 4096, 16384]) + axes.grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5) plt.show() diff --git a/config/buildroot/wally-config.vh b/config/buildroot/wally-config.vh index bfe69e84..3a68571d 100644 --- a/config/buildroot/wally-config.vh +++ b/config/buildroot/wally-config.vh @@ -135,7 +135,8 @@ `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 1 +`define SVADU_SUPPORTED 1 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/fpga/wally-config.vh b/config/fpga/wally-config.vh index 3ae91e3a..1f7447f4 100644 --- a/config/fpga/wally-config.vh +++ b/config/fpga/wally-config.vh @@ -144,7 +144,8 @@ `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 1 +`define SVADU_SUPPORTED 1 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git 
a/config/rv32e/wally-config.vh b/config/rv32e/wally-config.vh index 6e0de334..aee0e541 100644 --- a/config/rv32e/wally-config.vh +++ b/config/rv32e/wally-config.vh @@ -138,7 +138,8 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv32gc/wally-config.vh b/config/rv32gc/wally-config.vh index 1fb89abf..e6ea6a15 100644 --- a/config/rv32gc/wally-config.vh +++ b/config/rv32gc/wally-config.vh @@ -137,7 +137,8 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv32i/wally-config.vh b/config/rv32i/wally-config.vh index efbf6e7c..d75d0c46 100644 --- a/config/rv32i/wally-config.vh +++ b/config/rv32i/wally-config.vh @@ -138,7 +138,8 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv32imc/wally-config.vh b/config/rv32imc/wally-config.vh index 8fb29a67..42442d46 100644 --- a/config/rv32imc/wally-config.vh +++ b/config/rv32imc/wally-config.vh @@ -137,7 +137,8 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv64fpquad/wally-config.vh b/config/rv64fpquad/wally-config.vh index dd8058c2..34d7628e 100644 --- a/config/rv64fpquad/wally-config.vh +++ b/config/rv64fpquad/wally-config.vh @@ -140,7 +140,8 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv64gc/wally-config.vh b/config/rv64gc/wally-config.vh index c65b7105..fe6ecdc6 100644 --- 
a/config/rv64gc/wally-config.vh +++ b/config/rv64gc/wally-config.vh @@ -140,7 +140,8 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv64i/wally-config.vh b/config/rv64i/wally-config.vh index a3702c3f..34c37f73 100644 --- a/config/rv64i/wally-config.vh +++ b/config/rv64i/wally-config.vh @@ -140,7 +140,8 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index e39c5cac..7bbc8bf6 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -162,14 +162,14 @@ module controller( ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_1; // Non-implemented instruction 7'b0110011: if (Funct7D == 7'b0000000 | Funct7D == 7'b0100000 | ((`ZBB_SUPPORTED & BSelectD[2]) | (`ZBC_SUPPORTED & BSelectD[1]) | (`ZBS_SUPPORTED & BSelectD[0]) | (`ZBA_SUPPORTED & BSelectD[3]))) ControlsD = `CTRLW'b1_000_00_00_000_0_1_0_0_0_0_0_0_0_00_0; // R-type - else if (Funct7D == 7'b0000001 & `M_SUPPORTED) + else if (Funct7D == 7'b0000001 & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2]))) ControlsD = `CTRLW'b1_000_00_00_011_0_0_0_0_0_0_0_0_1_00_0; // Multiply/divide else ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_1; // Non-implemented instruction 7'b0110111: ControlsD = `CTRLW'b1_100_01_00_000_0_0_0_1_0_0_0_0_0_00_0; // lui 7'b0111011: if ((Funct7D == 7'b0000000 | Funct7D == 7'b0100000 | (`ZBA_SUPPORTED & BSelectD[3]) | (`ZBB_SUPPORTED & BSelectD[2])) & `XLEN == 64) ControlsD = `CTRLW'b1_000_00_00_000_0_1_0_0_1_0_0_0_0_00_0; // R-type W instructions for RV64i - else if (Funct7D == 7'b0000001 & `M_SUPPORTED & `XLEN == 64) + else if (Funct7D == 7'b0000001 & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])) & `XLEN == 64) ControlsD = 
`CTRLW'b1_000_00_00_011_0_0_0_0_1_0_0_0_1_00_0; // W-type Multiply/Divide else ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_1; // Non-implemented instruction diff --git a/src/ifu/bpred/RASPredictor.sv b/src/ifu/bpred/RASPredictor.sv index 330607af..5f14a028 100644 --- a/src/ifu/bpred/RASPredictor.sv +++ b/src/ifu/bpred/RASPredictor.sv @@ -33,10 +33,10 @@ module RASPredictor #(parameter int StackSize = 16 )( input logic clk, input logic reset, input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM, - input logic [3:0] WrongPredInstrClassD, // Prediction class is wrong - input logic [3:0] InstrClassD, - input logic [3:0] InstrClassE, // Instr class - input logic [3:0] PredInstrClassF, + input logic WrongBPRetD, // Prediction class is wrong + input logic RetD, + input logic RetE, JalE, // Instr class + input logic BPRetF, input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a jal output logic [`XLEN-1:0] RASPCF // Top of the stack ); @@ -58,17 +58,17 @@ module RASPredictor #(parameter int StackSize = 16 )( logic WrongPredRetD; - assign PopF = PredInstrClassF[2] & ~StallD & ~FlushD; - assign PushE = InstrClassE[3] & ~StallM & ~FlushM; + assign PopF = BPRetF & ~StallD & ~FlushD; + assign PushE = JalE & ~StallM & ~FlushM; - assign WrongPredRetD = (WrongPredInstrClassD[2]) & ~StallE & ~FlushE; - assign FlushedRetDE = (~StallE & FlushE & InstrClassD[2]) | (~StallM & FlushM & InstrClassE[2]); // flushed ret + assign WrongPredRetD = (WrongBPRetD) & ~StallE & ~FlushE; + assign FlushedRetDE = (~StallE & FlushE & RetD) | (~StallM & FlushM & RetE); // flushed ret assign RepairD = WrongPredRetD | FlushedRetDE ; - assign IncrRepairD = FlushedRetDE | (WrongPredRetD & ~InstrClassD[2]); // Guessed it was a ret, but its not + assign IncrRepairD = FlushedRetDE | (WrongPredRetD & ~RetD); // Guessed it was a ret, but its not - assign DecRepairD = WrongPredRetD & InstrClassD[2]; // Guessed non ret but is a ret. 
+ assign DecRepairD = WrongPredRetD & RetD; // Guessed non ret but is a ret. assign CounterEn = PopF | PushE | RepairD; diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index ec974d14..c802ffb2 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -72,15 +72,12 @@ module bpred ( logic [1:0] DirPredictionF; - logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD; logic [`XLEN-1:0] BTAF, RASPCF; logic PredictionPCWrongE; logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; - logic [3:0] InstrClassD; - logic [3:0] InstrClassE; logic DirPredictionWrongE; - logic SelBPPredF; + logic BPPCSrcF; logic [`XLEN-1:0] BPPredPCF; logic [`XLEN-1:0] PCNext0F; logic [`XLEN-1:0] PCCorrectE; @@ -91,34 +88,45 @@ module bpred ( logic [`XLEN-1:0] BTAD; + logic BTBJalF, BTBRetF, BTBJumpF, BTBBranchF; + logic BPBranchF, BPJumpF, BPRetF, BPJalF; + logic BPBranchD, BPJumpD, BPRetD, BPJalD; + logic RetD, JalD; + logic RetE, JalE; + logic BranchM, JumpM, RetM, JalM; + logic BranchW, JumpW, RetW, JalW; + logic WrongBPRetD; + logic [`XLEN-1:0] PCW, IEUAdrW; + // Part 1 branch direction prediction // look into the 2 port Sram model. something is wrong. 
if (`BPRED_TYPE == "BP_TWOBIT") begin:Predictor - twoBitPredictor #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, + twoBitPredictor #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, + .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); + .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BP_GSHARE") begin:Predictor gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), + .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .DirPredictionF, .DirPredictionWrongE, + .BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW, .PCSrcE); end else if (`BPRED_TYPE == "BP_GLOBAL") begin:Predictor gshare #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), + .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .DirPredictionF, .DirPredictionWrongE, + .BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW, .PCSrcE); end else if (`BPRED_TYPE == "BP_GSHARE_BASIC") begin:Predictor gsharebasic #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); + .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BP_GLOBAL_BASIC") begin:Predictor gsharebasic 
#(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); + .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor // *** Fix me @@ -140,18 +148,21 @@ module bpred ( btb #(`BTB_SIZE) TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .PCM, + .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .BTAF, .BTAD, - .BTBPredInstrClassF, + .BTBPredInstrClassF({BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}), .PredictionInstrClassWrongM, - .IEUAdrE, .IEUAdrM, - .InstrClassD, .InstrClassE, .InstrClassM); + .IEUAdrE, .IEUAdrM, .IEUAdrW, + .InstrClassD({JalD, RetD, JumpD, BranchD}), .InstrClassE({JalE, RetE, JumpE, BranchE}), .InstrClassM({JalM, RetM, JumpM, BranchM}), + .InstrClassW({JalW, RetW, JumpW, BranchW})); - // the branch predictor needs a compact decoding of the instruction class. - if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode - logic [3:0] InstrClassF; + if (!`INSTR_CLASS_PRED) begin : DirectClassDecode + // This section is mainly for testing, verification, and PPA comparison. + // An alternative to using the BTB to store the instruction class is to partially decode + // the instructions in the Fetch stage into Jal, Ret, Jump, and Branch instructions. + // This logic is not described in the textbook as of 23 February 2023. 
logic cjal, cj, cjr, cjalr, CJumpF, CBranchF; - logic JumpF, BranchF; + logic NCJumpF, NCBranchF; if(`C_SUPPORTED) begin logic [4:0] CompressedOpcF; @@ -166,48 +177,44 @@ module bpred ( assign {cjal, cj, cjr, cjalr, CJumpF, CBranchF} = '0; end - assign JumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F; - assign BranchF = PostSpillInstrRawF[6:0] == 7'h63; + assign NCJumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F; + assign NCBranchF = PostSpillInstrRawF[6:0] == 7'h63; - assign InstrClassF[0] = BranchF | (`C_SUPPORTED & CBranchF); - assign InstrClassF[1] = JumpF | (`C_SUPPORTED & (CJumpF)); - assign InstrClassF[2] = (JumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // return must return to ra or r5 - (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); + assign BPBranchF = NCBranchF | (`C_SUPPORTED & CBranchF); + assign BPJumpF = NCJumpF | (`C_SUPPORTED & (CJumpF)); + assign BPRetF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // return must return to ra or x5 + (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); - assign InstrClassF[3] = (JumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 + assign BPJalF = (NCJumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); - assign PredInstrClassF = InstrClassF; - assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | - PredInstrClassF[1]; end else begin - assign PredInstrClassF = BTBPredInstrClassF; - assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | - PredInstrClassF[1]; + // This section connects the BTB's instruction class prediction. 
+ assign {BPJalF, BPRetF, BPJumpF, BPBranchF} = {BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}; end + assign BPPCSrcF = (BPBranchF & DirPredictionF[1]) | BPJumpF; // Part 3 RAS RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, - .PredInstrClassF, .InstrClassD, .InstrClassE, - .WrongPredInstrClassD, .RASPCF, .PCLinkE); + .BPRetF, .RetD, .RetE, .JalE, + .WrongBPRetD, .RASPCF, .PCLinkE); - assign BPPredPCF = PredInstrClassF[2] ? RASPCF : BTAF; + assign BPPredPCF = BPRetF ? RASPCF : BTAF; - assign InstrClassD[0] = BranchD; - assign InstrClassD[1] = JumpD ; - assign InstrClassD[2] = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 - assign InstrClassD[3] = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 + assign RetD = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 + assign JalD = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 - flopenrc #(4) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE); - flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM); + flopenrc #(2) InstrClassRegE(clk, reset, FlushE, ~StallE, {JalD, RetD}, {JalE, RetE}); + flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, {JalE, RetE, JumpE, BranchE}, {JalM, RetM, JumpM, BranchM}); + flopenrc #(4) InstrClassRegW(clk, reset, FlushM, ~StallW, {JalM, RetM, JumpM, BranchM}, {JalW, RetW, JumpW, BranchW}); flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); // branch predictor flopenrc #(1) BPClassWrongRegM(clk, reset, FlushM, ~StallM, AnyWrongPredInstrClassE, PredictionInstrClassWrongM); - - // pipeline the class - flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); + + // pipeline the predicted class + 
flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, {BPJalF, BPRetF, BPJumpF, BPBranchF}, {BPJalD, BPRetD, BPJumpD, BPBranchD}); // Check the prediction // if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address. @@ -218,11 +225,10 @@ module bpred ( assign PredictionPCWrongE = PCCorrectE != PCD; // branch class prediction wrong. - assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD[3:0]; - assign AnyWrongPredInstrClassD = |WrongPredInstrClassD; + assign AnyWrongPredInstrClassD = |({BPJalD, BPRetD, BPJumpD, BPBranchD} ^ {JalD, RetD, JumpD, BranchD}); + assign WrongBPRetD = BPRetD ^ RetD; // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. - //assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE | (AnyWrongPredInstrClassE & ~|InstrClassE)); assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; logic BPPredWrongEAlt; @@ -232,7 +238,7 @@ module bpred ( // Output the predicted PC or corrected PC on miss-predict. // Selects the BP or PC+2/4. - mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPredPCF, SelBPPredF, PCNext0F); + mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPredPCF, BPPCSrcF, PCNext0F); // If the prediction is wrong select the correct address. mux2 #(`XLEN) pcmux1(PCNext0F, PCCorrectE, BPPredWrongE, PCNext1F); // Correct branch/jump target. @@ -257,10 +263,10 @@ module bpred ( // could be wrong or the fall through address selected for branch predict not taken. // By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of // both without the above inaccuracies. 
- assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]) & PCSrcE; - assign RASPredPCWrongE = (RASPCE != IEUAdrE) & InstrClassE[2] & PCSrcE; + assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~RetE) & PCSrcE; + assign RASPredPCWrongE = (RASPCE != IEUAdrE) & RetE & PCSrcE; - assign JumpOrTakenBranchE = (InstrClassE[0] & PCSrcE) | InstrClassE[1]; + assign JumpOrTakenBranchE = (BranchE & PCSrcE) | JumpE; flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); @@ -275,5 +281,11 @@ module bpred ( end else begin assign {BTBPredPCWrongM, RASPredPCWrongM, JumpOrTakenBranchM} = '0; end + + // **** Fix me + assign InstrClassM = {JalM, RetM, JumpM, BranchM}; + flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); + flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW); + endmodule diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index 2bb00671..d2f0cb77 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -34,7 +34,7 @@ module btb #(parameter Depth = 10 ) ( input logic clk, input logic reset, input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW, - input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, // PC at various stages + input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW,// PC at various stages output logic [`XLEN-1:0] BTAF, // BTB's guess at PC output logic [`XLEN-1:0] BTAD, output logic [3:0] BTBPredInstrClassF, // BTB's guess at instruction class @@ -42,14 +42,16 @@ module btb #(parameter Depth = 10 ) ( input logic PredictionInstrClassWrongM, // BTB's instruction class guess was wrong input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb input logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb + input logic [`XLEN-1:0] IEUAdrW, input logic [3:0] InstrClassD, // Instruction class to insert into btb input logic [3:0] InstrClassE, // Instruction 
class to insert into btb - input logic [3:0] InstrClassM // Instruction class to insert into btb + input logic [3:0] InstrClassM, // Instruction class to insert into btb + input logic [3:0] InstrClassW ); - logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex; + logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex; logic [`XLEN-1:0] ResetPC; - logic MatchF, MatchD, MatchE, MatchM, MatchNextX, MatchXF; + logic MatchD, MatchE, MatchM, MatchW, MatchX; logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; logic [`XLEN+3:0] TableBTBPredictionF; logic UpdateEn; @@ -62,6 +64,7 @@ module btb #(parameter Depth = 10 ) ( assign PCDIndex = {PCD[Depth+1] ^ PCD[1], PCD[Depth:2]}; assign PCEIndex = {PCE[Depth+1] ^ PCE[1], PCE[Depth:2]}; assign PCMIndex = {PCM[Depth+1] ^ PCM[1], PCM[Depth:2]}; + assign PCWIndex = {PCW[Depth+1] ^ PCW[1], PCW[Depth:2]}; // must output a valid PC and valid bit during reset. Because only PCF, not PCNextF is reset, PCNextF is invalid // during reset. The BTB must produce a non X PC1NextF to allow the simulation to run. @@ -70,23 +73,18 @@ module btb #(parameter Depth = 10 ) ( assign ResetPC = `RESET_VECTOR; assign PCNextFIndex = reset ? ResetPC[Depth+1:2] : {PCNextF[Depth+1] ^ PCNextF[1], PCNextF[Depth:2]}; - assign MatchF = PCNextFIndex == PCFIndex; - assign MatchD = PCNextFIndex == PCDIndex; - assign MatchE = PCNextFIndex == PCEIndex; - assign MatchM = PCNextFIndex == PCMIndex; - assign MatchNextX = MatchF | MatchD | MatchE | MatchM; - - flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); + assign MatchD = PCFIndex == PCDIndex; + assign MatchE = PCFIndex == PCEIndex; + assign MatchM = PCFIndex == PCMIndex; + assign MatchW = PCFIndex == PCWIndex; + assign MatchX = MatchD | MatchE | MatchM | MatchW; - assign ForwardBTBPrediction = MatchF ? {BTBPredInstrClassF, BTAF} : - MatchD ? {InstrClassD, BTAD} : - MatchE ? 
{InstrClassE, IEUAdrE} : - {InstrClassM, IEUAdrM} ; - - flopenr #(`XLEN+4) ForwardBTBPredicitonReg(clk, reset, ~StallF, ForwardBTBPrediction, ForwardBTBPredictionF); - - assign {BTBPredInstrClassF, BTAF} = MatchXF ? ForwardBTBPredictionF : {TableBTBPredictionF}; + assign ForwardBTBPredictionF = MatchD ? {InstrClassD, BTAD} : + MatchE ? {InstrClassE, IEUAdrE} : + MatchM ? {InstrClassM, IEUAdrM} : + {InstrClassW, IEUAdrW} ; + assign {BTBPredInstrClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredictionF}; assign UpdateEn = |InstrClassM | PredictionInstrClassWrongM; diff --git a/src/ifu/bpred/gshare.sv b/src/ifu/bpred/gshare.sv index 5332ce5c..70c03afb 100644 --- a/src/ifu/bpred/gshare.sv +++ b/src/ifu/bpred/gshare.sv @@ -38,17 +38,17 @@ module gshare #(parameter k = 10, output logic [1:0] DirPredictionF, output logic DirPredictionWrongE, // update - input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, - input logic BranchInstrF, BranchInstrD, BranchInstrE, BranchInstrM, PCSrcE + input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW, + input logic BPBranchF, BranchD, BranchE, BranchM, BranchW, PCSrcE ); - logic MatchF, MatchD, MatchE, MatchM; - logic MatchNextX, MatchXF; + logic MatchF, MatchD, MatchE, MatchM, MatchW; + logic MatchX; - logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE, ForwardNewDirPrediction, ForwardDirPredictionF; - logic [1:0] NewDirPredictionE, NewDirPredictionM; + logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE, ForwardNewDirPredictionF; + logic [1:0] NewDirPredictionE, NewDirPredictionM, NewDirPredictionW; - logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE, IndexM; + logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE, IndexM, IndexW; logic [k-1:0] GHRF, GHRD, GHRE, GHRM; logic [k-1:0] GHRNextM, GHRNextF; @@ -68,22 +68,20 @@ module gshare #(parameter k = 10, assign IndexM = GHRM; end - assign MatchF = BranchInstrF & ~FlushD & (IndexNextF == IndexF); - assign MatchD = BranchInstrD & ~FlushE & (IndexNextF 
== IndexD); - assign MatchE = BranchInstrE & ~FlushM & (IndexNextF == IndexE); - assign MatchM = BranchInstrM & ~FlushW & (IndexNextF == IndexM); - assign MatchNextX = MatchF | MatchD | MatchE | MatchM; + flopenrc #(k) IndexWReg(clk, reset, FlushW, ~StallW, IndexM, IndexW); - flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); + assign MatchD = BranchD & ~FlushE & (IndexF == IndexD); + assign MatchE = BranchE & ~FlushM & (IndexF == IndexE); + assign MatchM = BranchM & ~FlushW & (IndexF == IndexM); + assign MatchW = BranchW & ~FlushW & (IndexF == IndexW); + assign MatchX = MatchD | MatchE | MatchM | MatchW; - assign ForwardNewDirPrediction = MatchF ? {2{DirPredictionF[1]}} : - MatchD ? {2{DirPredictionD[1]}} : + assign ForwardNewDirPredictionF = MatchD ? {2{DirPredictionD[1]}} : MatchE ? {NewDirPredictionE} : - NewDirPredictionM ; + MatchM ? {NewDirPredictionM} : + NewDirPredictionW ; - flopenr #(2) ForwardDirPredicitonReg(clk, reset, ~StallF, ForwardNewDirPrediction, ForwardDirPredictionF); - - assign DirPredictionF = MatchXF ? ForwardDirPredictionF : TableDirPredictionF; + assign DirPredictionF = MatchX ? 
ForwardNewDirPredictionF : TableDirPredictionF; ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF), .ce2(~StallM & ~FlushM), @@ -91,7 +89,7 @@ module gshare #(parameter k = 10, .rd1(TableDirPredictionF), .wa2(IndexM), .wd2(NewDirPredictionM), - .we2(BranchInstrM), + .we2(BranchM), .bwe2(1'b1)); flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); @@ -99,17 +97,18 @@ module gshare #(parameter k = 10, satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); + flopenrc #(2) NewPredictionRegW(clk, reset, FlushW, ~StallW, NewDirPredictionM, NewDirPredictionW); - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchInstrE; + assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchE; - assign GHRNextF = BranchInstrF ? {DirPredictionF[1], GHRF[k-1:1]} : GHRF; - assign GHRF = BranchInstrD ? {DirPredictionD[1], GHRD[k-1:1]} : GHRD; - assign GHRD = BranchInstrE ? {PCSrcE, GHRE[k-1:1]} : GHRE; - assign GHRE = BranchInstrM ? {PCSrcM, GHRM[k-1:1]} : GHRM; + assign GHRNextF = BPBranchF ? {DirPredictionF[1], GHRF[k-1:1]} : GHRF; + assign GHRF = BranchD ? {DirPredictionD[1], GHRD[k-1:1]} : GHRD; + assign GHRD = BranchE ? {PCSrcE, GHRE[k-1:1]} : GHRE; + assign GHRE = BranchM ? 
{PCSrcM, GHRM[k-1:1]} : GHRM; assign GHRNextM = {PCSrcM, GHRM[k-1:1]}; - flopenr #(k) GHRReg(clk, reset, ~StallW & ~FlushW & BranchInstrM, GHRNextM, GHRM); + flopenr #(k) GHRReg(clk, reset, ~StallW & ~FlushW & BranchM, GHRNextM, GHRM); flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM); endmodule diff --git a/src/ifu/bpred/gsharebasic.sv b/src/ifu/bpred/gsharebasic.sv index cb0bbe9e..e793e7ac 100644 --- a/src/ifu/bpred/gsharebasic.sv +++ b/src/ifu/bpred/gsharebasic.sv @@ -39,7 +39,7 @@ module gsharebasic #(parameter k = 10, output logic DirPredictionWrongE, // update input logic [`XLEN-1:0] PCNextF, PCM, - input logic BranchInstrE, BranchInstrM, PCSrcE + input logic BranchE, BranchM, PCSrcE ); logic [k-1:0] IndexNextF, IndexM; @@ -64,7 +64,7 @@ module gsharebasic #(parameter k = 10, .rd1(DirPredictionF), .wa2(IndexM), .wd2(NewDirPredictionM), - .we2(BranchInstrM), + .we2(BranchM), .bwe2(1'b1)); flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); @@ -73,10 +73,10 @@ module gsharebasic #(parameter k = 10, satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchInstrE; + assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchE; - assign GHRNext = BranchInstrM ? {PCSrcM, GHR[k-1:1]} : GHR; - flopenr #(k) GHRReg(clk, reset, ~StallM & ~FlushM & BranchInstrM, GHRNext, GHR); + assign GHRNext = BranchM ? 
{PCSrcM, GHR[k-1:1]} : GHR; + flopenr #(k) GHRReg(clk, reset, ~StallM & ~FlushM & BranchM, GHRNext, GHR); flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM); flopenrc #(k) GHRFReg(clk, reset, FlushD, ~StallF, GHR, GHRF); diff --git a/src/ifu/bpred/twoBitPredictor.sv b/src/ifu/bpred/twoBitPredictor.sv index 4a7be674..58bf1c6b 100644 --- a/src/ifu/bpred/twoBitPredictor.sv +++ b/src/ifu/bpred/twoBitPredictor.sv @@ -31,12 +31,12 @@ module twoBitPredictor #(parameter k = 10) ( input logic clk, input logic reset, - input logic StallF, StallD, StallE, StallM, - input logic FlushD, FlushE, FlushM, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, input logic [`XLEN-1:0] PCNextF, PCM, output logic [1:0] DirPredictionF, output logic DirPredictionWrongE, - input logic BranchInstrE, BranchInstrM, + input logic BranchE, BranchM, input logic PCSrcE ); @@ -55,18 +55,18 @@ module twoBitPredictor #(parameter k = 10) ( ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), - .ce1(~StallF), .ce2(~StallM & ~FlushM), + .ce1(~StallF), .ce2(~StallW & ~FlushW), .ra1(IndexNextF), .rd1(DirPredictionF), .wa2(IndexM), .wd2(NewDirPredictionM), - .we2(BranchInstrM & ~StallM & ~FlushM), + .we2(BranchM), .bwe2(1'b1)); flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, DirPredictionD, DirPredictionE); - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchInstrE; + assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchE; satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 71221ef6..887d1f45 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -88,7 +88,7 @@ module ifu ( input logic [1:0] STATUS_MPP, // Status CSR: previous machine 
privilege level input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries output logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk - output logic InstrDAPageFaultF, // ITLB hit needs to update dirty or access bits + output logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP address from privileged unit output logic InstrAccessFaultF, // Instruction access fault @@ -145,7 +145,7 @@ module ifu ( if(`C_SUPPORTED) begin : Spill spill #(`ICACHE_SUPPORTED) spill(.clk, .reset, .StallD, .FlushD, .PCF, .PCPlus4F, .PCNextF, .InstrRawF, - .InstrDAPageFaultF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCFSpill, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); + .InstrUpdateDAF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCFSpill, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); end else begin : NoSpill assign PCNextFSpill = PCNextF; assign PCFSpill = PCF; @@ -185,12 +185,12 @@ module ifu ( .InstrAccessFaultF, .LoadAccessFaultM(), .StoreAmoAccessFaultM(), .InstrPageFaultF, .LoadPageFaultM(), .StoreAmoPageFaultM(), .LoadMisalignedFaultM(), .StoreAmoMisalignedFaultM(), - .DAPageFault(InstrDAPageFaultF), + .UpdateDA(InstrUpdateDAF), .AtomicAccessM(1'b0),.ExecuteAccessF(1'b1), .WriteAccessM(1'b0), .ReadAccessM(1'b0), .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW); end else begin - assign {ITLBMissF, InstrAccessFaultF, InstrPageFaultF, InstrDAPageFaultF} = '0; + assign {ITLBMissF, InstrAccessFaultF, InstrPageFaultF, InstrUpdateDAF} = '0; assign PCPF = PCFExt[`PA_BITS-1:0]; assign CacheableF = '1; assign SelIROM = '0; diff --git a/src/ifu/spill.sv b/src/ifu/spill.sv index 4bb677ca..4b89a3ce 100644 --- a/src/ifu/spill.sv +++ b/src/ifu/spill.sv @@ -42,7 +42,7 @@ module spill #( input logic [31:0] InstrRawF, // Instruction from the IROM, I$, 
or bus. Used to check if the instruction if compressed input logic IFUCacheBusStallD, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched input logic ITLBMissF, // ITLB miss, ignore memory request - input logic InstrDAPageFaultF, // Ignore memory request if the hptw support write and a DA page fault occurs (hptw is still active) + input logic InstrUpdateDAF, // Ignore memory request if the hptw support write and a DA page fault occurs (hptw is still active) output logic [`XLEN-1:0] PCNextFSpill, // The next PCF for one of the two memory addresses of the spill output logic [`XLEN-1:0] PCFSpill, // PCF for one of the two memory addresses of the spill output logic SelNextSpillF, // During the transition between the two spill operations, the IFU should stall the pipeline @@ -77,7 +77,7 @@ module spill #( //////////////////////////////////////////////////////////////////////////////////////////////////// assign SpillF = &PCF[$clog2(SPILLTHRESHOLD)+1:1]; - assign TakeSpillF = SpillF & ~IFUCacheBusStallD & ~(ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF)); + assign TakeSpillF = SpillF & ~IFUCacheBusStallD & ~(ITLBMissF | (`SVADU_SUPPORTED & InstrUpdateDAF)); always_ff @(posedge clk) if (reset | FlushD) CurrState <= #1 STATE_READY; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index e01de312..18383e0d 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -81,7 +81,7 @@ module lsu ( input logic [1:0] STATUS_MPP, // Machine previous privilege mode input logic [`XLEN-1:0] PCFSpill, // Fetch PC input logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk - input logic InstrDAPageFaultF, // ITLB hit needs to update dirty or access bits + input logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits output logic [`XLEN-1:0] PTE, // Page table entry write to ITLB output logic [1:0] PageType, // Type of page table entry to write to ITLB output logic ITLBWriteF, // Write PTE to ITLB @@ -127,7 +127,7 @@ 
module lsu ( logic DTLBMissM; // DTLB miss causes HPTW walk logic DTLBWriteM; // Writes PTE and PageType to DTLB - logic DataDAPageFaultM; // DTLB hit needs to update dirty or access bits + logic DataUpdateDAM; // DTLB hit needs to update dirty or access bits logic LSULoadAccessFaultM; // Load acces fault logic LSUStoreAmoAccessFaultM; // Store access fault logic IgnoreRequestTLB; // On either ITLB or DTLB miss, ignore miss so HPTW can handle @@ -151,7 +151,7 @@ module lsu ( if(`VIRTMEM_SUPPORTED) begin : VIRTMEM_SUPPORTED hptw hptw(.clk, .reset, .MemRWM, .AtomicM, .ITLBMissF, .ITLBWriteF, - .DTLBMissM, .DTLBWriteM, .InstrDAPageFaultF, .DataDAPageFaultM, + .DTLBMissM, .DTLBWriteM, .InstrUpdateDAF, .DataUpdateDAM, .FlushW, .DCacheStallM, .SATP_REGW, .PCFSpill, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ReadDataM(ReadDataM[`XLEN-1:0]), // ReadDataM is LLEN, but HPTW only needs XLEN @@ -196,7 +196,7 @@ module lsu ( .StoreAmoAccessFaultM(LSUStoreAmoAccessFaultM), .InstrPageFaultF(), .LoadPageFaultM, .StoreAmoPageFaultM, .LoadMisalignedFaultM, .StoreAmoMisalignedFaultM, // *** these faults need to be supressed during hptw. 
- .DAPageFault(DataDAPageFaultM), + .UpdateDA(DataUpdateDAM), .AtomicAccessM(|LSUAtomicM), .ExecuteAccessF(1'b0), .WriteAccessM(PreLSURWM[0]), .ReadAccessM(PreLSURWM[1]), .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW); diff --git a/src/mdu/mdu.sv b/src/mdu/mdu.sv index 4a85bf47..b62add60 100644 --- a/src/mdu/mdu.sv +++ b/src/mdu/mdu.sv @@ -51,16 +51,18 @@ module mdu( // Divider // Start a divide when a new division instruction is received and the divider isn't already busy or finishing // When IDIV_ON_FPU is set, use the FPU divider instead - if (`IDIV_ON_FPU) begin + // In ZMMUL, with M_SUPPORTED = 0, omit the divider + if ((`IDIV_ON_FPU) || (!`M_SUPPORTED)) begin:nodiv assign QuotM = 0; assign RemM = 0; assign DivBusyE = 0; - end else begin + end else begin:div intdivrestoring div(.clk, .reset, .StallM, .FlushE, .DivSignedE(~Funct3E[0]), .W64E, .IntDivE, .ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM); end // Result multiplexer + // For ZMMUL, QuotM and RemM are tied to 0, so the mux automatically simplifies always_comb case (Funct3M) 3'b000: PrelimResultM = ProdM[`XLEN-1:0]; // mul diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index c0b7ad93..f2df8ea9 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -49,8 +49,8 @@ module hptw ( input logic ITLBMissF, input logic DTLBMissM, input logic FlushW, - input logic InstrDAPageFaultF, - input logic DataDAPageFaultM, + input logic InstrUpdateDAF, + input logic DataUpdateDAM, output logic [`XLEN-1:0] PTE, // page table entry to TLBs output logic [1:0] PageType, // page type to TLBs output logic ITLBWriteF, DTLBWriteM, // write TLB with new entry @@ -87,21 +87,23 @@ module hptw ( logic [`XLEN-1:0] TranslationVAdr; logic [`XLEN-1:0] NextPTE; logic UpdatePTE; - logic DAPageFault; + logic HPTWUpdateDA; logic [`PA_BITS-1:0] HPTWReadAdr; logic SelHPTWAdr; logic [`XLEN+1:0] HPTWAdrExt; logic ITLBMissOrDAFaultF; logic DTLBMissOrDAFaultM; + logic LSUAccessFaultM; logic [`PA_BITS-1:0] HPTWAdr; logic [1:0] HPTWRW; logic 
[2:0] HPTWSize; // 32 or 64 bit access statetype WalkerState, NextWalkerState, InitialWalkerState; // map hptw access faults onto either the original LSU load/store fault or instruction access fault - assign LoadAccessFaultM = WalkerState == IDLE ? LSULoadAccessFaultM : (LSULoadAccessFaultM | LSUStoreAmoAccessFaultM) & DTLBWalk & MemRWM[1] & ~MemRWM[0]; - assign StoreAmoAccessFaultM = WalkerState == IDLE ? LSUStoreAmoAccessFaultM : (LSULoadAccessFaultM | LSUStoreAmoAccessFaultM) & DTLBWalk & MemRWM[0]; - assign HPTWInstrAccessFaultM = WalkerState == IDLE ? 1'b0: (LSUStoreAmoAccessFaultM | LSULoadAccessFaultM) & ~DTLBWalk; + assign LSUAccessFaultM = LSULoadAccessFaultM | LSUStoreAmoAccessFaultM; + assign LoadAccessFaultM = WalkerState == IDLE ? LSULoadAccessFaultM : LSUAccessFaultM & DTLBWalk & MemRWM[1] & ~MemRWM[0]; + assign StoreAmoAccessFaultM = WalkerState == IDLE ? LSUStoreAmoAccessFaultM : LSUAccessFaultM & DTLBWalk & MemRWM[0]; + assign HPTWInstrAccessFaultM = WalkerState == IDLE ? 
1'b0: LSUAccessFaultM & ~DTLBWalk; // Extract bits from CSRs and inputs assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; @@ -125,10 +127,10 @@ module hptw ( assign ValidLeafPTE = ValidPTE & LeafPTE; assign ValidNonLeafPTE = ValidPTE & ~LeafPTE; - if(`HPTW_WRITES_SUPPORTED) begin : hptwwrites + if(`SVADU_SUPPORTED) begin : hptwwrites logic ReadAccess, WriteAccess; - logic InvalidRead, InvalidWrite; - logic UpperBitsUnequalPageFault; + logic InvalidRead, InvalidWrite, InvalidOp; + logic UpperBitsUnequal; logic OtherPageFault; logic [1:0] EffectivePrivilegeMode; logic ImproperPrivilege; @@ -147,7 +149,7 @@ module hptw ( mux2 #(`PA_BITS) HPTWWriteAdrMux(HPTWReadAdr, HPTWWriteAdr, SelHPTWWriteAdr, HPTWAdr); assign {Dirty, Accessed} = PTE[7:6]; - assign WriteAccess = MemRWM[0] | (|AtomicM); + assign WriteAccess = MemRWM[0]; // implies | (|AtomicM); assign SetDirty = ~Dirty & DTLBWalk & WriteAccess; assign ReadAccess = MemRWM[1]; @@ -157,24 +159,24 @@ module hptw ( // Check for page faults vm64check vm64check(.SATP_MODE(SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]), .VAdr(TranslationVAdr), - .SV39Mode(), .UpperBitsUnequalPageFault); + .SV39Mode(), .UpperBitsUnequal); assign InvalidRead = ReadAccess & ~Readable & (~STATUS_MXR | ~Executable); assign InvalidWrite = WriteAccess & ~Writable; - assign OtherPageFault = DTLBWalk? ImproperPrivilege | InvalidRead | InvalidWrite | UpperBitsUnequalPageFault | Misaligned | ~Valid : - ImproperPrivilege | ~Executable | UpperBitsUnequalPageFault | Misaligned | ~Valid; + assign InvalidOp = DTLBWalk ? (InvalidRead | InvalidWrite) : ~Executable; + assign OtherPageFault = ImproperPrivilege | InvalidOp | UpperBitsUnequal | Misaligned | ~Valid; // hptw needs to know if there is a Dirty or Access fault occuring on this // memory access. If there is the PTE needs to be updated seting Access // and possibly also Dirty. Dirty is set if the operation is a store/amo. // However any other fault should not cause the update. 
- assign DAPageFault = ValidLeafPTE & (~Accessed | SetDirty) & ~OtherPageFault; + assign HPTWUpdateDA = ValidLeafPTE & (~Accessed | SetDirty) & ~OtherPageFault; assign HPTWRW[0] = (WalkerState == UPDATE_PTE); - assign UpdatePTE = (WalkerState == LEAF) & DAPageFault; + assign UpdatePTE = (WalkerState == LEAF) & HPTWUpdateDA; end else begin // block: hptwwrites assign NextPTE = ReadDataM; assign HPTWAdr = HPTWReadAdr; - assign DAPageFault = '0; + assign HPTWUpdateDA = '0; assign UpdatePTE = '0; assign HPTWRW[0] = '0; end @@ -182,8 +184,8 @@ module hptw ( // Enable and select signals based on states assign StartWalk = (WalkerState == IDLE) & TLBMiss; assign HPTWRW[1] = (WalkerState == L3_RD) | (WalkerState == L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD); - assign DTLBWriteM = (WalkerState == LEAF & ~DAPageFault) & DTLBWalk; - assign ITLBWriteF = (WalkerState == LEAF & ~DAPageFault) & ~DTLBWalk; + assign DTLBWriteM = (WalkerState == LEAF & ~HPTWUpdateDA) & DTLBWalk; + assign ITLBWriteF = (WalkerState == LEAF & ~HPTWUpdateDA) & ~DTLBWalk; // FSM to track PageType based on the levels of the page table traversed flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType); @@ -262,7 +264,7 @@ module hptw ( else NextWalkerState = LEAF; L0_RD: if (DCacheStallM) NextWalkerState = L0_RD; else NextWalkerState = LEAF; - LEAF: if (`HPTW_WRITES_SUPPORTED & DAPageFault) NextWalkerState = UPDATE_PTE; + LEAF: if (`SVADU_SUPPORTED & HPTWUpdateDA) NextWalkerState = UPDATE_PTE; else NextWalkerState = IDLE; UPDATE_PTE: if(DCacheStallM) NextWalkerState = UPDATE_PTE; else NextWalkerState = LEAF; @@ -273,8 +275,8 @@ module hptw ( assign SelHPTW = WalkerState != IDLE; assign HPTWStall = (WalkerState != IDLE) | (WalkerState == IDLE & TLBMiss); - assign ITLBMissOrDAFaultF = ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF); - assign DTLBMissOrDAFaultM = DTLBMissM | (`HPTW_WRITES_SUPPORTED & DataDAPageFaultM); + assign ITLBMissOrDAFaultF = ITLBMissF | (`SVADU_SUPPORTED & 
InstrUpdateDAF); + assign DTLBMissOrDAFaultM = DTLBMissM | (`SVADU_SUPPORTED & DataUpdateDAM); // HTPW address/data/control muxing @@ -291,7 +293,7 @@ module hptw ( mux2 #(7) funct7mux(Funct7M, 7'b0, SelHPTW, LSUFunct7M); mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LSUAtomicM); mux2 #(`XLEN+2) lsupadrmux(IEUAdrExtM, HPTWAdrExt, SelHPTWAdr, IHAdrM); - if(`HPTW_WRITES_SUPPORTED) + if(`SVADU_SUPPORTED) mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, IHWriteDataM); else assign IHWriteDataM = WriteDataM; diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index 5b524816..e3cd8031 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -51,7 +51,7 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( // Faults output logic InstrAccessFaultF, LoadAccessFaultM, StoreAmoAccessFaultM, // access fault sources output logic InstrPageFaultF, LoadPageFaultM, StoreAmoPageFaultM, // page fault sources - output logic DAPageFault, // page fault due to setting dirty or access bit + output logic UpdateDA, // page fault due to setting dirty or access bit output logic LoadMisalignedFaultM, StoreAmoMisalignedFaultM, // misaligned fault sources // PMA checker signals input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // access type @@ -70,6 +70,7 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( logic Translate; // Translation occurs when virtual memory is active and DisableTranslation is off logic TLBHit; // Hit in TLB logic TLBPageFault; // Page fault from TLB + logic ReadNoAmoAccessM; // Read that is not part of atomic operation causes Load faults. 
Otherwise StoreAmo faults // only instantiate TLB if Virtual Memory is supported if (`VIRTMEM_SUPPORTED) begin:tlb @@ -84,7 +85,7 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( .PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .PTE, .PageTypeWriteVal, .TLBWrite, .TLBFlush, .TLBPAdr, .TLBMiss, .TLBHit, - .Translate, .TLBPageFault, .DAPageFault); + .Translate, .TLBPageFault, .UpdateDA); end else begin:tlb// just pass address through as physical assign Translate = 0; assign TLBMiss = 0; @@ -118,11 +119,13 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( assign PMPLoadAccessFaultM = 0; end + assign ReadNoAmoAccessM = ReadAccessM & ~WriteAccessM;// AMO causes StoreAmo rather than Load fault + // Access faults // If TLB miss and translating we want to not have faults from the PMA and PMP checkers. - assign InstrAccessFaultF = (PMAInstrAccessFaultF | PMPInstrAccessFaultF) & ~(Translate & ~TLBHit); - assign LoadAccessFaultM = (PMALoadAccessFaultM | PMPLoadAccessFaultM) & ~(Translate & ~TLBHit); - assign StoreAmoAccessFaultM = (PMAStoreAmoAccessFaultM | PMPStoreAmoAccessFaultM) & ~(Translate & ~TLBHit); + assign InstrAccessFaultF = (PMAInstrAccessFaultF | PMPInstrAccessFaultF) & ~TLBMiss; + assign LoadAccessFaultM = (PMALoadAccessFaultM | PMPLoadAccessFaultM) & ~TLBMiss; + assign StoreAmoAccessFaultM = (PMAStoreAmoAccessFaultM | PMPStoreAmoAccessFaultM) & ~TLBMiss; // Misaligned faults always_comb @@ -132,11 +135,11 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( 2'b10: DataMisalignedM = VAdr[1] | VAdr[0]; // lw, sw, flw, fsw, lwu 2'b11: DataMisalignedM = |VAdr[2:0]; // ld, sd, fld, fsd endcase - assign LoadMisalignedFaultM = DataMisalignedM & ReadAccessM; - assign StoreAmoMisalignedFaultM = DataMisalignedM & (WriteAccessM | AtomicAccessM); + assign LoadMisalignedFaultM = DataMisalignedM & ReadNoAmoAccessM; + assign StoreAmoMisalignedFaultM = DataMisalignedM & WriteAccessM; // Specify which type of page fault is occurring assign 
InstrPageFaultF = TLBPageFault & ExecuteAccessF; - assign LoadPageFaultM = TLBPageFault & ReadAccessM; - assign StoreAmoPageFaultM = TLBPageFault & (WriteAccessM | AtomicAccessM); + assign LoadPageFaultM = TLBPageFault & ReadNoAmoAccessM; + assign StoreAmoPageFaultM = TLBPageFault & WriteAccessM; endmodule diff --git a/src/mmu/pmpadrdec.sv b/src/mmu/pmpadrdec.sv index 4aeea116..5f666eea 100644 --- a/src/mmu/pmpadrdec.sv +++ b/src/mmu/pmpadrdec.sv @@ -85,5 +85,9 @@ module pmpadrdec ( assign W = PMPCfg[1]; assign R = PMPCfg[0]; assign Active = |PMPCfg[4:3]; + + // known bug: The size of the access is not yet checked. For example, if an NA4 entry matches 0xC-0xF and the system + // attempts an 8-byte access to 0x8, the access should fail (see page 60 of privileged specification 20211203). This + // implementation will not detect the failure. endmodule diff --git a/src/mmu/tlb.sv b/src/mmu/tlb/tlb.sv similarity index 98% rename from src/mmu/tlb.sv rename to src/mmu/tlb/tlb.sv index f8bf0d17..7d6cd317 100644 --- a/src/mmu/tlb.sv +++ b/src/mmu/tlb/tlb.sv @@ -72,7 +72,7 @@ module tlb #(parameter TLB_ENTRIES = 8, ITLB = 0) ( output logic TLBHit, output logic Translate, output logic TLBPageFault, - output logic DAPageFault + output logic UpdateDA ); logic [TLB_ENTRIES-1:0] Matches, WriteEnables, PTE_Gs; // used as the one-hot encoding of WriteIndex @@ -105,7 +105,7 @@ module tlb #(parameter TLB_ENTRIES = 8, ITLB = 0) ( tlbcontrol #(ITLB) tlbcontrol(.SATP_MODE, .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .TLBFlush, .PTEAccessBits, .CAMHit, .Misaligned, .TLBMiss, .TLBHit, .TLBPageFault, - .DAPageFault, .SV39Mode, .Translate); + .UpdateDA, .SV39Mode, .Translate); tlblru #(TLB_ENTRIES) lru(.clk, .reset, .TLBWrite, .TLBFlush, .Matches, .CAMHit, .WriteEnables); tlbcam #(TLB_ENTRIES, `VPN_BITS + `ASID_BITS, `VPN_SEGMENT_BITS) diff --git a/src/mmu/tlbcam.sv b/src/mmu/tlb/tlbcam.sv similarity index 100% 
rename from src/mmu/tlbcam.sv rename to src/mmu/tlb/tlbcam.sv diff --git a/src/mmu/tlbcamline.sv b/src/mmu/tlb/tlbcamline.sv similarity index 100% rename from src/mmu/tlbcamline.sv rename to src/mmu/tlb/tlbcamline.sv diff --git a/src/mmu/tlbcontrol.sv b/src/mmu/tlb/tlbcontrol.sv similarity index 81% rename from src/mmu/tlbcontrol.sv rename to src/mmu/tlb/tlbcontrol.sv index abbdba8f..9754124d 100644 --- a/src/mmu/tlbcontrol.sv +++ b/src/mmu/tlb/tlbcontrol.sv @@ -43,7 +43,7 @@ module tlbcontrol #(parameter ITLB = 0) ( output logic TLBMiss, output logic TLBHit, output logic TLBPageFault, - output logic DAPageFault, + output logic UpdateDA, output logic SV39Mode, output logic Translate ); @@ -52,7 +52,7 @@ module tlbcontrol #(parameter ITLB = 0) ( logic [1:0] EffectivePrivilegeMode; logic PTE_D, PTE_A, PTE_U, PTE_X, PTE_W, PTE_R, PTE_V; // Useful PTE Control Bits - logic UpperBitsUnequalPageFault; + logic UpperBitsUnequal; logic TLBAccess; logic ImproperPrivilege; @@ -64,7 +64,7 @@ module tlbcontrol #(parameter ITLB = 0) ( assign TLBAccess = ReadAccess | WriteAccess; // Check that upper bits are legal (all 0s or all 1s) - vm64check vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequalPageFault); + vm64check vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequal); // unswizzle useful PTE bits assign {PTE_D, PTE_A} = PTEAccessBits[7:6]; @@ -76,13 +76,13 @@ module tlbcontrol #(parameter ITLB = 0) ( // only execute non-user mode pages. 
assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | ((EffectivePrivilegeMode == `S_MODE) & PTE_U); - if(`HPTW_WRITES_SUPPORTED) begin : hptwwrites - assign DAPageFault = Translate & TLBHit & ~PTE_A & ~TLBPageFault; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); + if(`SVADU_SUPPORTED) begin : hptwwrites + assign UpdateDA = Translate & TLBHit & ~PTE_A & ~TLBPageFault; + assign TLBPageFault = Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpperBitsUnequal | Misaligned | ~PTE_V); end else begin // fault for software handling if access bit is off - assign DAPageFault = ~PTE_A; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | DAPageFault | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); + assign UpdateDA = ~PTE_A; + assign TLBPageFault = Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpdateDA | UpperBitsUnequal | Misaligned | ~PTE_V); end end else begin:dtlb // Data TLB fault checking logic InvalidRead, InvalidWrite; @@ -98,16 +98,16 @@ module tlbcontrol #(parameter ITLB = 0) ( // Check for write error. Writes are invalid when the page's write bit is // low. 
assign InvalidWrite = WriteAccess & ~PTE_W; - if(`HPTW_WRITES_SUPPORTED) begin : hptwwrites - assign DAPageFault = Translate & TLBHit & (~PTE_A | WriteAccess & ~PTE_D) & ~TLBPageFault; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); + if(`SVADU_SUPPORTED) begin : hptwwrites + assign UpdateDA = Translate & TLBHit & (~PTE_A | WriteAccess & ~PTE_D) & ~TLBPageFault; + assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | UpperBitsUnequal | Misaligned | ~PTE_V)); end else begin // Fault for software handling if access bit is off or writing a page with dirty bit off - assign DAPageFault = ~PTE_A | WriteAccess & ~PTE_D; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | DAPageFault | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); + assign UpdateDA = ~PTE_A | WriteAccess & ~PTE_D; + assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | UpdateDA | UpperBitsUnequal | Misaligned | ~PTE_V)); end end assign TLBHit = CAMHit & TLBAccess; - assign TLBMiss = (~CAMHit | TLBFlush) & Translate & TLBAccess; + assign TLBMiss = ~CAMHit & TLBAccess & Translate ; endmodule diff --git a/src/mmu/tlblru.sv b/src/mmu/tlb/tlblru.sv similarity index 100% rename from src/mmu/tlblru.sv rename to src/mmu/tlb/tlblru.sv diff --git a/src/mmu/tlbmixer.sv b/src/mmu/tlb/tlbmixer.sv similarity index 100% rename from src/mmu/tlbmixer.sv rename to src/mmu/tlb/tlbmixer.sv diff --git a/src/mmu/tlbram.sv b/src/mmu/tlb/tlbram.sv similarity index 100% rename from src/mmu/tlbram.sv rename to src/mmu/tlb/tlbram.sv diff --git a/src/mmu/tlbramline.sv b/src/mmu/tlb/tlbramline.sv similarity index 100% rename from src/mmu/tlbramline.sv rename to src/mmu/tlb/tlbramline.sv diff --git a/src/mmu/vm64check.sv b/src/mmu/tlb/vm64check.sv similarity index 90% rename from src/mmu/vm64check.sv rename to 
src/mmu/tlb/vm64check.sv index a78b853e..5f12eef7 100644 --- a/src/mmu/vm64check.sv +++ b/src/mmu/tlb/vm64check.sv @@ -32,7 +32,7 @@ module vm64check ( input logic [`SVMODE_BITS-1:0] SATP_MODE, input logic [`XLEN-1:0] VAdr, output logic SV39Mode, - output logic UpperBitsUnequalPageFault + output logic UpperBitsUnequal ); if (`XLEN == 64) begin @@ -42,9 +42,9 @@ module vm64check ( logic eq_63_47, eq_46_38; assign eq_46_38 = &(VAdr[46:38]) | ~|(VAdr[46:38]); assign eq_63_47 = &(VAdr[63:47]) | ~|(VAdr[63:47]); - assign UpperBitsUnequalPageFault = SV39Mode ? ~(eq_63_47 & eq_46_38) : ~eq_63_47; + assign UpperBitsUnequal = SV39Mode ? ~(eq_63_47 & eq_46_38) : ~eq_63_47; end else begin assign SV39Mode = 0; - assign UpperBitsUnequalPageFault = 0; + assign UpperBitsUnequal = 0; end endmodule diff --git a/src/privileged/csrs.sv b/src/privileged/csrs.sv index 99c56ad1..253d0245 100644 --- a/src/privileged/csrs.sv +++ b/src/privileged/csrs.sv @@ -86,8 +86,8 @@ module csrs #(parameter assign WriteSTVALM = STrapM | (CSRSWriteM & (CSRAdrM == STVAL)) & InstrValidNotFlushedM; assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == `M_MODE | ~STATUS_TVM) & InstrValidNotFlushedM; assign WriteSCOUNTERENM = CSRSWriteM & (CSRAdrM == SCOUNTEREN) & InstrValidNotFlushedM; - assign WriteSTIMECMPM = CSRSWriteM & (CSRAdrM == STIMECMP) & MCOUNTEREN_TM & InstrValidNotFlushedM; - assign WriteSTIMECMPHM = CSRSWriteM & (CSRAdrM == STIMECMPH) & MCOUNTEREN_TM & (`XLEN == 32) & InstrValidNotFlushedM; + assign WriteSTIMECMPM = CSRSWriteM & (CSRAdrM == STIMECMP) & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM) & InstrValidNotFlushedM; + assign WriteSTIMECMPHM = CSRSWriteM & (CSRAdrM == STIMECMPH) & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM) & (`XLEN == 32) & InstrValidNotFlushedM; // CSRs flopenr #(`XLEN) STVECreg(clk, reset, WriteSTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, STVEC_REGW); @@ -100,12 +100,14 @@ module csrs #(parameter else assign SATP_REGW = 0; // hardwire to 
zero if virtual memory not supported flopens #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], SCOUNTEREN_REGW); - if (`XLEN == 64) - flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW); - else begin - flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW[31:0]); - flopenr #(`XLEN) STIMECMPHreg(clk, reset, WriteSTIMECMPHM, CSRWriteValM, STIMECMP_REGW[63:32]); - end + if (`SSTC_SUPPORTED) begin + if (`XLEN == 64) + flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW); + else begin + flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW[31:0]); + flopenr #(`XLEN) STIMECMPHreg(clk, reset, WriteSTIMECMPHM, CSRWriteValM, STIMECMP_REGW[63:32]); + end + end else assign STIMECMP_REGW = 0; // Supervisor timer interrupt logic // Spec is a bit peculiar - Machine timer interrupts are produced in CLINT, while Supervisor timer interrupts are in CSRs @@ -132,12 +134,12 @@ module csrs #(parameter if (PrivilegeModeW == `S_MODE & STATUS_TVM) IllegalCSRSAccessM = 1; end SCOUNTEREN:CSRSReadValM = {{(`XLEN-32){1'b0}}, SCOUNTEREN_REGW}; - STIMECMP: if (MCOUNTEREN_TM) CSRSReadValM = STIMECMP_REGW[`XLEN-1:0]; + STIMECMP: if (`SSTC_SUPPORTED & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM)) CSRSReadValM = STIMECMP_REGW[`XLEN-1:0]; else begin CSRSReadValM = 0; IllegalCSRSAccessM = 1; end - STIMECMPH: if (MCOUNTEREN_TM & (`XLEN == 32)) CSRSReadValM[31:0] = STIMECMP_REGW[63:32]; + STIMECMPH: if (`SSTC_SUPPORTED & (`XLEN == 32) & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM)) CSRSReadValM[31:0] = STIMECMP_REGW[63:32]; else begin // not supported for RV64 CSRSReadValM = 0; IllegalCSRSAccessM = 1; diff --git a/src/wally/cvw.sv b/src/wally/cvw.sv index c4541b69..eb614628 100644 --- a/src/wally/cvw.sv +++ b/src/wally/cvw.sv @@ -101,7 +101,7 @@ package cvw; parameter BPRED_SUPPORTED = `BPRED_SUPPORTED; parameter BPRED_TYPE = `BPRED_TYPE; parameter 
BPRED_SIZE = `BPRED_SIZE; - parameter HPTW_WRITES_SUPPORTED = `HPTW_WRITES_SUPPORTED; + parameter SVADU_SUPPORTED = `SVADU_SUPPORTED; // parameter = `; diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 02074f97..d5458ed7 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -156,7 +156,7 @@ module wallypipelinedcore ( logic ICacheMiss; logic ICacheAccess; logic BreakpointFaultM, EcallFaultM; - logic InstrDAPageFaultF; + logic InstrUpdateDAF; logic BigEndianM; logic FCvtIntE; logic CommittedF; @@ -184,7 +184,7 @@ module wallypipelinedcore ( .PrivilegeModeW, .PTE, .PageType, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ITLBWriteF, .sfencevmaM, .ITLBMissF, // pmp/pma (inside mmu) signals. - .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, .InstrAccessFaultF, .InstrDAPageFaultF); + .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, .InstrAccessFaultF, .InstrUpdateDAF); // integer execution unit: integer register file, datapath and controller ieu ieu(.clk, .reset, @@ -238,7 +238,7 @@ module wallypipelinedcore ( .HPTWInstrAccessFaultM, // connects to privilege .StoreAmoMisalignedFaultM, // connects to privilege .StoreAmoAccessFaultM, // connects to privilege - .InstrDAPageFaultF, + .InstrUpdateDAF, .PCFSpill, .ITLBMissF, .PTE, .PageType, .ITLBWriteF, .SelHPTW, .LSUStallM); @@ -313,7 +313,7 @@ module wallypipelinedcore ( end // multiply/divide unit - if (`M_SUPPORTED) begin:mdu + if (`M_SUPPORTED | `ZMMUL_SUPPORTED) begin:mdu mdu mdu(.clk, .reset, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E, diff --git a/testbench/common/riscvassertions.sv b/testbench/common/riscvassertions.sv index f6cb4c6f..f733aac5 100644 --- a/testbench/common/riscvassertions.sv +++ b/testbench/common/riscvassertions.sv @@ -23,40 +23,42 @@ module riscvassertions; initial begin - assert (`PMP_ENTRIES == 0 | `PMP_ENTRIES==16 | `PMP_ENTRIES==64) else $error("Illegal 
number of PMP entries: PMP_ENTRIES must be 0, 16, or 64"); - assert (`S_SUPPORTED | `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support"); - assert (`IDIV_BITSPERCYCLE == 1 | `IDIV_BITSPERCYCLE==2 | `IDIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: IDIV_BITSPERCYCLE must be 1, 2, or 4"); - assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double fp (D) without supporting float (F)"); - assert (`D_SUPPORTED | ~`Q_SUPPORTED) else $error("Can't support quad fp (Q) without supporting double (D)"); - assert (`F_SUPPORTED | ~`ZFH_SUPPORTED) else $error("Can't support half-precision fp (ZFH) without supporting float (F)"); - assert (`DCACHE_SUPPORTED | ~`F_SUPPORTED | `FLEN <= `XLEN) else $error("Data cache required to support FLEN > XLEN because AHB bus width is XLEN"); + $display("IDIV_ON_FPU = %b M_SUPPORTED %b comb %b\n", `IDIV_ON_FPU, `M_SUPPORTED, ((`IDIV_ON_FPU) || (!`M_SUPPORTED))); + assert (`PMP_ENTRIES == 0 || `PMP_ENTRIES==16 || `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64"); + assert (`S_SUPPORTED || `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support"); + assert (`IDIV_BITSPERCYCLE == 1 || `IDIV_BITSPERCYCLE==2 || `IDIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: IDIV_BITSPERCYCLE must be 1, 2, or 4"); + assert (`F_SUPPORTED || ~`D_SUPPORTED) else $error("Can't support double fp (D) without supporting float (F)"); + assert (`D_SUPPORTED || ~`Q_SUPPORTED) else $error("Can't support quad fp (Q) without supporting double (D)"); + assert (`F_SUPPORTED || ~`ZFH_SUPPORTED) else $error("Can't support half-precision fp (ZFH) without supporting float (F)"); + assert (`DCACHE_SUPPORTED || ~`F_SUPPORTED || `FLEN <= `XLEN) else $error("Data cache required to support FLEN > XLEN because AHB bus width is XLEN"); assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported"); - 
assert (`FLEN<=`XLEN | `DCACHE_SUPPORTED | `DTIM_SUPPORTED) else $error("Wally does not support FLEN > XLEN unleses data cache or DTIM is supported"); - assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (!`DCACHE_SUPPORTED) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`DCACHE_LINELENINBITS >= 128 | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); + assert (`FLEN<=`XLEN || `DCACHE_SUPPORTED || `DTIM_SUPPORTED) else $error("Wally does not support FLEN > XLEN unleses data cache or DTIM is supported"); + assert (`DCACHE_WAYSIZEINBYTES <= 4096 || (!`DCACHE_SUPPORTED) || `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); + assert (`DCACHE_LINELENINBITS >= 128 || (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size"); - assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (!`ICACHE_SUPPORTED) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`ICACHE_LINELENINBITS >= 32 | (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); + assert (`ICACHE_WAYSIZEINBYTES <= 4096 || (!`ICACHE_SUPPORTED) || `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); + assert (`ICACHE_LINELENINBITS >= 32 || (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size"); - assert 
(2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be a power of 2"); - assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (!`DCACHE_SUPPORTED)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be a power of 2"); - assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (!`ICACHE_SUPPORTED)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2"); - assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2"); + assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS || (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be a power of 2"); + assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES || (!`DCACHE_SUPPORTED)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); + assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS || (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be a power of 2"); + assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES || (!`ICACHE_SUPPORTED)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); + assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES || `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2"); + assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES || `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2"); assert (`UNCORE_RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if UNCORE_RAM_RANGE is less than 56'h07FFFFFF"); - assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if 
ZICSR not supported."); - assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZICSR not supported"); - assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported"); - assert (`VIRTMEM_SUPPORTED == 0 | (`DTIM_SUPPORTED == 0 & `IROM_SUPPORTED == 0)) else $error("Can't simultaneously have virtual memory and DTIM_SUPPORTED/IROM_SUPPORTED because local memories don't translate addresses"); - assert (`DCACHE_SUPPORTED | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); - assert (`ICACHE_SUPPORTED | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); - assert ((`DCACHE_SUPPORTED == 0 & `ICACHE_SUPPORTED == 0) | `BUS_SUPPORTED) else $error("Dcache and Icache requires DBUS_SUPPORTED."); - assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1"); + assert (`ZICSR_SUPPORTED == 1 || (`PMP_ENTRIES == 0 && `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported."); + assert (`ZICSR_SUPPORTED == 1 || (`S_SUPPORTED == 0 && `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZICSR not supported"); + assert (`U_SUPPORTED || (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported"); + assert (`VIRTMEM_SUPPORTED == 0 || (`DTIM_SUPPORTED == 0 && `IROM_SUPPORTED == 0)) else $error("Can't simultaneously have virtual memory and DTIM_SUPPORTED/IROM_SUPPORTED because local memories don't translate addresses"); + assert (`DCACHE_SUPPORTED || `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); + assert (`ICACHE_SUPPORTED || `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); + assert ((`DCACHE_SUPPORTED == 0 && `ICACHE_SUPPORTED == 0) || `BUS_SUPPORTED) else $error("Dcache and Icache requires DBUS_SUPPORTED."); + assert 
(`DCACHE_LINELENINBITS <= `XLEN*16 || (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1"); assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words"); - assert (`DCACHE_SUPPORTED | `A_SUPPORTED == 0) else $error("Atomic extension (A) requires cache on Wally."); - assert (`IDIV_ON_FPU == 0 | `F_SUPPORTED) else $error("IDIV on FPU needs F_SUPPORTED"); - assert (`SSTC_SUPPORTED == 0 | (`S_SUPPORTED)) else $error("SSTC requires S_SUPPORTED"); + assert (`DCACHE_SUPPORTED || (`A_SUPPORTED == 0)) else $error("Atomic extension (A) requires cache on Wally."); + assert (`IDIV_ON_FPU == 0 || `F_SUPPORTED) else $error("IDIV on FPU needs F_SUPPORTED"); + assert (`SSTC_SUPPORTED == 0 || (`S_SUPPORTED)) else $error("SSTC requires S_SUPPORTED"); + assert ((`ZMMUL_SUPPORTED == 0) || (`M_SUPPORTED ==0)) else $error("At most one of ZMMUL_SUPPORTED and M_SUPPORTED can be enabled"); end endmodule