From ebd6b931c6a64b7c8b6636d9a7601d1067d85bbf Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Wed, 24 Mar 2021 13:39:45 -0400 Subject: [PATCH 01/11] Fix bug in cache line --- wally-pipelined/src/cache/line.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/src/cache/line.sv b/wally-pipelined/src/cache/line.sv index e498d073..6fe848e5 100644 --- a/wally-pipelined/src/cache/line.sv +++ b/wally-pipelined/src/cache/line.sv @@ -62,7 +62,7 @@ module rocacheline #(parameter LINESIZE = 256, parameter TAGSIZE = 32, parameter always_comb begin - assign DataWord = DataLinesOut[WordSelect[OFFSETSIZE-1:$clog2(WORDSIZE)]]; + assign DataWord = DataLinesOut[WordSelect[OFFSETSIZE-1:$clog2(WORDSIZE/8)]]; end endmodule From ad0d77e9e1ca92449c13b7dd9bc4d3575e43258a Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Wed, 24 Mar 2021 13:40:08 -0400 Subject: [PATCH 02/11] Begin rewrite of icache module to use a direct-mapped scheme --- wally-pipelined/src/ifu/icache.sv | 147 ++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index 4208c355..17c8bf1e 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -48,6 +48,153 @@ module icache( output logic [31:0] InstrRawD ); + // Configuration parameters + // TODO Move these to a config file + localparam integer ICACHELINESIZE = 256; + localparam integer ICACHENUMLINES = 512; + + // Input signals to cache memory + logic FlushMem; + logic [`XLEN-1:12] ICacheMemReadUpperPAdr; + logic [11:0] ICacheMemReadLowerAdr; + logic ICacheMemWriteEnable; + logic [ICACHELINESIZE-1:0] ICacheMemWriteData; + logic [`XLEN-1:0] ICacheMemWritePAdr; + // Output signals from cache memory + logic [`XLEN-1:0] ICacheMemReadData; + logic ICacheMemReadValid; + + rodirectmappedmem #(.LINESIZE(ICACHELINESIZE), .NUMLINES(ICACHENUMLINES)) cachemem( + .*, + .flush(FlushMem), + .ReadUpperPAdr(ICacheMemReadUpperPAdr), + .ReadLowerAdr(ICacheMemReadLowerAdr), + .WriteEnable(ICacheMemWriteEnable), + .WriteLine(ICacheMemWriteData), + .WritePAdr(ICacheMemWritePAdr), + .DataWord(ICacheMemReadData), + .DataValid(ICacheMemReadValid) + ); + + icachecontroller #(.LINESIZE(ICACHELINESIZE)) controller(.*); +endmodule + +module icachecontroller #(parameter LINESIZE = 256) ( + // Inputs from pipeline + input logic clk, reset, + input logic StallF, StallD, + input logic FlushD, + + // Input the address to read + // The upper bits of the physical pc + input logic [`XLEN-1:12] UpperPCPF, + // The lower bits of the virtual pc + input logic [11:0] LowerPCF, + + // Signals to/from cache memory + // The read coming out of it + input logic [`XLEN-1:0] ICacheMemReadData, + input logic ICacheMemReadValid, + // The address at which we want to search the cache memory + output logic [`XLEN-1:12] ICacheMemReadUpperPAdr, + output logic [11:0] ICacheMemReadLowerAdr, + // Load data into the cache + output logic ICacheMemWriteEnable, + output logic [LINESIZE-1:0] ICacheMemWriteData, + output logic [`XLEN-1:0] ICacheMemWritePAdr, + + // Outputs to rest of ifu + // High if the instruction in the fetch stage is compressed + output logic CompressedF, + // The instruction that was requested + // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros + output logic [31:0] InstrRawD, + + // Outputs to pipeline control stuff + output logic ICacheStallF, + + // Signals to/from ahblite interface + // A read containing the requested data + input logic [`XLEN-1:0] InstrInF, + // The read we request from main memory + output logic [`XLEN-1:0] InstrPAdrF, + output logic InstrReadF +); + + logic [31:0] AlignedInstrRawF, AlignedInstrRawD; + logic FlushDLastCycle; + const logic [31:0] NOP = 32'h13; + + // TODO allow compressed instructions + // (start with noncompressed only to get something working) + assign CompressedF = 1'b0; + + // Handle happy path (data in cache, reads aligned) + always_comb begin + assign ICacheMemReadLowerAdr = LowerPCF; + assign ICacheMemReadUpperPAdr = UpperPCPF; + end + + generate + if (`XLEN == 32) begin + assign AlignedInstrRawF = ICacheMemReadData; + end else begin + assign AlignedInstrRawF = LowerPCF[2] ? ICacheMemReadData[63:32] : ICacheMemReadData[31:0]; + end + endgenerate + + flopenr #(32) AlignedInstrRawDFlop(clk, reset, ~StallD, AlignedInstrRawF, AlignedInstrRawD); + flopr #(1) FlushDLastCycleFlop(clk, reset, FlushD | (FlushDLastCycle & StallF), FlushDLastCycle); + mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, FlushDLastCycle, InstrRawD); + + // Handle cache faults + + localparam integer WORDSPERLINE = LINESIZE/`XLEN; + localparam integer OFFSETWIDTH = $clog2(LINESIZE/8); + + logic FetchState; + logic [$clog2(WORDSPERLINE)-1:0] FetchWordNum; + logic [`XLEN-1:0] LineAlignedPCPF; + + flopr #(1) FetchStateFlop(clk, reset, 1'b0, FetchState); + flopr #($clog2(WORDSPERLINE)) FetchWordNumFlop(clk, reset, {$clog2(WORDSPERLINE){1'b0}}, FetchWordNum); + + genvar i; + generate + for (i=0; i < WORDSPERLINE; i++) begin + flopenr #(32) flop(clk, reset, FetchState & (i == FetchWordNum), InstrInF, ICacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]); + end + endgenerate + + always_comb begin + assign InstrReadF = FetchState; + assign LineAlignedPCPF = {UpperPCPF, LowerPCF[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}}; + assign InstrPAdrF = LineAlignedPCPF + i*`XLEN; + end +endmodule + +module oldicache( + // Basic pipeline stuff + input logic clk, reset, + input logic StallF, StallD, + input logic FlushD, + // Upper bits of physical address for PC + input logic [`XLEN-1:12] UpperPCPF, + // Lower 12 bits of virtual PC address, since it's faster this way + input logic [11:0] LowerPCF, + // Data read in from the ebu unit + input logic [`XLEN-1:0] InstrInF, + // Read requested from the ebu unit + output logic [`XLEN-1:0] InstrPAdrF, + output logic InstrReadF, + // High if the instruction currently in the fetch stage is compressed + output logic CompressedF, + // High if the icache is requesting a stall + output logic ICacheStallF, + // The raw (not decompressed) instruction that was requested + // If the next instruction is compressed, the upper 16 bits may be anything + output logic [31:0] InstrRawD +); logic DelayF, DelaySideF, FlushDLastCyclen, DelayD; logic [1:0] InstrDMuxChoice; logic [15:0] MisalignedHalfInstrF, MisalignedHalfInstrD; From ba95557c449f9566e05ac2de7243b736792cf4d5 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Wed, 24 Mar 2021 13:58:43 -0400 Subject: [PATCH 03/11] More progress on icache controller --- wally-pipelined/src/ifu/icache.sv | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index 17c8bf1e..c0f04286 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -150,14 +150,15 @@ module icachecontroller #(parameter LINESIZE = 256) ( // Handle cache faults localparam integer WORDSPERLINE = LINESIZE/`XLEN; + localparam integer LOGWPL = $clog2(WORDSPERLINE); localparam integer OFFSETWIDTH = $clog2(LINESIZE/8); - logic FetchState; - logic [$clog2(WORDSPERLINE)-1:0] FetchWordNum; + logic FetchState, EndFetchState, BeginFetchState; + logic [LOGWPL:0] FetchWordNum, NextFetchWordNum; logic [`XLEN-1:0] LineAlignedPCPF; - flopr #(1) FetchStateFlop(clk, reset, 1'b0, FetchState); - flopr #($clog2(WORDSPERLINE)) FetchWordNumFlop(clk, reset, {$clog2(WORDSPERLINE){1'b0}}, FetchWordNum); + flopr #(1) FetchStateFlop(clk, reset, BeginFetchState | (FetchState & ~EndFetchState), FetchState); + flopr #(LOGWPL+1) FetchWordNumFlop(clk, reset, NextFetchWordNum, FetchWordNum); genvar i; generate @@ -166,10 +167,23 @@ module icachecontroller #(parameter LINESIZE = 256) ( end endgenerate + // Machinery to request the correct addresses from main memory always_comb begin assign InstrReadF = FetchState; assign LineAlignedPCPF = {UpperPCPF, LowerPCF[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}}; - assign InstrPAdrF = LineAlignedPCPF + i*`XLEN; + assign InstrPAdrF = LineAlignedPCPF + FetchWordNum*`XLEN; + assign NextFetchWordNum = FetchState ? FetchWordNum+1 : {LOGWPL+1{1'b0}}; + end + + // Write to cache memory when we have the line here + always_comb begin + assign BeginFetchState = 1'b0; + assign EndFetchState = FetchWordNum == {1'b1, {LOGWPL{1'b0}}}; + end + + // Stall the pipeline while loading a new line from memory + always_comb begin + assign ICacheStallF = FetchState | ~ICacheMemReadValid; end endmodule From 602271ff7b91a6eda837f07196b571ceff887f32 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Wed, 24 Mar 2021 16:56:44 -0400 Subject: [PATCH 04/11] rv64i linear control flow now working --- wally-pipelined/regression/wally-pipelined.do | 2 +- .../regression/wave-dos/ahb-waves.do | 8 -- .../regression/wave-dos/cache-waves.do | 82 +++++++++++++++++++ .../regression/wave-dos/default-waves.do | 5 -- wally-pipelined/src/cache/line.sv | 4 +- wally-pipelined/src/ebu/ahblite.sv | 2 + wally-pipelined/src/hazard/hazard.sv | 4 +- wally-pipelined/src/ifu/icache.sv | 22 +++-- wally-pipelined/src/ifu/ifu.sv | 1 + .../src/wally/wallypipelinedhart.sv | 2 +- .../testbench/testbench-imperas.sv | 3 +- 11 files changed, 109 insertions(+), 26 deletions(-) create mode 100644 wally-pipelined/regression/wave-dos/cache-waves.do diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do index a5041005..5f1b1406 100644 --- a/wally-pipelined/regression/wally-pipelined.do +++ b/wally-pipelined/regression/wally-pipelined.do @@ -42,7 +42,7 @@ vsim workopt view wave -- display input and output signals as hexidecimal values -do ./wave-dos/ahb-waves.do +do ./wave-dos/cache-waves.do -- Set Wave Output Items TreeUpdate [SetDefaultTree] diff --git a/wally-pipelined/regression/wave-dos/ahb-waves.do b/wally-pipelined/regression/wave-dos/ahb-waves.do index f043d779..c3a38563 100644 --- a/wally-pipelined/regression/wave-dos/ahb-waves.do +++ b/wally-pipelined/regression/wave-dos/ahb-waves.do @@ -19,16 +19,8 @@ add wave -divider add wave -hex /testbench/dut/hart/ifu/PCF add wave -hex /testbench/dut/hart/ifu/PCD add wave -hex /testbench/dut/hart/ifu/InstrD - add wave /testbench/InstrDName add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD -add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD -add wave -divider -add wave -hex /testbench/dut/hart/ifu/ic/InstrPAdrF -add wave /testbench/dut/hart/ifu/ic/DelayF -add wave /testbench/dut/hart/ifu/ic/DelaySideF -add wave /testbench/dut/hart/ifu/ic/DelayD -add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD add wave -divider add wave -hex /testbench/dut/hart/ifu/PCE diff --git a/wally-pipelined/regression/wave-dos/cache-waves.do b/wally-pipelined/regression/wave-dos/cache-waves.do new file mode 100644 index 00000000..e39d40a0 --- /dev/null +++ b/wally-pipelined/regression/wave-dos/cache-waves.do @@ -0,0 +1,82 @@ +add wave /testbench/clk +add wave /testbench/reset +add wave -divider + +#add wave /testbench/dut/hart/ebu/IReadF +add wave /testbench/dut/hart/DataStall +add wave /testbench/dut/hart/InstrStall +add wave /testbench/dut/hart/StallF +add wave /testbench/dut/hart/StallD +add wave /testbench/dut/hart/StallE +add wave /testbench/dut/hart/StallM +add wave /testbench/dut/hart/StallW +add wave /testbench/dut/hart/FlushD +add wave /testbench/dut/hart/FlushE +add wave /testbench/dut/hart/FlushM +add wave /testbench/dut/hart/FlushW + +add wave -divider +add wave -hex /testbench/dut/hart/ifu/PCF +add wave -hex /testbench/dut/hart/ifu/PCD +add wave -hex /testbench/dut/hart/ifu/InstrD + +add wave /testbench/InstrDName +add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD +add wave -hex /testbench/dut/hart/ifu/ic/controller/AlignedInstrRawD +add wave -divider +add wave -hex /testbench/dut/hart/ifu/ic/controller/FetchState +add wave -hex /testbench/dut/hart/ifu/ic/controller/FetchWordNum +add wave -hex /testbench/dut/hart/ifu/ic/controller/ICacheMemWriteEnable +add wave -hex /testbench/dut/hart/ifu/ic/InstrPAdrF +add wave -hex /testbench/dut/hart/ifu/ic/InstrAckF +add wave -hex /testbench/dut/hart/ifu/ic/controller/ICacheMemWriteData +add wave -hex /testbench/dut/hart/ifu/ic/controller/ICacheMemWritePAdr +add wave -divider + +add wave -hex /testbench/dut/hart/ifu/PCE +add wave -hex /testbench/dut/hart/ifu/InstrE +add wave /testbench/InstrEName +add wave -hex /testbench/dut/hart/ieu/dp/SrcAE +add wave -hex /testbench/dut/hart/ieu/dp/SrcBE +add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE +#add wave /testbench/dut/hart/ieu/dp/PCSrcE +add wave -divider + +add wave -hex /testbench/dut/hart/ifu/PCM +add wave -hex /testbench/dut/hart/ifu/InstrM +add wave /testbench/InstrMName +add wave /testbench/dut/uncore/dtim/memwrite +add wave -hex /testbench/dut/uncore/HADDR +add wave -hex /testbench/dut/uncore/HWDATA +add wave -divider + +add wave -hex /testbench/dut/hart/ebu/MemReadM +add wave -hex /testbench/dut/hart/ebu/InstrReadF +add wave -hex /testbench/dut/hart/ebu/BusState +add wave -hex /testbench/dut/hart/ebu/NextBusState +add wave -hex /testbench/dut/hart/ebu/HADDR +add wave -hex /testbench/dut/hart/ebu/HREADY +add wave -hex /testbench/dut/hart/ebu/HTRANS +add wave -hex /testbench/dut/hart/ebu/HRDATA +add wave -hex /testbench/dut/hart/ebu/HWRITE +add wave -hex /testbench/dut/hart/ebu/HWDATA +add wave -hex /testbench/dut/hart/ebu/CaptureDataM +add wave -hex /testbench/dut/hart/ebu/InstrStall +add wave -divider + +add wave -hex /testbench/dut/uncore/dtim/* +add wave -divider + +add wave -hex /testbench/dut/hart/ifu/PCW +add wave -hex /testbench/dut/hart/ifu/InstrW +add wave /testbench/InstrWName +add wave /testbench/dut/hart/ieu/dp/RegWriteW +add wave -hex /testbench/dut/hart/ebu/ReadDataW +add wave -hex /testbench/dut/hart/ieu/dp/ResultW +add wave -hex /testbench/dut/hart/ieu/dp/RdW +add wave -divider + +add wave -hex /testbench/dut/uncore/dtim/* +add wave -divider + +add wave -hex -r /testbench/* diff --git a/wally-pipelined/regression/wave-dos/default-waves.do b/wally-pipelined/regression/wave-dos/default-waves.do index 4b645651..ef4e30c6 100644 --- a/wally-pipelined/regression/wave-dos/default-waves.do +++ b/wally-pipelined/regression/wave-dos/default-waves.do @@ -23,11 +23,6 @@ add wave -hex /testbench/dut/hart/ifu/PCD add wave -hex /testbench/dut/hart/ifu/InstrD add wave /testbench/InstrDName add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD -add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD -add wave /testbench/dut/hart/ifu/ic/DelayF -add wave /testbench/dut/hart/ifu/ic/DelaySideF -add wave /testbench/dut/hart/ifu/ic/DelayD -add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD add wave -divider add wave -hex /testbench/dut/hart/ifu/PCE add wave -hex /testbench/dut/hart/ifu/InstrE diff --git a/wally-pipelined/src/cache/line.sv b/wally-pipelined/src/cache/line.sv index 6fe848e5..d90cd206 100644 --- a/wally-pipelined/src/cache/line.sv +++ b/wally-pipelined/src/cache/line.sv @@ -55,8 +55,8 @@ module rocacheline #(parameter LINESIZE = 256, parameter TAGSIZE = 32, parameter genvar i; generate for (i=0; i < NUMWORDS; i++) begin - assign DataLinesIn[i] = WriteData[NUMWORDS*i+WORDSIZE-1:NUMWORDS*i]; - flopenr #(LINESIZE) LineFlop(clk, reset, WriteEnable, DataLinesIn[i], DataLinesOut[i]); + assign DataLinesIn[i] = WriteData[WORDSIZE*(i+1)-1:WORDSIZE*i]; + flopenr #(WORDSIZE) LineFlop(clk, reset, WriteEnable, DataLinesIn[i], DataLinesOut[i]); end endgenerate diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 90ef018b..58a28747 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -41,6 +41,7 @@ module ahblite ( input logic [`XLEN-1:0] InstrPAdrF, // *** rename these to match block diagram input logic InstrReadF, output logic [`XLEN-1:0] InstrRData, + output logic InstrAckF, // Signals from Data Cache input logic [`XLEN-1:0] MemPAdrM, input logic MemReadM, MemWriteM, @@ -171,6 +172,7 @@ module ahblite ( assign #1 MMUReady = (NextBusState == MMUIDLE); assign InstrRData = HRDATA; + assign InstrAckF = (BusState == INSTRREAD) && (NextBusState != INSTRREAD) || (BusState == INSTRREADC) && (NextBusState != INSTRREADC); assign MMUReadPTE = HRDATA; assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021 assign CaptureDataM = ((BusState == MEMREAD) && (NextBusState != MEMREAD)) || diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index 9542020d..ecd3c366 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -53,12 +53,12 @@ module hazard( assign BranchFlushDE = BPPredWrongE | RetM | TrapM; - assign StallFCause = CSRWritePendingDEM & ~(BranchFlushDE); + assign StallFCause = CSRWritePendingDEM & ~(BranchFlushDE) | ICacheStallF; assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD) & ~(BranchFlushDE); // stall in decode if instruction is a load/mul/csr dependent on previous // assign StallDCause = LoadStallD | MulDivStallD | CSRRdStallD; // stall in decode if instruction is a load/mul/csr dependent on previous assign StallECause = 0; assign StallMCause = 0; - assign StallWCause = DataStall | InstrStall; + assign StallWCause = DataStall; // Each stage stalls if the next stage is stalled or there is a cause to stall this stage. assign StallF = StallD | StallFCause; diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index c0f04286..df608a39 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -36,6 +36,7 @@ module icache( input logic [11:0] LowerPCF, // Data read in from the ebu unit input logic [`XLEN-1:0] InstrInF, + input logic InstrAckF, // Read requested from the ebu unit output logic [`XLEN-1:0] InstrPAdrF, output logic InstrReadF, @@ -77,6 +78,8 @@ module icache( ); icachecontroller #(.LINESIZE(ICACHELINESIZE)) controller(.*); + + assign FlushMem = 1'b0; endmodule module icachecontroller #(parameter LINESIZE = 256) ( @@ -116,6 +119,7 @@ module icachecontroller #(parameter LINESIZE = 256) ( // Signals to/from ahblite interface // A read containing the requested data input logic [`XLEN-1:0] InstrInF, + input logic InstrAckF, // The read we request from main memory output logic [`XLEN-1:0] InstrPAdrF, output logic InstrReadF @@ -163,22 +167,28 @@ module icachecontroller #(parameter LINESIZE = 256) ( genvar i; generate for (i=0; i < WORDSPERLINE; i++) begin - flopenr #(32) flop(clk, reset, FetchState & (i == FetchWordNum), InstrInF, ICacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]); + flopenr #(`XLEN) flop(clk, reset, FetchState & (i == FetchWordNum), InstrInF, ICacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]); end endgenerate + // Enter the fetch state when we hit a cache fault + always_comb begin + assign BeginFetchState = ~ICacheMemReadValid & ~FetchState; + end + // Machinery to request the correct addresses from main memory always_comb begin - assign InstrReadF = FetchState; + assign InstrReadF = FetchState & ~EndFetchState; assign LineAlignedPCPF = {UpperPCPF, LowerPCF[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}}; - assign InstrPAdrF = LineAlignedPCPF + FetchWordNum*`XLEN; - assign NextFetchWordNum = FetchState ? FetchWordNum+1 : {LOGWPL+1{1'b0}}; + assign InstrPAdrF = LineAlignedPCPF + FetchWordNum*(`XLEN/8); + assign NextFetchWordNum = FetchState ? FetchWordNum+InstrAckF : {LOGWPL+1{1'b0}}; end // Write to cache memory when we have the line here always_comb begin - assign BeginFetchState = 1'b0; - assign EndFetchState = FetchWordNum == {1'b1, {LOGWPL{1'b0}}}; + assign EndFetchState = FetchWordNum == {1'b1, {LOGWPL{1'b0}}} & FetchState; + assign ICacheMemWritePAdr = LineAlignedPCPF; + assign ICacheMemWriteEnable = EndFetchState; end // Stall the pipeline while loading a new line from memory diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index c68786e5..747a2b49 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -32,6 +32,7 @@ module ifu ( input logic FlushF, FlushD, FlushE, FlushM, FlushW, // Fetch input logic [`XLEN-1:0] InstrInF, + input logic InstrAckF, output logic [`XLEN-1:0] PCF, output logic [`XLEN-1:0] InstrPAdrF, output logic InstrReadF, diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index c858befd..49214b0d 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -112,7 +112,7 @@ module wallypipelinedhart ( logic [`XLEN-1:0] InstrRData; logic InstrReadF; logic DataStall, InstrStall; - logic InstrAckD, MemAckW; + logic InstrAckF, MemAckW; logic BPPredWrongE, BPPredWrongM; logic [3:0] InstrClassM; diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 8b128b17..37d9883e 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -370,7 +370,8 @@ string tests32i[] = { // Track names of instructions instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE, - dut.hart.ifu.ic.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, + dut.hart.ifu.ic.controller.AlignedInstrRawF, + dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, dut.hart.ifu.InstrM, dut.hart.ifu.InstrW, InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); From 128278ea2799da37487e9e231dd29e9c6aa27898 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Wed, 24 Mar 2021 17:23:00 -0400 Subject: [PATCH 05/11] Working for all of rv64i now, but not compressed instructions --- wally-pipelined/src/hazard/hazard.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index ecd3c366..3768f0fc 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -53,12 +53,12 @@ module hazard( assign BranchFlushDE = BPPredWrongE | RetM | TrapM; - assign StallFCause = CSRWritePendingDEM & ~(BranchFlushDE) | ICacheStallF; + assign StallFCause = CSRWritePendingDEM & ~(BranchFlushDE); assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD) & ~(BranchFlushDE); // stall in decode if instruction is a load/mul/csr dependent on previous // assign StallDCause = LoadStallD | MulDivStallD | CSRRdStallD; // stall in decode if instruction is a load/mul/csr dependent on previous assign StallECause = 0; assign StallMCause = 0; - assign StallWCause = DataStall; + assign StallWCause = DataStall | ICacheStallF; // Each stage stalls if the next stage is stalled or there is a cause to stall this stage. assign StallF = StallD | StallFCause; From ce6f102fc5b9f73126cfba3a1f558118f19c6874 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Thu, 25 Mar 2021 00:46:51 -0400 Subject: [PATCH 06/11] Clean up some stuff --- wally-pipelined/src/ebu/ahblite.sv | 5 ++--- wally-pipelined/src/hazard/hazard.sv | 2 +- wally-pipelined/src/wally/wallypipelinedhart.sv | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 58a28747..c0aa27db 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -71,7 +71,7 @@ module ahblite ( output logic [3:0] HSIZED, output logic HWRITED, // Stalls - output logic InstrStall,/*InstrUpdate, */DataStall + output logic /*InstrUpdate, */DataStall // *** add a chip-level ready signal as part of handshake ); @@ -135,8 +135,7 @@ module ahblite ( // stall signals assign #2 DataStall = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || - (NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE) || - (NextBusState == MMUTRANSLATE) || (NextBusState == MMUIDLE); + (NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE); // *** Could get finer grained stalling if we distinguish between MMU // instruction address translation and data address translation assign #1 InstrStall = (NextBusState == INSTRREAD) || (NextBusState == INSTRREADC) || diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index 3768f0fc..c225a4e8 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -29,7 +29,7 @@ module hazard( // Detect hazards input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM, input logic LoadStallD, MulDivStallD, CSRRdStallD, - input logic InstrStall, DataStall, ICacheStallF, + input logic DataStall, ICacheStallF, // Stall & flush outputs output logic StallF, StallD, StallE, StallM, StallW, output logic FlushF, FlushD, FlushE, FlushM, FlushW diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 49214b0d..0c079ba1 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -111,7 +111,7 @@ module wallypipelinedhart ( logic [`XLEN-1:0] InstrPAdrF; logic [`XLEN-1:0] InstrRData; logic InstrReadF; - logic DataStall, InstrStall; + logic DataStall; logic InstrAckF, MemAckW; logic BPPredWrongE, BPPredWrongM; From 0290568a5227b4fba3cf20aae9b4981b99b0b4f3 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Thu, 25 Mar 2021 13:18:30 -0400 Subject: [PATCH 07/11] Make cache output NOP after a reset --- wally-pipelined/regression/wave-dos/ahb-waves.do | 2 +- wally-pipelined/regression/wave-dos/cache-waves.do | 2 +- wally-pipelined/regression/wave-dos/default-waves.do | 2 +- wally-pipelined/src/ifu/icache.sv | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/ahb-waves.do b/wally-pipelined/regression/wave-dos/ahb-waves.do index c3a38563..c542f584 100644 --- a/wally-pipelined/regression/wave-dos/ahb-waves.do +++ b/wally-pipelined/regression/wave-dos/ahb-waves.do @@ -4,7 +4,7 @@ add wave -divider #add wave /testbench/dut/hart/ebu/IReadF add wave /testbench/dut/hart/DataStall -add wave /testbench/dut/hart/InstrStall +add wave /testbench/dut/hart/ICacheStallF add wave /testbench/dut/hart/StallF add wave /testbench/dut/hart/StallD add wave /testbench/dut/hart/StallE diff --git a/wally-pipelined/regression/wave-dos/cache-waves.do b/wally-pipelined/regression/wave-dos/cache-waves.do index e39d40a0..bdd88a13 100644 --- a/wally-pipelined/regression/wave-dos/cache-waves.do +++ b/wally-pipelined/regression/wave-dos/cache-waves.do @@ -4,7 +4,7 @@ add wave -divider #add wave /testbench/dut/hart/ebu/IReadF add wave /testbench/dut/hart/DataStall -add wave /testbench/dut/hart/InstrStall +add wave /testbench/dut/hart/ICacheStallF add wave /testbench/dut/hart/StallF add wave /testbench/dut/hart/StallD add wave /testbench/dut/hart/StallE diff --git a/wally-pipelined/regression/wave-dos/default-waves.do b/wally-pipelined/regression/wave-dos/default-waves.do index ef4e30c6..3f81cfad 100644 --- a/wally-pipelined/regression/wave-dos/default-waves.do +++ b/wally-pipelined/regression/wave-dos/default-waves.do @@ -6,7 +6,7 @@ add wave /testbench/reset add wave -divider #add wave /testbench/dut/hart/ebu/IReadF add wave /testbench/dut/hart/DataStall -add wave /testbench/dut/hart/InstrStall +add wave /testbench/dut/hart/ICacheStallF add wave /testbench/dut/hart/StallF add wave /testbench/dut/hart/StallD add wave /testbench/dut/hart/StallE diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index df608a39..631a9bd6 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -126,7 +126,7 @@ module icachecontroller #(parameter LINESIZE = 256) ( ); logic [31:0] AlignedInstrRawF, AlignedInstrRawD; - logic FlushDLastCycle; + logic FlushDLastCycleN; const logic [31:0] NOP = 32'h13; // TODO allow compressed instructions @@ -148,8 +148,8 @@ module icachecontroller #(parameter LINESIZE = 256) ( endgenerate flopenr #(32) AlignedInstrRawDFlop(clk, reset, ~StallD, AlignedInstrRawF, AlignedInstrRawD); - flopr #(1) FlushDLastCycleFlop(clk, reset, FlushD | (FlushDLastCycle & StallF), FlushDLastCycle); - mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, FlushDLastCycle, InstrRawD); + flopr #(1) FlushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCycleN | ~StallF), FlushDLastCycleN); + mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCycleN, InstrRawD); // Handle cache faults From 3b4f0141f4a7782ef3fd4c4d580d43f8cfa9d88f Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Thu, 25 Mar 2021 14:43:10 -0400 Subject: [PATCH 08/11] Begin work on compressed instructions --- .../regression/wave-dos/ahb-waves.do | 1 - .../regression/wave-dos/cache-waves.do | 1 - wally-pipelined/src/ebu/ahblite.sv | 4 - wally-pipelined/src/ifu/icache.sv | 77 ++++++++++++++++--- .../testbench/testbench-imperas.sv | 2 +- 5 files changed, 69 insertions(+), 16 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/ahb-waves.do b/wally-pipelined/regression/wave-dos/ahb-waves.do index c542f584..263693d7 100644 --- a/wally-pipelined/regression/wave-dos/ahb-waves.do +++ b/wally-pipelined/regression/wave-dos/ahb-waves.do @@ -51,7 +51,6 @@ add wave -hex /testbench/dut/hart/ebu/HRDATA add wave -hex /testbench/dut/hart/ebu/HWRITE add wave -hex /testbench/dut/hart/ebu/HWDATA add wave -hex /testbench/dut/hart/ebu/CaptureDataM -add wave -hex /testbench/dut/hart/ebu/InstrStall add wave -divider add wave -hex /testbench/dut/uncore/dtim/* diff --git a/wally-pipelined/regression/wave-dos/cache-waves.do b/wally-pipelined/regression/wave-dos/cache-waves.do index bdd88a13..20c7061b 100644 --- a/wally-pipelined/regression/wave-dos/cache-waves.do +++ b/wally-pipelined/regression/wave-dos/cache-waves.do @@ -61,7 +61,6 @@ add wave -hex /testbench/dut/hart/ebu/HRDATA add wave -hex /testbench/dut/hart/ebu/HWRITE add wave -hex /testbench/dut/hart/ebu/HWDATA add wave -hex /testbench/dut/hart/ebu/CaptureDataM -add wave -hex /testbench/dut/hart/ebu/InstrStall add wave -divider add wave -hex /testbench/dut/uncore/dtim/* diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index c0aa27db..73df76a3 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -136,10 +136,6 @@ module ahblite ( // stall signals assign #2 DataStall = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || (NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE); - // *** Could get finer grained stalling if we distinguish between MMU - // instruction address translation and data address translation - assign #1 InstrStall = (NextBusState == INSTRREAD) || (NextBusState == INSTRREADC) || - (NextBusState == MMUTRANSLATE) || (NextBusState == MMUIDLE); // bus outputs assign #1 GrantData = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index 631a9bd6..09fb84ae 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -127,11 +127,11 @@ module icachecontroller #(parameter LINESIZE = 256) ( logic [31:0] AlignedInstrRawF, AlignedInstrRawD; logic FlushDLastCycleN; + logic PCPMisalignedF; const logic [31:0] NOP = 32'h13; - // TODO allow compressed instructions - // (start with noncompressed only to get something working) - assign CompressedF = 1'b0; + // Detect if the instruction is compressed + assign CompressedF = AlignedInstrRawF[1:0] != 2'b11; // Handle happy path (data in cache, reads aligned) always_comb begin @@ -141,9 +141,13 @@ module icachecontroller #(parameter LINESIZE = 256) ( generate if (`XLEN == 32) begin - assign AlignedInstrRawF = ICacheMemReadData; + assign AlignedInstrRawF = LowerPCF[1] ? {16'b0, ICacheMemReadData[31:16]} : ICacheMemReadData; + assign PCPMisalignedF = LowerPCF[1] && ~CompressedF; end else begin - assign AlignedInstrRawF = LowerPCF[2] ? ICacheMemReadData[63:32] : ICacheMemReadData[31:0]; + assign AlignedInstrRawF = LowerPCF[2] + ? (LowerPCF[1] ? MisalignedInstrRawF : ICacheMemReadData[63:32]) + : (LowerPCF[1] ? ICacheMemReadData[47:16] : ICacheMemReadData[31:0]); + assign PCPMisalignedF = LowerPCF[2] && LowerPCF[1] && ~CompressedF; end endgenerate @@ -151,15 +155,70 @@ module icachecontroller #(parameter LINESIZE = 256) ( flopr #(1) FlushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCycleN | ~StallF), FlushDLastCycleN); mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCycleN, InstrRawD); + // Stall for faults or misaligned reads + always_comb begin + assign ICacheStallF = FaultStall | MisalignedStall; + end + + // Handle misaligned, noncompressed reads + logic MisalignedState, NextMisalignedState; + logic MisalignedStall; + logic [15:0] MisalignedHalfInstrF; + logic [`XLEN:0] MisalignedInstrRawF; + + always_comb begin + assign MisalignedInstrRawF = {16'b0, ICacheMemReadData[63:48]}; + end + + flopenr #(16) MisalignedHalfInstrFlop(clk, reset, ~FaultStall & (PCPMisalignedF & MisalignedState), AlignedInstrRawF[15:0], MisalignedHalfInstrF); + flopenr #(1) MisalignedStateFlop(clk, reset, ~FaultStall, NextMisalignedState, MisalignedState); + + always_comb begin + assign MisalignedStall = PCPMisalignedF & MisalignedState; + assign NextMisalignedState = ~PCPMisalignedF | ~MisalignedState; + end + + // Pick the correct address to read + always_comb begin + if (~PCPMisalignedF) begin + assign ICacheMemReadUpperPAdr = UpperPCPF; + generate + if (`XLEN == 32) + assign ICacheMemReadLowerAdr = {LowerPCF[31:2], 2'b00}; + else + assign ICacheMemReadLowerAdr = {LowerPCF[31:3], 2'b000}; + endgenerate + end else begin + if (MisalignedState) begin + assign ICacheMemReadUpperPAdr = UpperPCPF; + generate + if (`XLEN == 32) + assign ICacheMemReadLowerAdr = {LowerPCF[31:2]+1, 2'b00}; + else + assign ICacheMemReadLowerAdr = {LowerPCF[31:3]+1, 2'b000}; + endgenerate + end else begin + assign ICacheMemReadUpperPAdr = UpperPCPF; + generate + if (`XLEN == 32) + assign ICacheMemReadLowerAdr = {LowerPCF[31:2], 2'b00}; + else + assign ICacheMemReadLowerAdr = {LowerPCF[31:3], 2'b000}; + endgenerate + end + end + end + // Handle cache faults localparam integer WORDSPERLINE = LINESIZE/`XLEN; localparam integer LOGWPL = $clog2(WORDSPERLINE); localparam integer OFFSETWIDTH = $clog2(LINESIZE/8); - logic FetchState, EndFetchState, BeginFetchState; - logic [LOGWPL:0] FetchWordNum, NextFetchWordNum; - logic [`XLEN-1:0] LineAlignedPCPF; + logic FetchState, EndFetchState, BeginFetchState; + logic FaultStall; + logic [LOGWPL:0] FetchWordNum, NextFetchWordNum; + logic [`XLEN-1:0] LineAlignedPCPF; flopr #(1) FetchStateFlop(clk, reset, BeginFetchState | (FetchState & ~EndFetchState), FetchState); flopr #(LOGWPL+1) FetchWordNumFlop(clk, reset, NextFetchWordNum, FetchWordNum); @@ -193,7 +252,7 @@ module icachecontroller #(parameter LINESIZE = 256) ( // Stall the pipeline while loading a new line from memory always_comb begin - assign ICacheStallF = FetchState | ~ICacheMemReadValid; + assign FaultStall = FetchState | ~ICacheMemReadValid; end endmodule diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 37d9883e..b94c1b62 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -340,7 +340,7 @@ string tests32i[] = { tests = testsBP64; end else begin tests = {tests64i}; - if (`C_SUPPORTED) tests = {tests, tests64ic}; + if (`C_SUPPORTED) tests = {tests64ic, tests}; else tests = {tests, tests64iNOc}; if (`M_SUPPORTED) tests = {tests, tests64m}; if (`A_SUPPORTED) tests = {tests, tests64a}; From 5f4feb0ff12dae150ac3c782c7ed1d7304c96920 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Thu, 25 Mar 2021 15:42:17 -0400 Subject: [PATCH 09/11] Works for misaligned instructions not on line boundaries --- wally-pipelined/src/ifu/icache.sv | 79 +++++++++++++++---------------- 1 file changed, 37 insertions(+), 42 deletions(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index 09fb84ae..85ec4cd3 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -45,7 +45,7 @@ module icache( // High if the icache is requesting a stall output logic ICacheStallF, // The raw (not decompressed) instruction that was requested - // If the next instruction is compressed, the upper 16 bits may be anything + // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros output logic [31:0] InstrRawD ); @@ -125,23 +125,25 @@ module icachecontroller #(parameter LINESIZE = 256) ( output logic InstrReadF ); + // Happy path signals logic [31:0] AlignedInstrRawF, AlignedInstrRawD; logic FlushDLastCycleN; logic PCPMisalignedF; const logic [31:0] NOP = 32'h13; + // Misaligned signals + logic [`XLEN:0] MisalignedInstrRawF; + logic MisalignedStall; + // Cache fault signals + logic FaultStall; // Detect if the instruction is compressed assign CompressedF = AlignedInstrRawF[1:0] != 2'b11; // Handle happy path (data in cache, reads aligned) - always_comb begin - assign ICacheMemReadLowerAdr = LowerPCF; - assign ICacheMemReadUpperPAdr = UpperPCPF; - end generate if (`XLEN == 32) begin - assign AlignedInstrRawF = LowerPCF[1] ? {16'b0, ICacheMemReadData[31:16]} : ICacheMemReadData; + assign AlignedInstrRawF = LowerPCF[1] ? MisalignedInstrRawF : ICacheMemReadData; assign PCPMisalignedF = LowerPCF[1] && ~CompressedF; end else begin assign AlignedInstrRawF = LowerPCF[2] @@ -160,54 +162,48 @@ module icachecontroller #(parameter LINESIZE = 256) ( assign ICacheStallF = FaultStall | MisalignedStall; end - // Handle misaligned, noncompressed reads - logic MisalignedState, NextMisalignedState; - logic MisalignedStall; - logic [15:0] MisalignedHalfInstrF; - logic [`XLEN:0] MisalignedInstrRawF; - always_comb begin - assign MisalignedInstrRawF = {16'b0, ICacheMemReadData[63:48]}; - end + // Handle misaligned, noncompressed reads + + logic MisalignedState, NextMisalignedState; + logic [15:0] MisalignedHalfInstrF; + logic [15:0] UpperHalfWord; flopenr #(16) MisalignedHalfInstrFlop(clk, reset, ~FaultStall & (PCPMisalignedF & MisalignedState), AlignedInstrRawF[15:0], MisalignedHalfInstrF); flopenr #(1) MisalignedStateFlop(clk, reset, ~FaultStall, NextMisalignedState, MisalignedState); + // When doing a misaligned read, swizzle the bits correctly + generate + if (`XLEN == 32) begin + assign UpperHalfWord = ICacheMemReadData[31:16]; + end else begin + assign UpperHalfWord = ICacheMemReadData[63:48]; + end + endgenerate + always_comb begin + if (MisalignedState) begin + assign MisalignedInstrRawF = {16'b0, UpperHalfWord}; + end else begin + assign MisalignedInstrRawF = {ICacheMemReadData[15:0], MisalignedHalfInstrF}; + end + end + + // Manage internal state and stall when necessary always_comb begin assign MisalignedStall = PCPMisalignedF & MisalignedState; assign NextMisalignedState = ~PCPMisalignedF | ~MisalignedState; end // Pick the correct address to read - always_comb begin - if (~PCPMisalignedF) begin - assign ICacheMemReadUpperPAdr = UpperPCPF; - generate - if (`XLEN == 32) - assign ICacheMemReadLowerAdr = {LowerPCF[31:2], 2'b00}; - else - assign ICacheMemReadLowerAdr = {LowerPCF[31:3], 2'b000}; - endgenerate + generate + if (`XLEN == 32) begin + assign ICacheMemReadLowerAdr = {LowerPCF[11:2] + (PCPMisalignedF & ~MisalignedState), 2'b00}; end else begin - if (MisalignedState) begin - assign ICacheMemReadUpperPAdr = UpperPCPF; - generate - if (`XLEN == 32) - assign ICacheMemReadLowerAdr = {LowerPCF[31:2]+1, 2'b00}; - else - assign ICacheMemReadLowerAdr = {LowerPCF[31:3]+1, 2'b000}; - endgenerate - end else begin - assign ICacheMemReadUpperPAdr = UpperPCPF; - generate - if (`XLEN == 32) - assign ICacheMemReadLowerAdr = {LowerPCF[31:2], 2'b00}; - else - assign ICacheMemReadLowerAdr = {LowerPCF[31:3], 2'b000}; - endgenerate - end + assign ICacheMemReadLowerAdr = {LowerPCF[11:3] + (PCPMisalignedF & ~MisalignedState), 3'b00}; end - end + endgenerate + assign ICacheMemReadUpperPAdr = UpperPCPF; + // Handle cache faults @@ -216,7 +212,6 @@ module icachecontroller #(parameter LINESIZE = 256) ( localparam integer OFFSETWIDTH = $clog2(LINESIZE/8); logic FetchState, EndFetchState, BeginFetchState; - logic FaultStall; logic [LOGWPL:0] FetchWordNum, NextFetchWordNum; logic [`XLEN-1:0] LineAlignedPCPF; From 32829bf7a1679811364f607ea7ed89a87869db0d Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Thu, 25 Mar 2021 15:46:35 -0400 Subject: [PATCH 10/11] Remove old icache --- wally-pipelined/src/ifu/icache.sv | 111 ------------------------------ 1 file changed, 111 deletions(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index 85ec4cd3..046126d3 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -250,114 +250,3 @@ module icachecontroller #(parameter LINESIZE = 256) ( assign FaultStall = FetchState | ~ICacheMemReadValid; end endmodule - -module oldicache( - // Basic pipeline stuff - input logic clk, reset, - input logic StallF, StallD, - input logic FlushD, - // Upper bits of physical address for PC - input logic [`XLEN-1:12] UpperPCPF, - // Lower 12 bits of virtual PC address, since it's faster this way - input logic [11:0] LowerPCF, - // Data read in from the ebu unit - input logic [`XLEN-1:0] InstrInF, - // Read requested from the ebu unit - output logic [`XLEN-1:0] InstrPAdrF, - output logic InstrReadF, - // High if the instruction currently in the fetch stage is compressed - output logic CompressedF, - // High if the icache is requesting a stall - output logic ICacheStallF, - // The raw (not decompressed) instruction that was requested - // If the next instruction is compressed, the upper 16 bits may be anything - output logic [31:0] InstrRawD -); - logic DelayF, DelaySideF, FlushDLastCyclen, DelayD; - logic [1:0] InstrDMuxChoice; - logic [15:0] MisalignedHalfInstrF, MisalignedHalfInstrD; - logic [31:0] InstrF, AlignedInstrD; - // Buffer the last read, for ease of accessing it again - logic LastReadDataValidF; - logic [`XLEN-1:0] LastReadDataF, LastReadAdrF, InDataF; - - // instruction for NOP - logic [31:0] nop = 32'h00000013; - - // Temporary change to bridge the new interface to old behaviors - logic [`XLEN-1:0] PCPF; - assign PCPF = {UpperPCPF, LowerPCF}; - - // This flop doesn't stall if StallF is high because we should output a nop - // when FlushD happens, even if the pipeline is also stalled. - flopr #(1) flushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCyclen | ~StallF), FlushDLastCyclen); - - flopenr #(1) delayDFlop(clk, reset, ~StallF, DelayF & ~CompressedF, DelayD); - flopenrc#(1) delayStateFlop(clk, reset, FlushD, ~StallF, DelayF & ~DelaySideF, DelaySideF); - // This flop stores the first half of a misaligned instruction while waiting for the other half - flopenr #(16) halfInstrFlop(clk, reset, DelayF & ~StallF, MisalignedHalfInstrF, MisalignedHalfInstrD); - - // This flop is here to simulate pulling data out of the cache, which is edge-triggered - flopenr #(32) instrFlop(clk, reset, ~StallF, InstrF, AlignedInstrD); - - // These flops cache the previous read, to accelerate things - flopenr #(`XLEN) lastReadDataFlop(clk, reset, InstrReadF & ~StallF, InstrInF, LastReadDataF); - flopenr #(1) lastReadDataVFlop(clk, reset, InstrReadF & ~StallF, 1'b1, LastReadDataValidF); - flopenr #(`XLEN) lastReadAdrFlop(clk, reset, InstrReadF & ~StallF, InstrPAdrF, LastReadAdrF); - - // Decide which address needs to be fetched and sent out over InstrPAdrF - // If the requested address fits inside one read from memory, we fetch that - // address, adjusted to the bit width. Otherwise, we request the lower word - // and then the upper word, in that order. - generate - if (`XLEN == 32) begin - assign InstrPAdrF = PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[31:2], 2'b00} : {PCPF[31:2], 2'b00}) : PCPF; - end else begin - assign InstrPAdrF = PCPF[2] ? (PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[63:3]+1, 3'b000} : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}; - end - endgenerate - - // Read from memory if we don't have the address we want - always_comb if (LastReadDataValidF & (InstrPAdrF == LastReadAdrF)) begin - assign InstrReadF = 0; - end else begin - assign InstrReadF = 1; - end - - // Pick from the memory input or from the previous read, as appropriate - mux2 #(`XLEN) inDataMux(LastReadDataF, InstrInF, InstrReadF, InDataF); - - // If the instruction fits in one memory read, then we put the right bits - // into InstrF. Otherwise, we activate DelayF to signal the rest of the - // machinery to swizzle bits. - generate - if (`XLEN == 32) begin - assign InstrF = PCPF[1] ? {16'b0, InDataF[31:16]} : InDataF; - assign DelayF = PCPF[1]; - assign MisalignedHalfInstrF = InDataF[31:16]; - end else begin - assign InstrF = PCPF[2] ? (PCPF[1] ? {16'b0, InDataF[63:48]} : InDataF[63:32]) : (PCPF[1] ? InDataF[47:16] : InDataF[31:0]); - assign DelayF = PCPF[1] && PCPF[2]; - assign MisalignedHalfInstrF = InDataF[63:48]; - end - endgenerate - // We will likely need to stall later, but stalls are handled by the rest of the pipeline for now - assign ICacheStallF = 0; - - // Detect if the instruction is compressed - assign CompressedF = InstrF[1:0] != 2'b11; - - // Pick the correct output, depending on whether we have to assemble this - // instruction from two reads or not. - // Output the requested instruction (we don't need to worry if the read is - // incomplete, since the pipeline stalls for us when it isn't), or a NOP for - // the cycle when the first of two reads comes in. - always_comb if (~FlushDLastCyclen) begin - assign InstrDMuxChoice = 2'b10; - end else if (DelayD & (MisalignedHalfInstrD[1:0] != 2'b11)) begin - assign InstrDMuxChoice = 2'b11; - end else begin - assign InstrDMuxChoice = {1'b0, DelayD}; - end - mux4 #(32) instrDMux (AlignedInstrD, {InstrInF[15:0], MisalignedHalfInstrD}, nop, {16'b0, MisalignedHalfInstrD}, InstrDMuxChoice, InstrRawD); -endmodule From 39bf2347bc6044496a0a4e978fff505ce0259cb7 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Thu, 25 Mar 2021 18:47:23 -0400 Subject: [PATCH 11/11] Fix error when reading an instruction that crosses a line boundary --- wally-pipelined/regression/wave-dos/cache-waves.do | 3 +++ wally-pipelined/src/ifu/icache.sv | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/wally-pipelined/regression/wave-dos/cache-waves.do b/wally-pipelined/regression/wave-dos/cache-waves.do index 20c7061b..c7b32e1e 100644 --- a/wally-pipelined/regression/wave-dos/cache-waves.do +++ b/wally-pipelined/regression/wave-dos/cache-waves.do @@ -31,8 +31,11 @@ add wave -hex /testbench/dut/hart/ifu/ic/InstrPAdrF add wave -hex /testbench/dut/hart/ifu/ic/InstrAckF add wave -hex /testbench/dut/hart/ifu/ic/controller/ICacheMemWriteData add wave -hex /testbench/dut/hart/ifu/ic/controller/ICacheMemWritePAdr +add wave -hex /testbench/dut/hart/ifu/ic/controller/MisalignedState +add wave -hex /testbench/dut/hart/ifu/ic/controller/MisalignedHalfInstrF add wave -divider + add wave -hex /testbench/dut/hart/ifu/PCE add wave -hex /testbench/dut/hart/ifu/InstrE add wave /testbench/InstrEName diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index 046126d3..0d953760 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -233,7 +233,7 @@ module icachecontroller #(parameter LINESIZE = 256) ( // Machinery to request the correct addresses from main memory always_comb begin assign InstrReadF = FetchState & ~EndFetchState; - assign LineAlignedPCPF = {UpperPCPF, LowerPCF[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}}; + assign LineAlignedPCPF = {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}}; assign InstrPAdrF = LineAlignedPCPF + FetchWordNum*(`XLEN/8); assign NextFetchWordNum = FetchState ? FetchWordNum+InstrAckF : {LOGWPL+1{1'b0}}; end