Merge branch 'cache2' into cache

Conflicts:
	wally-pipelined/testbench/testbench-imperas.sv
This commit is contained in:
Jarred Allen 2021-03-30 13:32:33 -04:00
commit 631454ccf9
11 changed files with 292 additions and 109 deletions

View File

@ -42,7 +42,7 @@ vsim workopt
view wave view wave
-- display input and output signals as hexidecimal values -- display input and output signals as hexidecimal values
do ./wave-dos/ahb-waves.do do ./wave-dos/cache-waves.do
-- Set Wave Output Items -- Set Wave Output Items
TreeUpdate [SetDefaultTree] TreeUpdate [SetDefaultTree]

View File

@ -4,7 +4,7 @@ add wave -divider
#add wave /testbench/dut/hart/ebu/IReadF #add wave /testbench/dut/hart/ebu/IReadF
add wave /testbench/dut/hart/DataStall add wave /testbench/dut/hart/DataStall
add wave /testbench/dut/hart/InstrStall add wave /testbench/dut/hart/ICacheStallF
add wave /testbench/dut/hart/StallF add wave /testbench/dut/hart/StallF
add wave /testbench/dut/hart/StallD add wave /testbench/dut/hart/StallD
add wave /testbench/dut/hart/StallE add wave /testbench/dut/hart/StallE
@ -19,16 +19,8 @@ add wave -divider
add wave -hex /testbench/dut/hart/ifu/PCF add wave -hex /testbench/dut/hart/ifu/PCF
add wave -hex /testbench/dut/hart/ifu/PCD add wave -hex /testbench/dut/hart/ifu/PCD
add wave -hex /testbench/dut/hart/ifu/InstrD add wave -hex /testbench/dut/hart/ifu/InstrD
add wave /testbench/InstrDName add wave /testbench/InstrDName
add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD
add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD
add wave -divider
add wave -hex /testbench/dut/hart/ifu/ic/InstrPAdrF
add wave /testbench/dut/hart/ifu/ic/DelayF
add wave /testbench/dut/hart/ifu/ic/DelaySideF
add wave /testbench/dut/hart/ifu/ic/DelayD
add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD
add wave -divider add wave -divider
add wave -hex /testbench/dut/hart/ifu/PCE add wave -hex /testbench/dut/hart/ifu/PCE
@ -59,7 +51,6 @@ add wave -hex /testbench/dut/hart/ebu/HRDATA
add wave -hex /testbench/dut/hart/ebu/HWRITE add wave -hex /testbench/dut/hart/ebu/HWRITE
add wave -hex /testbench/dut/hart/ebu/HWDATA add wave -hex /testbench/dut/hart/ebu/HWDATA
add wave -hex /testbench/dut/hart/ebu/CaptureDataM add wave -hex /testbench/dut/hart/ebu/CaptureDataM
add wave -hex /testbench/dut/hart/ebu/InstrStall
add wave -divider add wave -divider
add wave -hex /testbench/dut/uncore/dtim/* add wave -hex /testbench/dut/uncore/dtim/*

View File

@ -0,0 +1,84 @@
# Wave-window setup script: each `add wave` appends a signal to the wave
# window in the order listed, `-divider` inserts a separator between groups,
# and `-hex` selects hexadecimal display for that entry.
# Lines beginning with `#add wave` are entries that have been commented out.

# Testbench clock and reset.
add wave /testbench/clk
add wave /testbench/reset
add wave -divider
# Hart-level stall and flush signals (DataStall, ICacheStallF, Stall*/Flush*).
#add wave /testbench/dut/hart/ebu/IReadF
add wave /testbench/dut/hart/DataStall
add wave /testbench/dut/hart/ICacheStallF
add wave /testbench/dut/hart/StallF
add wave /testbench/dut/hart/StallD
add wave /testbench/dut/hart/StallE
add wave /testbench/dut/hart/StallM
add wave /testbench/dut/hart/StallW
add wave /testbench/dut/hart/FlushD
add wave /testbench/dut/hart/FlushE
add wave /testbench/dut/hart/FlushM
add wave /testbench/dut/hart/FlushW
add wave -divider
# ifu PC/instruction signals with F and D suffixes (presumably the fetch and
# decode stages -- confirm), plus the instruction cache's raw output.
add wave -hex /testbench/dut/hart/ifu/PCF
add wave -hex /testbench/dut/hart/ifu/PCD
add wave -hex /testbench/dut/hart/ifu/InstrD
add wave /testbench/InstrDName
add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD
add wave -hex /testbench/dut/hart/ifu/ic/controller/AlignedInstrRawD
add wave -divider
# Instruction cache internals: signals under ifu/ic and its `controller`
# sub-instance (fetch state, word counter, cache-memory write port,
# misaligned-read state).
add wave -hex /testbench/dut/hart/ifu/ic/controller/FetchState
add wave -hex /testbench/dut/hart/ifu/ic/controller/FetchWordNum
add wave -hex /testbench/dut/hart/ifu/ic/controller/ICacheMemWriteEnable
add wave -hex /testbench/dut/hart/ifu/ic/InstrPAdrF
add wave -hex /testbench/dut/hart/ifu/ic/InstrAckF
add wave -hex /testbench/dut/hart/ifu/ic/controller/ICacheMemWriteData
add wave -hex /testbench/dut/hart/ifu/ic/controller/ICacheMemWritePAdr
add wave -hex /testbench/dut/hart/ifu/ic/controller/MisalignedState
add wave -hex /testbench/dut/hart/ifu/ic/controller/MisalignedHalfInstrF
add wave -divider
# E-suffixed PC/instruction signals and ieu datapath operands/result
# (presumably the execute stage -- confirm).
add wave -hex /testbench/dut/hart/ifu/PCE
add wave -hex /testbench/dut/hart/ifu/InstrE
add wave /testbench/InstrEName
add wave -hex /testbench/dut/hart/ieu/dp/SrcAE
add wave -hex /testbench/dut/hart/ieu/dp/SrcBE
add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE
#add wave /testbench/dut/hart/ieu/dp/PCSrcE
add wave -divider
# M-suffixed PC/instruction signals plus uncore write/address/data signals
# (presumably the memory stage -- confirm).
add wave -hex /testbench/dut/hart/ifu/PCM
add wave -hex /testbench/dut/hart/ifu/InstrM
add wave /testbench/InstrMName
add wave /testbench/dut/uncore/dtim/memwrite
add wave -hex /testbench/dut/uncore/HADDR
add wave -hex /testbench/dut/uncore/HWDATA
add wave -divider
# Signals under hart/ebu: read requests, bus state, and H* bus signals.
add wave -hex /testbench/dut/hart/ebu/MemReadM
add wave -hex /testbench/dut/hart/ebu/InstrReadF
add wave -hex /testbench/dut/hart/ebu/BusState
add wave -hex /testbench/dut/hart/ebu/NextBusState
add wave -hex /testbench/dut/hart/ebu/HADDR
add wave -hex /testbench/dut/hart/ebu/HREADY
add wave -hex /testbench/dut/hart/ebu/HTRANS
add wave -hex /testbench/dut/hart/ebu/HRDATA
add wave -hex /testbench/dut/hart/ebu/HWRITE
add wave -hex /testbench/dut/hart/ebu/HWDATA
add wave -hex /testbench/dut/hart/ebu/CaptureDataM
add wave -divider
# Every signal directly under uncore/dtim (wildcard).
add wave -hex /testbench/dut/uncore/dtim/*
add wave -divider
# W-suffixed PC/instruction signals and register-write signals
# (presumably the writeback stage -- confirm).
add wave -hex /testbench/dut/hart/ifu/PCW
add wave -hex /testbench/dut/hart/ifu/InstrW
add wave /testbench/InstrWName
add wave /testbench/dut/hart/ieu/dp/RegWriteW
add wave -hex /testbench/dut/hart/ebu/ReadDataW
add wave -hex /testbench/dut/hart/ieu/dp/ResultW
add wave -hex /testbench/dut/hart/ieu/dp/RdW
add wave -divider
# NOTE(review): this repeats the uncore/dtim/* wildcard already added above,
# so those signals appear twice in the wave window -- confirm this is intended.
add wave -hex /testbench/dut/uncore/dtim/*
add wave -divider
# Catch-all: recursively (-r) add every signal under /testbench in hex.
# NOTE(review): this presumably re-adds the signals listed above -- confirm.
add wave -hex -r /testbench/*

View File

@ -6,7 +6,7 @@ add wave /testbench/reset
add wave -divider add wave -divider
#add wave /testbench/dut/hart/ebu/IReadF #add wave /testbench/dut/hart/ebu/IReadF
add wave /testbench/dut/hart/DataStall add wave /testbench/dut/hart/DataStall
add wave /testbench/dut/hart/InstrStall add wave /testbench/dut/hart/ICacheStallF
add wave /testbench/dut/hart/StallF add wave /testbench/dut/hart/StallF
add wave /testbench/dut/hart/StallD add wave /testbench/dut/hart/StallD
add wave /testbench/dut/hart/StallE add wave /testbench/dut/hart/StallE
@ -23,11 +23,6 @@ add wave -hex /testbench/dut/hart/ifu/PCD
add wave -hex /testbench/dut/hart/ifu/InstrD add wave -hex /testbench/dut/hart/ifu/InstrD
add wave /testbench/InstrDName add wave /testbench/InstrDName
add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD
add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD
add wave /testbench/dut/hart/ifu/ic/DelayF
add wave /testbench/dut/hart/ifu/ic/DelaySideF
add wave /testbench/dut/hart/ifu/ic/DelayD
add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD
add wave -divider add wave -divider
add wave -hex /testbench/dut/hart/ifu/PCE add wave -hex /testbench/dut/hart/ifu/PCE
add wave -hex /testbench/dut/hart/ifu/InstrE add wave -hex /testbench/dut/hart/ifu/InstrE

View File

@ -55,14 +55,14 @@ module rocacheline #(parameter LINESIZE = 256, parameter TAGSIZE = 32, parameter
genvar i; genvar i;
generate generate
for (i=0; i < NUMWORDS; i++) begin for (i=0; i < NUMWORDS; i++) begin
assign DataLinesIn[i] = WriteData[NUMWORDS*i+WORDSIZE-1:NUMWORDS*i]; assign DataLinesIn[i] = WriteData[WORDSIZE*(i+1)-1:WORDSIZE*i];
flopenr #(LINESIZE) LineFlop(clk, reset, WriteEnable, DataLinesIn[i], DataLinesOut[i]); flopenr #(WORDSIZE) LineFlop(clk, reset, WriteEnable, DataLinesIn[i], DataLinesOut[i]);
end end
endgenerate endgenerate
always_comb begin always_comb begin
assign DataWord = DataLinesOut[WordSelect[OFFSETSIZE-1:$clog2(WORDSIZE)]]; assign DataWord = DataLinesOut[WordSelect[OFFSETSIZE-1:$clog2(WORDSIZE/8)]];
end end
endmodule endmodule

View File

@ -41,6 +41,7 @@ module ahblite (
input logic [`XLEN-1:0] InstrPAdrF, // *** rename these to match block diagram input logic [`XLEN-1:0] InstrPAdrF, // *** rename these to match block diagram
input logic InstrReadF, input logic InstrReadF,
output logic [`XLEN-1:0] InstrRData, output logic [`XLEN-1:0] InstrRData,
output logic InstrAckF,
// Signals from Data Cache // Signals from Data Cache
input logic [`XLEN-1:0] MemPAdrM, input logic [`XLEN-1:0] MemPAdrM,
input logic MemReadM, MemWriteM, input logic MemReadM, MemWriteM,
@ -70,7 +71,7 @@ module ahblite (
output logic [3:0] HSIZED, output logic [3:0] HSIZED,
output logic HWRITED, output logic HWRITED,
// Stalls // Stalls
output logic InstrStall,/*InstrUpdate, */DataStall output logic /*InstrUpdate, */DataStall
// *** add a chip-level ready signal as part of handshake // *** add a chip-level ready signal as part of handshake
); );
@ -134,12 +135,7 @@ module ahblite (
// stall signals // stall signals
assign #2 DataStall = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || assign #2 DataStall = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) ||
(NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE) || (NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE);
(NextBusState == MMUTRANSLATE) || (NextBusState == MMUIDLE);
// *** Could get finer grained stalling if we distinguish between MMU
// instruction address translation and data address translation
assign #1 InstrStall = (NextBusState == INSTRREAD) || (NextBusState == INSTRREADC) ||
(NextBusState == MMUTRANSLATE) || (NextBusState == MMUIDLE);
// bus outputs // bus outputs
assign #1 GrantData = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || assign #1 GrantData = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) ||
@ -171,6 +167,7 @@ module ahblite (
assign #1 MMUReady = (NextBusState == MMUIDLE); assign #1 MMUReady = (NextBusState == MMUIDLE);
assign InstrRData = HRDATA; assign InstrRData = HRDATA;
assign InstrAckF = (BusState == INSTRREAD) && (NextBusState != INSTRREAD) || (BusState == INSTRREADC) && (NextBusState != INSTRREADC);
assign MMUReadPTE = HRDATA; assign MMUReadPTE = HRDATA;
assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021 assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021
assign CaptureDataM = ((BusState == MEMREAD) && (NextBusState != MEMREAD)) || assign CaptureDataM = ((BusState == MEMREAD) && (NextBusState != MEMREAD)) ||

View File

@ -29,7 +29,7 @@ module hazard(
// Detect hazards // Detect hazards
input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM, input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM,
input logic LoadStallD, MulDivStallD, CSRRdStallD, input logic LoadStallD, MulDivStallD, CSRRdStallD,
input logic InstrStall, DataStall, ICacheStallF, input logic DataStall, ICacheStallF,
// Stall & flush outputs // Stall & flush outputs
output logic StallF, StallD, StallE, StallM, StallW, output logic StallF, StallD, StallE, StallM, StallW,
output logic FlushF, FlushD, FlushE, FlushM, FlushW output logic FlushF, FlushD, FlushE, FlushM, FlushW
@ -58,7 +58,7 @@ module hazard(
// assign StallDCause = LoadStallD | MulDivStallD | CSRRdStallD; // stall in decode if instruction is a load/mul/csr dependent on previous // assign StallDCause = LoadStallD | MulDivStallD | CSRRdStallD; // stall in decode if instruction is a load/mul/csr dependent on previous
assign StallECause = 0; assign StallECause = 0;
assign StallMCause = 0; assign StallMCause = 0;
assign StallWCause = DataStall | InstrStall; assign StallWCause = DataStall | ICacheStallF;
// Each stage stalls if the next stage is stalled or there is a cause to stall this stage. // Each stage stalls if the next stage is stalled or there is a cause to stall this stage.
assign StallF = StallD | StallFCause; assign StallF = StallD | StallFCause;

View File

@ -36,6 +36,7 @@ module icache(
input logic [11:0] LowerPCF, input logic [11:0] LowerPCF,
// Data read in from the ebu unit // Data read in from the ebu unit
input logic [`XLEN-1:0] InstrInF, input logic [`XLEN-1:0] InstrInF,
input logic InstrAckF,
// Read requested from the ebu unit // Read requested from the ebu unit
output logic [`XLEN-1:0] InstrPAdrF, output logic [`XLEN-1:0] InstrPAdrF,
output logic InstrReadF, output logic InstrReadF,
@ -44,95 +45,208 @@ module icache(
// High if the icache is requesting a stall // High if the icache is requesting a stall
output logic ICacheStallF, output logic ICacheStallF,
// The raw (not decompressed) instruction that was requested // The raw (not decompressed) instruction that was requested
// If the next instruction is compressed, the upper 16 bits may be anything // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros
output logic [31:0] InstrRawD output logic [31:0] InstrRawD
); );
logic DelayF, DelaySideF, FlushDLastCyclen, DelayD; // Configuration parameters
logic [1:0] InstrDMuxChoice; // TODO Move these to a config file
logic [15:0] MisalignedHalfInstrF, MisalignedHalfInstrD; localparam integer ICACHELINESIZE = 256;
logic [31:0] InstrF, AlignedInstrD; localparam integer ICACHENUMLINES = 512;
// Buffer the last read, for ease of accessing it again
logic LastReadDataValidF;
logic [`XLEN-1:0] LastReadDataF, LastReadAdrF, InDataF;
// instruction for NOP // Input signals to cache memory
logic [31:0] nop = 32'h00000013; logic FlushMem;
logic [`XLEN-1:12] ICacheMemReadUpperPAdr;
logic [11:0] ICacheMemReadLowerAdr;
logic ICacheMemWriteEnable;
logic [ICACHELINESIZE-1:0] ICacheMemWriteData;
logic [`XLEN-1:0] ICacheMemWritePAdr;
// Output signals from cache memory
logic [`XLEN-1:0] ICacheMemReadData;
logic ICacheMemReadValid;
// Temporary change to bridge the new interface to old behaviors rodirectmappedmem #(.LINESIZE(ICACHELINESIZE), .NUMLINES(ICACHENUMLINES)) cachemem(
logic [`XLEN-1:0] PCPF; .*,
assign PCPF = {UpperPCPF, LowerPCF}; .flush(FlushMem),
.ReadUpperPAdr(ICacheMemReadUpperPAdr),
.ReadLowerAdr(ICacheMemReadLowerAdr),
.WriteEnable(ICacheMemWriteEnable),
.WriteLine(ICacheMemWriteData),
.WritePAdr(ICacheMemWritePAdr),
.DataWord(ICacheMemReadData),
.DataValid(ICacheMemReadValid)
);
// This flop doesn't stall if StallF is high because we should output a nop icachecontroller #(.LINESIZE(ICACHELINESIZE)) controller(.*);
// when FlushD happens, even if the pipeline is also stalled.
flopr #(1) flushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCyclen | ~StallF), FlushDLastCyclen);
flopenr #(1) delayDFlop(clk, reset, ~StallF, DelayF & ~CompressedF, DelayD); assign FlushMem = 1'b0;
flopenrc#(1) delayStateFlop(clk, reset, FlushD, ~StallF, DelayF & ~DelaySideF, DelaySideF); endmodule
// This flop stores the first half of a misaligned instruction while waiting for the other half
flopenr #(16) halfInstrFlop(clk, reset, DelayF & ~StallF, MisalignedHalfInstrF, MisalignedHalfInstrD);
// This flop is here to simulate pulling data out of the cache, which is edge-triggered module icachecontroller #(parameter LINESIZE = 256) (
flopenr #(32) instrFlop(clk, reset, ~StallF, InstrF, AlignedInstrD); // Inputs from pipeline
input logic clk, reset,
input logic StallF, StallD,
input logic FlushD,
// These flops cache the previous read, to accelerate things // Input the address to read
flopenr #(`XLEN) lastReadDataFlop(clk, reset, InstrReadF & ~StallF, InstrInF, LastReadDataF); // The upper bits of the physical pc
flopenr #(1) lastReadDataVFlop(clk, reset, InstrReadF & ~StallF, 1'b1, LastReadDataValidF); input logic [`XLEN-1:12] UpperPCPF,
flopenr #(`XLEN) lastReadAdrFlop(clk, reset, InstrReadF & ~StallF, InstrPAdrF, LastReadAdrF); // The lower bits of the virtual pc
input logic [11:0] LowerPCF,
// Decide which address needs to be fetched and sent out over InstrPAdrF // Signals to/from cache memory
// If the requested address fits inside one read from memory, we fetch that // The read coming out of it
// address, adjusted to the bit width. Otherwise, we request the lower word input logic [`XLEN-1:0] ICacheMemReadData,
// and then the upper word, in that order. input logic ICacheMemReadValid,
generate // The address at which we want to search the cache memory
if (`XLEN == 32) begin output logic [`XLEN-1:12] ICacheMemReadUpperPAdr,
assign InstrPAdrF = PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[31:2], 2'b00} : {PCPF[31:2], 2'b00}) : PCPF; output logic [11:0] ICacheMemReadLowerAdr,
end else begin // Load data into the cache
assign InstrPAdrF = PCPF[2] ? (PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[63:3]+1, 3'b000} : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}; output logic ICacheMemWriteEnable,
end output logic [LINESIZE-1:0] ICacheMemWriteData,
endgenerate output logic [`XLEN-1:0] ICacheMemWritePAdr,
// Read from memory if we don't have the address we want // Outputs to rest of ifu
always_comb if (LastReadDataValidF & (InstrPAdrF == LastReadAdrF)) begin // High if the instruction in the fetch stage is compressed
assign InstrReadF = 0; output logic CompressedF,
end else begin // The instruction that was requested
assign InstrReadF = 1; // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros
end output logic [31:0] InstrRawD,
// Pick from the memory input or from the previous read, as appropriate // Outputs to pipeline control stuff
mux2 #(`XLEN) inDataMux(LastReadDataF, InstrInF, InstrReadF, InDataF); output logic ICacheStallF,
// If the instruction fits in one memory read, then we put the right bits // Signals to/from ahblite interface
// into InstrF. Otherwise, we activate DelayF to signal the rest of the // A read containing the requested data
// machinery to swizzle bits. input logic [`XLEN-1:0] InstrInF,
generate input logic InstrAckF,
if (`XLEN == 32) begin // The read we request from main memory
assign InstrF = PCPF[1] ? {16'b0, InDataF[31:16]} : InDataF; output logic [`XLEN-1:0] InstrPAdrF,
assign DelayF = PCPF[1]; output logic InstrReadF
assign MisalignedHalfInstrF = InDataF[31:16]; );
end else begin
assign InstrF = PCPF[2] ? (PCPF[1] ? {16'b0, InDataF[63:48]} : InDataF[63:32]) : (PCPF[1] ? InDataF[47:16] : InDataF[31:0]); // Happy path signals
assign DelayF = PCPF[1] && PCPF[2]; logic [31:0] AlignedInstrRawF, AlignedInstrRawD;
assign MisalignedHalfInstrF = InDataF[63:48]; logic FlushDLastCycleN;
end logic PCPMisalignedF;
endgenerate const logic [31:0] NOP = 32'h13;
// We will likely need to stall later, but stalls are handled by the rest of the pipeline for now // Misaligned signals
assign ICacheStallF = 0; logic [`XLEN:0] MisalignedInstrRawF;
logic MisalignedStall;
// Cache fault signals
logic FaultStall;
// Detect if the instruction is compressed // Detect if the instruction is compressed
assign CompressedF = InstrF[1:0] != 2'b11; assign CompressedF = AlignedInstrRawF[1:0] != 2'b11;
// Pick the correct output, depending on whether we have to assemble this // Handle happy path (data in cache, reads aligned)
// instruction from two reads or not.
// Output the requested instruction (we don't need to worry if the read is generate
// incomplete, since the pipeline stalls for us when it isn't), or a NOP for if (`XLEN == 32) begin
// the cycle when the first of two reads comes in. assign AlignedInstrRawF = LowerPCF[1] ? MisalignedInstrRawF : ICacheMemReadData;
always_comb if (~FlushDLastCyclen) begin assign PCPMisalignedF = LowerPCF[1] && ~CompressedF;
assign InstrDMuxChoice = 2'b10;
end else if (DelayD & (MisalignedHalfInstrD[1:0] != 2'b11)) begin
assign InstrDMuxChoice = 2'b11;
end else begin end else begin
assign InstrDMuxChoice = {1'b0, DelayD}; assign AlignedInstrRawF = LowerPCF[2]
? (LowerPCF[1] ? MisalignedInstrRawF : ICacheMemReadData[63:32])
: (LowerPCF[1] ? ICacheMemReadData[47:16] : ICacheMemReadData[31:0]);
assign PCPMisalignedF = LowerPCF[2] && LowerPCF[1] && ~CompressedF;
end
endgenerate
flopenr #(32) AlignedInstrRawDFlop(clk, reset, ~StallD, AlignedInstrRawF, AlignedInstrRawD);
flopr #(1) FlushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCycleN | ~StallF), FlushDLastCycleN);
mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCycleN, InstrRawD);
// Stall for faults or misaligned reads
always_comb begin
assign ICacheStallF = FaultStall | MisalignedStall;
end
// Handle misaligned, noncompressed reads
logic MisalignedState, NextMisalignedState;
logic [15:0] MisalignedHalfInstrF;
logic [15:0] UpperHalfWord;
flopenr #(16) MisalignedHalfInstrFlop(clk, reset, ~FaultStall & (PCPMisalignedF & MisalignedState), AlignedInstrRawF[15:0], MisalignedHalfInstrF);
flopenr #(1) MisalignedStateFlop(clk, reset, ~FaultStall, NextMisalignedState, MisalignedState);
// When doing a misaligned read, swizzle the bits correctly
generate
if (`XLEN == 32) begin
assign UpperHalfWord = ICacheMemReadData[31:16];
end else begin
assign UpperHalfWord = ICacheMemReadData[63:48];
end
endgenerate
always_comb begin
if (MisalignedState) begin
assign MisalignedInstrRawF = {16'b0, UpperHalfWord};
end else begin
assign MisalignedInstrRawF = {ICacheMemReadData[15:0], MisalignedHalfInstrF};
end
end
// Manage internal state and stall when necessary
always_comb begin
assign MisalignedStall = PCPMisalignedF & MisalignedState;
assign NextMisalignedState = ~PCPMisalignedF | ~MisalignedState;
end
// Pick the correct address to read
generate
if (`XLEN == 32) begin
assign ICacheMemReadLowerAdr = {LowerPCF[11:2] + (PCPMisalignedF & ~MisalignedState), 2'b00};
end else begin
assign ICacheMemReadLowerAdr = {LowerPCF[11:3] + (PCPMisalignedF & ~MisalignedState), 3'b00};
end
endgenerate
assign ICacheMemReadUpperPAdr = UpperPCPF;
// Handle cache faults
localparam integer WORDSPERLINE = LINESIZE/`XLEN;
localparam integer LOGWPL = $clog2(WORDSPERLINE);
localparam integer OFFSETWIDTH = $clog2(LINESIZE/8);
logic FetchState, EndFetchState, BeginFetchState;
logic [LOGWPL:0] FetchWordNum, NextFetchWordNum;
logic [`XLEN-1:0] LineAlignedPCPF;
flopr #(1) FetchStateFlop(clk, reset, BeginFetchState | (FetchState & ~EndFetchState), FetchState);
flopr #(LOGWPL+1) FetchWordNumFlop(clk, reset, NextFetchWordNum, FetchWordNum);
genvar i;
generate
for (i=0; i < WORDSPERLINE; i++) begin
flopenr #(`XLEN) flop(clk, reset, FetchState & (i == FetchWordNum), InstrInF, ICacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]);
end
endgenerate
// Enter the fetch state when we hit a cache fault
always_comb begin
assign BeginFetchState = ~ICacheMemReadValid & ~FetchState;
end
// Machinery to request the correct addresses from main memory
always_comb begin
assign InstrReadF = FetchState & ~EndFetchState;
assign LineAlignedPCPF = {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}};
assign InstrPAdrF = LineAlignedPCPF + FetchWordNum*(`XLEN/8);
assign NextFetchWordNum = FetchState ? FetchWordNum+InstrAckF : {LOGWPL+1{1'b0}};
end
// Write to cache memory when we have the line here
always_comb begin
assign EndFetchState = FetchWordNum == {1'b1, {LOGWPL{1'b0}}} & FetchState;
assign ICacheMemWritePAdr = LineAlignedPCPF;
assign ICacheMemWriteEnable = EndFetchState;
end
// Stall the pipeline while loading a new line from memory
always_comb begin
assign FaultStall = FetchState | ~ICacheMemReadValid;
end end
mux4 #(32) instrDMux (AlignedInstrD, {InstrInF[15:0], MisalignedHalfInstrD}, nop, {16'b0, MisalignedHalfInstrD}, InstrDMuxChoice, InstrRawD);
endmodule endmodule

View File

@ -32,6 +32,7 @@ module ifu (
input logic FlushF, FlushD, FlushE, FlushM, FlushW, input logic FlushF, FlushD, FlushE, FlushM, FlushW,
// Fetch // Fetch
input logic [`XLEN-1:0] InstrInF, input logic [`XLEN-1:0] InstrInF,
input logic InstrAckF,
output logic [`XLEN-1:0] PCF, output logic [`XLEN-1:0] PCF,
output logic [`XLEN-1:0] InstrPAdrF, output logic [`XLEN-1:0] InstrPAdrF,
output logic InstrReadF, output logic InstrReadF,

View File

@ -111,8 +111,8 @@ module wallypipelinedhart (
logic [`XLEN-1:0] InstrPAdrF; logic [`XLEN-1:0] InstrPAdrF;
logic [`XLEN-1:0] InstrRData; logic [`XLEN-1:0] InstrRData;
logic InstrReadF; logic InstrReadF;
logic DataStall, InstrStall; logic DataStall;
logic InstrAckD, MemAckW; logic InstrAckF, MemAckW;
logic BPPredWrongE, BPPredWrongM; logic BPPredWrongE, BPPredWrongM;
logic [3:0] InstrClassM; logic [3:0] InstrClassM;

View File

@ -380,9 +380,10 @@ string tests32i[] = {
// Track names of instructions // Track names of instructions
instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE, instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE,
dut.hart.ifu.ic.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, dut.hart.ifu.ic.controller.AlignedInstrRawF,
dut.hart.ifu.InstrM, InstrW, InstrFName, InstrDName, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
InstrEName, InstrMName, InstrWName); dut.hart.ifu.InstrM, dut.hart.ifu.InstrW,
InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
// initialize tests // initialize tests
initial initial