A few more cache fixes

This commit is contained in:
Jarred Allen 2021-04-13 01:06:57 -04:00
parent d99b8f772e
commit fc8b8ad7aa
5 changed files with 95 additions and 62 deletions

22
wally-pipelined/src/cache/cache-sram.sv vendored Normal file
View File

@ -0,0 +1,22 @@
// Synchronous SRAM model with one read-only port and one write-only port.
//
// Parameter naming (kept as-is so existing instantiations keep working):
//   DEPTH = number of bits in one "word" (entry) of the memory
//   WIDTH = number of such words (entries)
//
// Timing, as implemented by the single clocked process below:
//   - Reads are registered: ReadData reflects StoredData[ReadAddr] as sampled
//     at the previous rising clock edge (one cycle of read latency).
//   - Writes take effect at the rising clock edge when WriteEnable is high.
//   - A read and a write to the same address in the same cycle return the
//     OLD contents, because both updates use nonblocking assignments.
//   - There is no reset; contents are undefined until written.
module Sram1Read1Write #(parameter DEPTH=128, WIDTH=256) (
  input  logic                     clk,
  // port 1 is read only
  input  logic [$clog2(WIDTH)-1:0] ReadAddr,
  output logic [DEPTH-1:0]         ReadData,
  // port 2 is write only
  input  logic [$clog2(WIDTH)-1:0] WriteAddr,
  input  logic [DEPTH-1:0]         WriteData,
  input  logic                     WriteEnable
);
  // Storage is an unpacked array of packed words rather than one giant packed
  // vector. A packed [WIDTH-1:0][DEPTH-1:0] declaration is a single
  // WIDTH*DEPTH-bit vector, which can exceed the packed-array size a tool is
  // required to support; the unpacked form is the conventional memory
  // declaration and is indexed word-by-word exactly as before.
  logic [DEPTH-1:0] StoredData [WIDTH-1:0];

  always_ff @(posedge clk) begin
    // Registered read: samples the pre-write contents of the addressed word.
    ReadData <= StoredData[ReadAddr];
    if (WriteEnable) begin
      StoredData[WriteAddr] <= WriteData;
    end
  end
endmodule

View File

@ -30,6 +30,7 @@ module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par
// Pipeline stuff // Pipeline stuff
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic stall,
// If flush is high, invalidate the entire cache // If flush is high, invalidate the entire cache
input logic flush, input logic flush,
// Select which address to read (broken for efficiency's sake) // Select which address to read (broken for efficiency's sake)
@ -45,75 +46,80 @@ module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par
); );
// Various compile-time constants // Various compile-time constants
localparam integer WORDWIDTH = $clog2(WORDSIZE); localparam integer WORDWIDTH = $clog2(WORDSIZE/8);
localparam integer LINEWIDTH = $clog2(LINESIZE/8); localparam integer OFFSETWIDTH = $clog2(LINESIZE/WORDSIZE);
localparam integer OFFSETWIDTH = $clog2(LINESIZE) - WORDWIDTH;
localparam integer SETWIDTH = $clog2(NUMLINES); localparam integer SETWIDTH = $clog2(NUMLINES);
localparam integer TAGWIDTH = $clog2(`XLEN) - $clog2(LINESIZE) - SETWIDTH; localparam integer TAGWIDTH = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH;
localparam integer OFFSETBEGIN = WORDWIDTH;
localparam integer OFFSETEND = OFFSETBEGIN+OFFSETWIDTH-1;
localparam integer SETBEGIN = OFFSETEND+1;
localparam integer SETEND = SETBEGIN + SETWIDTH - 1;
localparam integer TAGBEGIN = SETEND + 1;
localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1;
// Machinery to read from and write to the correct addresses in memory // Machinery to read from and write to the correct addresses in memory
logic [`XLEN-1:0] ReadPAdr; logic [`XLEN-1:0] ReadPAdr;
logic [`XLEN-1:0] OldReadPAdr;
logic [OFFSETWIDTH-1:0] ReadOffset, WriteOffset; logic [OFFSETWIDTH-1:0] ReadOffset, WriteOffset;
logic [SETWIDTH-1:0] ReadSet, WriteSet; logic [SETWIDTH-1:0] ReadSet, WriteSet;
logic [TAGWIDTH-1:0] ReadTag, WriteTag; logic [TAGWIDTH-1:0] ReadTag, WriteTag;
logic [LINESIZE-1:0] ReadLine;
logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed;
// Machinery to check if a given read is valid and is the desired value // Machinery to check if a given read is valid and is the desired value
logic [TAGWIDTH-1:0] DataTag; logic [TAGWIDTH-1:0] DataTag;
logic [NUMLINES-1:0] ValidOut, NextValidOut; logic [NUMLINES-1:0] ValidOut;
flopenr #(`XLEN) ReadPAdrFlop(clk, reset, ~stall, ReadPAdr, OldReadPAdr);
// Assign the read and write addresses in cache memory // Assign the read and write addresses in cache memory
always_comb begin always_comb begin
assign ReadOffset = ReadLowerAdr[WORDWIDTH+OFFSETWIDTH-1:WORDWIDTH]; assign ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN];
assign ReadPAdr = {ReadUpperPAdr, ReadLowerAdr}; assign ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
assign ReadSet = ReadPAdr[LINEWIDTH+SETWIDTH-1:LINEWIDTH]; assign ReadSet = ReadPAdr[SETEND:SETBEGIN];
assign ReadTag = ReadPAdr[`XLEN-1:LINEWIDTH+SETWIDTH]; assign ReadTag = OldReadPAdr[TAGEND:TAGBEGIN];
assign WriteOffset = WritePAdr[WORDWIDTH+OFFSETWIDTH-1:WORDWIDTH]; assign WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN];
assign WriteSet = WritePAdr[LINEWIDTH+SETWIDTH-1:LINEWIDTH]; assign WriteSet = WritePAdr[SETEND:SETBEGIN];
assign WriteTag = WritePAdr[`XLEN-1:LINEWIDTH+SETWIDTH]; assign WriteTag = WritePAdr[TAGEND:TAGBEGIN];
end end
SRAM2P1R1W #(.Depth(OFFSETWIDTH), .Width(WORDSIZE)) cachemem ( // Depth is number of bits in one "word" of the memory, width is number of such words
Sram1Read1Write #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem (
.*, .*,
.RA1(ReadOffset), .ReadAddr(ReadSet),
.RD1(DataWord), .ReadData(ReadLine),
.REN1(1'b1), .WriteAddr(WriteSet),
.WA1(WriteOffset), .WriteData(WriteLine)
.WD1(WriteSet), );
.WEN1(WriteEnable), Sram1Read1Write #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags (
.BitWEN1(0) .*,
.ReadAddr(ReadSet),
.ReadData(DataTag),
.WriteAddr(WriteSet),
.WriteData(WriteTag)
); );
SRAM2P1R1W #(.Depth(OFFSETWIDTH), .Width(TAGWIDTH)) cachetags ( // Pick the right bits coming out the read line
.*, assign DataWord = ReadLineTransformed[ReadOffset];
.RA1(ReadOffset), genvar i;
.RD1(DataTag), generate
.REN1(1'b1), for (i=0; i < LINESIZE/WORDSIZE; i++) begin
.WA1(WriteOffset), assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE];
.WD1(WriteTag), end
.WEN1(WriteEnable), endgenerate
.BitWEN1(0)
);
// Correctly handle the valid bits // Correctly handle the valid bits
always_comb begin always_ff @(posedge clk, posedge reset) begin
if (WriteEnable) begin
assign NextValidOut = {NextValidOut[NUMLINES-1:WriteSet+1], 1'b1, NextValidOut[WriteSet-1:0]};
end else begin
assign NextValidOut = ValidOut;
end
end
always_ff @(posedge clk, reset, flush) begin
if (reset || flush) begin if (reset || flush) begin
ValidOut <= {NUMLINES{1'b0}}; ValidOut <= {NUMLINES{1'b0}};
end else begin end else begin
ValidOut <= NextValidOut; if (WriteEnable) begin
ValidOut[WriteSet] <= 1;
end
end end
end DataValid <= ValidOut[ReadSet] && (DataTag == ReadTag);
// Determine if the line coming out is valid and matches the desired data
always_comb begin
assign DataValid = ValidOut[ReadSet] && (DataTag == ReadTag);
end end
endmodule endmodule

View File

@ -31,9 +31,9 @@ module icache(
input logic StallF, StallD, input logic StallF, StallD,
input logic FlushD, input logic FlushD,
// Upper bits of physical address for PC // Upper bits of physical address for PC
input logic [`XLEN-1:12] UpperPCPF, input logic [`XLEN-1:12] UpperPCNextPF,
// Lower 12 bits of virtual PC address, since it's faster this way // Lower 12 bits of virtual PC address, since it's faster this way
input logic [11:0] LowerPCF, input logic [11:0] LowerPCNextF,
// Data read in from the ebu unit // Data read in from the ebu unit
input logic [`XLEN-1:0] InstrInF, input logic [`XLEN-1:0] InstrInF,
input logic InstrAckF, input logic InstrAckF,
@ -65,8 +65,9 @@ module icache(
logic [`XLEN-1:0] ICacheMemReadData; logic [`XLEN-1:0] ICacheMemReadData;
logic ICacheMemReadValid; logic ICacheMemReadValid;
rodirectmappedmem #(.LINESIZE(ICACHELINESIZE), .NUMLINES(ICACHENUMLINES)) cachemem( rodirectmappedmem #(.LINESIZE(ICACHELINESIZE), .NUMLINES(ICACHENUMLINES), .WORDSIZE(`XLEN)) cachemem(
.*, .*,
.stall(StallF && (~ICacheStallF || ~InstrAckF)),
.flush(FlushMem), .flush(FlushMem),
.ReadUpperPAdr(ICacheMemReadUpperPAdr), .ReadUpperPAdr(ICacheMemReadUpperPAdr),
.ReadLowerAdr(ICacheMemReadLowerAdr), .ReadLowerAdr(ICacheMemReadLowerAdr),
@ -79,6 +80,7 @@ module icache(
icachecontroller #(.LINESIZE(ICACHELINESIZE)) controller(.*); icachecontroller #(.LINESIZE(ICACHELINESIZE)) controller(.*);
// For now, assume no writes to executable memory
assign FlushMem = 1'b0; assign FlushMem = 1'b0;
endmodule endmodule
@ -90,9 +92,9 @@ module icachecontroller #(parameter LINESIZE = 256) (
// Input the address to read // Input the address to read
// The upper bits of the physical pc // The upper bits of the physical pc
input logic [`XLEN-1:12] UpperPCPF, input logic [`XLEN-1:12] UpperPCNextPF,
// The lower bits of the virtual pc // The lower bits of the virtual pc
input logic [11:0] LowerPCF, input logic [11:0] LowerPCNextF,
// Signals to/from cache memory // Signals to/from cache memory
// The read coming out of it // The read coming out of it
@ -130,6 +132,7 @@ module icachecontroller #(parameter LINESIZE = 256) (
logic FlushDLastCycleN; logic FlushDLastCycleN;
logic PCPMisalignedF; logic PCPMisalignedF;
const logic [31:0] NOP = 32'h13; const logic [31:0] NOP = 32'h13;
logic [`XLEN-1:0] PCPF;
// Misaligned signals // Misaligned signals
logic [`XLEN:0] MisalignedInstrRawF; logic [`XLEN:0] MisalignedInstrRawF;
logic MisalignedStall; logic MisalignedStall;
@ -143,18 +146,19 @@ module icachecontroller #(parameter LINESIZE = 256) (
generate generate
if (`XLEN == 32) begin if (`XLEN == 32) begin
assign AlignedInstrRawF = LowerPCF[1] ? MisalignedInstrRawF : ICacheMemReadData; assign AlignedInstrRawF = PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData;
assign PCPMisalignedF = LowerPCF[1] && ~CompressedF; assign PCPMisalignedF = PCPF[1] && ~CompressedF;
end else begin end else begin
assign AlignedInstrRawF = LowerPCF[2] assign AlignedInstrRawF = PCPF[2]
? (LowerPCF[1] ? MisalignedInstrRawF : ICacheMemReadData[63:32]) ? (PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData[63:32])
: (LowerPCF[1] ? ICacheMemReadData[47:16] : ICacheMemReadData[31:0]); : (PCPF[1] ? ICacheMemReadData[47:16] : ICacheMemReadData[31:0]);
assign PCPMisalignedF = LowerPCF[2] && LowerPCF[1] && ~CompressedF; assign PCPMisalignedF = PCPF[2] && PCPF[1] && ~CompressedF;
end end
endgenerate endgenerate
flopenr #(32) AlignedInstrRawDFlop(clk, reset, ~StallD, AlignedInstrRawF, AlignedInstrRawD); flopenr #(32) AlignedInstrRawDFlop(clk, reset, ~StallD, AlignedInstrRawF, AlignedInstrRawD);
flopr #(1) FlushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCycleN | ~StallF), FlushDLastCycleN); flopr #(1) FlushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCycleN | ~StallF), FlushDLastCycleN);
flopenr #(`XLEN) PCPFFlop(clk, reset, ~StallF, {UpperPCNextPF, LowerPCNextF}, PCPF);
mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCycleN, InstrRawD); mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCycleN, InstrRawD);
// Stall for faults or misaligned reads // Stall for faults or misaligned reads
@ -197,12 +201,13 @@ module icachecontroller #(parameter LINESIZE = 256) (
// Pick the correct address to read // Pick the correct address to read
generate generate
if (`XLEN == 32) begin if (`XLEN == 32) begin
assign ICacheMemReadLowerAdr = {LowerPCF[11:2] + (PCPMisalignedF & ~MisalignedState), 2'b00}; assign ICacheMemReadLowerAdr = {LowerPCNextF[11:2] + (PCPMisalignedF & ~MisalignedState), 2'b00};
end else begin end else begin
assign ICacheMemReadLowerAdr = {LowerPCF[11:3] + (PCPMisalignedF & ~MisalignedState), 3'b00}; assign ICacheMemReadLowerAdr = {LowerPCNextF[11:3] + (PCPMisalignedF & ~MisalignedState), 3'b00};
end end
endgenerate endgenerate
assign ICacheMemReadUpperPAdr = UpperPCPF; // TODO Handle reading instructions that cross page boundaries
assign ICacheMemReadUpperPAdr = UpperPCNextPF;
// Handle cache faults // Handle cache faults

View File

@ -71,7 +71,7 @@ module ifu (
logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM; logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM;
logic PrivilegedChangePCM; logic PrivilegedChangePCM;
logic IllegalCompInstrD; logic IllegalCompInstrD;
logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkM, PCPF; logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkM, PCNextPF;
logic CompressedF; logic CompressedF;
logic [31:0] InstrRawD, InstrE, InstrW; logic [31:0] InstrRawD, InstrE, InstrW;
logic [31:0] nop = 32'h00000013; // instruction for NOP logic [31:0] nop = 32'h00000013; // instruction for NOP
@ -98,12 +98,12 @@ module ifu (
// assign InstrReadF = 1; // *** & ICacheMissF; add later // assign InstrReadF = 1; // *** & ICacheMissF; add later
// jarred 2021-03-14 Add instruction cache block to remove rd2 // jarred 2021-03-14 Add instruction cache block to remove rd2
assign PCPF = PCF; // Temporary workaround until iTLB is live assign PCNextPF = PCNextF; // Temporary workaround until iTLB is live
icache ic( icache ic(
.*, .*,
.InstrPAdrF(ICacheInstrPAdrF), .InstrPAdrF(ICacheInstrPAdrF),
.UpperPCPF(PCPF[`XLEN-1:12]), .UpperPCNextPF(PCNextPF[`XLEN-1:12]),
.LowerPCF(PCF[11:0]) .LowerPCNextF(PCNextF[11:0])
); );
// Prioritize the iTLB for reads if it wants one // Prioritize the iTLB for reads if it wants one
mux2 #(`XLEN) instrPAdrMux(ICacheInstrPAdrF, ITLBInstrPAdrF, ITLBMissF, InstrPAdrF); mux2 #(`XLEN) instrPAdrMux(ICacheInstrPAdrF, ITLBInstrPAdrF, ITLBMissF, InstrPAdrF);

View File

@ -393,7 +393,7 @@ module testbench();
// if (`F_SUPPORTED) tests = {tests64f, tests}; // if (`F_SUPPORTED) tests = {tests64f, tests};
// if (`D_SUPPORTED) tests = {tests64d, tests}; // if (`D_SUPPORTED) tests = {tests64d, tests};
if (`A_SUPPORTED) tests = {tests, tests64a}; if (`A_SUPPORTED) tests = {tests, tests64a};
if (`MEM_VIRTMEM) tests = {tests64mmu, tests}; if (`MEM_VIRTMEM) tests = {tests, tests64mmu};
end end
//tests = {tests64a, tests}; //tests = {tests64a, tests};
// tests = {tests, tests64p}; // tests = {tests, tests64p};