diff --git a/wally-pipelined/src/cache/cache-sram.sv b/wally-pipelined/src/cache/cache-sram.sv
new file mode 100644
index 00000000..0ba0efa5
--- /dev/null
+++ b/wally-pipelined/src/cache/cache-sram.sv
@@ -0,0 +1,22 @@
+// One-read-port, one-write-port memory with a registered read. DEPTH is the number of bits in one "word" of the memory, WIDTH is the number of such words
+module Sram1Read1Write #(parameter DEPTH=128, WIDTH=256) (
+    input logic clk,
+    // Port 1 is read only: ReadData is registered, so it reflects ReadAddr as sampled at the previous clock edge
+    input logic [$clog2(WIDTH)-1:0] ReadAddr,
+    output logic [DEPTH-1:0] ReadData,
+
+    // Port 2 is write only: WriteData is stored at WriteAddr on a clock edge where WriteEnable is high
+    input logic [$clog2(WIDTH)-1:0] WriteAddr,
+    input logic [DEPTH-1:0] WriteData,
+    input logic WriteEnable
+);
+
+    logic [WIDTH-1:0][DEPTH-1:0] StoredData; // WIDTH entries of DEPTH bits each, declared as one packed array
+
+    always_ff @(posedge clk) begin
+        ReadData <= StoredData[ReadAddr]; // nonblocking: a write to the same address on this edge is not forwarded, the old contents are read
+        if (WriteEnable) begin
+            StoredData[WriteAddr] <= WriteData;
+        end
+    end
+endmodule
diff --git a/wally-pipelined/src/cache/dmapped.sv b/wally-pipelined/src/cache/dmapped.sv
index 9138089e..346355bd 100644
--- a/wally-pipelined/src/cache/dmapped.sv
+++ b/wally-pipelined/src/cache/dmapped.sv
@@ -30,6 +30,7 @@ module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par
     // Pipeline stuff
     input logic clk,
     input logic reset,
+    input logic stall, // inverted to form the enable of ReadPAdrFlop below
     // If flush is high, invalidate the entire cache
     input logic flush,
     // Select which address to read (broken for efficiency's sake)
@@ -45,75 +46,80 @@ module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par
 );
 
     // Various compile-time constants
-    localparam integer WORDWIDTH = $clog2(WORDSIZE);
-    localparam integer LINEWIDTH = $clog2(LINESIZE/8);
-    localparam integer OFFSETWIDTH = $clog2(LINESIZE) - WORDWIDTH;
+    localparam integer WORDWIDTH = $clog2(WORDSIZE/8); // low address bits below the word offset (byte-in-word bits, assuming WORDSIZE is in bits - TODO confirm)
+    localparam integer OFFSETWIDTH = $clog2(LINESIZE/WORDSIZE); // address bits that select one word within a line
     localparam integer SETWIDTH = $clog2(NUMLINES);
-    localparam integer TAGWIDTH = $clog2(`XLEN) - $clog2(LINESIZE) - SETWIDTH;
+    localparam integer TAGWIDTH = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH; // all remaining upper address bits
+
+    localparam integer OFFSETBEGIN = WORDWIDTH; // address layout, LSB to MSB: word-internal bits, offset, set, tag
+    localparam integer OFFSETEND = OFFSETBEGIN+OFFSETWIDTH-1;
+    localparam integer SETBEGIN = OFFSETEND+1;
+    localparam integer SETEND = SETBEGIN + SETWIDTH - 1;
+    localparam integer TAGBEGIN = SETEND + 1;
+    localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1;
 
     // Machinery to read from and write to the correct addresses in memory
     logic [`XLEN-1:0] ReadPAdr;
+    logic [`XLEN-1:0] OldReadPAdr; // output of ReadPAdrFlop, whose data input is ReadPAdr
     logic [OFFSETWIDTH-1:0] ReadOffset, WriteOffset;
     logic [SETWIDTH-1:0] ReadSet, WriteSet;
     logic [TAGWIDTH-1:0] ReadTag, WriteTag;
+    logic [LINESIZE-1:0] ReadLine; // full line coming out of cachemem
+    logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed; // ReadLine viewed as an array of WORDSIZE-bit words
 
     // Machinery to check if a given read is valid and is the desired value
     logic [TAGWIDTH-1:0] DataTag;
-    logic [NUMLINES-1:0] ValidOut, NextValidOut;
+    logic [NUMLINES-1:0] ValidOut; // one valid bit per line
+
+    flopenr #(`XLEN) ReadPAdrFlop(clk, reset, ~stall, ReadPAdr, OldReadPAdr); // NOTE(review): flopenr is defined elsewhere; presumably an enabled, resettable register that holds OldReadPAdr while stall is high - confirm
 
     // Assign the read and write addresses in cache memory
     always_comb begin
-        assign ReadOffset = ReadLowerAdr[WORDWIDTH+OFFSETWIDTH-1:WORDWIDTH];
+        assign ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN]; // taken from the registered address; it indexes ReadLine, which is a registered SRAM output
         assign ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
-        assign ReadSet = ReadPAdr[LINEWIDTH+SETWIDTH-1:LINEWIDTH];
-        assign ReadTag = ReadPAdr[`XLEN-1:LINEWIDTH+SETWIDTH];
+        assign ReadSet = ReadPAdr[SETEND:SETBEGIN]; // taken from the unregistered address; it drives the SRAM read address
+        assign ReadTag = OldReadPAdr[TAGEND:TAGBEGIN]; // taken from the registered address; it is compared against DataTag
 
-        assign WriteOffset = WritePAdr[WORDWIDTH+OFFSETWIDTH-1:WORDWIDTH];
-        assign WriteSet = WritePAdr[LINEWIDTH+SETWIDTH-1:LINEWIDTH];
-        assign WriteTag = WritePAdr[`XLEN-1:LINEWIDTH+SETWIDTH];
+        assign WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN];
+        assign WriteSet = WritePAdr[SETEND:SETBEGIN];
+        assign WriteTag = WritePAdr[TAGEND:TAGBEGIN];
     end
 
-    SRAM2P1R1W #(.Depth(OFFSETWIDTH), .Width(WORDSIZE)) cachemem (
+    // Line storage: NUMLINES entries of LINESIZE bits each (DEPTH is the number of bits in one entry, WIDTH is the number of entries)
+    Sram1Read1Write #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem ( // clk and WriteEnable are not listed, so they connect through .*
         .*,
-        .RA1(ReadOffset),
-        .RD1(DataWord),
-        .REN1(1'b1),
-        .WA1(WriteOffset),
-        .WD1(WriteSet),
-        .WEN1(WriteEnable),
-        .BitWEN1(0)
+        .ReadAddr(ReadSet),
+        .ReadData(ReadLine),
+        .WriteAddr(WriteSet),
+        .WriteData(WriteLine)
+    );
+    Sram1Read1Write #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags ( // tag storage, read and written at the same set addresses as cachemem
+        .*,
+        .ReadAddr(ReadSet),
+        .ReadData(DataTag),
+        .WriteAddr(WriteSet),
+        .WriteData(WriteTag)
     );
 
-    SRAM2P1R1W #(.Depth(OFFSETWIDTH), .Width(TAGWIDTH)) cachetags (
-        .*,
-        .RA1(ReadOffset),
-        .RD1(DataTag),
-        .REN1(1'b1),
-        .WA1(WriteOffset),
-        .WD1(WriteTag),
-        .WEN1(WriteEnable),
-        .BitWEN1(0)
-    );
+    // Pick the requested word out of the line that was read
+    assign DataWord = ReadLineTransformed[ReadOffset];
+    genvar i;
+    generate
+        for (i=0; i < LINESIZE/WORDSIZE; i++) begin // slice ReadLine into LINESIZE/WORDSIZE words, word 0 in the least-significant bits
+            assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE];
+        end
+    endgenerate
 
     // Correctly handle the valid bits
-    always_comb begin
-        if (WriteEnable) begin
-            assign NextValidOut = {NextValidOut[NUMLINES-1:WriteSet+1], 1'b1, NextValidOut[WriteSet-1:0]};
-        end else begin
-            assign NextValidOut = ValidOut;
-        end
-    end
-    always_ff @(posedge clk, reset, flush) begin
+    always_ff @(posedge clk, posedge reset) begin // NOTE(review): flush is tested together with reset below but is not in the sensitivity list, so flush only takes effect on a clock edge
         if (reset || flush) begin
             ValidOut <= {NUMLINES{1'b0}};
         end else begin
-            ValidOut <= NextValidOut;
+            if (WriteEnable) begin // a write marks the written set as valid
+                ValidOut[WriteSet] <= 1;
+            end
         end
-    end
-
-    // Determine if the line coming out is valid and matches the desired data
-    always_comb begin
-        assign DataValid = ValidOut[ReadSet] && (DataTag == ReadTag);
+        DataValid <= ValidOut[ReadSet] && (DataTag == ReadTag); // NOTE(review): outside the reset/flush branch, so DataValid is a register with no reset value inside an asynchronously reset block - confirm this is intended
     end
 endmodule
 
diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv
index 0d953760..e42edb49 100644
--- a/wally-pipelined/src/ifu/icache.sv
+++ b/wally-pipelined/src/ifu/icache.sv
@@ -31,9 +31,9 @@ module icache(
     input logic StallF, StallD,
     input logic FlushD,
     // Upper bits of physical address for PC
-    input logic [`XLEN-1:12] UpperPCPF,
+    input logic [`XLEN-1:12] UpperPCNextPF,
     // Lower 12 bits of virtual PC address, since it's faster this way
-    input logic [11:0] LowerPCF,
+    input logic [11:0] LowerPCNextF,
     // Data read in from the ebu unit
     input logic [`XLEN-1:0] InstrInF,
     input logic InstrAckF,
@@ -65,8 +65,9 @@ module icache(
     logic [`XLEN-1:0] ICacheMemReadData;
     logic ICacheMemReadValid;
 
-    rodirectmappedmem #(.LINESIZE(ICACHELINESIZE), .NUMLINES(ICACHENUMLINES)) cachemem(
+    rodirectmappedmem #(.LINESIZE(ICACHELINESIZE), .NUMLINES(ICACHENUMLINES), .WORDSIZE(`XLEN)) cachemem( // the cache word size is set to XLEN
         .*,
+        .stall(StallF && (~ICacheStallF || ~InstrAckF)), // equivalent to StallF && ~(ICacheStallF && InstrAckF)
         .flush(FlushMem),
         .ReadUpperPAdr(ICacheMemReadUpperPAdr),
         .ReadLowerAdr(ICacheMemReadLowerAdr),
@@ -79,6 +80,7 @@ module icache(
 
     icachecontroller #(.LINESIZE(ICACHELINESIZE)) controller(.*);
 
+    // For now, assume no writes to executable memory, so the cache flush input is tied low
     assign FlushMem = 1'b0;
 endmodule
 
@@ -90,9 +92,9 @@ module icachecontroller #(parameter LINESIZE = 256) (
 
     // Input the address to read
     // The upper bits of the physical pc
-    input logic [`XLEN-1:12] UpperPCPF,
+    input logic [`XLEN-1:12] UpperPCNextPF,
     // The lower bits of the virtual pc
-    input logic [11:0] LowerPCF,
+    input logic [11:0] LowerPCNextF,
 
     // Signals to/from cache memory
     // The read coming out of it
@@ -130,6 +132,7 @@ module icachecontroller #(parameter LINESIZE = 256) (
     logic FlushDLastCycleN;
     logic PCPMisalignedF;
     const logic [31:0] NOP = 32'h13;
+    logic [`XLEN-1:0] PCPF; // output of PCPFFlop, whose data input is {UpperPCNextPF, LowerPCNextF}
     // Misaligned signals
     logic [`XLEN:0] MisalignedInstrRawF;
     logic MisalignedStall;
@@ -143,18 +146,19 @@ module icachecontroller #(parameter LINESIZE = 256) (
 
     generate
         if (`XLEN == 32) begin
-            assign AlignedInstrRawF = LowerPCF[1] ? MisalignedInstrRawF : ICacheMemReadData;
-            assign PCPMisalignedF = LowerPCF[1] && ~CompressedF;
+            assign AlignedInstrRawF = PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData; // PCPF[1] set: use the misaligned path instead of the word that was read
+            assign PCPMisalignedF = PCPF[1] && ~CompressedF;
         end else begin
-            assign AlignedInstrRawF = LowerPCF[2]
-                ? (LowerPCF[1] ? MisalignedInstrRawF : ICacheMemReadData[63:32])
-                : (LowerPCF[1] ? ICacheMemReadData[47:16] : ICacheMemReadData[31:0]);
-            assign PCPMisalignedF = LowerPCF[2] && LowerPCF[1] && ~CompressedF;
+            assign AlignedInstrRawF = PCPF[2] // PCPF[2:1] picks the 32-bit window at bit 0, 16 or 32 of the read data, or the misaligned path when both bits are set
+                ? (PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData[63:32])
+                : (PCPF[1] ? ICacheMemReadData[47:16] : ICacheMemReadData[31:0]);
+            assign PCPMisalignedF = PCPF[2] && PCPF[1] && ~CompressedF;
         end
     endgenerate
 
     flopenr #(32) AlignedInstrRawDFlop(clk, reset, ~StallD, AlignedInstrRawF, AlignedInstrRawD);
     flopr #(1) FlushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCycleN | ~StallF), FlushDLastCycleN);
+    flopenr #(`XLEN) PCPFFlop(clk, reset, ~StallF, {UpperPCNextPF, LowerPCNextF}, PCPF); // enable input is ~StallF
     mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCycleN, InstrRawD);
 
     // Stall for faults or misaligned reads
@@ -197,12 +201,13 @@ module icachecontroller #(parameter LINESIZE = 256) (
     // Pick the correct address to read
     generate
         if (`XLEN == 32) begin
-            assign ICacheMemReadLowerAdr = {LowerPCF[11:2] + (PCPMisalignedF & ~MisalignedState), 2'b00};
+            assign ICacheMemReadLowerAdr = {LowerPCNextF[11:2] + (PCPMisalignedF & ~MisalignedState), 2'b00}; // word-aligned address, advanced by one word when PCPMisalignedF is set and MisalignedState is clear
         end else begin
-            assign ICacheMemReadLowerAdr = {LowerPCF[11:3] + (PCPMisalignedF & ~MisalignedState), 3'b00};
+            assign ICacheMemReadLowerAdr = {LowerPCNextF[11:3] + (PCPMisalignedF & ~MisalignedState), 3'b00}; // NOTE(review): 3'b00 is a 3-bit zero written with only two digits
         end
     endgenerate
-    assign ICacheMemReadUpperPAdr = UpperPCPF;
+    // TODO Handle reading instructions that cross page boundaries
+    assign ICacheMemReadUpperPAdr = UpperPCNextPF;
 
 
     // Handle cache faults
diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv
index 3347d80c..2ddd6706 100644
--- a/wally-pipelined/src/ifu/ifu.sv
+++ b/wally-pipelined/src/ifu/ifu.sv
@@ -71,7 +71,7 @@ module ifu (
   logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM;
   logic PrivilegedChangePCM;
   logic IllegalCompInstrD;
-  logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkM, PCPF;
+  logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkM, PCNextPF;
   logic CompressedF;
   logic [31:0] InstrRawD, InstrE, InstrW;
   logic [31:0] nop = 32'h00000013; // instruction for NOP
@@ -98,12 +98,12 @@ module ifu (
   // assign InstrReadF = 1; // *** & ICacheMissF; add later
 
   // jarred 2021-03-14 Add instrution cache block to remove rd2
-  assign PCPF = PCF; // Temporary workaround until iTLB is live
+  assign PCNextPF = PCNextF; // Temporary workaround until iTLB is live: the physical next PC is taken to be the virtual next PC
   icache ic(
     .*,
     .InstrPAdrF(ICacheInstrPAdrF),
-    .UpperPCPF(PCPF[`XLEN-1:12]),
-    .LowerPCF(PCF[11:0])
+    .UpperPCNextPF(PCNextPF[`XLEN-1:12]),
+    .LowerPCNextF(PCNextF[11:0])
   );
   // Prioritize the iTLB for reads if it wants one
   mux2 #(`XLEN) instrPAdrMux(ICacheInstrPAdrF, ITLBInstrPAdrF, ITLBMissF, InstrPAdrF);
diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv
index 09c63d56..fbd4a829 100644
--- a/wally-pipelined/testbench/testbench-imperas.sv
+++ b/wally-pipelined/testbench/testbench-imperas.sv
@@ -393,7 +393,7 @@ module testbench();
         // if (`F_SUPPORTED) tests = {tests64f, tests};
         // if (`D_SUPPORTED) tests = {tests64d, tests};
         if (`A_SUPPORTED) tests = {tests, tests64a};
-        if (`MEM_VIRTMEM) tests = {tests64mmu, tests};
+        if (`MEM_VIRTMEM) tests = {tests, tests64mmu}; // MMU tests are now placed after the existing tests instead of before them
       end
       //tests = {tests64a, tests};
       // tests = {tests, tests64p};