Optimization. Able to remove hptw address muxes from the E stage.

This commit is contained in:
Ross Thompson 2022-09-08 15:51:18 -05:00
parent 0904951a8c
commit 7f1ae039b0
4 changed files with 15 additions and 16 deletions

View File

@ -52,6 +52,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
input logic IgnoreRequestTLB, input logic IgnoreRequestTLB,
input logic TrapM, input logic TrapM,
input logic Cacheable, input logic Cacheable,
input logic SelReplay,
// Bus fsm interface // Bus fsm interface
output logic CacheFetchLine, output logic CacheFetchLine,
output logic CacheWriteLine, output logic CacheWriteLine,
@ -123,7 +124,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
// and FlushAdr when handling D$ flushes // and FlushAdr when handling D$ flushes
mux3 #(SETLEN) AdrSelMux( mux3 #(SETLEN) AdrSelMux(
.d0(NextAdr[SETTOP-1:OFFSETLEN]), .d1(PAdr[SETTOP-1:OFFSETLEN]), .d2(FlushAdr), .d0(NextAdr[SETTOP-1:OFFSETLEN]), .d1(PAdr[SETTOP-1:OFFSETLEN]), .d2(FlushAdr),
.s({SelFlush, SelAdr}), .y(RAdr)); .s({SelFlush, (SelAdr | SelReplay)}), .y(RAdr));
// Array of cache ways, along with victim, hit, dirty, and read merging logic // Array of cache ways, along with victim, hit, dirty, and read merging logic
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN)

View File

@ -222,6 +222,7 @@ module ifu (
.CacheFetchLine(ICacheFetchLine), .CacheFetchLine(ICacheFetchLine),
.CacheWriteLine(), .ReadDataWord(FinalInstrRawF), .CacheWriteLine(), .ReadDataWord(FinalInstrRawF),
.Cacheable(CacheableF), .Cacheable(CacheableF),
.SelReplay('0),
.CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess),
.ByteMask('0), .WordCount('0), .SelBusWord('0), .ByteMask('0), .WordCount('0), .SelBusWord('0),
.FinalWriteData('0), .FinalWriteData('0),

View File

@ -104,7 +104,6 @@ module lsu (
logic [6:0] LSUFunct7M; logic [6:0] LSUFunct7M;
logic [1:0] LSUAtomicM; logic [1:0] LSUAtomicM;
(* mark_debug = "true" *) logic [`XLEN+1:0] PreLSUPAdrM; (* mark_debug = "true" *) logic [`XLEN+1:0] PreLSUPAdrM;
logic [11:0] LSUAdrE;
logic SelDTIM; logic SelDTIM;
logic CPUBusy; logic CPUBusy;
logic DCacheStallM; logic DCacheStallM;
@ -118,7 +117,8 @@ module lsu (
logic [`LLEN-1:0] IMAFWriteDataM; logic [`LLEN-1:0] IMAFWriteDataM;
logic [`LLEN-1:0] ReadDataM; logic [`LLEN-1:0] ReadDataM;
logic [(`LLEN-1)/8:0] ByteMaskM; logic [(`LLEN-1)/8:0] ByteMaskM;
logic SelReplay;
flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM);
assign IEUAdrExtM = {2'b00, IEUAdrM}; assign IEUAdrExtM = {2'b00, IEUAdrM};
assign IEUAdrExtE = {2'b00, IEUAdrE}; assign IEUAdrExtE = {2'b00, IEUAdrE};
@ -131,17 +131,16 @@ module lsu (
if(`VIRTMEM_SUPPORTED) begin : VIRTMEM_SUPPORTED if(`VIRTMEM_SUPPORTED) begin : VIRTMEM_SUPPORTED
lsuvirtmem lsuvirtmem(.clk, .reset, .StallW, .MemRWM(NonDTIMMemRWM), .AtomicM, .ITLBMissF, .ITLBWriteF, lsuvirtmem lsuvirtmem(.clk, .reset, .StallW, .MemRWM(NonDTIMMemRWM), .AtomicM, .ITLBMissF, .ITLBWriteF,
.DTLBMissM, .DTLBWriteM, .InstrDAPageFaultF, .DataDAPageFaultM, .DTLBMissM, .DTLBWriteM, .InstrDAPageFaultF, .DataDAPageFaultM, .SelReplay,
.TrapM, .DCacheStallM, .SATP_REGW, .PCF, .TrapM, .DCacheStallM, .SATP_REGW, .PCF,
.STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW,
.ReadDataM(ReadDataM[`XLEN-1:0]), .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M, .ReadDataM(ReadDataM[`XLEN-1:0]), .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M,
.IEUAdrExtM, .PTE, .IMWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, .IEUAdrE, .IEUAdrExtM, .PTE, .IMWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM,
.LSUAdrE, .PreLSUPAdrM, .CPUBusy, .InterlockStall, .SelHPTW, .PreLSUPAdrM, .CPUBusy, .InterlockStall, .SelHPTW,
.IgnoreRequestTLB); .IgnoreRequestTLB);
end else begin end else begin
assign {InterlockStall, SelHPTW, PTE, PageType, DTLBWriteM, ITLBWriteF, IgnoreRequestTLB} = '0; assign {InterlockStall, SelHPTW, PTE, PageType, DTLBWriteM, ITLBWriteF, IgnoreRequestTLB} = '0;
assign CPUBusy = StallW; assign PreLSURWM = NonDTIMMemRWM; assign CPUBusy = StallW; assign PreLSURWM = NonDTIMMemRWM;
assign LSUAdrE = IEUAdrE[11:0];
assign PreLSUPAdrM = IEUAdrExtM; assign PreLSUPAdrM = IEUAdrExtM;
assign LSUFunct3M = Funct3M; assign LSUFunct7M = Funct7M; assign LSUAtomicM = AtomicM; assign LSUFunct3M = Funct3M; assign LSUFunct7M = Funct7M; assign LSUAtomicM = AtomicM;
assign IMWriteDataM = WriteDataM; assign IMWriteDataM = WriteDataM;
@ -251,9 +250,9 @@ module lsu (
cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
.NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
.clk, .reset, .CPUBusy, .SelBusWord, .RW(LSURWM), .Atomic(LSUAtomicM), .clk, .reset, .CPUBusy, .SelBusWord, .RW(LSURWM), .Atomic(LSUAtomicM),
.FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), .FlushCache(FlushDCacheM), .NextAdr(IEUAdrE[11:0]), .PAdr(LSUPAdrM),
.ByteMask(ByteMaskM), .WordCount, .ByteMask(ByteMaskM), .WordCount,
.FinalWriteData(LSUWriteDataM), .Cacheable(CacheableM), .FinalWriteData(LSUWriteDataM), .Cacheable(CacheableM), .SelReplay,
.CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
.IgnoreRequestTLB, .TrapM, .CacheCommitted(DCacheCommittedM), .IgnoreRequestTLB, .TrapM, .CacheCommitted(DCacheCommittedM),
.CacheBusAdr(DCacheBusAdr), .ReadDataWord(ReadDataWordM), .CacheBusAdr(DCacheBusAdr), .ReadDataWord(ReadDataWordM),

View File

@ -38,6 +38,7 @@ module lsuvirtmem(
input logic DTLBMissM, input logic DTLBMissM,
output logic DTLBWriteM, output logic DTLBWriteM,
input logic InstrDAPageFaultF, input logic InstrDAPageFaultF,
output logic SelReplay,
input logic DataDAPageFaultM, input logic DataDAPageFaultM,
input logic TrapM, input logic TrapM,
input logic DCacheStallM, input logic DCacheStallM,
@ -52,13 +53,11 @@ module lsuvirtmem(
output logic [2:0] LSUFunct3M, output logic [2:0] LSUFunct3M,
input logic [6:0] Funct7M, input logic [6:0] Funct7M,
output logic [6:0] LSUFunct7M, output logic [6:0] LSUFunct7M,
input logic [`XLEN-1:0] IEUAdrE,
output logic [`XLEN-1:0] PTE, output logic [`XLEN-1:0] PTE,
output logic [`XLEN-1:0] IMWriteDataM, output logic [`XLEN-1:0] IMWriteDataM,
output logic [1:0] PageType, output logic [1:0] PageType,
output logic [1:0] PreLSURWM, output logic [1:0] PreLSURWM,
output logic [1:0] LSUAtomicM, output logic [1:0] LSUAtomicM,
output logic [11:0] LSUAdrE,
output logic [`XLEN+1:0] PreLSUPAdrM, output logic [`XLEN+1:0] PreLSUPAdrM,
input logic [`XLEN+1:0] IEUAdrExtM, // *** can move internally. input logic [`XLEN+1:0] IEUAdrExtM, // *** can move internally.
@ -74,7 +73,6 @@ module lsuvirtmem(
logic [1:0] HPTWRW; logic [1:0] HPTWRW;
logic [2:0] HPTWSize; logic [2:0] HPTWSize;
logic SelReplayMemE; logic SelReplayMemE;
logic [11:0] PreLSUAdrE;
logic ITLBMissOrDAFaultF, ITLBMissOrDAFaultNoTrapF; logic ITLBMissOrDAFaultF, ITLBMissOrDAFaultNoTrapF;
logic DTLBMissOrDAFaultM, DTLBMissOrDAFaultNoTrapM; logic DTLBMissOrDAFaultM, DTLBMissOrDAFaultNoTrapM;
logic SelHPTWAdr; logic SelHPTWAdr;
@ -97,8 +95,10 @@ module lsuvirtmem(
// Once the walk is done and it is time to update the DTLB we need to switch back // Once the walk is done and it is time to update the DTLB we need to switch back
// to the orignal data virtual address. // to the orignal data virtual address.
assign SelHPTWAdr = SelHPTW & ~DTLBWriteM; assign SelHPTWAdr = SelHPTW & ~(DTLBWriteM | ITLBWriteF);
assign SelReplay = SelHPTWAdr | SelReplayMemE;
// multiplex the outputs to LSU // multiplex the outputs to LSU
if(`XLEN+2-`PA_BITS > 0) begin if(`XLEN+2-`PA_BITS > 0) begin
logic [(`XLEN+2-`PA_BITS)-1:0] zeros; logic [(`XLEN+2-`PA_BITS)-1:0] zeros;
@ -109,12 +109,10 @@ module lsuvirtmem(
mux2 #(3) sizemux(Funct3M, HPTWSize, SelHPTW, LSUFunct3M); mux2 #(3) sizemux(Funct3M, HPTWSize, SelHPTW, LSUFunct3M);
mux2 #(7) funct7mux(Funct7M, 7'b0, SelHPTW, LSUFunct7M); mux2 #(7) funct7mux(Funct7M, 7'b0, SelHPTW, LSUFunct7M);
mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LSUAtomicM); mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LSUAtomicM);
mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, PreLSUAdrE);
mux2 #(`XLEN+2) lsupadrmux(IEUAdrExtM, HPTWAdrExt, SelHPTWAdr, PreLSUPAdrM); mux2 #(`XLEN+2) lsupadrmux(IEUAdrExtM, HPTWAdrExt, SelHPTWAdr, PreLSUPAdrM);
if(`HPTW_WRITES_SUPPORTED) if(`HPTW_WRITES_SUPPORTED)
mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, IMWriteDataM); mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, IMWriteDataM);
else assign IMWriteDataM = WriteDataM; else assign IMWriteDataM = WriteDataM;
mux2 #(12) replaymux(PreLSUAdrE, IEUAdrExtM[11:0], SelReplayMemE, LSUAdrE); // replay cpu request after hptw. *** redudant with mux in cache.
// always block interrupts when using the hardware page table walker. // always block interrupts when using the hardware page table walker.
assign CPUBusy = StallW & ~SelHPTW; assign CPUBusy = StallW & ~SelHPTW;