1. Modified the cache so it can handle the reset delay internally. This removes the mux from the IFU.

2. Removed the write address delay from simpleram.sv
3. Fixed rv32tim and rv32ic modes to handle misalignment correctly.
4. Added imperas32i and imperas32c to rv32tim mode.
This commit is contained in:
Ross Thompson 2022-01-26 17:37:04 -06:00
parent 2c982dca03
commit 23c4ba2777
6 changed files with 62 additions and 86 deletions

View File

@ -68,7 +68,7 @@ for test in tests32gc:
grepstr="All tests ran without failures")
configs.append(tc)
tests32ic = ["arch32i", "arch32c"]
tests32ic = ["arch32i", "arch32c", "imperas32i", "imperas32c"]
for test in tests32ic:
tc = TestCase(
name=test,
@ -77,7 +77,7 @@ for test in tests32ic:
grepstr="All tests ran without failures")
configs.append(tc)
tests32tim = ["arch32i", "arch32c"]
tests32tim = ["arch32i", "arch32c", "imperas32i", "imperas32c"]
for test in tests32tim:
tc = TestCase(
name=test,

View File

@ -79,6 +79,8 @@ module cachefsm
);
logic AnyCPUReqM;
logic [1:0] PreSelAdr;
logic resetDelay;
typedef enum {STATE_READY,
@ -107,6 +109,12 @@ module cachefsm
assign CacheAccess = AnyCPUReqM & CurrState == STATE_READY;
assign CacheMiss = CacheAccess & ~CacheHit;
// Special case on reset. When the FSM first exits reset,
// PCNextF will no longer be pointing to the correct address,
// but PCF will be the reset vector.
flop #(1) resetDelayReg(.clk, .d(reset), .q(resetDelay));
assign SelAdr = resetDelay ? 2'b01 : PreSelAdr;
always_ff @(posedge clk)
if (reset) CurrState <= #1 STATE_READY;
else CurrState <= #1 NextState;
@ -114,7 +122,7 @@ module cachefsm
// next state logic and some state outputs.
always_comb begin
CacheStall = 1'b0;
SelAdr = 2'b00;
PreSelAdr = 2'b00;
SetValid = 1'b0;
ClearValid = 1'b0;
SetDirty = 1'b0;
@ -137,7 +145,7 @@ module cachefsm
STATE_READY: begin
CacheStall = 1'b0;
SelAdr = 2'b00;
PreSelAdr = 2'b00;
SRAMWordWriteEnable = 1'b0;
SetDirty = 1'b0;
LRUWriteEn = 1'b0;
@ -150,7 +158,7 @@ module cachefsm
// PTW ready the CPU will stall.
// The page table walker asserts its control 1 cycle
// after the TLBs miss.
SelAdr = 2'b01;
PreSelAdr = 2'b01;
NextState = STATE_READY;
end
@ -164,12 +172,12 @@ module cachefsm
// amo hit
else if(Atomic[1] & (&RW) & CacheHit) begin
SelAdr = 2'b01;
PreSelAdr = 2'b01;
CacheStall = 1'b0;
if(CPUBusy) begin
NextState = STATE_CPU_BUSY_FINISH_AMO;
SelAdr = 2'b01;
PreSelAdr = 2'b01;
end
else begin
SRAMWordWriteEnable = 1'b1;
@ -185,7 +193,7 @@ module cachefsm
if(CPUBusy) begin
NextState = STATE_CPU_BUSY;
SelAdr = 2'b01;
PreSelAdr = 2'b01;
end
else begin
NextState = STATE_READY;
@ -193,7 +201,7 @@ module cachefsm
end
// write hit valid cached
else if (RW[0] & CacheHit) begin
SelAdr = 2'b01;
PreSelAdr = 2'b01;
CacheStall = 1'b0;
SRAMWordWriteEnable = 1'b1;
SetDirty = 1'b1;
@ -201,7 +209,7 @@ module cachefsm
if(CPUBusy) begin
NextState = STATE_CPU_BUSY;
SelAdr = 2'b01;
PreSelAdr = 2'b01;
end
else begin
NextState = STATE_READY;
@ -218,7 +226,7 @@ module cachefsm
STATE_MISS_FETCH_WDV: begin
CacheStall = 1'b1;
SelAdr = 2'b01;
PreSelAdr = 2'b01;
if (CacheBusAck) begin
NextState = STATE_MISS_FETCH_DONE;
@ -229,7 +237,7 @@ module cachefsm
STATE_MISS_FETCH_DONE: begin
CacheStall = 1'b1;
SelAdr = 2'b01;
PreSelAdr = 2'b01;
if(VictimDirty) begin
NextState = STATE_MISS_EVICT_DIRTY;
CacheWriteLine = 1'b1;
@ -242,14 +250,14 @@ module cachefsm
SRAMLineWriteEnable = 1'b1;
CacheStall = 1'b1;
NextState = STATE_MISS_READ_WORD;
SelAdr = 2'b01;
PreSelAdr = 2'b01;
SetValid = 1'b1;
ClearDirty = 1'b1;
//LRUWriteEn = 1'b1; // DO not update LRU on SRAM fetch update. Wait for subsequent read/write
end
STATE_MISS_READ_WORD: begin
SelAdr = 2'b01;
PreSelAdr = 2'b01;
CacheStall = 1'b1;
if (RW[0] & ~Atomic[1]) begin // handles stores and amo write.
NextState = STATE_MISS_WRITE_WORD;
@ -261,12 +269,12 @@ module cachefsm
end
STATE_MISS_READ_WORD_DELAY: begin
//SelAdr = 2'b01;
//PreSelAdr = 2'b01;
SRAMWordWriteEnable = 1'b0;
SetDirty = 1'b0;
LRUWriteEn = 1'b0;
if(&RW & Atomic[1]) begin // amo write
SelAdr = 2'b01;
PreSelAdr = 2'b01;
if(CPUBusy) begin
NextState = STATE_CPU_BUSY_FINISH_AMO;
end
@ -280,7 +288,7 @@ module cachefsm
LRUWriteEn = 1'b1;
if(CPUBusy) begin
NextState = STATE_CPU_BUSY;
SelAdr = 2'b01;
PreSelAdr = 2'b01;
end
else begin
NextState = STATE_READY;
@ -291,11 +299,11 @@ module cachefsm
STATE_MISS_WRITE_WORD: begin
SRAMWordWriteEnable = 1'b1;
SetDirty = 1'b1;
SelAdr = 2'b01;
PreSelAdr = 2'b01;
LRUWriteEn = 1'b1;
if(CPUBusy) begin
NextState = STATE_CPU_BUSY;
SelAdr = 2'b01;
PreSelAdr = 2'b01;
end
else begin
NextState = STATE_READY;
@ -304,7 +312,7 @@ module cachefsm
STATE_MISS_EVICT_DIRTY: begin
CacheStall = 1'b1;
SelAdr = 2'b01;
PreSelAdr = 2'b01;
SelEvict = 1'b1;
if(CacheBusAck) begin
NextState = STATE_MISS_WRITE_CACHE_LINE;
@ -315,10 +323,10 @@ module cachefsm
STATE_CPU_BUSY: begin
SelAdr = 2'b00;
PreSelAdr = 2'b00;
if(CPUBusy) begin
NextState = STATE_CPU_BUSY;
SelAdr = 2'b01;
PreSelAdr = 2'b01;
end
else begin
NextState = STATE_READY;
@ -326,7 +334,7 @@ module cachefsm
end
STATE_CPU_BUSY_FINISH_AMO: begin
SelAdr = 2'b01;
PreSelAdr = 2'b01;
SRAMWordWriteEnable = 1'b0;
SetDirty = 1'b0;
LRUWriteEn = 1'b0;
@ -345,13 +353,13 @@ module cachefsm
// initialize flush counters
SelFlush = 1'b1;
CacheStall = 1'b1;
SelAdr = 2'b10;
PreSelAdr = 2'b10;
NextState = STATE_FLUSH_CHECK;
end
STATE_FLUSH_CHECK: begin
CacheStall = 1'b1;
SelAdr = 2'b10;
PreSelAdr = 2'b10;
SelFlush = 1'b1;
if(VictimDirty) begin
NextState = STATE_FLUSH_WRITE_BACK;
@ -360,7 +368,7 @@ module cachefsm
end else if (FlushAdrFlag & FlushWayFlag) begin
NextState = STATE_READY;
CacheStall = 1'b0;
SelAdr = 2'b00;
PreSelAdr = 2'b00;
FlushWayCntEn = 1'b0;
end else if(FlushWayFlag) begin
NextState = STATE_FLUSH_INCR;
@ -375,7 +383,7 @@ module cachefsm
STATE_FLUSH_INCR: begin
CacheStall = 1'b1;
SelAdr = 2'b10;
PreSelAdr = 2'b10;
SelFlush = 1'b1;
FlushWayCntRst = 1'b1;
NextState = STATE_FLUSH_CHECK;
@ -383,7 +391,7 @@ module cachefsm
STATE_FLUSH_WRITE_BACK: begin
CacheStall = 1'b1;
SelAdr = 2'b10;
PreSelAdr = 2'b10;
SelFlush = 1'b1;
if(CacheBusAck) begin
NextState = STATE_FLUSH_CLEAR_DIRTY;
@ -397,12 +405,12 @@ module cachefsm
ClearDirty = 1'b1;
VDWriteEnable = 1'b1;
SelFlush = 1'b1;
SelAdr = 2'b10;
PreSelAdr = 2'b10;
FlushWayCntEn = 1'b0;
if(FlushAdrFlag & FlushWayFlag) begin
NextState = STATE_READY;
CacheStall = 1'b0;
SelAdr = 2'b00;
PreSelAdr = 2'b00;
end else if (FlushWayFlag) begin
NextState = STATE_FLUSH_INCR;
FlushAdrCntEn = 1'b1;

View File

@ -39,20 +39,17 @@ module simpleram #(parameter BASE=0, RANGE = 65535) (
);
logic [`XLEN-1:0] RAM[BASE>>(1+`XLEN/32):(RANGE+BASE)>>1+(`XLEN/32)];
logic [31:0] ad;
flop #(32) areg(clk, a, ad); // *** redesign external interface so this delay isn't needed
/* verilator lint_off WIDTH */
if (`XLEN == 64) begin:ramrw
always_ff @(posedge clk) begin
rd <= RAM[a[31:3]];
if (we) RAM[ad[31:3]] <= #1 wd;
if (we) RAM[a[31:3]] <= #1 wd;
end
end else begin
always_ff @(posedge clk) begin:ramrw
rd <= RAM[a[31:2]];
if (we) RAM[ad[31:2]] <= #1 wd;
if (we) RAM[a[31:2]] <= #1 wd;
end
end
/* verilator lint_on WIDTH */

View File

@ -232,12 +232,10 @@ module ifu (
if (`MEM_IROM) begin : irom
logic [`XLEN-1:0] FinalInstrRawF_FIXME;
// *** adjust interface so write address doesn't need delaying
// *** modify to be a ROM rather than RAM
simpleram #(
.BASE(`RAM_BASE), .RANGE(`RAM_RANGE)) ram (
.clk,
.a(CPUBusy ? PCPF[31:0] : PCNextFMux[31:0]), // mux is also inside $, have to replay address if CPU is stalled.
.a(CPUBusy | reset ? PCPF[31:0] : PCNextFMux[31:0]), // mux is also inside $, have to replay address if CPU is stalled.
.we(1'b0),
.wd(0), .rd(FinalInstrRawF_FIXME));
assign FinalInstrRawF = FinalInstrRawF_FIXME[31:0];
@ -328,50 +326,23 @@ module ifu (
assign PrivilegedChangePCM = RetM | TrapM;
mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F),
.d1(BPPredPCF),
.s(SelBPPredF),
.y(PCNext0F));
mux2 #(`XLEN) pcmux1(.d0(PCNext0F),
.d1(PCCorrectE),
.s(BPPredWrongE),
.y(PCNext1F));
// December 20, 2021 Ross Thompson, If instructions in ID and IF are already invalid we don't pick PCE on icache invalidate.
// This only happens because of a branch class misprediction: the Fence instruction was incorrectly predicted as a branch,
// which means that on the previous cycle BPPredWrongE updated PCNextF to the correct fall-through address.
// To fix this we need to select the correct address PCF as the next PCNextF. Unfortunately we must still flush the instruction in IF
// as we are deliberately invalidating the icache. This address has to be refetched by the icache.
mux2 #(`XLEN) pcmux2(.d0(PCNext1F),
.d1(PCBPWrongInvalidate),
.s(InvalidateICacheM),
.y(PCNext2F));
mux2 #(`XLEN) pcmux3(.d0(PCNext2F),
.d1(PrivilegedNextPCM),
.s(PrivilegedChangePCM),
//.y(UnalignedPCNextF));
.y(PCNext3F));
mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F), .d1(BPPredPCF), .s(SelBPPredF), .y(PCNext0F));
mux2 #(`XLEN) pcmux1(.d0(PCNext0F), .d1(PCCorrectE), .s(BPPredWrongE), .y(PCNext1F));
// The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE.
mux2 #(`XLEN) pccorrectemux(.d0(PCLinkE), .d1(IEUAdrE), .s(PCSrcE), .y(PCCorrectE));
mux2 #(`XLEN) pcmux2(.d0(PCNext1F), .d1(PCBPWrongInvalidate), .s(InvalidateICacheM), .y(PCNext2F));
// Mux only required on instruction class misprediction.
mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF), .s(BPPredWrongM), .y(PCBPWrongInvalidate));
mux2 #(`XLEN) pcmux3(.d0(PCNext2F), .d1(PrivilegedNextPCM), .s(PrivilegedChangePCM), .y(PCNext3F));
// This mux is required as PCNextF needs to be the valid reset vector during reset.
// Resetting PCF does not accomplish this as PCNextF will be +2/4 more than PCF.
mux2 #(`XLEN) pcmux4(.d0(PCNext3F),
.d1(`RESET_VECTOR),
.s(`MEM_IROM ? reset : reset_q),
.y(UnalignedPCNextF));
flop #(1) resetReg (.clk(clk), .d(reset),.q(reset_q)); // delay reset
flopenrc #(1) BPPredWrongMReg(.clk, .reset, .en(~StallM), .clear(FlushM),
.d(BPPredWrongE), .q(BPPredWrongM));
mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF),
.s(BPPredWrongM), // & InvalidateICacheM *** check with linux.
.y(PCBPWrongInvalidate));
// The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE.
assign PCCorrectE = PCSrcE ? IEUAdrE : PCLinkE;
//mux2 #(`XLEN) pcmux4(.d0(PCNext3F), .d1(`RESET_VECTOR), .s(`MEM_IROM ? reset : reset_q), .y(UnalignedPCNextF));
// mux2 #(`XLEN) pcmux4(.d0(PCNext3F), .d1(`RESET_VECTOR), .s(reset), .y(UnalignedPCNextF)); // ******* probably can get rid of by making reset SelAdr = 01
assign UnalignedPCNextF = PCNext3F;
flopenrc #(1) BPPredWrongMReg(.clk, .reset, .en(~StallM), .clear(FlushM), .d(BPPredWrongE), .q(BPPredWrongM));
assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment
flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF);
@ -404,7 +375,7 @@ module ifu (
end else begin : bpred
assign BPPredPCF = '0;
assign BPPredWrongM = PCSrcE;
assign BPPredWrongE = PCSrcE;
assign {SelBPPredF, BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM} = '0;
end
@ -428,7 +399,6 @@ module ifu (
// *** combine these with others in better way, including M, F
// Misaligned PC logic
// instruction address misalignment is generated by the target of control flow instructions, not
// the fetch itself.

View File

@ -127,11 +127,11 @@ module busfsm #(parameter integer WordCountThreshold,
(BusCurrState == STATE_BUS_FETCH) |
(BusCurrState == STATE_BUS_WRITE);
assign PreCntEn = BusCurrState == STATE_BUS_FETCH | BusCurrState == STATE_BUS_WRITE;
assign UnCachedLSUBusWrite = (BusCurrState == STATE_BUS_READY & UnCachedAccess & (LSURWM[0])) |
assign UnCachedLSUBusWrite = (BusCurrState == STATE_BUS_READY & UnCachedAccess & (LSURWM[0] & ~IgnoreRequest)) |
(BusCurrState == STATE_BUS_UNCACHED_WRITE);
assign LSUBusWrite = UnCachedLSUBusWrite | (BusCurrState == STATE_BUS_WRITE);
assign UnCachedLSUBusRead = (BusCurrState == STATE_BUS_READY & UnCachedAccess & (|LSURWM[1])) |
// Uncached bus read strobe. Asserted in the first cycle of an uncached load
// (FSM still in STATE_BUS_READY) and while the FSM is in
// STATE_BUS_UNCACHED_READ. The ready-state term is gated with ~IgnoreRequest,
// matching UnCachedLSUBusWrite, so a request that is to be ignored does not
// drive a read onto the bus. (The previous form used "& IgnoreRequest", which
// asserted the ready-state read only when the request should be ignored.)
assign UnCachedLSUBusRead = (BusCurrState == STATE_BUS_READY & UnCachedAccess & LSURWM[1] & ~IgnoreRequest) |
(BusCurrState == STATE_BUS_UNCACHED_READ);
assign LSUBusRead = UnCachedLSUBusRead | (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_READY & DCacheFetchLine);

View File

@ -153,7 +153,8 @@ module lsu (
assign DTLBStorePageFaultM = DTLBPageFaultM & PreLSURWM[0];
end // if (`MEM_VIRTMEM)
else begin
assign {InterlockStall, SelHPTW, IgnoreRequest, PTE, PageType, DTLBWriteM, ITLBWriteF} = '0;
assign {InterlockStall, SelHPTW, PTE, PageType, DTLBWriteM, ITLBWriteF} = '0;
assign IgnoreRequest = TrapM;
assign {DTLBLoadPageFaultM, DTLBStorePageFaultM} = '0;
assign CPUBusy = StallW;
assign LSUAdrE = PreLSUAdrE; assign LSUFunct3M = Funct3M; assign LSUFunct7M = Funct7M; assign LSUAtomicM = AtomicM;
@ -248,8 +249,8 @@ module lsu (
// *** adjust interface so write address doesn't need delaying; switch to standard RAM?
simpleram #(.BASE(`RAM_BASE), .RANGE(`RAM_RANGE)) ram (
.clk,
.a(CPUBusy ? IEUAdrM[31:0] : IEUAdrE[31:0]),
.we(LSURWM[0]),
.a(CPUBusy | LSURWM[0] ? IEUAdrM[31:0] : IEUAdrE[31:0]),
.we(LSURWM[0] & ~TrapM), // have to ignore write if Trap.
.wd(FinalWriteDataM), .rd(ReadDataWordM));
// since we have a local memory the bus connections are all disabled.