diff --git a/pipelined/regression/regression-wally b/pipelined/regression/regression-wally index a8f4f4dd4..391ee90ec 100755 --- a/pipelined/regression/regression-wally +++ b/pipelined/regression/regression-wally @@ -68,7 +68,7 @@ for test in tests32gc: grepstr="All tests ran without failures") configs.append(tc) -tests32ic = ["arch32i", "arch32c"] +tests32ic = ["arch32i", "arch32c", "imperas32i", "imperas32c"] for test in tests32ic: tc = TestCase( name=test, @@ -77,7 +77,7 @@ for test in tests32ic: grepstr="All tests ran without failures") configs.append(tc) -tests32tim = ["arch32i", "arch32c"] +tests32tim = ["arch32i", "arch32c", "imperas32i", "imperas32c"] for test in tests32tim: tc = TestCase( name=test, diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 4997febec..7d4d2f77d 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -79,6 +79,8 @@ module cachefsm ); logic AnyCPUReqM; + logic [1:0] PreSelAdr; + logic resetDelay; typedef enum {STATE_READY, @@ -107,6 +109,12 @@ module cachefsm assign CacheAccess = AnyCPUReqM & CurrState == STATE_READY; assign CacheMiss = CacheAccess & ~CacheHit; + // special case on reset. When the fsm first exits reset the + // PCNextF will no longer be pointing to the correct address. + // But PCF will be the reset vector. + flop #(1) resetDelayReg(.clk, .d(reset), .q(resetDelay)); + assign SelAdr = resetDelay ? 2'b01 : PreSelAdr; + always_ff @(posedge clk) if (reset) CurrState <= #1 STATE_READY; else CurrState <= #1 NextState; @@ -114,7 +122,7 @@ module cachefsm // next state logic and some state ouputs. 
always_comb begin CacheStall = 1'b0; - SelAdr = 2'b00; + PreSelAdr = 2'b00; SetValid = 1'b0; ClearValid = 1'b0; SetDirty = 1'b0; @@ -137,7 +145,7 @@ module cachefsm STATE_READY: begin CacheStall = 1'b0; - SelAdr = 2'b00; + PreSelAdr = 2'b00; SRAMWordWriteEnable = 1'b0; SetDirty = 1'b0; LRUWriteEn = 1'b0; @@ -150,7 +158,7 @@ module cachefsm // PTW ready the CPU will stall. // The page table walker asserts it's control 1 cycle // after the TLBs miss. - SelAdr = 2'b01; + PreSelAdr = 2'b01; NextState = STATE_READY; end @@ -164,12 +172,12 @@ module cachefsm // amo hit else if(Atomic[1] & (&RW) & CacheHit) begin - SelAdr = 2'b01; + PreSelAdr = 2'b01; CacheStall = 1'b0; if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; - SelAdr = 2'b01; + PreSelAdr = 2'b01; end else begin SRAMWordWriteEnable = 1'b1; @@ -185,7 +193,7 @@ module cachefsm if(CPUBusy) begin NextState = STATE_CPU_BUSY; - SelAdr = 2'b01; + PreSelAdr = 2'b01; end else begin NextState = STATE_READY; @@ -193,7 +201,7 @@ module cachefsm end // write hit valid cached else if (RW[0] & CacheHit) begin - SelAdr = 2'b01; + PreSelAdr = 2'b01; CacheStall = 1'b0; SRAMWordWriteEnable = 1'b1; SetDirty = 1'b1; @@ -201,7 +209,7 @@ module cachefsm if(CPUBusy) begin NextState = STATE_CPU_BUSY; - SelAdr = 2'b01; + PreSelAdr = 2'b01; end else begin NextState = STATE_READY; @@ -218,7 +226,7 @@ module cachefsm STATE_MISS_FETCH_WDV: begin CacheStall = 1'b1; - SelAdr = 2'b01; + PreSelAdr = 2'b01; if (CacheBusAck) begin NextState = STATE_MISS_FETCH_DONE; @@ -229,7 +237,7 @@ module cachefsm STATE_MISS_FETCH_DONE: begin CacheStall = 1'b1; - SelAdr = 2'b01; + PreSelAdr = 2'b01; if(VictimDirty) begin NextState = STATE_MISS_EVICT_DIRTY; CacheWriteLine = 1'b1; @@ -242,14 +250,14 @@ module cachefsm SRAMLineWriteEnable = 1'b1; CacheStall = 1'b1; NextState = STATE_MISS_READ_WORD; - SelAdr = 2'b01; + PreSelAdr = 2'b01; SetValid = 1'b1; ClearDirty = 1'b1; //LRUWriteEn = 1'b1; // DO not update LRU on SRAM fetch update. 
Wait for subsequent read/write end STATE_MISS_READ_WORD: begin - SelAdr = 2'b01; + PreSelAdr = 2'b01; CacheStall = 1'b1; if (RW[0] & ~Atomic[1]) begin // handles stores and amo write. NextState = STATE_MISS_WRITE_WORD; @@ -261,12 +269,12 @@ module cachefsm end STATE_MISS_READ_WORD_DELAY: begin - //SelAdr = 2'b01; + //PreSelAdr = 2'b01; SRAMWordWriteEnable = 1'b0; SetDirty = 1'b0; LRUWriteEn = 1'b0; if(&RW & Atomic[1]) begin // amo write - SelAdr = 2'b01; + PreSelAdr = 2'b01; if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; end @@ -280,7 +288,7 @@ module cachefsm LRUWriteEn = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY; - SelAdr = 2'b01; + PreSelAdr = 2'b01; end else begin NextState = STATE_READY; @@ -291,11 +299,11 @@ module cachefsm STATE_MISS_WRITE_WORD: begin SRAMWordWriteEnable = 1'b1; SetDirty = 1'b1; - SelAdr = 2'b01; + PreSelAdr = 2'b01; LRUWriteEn = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY; - SelAdr = 2'b01; + PreSelAdr = 2'b01; end else begin NextState = STATE_READY; @@ -304,7 +312,7 @@ module cachefsm STATE_MISS_EVICT_DIRTY: begin CacheStall = 1'b1; - SelAdr = 2'b01; + PreSelAdr = 2'b01; SelEvict = 1'b1; if(CacheBusAck) begin NextState = STATE_MISS_WRITE_CACHE_LINE; @@ -315,10 +323,10 @@ module cachefsm STATE_CPU_BUSY: begin - SelAdr = 2'b00; + PreSelAdr = 2'b00; if(CPUBusy) begin NextState = STATE_CPU_BUSY; - SelAdr = 2'b01; + PreSelAdr = 2'b01; end else begin NextState = STATE_READY; @@ -326,7 +334,7 @@ module cachefsm end STATE_CPU_BUSY_FINISH_AMO: begin - SelAdr = 2'b01; + PreSelAdr = 2'b01; SRAMWordWriteEnable = 1'b0; SetDirty = 1'b0; LRUWriteEn = 1'b0; @@ -345,13 +353,13 @@ module cachefsm // intialize flush counters SelFlush = 1'b1; CacheStall = 1'b1; - SelAdr = 2'b10; + PreSelAdr = 2'b10; NextState = STATE_FLUSH_CHECK; end STATE_FLUSH_CHECK: begin CacheStall = 1'b1; - SelAdr = 2'b10; + PreSelAdr = 2'b10; SelFlush = 1'b1; if(VictimDirty) begin NextState = STATE_FLUSH_WRITE_BACK; @@ -360,7 +368,7 @@ module cachefsm end else 
if (FlushAdrFlag & FlushWayFlag) begin NextState = STATE_READY; CacheStall = 1'b0; - SelAdr = 2'b00; + PreSelAdr = 2'b00; FlushWayCntEn = 1'b0; end else if(FlushWayFlag) begin NextState = STATE_FLUSH_INCR; @@ -375,7 +383,7 @@ module cachefsm STATE_FLUSH_INCR: begin CacheStall = 1'b1; - SelAdr = 2'b10; + PreSelAdr = 2'b10; SelFlush = 1'b1; FlushWayCntRst = 1'b1; NextState = STATE_FLUSH_CHECK; @@ -383,7 +391,7 @@ module cachefsm STATE_FLUSH_WRITE_BACK: begin CacheStall = 1'b1; - SelAdr = 2'b10; + PreSelAdr = 2'b10; SelFlush = 1'b1; if(CacheBusAck) begin NextState = STATE_FLUSH_CLEAR_DIRTY; @@ -397,12 +405,12 @@ module cachefsm ClearDirty = 1'b1; VDWriteEnable = 1'b1; SelFlush = 1'b1; - SelAdr = 2'b10; + PreSelAdr = 2'b10; FlushWayCntEn = 1'b0; if(FlushAdrFlag & FlushWayFlag) begin NextState = STATE_READY; CacheStall = 1'b0; - SelAdr = 2'b00; + PreSelAdr = 2'b00; end else if (FlushWayFlag) begin NextState = STATE_FLUSH_INCR; FlushAdrCntEn = 1'b1; diff --git a/pipelined/src/generic/flop/simpleram.sv b/pipelined/src/generic/flop/simpleram.sv index 74fb7cd20..43b873567 100644 --- a/pipelined/src/generic/flop/simpleram.sv +++ b/pipelined/src/generic/flop/simpleram.sv @@ -39,20 +39,17 @@ module simpleram #(parameter BASE=0, RANGE = 65535) ( ); logic [`XLEN-1:0] RAM[BASE>>(1+`XLEN/32):(RANGE+BASE)>>1+(`XLEN/32)]; - logic [31:0] ad; - - flop #(32) areg(clk, a, ad); // *** redesign external interface so this delay isn't needed /* verilator lint_off WIDTH */ if (`XLEN == 64) begin:ramrw always_ff @(posedge clk) begin rd <= RAM[a[31:3]]; - if (we) RAM[ad[31:3]] <= #1 wd; + if (we) RAM[a[31:3]] <= #1 wd; end end else begin always_ff @(posedge clk) begin:ramrw rd <= RAM[a[31:2]]; - if (we) RAM[ad[31:2]] <= #1 wd; + if (we) RAM[a[31:2]] <= #1 wd; end end /* verilator lint_on WIDTH */ diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 4b73d6852..94e6ae2d2 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -232,12 +232,10 @@ module ifu ( if 
(`MEM_IROM) begin : irom logic [`XLEN-1:0] FinalInstrRawF_FIXME; - // *** adjust interface so write address doesn't need delaying - // *** modify to be a ROM rather than RAM simpleram #( .BASE(`RAM_BASE), .RANGE(`RAM_RANGE)) ram ( .clk, - .a(CPUBusy ? PCPF[31:0] : PCNextFMux[31:0]), // mux is also inside $, have to replay address if CPU is stalled. + .a(CPUBusy | reset ? PCPF[31:0] : PCNextFMux[31:0]), // mux is also inside $, have to replay address if CPU is stalled. .we(1'b0), .wd(0), .rd(FinalInstrRawF_FIXME)); assign FinalInstrRawF = FinalInstrRawF_FIXME[31:0]; @@ -328,50 +326,23 @@ module ifu ( assign PrivilegedChangePCM = RetM | TrapM; - mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F), - .d1(BPPredPCF), - .s(SelBPPredF), - .y(PCNext0F)); - - mux2 #(`XLEN) pcmux1(.d0(PCNext0F), - .d1(PCCorrectE), - .s(BPPredWrongE), - .y(PCNext1F)); - - // December 20, 2021 Ross Thompson, If instructions in ID and IF are already invalid we don't pick PCE on icache invalidate. - // this only happens because of branch class miss prediction. The Fence instruction was incorrectly predicted as a branch - // this means on the previous cycle the BPPredWrongE updated PCNextF to the correct fall through address. - // to fix we need to select the correct address PCF as the next PCNextF. Unforunately we must still flush the instruction in IF - // as we are deliberately invalidating the icache. This address has to be refetched by the icache. - mux2 #(`XLEN) pcmux2(.d0(PCNext1F), - .d1(PCBPWrongInvalidate), - .s(InvalidateICacheM), - .y(PCNext2F)); - - mux2 #(`XLEN) pcmux3(.d0(PCNext2F), - .d1(PrivilegedNextPCM), - .s(PrivilegedChangePCM), - //.y(UnalignedPCNextF)); - .y(PCNext3F)); - + mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F), .d1(BPPredPCF), .s(SelBPPredF), .y(PCNext0F)); + mux2 #(`XLEN) pcmux1(.d0(PCNext0F), .d1(PCCorrectE), .s(BPPredWrongE), .y(PCNext1F)); + // The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE. 
+ mux2 #(`XLEN) pccorrectemux(.d0(PCLinkE), .d1(IEUAdrE), .s(PCSrcE), .y(PCCorrectE)); + mux2 #(`XLEN) pcmux2(.d0(PCNext1F), .d1(PCBPWrongInvalidate), .s(InvalidateICacheM), .y(PCNext2F)); + // Mux only required on instruction class miss prediction. + mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF), .s(BPPredWrongM), .y(PCBPWrongInvalidate)); + mux2 #(`XLEN) pcmux3(.d0(PCNext2F), .d1(PrivilegedNextPCM), .s(PrivilegedChangePCM), .y(PCNext3F)); // This mux is required as PCNextF needs to be the valid reset vector during reset. // Reseting PCF does not accomplish this as PCNextF will be +2/4 more than PCF. - mux2 #(`XLEN) pcmux4(.d0(PCNext3F), - .d1(`RESET_VECTOR), - .s(`MEM_IROM ? reset : reset_q), - .y(UnalignedPCNextF)); - - flop #(1) resetReg (.clk(clk), .d(reset),.q(reset_q)); // delay reset - - flopenrc #(1) BPPredWrongMReg(.clk, .reset, .en(~StallM), .clear(FlushM), - .d(BPPredWrongE), .q(BPPredWrongM)); - - mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF), - .s(BPPredWrongM), // & InvalidateICacheM *** check with linux. - .y(PCBPWrongInvalidate)); - // The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE. - assign PCCorrectE = PCSrcE ? IEUAdrE : PCLinkE; + //mux2 #(`XLEN) pcmux4(.d0(PCNext3F), .d1(`RESET_VECTOR), .s(`MEM_IROM ? reset : reset_q), .y(UnalignedPCNextF)); + // mux2 #(`XLEN) pcmux4(.d0(PCNext3F), .d1(`RESET_VECTOR), .s(reset), .y(UnalignedPCNextF)); // ******* probably can get rid of by making reset SelAdr = 01 + assign UnalignedPCNextF = PCNext3F; + + flopenrc #(1) BPPredWrongMReg(.clk, .reset, .en(~StallM), .clear(FlushM), .d(BPPredWrongE), .q(BPPredWrongM)); + assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 
21 about 16-bit alignment flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF); @@ -404,7 +375,7 @@ module ifu ( end else begin : bpred assign BPPredPCF = '0; - assign BPPredWrongM = PCSrcE; + assign BPPredWrongE = PCSrcE; assign {SelBPPredF, BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM} = '0; end @@ -428,7 +399,6 @@ module ifu ( // *** combine these with others in better way, including M, F - // Misaligned PC logic // instruction address misalignment is generated by the target of control flow instructions, not // the fetch itself. diff --git a/pipelined/src/lsu/busfsm.sv b/pipelined/src/lsu/busfsm.sv index a9e43b559..ae85fa870 100644 --- a/pipelined/src/lsu/busfsm.sv +++ b/pipelined/src/lsu/busfsm.sv @@ -127,11 +127,11 @@ module busfsm #(parameter integer WordCountThreshold, (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_WRITE); assign PreCntEn = BusCurrState == STATE_BUS_FETCH | BusCurrState == STATE_BUS_WRITE; - assign UnCachedLSUBusWrite = (BusCurrState == STATE_BUS_READY & UnCachedAccess & (LSURWM[0])) | + assign UnCachedLSUBusWrite = (BusCurrState == STATE_BUS_READY & UnCachedAccess & (LSURWM[0] & ~IgnoreRequest)) | (BusCurrState == STATE_BUS_UNCACHED_WRITE); assign LSUBusWrite = UnCachedLSUBusWrite | (BusCurrState == STATE_BUS_WRITE); - assign UnCachedLSUBusRead = (BusCurrState == STATE_BUS_READY & UnCachedAccess & (|LSURWM[1])) | + assign UnCachedLSUBusRead = (BusCurrState == STATE_BUS_READY & UnCachedAccess & (|LSURWM[1] & ~IgnoreRequest)) | /* gated by ~IgnoreRequest, same as the uncached write above */ (BusCurrState == STATE_BUS_UNCACHED_READ); assign LSUBusRead = UnCachedLSUBusRead | (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_READY & DCacheFetchLine); diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 93cf5e849..2ef616156 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -153,7 +153,8 @@ module lsu ( assign DTLBStorePageFaultM = DTLBPageFaultM & PreLSURWM[0]; end // if (`MEM_VIRTMEM) else 
begin - assign {InterlockStall, SelHPTW, IgnoreRequest, PTE, PageType, DTLBWriteM, ITLBWriteF} = '0; + assign {InterlockStall, SelHPTW, PTE, PageType, DTLBWriteM, ITLBWriteF} = '0; + assign IgnoreRequest = TrapM; assign {DTLBLoadPageFaultM, DTLBStorePageFaultM} = '0; assign CPUBusy = StallW; assign LSUAdrE = PreLSUAdrE; assign LSUFunct3M = Funct3M; assign LSUFunct7M = Funct7M; assign LSUAtomicM = AtomicM; @@ -248,8 +249,8 @@ module lsu ( // *** adjust interface so write address doesn't need delaying; switch to standard RAM? simpleram #(.BASE(`RAM_BASE), .RANGE(`RAM_RANGE)) ram ( .clk, - .a(CPUBusy ? IEUAdrM[31:0] : IEUAdrE[31:0]), - .we(LSURWM[0]), + .a(CPUBusy | LSURWM[0] ? IEUAdrM[31:0] : IEUAdrE[31:0]), + .we(LSURWM[0] & ~TrapM), // have to ignore write if Trap. .wd(FinalWriteDataM), .rd(ReadDataWordM)); // since we have a local memory the bus connections are all disabled.