# Reviewer note: free text placed ahead of the first "diff --git" header; it is not part of any hunk.
#
# What this patch does, as far as the hunks below show:
#   * hazard.sv             - adds an ICacheStallF input port to module hazard.
#   * icache.sv             - adds CompressedF and ICacheStallF outputs; enables halfInstrFlop only when
#                             DelayF is set and StallF is clear; on a fetch that straddles the fetched
#                             word, places the upper halfword in the low 16 bits of InstrF instead of
#                             zeros; drives ICacheStallF = DelayF & ~DelaySideF; derives CompressedF
#                             from whether the low two instruction bits differ from 2'b11.
#   * ifu.sv                - exposes ICacheStallF, moves the InstrMisalignedAdrM port, wires the two new
#                             icache outputs, holds pcreg while ICacheStallF is set, and removes the
#                             constant "assign CompressedF = 0".
#   * wallypipelinedhart.sv - declares the ICacheStallF signal.
#   * testbench-imperas.sv  - when C is supported on RV64, puts tests64ic ahead of the existing list.
#
# NOTE(review): this text was recovered from a copy in which every run of whitespace had been collapsed.
# Line breaks were restored so that each hunk matches the line counts in its "@@" header; blank context
# lines were placed where those counts require them, and indentation is conventional rather than
# original. Presumably the "-"/"+" pairs in the ifu.sv declaration hunk that read identically differed
# only in spacing. Apply with a whitespace-tolerant option and confirm against the target tree.
diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv
index 43fc1ad96..3b54139c3 100644
--- a/wally-pipelined/src/hazard/hazard.sv
+++ b/wally-pipelined/src/hazard/hazard.sv
@@ -29,7 +29,7 @@ module hazard(
   // Detect hazards
   input logic PCSrcE, CSRWritePendingDEM, RetM, TrapM,
   input logic LoadStallD, MulDivStallD, CSRRdStallD,
-  input logic InstrStall, DataStall,
+  input logic InstrStall, DataStall, ICacheStallF,
   // Stall & flush outputs
   output logic StallF, StallD, StallE, StallM, StallW,
   output logic FlushD, FlushE, FlushM, FlushW
diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv
index df0de4d30..b07e64056 100644
--- a/wally-pipelined/src/ifu/icache.sv
+++ b/wally-pipelined/src/ifu/icache.sv
@@ -34,6 +34,8 @@ module icache(
   input logic [`XLEN-1:0] InstrInF,
   output logic [`XLEN-1:0] InstrPAdrF,
   output logic InstrReadF,
+  output logic CompressedF,
+  output logic ICacheStallF,
   // Decode
   output logic [31:0] InstrRawD
 );
@@ -46,7 +48,7 @@ module icache(
 
   flopr #(1) flushDLastCycleFlop(clk, reset, FlushD | (FlushDLastCycle & StallF), FlushDLastCycle);
   flopenr #(1) delayStateFlop(clk, reset, ~StallF, (DelayF & ~DelaySideF) ? 1'b1 : 1'b0 , DelaySideF);
-  flopenr #(16) halfInstrFlop(clk, reset, DelayF, MisalignedHalfInstrF, MisalignedHalfInstrD);
+  flopenr #(16) halfInstrFlop(clk, reset, DelayF & ~StallF, MisalignedHalfInstrF, MisalignedHalfInstrD);
 
   flopenr #(32) instrFlop(clk, reset, ~StallF, InstrF, AlignedInstrD);
 
@@ -69,15 +71,20 @@ module icache(
   // machinery to swizzle bits.
   generate
     if (`XLEN == 32) begin
-      assign InstrF = PCPF[1] ? 32'b0 : InstrInF;
+      assign InstrF = PCPF[1] ? {16'b0, InstrInF[31:16]} : InstrInF;
       assign DelayF = PCPF[1];
       assign MisalignedHalfInstrF = InstrInF[31:16];
     end else begin
-      assign InstrF = PCPF[2] ? (PCPF[1] ? 64'b0 : InstrInF[63:32]) : (PCPF[1] ? InstrInF[47:16] : InstrInF[31:0]);
+      assign InstrF = PCPF[2] ? (PCPF[1] ? {16'b0, InstrInF[63:48]} : InstrInF[63:32]) : (PCPF[1] ? InstrInF[47:16] : InstrInF[31:0]);
       assign DelayF = PCPF[1] && PCPF[2];
       assign MisalignedHalfInstrF = InstrInF[63:48];
     end
   endgenerate
+  assign ICacheStallF = DelayF & ~DelaySideF;
+
+  // Detect if the instruction is compressed
+  // TODO Low-hanging optimization, don't delay if compressed
+  assign CompressedF = DelaySideF ? (MisalignedHalfInstrD[1:0] != 2'b11) : (InstrF[1:0] != 2'b11);
 
   // Pick the correct output, depending on whether we have to assemble this
   // instruction from two reads or not.
diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv
index abfb37c88..e48078fea 100644
--- a/wally-pipelined/src/ifu/ifu.sv
+++ b/wally-pipelined/src/ifu/ifu.sv
@@ -35,6 +35,7 @@ module ifu (
   output logic [`XLEN-1:0] PCF,
   output logic [`XLEN-1:0] InstrPAdrF,
   output logic InstrReadF,
+  output logic ICacheStallF,
   // Decode
   // Execute
   input logic PCSrcE,
@@ -51,23 +52,23 @@ module ifu (
   input logic IllegalBaseInstrFaultD,
   output logic IllegalIEUInstrFaultD,
   output logic InstrMisalignedFaultM,
+  output logic [`XLEN-1:0] InstrMisalignedAdrM,
   // TLB management
   //input logic [`XLEN-1:0] PageTableEntryF,
   //input logic ITLBWriteF, ITLBFlushF,
   // *** satp value will come from CSRs
   // input logic [`XLEN-1:0] SATP,
-  output logic ITLBMissF, ITLBHitF,
-  output logic [`XLEN-1:0] InstrMisalignedAdrM
+  output logic ITLBMissF, ITLBHitF
 );
 
   logic [`XLEN-1:0] UnalignedPCNextF, PCNextF;
-  logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM;
-  logic PrivilegedChangePCM;
-  logic IllegalCompInstrD;
+  logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM;
+  logic PrivilegedChangePCM;
+  logic IllegalCompInstrD;
   logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkE, PCLinkM, PCPF;
-  logic CompressedF;
-  logic [31:0] InstrRawD, InstrE, InstrW;
-  logic [31:0] nop = 32'h00000013; // instruction for NOP
+  logic CompressedF;
+  logic [31:0] InstrRawD, InstrE, InstrW;
+  logic [31:0] nop = 32'h00000013; // instruction for NOP
   logic [`XLEN-1:0] ITLBInstrPAdrF, ICacheInstrPAdrF;
 
   // *** temporary hack until we can figure out how to get actual satp value
@@ -87,7 +88,7 @@ module ifu (
 
   // jarred 2021-03-04 Add instrution cache block to remove rd2
   assign PCPF = PCF; // Temporary workaround until iTLB is live
-  icache ic(clk, reset, StallF, StallD, FlushD, PCPF, InstrInF, ICacheInstrPAdrF, InstrReadF, InstrRawD);
+  icache ic(clk, reset, StallF, StallD, FlushD, PCPF, InstrInF, ICacheInstrPAdrF, InstrReadF, CompressedF, ICacheStallF, InstrRawD);
   // Prioritize the iTLB for reads if it wants one
   mux2 #(`XLEN) instrPAdrMux(ICacheInstrPAdrF, ITLBInstrPAdrF, ITLBMissF, InstrPAdrF);
 
@@ -95,13 +96,11 @@ module ifu (
 
   mux3 #(`XLEN) pcmux(PCPlus2or4F, PCTargetE, PrivilegedNextPCM, {PrivilegedChangePCM, PCSrcE}, UnalignedPCNextF);
   assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment
-  flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF);
+  flopenl #(`XLEN) pcreg(clk, reset, ~StallF & ~ICacheStallF, PCNextF, `RESET_VECTOR, PCF);
 
   // pcadder
   // add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32
-  assign CompressedF = 0; // is it a 16-bit compressed instruction? TODO Fix this
   assign PCPlusUpperF = PCF[`XLEN-1:2] + 1; // add 4 to PC
-
   // choose PC+2 or PC+4
   always_comb
     if (CompressedF) // add 2
diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv
index ded4df3d8..4f34fe2d7 100644
--- a/wally-pipelined/src/wally/wallypipelinedhart.sv
+++ b/wally-pipelined/src/wally/wallypipelinedhart.sv
@@ -91,6 +91,9 @@ module wallypipelinedhart (
   logic ITLBMissF, ITLBHitF;
   logic DTLBMissM, DTLBHitM;
 
+  // ICache stalls
+  logic ICacheStallF;
+
   // bus interface to dmem
   logic MemReadM, MemWriteM;
   logic [2:0] Funct3M;
diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv
index 7498de76d..8947482ea 100644
--- a/wally-pipelined/testbench/testbench-imperas.sv
+++ b/wally-pipelined/testbench/testbench-imperas.sv
@@ -322,7 +322,7 @@ string tests32i[] = {
   initial
     if (`XLEN == 64) begin // RV64
       tests = {tests64i};
-      if (`C_SUPPORTED % 2 == 1) tests = {tests, tests64ic};
+      if (`C_SUPPORTED % 2 == 1) tests = {tests64ic, tests};
       else tests = {tests, tests64iNOc};
       if (`M_SUPPORTED % 2 == 1) tests = {tests, tests64m};
     end else begin // RV32