From 106718b1960609148b83154f853128f510a7801b Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Thu, 4 Mar 2021 16:46:43 -0500 Subject: [PATCH 01/34] Remove rd2, working for non-compressed --- wally-pipelined/regression/wally-pipelined.do | 6 +- wally-pipelined/src/ifu/icache.sv | 90 +++++++++++++++++++ wally-pipelined/src/ifu/ifu.sv | 35 +++----- .../testbench/testbench-imperas.sv | 2 +- 4 files changed, 109 insertions(+), 24 deletions(-) create mode 100644 wally-pipelined/src/ifu/icache.sv diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do index c9051274..cf9a39cd 100644 --- a/wally-pipelined/regression/wally-pipelined.do +++ b/wally-pipelined/regression/wally-pipelined.do @@ -60,11 +60,13 @@ add wave /testbench/dut/hart/FlushW add wave -divider add wave -hex /testbench/dut/hart/ifu/PCF -add wave -hex /testbench/dut/hart/ifu/InstrF -add wave /testbench/InstrFName add wave -hex /testbench/dut/hart/ifu/PCD add wave -hex /testbench/dut/hart/ifu/InstrD add wave /testbench/InstrDName +add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD +add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD +add wave /testbench/dut/hart/ifu/ic/DelayF +add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD add wave -divider add wave -hex /testbench/dut/hart/ifu/PCE add wave -hex /testbench/dut/hart/ifu/InstrE diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv new file mode 100644 index 00000000..df0de4d3 --- /dev/null +++ b/wally-pipelined/src/ifu/icache.sv @@ -0,0 +1,90 @@ +/////////////////////////////////////////// +// icache.sv +// +// Written: jaallen@g.hmc.edu 2021-03-02 +// Modified: +// +// Purpose: Cache instructions for the ifu so it can access memory less often +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" + +module icache( + input logic clk, reset, + input logic StallF, StallD, + input logic FlushD, + // Fetch + input logic [`XLEN-1:0] PCPF, + input logic [`XLEN-1:0] InstrInF, + output logic [`XLEN-1:0] InstrPAdrF, + output logic InstrReadF, + // Decode + output logic [31:0] InstrRawD +); + + logic DelayF, DelaySideF, FlushDLastCycle; + logic [1:0] InstrDMuxChoice; + logic [15:0] MisalignedHalfInstrF, MisalignedHalfInstrD; + logic [31:0] InstrF, AlignedInstrD; + logic [31:0] nop = 32'h00000013; // instruction for NOP + + flopr #(1) flushDLastCycleFlop(clk, reset, FlushD | (FlushDLastCycle & StallF), FlushDLastCycle); + flopenr #(1) delayStateFlop(clk, reset, ~StallF, (DelayF & ~DelaySideF) ? 1'b1 : 1'b0 , DelaySideF); + flopenr #(16) halfInstrFlop(clk, reset, DelayF, MisalignedHalfInstrF, MisalignedHalfInstrD); + + flopenr #(32) instrFlop(clk, reset, ~StallF, InstrF, AlignedInstrD); + + // Decide which address needs to be fetched and sent out over InstrPAdrF + // If the requested address fits inside one read from memory, we fetch that + // address, adjusted to the bit width. Otherwise, we request the lower word + // and then the upper word, in that order. + generate + if (`XLEN == 32) begin + assign InstrPAdrF = PCPF[1] ? (DelaySideF ? {PCPF[31:2]+1, 2'b00} : {PCPF[31:2], 2'b00}) : PCPF; + end else begin + assign InstrPAdrF = PCPF[2] ? (PCPF[1] ? (DelaySideF ? {PCPF[63:3]+1, 3'b000} : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}; + end + endgenerate + // For now, we always read since the cache doesn't actually cache + assign InstrReadF = 1; + + // If the instruction fits in one memory read, then we put the right bits + // into InstrF. Otherwise, we activate DelayF to signal the rest of the + // machinery to swizzle bits. + generate + if (`XLEN == 32) begin + assign InstrF = PCPF[1] ? 32'b0 : InstrInF; + assign DelayF = PCPF[1]; + assign MisalignedHalfInstrF = InstrInF[31:16]; + end else begin + assign InstrF = PCPF[2] ? (PCPF[1] ? 64'b0 : InstrInF[63:32]) : (PCPF[1] ? InstrInF[47:16] : InstrInF[31:0]); + assign DelayF = PCPF[1] && PCPF[2]; + assign MisalignedHalfInstrF = InstrInF[63:48]; + end + endgenerate + + // Pick the correct output, depending on whether we have to assemble this + // instruction from two reads or not. + // Output the requested instruction (we don't need to worry if the read is + // incomplete, since the pipeline stalls for us when it isn't), or a NOP for + // the cycle when the first of two reads comes in. + always_comb + assign InstrDMuxChoice = FlushDLastCycle ? 2'b10 : (DelayF ? (DelaySideF ? 2'b01 : 2'b10) : 2'b00); + mux3 #(32) instrDMux (AlignedInstrD, {InstrInF[15:0], MisalignedHalfInstrD}, nop, InstrDMuxChoice, InstrRawD); +endmodule diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 88e4f0be..abfb37c8 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -2,7 +2,7 @@ // ifu.sv // // Written: David_Harris@hmc.edu 9 January 2021 -// Modified: +// Modified: // // Purpose: Instrunction Fetch Unit // PC, branch prediction, instruction cache @@ -51,25 +51,24 @@ module ifu ( input logic IllegalBaseInstrFaultD, output logic IllegalIEUInstrFaultD, output logic InstrMisalignedFaultM, - output logic [`XLEN-1:0] InstrMisalignedAdrM, // TLB management //input logic [`XLEN-1:0] PageTableEntryF, //input logic ITLBWriteF, ITLBFlushF, // *** satp value will come from CSRs // input logic [`XLEN-1:0] SATP, output logic ITLBMissF, ITLBHitF, - // bogus - input logic [15:0] rd2 + output logic [`XLEN-1:0] InstrMisalignedAdrM ); logic [`XLEN-1:0] UnalignedPCNextF, PCNextF; logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM; logic PrivilegedChangePCM; logic IllegalCompInstrD; - logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkE, PCLinkM; + logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkE, PCLinkM, PCPF; logic CompressedF; - logic [31:0] InstrF, InstrRawD, InstrE, InstrW; + logic [31:0] InstrRawD, InstrE, InstrW; logic [31:0] nop = 32'h00000013; // instruction for NOP + logic [`XLEN-1:0] ITLBInstrPAdrF, ICacheInstrPAdrF; // *** temporary hack until we can figure out how to get actual satp value // from priv unit -- Thomas F @@ -79,23 +78,28 @@ module ifu ( logic ITLBFlushF = '0; logic ITLBWriteF = '0; tlb #(3) itlb(clk, reset, SATP, PCF, PageTableEntryF, ITLBWriteF, ITLBFlushF, - InstrPAdrF, ITLBMissF, ITLBHitF); + ITLBInstrPAdrF, ITLBMissF, ITLBHitF); // *** put memory interface on here, InstrF becomes output //assign InstrPAdrF = PCF; // *** no MMU //assign InstrReadF = ~StallD; // *** & ICacheMissF; add later - assign InstrReadF = 1; // *** & ICacheMissF; add later + // assign InstrReadF = 1; // *** & ICacheMissF; add later + + // jarred 2021-03-04 Add instrution cache block to remove rd2 + assign PCPF = PCF; // Temporary workaround until iTLB is live + icache ic(clk, reset, StallF, StallD, FlushD, PCPF, InstrInF, ICacheInstrPAdrF, InstrReadF, InstrRawD); + // Prioritize the iTLB for reads if it wants one + mux2 #(`XLEN) instrPAdrMux(ICacheInstrPAdrF, ITLBInstrPAdrF, ITLBMissF, InstrPAdrF); assign PrivilegedChangePCM = RetM | TrapM; - mux3 #(`XLEN) pcmux(PCPlus2or4F, PCTargetE, PrivilegedNextPCM, {PrivilegedChangePCM, PCSrcE}, UnalignedPCNextF); assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF); // pcadder // add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32 - assign CompressedF = (InstrF[1:0] != 2'b11); // is it a 16-bit compressed instruction? + assign CompressedF = 0; // is it a 16-bit compressed instruction? TODO Fix this assign PCPlusUpperF = PCF[`XLEN-1:2] + 1; // add 4 to PC // choose PC+2 or PC+4 @@ -105,18 +109,7 @@ module ifu ( else PCPlus2or4F = {PCF[`XLEN-1:2], 2'b10}; else PCPlus2or4F = {PCPlusUpperF, PCF[1:0]}; // add 4 - // harris 2/23/21 Add code to fetch instruction split across two words - generate - if (`XLEN==32) begin - assign InstrF = PCF[1] ? {rd2[15:0], InstrInF[31:16]} : InstrInF; - end else begin - assign InstrF = PCF[2] ? (PCF[1] ? {rd2[15:0], InstrInF[63:48]} : InstrInF[63:32]) - : (PCF[1] ? InstrInF[47:16] : InstrInF[31:0]); - end - endgenerate - // Decode stage pipeline register and logic - flopenl #(32) InstrDReg(clk, reset, ~StallD, (FlushD ? nop : InstrF), nop, InstrRawD); flopenrc #(`XLEN) PCDReg(clk, reset, FlushD, ~StallD, PCF, PCD); // expand 16-bit compressed instructions to 32 bits diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 49a5263d..7498de76 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -347,7 +347,7 @@ string tests32i[] = { // Track names of instructions instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE, - dut.hart.ifu.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, + dut.hart.ifu.ic.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, dut.hart.ifu.InstrM, dut.hart.ifu.InstrW, InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); From 41f682f8489cebde95afc81d967482c4f6d3cda4 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Thu, 4 Mar 2021 18:30:26 -0500 Subject: [PATCH 02/34] Partial progress towards compressed instructions --- wally-pipelined/src/hazard/hazard.sv | 2 +- wally-pipelined/src/ifu/icache.sv | 13 ++++++++--- wally-pipelined/src/ifu/ifu.sv | 23 +++++++++---------- .../src/wally/wallypipelinedhart.sv | 3 +++ .../testbench/testbench-imperas.sv | 2 +- 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index 43fc1ad9..3b54139c 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -29,7 +29,7 @@ module hazard( // Detect hazards input logic PCSrcE, CSRWritePendingDEM, RetM, TrapM, input logic LoadStallD, MulDivStallD, CSRRdStallD, - input logic InstrStall, DataStall, + input logic InstrStall, DataStall, ICacheStallF, // Stall & flush outputs output logic StallF, StallD, StallE, StallM, StallW, output logic FlushD, FlushE, FlushM, FlushW diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index df0de4d3..b07e6405 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -34,6 +34,8 @@ module icache( input logic [`XLEN-1:0] InstrInF, output logic [`XLEN-1:0] InstrPAdrF, output logic InstrReadF, + output logic CompressedF, + output logic ICacheStallF, // Decode output logic [31:0] InstrRawD ); @@ -46,7 +48,7 @@ module icache( flopr #(1) flushDLastCycleFlop(clk, reset, FlushD | (FlushDLastCycle & StallF), FlushDLastCycle); flopenr #(1) delayStateFlop(clk, reset, ~StallF, (DelayF & ~DelaySideF) ? 1'b1 : 1'b0 , DelaySideF); - flopenr #(16) halfInstrFlop(clk, reset, DelayF, MisalignedHalfInstrF, MisalignedHalfInstrD); + flopenr #(16) halfInstrFlop(clk, reset, DelayF & ~StallF, MisalignedHalfInstrF, MisalignedHalfInstrD); flopenr #(32) instrFlop(clk, reset, ~StallF, InstrF, AlignedInstrD); @@ -69,15 +71,20 @@ module icache( // machinery to swizzle bits. generate if (`XLEN == 32) begin - assign InstrF = PCPF[1] ? 32'b0 : InstrInF; + assign InstrF = PCPF[1] ? {16'b0, InstrInF[31:16]} : InstrInF; assign DelayF = PCPF[1]; assign MisalignedHalfInstrF = InstrInF[31:16]; end else begin - assign InstrF = PCPF[2] ? (PCPF[1] ? 64'b0 : InstrInF[63:32]) : (PCPF[1] ? InstrInF[47:16] : InstrInF[31:0]); + assign InstrF = PCPF[2] ? (PCPF[1] ? {16'b0, InstrInF[63:48]} : InstrInF[63:32]) : (PCPF[1] ? InstrInF[47:16] : InstrInF[31:0]); assign DelayF = PCPF[1] && PCPF[2]; assign MisalignedHalfInstrF = InstrInF[63:48]; end endgenerate + assign ICacheStallF = DelayF & ~DelaySideF; + + // Detect if the instruction is compressed + // TODO Low-hanging optimization, don't delay if compressed + assign CompressedF = DelaySideF ? (MisalignedHalfInstrD[1:0] != 2'b11) : (InstrF[1:0] != 2'b11); // Pick the correct output, depending on whether we have to assemble this // instruction from two reads or not. diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index abfb37c8..e48078fe 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -35,6 +35,7 @@ module ifu ( output logic [`XLEN-1:0] PCF, output logic [`XLEN-1:0] InstrPAdrF, output logic InstrReadF, + output logic ICacheStallF, // Decode // Execute input logic PCSrcE, @@ -51,23 +52,23 @@ module ifu ( input logic IllegalBaseInstrFaultD, output logic IllegalIEUInstrFaultD, output logic InstrMisalignedFaultM, + output logic [`XLEN-1:0] InstrMisalignedAdrM, // TLB management //input logic [`XLEN-1:0] PageTableEntryF, //input logic ITLBWriteF, ITLBFlushF, // *** satp value will come from CSRs // input logic [`XLEN-1:0] SATP, - output logic ITLBMissF, ITLBHitF, - output logic [`XLEN-1:0] InstrMisalignedAdrM + output logic ITLBMissF, ITLBHitF ); logic [`XLEN-1:0] UnalignedPCNextF, PCNextF; - logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM; - logic PrivilegedChangePCM; - logic IllegalCompInstrD; + logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM; + logic PrivilegedChangePCM; + logic IllegalCompInstrD; logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkE, PCLinkM, PCPF; - logic CompressedF; - logic [31:0] InstrRawD, InstrE, InstrW; - logic [31:0] nop = 32'h00000013; // instruction for NOP + logic CompressedF; + logic [31:0] InstrRawD, InstrE, InstrW; + logic [31:0] nop = 32'h00000013; // instruction for NOP logic [`XLEN-1:0] ITLBInstrPAdrF, ICacheInstrPAdrF; // *** temporary hack until we can figure out how to get actual satp value @@ -87,7 +88,7 @@ module ifu ( // jarred 2021-03-04 Add instrution cache block to remove rd2 assign PCPF = PCF; // Temporary workaround until iTLB is live - icache ic(clk, reset, StallF, StallD, FlushD, PCPF, InstrInF, ICacheInstrPAdrF, InstrReadF, InstrRawD); + icache ic(clk, reset, StallF, StallD, FlushD, PCPF, InstrInF, ICacheInstrPAdrF, InstrReadF, CompressedF, ICacheStallF, InstrRawD); // Prioritize the iTLB for reads if it wants one mux2 #(`XLEN) instrPAdrMux(ICacheInstrPAdrF, ITLBInstrPAdrF, ITLBMissF, InstrPAdrF); @@ -95,13 +96,11 @@ module ifu ( mux3 #(`XLEN) pcmux(PCPlus2or4F, PCTargetE, PrivilegedNextPCM, {PrivilegedChangePCM, PCSrcE}, UnalignedPCNextF); assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment - flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF); + flopenl #(`XLEN) pcreg(clk, reset, ~StallF & ~ICacheStallF, PCNextF, `RESET_VECTOR, PCF); // pcadder // add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32 - assign CompressedF = 0; // is it a 16-bit compressed instruction? TODO Fix this assign PCPlusUpperF = PCF[`XLEN-1:2] + 1; // add 4 to PC - // choose PC+2 or PC+4 always_comb if (CompressedF) // add 2 diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index ded4df3d..4f34fe2d 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -91,6 +91,9 @@ module wallypipelinedhart ( logic ITLBMissF, ITLBHitF; logic DTLBMissM, DTLBHitM; + // ICache stalls + logic ICacheStallF; + // bus interface to dmem logic MemReadM, MemWriteM; logic [2:0] Funct3M; diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 7498de76..8947482e 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -322,7 +322,7 @@ string tests32i[] = { initial if (`XLEN == 64) begin // RV64 tests = {tests64i}; - if (`C_SUPPORTED % 2 == 1) tests = {tests, tests64ic}; + if (`C_SUPPORTED % 2 == 1) tests = {tests64ic, tests}; else tests = {tests, tests64iNOc}; if (`M_SUPPORTED % 2 == 1) tests = {tests, tests64m}; end else begin // RV32 From 3172be3039e831b053b9a7d725bce32239717eac Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Tue, 9 Mar 2021 21:16:07 -0500 Subject: [PATCH 03/34] More progress --- wally-pipelined/src/ifu/icache.sv | 53 +++++++++++++------ .../testbench/testbench-imperas.sv | 3 +- 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index b07e6405..d8083bb6 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -40,18 +40,26 @@ module icache( output logic [31:0] InstrRawD ); - logic DelayF, DelaySideF, FlushDLastCycle; - logic [1:0] InstrDMuxChoice; - logic [15:0] MisalignedHalfInstrF, MisalignedHalfInstrD; - logic [31:0] InstrF, AlignedInstrD; - logic [31:0] nop = 32'h00000013; // instruction for NOP + logic DelayF, DelaySideF, FlushDLastCycle; + logic [1:0] InstrDMuxChoice; + logic [15:0] MisalignedHalfInstrF, MisalignedHalfInstrD; + logic [31:0] InstrF, AlignedInstrD; + logic [31:0] nop = 32'h00000013; // instruction for NOP + logic LastReadDataValidF; + logic [`XLEN-1:0] LastReadDataF, LastReadAdrF, InDataF; flopr #(1) flushDLastCycleFlop(clk, reset, FlushD | (FlushDLastCycle & StallF), FlushDLastCycle); flopenr #(1) delayStateFlop(clk, reset, ~StallF, (DelayF & ~DelaySideF) ? 1'b1 : 1'b0 , DelaySideF); flopenr #(16) halfInstrFlop(clk, reset, DelayF & ~StallF, MisalignedHalfInstrF, MisalignedHalfInstrD); + // This flop is here to simulate pulling data out of the cache, which is edge-triggered flopenr #(32) instrFlop(clk, reset, ~StallF, InstrF, AlignedInstrD); + // These flops cache the previous read, to accelerate things + flopenr #(`XLEN) lastReadDataFlop(clk, reset, InstrReadF & ~StallF, InstrInF, LastReadDataF); + flopenr #(1) lastReadDataVFlop(clk, reset, InstrReadF & ~StallF, 1'b1, LastReadDataValidF); + flopenr #(`XLEN) lastReadAdrFlop(clk, reset, InstrReadF & ~StallF, InstrPAdrF, LastReadAdrF); + // Decide which address needs to be fetched and sent out over InstrPAdrF // If the requested address fits inside one read from memory, we fetch that // address, adjusted to the bit width. Otherwise, we request the lower word @@ -64,34 +72,47 @@ module icache( end endgenerate // For now, we always read since the cache doesn't actually cache - assign InstrReadF = 1; + + always_comb if (LastReadDataValidF & (InstrPAdrF == LastReadAdrF)) begin + assign InstrReadF = 0; + end else begin + assign InstrReadF = 1; + end + + // Pick from the memory input or from the previous read, as appropriate + mux2 #(`XLEN) inDataMux(LastReadDataF, InstrInF, InstrReadF, InDataF); // If the instruction fits in one memory read, then we put the right bits // into InstrF. Otherwise, we activate DelayF to signal the rest of the // machinery to swizzle bits. generate if (`XLEN == 32) begin - assign InstrF = PCPF[1] ? {16'b0, InstrInF[31:16]} : InstrInF; + assign InstrF = PCPF[1] ? {16'b0, InDataF[31:16]} : InDataF; assign DelayF = PCPF[1]; - assign MisalignedHalfInstrF = InstrInF[31:16]; + assign MisalignedHalfInstrF = InDataF[31:16]; end else begin - assign InstrF = PCPF[2] ? (PCPF[1] ? {16'b0, InstrInF[63:48]} : InstrInF[63:32]) : (PCPF[1] ? InstrInF[47:16] : InstrInF[31:0]); + assign InstrF = PCPF[2] ? (PCPF[1] ? {16'b0, InDataF[63:48]} : InDataF[63:32]) : (PCPF[1] ? InDataF[47:16] : InDataF[31:0]); assign DelayF = PCPF[1] && PCPF[2]; - assign MisalignedHalfInstrF = InstrInF[63:48]; + assign MisalignedHalfInstrF = InDataF[63:48]; end endgenerate - assign ICacheStallF = DelayF & ~DelaySideF; + assign ICacheStallF = 0; //DelayF & ~DelaySideF; // Detect if the instruction is compressed - // TODO Low-hanging optimization, don't delay if compressed - assign CompressedF = DelaySideF ? (MisalignedHalfInstrD[1:0] != 2'b11) : (InstrF[1:0] != 2'b11); + // TODO Low-hanging optimization, don't delay if getting a compressed instruction + assign CompressedF = (DelaySideF & DelayF) ? (MisalignedHalfInstrD[1:0] != 2'b11) : (InstrF[1:0] != 2'b11); // Pick the correct output, depending on whether we have to assemble this // instruction from two reads or not. // Output the requested instruction (we don't need to worry if the read is // incomplete, since the pipeline stalls for us when it isn't), or a NOP for // the cycle when the first of two reads comes in. - always_comb - assign InstrDMuxChoice = FlushDLastCycle ? 2'b10 : (DelayF ? (DelaySideF ? 2'b01 : 2'b10) : 2'b00); - mux3 #(32) instrDMux (AlignedInstrD, {InstrInF[15:0], MisalignedHalfInstrD}, nop, InstrDMuxChoice, InstrRawD); + always_comb if (DelayF & (MisalignedHalfInstrF[1:0] != 2'b11)) begin + assign InstrDMuxChoice = 2'b11; + end else if (FlushDLastCycle) begin + assign InstrDMuxChoice = 2'b10; + end else begin + assign InstrDMuxChoice = {1'b0, DelaySideF}; + end + mux4 #(32) instrDMux (AlignedInstrD, {InstrInF[15:0], MisalignedHalfInstrD}, nop, {16'b0, MisalignedHalfInstrD}, InstrDMuxChoice, InstrRawD); endmodule diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 8947482e..334e75e2 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -53,14 +53,13 @@ module testbench(); // "rv64m/I-REMW-01", "3000" }; string tests64ic[] = '{ - + "rv64ic/I-C-BEQZ-01", "3000", "rv64ic/I-C-ADD-01", "3000", "rv64ic/I-C-ADDI-01", "3000", "rv64ic/I-C-ADDIW-01", "3000", "rv64ic/I-C-ADDW-01", "3000", "rv64ic/I-C-AND-01", "3000", "rv64ic/I-C-ANDI-01", "3000", - "rv64ic/I-C-BEQZ-01", "3000", "rv64ic/I-C-BNEZ-01", "3000", "rv64ic/I-C-EBREAK-01", "2000", "rv64ic/I-C-J-01", "3000", From 47577948877bfa68bba0624baa79065763b5e772 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Wed, 10 Mar 2021 22:58:41 -0500 Subject: [PATCH 04/34] Return testbench to normal --- wally-pipelined/testbench/testbench-imperas.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 9c064f1a..b59d5453 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -56,13 +56,13 @@ module testbench(); // "rv64m/I-REMW-01", "3000" }; string tests64ic[] = '{ - "rv64ic/I-C-BEQZ-01", "3000", "rv64ic/I-C-ADD-01", "3000", "rv64ic/I-C-ADDI-01", "3000", "rv64ic/I-C-ADDIW-01", "3000", "rv64ic/I-C-ADDW-01", "3000", "rv64ic/I-C-AND-01", "3000", "rv64ic/I-C-ANDI-01", "3000", + "rv64ic/I-C-BEQZ-01", "3000", "rv64ic/I-C-BNEZ-01", "3000", "rv64ic/I-C-EBREAK-01", "2000", "rv64ic/I-C-J-01", "3000", @@ -327,7 +327,7 @@ string tests32i[] = { if (`C_SUPPORTED) tests = {tests64ic, tests}; else tests = {tests, tests64iNOc}; if (`M_SUPPORTED) tests = {tests, tests64m}; - if (`A_SUPPORTED) tests = {tests64a, tests}; + if (`A_SUPPORTED) tests = {tests, tests64a}; // tests = {tests64a, tests}; end else begin // RV32 tests = {tests32i}; From e58d17d5b7d01bfb9320f4f02845e46a2fefa0d7 Mon Sep 17 00:00:00 2001 From: bbracker Date: Sat, 13 Mar 2021 07:03:33 -0500 Subject: [PATCH 05/34] slightly smarter dtim HREADY --- wally-pipelined/src/uncore/dtim.sv | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/wally-pipelined/src/uncore/dtim.sv b/wally-pipelined/src/uncore/dtim.sv index 42ae7fbc..3634b022 100644 --- a/wally-pipelined/src/uncore/dtim.sv +++ b/wally-pipelined/src/uncore/dtim.sv @@ -40,6 +40,8 @@ module dtim #(parameter BASE=0, RANGE = 65535) ( logic [`XLEN-1:0] HREADTim0; // logic [`XLEN-1:0] write; + logic [31:0] HADDRd; + logic newAdr; logic [15:0] entry; logic memread, memwrite; logic [3:0] busycount; @@ -48,14 +50,17 @@ module dtim #(parameter BASE=0, RANGE = 65535) ( memread <= HSELTim & ~ HWRITE; memwrite <= HSELTim & HWRITE; A <= HADDR; + HADDRd <= HADDR; end + assign newAdr = HADDR!=HADDRd; + // busy FSM to extend READY signal always_ff @(posedge HCLK, negedge HRESETn) if (~HRESETn) begin HREADYTim <= 1; end else begin - if (HREADYTim & HSELTim) begin + if ((HREADYTim | newAdr) & HSELTim) begin busycount <= 0; HREADYTim <= #1 0; end else if (~HREADYTim) begin From deb13f34bbed27af3fb4c3ffe6eaf6d7f4defbdb Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Sun, 14 Mar 2021 14:46:21 -0400 Subject: [PATCH 06/34] Get non-jump case working --- wally-pipelined/regression/wally-pipelined.do | 3 +++ wally-pipelined/src/ifu/icache.sv | 9 +++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do index f982d302..ddcd6c0d 100644 --- a/wally-pipelined/regression/wally-pipelined.do +++ b/wally-pipelined/regression/wally-pipelined.do @@ -74,6 +74,9 @@ add wave /testbench/InstrDName add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD add wave /testbench/dut/hart/ifu/ic/DelayF +add wave /testbench/dut/hart/ifu/ic/DelaySideF +add wave /testbench/dut/hart/ifu/ic/DelayD +add wave /testbench/dut/hart/ifu/ic/DelaySideD add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD add wave -divider add wave -hex /testbench/dut/hart/ifu/PCE diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index d8083bb6..67a52800 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -40,7 +40,7 @@ module icache( output logic [31:0] InstrRawD ); - logic DelayF, DelaySideF, FlushDLastCycle; + logic DelayF, DelaySideF, FlushDLastCycle, DelayD, DelaySideD; logic [1:0] InstrDMuxChoice; logic [15:0] MisalignedHalfInstrF, MisalignedHalfInstrD; logic [31:0] InstrF, AlignedInstrD; @@ -49,7 +49,9 @@ module icache( logic [`XLEN-1:0] LastReadDataF, LastReadAdrF, InDataF; flopr #(1) flushDLastCycleFlop(clk, reset, FlushD | (FlushDLastCycle & StallF), FlushDLastCycle); - flopenr #(1) delayStateFlop(clk, reset, ~StallF, (DelayF & ~DelaySideF) ? 1'b1 : 1'b0 , DelaySideF); + flopenr #(1) delayDFlop(clk, reset, ~StallF, DelayF, DelayD); + flopenr #(1) delaySideDFlop(clk, reset, ~StallF, DelaySideF, DelaySideD); + flopenr #(1) delayStateFlop(clk, reset, ~StallF, DelayF & ~DelaySideF, DelaySideF); flopenr #(16) halfInstrFlop(clk, reset, DelayF & ~StallF, MisalignedHalfInstrF, MisalignedHalfInstrD); // This flop is here to simulate pulling data out of the cache, which is edge-triggered @@ -99,7 +101,6 @@ module icache( assign ICacheStallF = 0; //DelayF & ~DelaySideF; // Detect if the instruction is compressed - // TODO Low-hanging optimization, don't delay if getting a compressed instruction assign CompressedF = (DelaySideF & DelayF) ? (MisalignedHalfInstrD[1:0] != 2'b11) : (InstrF[1:0] != 2'b11); // Pick the correct output, depending on whether we have to assemble this @@ -107,7 +108,7 @@ module icache( // Output the requested instruction (we don't need to worry if the read is // incomplete, since the pipeline stalls for us when it isn't), or a NOP for // the cycle when the first of two reads comes in. - always_comb if (DelayF & (MisalignedHalfInstrF[1:0] != 2'b11)) begin + always_comb if (DelayD & (MisalignedHalfInstrD[1:0] != 2'b11)) begin assign InstrDMuxChoice = 2'b11; end else if (FlushDLastCycle) begin assign InstrDMuxChoice = 2'b10; From ac9fd5a32366ec8c2ecfabe32fa42d0de347d04f Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Sun, 14 Mar 2021 15:42:27 -0400 Subject: [PATCH 07/34] Fix BEQZ tests --- wally-pipelined/src/ifu/icache.sv | 10 +++++----- wally-pipelined/src/ifu/ifu.sv | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index 67a52800..c5afe784 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -68,9 +68,9 @@ module icache( // and then the upper word, in that order. generate if (`XLEN == 32) begin - assign InstrPAdrF = PCPF[1] ? (DelaySideF ? {PCPF[31:2]+1, 2'b00} : {PCPF[31:2], 2'b00}) : PCPF; + assign InstrPAdrF = PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[31:2]+1, 2'b00} : {PCPF[31:2], 2'b00}) : PCPF; end else begin - assign InstrPAdrF = PCPF[2] ? (PCPF[1] ? (DelaySideF ? {PCPF[63:3]+1, 3'b000} : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}; + assign InstrPAdrF = PCPF[2] ? (PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[63:3]+1, 3'b000} : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}; end endgenerate // For now, we always read since the cache doesn't actually cache @@ -108,10 +108,10 @@ module icache( // Output the requested instruction (we don't need to worry if the read is // incomplete, since the pipeline stalls for us when it isn't), or a NOP for // the cycle when the first of two reads comes in. - always_comb if (DelayD & (MisalignedHalfInstrD[1:0] != 2'b11)) begin - assign InstrDMuxChoice = 2'b11; - end else if (FlushDLastCycle) begin + always_comb if (FlushDLastCycle) begin assign InstrDMuxChoice = 2'b10; + end else if (DelayD & (MisalignedHalfInstrD[1:0] != 2'b11)) begin + assign InstrDMuxChoice = 2'b11; end else begin assign InstrDMuxChoice = {1'b0, DelaySideF}; end diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 2fe212dd..e82767ed 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -91,7 +91,7 @@ module ifu ( //assign InstrReadF = ~StallD; // *** & ICacheMissF; add later // assign InstrReadF = 1; // *** & ICacheMissF; add later - // jarred 2021-03-04 Add instrution cache block to remove rd2 + // jarred 2021-03-14 Add instrution cache block to remove rd2 assign PCPF = PCF; // Temporary workaround until iTLB is live icache ic(clk, reset, StallF, StallD, FlushD, PCPF, InstrInF, ICacheInstrPAdrF, InstrReadF, CompressedF, ICacheStallF, InstrRawD); // Prioritize the iTLB for reads if it wants one From a82aa233993e9f3dc3278681c6068232e5026e99 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Tue, 16 Mar 2021 16:57:51 -0400 Subject: [PATCH 08/34] Fix icache for jumping into misaligned instructions --- wally-pipelined/src/ifu/icache.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index c5afe784..bef90008 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -51,7 +51,7 @@ module icache( flopr #(1) flushDLastCycleFlop(clk, reset, FlushD | (FlushDLastCycle & StallF), FlushDLastCycle); flopenr #(1) delayDFlop(clk, reset, ~StallF, DelayF, DelayD); flopenr #(1) delaySideDFlop(clk, reset, ~StallF, DelaySideF, DelaySideD); - flopenr #(1) delayStateFlop(clk, reset, ~StallF, DelayF & ~DelaySideF, DelaySideF); + flopenrc#(1) delayStateFlop(clk, reset, FlushD, ~StallF, DelayF & ~DelaySideF, DelaySideF); flopenr #(16) halfInstrFlop(clk, reset, DelayF & ~StallF, MisalignedHalfInstrF, MisalignedHalfInstrD); // This flop is here to simulate pulling data out of the cache, which is edge-triggered From 98e93a63c01a3ed4c269f54b4710834e804b088a Mon Sep 17 00:00:00 2001 From: bbracker Date: Thu, 18 Mar 2021 17:47:00 -0400 Subject: [PATCH 09/34] maybe AHB works now --- wally-pipelined/regression/sim-wally-rv32ic | 2 +- wally-pipelined/regression/wally-pipelined.do | 57 +-------------- .../regression/wave-dos/ahb-waves.do | 73 +++++++++++++++++++ .../regression/wave-dos/default-waves.do | 56 ++++++++++++++ wally-pipelined/src/ebu/ahblite.sv | 61 ++++++++-------- wally-pipelined/src/uncore/dtim.sv | 35 +++++---- 6 files changed, 183 insertions(+), 101 deletions(-) create mode 100644 wally-pipelined/regression/wave-dos/ahb-waves.do create mode 100644 wally-pipelined/regression/wave-dos/default-waves.do diff --git a/wally-pipelined/regression/sim-wally-rv32ic b/wally-pipelined/regression/sim-wally-rv32ic index a254c824..b69fb317 100755 --- a/wally-pipelined/regression/sim-wally-rv32ic +++ b/wally-pipelined/regression/sim-wally-rv32ic @@ -1,3 +1,3 @@ vsim -c < Date: Thu, 18 Mar 2021 18:52:03 -0400 Subject: [PATCH 10/34] Add icache's read request to ahb wavs --- wally-pipelined/regression/wave-dos/ahb-waves.do | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wally-pipelined/regression/wave-dos/ahb-waves.do b/wally-pipelined/regression/wave-dos/ahb-waves.do index ea068581..3fe12993 100644 --- a/wally-pipelined/regression/wave-dos/ahb-waves.do +++ b/wally-pipelined/regression/wave-dos/ahb-waves.do @@ -22,6 +22,8 @@ add wave -hex /testbench/dut/hart/ifu/InstrD add wave /testbench/InstrDName add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD +add wave -divider +add wave -hex /testbench/dut/hart/ifu/ic/InstrPAdrF add wave /testbench/dut/hart/ifu/ic/DelayF add wave /testbench/dut/hart/ifu/ic/DelaySideF add wave /testbench/dut/hart/ifu/ic/DelayD @@ -70,4 +72,4 @@ add wave -hex /testbench/dut/hart/ieu/dp/ResultW add wave -hex /testbench/dut/hart/ieu/dp/RdW add wave -divider -add wave -hex -r /testbench/* \ No newline at end of file +add wave -hex -r /testbench/* From c8028710a5f80b71a324cf7c6d64173dbeae491c Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Sat, 20 Mar 2021 17:04:13 -0400 Subject: [PATCH 11/34] Change flop to listen to StallF --- wally-pipelined/src/ifu/icache.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index bef90008..5b1f847e 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -48,7 +48,7 @@ module icache( logic LastReadDataValidF; logic [`XLEN-1:0] LastReadDataF, LastReadAdrF, InDataF; - flopr #(1) flushDLastCycleFlop(clk, reset, FlushD | (FlushDLastCycle & StallF), FlushDLastCycle); + flopenr #(1) flushDLastCycleFlop(clk, reset, ~StallF, FlushD, FlushDLastCycle); flopenr #(1) delayDFlop(clk, reset, ~StallF, DelayF, DelayD); flopenr #(1) delaySideDFlop(clk, reset, ~StallF, DelaySideF, DelaySideD); flopenrc#(1) delayStateFlop(clk, reset, FlushD, ~StallF, DelayF & ~DelaySideF, DelaySideF); From b63bfc7afa1e1633d8e4e8de951572cdfc39c866 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Sat, 20 Mar 2021 17:16:50 -0400 Subject: [PATCH 12/34] Fix conflicts in ahb-waves that snuck through manual merging --- wally-pipelined/regression/wave-dos/ahb-waves.do | 7 ------- 1 file changed, 7 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/ahb-waves.do b/wally-pipelined/regression/wave-dos/ahb-waves.do index 5a6e670f..70609fa2 100644 --- a/wally-pipelined/regression/wave-dos/ahb-waves.do +++ b/wally-pipelined/regression/wave-dos/ahb-waves.do @@ -63,12 +63,9 @@ add wave -hex /testbench/dut/hart/ebu/CaptureDataM add wave -hex /testbench/dut/hart/ebu/InstrStall add wave -divider -<<<<<<< HEAD add wave -hex /testbench/dut/uncore/dtim/* add wave -divider -======= ->>>>>>> origin/main add wave -hex /testbench/dut/hart/ifu/PCW add wave -hex /testbench/dut/hart/ifu/InstrW add wave /testbench/InstrWName @@ -78,11 +75,7 @@ add wave -hex /testbench/dut/hart/ieu/dp/ResultW add wave -hex /testbench/dut/hart/ieu/dp/RdW add wave -divider -<<<<<<< HEAD -add wave -hex -r /testbench/* -======= add wave -hex /testbench/dut/uncore/dtim/* add wave -divider add wave -hex -r /testbench/* ->>>>>>> origin/main From c5f99c4a34bbd0d765751ca8b60b188e697808d5 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Sat, 20 Mar 2021 17:34:19 -0400 Subject: [PATCH 13/34] Revert "Change flop to listen to StallF" This reverts commit c8028710a5f80b71a324cf7c6d64173dbeae491c. --- wally-pipelined/src/ifu/icache.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index 5b1f847e..bef90008 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -48,7 +48,7 @@ module icache( logic LastReadDataValidF; logic [`XLEN-1:0] LastReadDataF, LastReadAdrF, InDataF; - flopenr #(1) flushDLastCycleFlop(clk, reset, ~StallF, FlushD, FlushDLastCycle); + flopr #(1) flushDLastCycleFlop(clk, reset, FlushD | (FlushDLastCycle & StallF), FlushDLastCycle); flopenr #(1) delayDFlop(clk, reset, ~StallF, DelayF, DelayD); flopenr #(1) delaySideDFlop(clk, reset, ~StallF, DelaySideF, DelaySideD); flopenrc#(1) delayStateFlop(clk, reset, FlushD, ~StallF, DelayF & ~DelaySideF, DelaySideF); From a2bf5ac2022db684fc154e4400235eaf17ce479c Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Sat, 20 Mar 2021 17:54:40 -0400 Subject: [PATCH 14/34] Fix another bug in the icache (why so many of them?) --- wally-pipelined/src/ifu/icache.sv | 12 ++++++++---- wally-pipelined/testbench/testbench-imperas.sv | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index bef90008..29fef63d 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -48,10 +48,14 @@ module icache( logic LastReadDataValidF; logic [`XLEN-1:0] LastReadDataF, LastReadAdrF, InDataF; + // This flop doesn't stall if StallF is high because we should output a nop + // when FlushD happens, even if the pipeline is also stalled. flopr #(1) flushDLastCycleFlop(clk, reset, FlushD | (FlushDLastCycle & StallF), FlushDLastCycle); - flopenr #(1) delayDFlop(clk, reset, ~StallF, DelayF, DelayD); + + flopenr #(1) delayDFlop(clk, reset, ~StallF, DelayF & ~CompressedF, DelayD); flopenr #(1) delaySideDFlop(clk, reset, ~StallF, DelaySideF, DelaySideD); flopenrc#(1) delayStateFlop(clk, reset, FlushD, ~StallF, DelayF & ~DelaySideF, DelaySideF); + // This flop stores the first half of a misaligned instruction while waiting for the other half flopenr #(16) halfInstrFlop(clk, reset, DelayF & ~StallF, MisalignedHalfInstrF, MisalignedHalfInstrD); // This flop is here to simulate pulling data out of the cache, which is edge-triggered @@ -68,7 +72,7 @@ module icache( // and then the upper word, in that order. generate if (`XLEN == 32) begin - assign InstrPAdrF = PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[31:2]+1, 2'b00} : {PCPF[31:2], 2'b00}) : PCPF; + assign InstrPAdrF = PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[31:2], 2'b00} : {PCPF[31:2], 2'b00}) : PCPF; end else begin assign InstrPAdrF = PCPF[2] ? (PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[63:3]+1, 3'b000} : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}; end @@ -101,7 +105,7 @@ module icache( assign ICacheStallF = 0; //DelayF & ~DelaySideF; // Detect if the instruction is compressed - assign CompressedF = (DelaySideF & DelayF) ? (MisalignedHalfInstrD[1:0] != 2'b11) : (InstrF[1:0] != 2'b11); + assign CompressedF = (DelayD) ? (MisalignedHalfInstrD[1:0] != 2'b11) : (InstrF[1:0] != 2'b11); // Pick the correct output, depending on whether we have to assemble this // instruction from two reads or not. @@ -113,7 +117,7 @@ module icache( end else if (DelayD & (MisalignedHalfInstrD[1:0] != 2'b11)) begin assign InstrDMuxChoice = 2'b11; end else begin - assign InstrDMuxChoice = {1'b0, DelaySideF}; + assign InstrDMuxChoice = {1'b0, DelayD}; end mux4 #(32) instrDMux (AlignedInstrD, {InstrInF[15:0], MisalignedHalfInstrD}, nop, {16'b0, MisalignedHalfInstrD}, InstrDMuxChoice, InstrRawD); endmodule diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 8b128b17..1060d8cb 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -349,7 +349,7 @@ string tests32i[] = { end else begin // RV32 // *** add the 32 bit bp tests tests = {tests32i}; - if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic}; + if (`C_SUPPORTED % 2 == 1) tests = {tests32ic, tests}; else tests = {tests, tests32iNOc}; if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m}; if (`A_SUPPORTED) tests = {tests, tests32a}; From f9cf05a7d45d3088c34262c159d7f6a453024f70 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Sat, 20 Mar 2021 18:06:03 -0400 Subject: [PATCH 15/34] Fix bug with PC incrementing --- wally-pipelined/src/ifu/icache.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index 29fef63d..a108ebb0 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -105,7 +105,7 @@ module icache( assign ICacheStallF = 0; //DelayF & ~DelaySideF; // Detect if the instruction is compressed - assign CompressedF = (DelayD) ? (MisalignedHalfInstrD[1:0] != 2'b11) : (InstrF[1:0] != 2'b11); + assign CompressedF = InstrF[1:0] != 2'b11; // Pick the correct output, depending on whether we have to assemble this // instruction from two reads or not. From 097e8edb3dfad02f146bd1895789f1da5e183eca Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Sat, 20 Mar 2021 18:19:51 -0400 Subject: [PATCH 16/34] Put Imperas testbench back --- wally-pipelined/testbench/testbench-imperas.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 1060d8cb..d4682dd2 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -349,7 +349,7 @@ string tests32i[] = { end else begin // RV32 // *** add the 32 bit bp tests tests = {tests32i}; - if (`C_SUPPORTED % 2 == 1) tests = {tests32ic, tests}; + if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic}; else tests = {tests, tests32iNOc}; if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m}; if (`A_SUPPORTED) tests = {tests, tests32a}; @@ -483,7 +483,7 @@ string tests32i[] = { // initialize the branch predictor initial begin - $readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.Predictor.DirPredictor.PHT.memory); + $readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.DirPredictor.memory.memory); $readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.TargetPredictor.memory.memory); end From 5b1db9b6a2b535bc8c181b04895bd86a64702262 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Sat, 20 Mar 2021 18:20:27 -0400 Subject: [PATCH 17/34] Change busybear testbench to reflect new location of InstrF --- wally-pipelined/regression/wally-busybear.do | 2 +- .../testbench/testbench-busybear.sv | 34 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/wally-pipelined/regression/wally-busybear.do b/wally-pipelined/regression/wally-busybear.do index e4b75a08..636b3dbe 100644 --- a/wally-pipelined/regression/wally-busybear.do +++ b/wally-pipelined/regression/wally-busybear.do @@ -46,7 +46,7 @@ add wave -divider add wave -hex /testbench_busybear/PCtext add wave -hex /testbench_busybear/pcExpected add wave -hex /testbench_busybear/dut/hart/ifu/PCF -add wave -hex /testbench_busybear/dut/hart/ifu/InstrF +add wave -hex /testbench_busybear/dut/hart/ifu/ic/InstrF add wave -hex /testbench_busybear/dut/hart/ifu/StallD add wave -hex /testbench_busybear/dut/hart/ifu/FlushD add wave -hex /testbench_busybear/dut/hart/ifu/InstrRawD diff --git a/wally-pipelined/testbench/testbench-busybear.sv b/wally-pipelined/testbench/testbench-busybear.sv index dcd1a4c8..df1e6e49 100644 --- a/wally-pipelined/testbench/testbench-busybear.sv +++ b/wally-pipelined/testbench/testbench-busybear.sv @@ -356,10 +356,10 @@ module testbench_busybear(); logic [31:0] InstrMask; logic forcedInstr; logic [63:0] lastPCF; - always @(dut.PCF or dut.hart.ifu.InstrF or reset) begin + always @(dut.PCF or dut.hart.ifu.ic.InstrF or reset) begin if(~HWRITE) begin #3; - if (~reset && dut.hart.ifu.InstrF[15:0] !== {16{1'bx}} && ~dut.hart.StallD) begin + if (~reset && dut.hart.ifu.ic.InstrF[15:0] !== {16{1'bx}} && ~dut.hart.StallD) begin if (dut.PCF !== lastPCF) begin lastCheckInstrF = CheckInstrF; lastPC <= dut.PCF; @@ -367,23 +367,23 @@ module testbench_busybear(); if (speculative && (lastPC != pcExpected)) begin speculative = ~equal(dut.PCF,pcExpected,3); if(dut.PCF===pcExpected) begin - if(dut.hart.ifu.InstrF[6:0] == 7'b1010011) begin // for now, NOP out any float instrs + if(dut.hart.ifu.ic.InstrF[6:0] == 7'b1010011) begin // for now, NOP out any float instrs force CheckInstrF = 32'b0010011; release CheckInstrF; - force dut.hart.ifu.InstrF = 32'b0010011; + force dut.hart.ifu.ic.InstrF = 32'b0010011; #7; - release dut.hart.ifu.InstrF; + release dut.hart.ifu.ic.InstrF; $display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.PCF, instrs, $time); warningCount += 1; forcedInstr = 1; end else begin - if(dut.hart.ifu.InstrF[28:27] != 2'b11 && dut.hart.ifu.InstrF[6:0] == 7'b0101111) begin //for now, replace non-SC A instrs with LD + if(dut.hart.ifu.ic.InstrF[28:27] != 2'b11 && dut.hart.ifu.ic.InstrF[6:0] == 7'b0101111) begin //for now, replace non-SC A instrs with LD force CheckInstrF = {12'b0, CheckInstrF[19:7], 7'b0000011}; release CheckInstrF; - force dut.hart.ifu.InstrF = {12'b0, dut.hart.ifu.InstrF[19:7], 7'b0000011}; + force dut.hart.ifu.ic.InstrF = {12'b0, dut.hart.ifu.ic.InstrF[19:7], 7'b0000011}; #7; - release dut.hart.ifu.InstrF; + release dut.hart.ifu.ic.InstrF; $display("warning: replacing AMO instr %s at PC=%0x with ld", PCtext, dut.PCF); warningCount += 1; forcedInstr = 1; @@ -406,23 +406,23 @@ module testbench_busybear(); end scan_file_PC = $fscanf(data_file_PC, "%x\n", CheckInstrF); if(dut.PCF === pcExpected) begin - if(dut.hart.ifu.InstrF[6:0] == 7'b1010011) begin // for now, NOP out any float instrs + if(dut.hart.ifu.ic.InstrF[6:0] == 7'b1010011) begin // for now, NOP out any float instrs force CheckInstrF = 32'b0010011; release CheckInstrF; - force dut.hart.ifu.InstrF = 32'b0010011; + force dut.hart.ifu.ic.InstrF = 32'b0010011; #7; - release dut.hart.ifu.InstrF; + release dut.hart.ifu.ic.InstrF; $display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.PCF, instrs, $time); warningCount += 1; forcedInstr = 1; end else begin - if(dut.hart.ifu.InstrF[28:27] != 2'b11 && dut.hart.ifu.InstrF[6:0] == 7'b0101111) begin //for now, replace non-SC A instrs with LD + if(dut.hart.ifu.ic.InstrF[28:27] != 2'b11 && dut.hart.ifu.ic.InstrF[6:0] == 7'b0101111) begin //for now, replace non-SC A instrs with LD force CheckInstrF = {12'b0, CheckInstrF[19:7], 7'b0000011}; release CheckInstrF; - force dut.hart.ifu.InstrF = {12'b0, dut.hart.ifu.InstrF[19:7], 7'b0000011}; + force dut.hart.ifu.ic.InstrF = {12'b0, dut.hart.ifu.ic.InstrF[19:7], 7'b0000011}; #7; - release dut.hart.ifu.InstrF; + release dut.hart.ifu.ic.InstrF; $display("warning: replacing AMO instr %s at PC=%0x with ld", PCtext, dut.PCF); warningCount += 1; forcedInstr = 1; @@ -467,8 +467,8 @@ module testbench_busybear(); `ERROR end InstrMask = CheckInstrF[1:0] == 2'b11 ? 32'hFFFFFFFF : 32'h0000FFFF; - if ((~forcedInstr) && (~speculative) && ((InstrMask & dut.hart.ifu.InstrF) !== (InstrMask & CheckInstrF))) begin - $display("%0t ps, instr %0d: InstrF does not equal CheckInstrF: %x, %x, PC: %x", $time, instrs, dut.hart.ifu.InstrF, CheckInstrF, dut.PCF); + if ((~forcedInstr) && (~speculative) && ((InstrMask & dut.hart.ifu.ic.InstrF) !== (InstrMask & CheckInstrF))) begin + $display("%0t ps, instr %0d: InstrF does not equal CheckInstrF: %x, %x, PC: %x", $time, instrs, dut.hart.ifu.ic.InstrF, CheckInstrF, dut.PCF); `ERROR end end @@ -481,7 +481,7 @@ module testbench_busybear(); // Track names of instructions string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName; logic [31:0] InstrW; - instrNameDecTB dec(dut.hart.ifu.InstrF, InstrFName); + instrNameDecTB dec(dut.hart.ifu.ic.InstrF, InstrFName); instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, dut.hart.ifu.InstrM, InstrW, From b871bfe7140eba21e0c4e394e6af2fbc06769418 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Mon, 22 Mar 2021 15:04:46 -0400 Subject: [PATCH 18/34] Update icache interface --- wally-pipelined/src/ifu/icache.sv | 25 +++++++++++-------- wally-pipelined/src/ifu/ifu.sv | 7 +++++- .../testbench/testbench-imperas.sv | 2 +- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index a108ebb0..6ed1727d 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -26,18 +26,19 @@ `include "wally-config.vh" module icache( - input logic clk, reset, - input logic StallF, StallD, - input logic FlushD, + input logic clk, reset, + input logic StallF, StallD, + input logic FlushD, // Fetch - input logic [`XLEN-1:0] PCPF, - input logic [`XLEN-1:0] InstrInF, - output logic [`XLEN-1:0] InstrPAdrF, - output logic InstrReadF, - output logic CompressedF, - output logic ICacheStallF, + input logic [`XLEN-1:12] UpperPCPF, + input logic [11:0] LowerPCF, + input logic [`XLEN-1:0] InstrInF, + output logic [`XLEN-1:0] InstrPAdrF, + output logic InstrReadF, + output logic CompressedF, + output logic ICacheStallF, // Decode - output logic [31:0] InstrRawD + output logic [31:0] InstrRawD ); logic DelayF, DelaySideF, FlushDLastCycle, DelayD, DelaySideD; @@ -48,6 +49,10 @@ module icache( logic LastReadDataValidF; logic [`XLEN-1:0] LastReadDataF, LastReadAdrF, InDataF; + // Temporary change to bridge the new interface to old behaviors + logic [`XLEN-1:0] PCPF; + assign PCPF = {UpperPCPF, LowerPCF}; + // This flop doesn't stall if StallF is high because we should output a nop // when FlushD happens, even if the pipeline is also stalled. flopr #(1) flushDLastCycleFlop(clk, reset, FlushD | (FlushDLastCycle & StallF), FlushDLastCycle); diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 99bf380f..c92ff403 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -93,7 +93,12 @@ module ifu ( // jarred 2021-03-14 Add instrution cache block to remove rd2 assign PCPF = PCF; // Temporary workaround until iTLB is live - icache ic(clk, reset, StallF, StallD, FlushD, PCPF, InstrInF, ICacheInstrPAdrF, InstrReadF, CompressedF, ICacheStallF, InstrRawD); + icache ic( + .*, + .InstrPAdrF(ICacheInstrPAdrF), + .UpperPCPF(PCPF[`XLEN-1:12]), + .LowerPCF(PCF[11:0]) + ); // Prioritize the iTLB for reads if it wants one mux2 #(`XLEN) instrPAdrMux(ICacheInstrPAdrF, ITLBInstrPAdrF, ITLBMissF, InstrPAdrF); diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index d4682dd2..8b128b17 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -483,7 +483,7 @@ string tests32i[] = { // initialize the branch predictor initial begin - $readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.DirPredictor.memory.memory); + $readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.Predictor.DirPredictor.PHT.memory); $readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.TargetPredictor.memory.memory); end From 6ce52f9b80268e4d9555594d5915b9b38d01e252 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Mon, 22 Mar 2021 15:13:23 -0400 Subject: [PATCH 19/34] Remove DelaySideD since it isn't needed --- wally-pipelined/src/ifu/icache.sv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index 6ed1727d..d7932eec 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -41,7 +41,7 @@ module icache( output logic [31:0] InstrRawD ); - logic DelayF, DelaySideF, FlushDLastCycle, DelayD, DelaySideD; + logic DelayF, DelaySideF, FlushDLastCycle, DelayD; logic [1:0] InstrDMuxChoice; logic [15:0] MisalignedHalfInstrF, MisalignedHalfInstrD; logic [31:0] InstrF, AlignedInstrD; @@ -58,7 +58,6 @@ module icache( flopr #(1) flushDLastCycleFlop(clk, reset, FlushD | (FlushDLastCycle & StallF), FlushDLastCycle); flopenr #(1) delayDFlop(clk, reset, ~StallF, DelayF & ~CompressedF, DelayD); - flopenr #(1) delaySideDFlop(clk, reset, ~StallF, DelaySideF, DelaySideD); flopenrc#(1) delayStateFlop(clk, reset, FlushD, ~StallF, DelayF & ~DelaySideF, DelaySideF); // This flop stores the first half of a misaligned instruction while waiting for the other half flopenr #(16) halfInstrFlop(clk, reset, DelayF & ~StallF, MisalignedHalfInstrF, MisalignedHalfInstrD); @@ -82,8 +81,8 @@ module icache( assign InstrPAdrF = PCPF[2] ? (PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[63:3]+1, 3'b000} : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}; end endgenerate - // For now, we always read since the cache doesn't actually cache + // Read from memory if we don't have the address we want always_comb if (LastReadDataValidF & (InstrPAdrF == LastReadAdrF)) begin assign InstrReadF = 0; end else begin @@ -107,7 +106,8 @@ module icache( assign MisalignedHalfInstrF = InDataF[63:48]; end endgenerate - assign ICacheStallF = 0; //DelayF & ~DelaySideF; + // We will likely need to stall later, but stalls are handled by the rest of the pipeline for now + assign ICacheStallF = 0; // Detect if the instruction is compressed assign CompressedF = InstrF[1:0] != 2'b11; From d5bd5fa9d77c53023045689132d20437d09a468c Mon Sep 17 00:00:00 2001 From: Noah Boorstin Date: Mon, 22 Mar 2021 23:45:02 -0400 Subject: [PATCH 20/34] start migrating busybear over to InstrRawD/PCD this breaks busybear for now --- wally-pipelined/regression/wally-busybear.do | 8 +- .../testbench/testbench-busybear.sv | 86 +++++++++---------- 2 files changed, 47 insertions(+), 47 deletions(-) diff --git a/wally-pipelined/regression/wally-busybear.do b/wally-pipelined/regression/wally-busybear.do index 636b3dbe..b7e92ae3 100644 --- a/wally-pipelined/regression/wally-busybear.do +++ b/wally-pipelined/regression/wally-busybear.do @@ -45,13 +45,13 @@ add wave /testbench_busybear/reset add wave -divider add wave -hex /testbench_busybear/PCtext add wave -hex /testbench_busybear/pcExpected -add wave -hex /testbench_busybear/dut/hart/ifu/PCF -add wave -hex /testbench_busybear/dut/hart/ifu/ic/InstrF +add wave -hex /testbench_busybear/dut/hart/ifu/PCD +add wave -hex /testbench_busybear/dut/hart/ifu/InstrD add wave -hex /testbench_busybear/dut/hart/ifu/StallD add wave -hex /testbench_busybear/dut/hart/ifu/FlushD add wave -hex /testbench_busybear/dut/hart/ifu/InstrRawD -add wave /testbench_busybear/CheckInstrF -add wave /testbench_busybear/lastCheckInstrF +add wave /testbench_busybear/CheckInstrD +add wave /testbench_busybear/lastCheckInstrD add wave /testbench_busybear/speculative add wave /testbench_busybear/lastPC2 add wave -divider diff --git a/wally-pipelined/testbench/testbench-busybear.sv b/wally-pipelined/testbench/testbench-busybear.sv index d4c5ed9d..59624106 100644 --- a/wally-pipelined/testbench/testbench-busybear.sv +++ b/wally-pipelined/testbench/testbench-busybear.sv @@ -7,7 +7,7 @@ module testbench_busybear(); logic [31:0] GPIOPinsOut, GPIOPinsEn; // instantiate device to be tested - logic [31:0] CheckInstrF; + logic [31:0] CheckInstrD; logic [`AHBW-1:0] HRDATA; logic [31:0] HADDR; @@ -344,7 +344,7 @@ module testbench_busybear(); initial begin speculative = 0; end - logic [63:0] lastCheckInstrF, lastPC, lastPC2; + logic [63:0] lastCheckInstrD, lastPC, lastPC2; string PCtextW, PCtext2W; logic [31:0] InstrWExpected; @@ -379,36 +379,36 @@ module testbench_busybear(); end logic [31:0] InstrMask; logic forcedInstr; - logic [63:0] lastPCF; - always @(dut.PCF or dut.hart.ifu.ic.InstrF or reset) begin + logic [63:0] lastPCD; + always @(dut.hart.ifu.PCD or dut.hart.ifu.InstrRawD or reset) begin if(~HWRITE) begin #3; - if (~reset && dut.hart.ifu.ic.InstrF[15:0] !== {16{1'bx}} && ~dut.hart.StallD) begin - if (dut.PCF !== lastPCF) begin - lastCheckInstrF = CheckInstrF; - lastPC <= dut.PCF; + if (~reset && dut.hart.ifu.InstrRawD[15:0] !== {16{1'bx}} && ~dut.hart.StallD) begin + if (dut.hart.ifu.PCD !== lastPCD) begin + lastCheckInstrD = CheckInstrD; + lastPC <= dut.hart.ifu.PCD; lastPC2 <= lastPC; if (speculative && (lastPC != pcExpected)) begin - speculative = ~equal(dut.PCF,pcExpected,3); - if(dut.PCF===pcExpected) begin - if(dut.hart.ifu.ic.InstrF[6:0] == 7'b1010011) begin // for now, NOP out any float instrs - force CheckInstrF = 32'b0010011; - release CheckInstrF; - force dut.hart.ifu.ic.InstrF = 32'b0010011; + speculative = ~equal(dut.hart.ifu.PCD,pcExpected,3); + if(dut.hart.ifu.PCD===pcExpected) begin + if(dut.hart.ifu.InstrRawD[6:0] == 7'b1010011) begin // for now, NOP out any float instrs + force CheckInstrD = 32'b0010011; + release CheckInstrD; + force dut.hart.ifu.InstrRawD = 32'b0010011; #7; - release dut.hart.ifu.ic.InstrF; - $display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.PCF, instrs, $time); + release dut.hart.ifu.InstrRawD; + $display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.hart.ifu.PCD, instrs, $time); warningCount += 1; forcedInstr = 1; end else begin - if(dut.hart.ifu.ic.InstrF[28:27] != 2'b11 && dut.hart.ifu.ic.InstrF[6:0] == 7'b0101111) begin //for now, replace non-SC A instrs with LD - force CheckInstrF = {12'b0, CheckInstrF[19:7], 7'b0000011}; - release CheckInstrF; - force dut.hart.ifu.ic.InstrF = {12'b0, dut.hart.ifu.ic.InstrF[19:7], 7'b0000011}; + if(dut.hart.ifu.InstrRawD[28:27] != 2'b11 && dut.hart.ifu.InstrRawD[6:0] == 7'b0101111) begin //for now, replace non-SC A instrs with LD + force CheckInstrD = {12'b0, CheckInstrD[19:7], 7'b0000011}; + release CheckInstrD; + force dut.hart.ifu.InstrRawD = {12'b0, dut.hart.ifu.InstrRawD[19:7], 7'b0000011}; #7; - release dut.hart.ifu.ic.InstrF; - $display("warning: replacing AMO instr %s at PC=%0x with ld", PCtext, dut.PCF); + release dut.hart.ifu.InstrRawD; + $display("warning: replacing AMO instr %s at PC=%0x with ld", PCtext, dut.hart.ifu.PCD); warningCount += 1; forcedInstr = 1; end @@ -428,26 +428,26 @@ module testbench_busybear(); scan_file_PC = $fscanf(data_file_PC, "%s\n", PCtext2); PCtext = {PCtext, " ", PCtext2}; end - scan_file_PC = $fscanf(data_file_PC, "%x\n", CheckInstrF); - if(dut.PCF === pcExpected) begin - if(dut.hart.ifu.ic.InstrF[6:0] == 7'b1010011) begin // for now, NOP out any float instrs - force CheckInstrF = 32'b0010011; - release CheckInstrF; - force dut.hart.ifu.ic.InstrF = 32'b0010011; + scan_file_PC = $fscanf(data_file_PC, "%x\n", CheckInstrD); + if(dut.hart.ifu.PCD === pcExpected) begin + if(dut.hart.ifu.InstrRawD[6:0] == 7'b1010011) begin // for now, NOP out any float instrs + force CheckInstrD = 32'b0010011; + release CheckInstrD; + force dut.hart.ifu.InstrRawD = 32'b0010011; #7; - release dut.hart.ifu.ic.InstrF; - $display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.PCF, instrs, $time); + release dut.hart.ifu.InstrRawD; + $display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.hart.ifu.PCD, instrs, $time); warningCount += 1; forcedInstr = 1; end else begin - if(dut.hart.ifu.ic.InstrF[28:27] != 2'b11 && dut.hart.ifu.ic.InstrF[6:0] == 7'b0101111) begin //for now, replace non-SC A instrs with LD - force CheckInstrF = {12'b0, CheckInstrF[19:7], 7'b0000011}; - release CheckInstrF; - force dut.hart.ifu.ic.InstrF = {12'b0, dut.hart.ifu.ic.InstrF[19:7], 7'b0000011}; + if(dut.hart.ifu.InstrRawD[28:27] != 2'b11 && dut.hart.ifu.InstrRawD[6:0] == 7'b0101111) begin //for now, replace non-SC A instrs with LD + force CheckInstrD = {12'b0, CheckInstrD[19:7], 7'b0000011}; + release CheckInstrD; + force dut.hart.ifu.InstrRawD = {12'b0, dut.hart.ifu.InstrRawD[19:7], 7'b0000011}; #7; - release dut.hart.ifu.ic.InstrF; - $display("warning: replacing AMO instr %s at PC=%0x with ld", PCtext, dut.PCF); + release dut.hart.ifu.InstrRawD; + $display("warning: replacing AMO instr %s at PC=%0x with ld", PCtext, dut.hart.ifu.PCD); warningCount += 1; forcedInstr = 1; end @@ -465,7 +465,7 @@ module testbench_busybear(); end instrs += 1; // are we at a branch/jump? - casex (lastCheckInstrF[31:0]) + casex (lastCheckInstrD[31:0]) 32'b00000000001000000000000001110011, // URET 32'b00010000001000000000000001110011, // SRET 32'b00110000001000000000000001110011, // MRET @@ -486,18 +486,18 @@ module testbench_busybear(); endcase //check things! - if ((~speculative) && (~equal(dut.PCF,pcExpected,3))) begin - $display("%0t ps, instr %0d: PC does not equal PC expected: %x, %x", $time, instrs, dut.PCF, pcExpected); + if ((~speculative) && (~equal(dut.hart.ifu.PCD,pcExpected,3))) begin + $display("%0t ps, instr %0d: PC does not equal PC expected: %x, %x", $time, instrs, dut.hart.ifu.PCD, pcExpected); `ERROR end - InstrMask = CheckInstrF[1:0] == 2'b11 ? 32'hFFFFFFFF : 32'h0000FFFF; - if ((~forcedInstr) && (~speculative) && ((InstrMask & dut.hart.ifu.ic.InstrF) !== (InstrMask & CheckInstrF))) begin - $display("%0t ps, instr %0d: InstrF does not equal CheckInstrF: %x, %x, PC: %x", $time, instrs, dut.hart.ifu.ic.InstrF, CheckInstrF, dut.PCF); + InstrMask = CheckInstrD[1:0] == 2'b11 ? 32'hFFFFFFFF : 32'h0000FFFF; + if ((~forcedInstr) && (~speculative) && ((InstrMask & dut.hart.ifu.InstrRawD) !== (InstrMask & CheckInstrD))) begin + $display("%0t ps, instr %0d: InstrD does not equal CheckInstrD: %x, %x, PC: %x", $time, instrs, dut.hart.ifu.InstrRawD, CheckInstrD, dut.hart.ifu.PCD); `ERROR end end end - lastPCF = dut.PCF; + lastPCD = dut.hart.ifu.PCD; end end end From 827993598d7204b78a0f9737346da7db4c3675fb Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Mon, 22 Mar 2021 23:57:01 -0400 Subject: [PATCH 21/34] Small commit to see if new hook tests non-main branch --- wally-pipelined/src/ifu/icache.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index d7932eec..ceb06ddc 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -4,7 +4,7 @@ // Written: jaallen@g.hmc.edu 2021-03-02 // Modified: // -// Purpose: Cache instructions for the ifu so it can access memory less often +// Purpose: Cache instructions for the ifu so it can access memory less often, saving cycles // // A component of the Wally configurable RISC-V project. // From 6ffa01cc4dc866b4ddf4e521b4028800eee59c59 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Tue, 23 Mar 2021 00:07:39 -0400 Subject: [PATCH 22/34] Add comments explaining icache inputs --- wally-pipelined/src/ifu/icache.sv | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index ceb06ddc..98a58f7d 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -26,18 +26,25 @@ `include "wally-config.vh" module icache( + // Basic pipeline stuff input logic clk, reset, input logic StallF, StallD, input logic FlushD, - // Fetch + // Upper bits of physical address for PC input logic [`XLEN-1:12] UpperPCPF, + // Lower 12 bits of virtual PC address, since it's faster this way input logic [11:0] LowerPCF, + // Data read in from the ebu unit input logic [`XLEN-1:0] InstrInF, + // Read requested from the ebu unit output logic [`XLEN-1:0] InstrPAdrF, output logic InstrReadF, + // High if the instruction currently in the fetch stage is compressed output logic CompressedF, + // High if the icache is requesting a stall output logic ICacheStallF, - // Decode + // The raw (not decompressed) instruction that was requested + // If the next instruction is compressed, the upper 16 bits may be anything output logic [31:0] InstrRawD ); From 0f8fe8fb3b8cba3842180b61fb1fcae4c257fdfd Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Tue, 23 Mar 2021 00:10:35 -0400 Subject: [PATCH 23/34] Document some internal signals --- wally-pipelined/src/ifu/icache.sv | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index 98a58f7d..2eab6ed8 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -52,10 +52,13 @@ module icache( logic [1:0] InstrDMuxChoice; logic [15:0] MisalignedHalfInstrF, MisalignedHalfInstrD; logic [31:0] InstrF, AlignedInstrD; - logic [31:0] nop = 32'h00000013; // instruction for NOP + // Buffer the last read, for ease of accessing it again logic LastReadDataValidF; logic [`XLEN-1:0] LastReadDataF, LastReadAdrF, InDataF; + // instruction for NOP + logic [31:0] nop = 32'h00000013; + // Temporary change to bridge the new interface to old behaviors logic [`XLEN-1:0] PCPF; assign PCPF = {UpperPCPF, LowerPCF}; From f3194c6388e0deffb4c9642af61e10fb277ff791 Mon Sep 17 00:00:00 2001 From: Noah Boorstin Date: Tue, 23 Mar 2021 13:28:44 -0400 Subject: [PATCH 24/34] busybear: ignore illegal instruction when starting --- .../testbench/testbench-busybear.sv | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/wally-pipelined/testbench/testbench-busybear.sv b/wally-pipelined/testbench/testbench-busybear.sv index 59624106..65c6dc37 100644 --- a/wally-pipelined/testbench/testbench-busybear.sv +++ b/wally-pipelined/testbench/testbench-busybear.sv @@ -282,14 +282,16 @@ module testbench_busybear(); //CSR checking \ always @(``PATH``.``CSR``_REGW) begin \ if ($time > 1) begin \ - scan_file_csr = $fscanf(data_file_csr, "%s\n", CSR); \ - scan_file_csr = $fscanf(data_file_csr, "%x\n", expected``CSR``); \ - if(CSR.icompare(`"CSR`")) begin \ - $display("%0t ps, instr %0d: %s changed, expected %s", $time, instrs, `"CSR`", CSR); \ - end \ - if(``PATH``.``CSR``_REGW != ``expected``CSR) begin \ - $display("%0t ps, instr %0d: %s does not equal %s expected: %x, %x", $time, instrs, `"CSR`", CSR, ``PATH``.``CSR``_REGW, ``expected``CSR); \ - `ERROR \ + if (instrs != 0) begin \ + scan_file_csr = $fscanf(data_file_csr, "%s\n", CSR); \ + scan_file_csr = $fscanf(data_file_csr, "%x\n", expected``CSR``); \ + if(CSR.icompare(`"CSR`")) begin \ + $display("%0t ps, instr %0d: %s changed, expected %s", $time, instrs, `"CSR`", CSR); \ + end \ + if(``PATH``.``CSR``_REGW != ``expected``CSR) begin \ + $display("%0t ps, instr %0d: %s does not equal %s expected: %x, %x", $time, instrs, `"CSR`", CSR, ``PATH``.``CSR``_REGW, ``expected``CSR); \ + `ERROR \ + end \ end \ end else begin \ for(integer j=0; j Date: Tue, 23 Mar 2021 14:06:21 -0400 Subject: [PATCH 25/34] busybear: more progress moving from instrf to instrrawd --- .../testbench/testbench-busybear.sv | 190 +++++++++--------- 1 file changed, 95 insertions(+), 95 deletions(-) diff --git a/wally-pipelined/testbench/testbench-busybear.sv b/wally-pipelined/testbench/testbench-busybear.sv index 65c6dc37..385c45d5 100644 --- a/wally-pipelined/testbench/testbench-busybear.sv +++ b/wally-pipelined/testbench/testbench-busybear.sv @@ -262,7 +262,7 @@ module testbench_busybear(); end always @(dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW) begin - if (dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW == 2 && instrs != 0) begin + if (dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW == 2 && instrs > 1) begin $display("!!!!!! illegal instruction !!!!!!!!!!"); $display("(as a reminder, MCAUSE and MEPC are set by this)"); $display("at %0t ps, instr %0d, HADDR %x", $time, instrs, HADDR); @@ -282,7 +282,7 @@ module testbench_busybear(); //CSR checking \ always @(``PATH``.``CSR``_REGW) begin \ if ($time > 1) begin \ - if (instrs != 0) begin \ + if (instrs > 1) begin \ scan_file_csr = $fscanf(data_file_csr, "%s\n", CSR); \ scan_file_csr = $fscanf(data_file_csr, "%x\n", expected``CSR``); \ if(CSR.icompare(`"CSR`")) begin \ @@ -384,123 +384,123 @@ module testbench_busybear(); logic [63:0] lastPCD; always @(dut.hart.ifu.PCD or dut.hart.ifu.InstrRawD or reset) begin if(~HWRITE) begin - #3; - if (~reset && dut.hart.ifu.InstrRawD[15:0] !== {16{1'bx}} && ~dut.hart.StallD) begin - if (dut.hart.ifu.PCD !== lastPCD) begin - lastCheckInstrD = CheckInstrD; - lastPC <= dut.hart.ifu.PCD; - lastPC2 <= lastPC; - if (speculative && (lastPC != pcExpected)) begin - speculative = ~equal(dut.hart.ifu.PCD,pcExpected,3); - if(dut.hart.ifu.PCD===pcExpected) begin - if(dut.hart.ifu.InstrRawD[6:0] == 7'b1010011) begin // for now, NOP out any float instrs - force CheckInstrD = 32'b0010011; - release CheckInstrD; - force dut.hart.ifu.InstrRawD = 32'b0010011; - #7; - release dut.hart.ifu.InstrRawD; - $display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.hart.ifu.PCD, instrs, $time); - warningCount += 1; - forcedInstr = 1; - end - else begin - if(dut.hart.ifu.InstrRawD[28:27] != 2'b11 && dut.hart.ifu.InstrRawD[6:0] == 7'b0101111) begin //for now, replace non-SC A instrs with LD - force CheckInstrD = {12'b0, CheckInstrD[19:7], 7'b0000011}; + #3; + if (~reset && dut.hart.ifu.InstrRawD[15:0] !== {16{1'bx}} && dut.hart.ifu.PCD !== 64'h0) begin + if (dut.hart.ifu.PCD !== lastPCD) begin + lastCheckInstrD = CheckInstrD; + lastPC <= dut.hart.ifu.PCD; + lastPC2 <= lastPC; + if (speculative && (lastPC != pcExpected)) begin + speculative = ~equal(dut.hart.ifu.PCD,pcExpected,3); + if(dut.hart.ifu.PCD===pcExpected) begin + if(dut.hart.ifu.InstrRawD[6:0] == 7'b1010011) begin // for now, NOP out any float instrs + force CheckInstrD = 32'b0010011; release CheckInstrD; - force dut.hart.ifu.InstrRawD = {12'b0, dut.hart.ifu.InstrRawD[19:7], 7'b0000011}; + force dut.hart.ifu.InstrRawD = 32'b0010011; #7; release dut.hart.ifu.InstrRawD; - $display("warning: replacing AMO instr %s at PC=%0x with ld", PCtext, dut.hart.ifu.PCD); + $display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.hart.ifu.PCD, instrs, $time); warningCount += 1; forcedInstr = 1; end else begin - forcedInstr = 0; + if(dut.hart.ifu.InstrRawD[28:27] != 2'b11 && dut.hart.ifu.InstrRawD[6:0] == 7'b0101111) begin //for now, replace non-SC A instrs with LD + force CheckInstrD = {12'b0, CheckInstrD[19:7], 7'b0000011}; + release CheckInstrD; + force dut.hart.ifu.InstrRawD = {12'b0, dut.hart.ifu.InstrRawD[19:7], 7'b0000011}; + #7; + release dut.hart.ifu.InstrRawD; + $display("warning: replacing AMO instr %s at PC=%0x with ld", PCtext, dut.hart.ifu.PCD); + warningCount += 1; + forcedInstr = 1; + end + else begin + forcedInstr = 0; + end end end end - end - else begin - if($feof(data_file_PC)) begin - $display("no more PC data to read"); - `ERROR - end - scan_file_PC = $fscanf(data_file_PC, "%s\n", PCtext); - if (PCtext != "ret" && PCtext != "fence" && PCtext != "nop" && PCtext != "mret" && PCtext != "sfence.vma" && PCtext != "unimp") begin - scan_file_PC = $fscanf(data_file_PC, "%s\n", PCtext2); - PCtext = {PCtext, " ", PCtext2}; - end - scan_file_PC = $fscanf(data_file_PC, "%x\n", CheckInstrD); - if(dut.hart.ifu.PCD === pcExpected) begin - if(dut.hart.ifu.InstrRawD[6:0] == 7'b1010011) begin // for now, NOP out any float instrs - force CheckInstrD = 32'b0010011; - release CheckInstrD; - force dut.hart.ifu.InstrRawD = 32'b0010011; - #7; - release dut.hart.ifu.InstrRawD; - $display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.hart.ifu.PCD, instrs, $time); - warningCount += 1; - forcedInstr = 1; + else begin + if($feof(data_file_PC)) begin + $display("no more PC data to read"); + `ERROR end - else begin - if(dut.hart.ifu.InstrRawD[28:27] != 2'b11 && dut.hart.ifu.InstrRawD[6:0] == 7'b0101111) begin //for now, replace non-SC A instrs with LD - force CheckInstrD = {12'b0, CheckInstrD[19:7], 7'b0000011}; + scan_file_PC = $fscanf(data_file_PC, "%s\n", PCtext); + if (PCtext != "ret" && PCtext != "fence" && PCtext != "nop" && PCtext != "mret" && PCtext != "sfence.vma" && PCtext != "unimp") begin + scan_file_PC = $fscanf(data_file_PC, "%s\n", PCtext2); + PCtext = {PCtext, " ", PCtext2}; + end + scan_file_PC = $fscanf(data_file_PC, "%x\n", CheckInstrD); + if(dut.hart.ifu.PCD === pcExpected) begin + if(dut.hart.ifu.InstrRawD[6:0] == 7'b1010011) begin // for now, NOP out any float instrs + force CheckInstrD = 32'b0010011; release CheckInstrD; - force dut.hart.ifu.InstrRawD = {12'b0, dut.hart.ifu.InstrRawD[19:7], 7'b0000011}; + force dut.hart.ifu.InstrRawD = 32'b0010011; #7; release dut.hart.ifu.InstrRawD; - $display("warning: replacing AMO instr %s at PC=%0x with ld", PCtext, dut.hart.ifu.PCD); + $display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.hart.ifu.PCD, instrs, $time); warningCount += 1; forcedInstr = 1; end else begin - forcedInstr = 0; + if(dut.hart.ifu.InstrRawD[28:27] != 2'b11 && dut.hart.ifu.InstrRawD[6:0] == 7'b0101111) begin //for now, replace non-SC A instrs with LD + force CheckInstrD = {12'b0, CheckInstrD[19:7], 7'b0000011}; + release CheckInstrD; + force dut.hart.ifu.InstrRawD = {12'b0, dut.hart.ifu.InstrRawD[19:7], 7'b0000011}; + #7; + release dut.hart.ifu.InstrRawD; + $display("warning: replacing AMO instr %s at PC=%0x with ld", PCtext, dut.hart.ifu.PCD); + warningCount += 1; + forcedInstr = 1; + end + else begin + forcedInstr = 0; + end end end - end - // then expected PC value - scan_file_PC = $fscanf(data_file_PC, "%x\n", pcExpected); - if (instrs <= 10 || (instrs <= 100 && instrs % 10 == 0) || - (instrs <= 1000 && instrs % 100 == 0) || (instrs <= 10000 && instrs % 1000 == 0) || - (instrs <= 100000 && instrs % 10000 == 0) || (instrs <= 1000000 && instrs % 100000 == 0)) begin - $display("loaded %0d instructions", instrs); - end - instrs += 1; - // are we at a branch/jump? - casex (lastCheckInstrD[31:0]) - 32'b00000000001000000000000001110011, // URET - 32'b00010000001000000000000001110011, // SRET - 32'b00110000001000000000000001110011, // MRET - 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1101111, // JAL - 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100111, // JALR - 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100011, // B - 32'bXXXXXXXXXXXXXXXX110XXXXXXXXXXX01, // C.BEQZ - 32'bXXXXXXXXXXXXXXXX111XXXXXXXXXXX01, // C.BNEZ - 32'bXXXXXXXXXXXXXXXX101XXXXXXXXXXX01: // C.J - speculative = 1; - 32'bXXXXXXXXXXXXXXXX1001000000000010: // C.EBREAK: - speculative = 0; // tbh don't really know what should happen here - 32'bXXXXXXXXXXXXXXXX1000XXXXX0000010, // C.JR - 32'bXXXXXXXXXXXXXXXX1001XXXXX0000010: // C.JALR //this is RV64 only so no C.JAL - speculative = 1; - default: - speculative = 0; - endcase + // then expected PC value + scan_file_PC = $fscanf(data_file_PC, "%x\n", pcExpected); + if (instrs <= 10 || (instrs <= 100 && instrs % 10 == 0) || + (instrs <= 1000 && instrs % 100 == 0) || (instrs <= 10000 && instrs % 1000 == 0) || + (instrs <= 100000 && instrs % 10000 == 0) || (instrs <= 1000000 && instrs % 100000 == 0)) begin + $display("loaded %0d instructions", instrs); + end + instrs += 1; + // are we at a branch/jump? + casex (lastCheckInstrD[31:0]) + 32'b00000000001000000000000001110011, // URET + 32'b00010000001000000000000001110011, // SRET + 32'b00110000001000000000000001110011, // MRET + 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1101111, // JAL + 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100111, // JALR + 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100011, // B + 32'bXXXXXXXXXXXXXXXX110XXXXXXXXXXX01, // C.BEQZ + 32'bXXXXXXXXXXXXXXXX111XXXXXXXXXXX01, // C.BNEZ + 32'bXXXXXXXXXXXXXXXX101XXXXXXXXXXX01: // C.J + speculative = 1; + 32'bXXXXXXXXXXXXXXXX1001000000000010: // C.EBREAK: + speculative = 0; // tbh don't really know what should happen here + 32'bXXXXXXXXXXXXXXXX1000XXXXX0000010, // C.JR + 32'bXXXXXXXXXXXXXXXX1001XXXXX0000010: // C.JALR //this is RV64 only so no C.JAL + speculative = 1; + default: + speculative = 0; + endcase - //check things! - if ((~speculative) && (~equal(dut.hart.ifu.PCD,pcExpected,3))) begin - $display("%0t ps, instr %0d: PC does not equal PC expected: %x, %x", $time, instrs, dut.hart.ifu.PCD, pcExpected); - `ERROR - end - InstrMask = CheckInstrD[1:0] == 2'b11 ? 32'hFFFFFFFF : 32'h0000FFFF; - if ((~forcedInstr) && (~speculative) && ((InstrMask & dut.hart.ifu.InstrRawD) !== (InstrMask & CheckInstrD))) begin - $display("%0t ps, instr %0d: InstrD does not equal CheckInstrD: %x, %x, PC: %x", $time, instrs, dut.hart.ifu.InstrRawD, CheckInstrD, dut.hart.ifu.PCD); - `ERROR + //check things! + if ((~speculative) && (~equal(dut.hart.ifu.PCD,pcExpected,3))) begin + $display("%0t ps, instr %0d: PC does not equal PC expected: %x, %x", $time, instrs, dut.hart.ifu.PCD, pcExpected); + `ERROR + end + InstrMask = CheckInstrD[1:0] == 2'b11 ? 32'hFFFFFFFF : 32'h0000FFFF; + if ((~forcedInstr) && (~speculative) && ((InstrMask & dut.hart.ifu.InstrRawD) !== (InstrMask & CheckInstrD))) begin + $display("%0t ps, instr %0d: InstrD does not equal CheckInstrD: %x, %x, PC: %x", $time, instrs, dut.hart.ifu.InstrRawD, CheckInstrD, dut.hart.ifu.PCD); + `ERROR + end end end + lastPCD = dut.hart.ifu.PCD; end - lastPCD = dut.hart.ifu.PCD; - end end end From 0d05c51af9aedf31ae94ab7e8e7f2151f6992e3c Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Tue, 23 Mar 2021 14:17:01 -0400 Subject: [PATCH 26/34] Remove deleted signal from waves --- wally-pipelined/regression/wave-dos/ahb-waves.do | 1 - wally-pipelined/regression/wave-dos/default-waves.do | 1 - 2 files changed, 2 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/ahb-waves.do b/wally-pipelined/regression/wave-dos/ahb-waves.do index 70609fa2..f043d779 100644 --- a/wally-pipelined/regression/wave-dos/ahb-waves.do +++ b/wally-pipelined/regression/wave-dos/ahb-waves.do @@ -28,7 +28,6 @@ add wave -hex /testbench/dut/hart/ifu/ic/InstrPAdrF add wave /testbench/dut/hart/ifu/ic/DelayF add wave /testbench/dut/hart/ifu/ic/DelaySideF add wave /testbench/dut/hart/ifu/ic/DelayD -add wave /testbench/dut/hart/ifu/ic/DelaySideD add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD add wave -divider diff --git a/wally-pipelined/regression/wave-dos/default-waves.do b/wally-pipelined/regression/wave-dos/default-waves.do index aff5f380..4b645651 100644 --- a/wally-pipelined/regression/wave-dos/default-waves.do +++ b/wally-pipelined/regression/wave-dos/default-waves.do @@ -27,7 +27,6 @@ add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD add wave /testbench/dut/hart/ifu/ic/DelayF add wave /testbench/dut/hart/ifu/ic/DelaySideF add wave /testbench/dut/hart/ifu/ic/DelayD -add wave /testbench/dut/hart/ifu/ic/DelaySideD add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD add wave -divider add wave -hex /testbench/dut/hart/ifu/PCE From 69e531967572b27ea69d843eddedf2aff6a41c44 Mon Sep 17 00:00:00 2001 From: Noah Boorstin Date: Tue, 23 Mar 2021 14:49:26 -0400 Subject: [PATCH 27/34] busybear: more progress --- wally-pipelined/regression/wally-busybear.do | 2 ++ wally-pipelined/testbench/testbench-busybear.sv | 15 +++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/regression/wally-busybear.do b/wally-pipelined/regression/wally-busybear.do index b7e92ae3..71444cdb 100644 --- a/wally-pipelined/regression/wally-busybear.do +++ b/wally-pipelined/regression/wally-busybear.do @@ -49,6 +49,8 @@ add wave -hex /testbench_busybear/dut/hart/ifu/PCD add wave -hex /testbench_busybear/dut/hart/ifu/InstrD add wave -hex /testbench_busybear/dut/hart/ifu/StallD add wave -hex /testbench_busybear/dut/hart/ifu/FlushD +add wave -hex /testbench_busybear/dut/hart/ifu/StallE +add wave -hex /testbench_busybear/dut/hart/ifu/FlushE add wave -hex /testbench_busybear/dut/hart/ifu/InstrRawD add wave /testbench_busybear/CheckInstrD add wave /testbench_busybear/lastCheckInstrD diff --git a/wally-pipelined/testbench/testbench-busybear.sv b/wally-pipelined/testbench/testbench-busybear.sv index 385c45d5..254e672d 100644 --- a/wally-pipelined/testbench/testbench-busybear.sv +++ b/wally-pipelined/testbench/testbench-busybear.sv @@ -192,7 +192,7 @@ module testbench_busybear(); always @(dut.HRDATA) begin #1; - if (dut.hart.MemRWM[1] && HADDR != dut.PCF && dut.HRDATA !== {64{1'bx}}) begin + if (dut.hart.MemRWM[1] && HADDR[31:3] != dut.PCF[31:3] && dut.HRDATA !== {64{1'bx}}) begin //$display("%0t", $time); if($feof(data_file_memR)) begin $display("no more memR data to read"); @@ -335,6 +335,13 @@ module testbench_busybear(); `CHECK_CSR2(STVAL, `CSRS) `CHECK_CSR(STVEC) + initial begin //this is just fun to make causes easier to understand + #38; + force dut.hart.priv.csr.genblk1.csrm.NextCauseM = 0; + #16; + release dut.hart.priv.csr.genblk1.csrm.NextCauseM; + end + initial begin //this is temporary until the bug can be fixed!!! #18909760; force dut.hart.ieu.dp.regf.rf[5] = 64'h0000000080000004; @@ -382,10 +389,10 @@ module testbench_busybear(); logic [31:0] InstrMask; logic forcedInstr; logic [63:0] lastPCD; - always @(dut.hart.ifu.PCD or dut.hart.ifu.InstrRawD or reset) begin + always @(dut.hart.ifu.PCD or dut.hart.ifu.InstrRawD or reset or negedge dut.hart.ifu.StallE) begin if(~HWRITE) begin - #3; - if (~reset && dut.hart.ifu.InstrRawD[15:0] !== {16{1'bx}} && dut.hart.ifu.PCD !== 64'h0) begin + #2; + if (~reset && dut.hart.ifu.InstrRawD[15:0] !== {16{1'bx}} && dut.hart.ifu.PCD !== 64'h0 && ~dut.hart.ifu.StallE) begin if (dut.hart.ifu.PCD !== lastPCD) begin lastCheckInstrD = CheckInstrD; lastPC <= dut.hart.ifu.PCD; From d6ecc3ede0b8ebb5f106bf9547c37bcd61ace7b9 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Tue, 23 Mar 2021 17:03:02 -0400 Subject: [PATCH 28/34] Begin work on direct-mapped cache --- wally-pipelined/src/cache/dmapped.sv | 91 ++++++++++++++++++++++++++++ wally-pipelined/src/cache/line.sv | 60 ++++++++++++++++++ 2 files changed, 151 insertions(+) create mode 100644 wally-pipelined/src/cache/dmapped.sv create mode 100644 wally-pipelined/src/cache/line.sv diff --git a/wally-pipelined/src/cache/dmapped.sv b/wally-pipelined/src/cache/dmapped.sv new file mode 100644 index 00000000..b57f2401 --- /dev/null +++ b/wally-pipelined/src/cache/dmapped.sv @@ -0,0 +1,91 @@ +/////////////////////////////////////////// +// dmapped.sv +// +// Written: jaallen@g.hmc.edu 2021-03-23 +// Modified: +// +// Purpose: An implementation of a direct-mapped cache memory +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" + +module rodirectmapped #(parameter LINESIZE = 256, parameter NUMLINES = 512, parameter WORDSIZE = `XLEN) ( + // Pipeline stuff + input logic clk, + input logic reset, + // If flush is high, invalidate the entire cache + input logic flush, + // Select which address to read (broken for efficiency's sake) + input logic [`XLEN-1:12] UpperPAdr, + input logic [11:0] LowerAdr, + // Write new data to the cache + input logic WriteEnable, + input logic [LINESIZE-1:0] WriteLine, + input logic [`XLEN-1:0] WritePAdr, + // Output the word, as well as if it is valid + output logic [WORDSIZE-1:0] DataWord, + output logic DataValid +); + + integer TAGWIDTH = `XLEN-$clog2(NUMLINES)-$clog2(LINESIZE); + integer SETWIDTH = $clog2(NUMLINES); + integer OFFSETWIDTH = $clog2(LINESIZE/8); + + logic [NUMLINES-1:0][WORDSIZE-1:0] LineOutputs; + logic [NUMLINES-1:0] ValidOutputs; + logic [NUMLINES-1:0][TAGSIZE-1:0] TagOutputs; + logic [OFFSETWIDTH-1:0] WordSelect; + logic [`XLEN-1:0] ReadPAdr; + logic [SETWIDTH-1:0] ReadSet, WriteSet; + logic [TAGWIDTH-1:0] ReadTag, WriteTag; + + // Swizzle bits to get the offset, set, and tag out of the read and write addresses + always_comb begin + // Read address + assign WordSelect = LowerAdr[OFFSETWIDTH-1:0]; + assign ReadPAdr = {UpperPAdr, LowerAdr}; + assign ReadSet = ReadPAdr[SETWIDTH+OFFSETWIDTH-1:OFFSETWIDTH]; + assign ReadTag = ReadPAdr[`XLEN-1:SETWIDTH+OFFSETWIDTH]; + // Write address + assign WriteSet = WritePAdr[SETWIDTH+OFFSETWIDTH-1:OFFSETWIDTH]; + assign WriteTag = WritePAdr[`XLEN-1:SETWIDTH+OFFSETWIDTH]; + end + + genvar i; + generate + for (i=0; i < NUMLINES; i++) begin + rocacheline #(LINESIZE, TAGSIZE, WORDSIZE) lines[NUMLINES]( + .*, + .WriteEnable(WriteEnable & (WriteSet == i)), + .WriteData(WriteLine), + .WriteTag(WriteTag), + .DataWord(LineOutputs[i]), + .DataTag(TagOutputs[i]), + .DataValid(ValidOutputs[i]), + ); + end + endgenerate + + // Get the data and valid out of the lines + always_comb begin + assign DataWord = LineOutputs[ReadSet]; + assign DataValid = ValidOutputs[ReadSet] & (TagOutputs[ReadSet] == ReadTag); + end + +endmodule diff --git a/wally-pipelined/src/cache/line.sv b/wally-pipelined/src/cache/line.sv new file mode 100644 index 00000000..85d4073b --- /dev/null +++ b/wally-pipelined/src/cache/line.sv @@ -0,0 +1,60 @@ +/////////////////////////////////////////// +// line.sv +// +// Written: jaallen@g.hmc.edu 2021-03-23 +// Modified: +// +// Purpose: An implementation of a single cache line +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" + +// A read-only cache line ("write"ing to this line is loading new data, not writing to memory +module rocacheline #(parameter LINESIZE = 256, parameter TAGSIZE = 32, parameter WORDSIZE = `XLEN) ( + // Pipeline stuff + input logic clk, + input logic reset, + // If flush is high, invalidate this word + input logic flush, + // Select which word within the line + input logic [$clog2(LINESIZE/8)-1:0] WordSelect, + // Write new data to the line + input logic WriteEnable, + input logic [LINESIZE-1:0] WriteData, + input logic [TAGSIZE-1:0] WriteTag, + // Output the word, as well as the tag and if it is valid + output logic [WORDSIZE-1:0] DataWord, + output logic [TAGSIZE-1:0] DataTag, + output logic DataValid +); + + logic [LINESIZE-1:0] DataLine; + logic [$clog2(LINESIZE/8)-1:0] AlignedWordSelect; + + flopenr #(1) ValidBitFlop(clk, reset, WriteEnable | flush, ~flush, DataValid); + flopenr #(TAGSIZE) TagFlop(clk, reset, WriteEnable, WriteTag, DataTag); + flopenr #(LINESIZE) LineFlop(clk, reset, WriteEnable, WriteData, DataLine); + + + always_comb begin + assign AlignedWordSelect = {WordSelect[$clog2(LINESIZE/8)-1:$clog2(WORDSIZE)], {$clog2(WORDSIZE){'b0}}}; + assign DataWord = DataLine[WORDSIZE+AlignedWordSelect-1:AlignedWordSelect]; + end + +endmodule From a51257abca58ea5bc43d66e9ebefcb17a9dd8ea2 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Wed, 24 Mar 2021 00:58:56 -0400 Subject: [PATCH 29/34] Fix compile errors from const not actually being constant (why does Verilog do this) --- wally-pipelined/src/cache/dmapped.sv | 12 ++++++------ wally-pipelined/src/cache/line.sv | 18 +++++++++++++----- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/wally-pipelined/src/cache/dmapped.sv b/wally-pipelined/src/cache/dmapped.sv index b57f2401..52027b39 100644 --- a/wally-pipelined/src/cache/dmapped.sv +++ b/wally-pipelined/src/cache/dmapped.sv @@ -43,13 +43,13 @@ module rodirectmapped #(parameter LINESIZE = 256, parameter NUMLINES = 512, para output logic DataValid ); - integer TAGWIDTH = `XLEN-$clog2(NUMLINES)-$clog2(LINESIZE); - integer SETWIDTH = $clog2(NUMLINES); - integer OFFSETWIDTH = $clog2(LINESIZE/8); + localparam integer SETWIDTH = $clog2(NUMLINES); + localparam integer OFFSETWIDTH = $clog2(LINESIZE/8); + localparam integer TAGWIDTH = `XLEN-SETWIDTH-OFFSETWIDTH; logic [NUMLINES-1:0][WORDSIZE-1:0] LineOutputs; logic [NUMLINES-1:0] ValidOutputs; - logic [NUMLINES-1:0][TAGSIZE-1:0] TagOutputs; + logic [NUMLINES-1:0][TAGWIDTH-1:0] TagOutputs; logic [OFFSETWIDTH-1:0] WordSelect; logic [`XLEN-1:0] ReadPAdr; logic [SETWIDTH-1:0] ReadSet, WriteSet; @@ -70,14 +70,14 @@ module rodirectmapped #(parameter LINESIZE = 256, parameter NUMLINES = 512, para genvar i; generate for (i=0; i < NUMLINES; i++) begin - rocacheline #(LINESIZE, TAGSIZE, WORDSIZE) lines[NUMLINES]( + rocacheline #(LINESIZE, TAGWIDTH, WORDSIZE) lines ( .*, .WriteEnable(WriteEnable & (WriteSet == i)), .WriteData(WriteLine), .WriteTag(WriteTag), .DataWord(LineOutputs[i]), .DataTag(TagOutputs[i]), - .DataValid(ValidOutputs[i]), + .DataValid(ValidOutputs[i]) ); end endgenerate diff --git a/wally-pipelined/src/cache/line.sv b/wally-pipelined/src/cache/line.sv index 85d4073b..e498d073 100644 --- a/wally-pipelined/src/cache/line.sv +++ b/wally-pipelined/src/cache/line.sv @@ -44,17 +44,25 @@ module rocacheline #(parameter LINESIZE = 256, parameter TAGSIZE = 32, parameter output logic DataValid ); - logic [LINESIZE-1:0] DataLine; - logic [$clog2(LINESIZE/8)-1:0] AlignedWordSelect; + localparam integer OFFSETSIZE = $clog2(LINESIZE/8); + localparam integer NUMWORDS = LINESIZE/WORDSIZE; + + logic [NUMWORDS-1:0][WORDSIZE-1:0] DataLinesIn, DataLinesOut; flopenr #(1) ValidBitFlop(clk, reset, WriteEnable | flush, ~flush, DataValid); flopenr #(TAGSIZE) TagFlop(clk, reset, WriteEnable, WriteTag, DataTag); - flopenr #(LINESIZE) LineFlop(clk, reset, WriteEnable, WriteData, DataLine); + + genvar i; + generate + for (i=0; i < NUMWORDS; i++) begin + assign DataLinesIn[i] = WriteData[NUMWORDS*i+WORDSIZE-1:NUMWORDS*i]; + flopenr #(LINESIZE) LineFlop(clk, reset, WriteEnable, DataLinesIn[i], DataLinesOut[i]); + end + endgenerate always_comb begin - assign AlignedWordSelect = {WordSelect[$clog2(LINESIZE/8)-1:$clog2(WORDSIZE)], {$clog2(WORDSIZE){'b0}}}; - assign DataWord = DataLine[WORDSIZE+AlignedWordSelect-1:AlignedWordSelect]; + assign DataWord = DataLinesOut[WordSelect[OFFSETSIZE-1:$clog2(WORDSIZE)]]; end endmodule From c1fe16b70b64e21f985c74dee24925be1bf3f884 Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Wed, 24 Mar 2021 12:31:01 -0400 Subject: [PATCH 30/34] Give some cache mem inputs a better name --- wally-pipelined/src/cache/dmapped.sv | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/wally-pipelined/src/cache/dmapped.sv b/wally-pipelined/src/cache/dmapped.sv index 52027b39..9a51737a 100644 --- a/wally-pipelined/src/cache/dmapped.sv +++ b/wally-pipelined/src/cache/dmapped.sv @@ -5,6 +5,7 @@ // Modified: // // Purpose: An implementation of a direct-mapped cache memory +// This cache is read-only, so "write"s to the memory are loading new data // // A component of the Wally configurable RISC-V project. // @@ -25,15 +26,15 @@ `include "wally-config.vh" -module rodirectmapped #(parameter LINESIZE = 256, parameter NUMLINES = 512, parameter WORDSIZE = `XLEN) ( +module rodirectmappedmem #(parameter LINESIZE = 256, parameter NUMLINES = 512, parameter WORDSIZE = `XLEN) ( // Pipeline stuff input logic clk, input logic reset, // If flush is high, invalidate the entire cache input logic flush, // Select which address to read (broken for efficiency's sake) - input logic [`XLEN-1:12] UpperPAdr, - input logic [11:0] LowerAdr, + input logic [`XLEN-1:12] ReadUpperPAdr, + input logic [11:0] ReadLowerAdr, // Write new data to the cache input logic WriteEnable, input logic [LINESIZE-1:0] WriteLine, @@ -58,8 +59,8 @@ module rodirectmapped #(parameter LINESIZE = 256, parameter NUMLINES = 512, para // Swizzle bits to get the offset, set, and tag out of the read and write addresses always_comb begin // Read address - assign WordSelect = LowerAdr[OFFSETWIDTH-1:0]; - assign ReadPAdr = {UpperPAdr, LowerAdr}; + assign WordSelect = ReadLowerAdr[OFFSETWIDTH-1:0]; + assign ReadPAdr = {ReadUpperPAdr, ReadLowerAdr}; assign ReadSet = ReadPAdr[SETWIDTH+OFFSETWIDTH-1:OFFSETWIDTH]; assign ReadTag = ReadPAdr[`XLEN-1:SETWIDTH+OFFSETWIDTH]; // Write address @@ -89,3 +90,4 @@ module rodirectmapped #(parameter LINESIZE = 256, parameter NUMLINES = 512, para end endmodule + From 67b27cd2f5a4d06d34294db7fafed956dcaae232 Mon Sep 17 00:00:00 2001 From: bbracker Date: Thu, 25 Mar 2021 00:44:35 -0400 Subject: [PATCH 31/34] instrfault direspecting stalls bugfix --- wally-pipelined/src/privileged/privileged.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index 8a6854e9..c967d262 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -125,10 +125,10 @@ module privileged ( // pipeline fault signals flopenrc #(1) faultregD(clk, reset, FlushD, ~StallD, InstrAccessFaultF, InstrAccessFaultD); - floprc #(2) faultregE(clk, reset, FlushE, + flopenrc #(2) faultregE(clk, reset, FlushE, ~StallE, {IllegalIEUInstrFaultD, InstrAccessFaultD}, // ** vs IllegalInstrFaultInD {IllegalIEUInstrFaultE, InstrAccessFaultE}); - floprc #(2) faultregM(clk, reset, FlushM, + flopenrc #(2) faultregM(clk, reset, FlushM, ~StallM, {IllegalIEUInstrFaultE, InstrAccessFaultE}, {IllegalIEUInstrFaultM, InstrAccessFaultM}); From b5fa410e151e2dca17e5c6dc630a59f3e8a13e2c Mon Sep 17 00:00:00 2001 From: bbracker Date: Thu, 25 Mar 2021 02:15:28 -0400 Subject: [PATCH 32/34] added 1 tick delay on tim reads --- wally-pipelined/src/uncore/dtim.sv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/src/uncore/dtim.sv b/wally-pipelined/src/uncore/dtim.sv index a5c4574e..f96a14fb 100644 --- a/wally-pipelined/src/uncore/dtim.sv +++ b/wally-pipelined/src/uncore/dtim.sv @@ -85,14 +85,14 @@ module dtim #(parameter BASE=0, RANGE = 65535) ( generate if (`XLEN == 64) begin always_ff @(posedge HCLK) begin - HWADDR <= A; - HREADTim0 <= RAM[A[31:3]]; + HWADDR <= #1 A; + HREADTim0 <= #1 RAM[A[31:3]]; if (memwrite && risingHREADYTim) RAM[HWADDR[31:3]] <= HWDATA; end end else begin always_ff @(posedge HCLK) begin - HWADDR <= A; - HREADTim0 <= RAM[A[31:2]]; + HWADDR <= #1 A; + HREADTim0 <= #1 RAM[A[31:2]]; if (memwrite && risingHREADYTim) RAM[HWADDR[31:2]] <= HWDATA; end end From b774d35c34d8a2028fc1bad9f859b136a8aee7df Mon Sep 17 00:00:00 2001 From: Jarred Allen Date: Thu, 25 Mar 2021 12:42:48 -0400 Subject: [PATCH 33/34] Output NOP instead of BAD when reset --- wally-pipelined/src/ifu/icache.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index 2eab6ed8..4208c355 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -48,7 +48,7 @@ module icache( output logic [31:0] InstrRawD ); - logic DelayF, DelaySideF, FlushDLastCycle, DelayD; + logic DelayF, DelaySideF, FlushDLastCyclen, DelayD; logic [1:0] InstrDMuxChoice; logic [15:0] MisalignedHalfInstrF, MisalignedHalfInstrD; logic [31:0] InstrF, AlignedInstrD; @@ -65,7 +65,7 @@ module icache( // This flop doesn't stall if StallF is high because we should output a nop // when FlushD happens, even if the pipeline is also stalled. - flopr #(1) flushDLastCycleFlop(clk, reset, FlushD | (FlushDLastCycle & StallF), FlushDLastCycle); + flopr #(1) flushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCyclen | ~StallF), FlushDLastCyclen); flopenr #(1) delayDFlop(clk, reset, ~StallF, DelayF & ~CompressedF, DelayD); flopenrc#(1) delayStateFlop(clk, reset, FlushD, ~StallF, DelayF & ~DelaySideF, DelaySideF); @@ -127,7 +127,7 @@ module icache( // Output the requested instruction (we don't need to worry if the read is // incomplete, since the pipeline stalls for us when it isn't), or a NOP for // the cycle when the first of two reads comes in. - always_comb if (FlushDLastCycle) begin + always_comb if (~FlushDLastCyclen) begin assign InstrDMuxChoice = 2'b10; end else if (DelayD & (MisalignedHalfInstrD[1:0] != 2'b11)) begin assign InstrDMuxChoice = 2'b11; From 44060b579b8abb197e414e416beb0ac9083cd0fb Mon Sep 17 00:00:00 2001 From: Noah Boorstin Date: Thu, 25 Mar 2021 14:29:10 -0400 Subject: [PATCH 34/34] busybear: quick fix to mem reading also stop ignoring mcause at the start --- .../testbench/testbench-busybear.sv | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/wally-pipelined/testbench/testbench-busybear.sv b/wally-pipelined/testbench/testbench-busybear.sv index 254e672d..71664a43 100644 --- a/wally-pipelined/testbench/testbench-busybear.sv +++ b/wally-pipelined/testbench/testbench-busybear.sv @@ -191,7 +191,7 @@ module testbench_busybear(); logic [`XLEN-1:0] readAdrExpected; always @(dut.HRDATA) begin - #1; + #2; if (dut.hart.MemRWM[1] && HADDR[31:3] != dut.PCF[31:3] && dut.HRDATA !== {64{1'bx}}) begin //$display("%0t", $time); if($feof(data_file_memR)) begin @@ -282,16 +282,14 @@ module testbench_busybear(); //CSR checking \ always @(``PATH``.``CSR``_REGW) begin \ if ($time > 1) begin \ - if (instrs > 1) begin \ - scan_file_csr = $fscanf(data_file_csr, "%s\n", CSR); \ - scan_file_csr = $fscanf(data_file_csr, "%x\n", expected``CSR``); \ - if(CSR.icompare(`"CSR`")) begin \ - $display("%0t ps, instr %0d: %s changed, expected %s", $time, instrs, `"CSR`", CSR); \ - end \ - if(``PATH``.``CSR``_REGW != ``expected``CSR) begin \ - $display("%0t ps, instr %0d: %s does not equal %s expected: %x, %x", $time, instrs, `"CSR`", CSR, ``PATH``.``CSR``_REGW, ``expected``CSR); \ - `ERROR \ - end \ + scan_file_csr = $fscanf(data_file_csr, "%s\n", CSR); \ + scan_file_csr = $fscanf(data_file_csr, "%x\n", expected``CSR``); \ + if(CSR.icompare(`"CSR`")) begin \ + $display("%0t ps, instr %0d: %s changed, expected %s", $time, instrs, `"CSR`", CSR); \ + end \ + if(``PATH``.``CSR``_REGW != ``expected``CSR) begin \ + $display("%0t ps, instr %0d: %s does not equal %s expected: %x, %x", $time, instrs, `"CSR`", CSR, ``PATH``.``CSR``_REGW, ``expected``CSR); \ + `ERROR \ end \ end else begin \ for(integer j=0; j