diff --git a/wally-pipelined/regression/wally-busybear.do b/wally-pipelined/regression/wally-busybear.do index 0f426d11..b704aba9 100644 --- a/wally-pipelined/regression/wally-busybear.do +++ b/wally-pipelined/regression/wally-busybear.do @@ -45,13 +45,15 @@ add wave /testbench_busybear/reset add wave -divider add wave -hex /testbench_busybear/PCtext add wave -hex /testbench_busybear/pcExpected -add wave -hex /testbench_busybear/dut/hart/ifu/PCF -add wave -hex /testbench_busybear/dut/hart/ifu/InstrF +add wave -hex /testbench_busybear/dut/hart/ifu/PCD +add wave -hex /testbench_busybear/dut/hart/ifu/InstrD add wave -hex /testbench_busybear/dut/hart/ifu/StallD add wave -hex /testbench_busybear/dut/hart/ifu/FlushD +add wave -hex /testbench_busybear/dut/hart/ifu/StallE +add wave -hex /testbench_busybear/dut/hart/ifu/FlushE add wave -hex /testbench_busybear/dut/hart/ifu/InstrRawD -add wave /testbench_busybear/CheckInstrF -add wave /testbench_busybear/lastCheckInstrF +add wave /testbench_busybear/CheckInstrD +add wave /testbench_busybear/lastCheckInstrD add wave /testbench_busybear/speculative add wave /testbench_busybear/lastPC2 add wave -divider diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do index a6833db5..a5041005 100644 --- a/wally-pipelined/regression/wally-pipelined.do +++ b/wally-pipelined/regression/wally-pipelined.do @@ -42,7 +42,7 @@ vsim workopt view wave -- display input and output signals as hexidecimal values -do ./wave-dos/default-waves.do +do ./wave-dos/ahb-waves.do -- Set Wave Output Items TreeUpdate [SetDefaultTree] diff --git a/wally-pipelined/regression/wave-dos/ahb-waves.do b/wally-pipelined/regression/wave-dos/ahb-waves.do index 5101c757..f043d779 100644 --- a/wally-pipelined/regression/wave-dos/ahb-waves.do +++ b/wally-pipelined/regression/wave-dos/ahb-waves.do @@ -14,14 +14,21 @@ add wave /testbench/dut/hart/FlushD add wave /testbench/dut/hart/FlushE add wave /testbench/dut/hart/FlushM 
add wave /testbench/dut/hart/FlushW -add wave -divider +add wave -divider add wave -hex /testbench/dut/hart/ifu/PCF -add wave -hex /testbench/dut/hart/ifu/InstrF -add wave /testbench/InstrFName add wave -hex /testbench/dut/hart/ifu/PCD add wave -hex /testbench/dut/hart/ifu/InstrD + add wave /testbench/InstrDName +add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD +add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD +add wave -divider +add wave -hex /testbench/dut/hart/ifu/ic/InstrPAdrF +add wave /testbench/dut/hart/ifu/ic/DelayF +add wave /testbench/dut/hart/ifu/ic/DelaySideF +add wave /testbench/dut/hart/ifu/ic/DelayD +add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD add wave -divider add wave -hex /testbench/dut/hart/ifu/PCE @@ -55,8 +62,11 @@ add wave -hex /testbench/dut/hart/ebu/CaptureDataM add wave -hex /testbench/dut/hart/ebu/InstrStall add wave -divider -add wave -hex /testbench/PCW -add wave -hex /testbench/InstrW +add wave -hex /testbench/dut/uncore/dtim/* +add wave -divider + +add wave -hex /testbench/dut/hart/ifu/PCW +add wave -hex /testbench/dut/hart/ifu/InstrW add wave /testbench/InstrWName add wave /testbench/dut/hart/ieu/dp/RegWriteW add wave -hex /testbench/dut/hart/ebu/ReadDataW @@ -67,4 +77,4 @@ add wave -divider add wave -hex /testbench/dut/uncore/dtim/* add wave -divider -add wave -hex -r /testbench/* \ No newline at end of file +add wave -hex -r /testbench/* diff --git a/wally-pipelined/regression/wave-dos/default-waves.do b/wally-pipelined/regression/wave-dos/default-waves.do index 4b921435..76e1c168 100644 --- a/wally-pipelined/regression/wave-dos/default-waves.do +++ b/wally-pipelined/regression/wave-dos/default-waves.do @@ -19,11 +19,15 @@ add wave /testbench/dut/hart/FlushW add wave -divider add wave -hex /testbench/dut/hart/ifu/PCF -add wave -hex /testbench/dut/hart/ifu/InstrF -add wave /testbench/InstrFName add wave -hex /testbench/dut/hart/ifu/PCD add wave -hex /testbench/dut/hart/ifu/InstrD add wave 
/testbench/InstrDName +add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD +add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD +add wave /testbench/dut/hart/ifu/ic/DelayF +add wave /testbench/dut/hart/ifu/ic/DelaySideF +add wave /testbench/dut/hart/ifu/ic/DelayD +add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD add wave -divider add wave -hex /testbench/dut/hart/ifu/PCE add wave -hex /testbench/dut/hart/ifu/InstrE @@ -48,4 +52,4 @@ add wave -hex /testbench/dut/hart/ieu/dp/ResultW add wave -hex /testbench/dut/hart/ieu/dp/RdW add wave -divider #add ww -add wave -hex -r /testbench/* \ No newline at end of file +add wave -hex -r /testbench/* diff --git a/wally-pipelined/src/cache/dmapped.sv b/wally-pipelined/src/cache/dmapped.sv new file mode 100644 index 00000000..9a51737a --- /dev/null +++ b/wally-pipelined/src/cache/dmapped.sv @@ -0,0 +1,93 @@ +/////////////////////////////////////////// +// dmapped.sv +// +// Written: jaallen@g.hmc.edu 2021-03-23 +// Modified: +// +// Purpose: An implementation of a direct-mapped cache memory +// This cache is read-only, so "write"s to the memory are loading new data +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+// Direct-mapped read-only cache: NUMLINES rocacheline instances indexed by the set bits of the address; a read hits when the selected line is valid and its stored tag equals the address tag.
+module rodirectmappedmem #(parameter LINESIZE = 256, parameter NUMLINES = 512, parameter WORDSIZE = `XLEN) (
+    // Pipeline stuff
+    input logic clk,
+    input logic reset,
+    // If flush is high, invalidate the entire cache (flush is passed to every line through the .* connection)
+    input logic flush,
+    // Address to read, supplied as upper bits [`XLEN-1:12] and lower 12 bits; the two are concatenated into ReadPAdr below
+    input logic [`XLEN-1:12] ReadUpperPAdr,
+    input logic [11:0] ReadLowerAdr,
+    // Load new data: when WriteEnable is high, WriteLine and the tag of WritePAdr go to the line selected by the set bits of WritePAdr
+    input logic WriteEnable,
+    input logic [LINESIZE-1:0] WriteLine,
+    input logic [`XLEN-1:0] WritePAdr,
+    // Output the word, as well as if it is valid (line valid bit AND tag match)
+    output logic [WORDSIZE-1:0] DataWord,
+    output logic DataValid
+);
+
+    localparam integer SETWIDTH = $clog2(NUMLINES);       // width of the set (line index) field
+    localparam integer OFFSETWIDTH = $clog2(LINESIZE/8);  // width of the byte offset field (LINESIZE is divided by 8 to get bytes)
+    localparam integer TAGWIDTH = `XLEN-SETWIDTH-OFFSETWIDTH;  // remaining upper address bits
+
+    logic [NUMLINES-1:0][WORDSIZE-1:0] LineOutputs;
+    logic [NUMLINES-1:0] ValidOutputs;
+    logic [NUMLINES-1:0][TAGWIDTH-1:0] TagOutputs;
+    logic [OFFSETWIDTH-1:0] WordSelect;
+    logic [`XLEN-1:0] ReadPAdr;
+    logic [SETWIDTH-1:0] ReadSet, WriteSet;
+    logic [TAGWIDTH-1:0] ReadTag, WriteTag;
+
+    // Split the read and write addresses into {tag, set, offset}. Plain blocking assignments are used (not procedural "assign", which is deprecated).
+    always_comb begin
+        // Read address (ReadPAdr must be assigned before the fields sliced out of it)
+        WordSelect = ReadLowerAdr[OFFSETWIDTH-1:0];
+        ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
+        ReadSet = ReadPAdr[SETWIDTH+OFFSETWIDTH-1:OFFSETWIDTH];
+        ReadTag = ReadPAdr[`XLEN-1:SETWIDTH+OFFSETWIDTH];
+        // Write address
+        WriteSet = WritePAdr[SETWIDTH+OFFSETWIDTH-1:OFFSETWIDTH];
+        WriteTag = WritePAdr[`XLEN-1:SETWIDTH+OFFSETWIDTH];
+    end
+
+    genvar i;
+    generate
+        for (i=0; i < NUMLINES; i++) begin
+            rocacheline #(LINESIZE, TAGWIDTH, WORDSIZE) lines (
+                .*,
+                .WriteEnable(WriteEnable & (WriteSet == i)),
+                .WriteData(WriteLine),
+                .WriteTag(WriteTag),
+                .DataWord(LineOutputs[i]),
+                .DataTag(TagOutputs[i]),
+                .DataValid(ValidOutputs[i])
+            );
+        end
+    endgenerate
+
+    // Get the data and valid out of the line selected by ReadSet
+    always_comb begin
+        DataWord = LineOutputs[ReadSet];
+        DataValid = ValidOutputs[ReadSet] & (TagOutputs[ReadSet] == ReadTag);
+    end
+
+endmodule
+
diff --git a/wally-pipelined/src/cache/line.sv b/wally-pipelined/src/cache/line.sv new file mode 100644 index 00000000..e498d073 --- /dev/null +++ b/wally-pipelined/src/cache/line.sv @@ -0,0 +1,68 @@ +/////////////////////////////////////////// +// line.sv +// +// Written: jaallen@g.hmc.edu 2021-03-23 +// Modified: +// +// Purpose: An implementation of a single cache line +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+
+// A read-only cache line ("write"ing to this line is loading new data, not writing to memory)
+module rocacheline #(parameter LINESIZE = 256, parameter TAGSIZE = 32, parameter WORDSIZE = `XLEN) (
+    // Pipeline stuff
+    input logic clk,
+    input logic reset,
+    // If flush is high, invalidate this line (the valid bit is loaded with ~flush, so flush wins over a simultaneous write)
+    input logic flush,
+    // Byte offset within the line; only the bits above the byte-in-word offset select which word is output
+    input logic [$clog2(LINESIZE/8)-1:0] WordSelect,
+    // Write new data to the line (WriteData and WriteTag are captured when WriteEnable is high)
+    input logic WriteEnable,
+    input logic [LINESIZE-1:0] WriteData,
+    input logic [TAGSIZE-1:0] WriteTag,
+    // Output the word, as well as the tag and if it is valid
+    output logic [WORDSIZE-1:0] DataWord,
+    output logic [TAGSIZE-1:0] DataTag,
+    output logic DataValid
+);
+
+    localparam integer OFFSETSIZE = $clog2(LINESIZE/8);  // width of the byte offset into the line
+    localparam integer NUMWORDS = LINESIZE/WORDSIZE;     // number of WORDSIZE-bit words stored in the line
+
+    logic [NUMWORDS-1:0][WORDSIZE-1:0] DataLinesIn, DataLinesOut;
+    // Valid bit and tag registers. NOTE(review): assumes flopenr ports are (clk, reset, en, d, q) -- confirm against its definition
+    flopenr #(1) ValidBitFlop(clk, reset, WriteEnable | flush, ~flush, DataValid);
+    flopenr #(TAGSIZE) TagFlop(clk, reset, WriteEnable, WriteTag, DataTag);
+
+    genvar i;
+    generate
+        for (i=0; i < NUMWORDS; i++) begin
+            assign DataLinesIn[i] = WriteData[WORDSIZE*i +: WORDSIZE];  // word i occupies bits [WORDSIZE*i+WORDSIZE-1 : WORDSIZE*i]
+            flopenr #(WORDSIZE) LineFlop(clk, reset, WriteEnable, DataLinesIn[i], DataLinesOut[i]);  // one WORDSIZE-wide register per word
+        end
+    endgenerate
+
+    // Convert the byte offset into a word index by dropping the $clog2(WORDSIZE/8)
+    // byte-in-word bits, then pick that word out of the stored line.
+    // NOTE(review): the index slice is empty when NUMWORDS == 1; this assumes LINESIZE > WORDSIZE.
+    assign DataWord = DataLinesOut[WordSelect[OFFSETSIZE-1:$clog2(WORDSIZE/8)]];
+
+endmodule
diff --git
a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index 6e9bb2f8..9542020d 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -29,7 +29,7 @@ module hazard( // Detect hazards input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM, input logic LoadStallD, MulDivStallD, CSRRdStallD, - input logic InstrStall, DataStall, + input logic InstrStall, DataStall, ICacheStallF, // Stall & flush outputs output logic StallF, StallD, StallE, StallM, StallW, output logic FlushF, FlushD, FlushE, FlushM, FlushW diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv new file mode 100644 index 00000000..4208c355 --- /dev/null +++ b/wally-pipelined/src/ifu/icache.sv @@ -0,0 +1,138 @@ +/////////////////////////////////////////// +// icache.sv +// +// Written: jaallen@g.hmc.edu 2021-03-02 +// Modified: +// +// Purpose: Cache instructions for the ifu so it can access memory less often, saving cycles +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+// Fetch-side instruction buffer: remembers the most recent bus read, selects the instruction bits for the current PC, and assembles an instruction that straddles two bus words from two reads.
+module icache(
+    // Basic pipeline stuff (StallD is not referenced inside this module)
+    input logic clk, reset,
+    input logic StallF, StallD,
+    input logic FlushD,
+    // Upper bits of physical address for PC
+    input logic [`XLEN-1:12] UpperPCPF,
+    // Lower 12 bits of virtual PC address, since it's faster this way
+    input logic [11:0] LowerPCF,
+    // Data read in from the ebu unit
+    input logic [`XLEN-1:0] InstrInF,
+    // Read requested from the ebu unit
+    output logic [`XLEN-1:0] InstrPAdrF,
+    output logic InstrReadF,
+    // High if the instruction currently in the fetch stage is compressed
+    output logic CompressedF,
+    // High if the icache is requesting a stall (currently tied to 0 below)
+    output logic ICacheStallF,
+    // The raw (not decompressed) instruction that was requested
+    // If the next instruction is compressed, the upper 16 bits may be anything
+    output logic [31:0] InstrRawD
+);
+
+    logic DelayF, DelaySideF, FlushDLastCyclen, DelayD;
+    logic [1:0] InstrDMuxChoice;
+    logic [15:0] MisalignedHalfInstrF, MisalignedHalfInstrD;
+    logic [31:0] InstrF, AlignedInstrD;
+    // Buffer the last read, for ease of accessing it again
+    logic LastReadDataValidF;
+    logic [`XLEN-1:0] LastReadDataF, LastReadAdrF, InDataF;
+
+    // instruction for NOP
+    logic [31:0] nop = 32'h00000013;
+
+    // Temporary change to bridge the new interface to old behaviors
+    logic [`XLEN-1:0] PCPF;
+    assign PCPF = {UpperPCPF, LowerPCF};
+
+    // This flop doesn't stall if StallF is high because we should output a nop
+    // when FlushD happens, even if the pipeline is also stalled.
+    flopr #(1) flushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCyclen | ~StallF), FlushDLastCyclen);
+
+    flopenr #(1) delayDFlop(clk, reset, ~StallF, DelayF & ~CompressedF, DelayD);
+    flopenrc#(1) delayStateFlop(clk, reset, FlushD, ~StallF, DelayF & ~DelaySideF, DelaySideF);
+    // This flop stores the first half of a misaligned instruction while waiting for the other half
+    flopenr #(16) halfInstrFlop(clk, reset, DelayF & ~StallF, MisalignedHalfInstrF, MisalignedHalfInstrD);
+
+    // This flop is here to simulate pulling data out of the cache, which is edge-triggered
+    flopenr #(32) instrFlop(clk, reset, ~StallF, InstrF, AlignedInstrD);
+
+    // These flops cache the previous read, to accelerate things
+    flopenr #(`XLEN) lastReadDataFlop(clk, reset, InstrReadF & ~StallF, InstrInF, LastReadDataF);
+    flopenr #(1) lastReadDataVFlop(clk, reset, InstrReadF & ~StallF, 1'b1, LastReadDataValidF);
+    flopenr #(`XLEN) lastReadAdrFlop(clk, reset, InstrReadF & ~StallF, InstrPAdrF, LastReadAdrF);
+
+    // Decide which address needs to be fetched and sent out over InstrPAdrF
+    // If the requested address fits inside one read from memory, we fetch that
+    // address, adjusted to the bit width. Otherwise, we request the lower word
+    // and then (DelaySideF set, instruction not compressed) the next word up, i.e. word address + 1.
+    generate
+        if (`XLEN == 32) begin
+            assign InstrPAdrF = PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[31:2] + 30'd1, 2'b00} : {PCPF[31:2], 2'b00}) : PCPF;
+        end else begin
+            assign InstrPAdrF = PCPF[2] ? (PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[63:3]+1, 3'b000} : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000};
+        end
+    endgenerate
+
+    // Read from memory unless the buffered last read already holds the address we want
+    always_comb if (LastReadDataValidF & (InstrPAdrF == LastReadAdrF)) begin
+        InstrReadF = 0;
+    end else begin
+        InstrReadF = 1;
+    end
+
+    // Pick from the memory input or from the previous read, as appropriate
+    mux2 #(`XLEN) inDataMux(LastReadDataF, InstrInF, InstrReadF, InDataF);
+
+    // If the instruction fits in one memory read, then we put the right bits
+    // into InstrF. Otherwise, we activate DelayF to signal the rest of the
+    // machinery to swizzle bits.
+    generate
+        if (`XLEN == 32) begin
+            assign InstrF = PCPF[1] ? {16'b0, InDataF[31:16]} : InDataF;
+            assign DelayF = PCPF[1];
+            assign MisalignedHalfInstrF = InDataF[31:16];
+        end else begin
+            assign InstrF = PCPF[2] ? (PCPF[1] ? {16'b0, InDataF[63:48]} : InDataF[63:32]) : (PCPF[1] ? InDataF[47:16] : InDataF[31:0]);
+            assign DelayF = PCPF[1] && PCPF[2];
+            assign MisalignedHalfInstrF = InDataF[63:48];
+        end
+    endgenerate
+    // We will likely need to stall later, but stalls are handled by the rest of the pipeline for now
+    assign ICacheStallF = 0;
+
+    // Detect if the instruction is compressed (low two bits are not 2'b11)
+    assign CompressedF = InstrF[1:0] != 2'b11;
+
+    // Pick the correct output, depending on whether we have to assemble this
+    // instruction from two reads or not.
+    // Output the requested instruction (we don't need to worry if the read is
+    // incomplete, since the pipeline stalls for us when it isn't), or a NOP for
+    // the cycle when the first of two reads comes in.
+    always_comb if (~FlushDLastCyclen) begin
+        InstrDMuxChoice = 2'b10;
+    end else if (DelayD & (MisalignedHalfInstrD[1:0] != 2'b11)) begin
+        InstrDMuxChoice = 2'b11;
+    end else begin
+        InstrDMuxChoice = {1'b0, DelayD};
+    end
+    mux4 #(32) instrDMux (AlignedInstrD, {InstrInF[15:0], MisalignedHalfInstrD}, nop, {16'b0, MisalignedHalfInstrD}, InstrDMuxChoice, InstrRawD);
+endmodule
diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index c869aa2c..35844fca 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -2,7 +2,7 @@ // ifu.sv // // Written: David_Harris@hmc.edu 9 January 2021 -// Modified: +// Modified: // // Purpose: Instrunction Fetch Unit // PC, branch prediction, instruction cache @@ -35,6 +35,7 @@ module ifu ( output logic [`XLEN-1:0] PCF, output logic [`XLEN-1:0] InstrPAdrF, output logic InstrReadF, + output logic ICacheStallF, // Decode // Execute output logic [`XLEN-1:0] PCLinkE, @@ -61,27 +62,25 @@ module ifu ( input logic [`XLEN-1:0] PageTableEntryF, input logic [`XLEN-1:0] SATP_REGW, input logic ITLBWriteF, // ITLBFlushF, - output logic ITLBMissF, ITLBHitF, - // bogus - input logic [15:0] rd2 - + output logic ITLBMissF, ITLBHitF ); logic [`XLEN-1:0] UnalignedPCNextF, PCNextF; - logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM; - logic PrivilegedChangePCM; - logic IllegalCompInstrD; - logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCLinkD, PCLinkM; - logic CompressedF; - logic [31:0] InstrF, InstrRawD, InstrE; - logic [31:0] nop = 32'h00000013; // instruction for NOP + logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM; + logic PrivilegedChangePCM; + logic IllegalCompInstrD; + logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkM, PCPF; + logic CompressedF; + logic [31:0] InstrRawD, InstrE, InstrW; + logic [31:0] nop = 32'h00000013; // instruction for NOP + logic [`XLEN-1:0]
ITLBInstrPAdrF, ICacheInstrPAdrF; // *** temporary hack until walker is hooked up -- Thomas F // logic [`XLEN-1:0] PageTableEntryF = '0; logic ITLBFlushF = '0; // logic ITLBWriteF = '0; tlb #(3) itlb(clk, reset, SATP_REGW, PrivilegeModeW, PCF, PageTableEntryF, ITLBWriteF, ITLBFlushF, - InstrPAdrF, ITLBMissF, ITLBHitF); + ITLBInstrPAdrF, ITLBMissF, ITLBHitF); // branch predictor signals logic SelBPPredF; @@ -92,11 +91,21 @@ module ifu ( // *** put memory interface on here, InstrF becomes output //assign InstrPAdrF = PCF; // *** no MMU //assign InstrReadF = ~StallD; // *** & ICacheMissF; add later - assign InstrReadF = 1; // *** & ICacheMissF; add later + // assign InstrReadF = 1; // *** & ICacheMissF; add later + + // jarred 2021-03-14 Add instrution cache block to remove rd2 + assign PCPF = PCF; // Temporary workaround until iTLB is live + icache ic( + .*, + .InstrPAdrF(ICacheInstrPAdrF), + .UpperPCPF(PCPF[`XLEN-1:12]), + .LowerPCF(PCF[11:0]) + ); + // Prioritize the iTLB for reads if it wants one + mux2 #(`XLEN) instrPAdrMux(ICacheInstrPAdrF, ITLBInstrPAdrF, ITLBMissF, InstrPAdrF); assign PrivilegedChangePCM = RetM | TrapM; - //mux3 #(`XLEN) pcmux(PCPlus2or4F, PCCorrectE, PrivilegedNextPCM, {PrivilegedChangePCM, BPPredWrongE}, UnalignedPCNextF); mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F), .d1(BPPredPCF), @@ -114,7 +123,7 @@ module ifu ( .y(UnalignedPCNextF)); assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment - flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF); + flopenl #(`XLEN) pcreg(clk, reset, ~StallF & ~ICacheStallF, PCNextF, `RESET_VECTOR, PCF); // branch and jump predictor // I am making the port connection explicit for now as I want to see them and they will be changing. @@ -141,9 +150,7 @@ module ifu ( // pcadder // add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32 - assign CompressedF = (InstrF[1:0] != 2'b11); // is it a 16-bit compressed instruction? 
assign PCPlusUpperF = PCF[`XLEN-1:2] + 1; // add 4 to PC - // choose PC+2 or PC+4 always_comb if (CompressedF) // add 2 @@ -151,18 +158,7 @@ module ifu ( else PCPlus2or4F = {PCF[`XLEN-1:2], 2'b10}; else PCPlus2or4F = {PCPlusUpperF, PCF[1:0]}; // add 4 - // harris 2/23/21 Add code to fetch instruction split across two words - generate - if (`XLEN==32) begin - assign InstrF = PCF[1] ? {rd2[15:0], InstrInF[31:16]} : InstrInF; - end else begin - assign InstrF = PCF[2] ? (PCF[1] ? {rd2[15:0], InstrInF[63:48]} : InstrInF[63:32]) - : (PCF[1] ? InstrInF[47:16] : InstrInF[31:0]); - end - endgenerate - // Decode stage pipeline register and logic - flopenl #(32) InstrDReg(clk, reset, ~StallD | FlushD, (FlushD ? nop : InstrF), nop, InstrRawD); flopenrc #(`XLEN) PCDReg(clk, reset, FlushD, ~StallD, PCF, PCD); // expand 16-bit compressed instructions to 32 bits diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 67ef8b20..c858befd 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -98,6 +98,8 @@ module wallypipelinedhart ( logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM; + // IMem stalls + logic ICacheStallF; logic [`XLEN-1:0] MMUPAdr, MMUReadPTE; logic MMUTranslate, MMUTranslationComplete, MMUReady; diff --git a/wally-pipelined/testbench/testbench-busybear.sv b/wally-pipelined/testbench/testbench-busybear.sv index 2de456d1..8a75eb81 100644 --- a/wally-pipelined/testbench/testbench-busybear.sv +++ b/wally-pipelined/testbench/testbench-busybear.sv @@ -7,7 +7,7 @@ module testbench_busybear(); logic [31:0] GPIOPinsOut, GPIOPinsEn; // instantiate device to be tested - logic [31:0] CheckInstrF; + logic [31:0] CheckInstrD; logic [`AHBW-1:0] HRDATA; logic [31:0] HADDR; @@ -194,8 +194,8 @@ module testbench_busybear(); logic [`XLEN-1:0] readAdrExpected; always @(dut.HRDATA) begin - #1; - if (dut.hart.MemRWM[1] && ~HWRITE && HADDR != dut.PCF && dut.HRDATA 
!== {64{1'bx}}) begin + #2; + if (dut.hart.MemRWM[1] && ~HWRITE && HADDR[31:3] != dut.PCF[31:3] && dut.HRDATA !== {64{1'bx}}) begin //$display("%0t", $time); if($feof(data_file_memR)) begin $display("no more memR data to read"); @@ -265,7 +265,7 @@ module testbench_busybear(); end always @(dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW) begin - if (dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW == 2 && instrs != 0) begin + if (dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW == 2 && instrs > 1) begin $display("!!!!!! illegal instruction !!!!!!!!!!"); $display("(as a reminder, MCAUSE and MEPC are set by this)"); $display("at %0t ps, instr %0d, HADDR %x", $time, instrs, HADDR); @@ -337,7 +337,7 @@ module testbench_busybear(); `CHECK_CSR(STVEC) initial begin //this is temporary until the bug can be fixed!!! - #18909760; + #11130100; force dut.hart.ieu.dp.regf.rf[5] = 64'h0000000080000004; #100; release dut.hart.ieu.dp.regf.rf[5]; @@ -347,7 +347,7 @@ module testbench_busybear(); initial begin speculative = 0; end - logic [63:0] lastCheckInstrF, lastPC, lastPC2; + logic [63:0] lastCheckInstrD, lastPC, lastPC2; string PCtextW, PCtext2W; logic [31:0] InstrWExpected; @@ -382,102 +382,102 @@ module testbench_busybear(); end logic [31:0] InstrMask; logic forcedInstr; - logic [63:0] lastPCF; - always @(dut.PCF or dut.hart.ifu.InstrF or reset) begin + logic [63:0] lastPCD; + always @(dut.hart.ifu.PCD or dut.hart.ifu.InstrRawD or reset or negedge dut.hart.ifu.StallE) begin if(~HWRITE) begin - #3; - if (~reset && dut.hart.ifu.InstrF[15:0] !== {16{1'bx}} && ~dut.hart.StallD) begin - if (dut.PCF !== lastPCF) begin - lastCheckInstrF = CheckInstrF; - lastPC <= dut.PCF; - lastPC2 <= lastPC; - if (speculative && (lastPC != pcExpected)) begin - speculative = ~equal(dut.PCF,pcExpected,3); - if(dut.PCF===pcExpected) begin - if(dut.hart.ifu.InstrF[6:0] == 7'b1010011) begin // for now, NOP out any float instrs - force CheckInstrF = 32'b0010011; - release CheckInstrF; - force dut.hart.ifu.InstrF = 
32'b0010011; - #7; - release dut.hart.ifu.InstrF; - $display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.PCF, instrs, $time); - warningCount += 1; - forcedInstr = 1; - end - else begin - forcedInstr = 0; + #2; + if (~reset && dut.hart.ifu.InstrRawD[15:0] !== {16{1'bx}} && dut.hart.ifu.PCD !== 64'h0 && ~dut.hart.ifu.StallE) begin + if (dut.hart.ifu.PCD !== lastPCD) begin + lastCheckInstrD = CheckInstrD; + lastPC <= dut.hart.ifu.PCD; + lastPC2 <= lastPC; + if (speculative && (lastPC != pcExpected)) begin + speculative = ~equal(dut.hart.ifu.PCD,pcExpected,3); + if(dut.hart.ifu.PCD===pcExpected) begin + if(dut.hart.ifu.InstrRawD[6:0] == 7'b1010011) begin // for now, NOP out any float instrs + force CheckInstrD = 32'b0010011; + release CheckInstrD; + force dut.hart.ifu.InstrRawD = 32'b0010011; + #7; + release dut.hart.ifu.InstrRawD; + $display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.hart.ifu.PCD, instrs, $time); + warningCount += 1; + forcedInstr = 1; + end + else begin + forcedInstr = 0; + end end end - end - else begin - if($feof(data_file_PC)) begin - $display("no more PC data to read"); - `ERROR - end - scan_file_PC = $fscanf(data_file_PC, "%s\n", PCtext); - if (PCtext != "ret" && PCtext != "fence" && PCtext != "nop" && PCtext != "mret" && PCtext != "sfence.vma" && PCtext != "unimp") begin - scan_file_PC = $fscanf(data_file_PC, "%s\n", PCtext2); - PCtext = {PCtext, " ", PCtext2}; - end - scan_file_PC = $fscanf(data_file_PC, "%x\n", CheckInstrF); - if(dut.PCF === pcExpected) begin - if(dut.hart.ifu.InstrF[6:0] == 7'b1010011) begin // for now, NOP out any float instrs - force CheckInstrF = 32'b0010011; - release CheckInstrF; - force dut.hart.ifu.InstrF = 32'b0010011; - #7; - release dut.hart.ifu.InstrF; - $display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.PCF, instrs, $time); - warningCount += 1; - forcedInstr = 1; + else begin + if($feof(data_file_PC)) begin + $display("no more PC data 
to read"); + `ERROR end - else begin - forcedInstr = 0; + scan_file_PC = $fscanf(data_file_PC, "%s\n", PCtext); + if (PCtext != "ret" && PCtext != "fence" && PCtext != "nop" && PCtext != "mret" && PCtext != "sfence.vma" && PCtext != "unimp") begin + scan_file_PC = $fscanf(data_file_PC, "%s\n", PCtext2); + PCtext = {PCtext, " ", PCtext2}; end - end - // then expected PC value - scan_file_PC = $fscanf(data_file_PC, "%x\n", pcExpected); - if (instrs <= 10 || (instrs <= 100 && instrs % 10 == 0) || - (instrs <= 1000 && instrs % 100 == 0) || (instrs <= 10000 && instrs % 1000 == 0) || - (instrs <= 100000 && instrs % 10000 == 0) || (instrs <= 1000000 && instrs % 100000 == 0)) begin - $display("loaded %0d instructions", instrs); - end - instrs += 1; - // are we at a branch/jump? - casex (lastCheckInstrF[31:0]) - 32'b00000000001000000000000001110011, // URET - 32'b00010000001000000000000001110011, // SRET - 32'b00110000001000000000000001110011, // MRET - 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1101111, // JAL - 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100111, // JALR - 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100011, // B - 32'bXXXXXXXXXXXXXXXX110XXXXXXXXXXX01, // C.BEQZ - 32'bXXXXXXXXXXXXXXXX111XXXXXXXXXXX01, // C.BNEZ - 32'bXXXXXXXXXXXXXXXX101XXXXXXXXXXX01: // C.J - speculative = 1; - 32'bXXXXXXXXXXXXXXXX1001000000000010: // C.EBREAK: - speculative = 0; // tbh don't really know what should happen here - 32'bXXXXXXXXXXXXXXXX1000XXXXX0000010, // C.JR - 32'bXXXXXXXXXXXXXXXX1001XXXXX0000010: // C.JALR //this is RV64 only so no C.JAL - speculative = 1; - default: - speculative = 0; - endcase + scan_file_PC = $fscanf(data_file_PC, "%x\n", CheckInstrD); + if(dut.hart.ifu.PCD === pcExpected) begin + if(dut.hart.ifu.InstrRawD[6:0] == 7'b1010011) begin // for now, NOP out any float instrs + force CheckInstrD = 32'b0010011; + release CheckInstrD; + force dut.hart.ifu.InstrRawD = 32'b0010011; + #7; + release dut.hart.ifu.InstrRawD; + $display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, 
dut.hart.ifu.PCD, instrs, $time); + warningCount += 1; + forcedInstr = 1; + end + else begin + forcedInstr = 0; + end + end + // then expected PC value + scan_file_PC = $fscanf(data_file_PC, "%x\n", pcExpected); + if (instrs <= 10 || (instrs <= 100 && instrs % 10 == 0) || + (instrs <= 1000 && instrs % 100 == 0) || (instrs <= 10000 && instrs % 1000 == 0) || + (instrs <= 100000 && instrs % 10000 == 0) || (instrs <= 1000000 && instrs % 100000 == 0)) begin + $display("loaded %0d instructions", instrs); + end + instrs += 1; + // are we at a branch/jump? + casex (lastCheckInstrD[31:0]) + 32'b00000000001000000000000001110011, // URET + 32'b00010000001000000000000001110011, // SRET + 32'b00110000001000000000000001110011, // MRET + 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1101111, // JAL + 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100111, // JALR + 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100011, // B + 32'bXXXXXXXXXXXXXXXX110XXXXXXXXXXX01, // C.BEQZ + 32'bXXXXXXXXXXXXXXXX111XXXXXXXXXXX01, // C.BNEZ + 32'bXXXXXXXXXXXXXXXX101XXXXXXXXXXX01: // C.J + speculative = 1; + 32'bXXXXXXXXXXXXXXXX1001000000000010: // C.EBREAK: + speculative = 0; // tbh don't really know what should happen here + 32'bXXXXXXXXXXXXXXXX1000XXXXX0000010, // C.JR + 32'bXXXXXXXXXXXXXXXX1001XXXXX0000010: // C.JALR //this is RV64 only so no C.JAL + speculative = 1; + default: + speculative = 0; + endcase - //check things! - if ((~speculative) && (~equal(dut.PCF,pcExpected,3))) begin - $display("%0t ps, instr %0d: PC does not equal PC expected: %x, %x", $time, instrs, dut.PCF, pcExpected); - `ERROR - end - InstrMask = CheckInstrF[1:0] == 2'b11 ? 32'hFFFFFFFF : 32'h0000FFFF; - if ((~forcedInstr) && (~speculative) && ((InstrMask & dut.hart.ifu.InstrF) !== (InstrMask & CheckInstrF))) begin - $display("%0t ps, instr %0d: InstrF does not equal CheckInstrF: %x, %x, PC: %x", $time, instrs, dut.hart.ifu.InstrF, CheckInstrF, dut.PCF); - `ERROR + //check things! 
+ if ((~speculative) && (~equal(dut.hart.ifu.PCD,pcExpected,3))) begin + $display("%0t ps, instr %0d: PC does not equal PC expected: %x, %x", $time, instrs, dut.hart.ifu.PCD, pcExpected); + `ERROR + end + InstrMask = CheckInstrD[1:0] == 2'b11 ? 32'hFFFFFFFF : 32'h0000FFFF; + if ((~forcedInstr) && (~speculative) && ((InstrMask & dut.hart.ifu.InstrRawD) !== (InstrMask & CheckInstrD))) begin + $display("%0t ps, instr %0d: InstrD does not equal CheckInstrD: %x, %x, PC: %x", $time, instrs, dut.hart.ifu.InstrRawD, CheckInstrD, dut.hart.ifu.PCD); + `ERROR + end end end + lastPCD = dut.hart.ifu.PCD; end - lastPCF = dut.PCF; - end end end @@ -485,7 +485,7 @@ module testbench_busybear(); string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName; logic [31:0] InstrW; flopenr #(32) InstrWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.InstrM, InstrW); - instrNameDecTB dec(dut.hart.ifu.InstrF, InstrFName); + instrNameDecTB dec(dut.hart.ifu.ic.InstrF, InstrFName); instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, dut.hart.ifu.InstrM, InstrW, diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index b55eb53e..c312c745 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -65,7 +65,6 @@ module testbench(); // "rv64m/I-REMW-01", "3000" }; string tests64ic[] = '{ - "rv64ic/I-C-ADD-01", "3000", "rv64ic/I-C-ADDI-01", "3000", "rv64ic/I-C-ADDIW-01", "3000", @@ -381,9 +380,9 @@ string tests32i[] = { // Track names of instructions instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE, - dut.hart.ifu.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, - dut.hart.ifu.InstrM, InstrW, - InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); + dut.hart.ifu.ic.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, + dut.hart.ifu.InstrM, InstrW, InstrFName, InstrDName, + InstrEName, InstrMName, InstrWName); // initialize 
tests initial