diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do index c90512741..cf9a39cd9 100644 --- a/wally-pipelined/regression/wally-pipelined.do +++ b/wally-pipelined/regression/wally-pipelined.do @@ -60,11 +60,13 @@ add wave /testbench/dut/hart/FlushW add wave -divider add wave -hex /testbench/dut/hart/ifu/PCF -add wave -hex /testbench/dut/hart/ifu/InstrF -add wave /testbench/InstrFName add wave -hex /testbench/dut/hart/ifu/PCD add wave -hex /testbench/dut/hart/ifu/InstrD add wave /testbench/InstrDName +add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD +add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD +add wave /testbench/dut/hart/ifu/ic/DelayF +add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD add wave -divider add wave -hex /testbench/dut/hart/ifu/PCE add wave -hex /testbench/dut/hart/ifu/InstrE diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv new file mode 100644 index 000000000..df0de4d30 --- /dev/null +++ b/wally-pipelined/src/ifu/icache.sv @@ -0,0 +1,90 @@ +/////////////////////////////////////////// +// icache.sv +// +// Written: jaallen@g.hmc.edu 2021-03-02 +// Modified: +// +// Purpose: Cache instructions for the ifu so it can access memory less often +// +// A component of the Wally configurable RISC-V project. 
+//
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module icache(
+  input  logic             clk, reset,
+  input  logic             StallF, StallD,  // StallF freezes the fetch-side flops; StallD is not used in this module yet
+  input  logic             FlushD,          // flush request; remembered for one cycle (longer while StallF is high)
+  // Fetch
+  input  logic [`XLEN-1:0] PCPF,            // address of the instruction to fetch
+  input  logic [`XLEN-1:0] InstrInF,        // data returned by the memory read
+  output logic [`XLEN-1:0] InstrPAdrF,      // address requested from memory
+  output logic             InstrReadF,      // read request (currently always asserted)
+  // Decode
+  output logic [31:0]      InstrRawD        // instruction chosen by instrDMux
+);
+
+  logic        DelayF, DelaySideF, FlushDLastCycle; // DelayF: fetch spans two reads; DelaySideF: second read in progress
+  logic [1:0]  InstrDMuxChoice;
+  logic [15:0] MisalignedHalfInstrF, MisalignedHalfInstrD;
+  logic [31:0] InstrF, AlignedInstrD;
+  localparam logic [31:0] nop = 32'h00000013; // instruction for NOP (constant, so no reliance on a variable initializer)
+
+  flopr   #(1)  flushDLastCycleFlop(clk, reset, FlushD | (FlushDLastCycle & StallF), FlushDLastCycle);
+  flopenr #(1)  delayStateFlop(clk, reset, ~StallF, DelayF & ~DelaySideF, DelaySideF);
+  flopenr #(16) halfInstrFlop(clk, reset, DelayF, MisalignedHalfInstrF, MisalignedHalfInstrD);
+
+  flopenr #(32) instrFlop(clk, reset, ~StallF, InstrF, AlignedInstrD);
+
+  // Decide which address needs to be fetched and sent out over InstrPAdrF
+  // If the requested address fits inside one read from memory, we fetch that
+  // address, adjusted to the bit width. Otherwise, we request the lower word
+  // and then the upper word, in that order.
+  generate
+    if (`XLEN == 32) begin
+      assign InstrPAdrF = PCPF[1] ? (DelaySideF ? {PCPF[31:2]+1, 2'b00} : {PCPF[31:2], 2'b00}) : PCPF;
+    end else begin
+      assign InstrPAdrF = PCPF[2] ? (PCPF[1] ? (DelaySideF ? {PCPF[63:3]+1, 3'b000} : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000};
+    end
+  endgenerate
+  // For now, we always read since the cache doesn't actually cache
+  assign InstrReadF = 1;
+
+  // If the instruction fits in one memory read, then we put the right bits
+  // into InstrF. Otherwise, we activate DelayF to signal the rest of the
+  // machinery to swizzle bits.
+  generate
+    if (`XLEN == 32) begin
+      assign InstrF = PCPF[1] ? 32'b0 : InstrInF;
+      assign DelayF = PCPF[1];
+      assign MisalignedHalfInstrF = InstrInF[31:16];
+    end else begin
+      assign InstrF = PCPF[2] ? (PCPF[1] ? 32'b0 : InstrInF[63:32]) : (PCPF[1] ? InstrInF[47:16] : InstrInF[31:0]); // 32'b0 matches InstrF's width
+      assign DelayF = PCPF[1] && PCPF[2];
+      assign MisalignedHalfInstrF = InstrInF[63:48];
+    end
+  endgenerate
+
+  // Pick the correct output, depending on whether we have to assemble this
+  // instruction from two reads or not: the registered aligned instruction, the
+  // two halves stitched together during the second read, or a NOP after a
+  // flush and during the first of two reads (the pipeline is expected to stall
+  // until the read completes). NOTE(review): assumes mux3 picks d0/d1/d2 for 00/01/10.
+  // Plain continuous assign: a procedural assign inside always_comb is deprecated.
+  assign InstrDMuxChoice = FlushDLastCycle ? 2'b10 : (DelayF ? (DelaySideF ? 2'b01 : 2'b10) : 2'b00);
+  mux3 #(32) instrDMux (AlignedInstrD, {InstrInF[15:0], MisalignedHalfInstrD}, nop, InstrDMuxChoice, InstrRawD);
+endmodule
diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 88e4f0bef..abfb37c88 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -2,7 +2,7 @@ // ifu.sv // // Written: David_Harris@hmc.edu 9 January 2021 -// Modified: +// Modified: // // Purpose: Instrunction Fetch Unit // PC, branch prediction, instruction cache @@ -51,25 +51,24 @@ module ifu ( input logic IllegalBaseInstrFaultD, output logic IllegalIEUInstrFaultD, output logic InstrMisalignedFaultM, - output logic [`XLEN-1:0] InstrMisalignedAdrM, // TLB management //input logic [`XLEN-1:0] PageTableEntryF, //input logic ITLBWriteF, ITLBFlushF, // *** satp value will come from CSRs // input logic [`XLEN-1:0] SATP, output logic ITLBMissF, ITLBHitF, - // bogus - input logic [15:0] rd2 + output logic [`XLEN-1:0] InstrMisalignedAdrM ); logic [`XLEN-1:0] UnalignedPCNextF, PCNextF; logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM; logic PrivilegedChangePCM; logic IllegalCompInstrD; - logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkE, PCLinkM; + logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkE, PCLinkM, PCPF; logic CompressedF; - logic [31:0] InstrF, InstrRawD, InstrE, InstrW; + logic [31:0] InstrRawD, InstrE, InstrW; logic [31:0] nop = 32'h00000013; // instruction for NOP + logic [`XLEN-1:0] ITLBInstrPAdrF, ICacheInstrPAdrF; // *** temporary hack until we can figure out how to get actual satp value // from priv unit -- Thomas F @@ -79,23 +78,28 @@ module ifu ( logic ITLBFlushF = '0; logic ITLBWriteF = '0; tlb #(3) itlb(clk, reset, SATP, PCF, PageTableEntryF, ITLBWriteF, ITLBFlushF, - InstrPAdrF, ITLBMissF, ITLBHitF); + ITLBInstrPAdrF, ITLBMissF, ITLBHitF); // *** put memory interface on here, InstrF becomes output //assign InstrPAdrF = 
PCF; // *** no MMU //assign InstrReadF = ~StallD; // *** & ICacheMissF; add later - assign InstrReadF = 1; // *** & ICacheMissF; add later + // assign InstrReadF = 1; // *** & ICacheMissF; add later + + // jarred 2021-03-04 Add instrution cache block to remove rd2 + assign PCPF = PCF; // Temporary workaround until iTLB is live + icache ic(clk, reset, StallF, StallD, FlushD, PCPF, InstrInF, ICacheInstrPAdrF, InstrReadF, InstrRawD); + // Prioritize the iTLB for reads if it wants one + mux2 #(`XLEN) instrPAdrMux(ICacheInstrPAdrF, ITLBInstrPAdrF, ITLBMissF, InstrPAdrF); assign PrivilegedChangePCM = RetM | TrapM; - mux3 #(`XLEN) pcmux(PCPlus2or4F, PCTargetE, PrivilegedNextPCM, {PrivilegedChangePCM, PCSrcE}, UnalignedPCNextF); assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF); // pcadder // add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32 - assign CompressedF = (InstrF[1:0] != 2'b11); // is it a 16-bit compressed instruction? + assign CompressedF = 0; // is it a 16-bit compressed instruction? TODO Fix this assign PCPlusUpperF = PCF[`XLEN-1:2] + 1; // add 4 to PC // choose PC+2 or PC+4 @@ -105,18 +109,7 @@ module ifu ( else PCPlus2or4F = {PCF[`XLEN-1:2], 2'b10}; else PCPlus2or4F = {PCPlusUpperF, PCF[1:0]}; // add 4 - // harris 2/23/21 Add code to fetch instruction split across two words - generate - if (`XLEN==32) begin - assign InstrF = PCF[1] ? {rd2[15:0], InstrInF[31:16]} : InstrInF; - end else begin - assign InstrF = PCF[2] ? (PCF[1] ? {rd2[15:0], InstrInF[63:48]} : InstrInF[63:32]) - : (PCF[1] ? InstrInF[47:16] : InstrInF[31:0]); - end - endgenerate - // Decode stage pipeline register and logic - flopenl #(32) InstrDReg(clk, reset, ~StallD, (FlushD ? 
nop : InstrF), nop, InstrRawD); flopenrc #(`XLEN) PCDReg(clk, reset, FlushD, ~StallD, PCF, PCD); // expand 16-bit compressed instructions to 32 bits diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 49a5263d6..7498de76d 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -347,7 +347,7 @@ string tests32i[] = { // Track names of instructions instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE, - dut.hart.ifu.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, + dut.hart.ifu.ic.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, dut.hart.ifu.InstrM, dut.hart.ifu.InstrW, InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);