From 33110ed6363bc3a692e9a0255139b0fff334ce16 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 7 Feb 2021 23:21:55 -0500 Subject: [PATCH 01/10] Data memory bus integration --- wally-pipelined/config/rv64ic/wally-config.vh | 2 +- wally-pipelined/regression/wally-pipelined.do | 9 +- wally-pipelined/src/dmem/dmem.sv | 17 ++-- wally-pipelined/src/ebu/ahblite.sv | 93 +++++++++++++++---- wally-pipelined/src/ebu/subwordread.sv | 20 ++-- wally-pipelined/src/hazard/hazard.sv | 38 +++++--- wally-pipelined/src/ieu/controller.sv | 12 +-- wally-pipelined/src/ieu/datapath.sv | 30 +++--- wally-pipelined/src/ieu/forward.sv | 2 +- wally-pipelined/src/ieu/ieu.sv | 3 +- wally-pipelined/src/ifu/ifu.sv | 33 ++++--- wally-pipelined/src/uncore/dtim.sv | 52 ++++++++--- wally-pipelined/src/uncore/subwordwrite.sv | 33 ++++--- wally-pipelined/src/uncore/uartPC16550D.sv | 1 + wally-pipelined/src/uncore/uncore.sv | 6 +- .../src/wally/wallypipelinedhart.sv | 39 +++++--- .../src/wally/wallypipelinedsoc.sv | 3 + .../testbench/testbench-imperas.sv | 11 ++- 18 files changed, 267 insertions(+), 137 deletions(-) diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index eb340ddc2..8ab42cdd3 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -61,7 +61,7 @@ // Bus Interface width `define AHBW 64 -// Peripheral Addresses +// Peripheral Physical Addresses // Peripheral memory space extends from BASE to BASE+RANGE // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do index b7f41535d..a88569f49 100644 --- a/wally-pipelined/regression/wally-pipelined.do +++ b/wally-pipelined/regression/wally-pipelined.do @@ -45,11 +45,14 @@ view wave add wave /testbench/clk add wave /testbench/reset add wave -divider -add wave /testbench/dut/hart/ebu/IReadF 
+#add wave /testbench/dut/hart/ebu/IReadF add wave /testbench/dut/hart/DataStall add wave /testbench/dut/hart/InstrStall add wave /testbench/dut/hart/StallF add wave /testbench/dut/hart/StallD +add wave /testbench/dut/hart/StallE +add wave /testbench/dut/hart/StallM +add wave /testbench/dut/hart/StallW add wave /testbench/dut/hart/FlushD add wave /testbench/dut/hart/FlushE add wave /testbench/dut/hart/FlushM @@ -101,6 +104,6 @@ configure wave -childrowmargin 2 set DefaultRadix hexadecimal -- Run the Simulation -#run 1000 -run -all +run 2000 +#run -all #quit diff --git a/wally-pipelined/src/dmem/dmem.sv b/wally-pipelined/src/dmem/dmem.sv index 4c602659f..aab74fadc 100644 --- a/wally-pipelined/src/dmem/dmem.sv +++ b/wally-pipelined/src/dmem/dmem.sv @@ -30,19 +30,19 @@ module dmem ( input logic clk, reset, input logic FlushW, - output logic DataStall, + //output logic DataStall, // Memory Stage input logic [1:0] MemRWM, input logic [`XLEN-1:0] MemAdrM, input logic [2:0] Funct3M, - input logic [`XLEN-1:0] ReadDataM, + //input logic [`XLEN-1:0] ReadDataW, input logic [`XLEN-1:0] WriteDataM, output logic [`XLEN-1:0] MemPAdrM, - output logic [1:0] MemRWAlignedM, + output logic MemReadM, MemWriteM, output logic DataMisalignedM, // Writeback Stage input logic MemAckW, - output logic [`XLEN-1:0] ReadDataW, + input logic [`XLEN-1:0] ReadDataW, // faults input logic DataAccessFaultM, output logic LoadMisalignedFaultM, LoadAccessFaultM, @@ -52,9 +52,6 @@ module dmem ( // Initially no MMU assign MemPAdrM = MemAdrM; - // Pipeline register *** AHB data will eventually come back in W anyway - floprc #(`XLEN) ReadDataWReg(clk, reset, FlushW, ReadDataM, ReadDataW); - // Determine if an Unaligned access is taking place always_comb case(Funct3M[1:0]) @@ -66,7 +63,9 @@ module dmem ( // Squash unaligned data accesses // *** this is also the place to squash if the cache is hit - assign MemRWAlignedM = MemRWM & {2{~DataMisalignedM}}; + assign MemReadM = MemRWM[1] & ~DataMisalignedM; + 
assign MemWriteM = MemRWM[0] & ~DataMisalignedM; +// assign MemRWAlignedM = MemRWM & {2{~DataMisalignedM}}; // Determine if address is valid assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1]; @@ -75,7 +74,7 @@ module dmem ( assign StoreAccessFaultM = DataAccessFaultM & MemRWM[0]; // Data stall - assign DataStall = 0; + //assign DataStall = 0; endmodule diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 8ce17545f..9d4e62be7 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -36,16 +36,16 @@ module ahblite ( input logic UnsignedLoadM, // Signals from Instruction Cache input logic [`XLEN-1:0] InstrPAdrF, // *** rename these to match block diagram - input logic IReadF, - output logic [`XLEN-1:0] IRData, + input logic InstrReadF, + output logic [31:0] InstrRData, // output logic IReady, // Signals from Data Cache input logic [`XLEN-1:0] MemPAdrM, - input logic DReadM, DWriteM, + input logic MemReadM, MemWriteM, input logic [`XLEN-1:0] WriteDataM, - input logic [1:0] DSizeM, + input logic [1:0] MemSizeM, // Return from bus - output logic [`XLEN-1:0] DRData, + output logic [`XLEN-1:0] ReadDataW, // output logic DReady, // AHB-Lite external signals input logic [`AHBW-1:0] HRDATA, @@ -59,49 +59,108 @@ module ahblite ( output logic [3:0] HPROT, output logic [1:0] HTRANS, output logic HMASTLOCK, + // Delayed signals for subword write + output logic [2:0] HADDRD, + output logic [3:0] HSIZED, + output logic HWRITED, // Acknowledge - output logic InstrAckD, MemAckW + output logic InstrAckD, MemAckW, // Stalls -// output logic InstrStall, DataStall + output logic InstrStall, DataStall ); logic GrantData; logic [2:0] ISize; logic [`AHBW-1:0] HRDATAMasked; logic IReady, DReady; +// logic [3:0] HSIZED; // size delayed by one cycle for reads +// logic [2:0] HADDRD; // address delayed for subword reads assign HCLK = clk; assign HRESETn = ~reset; // Arbitrate requests by giving data priority over 
instructions - assign GrantData = DReadM | DWriteM; + assign GrantData = MemReadM | MemWriteM; // *** initially support HABW = XLEN + // track bus state + typedef enum {IDLE, MEMREAD, MEMWRITE, INSTRREAD} statetype; + statetype AdrState, DataState, NextAdrState; // what is happening in the first and second phases of the bus + always_ff @(posedge HCLK, negedge HRESETn) + if (~HRESETn) begin + AdrState <= IDLE; DataState <= IDLE; + HWDATA <= 0; // unnecessary but avoids x at startup + HSIZED <= 0; + HADDRD <= 0; + HWRITED <= 0; + end else begin + if (HREADY || (DataState == IDLE)) begin // only advance bus state if bus is idle or previous transaction returns ready + DataState <= AdrState; + AdrState <= NextAdrState; + if (HWRITE) HWDATA <= WriteDataM; + HSIZED <= {UnsignedLoadM, HSIZE}; + HADDRD <= HADDR[2:0]; + HWRITED <= HWRITE; + end + end + always_comb + if (MemReadM) NextAdrState = MEMREAD; + else if (MemWriteM) NextAdrState = MEMWRITE; + else if (InstrReadF) NextAdrState = INSTRREAD; + else NextAdrState = IDLE; + + // Generate acknowledges based on bus state and ready + assign MemAckW = (AdrState == MEMREAD || AdrState == MEMWRITE) && HREADY; + assign InstrAckD = (AdrState == INSTRREAD) && HREADY; + // Choose ISize based on XLen generate - if (`AHBW == 32) assign ISize = 3'b010; // 32-bit transfers - else assign ISize = 3'b011; // 64-bit transfers + //if (`AHBW == 32) assign ISize = 3'b010; // 32-bit transfers + //else assign ISize = 3'b011; // 64-bit transfers + assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width endgenerate // drive bus outputs assign HADDR = GrantData ? MemPAdrM[31:0] : InstrPAdrF[31:0]; - assign HWDATA = WriteDataM; + //assign HWDATA = WriteDataW; //flop #(`XLEN) wdreg(HCLK, DWDataM, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN - assign HWRITE = DWriteM; - assign HSIZE = GrantData ? {1'b0, DSizeM} : ISize; + assign HWRITE = MemWriteM; + assign HSIZE = GrantData ? 
{1'b0, MemSizeM} : ISize; assign HBURST = 3'b000; // Single burst only supported; consider generalizing for cache fillsfHPROT assign HPROT = 4'b0011; // not used; see Section 3.7 - assign HTRANS = IReadF | DReadM | DWriteM ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise + assign HTRANS = InstrReadF | MemReadM | MemWriteM ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise assign HMASTLOCK = 0; // no locking supported // Route signals to Instruction and Data Caches // *** assumes AHBW = XLEN - assign IRData = HRDATAMasked; - assign IReady = HREADY & IReadF & ~GrantData; // maybe unused?*** - assign DRData = HRDATAMasked; + assign InstrRData = HRDATAMasked[31:0]; + assign IReady = HREADY & InstrReadF & ~GrantData; // maybe unused?*** + assign ReadDataW = HRDATAMasked; assign DReady = HREADY & GrantData; // ***unused? + + // State machines for stalls (probably can merge with FSM above***) + // Idle, DataBusy, InstrBusy. Stall while in busystate add suffixes + logic MemState, NextMemState, InstrState, NextInstrState; + flopr #(1) msreg(HCLK, ~HRESETn, NextMemState, MemState); + flopr #(1) isreg(HCLK, ~HRESETn, NextInstrState, InstrState); +/* always_ff @(posedge HCLK, negedge HRESETn) + if (~HRESETn) MemState <= 0; + else MemState <= NextMemState; */ + assign NextMemState = (MemState == 0 && InstrState == 0 && (MemReadM || MemWriteM)) || (MemState == 1 && ~MemAckW); + assign DataStall = NextMemState; +/* always_ff @(posedge HCLK, negedge HRESETn) + if (~HRESETn) InstrState <= 0; + else InstrState <= NextInstrState;*/ + + assign NextInstrState = (InstrState == 0 && MemState == 0 && (~MemReadM && ~MemWriteM && InstrReadF)) || + (InstrState == 1 && ~InstrAckD); + assign InstrStall = NextInstrState | MemState | NextMemState; // *** check this, explain better + // temporarily turn off stalls and check it works + //assign DataStall = 0; + //assign InstrStall = 0; + // stalls // Stall MEM stage if data is being accessed and bus isn't yet ready 
//assign DataStall = GrantData & ~HREADY; diff --git a/wally-pipelined/src/ebu/subwordread.sv b/wally-pipelined/src/ebu/subwordread.sv index 74b1e0aa7..352a33c05 100644 --- a/wally-pipelined/src/ebu/subwordread.sv +++ b/wally-pipelined/src/ebu/subwordread.sv @@ -28,9 +28,9 @@ module subwordread ( // from AHB Interface input logic [`XLEN-1:0] HRDATA, - input logic [31:0] HADDR, - input logic UnsignedLoadM, - input logic [2:0] HSIZE, + input logic [2:0] HADDRD, + //input logic UnsignedLoadM, + input logic [3:0] HSIZED, // to ifu/dmems output logic [`XLEN-1:0] HRDATAMasked ); @@ -42,7 +42,7 @@ module subwordread ( if (`XLEN == 64) begin // ByteMe mux always_comb - case(HADDR[2:0]) + case(HADDRD[2:0]) 3'b000: ByteM = HRDATA[7:0]; 3'b001: ByteM = HRDATA[15:8]; 3'b010: ByteM = HRDATA[23:16]; @@ -55,7 +55,7 @@ module subwordread ( // halfword mux always_comb - case(HADDR[2:1]) + case(HADDRD[2:1]) 2'b00: HalfwordM = HRDATA[15:0]; 2'b01: HalfwordM = HRDATA[31:16]; 2'b10: HalfwordM = HRDATA[47:32]; @@ -65,14 +65,14 @@ module subwordread ( logic [31:0] WordM; always_comb - case(HADDR[2]) + case(HADDRD[2]) 1'b0: WordM = HRDATA[31:0]; 1'b1: WordM = HRDATA[63:32]; endcase // sign extension always_comb - case({UnsignedLoadM, HSIZE[1:0]}) + case({HSIZED[3], HSIZED[1:0]}) // HSIZED[3] indicates unsigned load 3'b000: HRDATAMasked = {{56{ByteM[7]}}, ByteM}; // lb 3'b001: HRDATAMasked = {{48{HalfwordM[15]}}, HalfwordM[15:0]}; // lh 3'b010: HRDATAMasked = {{32{WordM[31]}}, WordM[31:0]}; // lw @@ -85,7 +85,7 @@ module subwordread ( end else begin // 32-bit // byte mux always_comb - case(HADDR[1:0]) + case(HADDRD[1:0]) 2'b00: ByteM = HRDATA[7:0]; 2'b01: ByteM = HRDATA[15:8]; 2'b10: ByteM = HRDATA[23:16]; @@ -94,14 +94,14 @@ module subwordread ( // halfword mux always_comb - case(HADDR[1]) + case(HADDRD[1]) 1'b0: HalfwordM = HRDATA[15:0]; 1'b1: HalfwordM = HRDATA[31:16]; endcase // sign extension always_comb - case({UnsignedLoadM, HSIZE[1:0]}) + case({HSIZED[3], HSIZED[1:0]}) 3'b000: 
HRDATAMasked = {{24{ByteM[7]}}, ByteM}; // lb 3'b001: HRDATAMasked = {{16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh 3'b010: HRDATAMasked = HRDATA; // lw diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index 2fe0541a5..7c4afd6fe 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -34,12 +34,14 @@ module hazard( input logic LoadStallD, input logic InstrStall, DataStall, // Stall outputs - output logic StallF, StallD, FlushD, FlushE, FlushM, FlushW + output logic StallF, StallD, StallE, StallM, StallW, + output logic FlushD, FlushE, FlushM, FlushW ); logic BranchFlushDE; - logic StallDCause, StallFCause, StallWCause; - + logic StallFCause, StallDCause, StallECause, StallMCause, StallWCause; + logic FirstUnstalledD, FirstUnstalledE, FirstUnstalledM, FirstUnstalledW; + // stalls and flushes // loads: stall for one cycle if the subsequent instruction depends on the load // branches and jumps: flush the next two instructions if the branch is taken in EXE @@ -54,14 +56,28 @@ module hazard( assign BranchFlushDE = PCSrcE | RetM | TrapM; - assign StallDCause = LoadStallD; - assign StallFCause = InstrStall | CSRWritePendingDEM; - assign StallWCause = DataStall; // *** not yet used + assign StallFCause = InstrStall | CSRWritePendingDEM; // stall at fetch if unable to get the instruction, + // or if a CSR will be written and may change system behavior + assign StallDCause = LoadStallD; // stall in decode if instruction is a load dependent on previous + assign StallECause = 0; + assign StallMCause = 0; // sDataStall; // not yet used*** + assign StallWCause = DataStall; - assign StallD = StallDCause; + // Each stage stalls if the next stage is stalled or there is a cause to stall this stage. 
assign StallF = StallD | StallFCause; - assign FlushD = BranchFlushDE | StallFCause; // PCSrcE |InstrStall | CSRWritePendingDEM | RetM | TrapM; - assign FlushE = StallD | BranchFlushDE; //LoadStallD | PCSrcE | RetM | TrapM; - assign FlushM = RetM | TrapM; - assign FlushW = TrapM; + assign StallD = StallE | StallDCause; + assign StallE = StallM | StallECause; + assign StallM = StallW | StallMCause; + assign StallW = StallWCause; + + assign FirstUnstalledD = (~StallD & StallF); + assign FirstUnstalledE = (~StallE & StallD); + assign FirstUnstalledM = (~StallM & StallE); + assign FirstUnstalledW = (~StallW & StallM);; + + // Each stage flushes if the previous stage is the last one stalled (for cause) or the system has reason to flush + assign FlushD = FirstUnstalledD || BranchFlushDE; // PCSrcE |InstrStall | CSRWritePendingDEM | RetM | TrapM; + assign FlushE = FirstUnstalledE || BranchFlushDE; //LoadStallD | PCSrcE | RetM | TrapM; + assign FlushM = FirstUnstalledM || RetM || TrapM; + assign FlushW = FirstUnstalledW | TrapM; endmodule diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index 5a62f0147..1b480dec3 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -37,7 +37,7 @@ module controller( input logic IllegalIEUInstrFaultD, output logic IllegalBaseInstrFaultD, // Execute stage control signals - input logic FlushE, + input logic StallE, FlushE, input logic [2:0] FlagsE, output logic PCSrcE, // for datapath and Hazard Unit output logic [4:0] ALUControlE, @@ -45,14 +45,14 @@ module controller( output logic TargetSrcE, output logic MemReadE, // for Hazard Unit // Memory stage control signals - input logic FlushM, + input logic StallM, FlushM, input logic DataMisalignedM, output logic [1:0] MemRWM, output logic CSRWriteM, PrivilegedM, output logic [2:0] Funct3M, output logic RegWriteM, // for Hazard Unit // Writeback stage control signals - input logic FlushW, + input logic StallW, 
FlushW, output logic RegWriteW, // for datapath and Hazard Unit output logic [1:0] ResultSrcW, output logic InstrValidW, @@ -132,7 +132,7 @@ module controller( endcase // Execute stage pipeline control register and logic - floprc #(21) controlregE(clk, reset, FlushE, + flopenrc #(21) controlregE(clk, reset, FlushE, ~StallE, {RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUControlD, ALUSrcAD, ALUSrcBD, TargetSrcD, CSRWriteD, PrivilegedD, Funct3D, 1'b1}, {RegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUControlE, ALUSrcAE, ALUSrcBE, TargetSrcE, CSRWriteE, PrivilegedE, Funct3E, InstrValidE}); @@ -155,12 +155,12 @@ module controller( assign MemReadE = MemRWE[1]; // Memory stage pipeline control register - floprc #(11) controlregM(clk, reset, FlushM, + flopenrc #(11) controlregM(clk, reset, FlushM, ~StallM, {RegWriteE, ResultSrcE, MemRWE, CSRWriteE, PrivilegedE, Funct3E, InstrValidE}, {RegWriteM, ResultSrcM, MemRWM, CSRWriteM, PrivilegedM, Funct3M, InstrValidM}); // Writeback stage pipeline control register - floprc #(4) controlregW(clk, reset, FlushW, + flopenrc #(4) controlregW(clk, reset, FlushW, ~StallW, {RegWriteM, ResultSrcM, InstrValidM}, {RegWriteW, ResultSrcW, InstrValidW}); diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index bb02bad53..0147c487d 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -32,7 +32,7 @@ module datapath ( input logic [2:0] ImmSrcD, input logic [31:0] InstrD, // Execute stage signals - input logic FlushE, + input logic StallE, FlushE, input logic [1:0] ForwardAE, ForwardBE, input logic PCSrcE, input logic [4:0] ALUControlE, @@ -42,7 +42,7 @@ module datapath ( output logic [2:0] FlagsE, output logic [`XLEN-1:0] PCTargetE, // Memory stage signals - input logic FlushM, + input logic StallM, FlushM, input logic [2:0] Funct3M, input logic [`XLEN-1:0] CSRReadValW, input logic [`XLEN-1:0] ReadDataW, @@ -50,7 +50,7 @@ module datapath ( output logic [`XLEN-1:0] 
SrcAM, output logic [`XLEN-1:0] WriteDataM, MemAdrM, // Writeback stage signals - input logic FlushW, + input logic StallW, FlushW, input logic RegWriteW, input logic [1:0] ResultSrcW, input logic [`XLEN-1:0] PCLinkW, @@ -85,12 +85,12 @@ module datapath ( extend ext(.InstrD(InstrD[31:7]), .*); // Execute stage pipeline register and logic - floprc #(`XLEN) RD1EReg(clk, reset, FlushE, RD1D, RD1E); - floprc #(`XLEN) RD2EReg(clk, reset, FlushE, RD2D, RD2E); - floprc #(`XLEN) ExtImmEReg(clk, reset, FlushE, ExtImmD, ExtImmE); - floprc #(5) Rs1EReg(clk, reset, FlushE, Rs1D, Rs1E); - floprc #(5) Rs2EReg(clk, reset, FlushE, Rs2D, Rs2E); - floprc #(5) RdEReg(clk, reset, FlushE, RdD, RdE); + flopenrc #(`XLEN) RD1EReg(clk, reset, FlushE, ~StallE, RD1D, RD1E); + flopenrc #(`XLEN) RD2EReg(clk, reset, FlushE, ~StallE, RD2D, RD2E); + flopenrc #(`XLEN) ExtImmEReg(clk, reset, FlushE, ~StallE, ExtImmD, ExtImmE); + flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E); + flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); + flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); mux3 #(`XLEN) faemux(RD1E, ResultW, ALUResultM, ForwardAE, PreSrcAE); mux3 #(`XLEN) fbemux(RD2E, ResultW, ALUResultM, ForwardBE, WriteDataE); @@ -101,15 +101,15 @@ module datapath ( assign PCTargetE = ExtImmE + TargetBaseE; // Memory stage pipeline register - floprc #(`XLEN) SrcAMReg(clk, reset, FlushM, SrcAE, SrcAM); - floprc #(`XLEN) ALUResultMReg(clk, reset, FlushM, ALUResultE, ALUResultM); + flopenrc #(`XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM); + flopenrc #(`XLEN) ALUResultMReg(clk, reset, FlushM, ~StallM, ALUResultE, ALUResultM); assign MemAdrM = ALUResultM; - floprc #(`XLEN) WriteDataMReg(clk, reset, FlushM, WriteDataE, WriteDataM); - floprc #(5) RdMEg(clk, reset, FlushM, RdE, RdM); + flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM); + flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM); // Writeback stage pipeline 
register and logic - floprc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ALUResultM, ALUResultW); - floprc #(5) RdWEg(clk, reset, FlushW, RdM, RdW); + flopenrc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ~StallW, ALUResultM, ALUResultW); + flopenrc #(5) RdWEg(clk, reset, FlushW, ~StallW, RdM, RdW); mux4 #(`XLEN) resultmux(ALUResultW, ReadDataW, PCLinkW, CSRReadValW, ResultSrcW, ResultW); endmodule diff --git a/wally-pipelined/src/ieu/forward.sv b/wally-pipelined/src/ieu/forward.sv index 6dcd5154f..166ebe23a 100644 --- a/wally-pipelined/src/ieu/forward.sv +++ b/wally-pipelined/src/ieu/forward.sv @@ -30,7 +30,7 @@ module forward( input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, input logic MemReadE, input logic RegWriteM, RegWriteW, - // Forwaring controls + // Forwarding controls output logic [1:0] ForwardAE, ForwardBE, output logic LoadStallD ); diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index 7ed4bdffa..2b1e7d415 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -47,7 +47,8 @@ module ieu ( input logic [`XLEN-1:0] PCLinkW, output logic InstrValidW, // hazards - input logic StallD, FlushD, FlushE, FlushM, FlushW, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, input logic RetM, TrapM, output logic LoadStallD, output logic PCSrcE, diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 3a12b330a..86f96b95c 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -28,13 +28,15 @@ module ifu ( input logic clk, reset, - input logic StallF, StallD, FlushD, FlushE, FlushM, FlushW, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, // Fetch input logic [31:0] InstrF, output logic [`XLEN-1:0] PCF, output logic [`XLEN-1:0] InstrPAdrF, + output logic InstrReadF, // Decode - output logic InstrStall, + //output logic InstrStall, // Execute input logic 
PCSrcE, input logic [`XLEN-1:0] PCTargetE, @@ -59,12 +61,12 @@ module ifu ( logic IllegalCompInstrD; logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkE, PCLinkM; logic CompressedF; - logic [31:0] InstrRawD, InstrE; + logic [31:0] InstrRawD, InstrE, InstrW; logic [31:0] nop = 32'h00000013; // instruction for NOP // *** put memory interface on here, InstrF becomes output - assign InstrStall = 0; // *** assign InstrPAdrF = PCF; // *** no MMU + assign InstrReadF = ~StallD; assign PrivilegedChangePCM = RetM | TrapM; @@ -107,25 +109,26 @@ module ifu ( // pipeline misaligned faults to M stage assign BranchMisalignedFaultE = misaligned & PCSrcE; // E-stage (Branch/Jump) misaligned - flopr #(1) InstrMisalginedReg(clk, reset, BranchMisalignedFaultE, BranchMisalignedFaultM); - flopr #(`XLEN) InstrMisalignedAdrReg(clk, reset, PCNextF, InstrMisalignedAdrM); + flopenr #(1) InstrMisalginedReg(clk, reset, ~StallM, BranchMisalignedFaultE, BranchMisalignedFaultM); + flopenr #(`XLEN) InstrMisalignedAdrReg(clk, reset, ~StallM, PCNextF, InstrMisalignedAdrM); assign TrapMisalignedFaultM = misaligned & PrivilegedChangePCM; assign InstrMisalignedFaultM = BranchMisalignedFaultM; // | TrapMisalignedFaultM; *** put this back in without causing a cyclic path - flopr #(32) InstrEReg(clk, reset, FlushE ? nop : InstrD, InstrE); - flopr #(32) InstrMReg(clk, reset, FlushM ? nop : InstrE, InstrM); - flopr #(`XLEN) PCEReg(clk, reset, PCD, PCE); - flopr #(`XLEN) PCMReg(clk, reset, PCE, PCM); - flopr #(`XLEN) PCWReg(clk, reset, PCM, PCW); // *** probably not needed; delete later + flopenr #(32) InstrEReg(clk, reset, ~StallE, FlushE ? nop : InstrD, InstrE); + flopenr #(32) InstrMReg(clk, reset, ~StallM, FlushM ? nop : InstrE, InstrM); + flopenr #(32) InstrWReg(clk, reset, ~StallW, FlushW ? 
nop : InstrM, InstrW); // just for testbench, delete later + flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, PCD, PCE); + flopenr #(`XLEN) PCMReg(clk, reset, ~StallM, PCE, PCM); + flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); // *** probably not needed; delete later // seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL. // either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or // have dedicated adder in Mem stage based on PCM + 2 or 4 // *** redo this - flopr #(`XLEN) PCPDReg(clk, reset, PCPlus2or4F, PCLinkD); - flopr #(`XLEN) PCPEReg(clk, reset, PCLinkD, PCLinkE); - flopr #(`XLEN) PCPMReg(clk, reset, PCLinkE, PCLinkM); - flopr #(`XLEN) PCPWReg(clk, reset, PCLinkM, PCLinkW); + flopenr #(`XLEN) PCPDReg(clk, reset, ~StallD, PCPlus2or4F, PCLinkD); + flopenr #(`XLEN) PCPEReg(clk, reset, ~StallE, PCLinkD, PCLinkE); + flopenr #(`XLEN) PCPMReg(clk, reset, ~StallM, PCLinkE, PCLinkM); + flopenr #(`XLEN) PCPWReg(clk, reset, ~StallW, PCLinkM, PCLinkW); endmodule diff --git a/wally-pipelined/src/uncore/dtim.sv b/wally-pipelined/src/uncore/dtim.sv index 6a49fcf59..611183605 100644 --- a/wally-pipelined/src/uncore/dtim.sv +++ b/wally-pipelined/src/uncore/dtim.sv @@ -36,13 +36,15 @@ module dtim ( ); logic [`XLEN-1:0] RAM[0:65535]; + logic [18:0] HWADDR; + // logic [`XLEN-1:0] write; logic [15:0] entry; logic memread, memwrite; logic [3:0] busycount; // busy FSM to extend READY signal - always_ff @(posedge HCLK, negedge HRESETn) +/* always_ff @(posedge HCLK, negedge HRESETn) if (~HRESETn) begin HREADYTim <= 1; end else begin @@ -52,25 +54,34 @@ module dtim ( end else if (~HREADYTim) begin if (busycount == 0) begin // TIM latency, for testing purposes HREADYTim <= 1; - end else + end else begin busycount <= busycount + 1; + end end + end*/ + always_ff @(posedge HCLK, negedge HRESETn) + if (~HRESETn) begin + HREADYTim <= 0; + end else begin + HREADYTim <= HSELTim; // always respond one cycle later end - + assign memread = 
MemRWtim[1]; assign memwrite = MemRWtim[0]; +// always_ff @(posedge HCLK) +// memwrite <= MemRWtim[0]; // delay memwrite to write phase assign HRESPTim = 0; // OK // assign HREADYTim = 1; // Respond immediately; *** extend this // word aligned reads - generate +/* generate if (`XLEN==64) assign #2 entry = HADDR[18:3]; else assign #2 entry = HADDR[17:2]; - endgenerate - assign HREADTim = RAM[entry]; + endgenerate */ +// assign HREADTim = RAM[entry]; // assign HREADTim = HREADYTim ? RAM[entry] : ~RAM[entry]; // *** temproary mess up read value before ready // write each byte based on the byte mask @@ -105,17 +116,34 @@ module dtim ( if (memwrite) RAM[HADDR[17:2]] <= write; end endgenerate */ + + // Model memory read and write + // If write occurs at end of phase (rising edge of clock), + // then read of same address on next cycle won't work. Would need to bypass. + // Faking for now with negedge clock write. Will need to adjust this to + // match capabilities of FPGA or actual chip RAM. + // Also, writes occurring later than reads throw off single ported RAM that + // might be asked to write on one instruction and read on the next and would need + // to stall because both accesses happen on same cycle with AHB delay + generate - if (`XLEN == 64) + if (`XLEN == 64) begin + always_ff @(negedge HCLK) + if (memwrite) RAM[HWADDR[17:3]] <= HWDATA; always_ff @(posedge HCLK) begin - if (memwrite) RAM[HADDR[17:3]] <= HWDATA; -// HREADTim <= RAM[HADDR[17:3]]; + //if (memwrite) RAM[HADDR[17:3]] <= HWDATA; + HWADDR <= HADDR; + HREADTim <= RAM[HADDR[17:3]]; end - else + end else begin + always_ff @(negedge HCLK) + if (memwrite) RAM[HWADDR[17:2]] <= HWDATA; always_ff @(posedge HCLK) begin - if (memwrite) RAM[HADDR[17:2]] <= HWDATA; -// HREADTim <= RAM[HADDR[17:2]]; + //if (memwrite) RAM[HADDR[17:2]] <= HWDATA; + HWADDR <= HADDR; + HREADTim <= RAM[HADDR[17:2]]; end + end endgenerate endmodule diff --git a/wally-pipelined/src/uncore/subwordwrite.sv 
b/wally-pipelined/src/uncore/subwordwrite.sv index 68c2b0e47..b1e8d683f 100644 --- a/wally-pipelined/src/uncore/subwordwrite.sv +++ b/wally-pipelined/src/uncore/subwordwrite.sv @@ -27,37 +27,35 @@ module subwordwrite ( input logic [`XLEN-1:0] HRDATA, - input logic [31:0] HADDR, - input logic [2:0] HSIZE, + input logic [2:0] HADDRD, + input logic [3:0] HSIZED, input logic [`XLEN-1:0] HWDATAIN, output logic [`XLEN-1:0] HWDATA ); - logic [7:0] ByteM; // *** declare locally to generate as either 4 or 8 bits - logic [15:0] HalfwordM; logic [`XLEN-1:0] WriteDataSubwordDuplicated; - logic [7:0] ByteMaskM; generate if (`XLEN == 64) begin + logic [7:0] ByteMaskM; // Compute write mask always_comb - case(HSIZE[1:0]) - 2'b00: begin ByteMaskM = 8'b00000000; ByteMaskM[HADDR[2:0]] = 1; end // sb - 2'b01: case (HADDR[2:1]) + case(HSIZED[1:0]) + 2'b00: begin ByteMaskM = 8'b00000000; ByteMaskM[HADDRD[2:0]] = 1; end // sb + 2'b01: case (HADDRD[2:1]) 2'b00: ByteMaskM = 8'b00000011; 2'b01: ByteMaskM = 8'b00001100; 2'b10: ByteMaskM = 8'b00110000; 2'b11: ByteMaskM = 8'b11000000; endcase - 2'b10: if (HADDR[2]) ByteMaskM = 8'b11110000; + 2'b10: if (HADDRD[2]) ByteMaskM = 8'b11110000; else ByteMaskM = 8'b00001111; 2'b11: ByteMaskM = 8'b11111111; endcase // Handle subword writes always_comb - case(HSIZE[1:0]) + case(HSIZED[1:0]) 2'b00: WriteDataSubwordDuplicated = {8{HWDATAIN[7:0]}}; // sb 2'b01: WriteDataSubwordDuplicated = {4{HWDATAIN[15:0]}}; // sh 2'b10: WriteDataSubwordDuplicated = {2{HWDATAIN[31:0]}}; // sw @@ -77,19 +75,20 @@ module subwordwrite ( end end else begin // 32-bit + logic [3:0] ByteMaskM; // Compute write mask always_comb - case(HSIZE[1:0]) - 2'b00: begin ByteMaskM = 8'b0000; ByteMaskM[{1'b0, HADDR[1:0]}] = 1; end // sb - 2'b01: if (HADDR[1]) ByteMaskM = 8'b1100; - else ByteMaskM = 8'b0011; - 2'b10: ByteMaskM = 8'b1111; - default: ByteMaskM = 8'b111; // shouldn't happen + case(HSIZED[1:0]) + 2'b00: begin ByteMaskM = 4'b0000; ByteMaskM[HADDRD[1:0]] = 1; end // sb + 2'b01: 
if (HADDRD[1]) ByteMaskM = 4'b1100; + else ByteMaskM = 4'b0011; + 2'b10: ByteMaskM = 4'b1111; + default: ByteMaskM = 4'b111; // shouldn't happen endcase // Handle subword writes always_comb - case(HSIZE[1:0]) + case(HSIZED[1:0]) 2'b00: WriteDataSubwordDuplicated = {4{HWDATAIN[7:0]}}; // sb 2'b01: WriteDataSubwordDuplicated = {2{HWDATAIN[15:0]}}; // sh 2'b10: WriteDataSubwordDuplicated = HWDATAIN; // sw diff --git a/wally-pipelined/src/uncore/uartPC16550D.sv b/wally-pipelined/src/uncore/uartPC16550D.sv index ec3e25227..051bcef47 100644 --- a/wally-pipelined/src/uncore/uartPC16550D.sv +++ b/wally-pipelined/src/uncore/uartPC16550D.sv @@ -6,6 +6,7 @@ // // Purpose: Universial Asynchronous Receiver/ Transmitter with FIFOs // Emulates interface of Texas Instruments PC16550D +// https://media.digikey.com/pdf/Data%20Sheets/Texas%20Instruments%20PDFs/PC16550D.pdf // Compatible with UART in Imperas Virtio model *** // // Compatible with most of PC16550D with the following known exceptions: diff --git a/wally-pipelined/src/uncore/uncore.sv b/wally-pipelined/src/uncore/uncore.sv index ada1eb93c..2d12d2b50 100644 --- a/wally-pipelined/src/uncore/uncore.sv +++ b/wally-pipelined/src/uncore/uncore.sv @@ -43,6 +43,10 @@ module uncore ( input logic HREADYEXT, HRESPEXT, output logic [`AHBW-1:0] HRDATA, output logic HREADY, HRESP, + // delayed signals + input logic [2:0] HADDRD, + input logic [3:0] HSIZED, + input logic HWRITED, // bus interface output logic DataAccessFaultM, // peripheral pins @@ -71,7 +75,7 @@ module uncore ( assign HSELUART = PreHSELUART && (HSIZE == 3'b000); // only byte writes to UART are supported // Enable read or write based on decoded address - assign MemRW = {~HWRITE, HWRITE}; + assign MemRW = {~HWRITE, HWRITED}; assign MemRWtim = MemRW & {2{HSELTim}}; assign MemRWclint = MemRW & {2{HSELCLINT}}; assign MemRWgpio = MemRW & {2{HSELGPIO}}; diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 
408045e23..c491bfc23 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -45,11 +45,16 @@ module wallypipelinedhart ( output logic [2:0] HBURST, output logic [3:0] HPROT, output logic [1:0] HTRANS, - output logic HMASTLOCK + output logic HMASTLOCK, + // Delayed signals for subword write + output logic [2:0] HADDRD, + output logic [3:0] HSIZED, + output logic HWRITED ); - logic [1:0] ForwardAE, ForwardBE; - logic StallF, StallD, FlushD, FlushE, FlushM, FlushW; +// logic [1:0] ForwardAE, ForwardBE; + logic StallF, StallD, StallE, StallM, StallW; + logic FlushD, FlushE, FlushM, FlushW; logic RetM, TrapM; // new signals that must connect through DP @@ -79,26 +84,34 @@ module wallypipelinedhart ( logic FloatRegWriteW; // bus interface to dmem - logic [1:0] MemRWAlignedM; - logic [2:0] Funct3M; + logic MemReadM, MemWriteM; + logic [2:0] Funct3M; logic [`XLEN-1:0] MemAdrM, MemPAdrM, WriteDataM; - logic [`XLEN-1:0] ReadDataM, ReadDataW; + logic [`XLEN-1:0] ReadDataW; logic [`XLEN-1:0] InstrPAdrF; + logic InstrReadF; logic DataStall, InstrStall; logic InstrAckD, MemAckW; ifu ifu(.*); // instruction fetch unit: PC, branch prediction, instruction cache ieu ieu(.*); // inteber execution unit: integer register file, datapath and controller - dmem dmem(/*.Funct3M(InstrM[14:12]),*/ .*); // data cache unit + dmem dmem(.*); // data cache unit - ahblite ebu( // *** make IRData InstrF - .IReadF(1'b1), .IRData(), //.IReady(), - .DReadM(MemRWAlignedM[1]), .DWriteM(MemRWAlignedM[0]), - .DSizeM(Funct3M[1:0]), .DRData(ReadDataM), //.DReady(), - .UnsignedLoadM(Funct3M[2]), +/* + ahblite ebu( + //.InstrReadF(1'b0), + .InstrRData(InstrF), // hook up InstrF later + .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), + .*); +*/ +// changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed. 
+// Would need to insertinstruction as InstrD, not InstrF + ahblite ebu( + .InstrReadF(1'b0), + .InstrRData(), // hook up InstrF later + .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), .*); - //assign InstrF = ReadDataM[31:0]; /* mdu mdu(.*); // multiply and divide unit diff --git a/wally-pipelined/src/wally/wallypipelinedsoc.sv b/wally-pipelined/src/wally/wallypipelinedsoc.sv index 9b0ed2456..bdb621726 100644 --- a/wally-pipelined/src/wally/wallypipelinedsoc.sv +++ b/wally-pipelined/src/wally/wallypipelinedsoc.sv @@ -64,6 +64,9 @@ module wallypipelinedsoc ( logic InstrAccessFaultF, DataAccessFaultM; logic TimerIntM, SwIntM; // from CLINT logic ExtIntM = 0; // not yet connected + logic [2:0] HADDRD; + logic [3:0] HSIZED; + logic HWRITED; // instantiate processor and memories wallypipelinedhart hart(.*); diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 67a896445..4f5a24ffc 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -35,7 +35,7 @@ module testbench(); logic [`XLEN-1:0] signature[0:10000]; logic [`XLEN-1:0] testadr; string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName; - logic [31:0] InstrW; + //logic [31:0] InstrW; logic [`XLEN-1:0] meminit; string tests64ic[] = '{ @@ -75,7 +75,7 @@ string tests64iNOc[] = { "rv64i/I-MISALIGN_JMP-01","2000" }; string tests64i[] = '{ - "rv64i/I-LW-01", "4110", + "rv64i/I-ENDIANESS-01", "2010", "rv64i/I-ADD-01", "3000", "rv64i/I-ADDI-01", "3000", "rv64i/I-ADDIW-01", "3000", @@ -262,7 +262,7 @@ string tests32i[] = { // Track names of instructions instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, - dut.hart.ifu.InstrM, InstrW, + dut.hart.ifu.InstrM, dut.hart.ifu.InstrW, InstrDName, InstrEName, InstrMName, InstrWName); // initialize tests @@ -368,11 +368,12 @@ module instrTrackerTB( input logic clk, reset, FlushE, input logic [31:0] InstrD, 
input logic [31:0] InstrE, InstrM, - output logic [31:0] InstrW, + input logic [31:0] InstrW, +// output logic [31:0] InstrW, output string InstrDName, InstrEName, InstrMName, InstrWName); // stage Instr to Writeback for visualization - flopr #(32) InstrWReg(clk, reset, InstrM, InstrW); + // flopr #(32) InstrWReg(clk, reset, InstrM, InstrW); instrNameDecTB ddec(InstrD, InstrDName); instrNameDecTB edec(InstrE, InstrEName); From 74bc4c0444f9dda05b7a3eb965a1a15ebd4ea17b Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 7 Feb 2021 23:28:21 -0500 Subject: [PATCH 02/10] Fixed lw by delaying read value by one cycle --- wally-pipelined/src/ebu/ahblite.sv | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 9d4e62be7..55f79cebc 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -32,6 +32,7 @@ module ahblite ( input logic clk, reset, + input logic StallW, FlushW, // Load control input logic UnsignedLoadM, // Signals from Instruction Cache @@ -71,7 +72,7 @@ module ahblite ( logic GrantData; logic [2:0] ISize; - logic [`AHBW-1:0] HRDATAMasked; + logic [`AHBW-1:0] HRDATAMasked, ReadDataM; logic IReady, DReady; // logic [3:0] HSIZED; // size delayed by one cycle for reads // logic [2:0] HADDRD; // address delayed for subword reads @@ -136,7 +137,9 @@ module ahblite ( // *** assumes AHBW = XLEN assign InstrRData = HRDATAMasked[31:0]; assign IReady = HREADY & InstrReadF & ~GrantData; // maybe unused?*** - assign ReadDataW = HRDATAMasked; +// assign ReadDataW = HRDATAMasked; + assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021 + flopenrc #(`XLEN) ReadDataWReg(clk, reset, FlushW, ~StallW, ReadDataM, ReadDataW); assign DReady = HREADY & GrantData; // ***unused? 
From 842c374de929c0d23296996954fcbefd90572178 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 9 Feb 2021 11:02:17 -0500 Subject: [PATCH 03/10] Debugging instruction fetch --- wally-pipelined/regression/wally-pipelined.do | 11 ++++++----- wally-pipelined/src/ebu/ahblite.sv | 4 +++- wally-pipelined/src/ifu/ifu.sv | 6 +++++- wally-pipelined/src/wally/wallypipelinedhart.sv | 15 ++++++++------- wally-pipelined/testbench/testbench-imperas.sv | 10 +++++----- 5 files changed, 27 insertions(+), 19 deletions(-) diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do index a88569f49..227ea3770 100644 --- a/wally-pipelined/regression/wally-pipelined.do +++ b/wally-pipelined/regression/wally-pipelined.do @@ -62,26 +62,27 @@ add wave -divider add wave -hex /testbench/dut/hart/ifu/PCF add wave -hex /testbench/dut/hart/ifu/InstrF add wave /testbench/InstrFName -#add wave -hex /testbench/dut/hart/ifu/PCD +add wave -hex /testbench/dut/hart/ifu/PCD add wave -hex /testbench/dut/hart/ifu/InstrD add wave /testbench/InstrDName add wave -divider -#add wave -hex /testbench/dut/hart/ifu/PCE -#add wave -hex /testbench/dut/hart/ifu/InstrE +add wave -hex /testbench/dut/hart/ifu/PCE +add wave -hex /testbench/dut/hart/ifu/InstrE add wave /testbench/InstrEName add wave -hex /testbench/dut/hart/ieu/dp/SrcAE add wave -hex /testbench/dut/hart/ieu/dp/SrcBE add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE add wave /testbench/dut/hart/ieu/dp/PCSrcE add wave -divider -#add wave -hex /testbench/dut/hart/ifu/PCM -#add wave -hex /testbench/dut/hart/ifu/InstrM +add wave -hex /testbench/dut/hart/ifu/PCM +add wave -hex /testbench/dut/hart/ifu/InstrM add wave /testbench/InstrMName add wave /testbench/dut/uncore/dtim/memwrite add wave -hex /testbench/dut/uncore/HADDR add wave -hex /testbench/dut/uncore/HWDATA add wave -divider add wave -hex /testbench/dut/hart/ifu/PCW +add wave -hex /testbench/dut/hart/ifu/InstrW add wave /testbench/InstrWName add 
wave /testbench/dut/hart/ieu/dp/RegWriteW add wave -hex /testbench/dut/hart/ieu/dp/ResultW diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 55f79cebc..ce54a21fe 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -38,6 +38,7 @@ module ahblite ( // Signals from Instruction Cache input logic [`XLEN-1:0] InstrPAdrF, // *** rename these to match block diagram input logic InstrReadF, + input logic ResolveBranchD, output logic [31:0] InstrRData, // output logic IReady, // Signals from Data Cache @@ -158,7 +159,8 @@ module ahblite ( else InstrState <= NextInstrState;*/ assign NextInstrState = (InstrState == 0 && MemState == 0 && (~MemReadM && ~MemWriteM && InstrReadF)) || - (InstrState == 1 && ~InstrAckD); + (InstrState == 1 && ~InstrAckD) || + (InstrState == 1 && ResolveBranchD); // dh 2/8/2021 fixing assign InstrStall = NextInstrState | MemState | NextMemState; // *** check this, explain better // temporarily turn off stalls and check it works //assign DataStall = 0; diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 86f96b95c..6c16ebd8f 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -37,6 +37,7 @@ module ifu ( output logic InstrReadF, // Decode //output logic InstrStall, + output logic ResolveBranchD, // Execute input logic PCSrcE, input logic [`XLEN-1:0] PCTargetE, @@ -66,12 +67,15 @@ module ifu ( // *** put memory interface on here, InstrF becomes output assign InstrPAdrF = PCF; // *** no MMU - assign InstrReadF = ~StallD; + assign InstrReadF = ~StallD; // *** & ICacheMissF; add later assign PrivilegedChangePCM = RetM | TrapM; assign StallExceptResolveBranchesF = StallF & ~(PCSrcE | PrivilegedChangePCM); + // dh 2/8/2022 keep in instruction fetch stall mode when taking branch + flopr #(1) rbreg(clk, reset, (PCSrcE | PrivilegedChangePCM), ResolveBranchD); + mux3 #(`XLEN) pcmux(PCPlus2or4F, PCTargetE, PrivilegedNextPCM, 
{PrivilegedChangePCM, PCSrcE}, UnalignedPCNextF); assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment flopenl #(`XLEN) pcreg(clk, reset, ~StallExceptResolveBranchesF, PCNextF, `RESET_VECTOR, PCF); diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index c491bfc23..56c98d386 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -29,7 +29,7 @@ module wallypipelinedhart ( input logic clk, reset, output logic [`XLEN-1:0] PCF, - input logic [31:0] InstrF, +// input logic [31:0] InstrF, // Privileged input logic TimerIntM, ExtIntM, SwIntM, input logic InstrAccessFaultF, @@ -60,7 +60,7 @@ module wallypipelinedhart ( // new signals that must connect through DP logic CSRWriteM, PrivilegedM; logic [`XLEN-1:0] SrcAM; -// logic [31:0] InstrF; + logic [31:0] InstrF; logic [31:0] InstrD, InstrM; logic [`XLEN-1:0] PCE, PCM, PCLinkW; logic [`XLEN-1:0] PCTargetE; @@ -75,6 +75,7 @@ module wallypipelinedhart ( logic StoreMisalignedFaultM, StoreAccessFaultM; logic [`XLEN-1:0] InstrMisalignedAdrM; logic [`XLEN-1:0] zero = 0; + logic ResolveBranchD; logic PCSrcE; logic CSRWritePendingDEM; @@ -98,20 +99,20 @@ module wallypipelinedhart ( ieu ieu(.*); // inteber execution unit: integer register file, datapath and controller dmem dmem(.*); // data cache unit -/* + ahblite ebu( //.InstrReadF(1'b0), .InstrRData(InstrF), // hook up InstrF later .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), .*); -*/ + // changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed. 
// Would need to insertinstruction as InstrD, not InstrF - ahblite ebu( - .InstrReadF(1'b0), + /*ahblite ebu( + .InstrReadF(1'b0), .InstrRData(), // hook up InstrF later .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), - .*); + .*); */ /* mdu mdu(.*); // multiply and divide unit diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 4f5a24ffc..98c176161 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -75,7 +75,6 @@ string tests64iNOc[] = { "rv64i/I-MISALIGN_JMP-01","2000" }; string tests64i[] = '{ - "rv64i/I-ENDIANESS-01", "2010", "rv64i/I-ADD-01", "3000", "rv64i/I-ADDI-01", "3000", "rv64i/I-ADDIW-01", "3000", @@ -261,9 +260,9 @@ string tests32i[] = { // Track names of instructions instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE, - dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, + dut.hart.ifu.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, dut.hart.ifu.InstrM, dut.hart.ifu.InstrW, - InstrDName, InstrEName, InstrMName, InstrWName); + InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); // initialize tests initial @@ -366,15 +365,16 @@ endmodule module instrTrackerTB( input logic clk, reset, FlushE, - input logic [31:0] InstrD, + input logic [31:0] InstrF, InstrD, input logic [31:0] InstrE, InstrM, input logic [31:0] InstrW, // output logic [31:0] InstrW, - output string InstrDName, InstrEName, InstrMName, InstrWName); + output string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); // stage Instr to Writeback for visualization // flopr #(32) InstrWReg(clk, reset, InstrM, InstrW); + instrNameDecTB fdec(InstrF, InstrFName); instrNameDecTB ddec(InstrD, InstrDName); instrNameDecTB edec(InstrE, InstrEName); instrNameDecTB mdec(InstrM, InstrMName); From b121b90b287025a195ae0884ef36cd60b96139f3 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 10 Feb 2021 01:43:54 -0500 Subject: [PATCH 04/10] Debugging bus interface. 
--- wally-pipelined/src/ebu/ahblite.sv | 7 +++++-- wally-pipelined/src/hazard/hazard.sv | 2 +- wally-pipelined/testbench/testbench-imperas.sv | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index ce54a21fe..368824288 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -110,6 +110,7 @@ module ahblite ( if (MemReadM) NextAdrState = MEMREAD; else if (MemWriteM) NextAdrState = MEMWRITE; else if (InstrReadF) NextAdrState = INSTRREAD; +// else if (1) NextAdrState = INSTRREAD; // dm 2/9/2021 testing else NextAdrState = IDLE; // Generate acknowledges based on bus state and ready @@ -159,8 +160,10 @@ module ahblite ( else InstrState <= NextInstrState;*/ assign NextInstrState = (InstrState == 0 && MemState == 0 && (~MemReadM && ~MemWriteM && InstrReadF)) || - (InstrState == 1 && ~InstrAckD) || - (InstrState == 1 && ResolveBranchD); // dh 2/8/2021 fixing + (InstrState == 1 && ~InstrAckD) || + (InstrState == 1 && ResolveBranchD); // dh 2/8/2021 fixing; delete this later +/* assign NextInstrState = (InstrState == 0 && MemState == 0 && (~MemReadM && ~MemWriteM)) || + (InstrState == 1 && ~InstrAckD); // *** removed InstrReadF above dh 2/9/20 */ assign InstrStall = NextInstrState | MemState | NextMemState; // *** check this, explain better // temporarily turn off stalls and check it works //assign DataStall = 0; diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index 7c4afd6fe..88d78ee47 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -61,7 +61,7 @@ module hazard( assign StallDCause = LoadStallD; // stall in decode if instruction is a load dependent on previous assign StallECause = 0; assign StallMCause = 0; // sDataStall; // not yet used*** - assign StallWCause = DataStall; + assign StallWCause = DataStall; // | InstrStall; // Each stage stalls if the next stage is 
stalled or there is a cause to stall this stage. assign StallF = StallD | StallFCause; diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 98c176161..cad2d68d2 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -75,6 +75,7 @@ string tests64iNOc[] = { "rv64i/I-MISALIGN_JMP-01","2000" }; string tests64i[] = '{ + "rv64i/I-ENDIANESS-01", "2010", "rv64i/I-ADD-01", "3000", "rv64i/I-ADDI-01", "3000", "rv64i/I-ADDIW-01", "3000", From cc42655789b68f14a819ce76023ded6940a311b5 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 15 Feb 2021 10:10:50 -0500 Subject: [PATCH 05/10] More memory interface, ALU testgen --- wally-pipelined/src/ebu/ahblite.sv | 127 ++++++++++----- wally-pipelined/src/generic/flop.sv | 10 ++ wally-pipelined/src/hazard/hazard.sv | 4 +- wally-pipelined/src/ifu/ifu.sv | 10 +- wally-pipelined/src/privileged/csr.sv | 2 +- wally-pipelined/src/privileged/csrsr.sv | 4 +- wally-pipelined/src/privileged/privileged.sv | 10 +- wally-pipelined/src/uncore/dtim.sv | 81 +++------ ...=> testgen-ADD-SUB-SLT-SLTU-XOR-OR-AND.py} | 37 ++++- .../testgen/testgen-SLL-SRL-SRA.py | 154 ++++++++++++++++++ 10 files changed, 317 insertions(+), 122 deletions(-) rename wally-pipelined/testgen/{testgen-ADD-SUB.py => testgen-ADD-SUB-SLT-SLTU-XOR-OR-AND.py} (82%) create mode 100755 wally-pipelined/testgen/testgen-SLL-SRL-SRA.py diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 368824288..e917b6680 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -38,9 +38,8 @@ module ahblite ( // Signals from Instruction Cache input logic [`XLEN-1:0] InstrPAdrF, // *** rename these to match block diagram input logic InstrReadF, - input logic ResolveBranchD, +// input logic ResolveBranchD, output logic [31:0] InstrRData, -// output logic IReady, // Signals from Data Cache input logic 
[`XLEN-1:0] MemPAdrM, input logic MemReadM, MemWriteM, @@ -48,7 +47,6 @@ module ahblite ( input logic [1:0] MemSizeM, // Return from bus output logic [`XLEN-1:0] ReadDataW, -// output logic DReady, // AHB-Lite external signals input logic [`AHBW-1:0] HRDATA, input logic HREADY, HRESP, @@ -61,14 +59,12 @@ module ahblite ( output logic [3:0] HPROT, output logic [1:0] HTRANS, output logic HMASTLOCK, - // Delayed signals for subword write + // Delayed signals for writes output logic [2:0] HADDRD, output logic [3:0] HSIZED, output logic HWRITED, - // Acknowledge - output logic InstrAckD, MemAckW, // Stalls - output logic InstrStall, DataStall + output logic InstrStall,/*InstrUpdate, */DataStall ); logic GrantData; @@ -81,12 +77,61 @@ module ahblite ( assign HCLK = clk; assign HRESETn = ~reset; - // Arbitrate requests by giving data priority over instructions - assign GrantData = MemReadM | MemWriteM; - // *** initially support HABW = XLEN // track bus state + // Data accesses have priority over instructions. However, if a data access comes + // while an instruction read is occuring, the instruction read finishes before + // the data access can take place. 
+ typedef enum {IDLE, MEMREAD, MEMWRITE, INSTRREAD, INSTRREADMEMPENDING} statetype; + statetype BusState, NextBusState; + + always_ff @(posedge HCLK, negedge HRESETn) + if (~HRESETn) BusState <= #1 IDLE; + else BusState <= #1 NextBusState; + + always_comb + case (BusState) + IDLE: if (MemReadM) NextBusState = MEMREAD; // Memory has pirority over instructions + else if (MemWriteM) NextBusState = MEMWRITE; + else if (InstrReadF) NextBusState = INSTRREAD; + else NextBusState = IDLE; + MEMREAD: if (~HREADY) NextBusState = MEMREAD; + else if (InstrReadF) NextBusState = INSTRREAD; + else NextBusState = IDLE; + MEMWRITE: if (~HREADY) NextBusState = MEMWRITE; + else if (InstrReadF) NextBusState = INSTRREAD; + else NextBusState = IDLE; + INSTRREAD: //if (~HREADY & (MemReadM | MemWriteM)) NextBusState = INSTRREADMEMPENDING; // *** shouldn't happen, delete + if (~HREADY) NextBusState = INSTRREAD; + else NextBusState = IDLE; + INSTRREADMEMPENDING: if (~HREADY) NextBusState = INSTRREADMEMPENDING; // *** shouldn't happen, delete + else if (MemReadM) NextBusState = MEMREAD; + else NextBusState = MEMWRITE; // must be write if not a read. Don't return to idle. + endcase + + // stall signals + assign #2 DataStall = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || (NextBusState == INSTRREADMEMPENDING); + assign #1 InstrStall = (NextBusState == INSTRREAD); + // assign InstrUpdate = (BusState == INSTRREADMEMPENDING) && (NextBusState != INSTRREADMEMPENDING); + + // bus outputs + assign #1 GrantData = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE); + assign #1 HADDR = (GrantData) ? MemPAdrM[31:0] : InstrPAdrF[31:0]; + assign #1 HSIZE = GrantData ? {1'b0, MemSizeM} : ISize; + assign HBURST = 3'b000; // Single burst only supported; consider generalizing for cache fillsfH + assign HPROT = 4'b0011; // not used; see Section 3.7 + assign HTRANS = (NextBusState != IDLE) ? 
2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise + assign HMASTLOCK = 0; // no locking supported + assign HWRITE = (NextBusState == MEMWRITE); + // delay write data by one cycle for + flop #(`XLEN) wdreg(HCLK, WriteDataM, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN + // delay signals for subword writes + flop #(3) adrreg(HCLK, HADDR[2:0], HADDRD); + flop #(4) sizereg(HCLK, {UnsignedLoadM, HSIZE}, HSIZED); + flop #(1) writereg(HCLK, HWRITE, HWRITED); + + /* typedef enum {IDLE, MEMREAD, MEMWRITE, INSTRREAD} statetype; statetype AdrState, DataState, NextAdrState; // what is happening in the first and second phases of the bus always_ff @(posedge HCLK, negedge HRESETn) @@ -117,13 +162,42 @@ module ahblite ( assign MemAckW = (AdrState == MEMREAD || AdrState == MEMWRITE) && HREADY; assign InstrAckD = (AdrState == INSTRREAD) && HREADY; + // State machines for stalls (probably can merge with FSM above***) + // Idle, DataBusy, InstrBusy. Stall while in busystate add suffixes + logic MemState, NextMemState, InstrState, NextInstrState; + flopr #(1) msreg(HCLK, ~HRESETn, NextMemState, MemState); + flopr #(1) isreg(HCLK, ~HRESETn, NextInstrState, InstrState); +/* always_ff @(posedge HCLK, negedge HRESETn) + if (~HRESETn) MemState <= 0; + else MemState <= NextMemState; + assign NextMemState = (MemState == 0 && InstrState == 0 && (MemReadM || MemWriteM)) || (MemState == 1 && ~MemAckW); + assign DataStall = NextMemState; +/* always_ff @(posedge HCLK, negedge HRESETn) + if (~HRESETn) InstrState <= 0; + else InstrState <= NextInstrState; + + assign NextInstrState = (InstrState == 0 && MemState == 0 && (~MemReadM && ~MemWriteM && InstrReadF)) || + (InstrState == 1 && ~InstrAckD) || + (InstrState == 1 && ResolveBranchD); // dh 2/8/2021 fixing; delete this later +/* assign NextInstrState = (InstrState == 0 && MemState == 0 && (~MemReadM && ~MemWriteM)) || + (InstrState == 1 && ~InstrAckD); // *** removed InstrReadF above dh 2/9/20 + assign 
InstrStall = NextInstrState | MemState | NextMemState; // *** check this, explain better + // temporarily turn off stalls and check it works + //assign DataStall = 0; + //assign InstrStall = 0; + + assign DReady = HREADY & GrantData; // ***unused? + assign IReady = HREADY & InstrReadF & ~GrantData; // maybe unused?*** + +*/ + // Choose ISize based on XLen generate //if (`AHBW == 32) assign ISize = 3'b010; // 32-bit transfers //else assign ISize = 3'b011; // 64-bit transfers assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width endgenerate - +/* // drive bus outputs assign HADDR = GrantData ? MemPAdrM[31:0] : InstrPAdrF[31:0]; //assign HWDATA = WriteDataW; @@ -134,41 +208,16 @@ module ahblite ( assign HPROT = 4'b0011; // not used; see Section 3.7 assign HTRANS = InstrReadF | MemReadM | MemWriteM ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise assign HMASTLOCK = 0; // no locking supported - + */ // Route signals to Instruction and Data Caches // *** assumes AHBW = XLEN assign InstrRData = HRDATAMasked[31:0]; - assign IReady = HREADY & InstrReadF & ~GrantData; // maybe unused?*** // assign ReadDataW = HRDATAMasked; assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021 - flopenrc #(`XLEN) ReadDataWReg(clk, reset, FlushW, ~StallW, ReadDataM, ReadDataW); - assign DReady = HREADY & GrantData; // ***unused? + assign CaptureDataM = (BusState == MEMREAD) && (NextBusState != MEMREAD); + flopenr #(`XLEN) ReadDataWReg(clk, reset, CaptureDataM, ReadDataM, ReadDataW); - // State machines for stalls (probably can merge with FSM above***) - // Idle, DataBusy, InstrBusy. 
Stall while in busystate add suffixes - logic MemState, NextMemState, InstrState, NextInstrState; - flopr #(1) msreg(HCLK, ~HRESETn, NextMemState, MemState); - flopr #(1) isreg(HCLK, ~HRESETn, NextInstrState, InstrState); -/* always_ff @(posedge HCLK, negedge HRESETn) - if (~HRESETn) MemState <= 0; - else MemState <= NextMemState; */ - assign NextMemState = (MemState == 0 && InstrState == 0 && (MemReadM || MemWriteM)) || (MemState == 1 && ~MemAckW); - assign DataStall = NextMemState; -/* always_ff @(posedge HCLK, negedge HRESETn) - if (~HRESETn) InstrState <= 0; - else InstrState <= NextInstrState;*/ - - assign NextInstrState = (InstrState == 0 && MemState == 0 && (~MemReadM && ~MemWriteM && InstrReadF)) || - (InstrState == 1 && ~InstrAckD) || - (InstrState == 1 && ResolveBranchD); // dh 2/8/2021 fixing; delete this later -/* assign NextInstrState = (InstrState == 0 && MemState == 0 && (~MemReadM && ~MemWriteM)) || - (InstrState == 1 && ~InstrAckD); // *** removed InstrReadF above dh 2/9/20 */ - assign InstrStall = NextInstrState | MemState | NextMemState; // *** check this, explain better - // temporarily turn off stalls and check it works - //assign DataStall = 0; - //assign InstrStall = 0; - // stalls // Stall MEM stage if data is being accessed and bus isn't yet ready //assign DataStall = GrantData & ~HREADY; diff --git a/wally-pipelined/src/generic/flop.sv b/wally-pipelined/src/generic/flop.sv index 263a4fb90..7e954a8f8 100644 --- a/wally-pipelined/src/generic/flop.sv +++ b/wally-pipelined/src/generic/flop.sv @@ -47,6 +47,16 @@ module flopr #(parameter WIDTH = 8) ( else q <= #1 d; endmodule +// flop with enable +module flopen #(parameter WIDTH = 8) ( + input logic clk, en, + input logic [WIDTH-1:0] d, + output logic [WIDTH-1:0] q); + + always_ff @(posedge clk) + if (en) q <= #1 d; +endmodule + // flop with enable, asynchronous reset, synchronous clear module flopenrc #(parameter WIDTH = 8) ( input logic clk, reset, clear, en, diff --git 
a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index 88d78ee47..c45fb47f4 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -56,12 +56,12 @@ module hazard( assign BranchFlushDE = PCSrcE | RetM | TrapM; - assign StallFCause = InstrStall | CSRWritePendingDEM; // stall at fetch if unable to get the instruction, + assign StallFCause = /*InstrStall | */ CSRWritePendingDEM; // stall at fetch if unable to get the instruction, // or if a CSR will be written and may change system behavior assign StallDCause = LoadStallD; // stall in decode if instruction is a load dependent on previous assign StallECause = 0; assign StallMCause = 0; // sDataStall; // not yet used*** - assign StallWCause = DataStall; // | InstrStall; + assign StallWCause = DataStall | InstrStall; // Each stage stalls if the next stage is stalled or there is a cause to stall this stage. assign StallF = StallD | StallFCause; diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 6c16ebd8f..3ba28a059 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -67,18 +67,20 @@ module ifu ( // *** put memory interface on here, InstrF becomes output assign InstrPAdrF = PCF; // *** no MMU - assign InstrReadF = ~StallD; // *** & ICacheMissF; add later + //assign InstrReadF = ~StallD; // *** & ICacheMissF; add later + assign InstrReadF = 1; // *** & ICacheMissF; add later assign PrivilegedChangePCM = RetM | TrapM; - assign StallExceptResolveBranchesF = StallF & ~(PCSrcE | PrivilegedChangePCM); + //assign StallExceptResolveBranchesF = StallF & ~(PCSrcE | PrivilegedChangePCM); // dh 2/8/2022 keep in instruction fetch stall mode when taking branch - flopr #(1) rbreg(clk, reset, (PCSrcE | PrivilegedChangePCM), ResolveBranchD); + //flopr #(1) rbreg(clk, reset, (PCSrcE | PrivilegedChangePCM), ResolveBranchD); mux3 #(`XLEN) pcmux(PCPlus2or4F, PCTargetE, PrivilegedNextPCM, {PrivilegedChangePCM, 
PCSrcE}, UnalignedPCNextF); assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment - flopenl #(`XLEN) pcreg(clk, reset, ~StallExceptResolveBranchesF, PCNextF, `RESET_VECTOR, PCF); +// flopenl #(`XLEN) pcreg(clk, reset, ~StallExceptResolveBranchesF, PCNextF, `RESET_VECTOR, PCF); + flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF); // pcadder // add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32 diff --git a/wally-pipelined/src/privileged/csr.sv b/wally-pipelined/src/privileged/csr.sv index 496b0a080..e1e76aa02 100644 --- a/wally-pipelined/src/privileged/csr.sv +++ b/wally-pipelined/src/privileged/csr.sv @@ -28,7 +28,7 @@ module csr ( input logic clk, reset, - input logic FlushW, + input logic FlushW, StallW, input logic [31:0] InstrM, input logic [`XLEN-1:0] PCM, SrcAM, input logic CSRWriteM, TrapM, MTrapM, STrapM, UTrapM, mretM, sretM, uretM, diff --git a/wally-pipelined/src/privileged/csrsr.sv b/wally-pipelined/src/privileged/csrsr.sv index 6db96dcc5..ecfbecfda 100644 --- a/wally-pipelined/src/privileged/csrsr.sv +++ b/wally-pipelined/src/privileged/csrsr.sv @@ -27,7 +27,7 @@ `include "wally-config.vh" module csrsr ( - input logic clk, reset, + input logic clk, reset, StallW, input logic WriteMSTATUSM, WriteSSTATUSM, WriteUSTATUSM, input logic TrapM, FloatRegWriteW, input logic [1:0] NextPrivilegeModeM, PrivilegeModeW, @@ -118,7 +118,7 @@ module csrsr ( STATUS_MIE <= 0; // Per Priv 3.3 STATUS_SIE <= `S_SUPPORTED; STATUS_UIE <= `U_SUPPORTED; - end else begin + end else if (~StallW) begin if (WriteMSTATUSM) begin STATUS_SUM_INT <= CSRWriteValM[18]; STATUS_MPRV_INT <= CSRWriteValM[17]; diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index be97b51cd..6b1249720 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// 
-// exceptions.sv +// privileged.sv // // Written: David_Harris@hmc.edu 5 January 2021 // Modified: @@ -45,7 +45,7 @@ module privileged ( input logic [`XLEN-1:0] InstrMisalignedAdrM, MemAdrM, input logic [4:0] SetFflagsM, output logic [2:0] FRM_REGW, - input logic FlushD, FlushE, FlushM, StallD + input logic FlushD, FlushE, FlushM, StallD, StallW ); logic [1:0] NextPrivilegeModeM, PrivilegeModeW; @@ -81,8 +81,8 @@ module privileged ( // PrivilegeMode FSM always_comb - if (reset) NextPrivilegeModeM = `M_MODE; // Privilege resets to 11 (Machine Mode) - else if (mretM) NextPrivilegeModeM = STATUS_MPP; + /* if (reset) NextPrivilegeModeM = `M_MODE; // Privilege resets to 11 (Machine Mode) // moved reset to flop + else */ if (mretM) NextPrivilegeModeM = STATUS_MPP; else if (sretM) NextPrivilegeModeM = {1'b0, STATUS_SPP}; else if (uretM) NextPrivilegeModeM = `U_MODE; else if (TrapM) begin // Change privilege based on DELEG registers (see 3.1.8) @@ -96,7 +96,7 @@ module privileged ( else NextPrivilegeModeM = `M_MODE; end else NextPrivilegeModeM = PrivilegeModeW; - flop #(2) privmodereg(clk, NextPrivilegeModeM, PrivilegeModeW); + flopenl #(2) privmodereg(clk, reset, ~StallW, NextPrivilegeModeM, `M_MODE, PrivilegeModeW); /////////////////////////////////////////// // decode privileged instructions diff --git a/wally-pipelined/src/uncore/dtim.sv b/wally-pipelined/src/uncore/dtim.sv index 611183605..4216b356c 100644 --- a/wally-pipelined/src/uncore/dtim.sv +++ b/wally-pipelined/src/uncore/dtim.sv @@ -37,6 +37,7 @@ module dtim ( logic [`XLEN-1:0] RAM[0:65535]; logic [18:0] HWADDR; + logic [`XLEN-1:0] HREADTim0; // logic [`XLEN-1:0] write; logic [15:0] entry; @@ -44,27 +45,28 @@ module dtim ( logic [3:0] busycount; // busy FSM to extend READY signal -/* always_ff @(posedge HCLK, negedge HRESETn) + always_ff @(posedge HCLK, negedge HRESETn) if (~HRESETn) begin HREADYTim <= 1; end else begin if (HREADYTim & HSELTim) begin busycount <= 0; - HREADYTim <= 0; + HREADYTim <= #1 0; end 
else if (~HREADYTim) begin - if (busycount == 0) begin // TIM latency, for testing purposes - HREADYTim <= 1; + if (busycount == 2) begin // TIM latency, for testing purposes + HREADYTim <= #1 1; end else begin busycount <= busycount + 1; end end - end*/ - always_ff @(posedge HCLK, negedge HRESETn) + end + + /* always_ff @(posedge HCLK, negedge HRESETn) if (~HRESETn) begin HREADYTim <= 0; end else begin HREADYTim <= HSELTim; // always respond one cycle later - end + end */ assign memread = MemRWtim[1]; @@ -73,77 +75,32 @@ module dtim ( // memwrite <= MemRWtim[0]; // delay memwrite to write phase assign HRESPTim = 0; // OK // assign HREADYTim = 1; // Respond immediately; *** extend this - - // word aligned reads -/* generate - if (`XLEN==64) - assign #2 entry = HADDR[18:3]; - else - assign #2 entry = HADDR[17:2]; - endgenerate */ -// assign HREADTim = RAM[entry]; -// assign HREADTim = HREADYTim ? RAM[entry] : ~RAM[entry]; // *** temproary mess up read value before ready - // write each byte based on the byte mask - // UInstantiate a byte-writable memory here if possible - // and drop tihs masking logic. 
Otherwise, use the masking - // from dmem - /*generate - - if (`XLEN==64) begin - always_comb begin - write=HREADTim; - if (ByteMaskM[0]) write[7:0] = HWDATA[7:0]; - if (ByteMaskM[1]) write[15:8] = HWDATA[15:8]; - if (ByteMaskM[2]) write[23:16] = HWDATA[23:16]; - if (ByteMaskM[3]) write[31:24] = HWDATA[31:24]; - if (ByteMaskM[4]) write[39:32] = HWDATA[39:32]; - if (ByteMaskM[5]) write[47:40] = HWDATA[47:40]; - if (ByteMaskM[6]) write[55:48] = HWDATA[55:48]; - if (ByteMaskM[7]) write[63:56] = HWDATA[63:56]; - end - always_ff @(posedge clk) - if (memwrite) RAM[HADDR[18:3]] <= write; - end else begin // 32-bit - always_comb begin - write=HREADTim; - if (ByteMaskM[0]) write[7:0] = HWDATA[7:0]; - if (ByteMaskM[1]) write[15:8] = HWDATA[15:8]; - if (ByteMaskM[2]) write[23:16] = HWDATA[23:16]; - if (ByteMaskM[3]) write[31:24] = HWDATA[31:24]; - end - always_ff @(posedge clk) - if (memwrite) RAM[HADDR[17:2]] <= write; - end - endgenerate */ // Model memory read and write - // If write occurs at end of phase (rising edge of clock), - // then read of same address on next cycle won't work. Would need to bypass. - // Faking for now with negedge clock write. Will need to adjust this to - // match capabilities of FPGA or actual chip RAM. 
- // Also, writes occuring later than reads throws off single ported RAM that - // might be asked to write on one instruction and read on the next and would need - // to stall because both accesses happen on same cycle with AHB delay generate if (`XLEN == 64) begin - always_ff @(negedge HCLK) - if (memwrite) RAM[HWADDR[17:3]] <= HWDATA; +// always_ff @(negedge HCLK) +// if (memwrite) RAM[HWADDR[17:3]] <= HWDATA; always_ff @(posedge HCLK) begin //if (memwrite) RAM[HADDR[17:3]] <= HWDATA; HWADDR <= HADDR; - HREADTim <= RAM[HADDR[17:3]]; + HREADTim0 <= RAM[HADDR[17:3]]; + if (memwrite && HREADYTim) RAM[HWADDR[17:3]] <= HWDATA; end end else begin - always_ff @(negedge HCLK) - if (memwrite) RAM[HWADDR[17:2]] <= HWDATA; +// always_ff @(negedge HCLK) +// if (memwrite) RAM[HWADDR[17:2]] <= HWDATA; always_ff @(posedge HCLK) begin //if (memwrite) RAM[HADDR[17:2]] <= HWDATA; HWADDR <= HADDR; - HREADTim <= RAM[HADDR[17:2]]; + HREADTim0 <= RAM[HADDR[17:2]]; + if (memwrite && HREADYTim) RAM[HWADDR[17:2]] <= HWDATA; end end endgenerate + + assign HREADTim = HREADYTim ? 
HREADTim0 : 'bz; endmodule diff --git a/wally-pipelined/testgen/testgen-ADD-SUB.py b/wally-pipelined/testgen/testgen-ADD-SUB-SLT-SLTU-XOR-OR-AND.py similarity index 82% rename from wally-pipelined/testgen/testgen-ADD-SUB.py rename to wally-pipelined/testgen/testgen-ADD-SUB-SLT-SLTU-XOR-OR-AND.py index 16e02be6b..9f795036b 100755 --- a/wally-pipelined/testgen/testgen-ADD-SUB.py +++ b/wally-pipelined/testgen/testgen-ADD-SUB-SLT-SLTU-XOR-OR-AND.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 ################################## -# testgen-ADD-SUB.py +# testgen-ADD-SUB-SLT-SLTU-XOR-OR-AND.py # # David_Harris@hmc.edu 19 January 2021 # @@ -19,11 +19,34 @@ from random import getrandbits # functions ################################## -def computeExpected(a, b, test): +def twoscomp(a): + amsb = a >> (xlen-1) + alsbs = ((1 << (xlen-1)) - 1) & a + if (amsb): + asigned = a - (1<> (xlen-1) + alsbs = ((1 << (xlen-1)) - 1) & a + if (amsb): + asigned = a - (1<> b + elif (test == "SRA"): + return asigned >> b + else: + die("bad test name ", test) + # exit(1) + +def randRegs(): + reg1 = randint(1,31) + reg2 = randint(1,31) + reg3 = randint(1,31) + if (reg1 == 6 or reg2 == 6 or reg3 == 6 or reg1 == reg2): + return randRegs() + else: + return reg1, reg2, reg3 + +def writeVector(a, b, storecmd, xlen): + global testnum + expected = computeExpected(a, b, test, xlen) + expected = expected % 2**xlen # drop carry if necessary + if (expected < 0): # take twos complement + expected = 2**xlen + expected + reg1, reg2, reg3 = randRegs() + lines = "\n# Testcase " + str(testnum) + ": rs1:x" + str(reg1) + "(" + formatstr.format(a) + lines = lines + "), rs2:x" + str(reg2) + "(" +formatstr.format(b) + lines = lines + "), result rd:x" + str(reg3) + "(" + formatstr.format(expected) +")\n" + lines = lines + "li x" + str(reg1) + ", MASK_XLEN(" + formatstr.format(a) + ")\n" + lines = lines + "li x" + str(reg2) + ", MASK_XLEN(" + formatstr.format(b) + ")\n" + lines = lines + test + " x" + str(reg3) + ", x" + str(reg1) 
+ ", x" + str(reg2) + "\n" + lines = lines + storecmd + " x" + str(reg3) + ", " + str(wordsize*testnum) + "(x6)\n" + lines = lines + "RVTEST_IO_ASSERT_GPR_EQ(x7, " + str(reg3) +", "+formatstr.format(expected)+")\n" + f.write(lines) + if (xlen == 32): + line = formatrefstr.format(expected)+"\n" + else: + line = formatrefstr.format(expected % 2**32)+"\n" + formatrefstr.format(expected >> 32) + "\n" + r.write(line) + testnum = testnum+1 + +################################## +# main body +################################## + +# change these to suit your tests +tests = ["SLL", "SRL", "SRA"] +author = "David_Harris@hmc.edu" +xlens = [32, 64] +numrand = 48 + +# setup +seed(0) # make tests reproducible + +# generate files for each test +for xlen in xlens: + formatstrlen = str(int(xlen/4)) + formatstr = "0x{:0" + formatstrlen + "x}" # format as xlen-bit hexadecimal number + formatrefstr = "{:08x}" # format as a 32-bit word (8 hex digits) with no leading 0x + if (xlen == 32): + storecmd = "sw" + wordsize = 4 + else: + storecmd = "sd" + wordsize = 8 + for test in tests: + corners = [0, 1, 2, 0xFF, 0x624B3E976C52DD14 % 2**xlen, 2**(xlen-1)-2, 2**(xlen-1)-1, + 2**(xlen-1), 2**(xlen-1)+1, 0xC365DDEB9173AB42 % 2**xlen, 2**(xlen)-2, 2**(xlen)-1] + if (xlen == 32): + shamt = [0, 1, 2, 3, 4, 8, 15, 16, 29, 30, 31] + else: + shamt = [0, 1, 3, 8, 15, 16, 29, 31, 32, 47, 48, 62, 63] + + imperaspath = "../../imperas-riscv-tests/riscv-test-suite/rv" + str(xlen) + "i/" + basename = "WALLY-" + test + fname = imperaspath + "src/" + basename + ".S" + refname = imperaspath + "references/" + basename + ".reference_output" + testnum = 0 + + # print custom header part + f = open(fname, "w") + r = open(refname, "w") + line = "///////////////////////////////////////////\n" + f.write(line) + lines="// "+fname+ "\n// " + author + "\n" + f.write(lines) + line ="// Created " + str(datetime.now()) + f.write(line) + + # insert generic header + h = open("testgen_header.S", "r") + for line in h: + 
f.write(line) + + # print directed and random test vectors + for a in corners: + for b in shamt: + writeVector(a, b, storecmd, xlen) + for i in range(0,numrand): + a = getrandbits(xlen) + b = getrandbits(xlen) + writeVector(a, b, storecmd, xlen) + + + # print footer + h = open("testgen_footer.S", "r") + for line in h: + f.write(line) + + # Finish + lines = ".fill " + str(testnum) + ", " + str(wordsize) + ", -1\n" + lines = lines + "\nRV_COMPLIANCE_DATA_END\n" + f.write(lines) + f.close() + r.close() + + + + From f372e2b8e8d75270af6c0ea52b933967f4f71e4f Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 22 Feb 2021 13:48:30 -0500 Subject: [PATCH 07/10] Debugging Bus interface --- wally-pipelined/src/ebu/ahblite.sv | 25 +++++++++++++------ wally-pipelined/src/uncore/clint.sv | 5 ++++ .../testbench/testbench-imperas.sv | 7 +++--- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index e917b6680..655714762 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -69,7 +69,7 @@ module ahblite ( logic GrantData; logic [2:0] ISize; - logic [`AHBW-1:0] HRDATAMasked, ReadDataM; + logic [`AHBW-1:0] HRDATAMasked, ReadDataM, ReadDataPreW; logic IReady, DReady; // logic [3:0] HSIZED; // size delayed by one cycle for reads // logic [2:0] HADDRD; // address delayed for subword reads @@ -114,6 +114,12 @@ module ahblite ( assign #2 DataStall = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || (NextBusState == INSTRREADMEMPENDING); assign #1 InstrStall = (NextBusState == INSTRREAD); // assign InstrUpdate = (BusState == INSTRREADMEMPENDING) && (NextBusState != INSTRREADMEMPENDING); + + // DH 2/20/21: A cyclic path presently exists + // HREADY->NextBusState->GrantData->HSIZE->HSELUART->HREADY + // This is because the peripherals assert HREADY on the same cycle + // When memory is working, also fix the peripherals to respond on the subsequent cycle + // 
and this path should be fixed. // bus outputs assign #1 GrantData = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE); @@ -131,6 +137,16 @@ module ahblite ( flop #(4) sizereg(HCLK, {UnsignedLoadM, HSIZE}, HSIZED); flop #(1) writereg(HCLK, HWRITE, HWRITED); + // Route signals to Instruction and Data Caches + // *** assumes AHBW = XLEN + assign InstrRData = HRDATAMasked[31:0]; +// assign ReadDataW = HRDATAMasked; + assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021 + assign CaptureDataM = (BusState == MEMREAD) && (NextBusState != MEMREAD); + flopenr #(`XLEN) ReadDataPreWReg(clk, reset, CaptureDataM, ReadDataM, ReadDataPreW); // *** this may break when there is no instruction read after data read + flopenr #(`XLEN) ReadDataWReg(clk, reset, ~StallW, ReadDataPreW, ReadDataW); + + /* typedef enum {IDLE, MEMREAD, MEMWRITE, INSTRREAD} statetype; statetype AdrState, DataState, NextAdrState; // what is happening in the first and second phases of the bus @@ -209,13 +225,6 @@ module ahblite ( assign HTRANS = InstrReadF | MemReadM | MemWriteM ? 
2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise assign HMASTLOCK = 0; // no locking supported */ - // Route signals to Instruction and Data Caches - // *** assumes AHBW = XLEN - assign InstrRData = HRDATAMasked[31:0]; -// assign ReadDataW = HRDATAMasked; - assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021 - assign CaptureDataM = (BusState == MEMREAD) && (NextBusState != MEMREAD); - flopenr #(`XLEN) ReadDataWReg(clk, reset, CaptureDataM, ReadDataM, ReadDataW); // stalls diff --git a/wally-pipelined/src/uncore/clint.sv b/wally-pipelined/src/uncore/clint.sv index 00804a480..e3fc1ea04 100644 --- a/wally-pipelined/src/uncore/clint.sv +++ b/wally-pipelined/src/uncore/clint.sv @@ -54,6 +54,11 @@ module clint ( assign #2 entry = {HADDR[15:2], 2'b00}; endgenerate + // DH 2/20/21: Eventually allow MTIME to run off a separate clock + // This will require synchronizing MTIME to the system clock + // before it is read or compared to MTIMECMP. + // It will also require synchronizing the write to MTIMECMP. + // Use req and ack signals synchronized across the clock domains. 
// register access generate diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index cad2d68d2..fd49fee0a 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -75,6 +75,7 @@ string tests64iNOc[] = { "rv64i/I-MISALIGN_JMP-01","2000" }; string tests64i[] = '{ + "rv64i/I-ECALL-01", "2000", "rv64i/I-ENDIANESS-01", "2010", "rv64i/I-ADD-01", "3000", "rv64i/I-ADDI-01", "3000", @@ -90,8 +91,8 @@ string tests64iNOc[] = { "rv64i/I-BLTU-01", "4000", "rv64i/I-BNE-01", "4000", "rv64i/I-DELAY_SLOTS-01", "2000", - "rv64i/I-EBREAK-01", "2000", - "rv64i/I-ECALL-01", "2000", +// "rv64i/I-EBREAK-01", "2000", +// "rv64i/I-ECALL-01", "2000", "rv64i/I-ENDIANESS-01", "2010", "rv64i/I-IO-01", "2050", "rv64i/I-JAL-01", "3000", @@ -104,7 +105,7 @@ string tests64iNOc[] = { "rv64i/I-LUI-01", "2000", "rv64i/I-LW-01", "4110", "rv64i/I-LWU-01", "4110", - "rv64i/I-MISALIGN_LDST-01", "2010", + //"rv64i/I-MISALIGN_LDST-01", "2010", "rv64i/I-NOP-01", "2000", "rv64i/I-OR-01", "3000", "rv64i/I-ORI-01", "3000", From 7737b0f7093f276c5b7afdc4a10058742e076639 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 23 Feb 2021 09:08:57 -0500 Subject: [PATCH 08/10] Fixed fetch stall after jump in bus unit --- wally-pipelined/src/hazard/hazard.sv | 6 +++++- wally-pipelined/src/privileged/csr.sv | 4 +++- wally-pipelined/testbench/testbench-imperas.sv | 11 ++++------- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index c45fb47f4..a6cdd82cf 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -56,7 +56,11 @@ module hazard( assign BranchFlushDE = PCSrcE | RetM | TrapM; - assign StallFCause = /*InstrStall | */ CSRWritePendingDEM; // stall at fetch if unable to get the instruction, + // changed 2/22/21 harris to turn off stallF when RetM or TrapM + // changed 
2/23/21 harris to BranchFlushDE to solve bug in ECALL about JAL being ignored +// assign StallFCause = /*InstrStall | */ CSRWritePendingDEM; // stall at fetch if unable to get the instruction, +// assign StallFCause = /*InstrStall | */ CSRWritePendingDEM & ~(RetM | TrapM); // stall at fetch if unable to get the instruction, + assign StallFCause = /*InstrStall | */ CSRWritePendingDEM & ~(BranchFlushDE); // stall at fetch if unable to get the instruction, // or if a CSR will be written and may change system behavior assign StallDCause = LoadStallD; // stall in decode if instruction is a load dependent on previous assign StallECause = 0; diff --git a/wally-pipelined/src/privileged/csr.sv b/wally-pipelined/src/privileged/csr.sv index e1e76aa02..bee0ba86c 100644 --- a/wally-pipelined/src/privileged/csr.sv +++ b/wally-pipelined/src/privileged/csr.sv @@ -102,7 +102,9 @@ module csr ( // merge CSR Reads assign CSRReadValM = CSRUReadValM | CSRSReadValM | CSRMReadValM | CSRCReadValM | CSRNReadValM; - floprc #(`XLEN) CSRValWReg(clk, reset, FlushW, CSRReadValM, CSRReadValW); + // *** add W stall 2/22/21 dh to try fixing memory stalls +// floprc #(`XLEN) CSRValWReg(clk, reset, FlushW, CSRReadValM, CSRReadValW); + flopenrc #(`XLEN) CSRValWReg(clk, reset, FlushW, ~StallW, CSRReadValM, CSRReadValW); // merge illegal accesses: illegal if none of the CSR addresses is legal or privilege is insufficient assign InsufficientCSRPrivilegeM = (CSRAdrM[9:8] == 2'b11 && PrivilegeModeW != `M_MODE) || diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index fd49fee0a..491301cd6 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -75,8 +75,6 @@ string tests64iNOc[] = { "rv64i/I-MISALIGN_JMP-01","2000" }; string tests64i[] = '{ - "rv64i/I-ECALL-01", "2000", - "rv64i/I-ENDIANESS-01", "2010", "rv64i/I-ADD-01", "3000", "rv64i/I-ADDI-01", "3000", "rv64i/I-ADDIW-01", "3000", @@ 
-91,8 +89,8 @@ string tests64iNOc[] = { "rv64i/I-BLTU-01", "4000", "rv64i/I-BNE-01", "4000", "rv64i/I-DELAY_SLOTS-01", "2000", -// "rv64i/I-EBREAK-01", "2000", -// "rv64i/I-ECALL-01", "2000", + "rv64i/I-EBREAK-01", "2000", + "rv64i/I-ECALL-01", "2000", "rv64i/I-ENDIANESS-01", "2010", "rv64i/I-IO-01", "2050", "rv64i/I-JAL-01", "3000", @@ -105,7 +103,7 @@ string tests64iNOc[] = { "rv64i/I-LUI-01", "2000", "rv64i/I-LW-01", "4110", "rv64i/I-LWU-01", "4110", - //"rv64i/I-MISALIGN_LDST-01", "2010", + "rv64i/I-MISALIGN_LDST-01", "2010", "rv64i/I-NOP-01", "2000", "rv64i/I-OR-01", "3000", "rv64i/I-ORI-01", "3000", @@ -140,7 +138,6 @@ string tests64iNOc[] = { "rv64i/WALLY-SUB", "4000" }; string tests32ic[] = '{ -// "rv32ic/WALLY-C-ADHOC-01", "2000", "rv32ic/I-C-ADD-01", "2000", "rv32ic/I-C-ADDI-01", "2000", "rv32ic/I-C-AND-01", "2000", @@ -239,7 +236,7 @@ string tests32i[] = { initial if (`XLEN == 64) begin // RV64 tests = {tests64i}; - if (`C_SUPPORTED % 2 == 1) tests = {tests, tests64ic}; + if (`C_SUPPORTED % 2 == 1) tests = {tests64ic, tests}; else tests = {tests, tests64iNOc}; end else begin // RV32 tests = {tests32i}; From 38b8cc652c39ff396b66bed4e854d81c364171fe Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 24 Feb 2021 07:25:03 -0500 Subject: [PATCH 09/10] All tests passing with bus interface --- wally-pipelined/src/ebu/ahblite.sv | 9 ++++++--- wally-pipelined/src/ifu/ifu.sv | 17 ++++++++++++++--- wally-pipelined/src/uncore/imem.sv | 3 ++- wally-pipelined/src/wally/wallypipelinedhart.sv | 6 ++++-- wally-pipelined/src/wally/wallypipelinedsoc.sv | 1 + wally-pipelined/testbench/testbench-imperas.sv | 2 +- 6 files changed, 28 insertions(+), 10 deletions(-) diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 655714762..ef26a8004 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -39,7 +39,7 @@ module ahblite ( input logic [`XLEN-1:0] InstrPAdrF, // *** rename these to match block 
diagram input logic InstrReadF, // input logic ResolveBranchD, - output logic [31:0] InstrRData, + output logic [`XLEN-1:0] InstrRData, // Signals from Data Cache input logic [`XLEN-1:0] MemPAdrM, input logic MemReadM, MemWriteM, @@ -71,6 +71,7 @@ module ahblite ( logic [2:0] ISize; logic [`AHBW-1:0] HRDATAMasked, ReadDataM, ReadDataPreW; logic IReady, DReady; + logic CaptureDataM; // logic [3:0] HSIZED; // size delayed by one cycle for reads // logic [2:0] HADDRD; // address delayed for subword reads @@ -139,8 +140,10 @@ module ahblite ( // Route signals to Instruction and Data Caches // *** assumes AHBW = XLEN - assign InstrRData = HRDATAMasked[31:0]; -// assign ReadDataW = HRDATAMasked; + + // fix harris 2/24/21 to read all WLEN bits directly for instruction + assign InstrRData = HRDATA; + assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021 assign CaptureDataM = (BusState == MEMREAD) && (NextBusState != MEMREAD); flopenr #(`XLEN) ReadDataPreWReg(clk, reset, CaptureDataM, ReadDataM, ReadDataPreW); // *** this may break when there is no instruction read after data read diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 3ba28a059..3deb64255 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -31,7 +31,7 @@ module ifu ( input logic StallF, StallD, StallE, StallM, StallW, input logic FlushD, FlushE, FlushM, FlushW, // Fetch - input logic [31:0] InstrF, + input logic [`XLEN-1:0] InstrInF, output logic [`XLEN-1:0] PCF, output logic [`XLEN-1:0] InstrPAdrF, output logic InstrReadF, @@ -53,7 +53,9 @@ module ifu ( input logic IllegalBaseInstrFaultD, output logic IllegalIEUInstrFaultD, output logic InstrMisalignedFaultM, - output logic [`XLEN-1:0] InstrMisalignedAdrM + output logic [`XLEN-1:0] InstrMisalignedAdrM, + // bogus + input logic [15:0] rd2 ); logic [`XLEN-1:0] UnalignedPCNextF, PCNextF; @@ -62,7 +64,7 @@ module ifu ( logic IllegalCompInstrD; logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, 
PCD, PCW, PCLinkD, PCLinkE, PCLinkM; logic CompressedF; - logic [31:0] InstrRawD, InstrE, InstrW; + logic [31:0] InstrF, InstrRawD, InstrE, InstrW; logic [31:0] nop = 32'h00000013; // instruction for NOP // *** put memory interface on here, InstrF becomes output @@ -94,6 +96,15 @@ module ifu ( else PCPlus2or4F = {PCF[`XLEN-1:2], 2'b10}; else PCPlus2or4F = {PCPlusUpperF, PCF[1:0]}; // add 4 + // harris 2/23/21 Add code to fetch instruction split across two words + generate + if (`XLEN==32) begin + assign InstrF = PCF[1] ? {rd2[15:0], InstrInF[31:16]} : InstrInF; + end else begin + assign InstrF = PCF[2] ? (PCF[1] ? {rd2[15:0], InstrInF[63:48]} : InstrInF[63:32]) + : (PCF[1] ? InstrInF[47:16] : InstrInF[31:0]); + end + endgenerate // Decode stage pipeline register and logic flopenl #(32) InstrDReg(clk, reset, ~StallD, (FlushD ? nop : InstrF), nop, InstrRawD); diff --git a/wally-pipelined/src/uncore/imem.sv b/wally-pipelined/src/uncore/imem.sv index 274be7dbf..09a6c2ce8 100644 --- a/wally-pipelined/src/uncore/imem.sv +++ b/wally-pipelined/src/uncore/imem.sv @@ -28,6 +28,7 @@ module imem ( input logic [`XLEN-1:1] AdrF, output logic [31:0] InstrF, + output logic [15:0] rd2, // bogus, delete when real multicycle fetch works output logic InstrAccessFaultF); /* verilator lint_off UNDRIVEN */ @@ -35,7 +36,7 @@ module imem ( /* verilator lint_on UNDRIVEN */ logic [15:0] adrbits; logic [`XLEN-1:0] rd; - logic [15:0] rd2; +// logic [15:0] rd2; generate if (`XLEN==32) assign adrbits = AdrF[17:2]; diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 56c98d386..2819ec0a6 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -35,6 +35,7 @@ module wallypipelinedhart ( input logic InstrAccessFaultF, input logic DataAccessFaultM, // Bus Interface + input logic [15:0] rd2, // bogus, delete when real multicycle fetch works input logic [`AHBW-1:0] HRDATA, input 
logic HREADY, HRESP, output logic HCLK, HRESETn, @@ -90,11 +91,12 @@ module wallypipelinedhart ( logic [`XLEN-1:0] MemAdrM, MemPAdrM, WriteDataM; logic [`XLEN-1:0] ReadDataW; logic [`XLEN-1:0] InstrPAdrF; + logic [`XLEN-1:0] InstrRData; logic InstrReadF; logic DataStall, InstrStall; logic InstrAckD, MemAckW; - ifu ifu(.*); // instruction fetch unit: PC, branch prediction, instruction cache + ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache ieu ieu(.*); // inteber execution unit: integer register file, datapath and controller dmem dmem(.*); // data cache unit @@ -102,7 +104,7 @@ module wallypipelinedhart ( ahblite ebu( //.InstrReadF(1'b0), - .InstrRData(InstrF), // hook up InstrF later + //.InstrRData(InstrF), // hook up InstrF later .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), .*); diff --git a/wally-pipelined/src/wally/wallypipelinedsoc.sv b/wally-pipelined/src/wally/wallypipelinedsoc.sv index bdb621726..7b8883c57 100644 --- a/wally-pipelined/src/wally/wallypipelinedsoc.sv +++ b/wally-pipelined/src/wally/wallypipelinedsoc.sv @@ -67,6 +67,7 @@ module wallypipelinedsoc ( logic [2:0] HADDRD; logic [3:0] HSIZED; logic HWRITED; + logic [15:0] rd2; // bogus, delete when real multicycle fetch works // instantiate processor and memories wallypipelinedhart hart(.*); diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 491301cd6..8ccad1caa 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -236,7 +236,7 @@ string tests32i[] = { initial if (`XLEN == 64) begin // RV64 tests = {tests64i}; - if (`C_SUPPORTED % 2 == 1) tests = {tests64ic, tests}; + if (`C_SUPPORTED % 2 == 1) tests = {tests, tests64ic}; else tests = {tests, tests64iNOc}; end else begin // RV32 tests = {tests32i};