diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index a46bfbe80..4a9c303d4 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -61,7 +61,7 @@ // Bus Interface width `define AHBW 64 -// Peripheral Addresses +// Peripheral Physical Addresses // Peripheral memory space extends from BASE to BASE+RANGE // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do index b7f41535d..227ea3770 100644 --- a/wally-pipelined/regression/wally-pipelined.do +++ b/wally-pipelined/regression/wally-pipelined.do @@ -45,11 +45,14 @@ view wave add wave /testbench/clk add wave /testbench/reset add wave -divider -add wave /testbench/dut/hart/ebu/IReadF +#add wave /testbench/dut/hart/ebu/IReadF add wave /testbench/dut/hart/DataStall add wave /testbench/dut/hart/InstrStall add wave /testbench/dut/hart/StallF add wave /testbench/dut/hart/StallD +add wave /testbench/dut/hart/StallE +add wave /testbench/dut/hart/StallM +add wave /testbench/dut/hart/StallW add wave /testbench/dut/hart/FlushD add wave /testbench/dut/hart/FlushE add wave /testbench/dut/hart/FlushM @@ -59,26 +62,27 @@ add wave -divider add wave -hex /testbench/dut/hart/ifu/PCF add wave -hex /testbench/dut/hart/ifu/InstrF add wave /testbench/InstrFName -#add wave -hex /testbench/dut/hart/ifu/PCD +add wave -hex /testbench/dut/hart/ifu/PCD add wave -hex /testbench/dut/hart/ifu/InstrD add wave /testbench/InstrDName add wave -divider -#add wave -hex /testbench/dut/hart/ifu/PCE -#add wave -hex /testbench/dut/hart/ifu/InstrE +add wave -hex /testbench/dut/hart/ifu/PCE +add wave -hex /testbench/dut/hart/ifu/InstrE add wave /testbench/InstrEName add wave -hex /testbench/dut/hart/ieu/dp/SrcAE add wave -hex /testbench/dut/hart/ieu/dp/SrcBE add wave -hex 
/testbench/dut/hart/ieu/dp/ALUResultE add wave /testbench/dut/hart/ieu/dp/PCSrcE add wave -divider -#add wave -hex /testbench/dut/hart/ifu/PCM -#add wave -hex /testbench/dut/hart/ifu/InstrM +add wave -hex /testbench/dut/hart/ifu/PCM +add wave -hex /testbench/dut/hart/ifu/InstrM add wave /testbench/InstrMName add wave /testbench/dut/uncore/dtim/memwrite add wave -hex /testbench/dut/uncore/HADDR add wave -hex /testbench/dut/uncore/HWDATA add wave -divider add wave -hex /testbench/dut/hart/ifu/PCW +add wave -hex /testbench/dut/hart/ifu/InstrW add wave /testbench/InstrWName add wave /testbench/dut/hart/ieu/dp/RegWriteW add wave -hex /testbench/dut/hart/ieu/dp/ResultW @@ -101,6 +105,6 @@ configure wave -childrowmargin 2 set DefaultRadix hexadecimal -- Run the Simulation -#run 1000 -run -all +run 2000 +#run -all #quit diff --git a/wally-pipelined/src/dmem/dmem.sv b/wally-pipelined/src/dmem/dmem.sv index 4c602659f..aab74fadc 100644 --- a/wally-pipelined/src/dmem/dmem.sv +++ b/wally-pipelined/src/dmem/dmem.sv @@ -30,19 +30,19 @@ module dmem ( input logic clk, reset, input logic FlushW, - output logic DataStall, + //output logic DataStall, // Memory Stage input logic [1:0] MemRWM, input logic [`XLEN-1:0] MemAdrM, input logic [2:0] Funct3M, - input logic [`XLEN-1:0] ReadDataM, + //input logic [`XLEN-1:0] ReadDataW, input logic [`XLEN-1:0] WriteDataM, output logic [`XLEN-1:0] MemPAdrM, - output logic [1:0] MemRWAlignedM, + output logic MemReadM, MemWriteM, output logic DataMisalignedM, // Writeback Stage input logic MemAckW, - output logic [`XLEN-1:0] ReadDataW, + input logic [`XLEN-1:0] ReadDataW, // faults input logic DataAccessFaultM, output logic LoadMisalignedFaultM, LoadAccessFaultM, @@ -52,9 +52,6 @@ module dmem ( // Initially no MMU assign MemPAdrM = MemAdrM; - // Pipeline register *** AHB data will eventually come back in W anyway - floprc #(`XLEN) ReadDataWReg(clk, reset, FlushW, ReadDataM, ReadDataW); - // Determine if an Unaligned access is taking place 
always_comb case(Funct3M[1:0]) @@ -66,7 +63,9 @@ module dmem ( // Squash unaligned data accesses // *** this is also the place to squash if the cache is hit - assign MemRWAlignedM = MemRWM & {2{~DataMisalignedM}}; + assign MemReadM = MemRWM[1] & ~DataMisalignedM; + assign MemWriteM = MemRWM[0] & ~DataMisalignedM; +// assign MemRWAlignedM = MemRWM & {2{~DataMisalignedM}}; // Determine if address is valid assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1]; @@ -75,7 +74,7 @@ module dmem ( assign StoreAccessFaultM = DataAccessFaultM & MemRWM[0]; // Data stall - assign DataStall = 0; + //assign DataStall = 0; endmodule diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 8ce17545f..ef26a8004 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -32,21 +32,21 @@ module ahblite ( input logic clk, reset, + input logic StallW, FlushW, // Load control input logic UnsignedLoadM, // Signals from Instruction Cache input logic [`XLEN-1:0] InstrPAdrF, // *** rename these to match block diagram - input logic IReadF, - output logic [`XLEN-1:0] IRData, -// output logic IReady, + input logic InstrReadF, +// input logic ResolveBranchD, + output logic [`XLEN-1:0] InstrRData, // Signals from Data Cache input logic [`XLEN-1:0] MemPAdrM, - input logic DReadM, DWriteM, + input logic MemReadM, MemWriteM, input logic [`XLEN-1:0] WriteDataM, - input logic [1:0] DSizeM, + input logic [1:0] MemSizeM, // Return from bus - output logic [`XLEN-1:0] DRData, -// output logic DReady, + output logic [`XLEN-1:0] ReadDataW, // AHB-Lite external signals input logic [`AHBW-1:0] HRDATA, input logic HREADY, HRESP, @@ -59,48 +59,176 @@ module ahblite ( output logic [3:0] HPROT, output logic [1:0] HTRANS, output logic HMASTLOCK, - // Acknowledge - output logic InstrAckD, MemAckW + // Delayed signals for writes + output logic [2:0] HADDRD, + output logic [3:0] HSIZED, + output logic HWRITED, // Stalls -// output logic 
InstrStall, DataStall + output logic InstrStall,/*InstrUpdate, */DataStall ); logic GrantData; logic [2:0] ISize; - logic [`AHBW-1:0] HRDATAMasked; + logic [`AHBW-1:0] HRDATAMasked, ReadDataM, ReadDataPreW; logic IReady, DReady; + logic CaptureDataM; +// logic [3:0] HSIZED; // size delayed by one cycle for reads +// logic [2:0] HADDRD; // address delayed for subword reads assign HCLK = clk; assign HRESETn = ~reset; - // Arbitrate requests by giving data priority over instructions - assign GrantData = DReadM | DWriteM; - // *** initially support HABW = XLEN + // track bus state + // Data accesses have priority over instructions. However, if a data access comes + // while an instruction read is occurring, the instruction read finishes before + // the data access can take place. + typedef enum {IDLE, MEMREAD, MEMWRITE, INSTRREAD, INSTRREADMEMPENDING} statetype; + statetype BusState, NextBusState; + + always_ff @(posedge HCLK, negedge HRESETn) + if (~HRESETn) BusState <= #1 IDLE; + else BusState <= #1 NextBusState; + + always_comb + case (BusState) + IDLE: if (MemReadM) NextBusState = MEMREAD; // Memory has priority over instructions + else if (MemWriteM) NextBusState = MEMWRITE; + else if (InstrReadF) NextBusState = INSTRREAD; + else NextBusState = IDLE; + MEMREAD: if (~HREADY) NextBusState = MEMREAD; + else if (InstrReadF) NextBusState = INSTRREAD; + else NextBusState = IDLE; + MEMWRITE: if (~HREADY) NextBusState = MEMWRITE; + else if (InstrReadF) NextBusState = INSTRREAD; + else NextBusState = IDLE; + INSTRREAD: //if (~HREADY & (MemReadM | MemWriteM)) NextBusState = INSTRREADMEMPENDING; // *** shouldn't happen, delete + if (~HREADY) NextBusState = INSTRREAD; + else NextBusState = IDLE; + INSTRREADMEMPENDING: if (~HREADY) NextBusState = INSTRREADMEMPENDING; // *** shouldn't happen, delete + else if (MemReadM) NextBusState = MEMREAD; + else NextBusState = MEMWRITE; // must be write if not a read. Don't return to idle. 
+ endcase + + // stall signals + assign #2 DataStall = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || (NextBusState == INSTRREADMEMPENDING); + assign #1 InstrStall = (NextBusState == INSTRREAD); + // assign InstrUpdate = (BusState == INSTRREADMEMPENDING) && (NextBusState != INSTRREADMEMPENDING); + + // DH 2/20/22: A cyclic path presently exists + // HREADY->NextBusState->GrantData->HSIZE->HSELUART->HREADY + // This is because the peripherals assert HREADY on the same cycle + // When memory is working, also fix the peripherals to respond on the subsequent cycle + // and this path should be fixed. + + // bus outputs + assign #1 GrantData = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE); + assign #1 HADDR = (GrantData) ? MemPAdrM[31:0] : InstrPAdrF[31:0]; + assign #1 HSIZE = GrantData ? {1'b0, MemSizeM} : ISize; + assign HBURST = 3'b000; // Single burst only supported; consider generalizing for cache fills + assign HPROT = 4'b0011; // not used; see Section 3.7 + assign HTRANS = (NextBusState != IDLE) ? 
2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise + assign HMASTLOCK = 0; // no locking supported + assign HWRITE = (NextBusState == MEMWRITE); + // delay write data by one cycle for + flop #(`XLEN) wdreg(HCLK, WriteDataM, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN + // delay signals for subword writes + flop #(3) adrreg(HCLK, HADDR[2:0], HADDRD); + flop #(4) sizereg(HCLK, {UnsignedLoadM, HSIZE}, HSIZED); + flop #(1) writereg(HCLK, HWRITE, HWRITED); + + // Route signals to Instruction and Data Caches + // *** assumes AHBW = XLEN + + // fix harris 2/24/21 to read all WLEN bits directly for instruction + assign InstrRData = HRDATA; + + assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021 + assign CaptureDataM = (BusState == MEMREAD) && (NextBusState != MEMREAD); + flopenr #(`XLEN) ReadDataPreWReg(clk, reset, CaptureDataM, ReadDataM, ReadDataPreW); // *** this may break when there is no instruction read after data read + flopenr #(`XLEN) ReadDataWReg(clk, reset, ~StallW, ReadDataPreW, ReadDataW); + + + /* + typedef enum {IDLE, MEMREAD, MEMWRITE, INSTRREAD} statetype; + statetype AdrState, DataState, NextAdrState; // what is happening in the first and second phases of the bus + always_ff @(posedge HCLK, negedge HRESETn) + if (~HRESETn) begin + AdrState <= IDLE; DataState <= IDLE; + HWDATA <= 0; // unnecessary but avoids x at startup + HSIZED <= 0; + HADDRD <= 0; + HWRITED <= 0; + end else begin + if (HREADY || (DataState == IDLE)) begin // only advance bus state if bus is idle or previous transaction returns ready + DataState <= AdrState; + AdrState <= NextAdrState; + if (HWRITE) HWDATA <= WriteDataM; + HSIZED <= {UnsignedLoadM, HSIZE}; + HADDRD <= HADDR[2:0]; + HWRITED <= HWRITE; + end + end + always_comb + if (MemReadM) NextAdrState = MEMREAD; + else if (MemWriteM) NextAdrState = MEMWRITE; + else if (InstrReadF) NextAdrState = INSTRREAD; +// else if (1) NextAdrState = INSTRREAD; // dm 2/9/2021 testing + else 
NextAdrState = IDLE; + + // Generate acknowledges based on bus state and ready + assign MemAckW = (AdrState == MEMREAD || AdrState == MEMWRITE) && HREADY; + assign InstrAckD = (AdrState == INSTRREAD) && HREADY; + + // State machines for stalls (probably can merge with FSM above***) + // Idle, DataBusy, InstrBusy. Stall while in busystate add suffixes + logic MemState, NextMemState, InstrState, NextInstrState; + flopr #(1) msreg(HCLK, ~HRESETn, NextMemState, MemState); + flopr #(1) isreg(HCLK, ~HRESETn, NextInstrState, InstrState); +/* always_ff @(posedge HCLK, negedge HRESETn) + if (~HRESETn) MemState <= 0; + else MemState <= NextMemState; + assign NextMemState = (MemState == 0 && InstrState == 0 && (MemReadM || MemWriteM)) || (MemState == 1 && ~MemAckW); + assign DataStall = NextMemState; +/* always_ff @(posedge HCLK, negedge HRESETn) + if (~HRESETn) InstrState <= 0; + else InstrState <= NextInstrState; + + assign NextInstrState = (InstrState == 0 && MemState == 0 && (~MemReadM && ~MemWriteM && InstrReadF)) || + (InstrState == 1 && ~InstrAckD) || + (InstrState == 1 && ResolveBranchD); // dh 2/8/2021 fixing; delete this later +/* assign NextInstrState = (InstrState == 0 && MemState == 0 && (~MemReadM && ~MemWriteM)) || + (InstrState == 1 && ~InstrAckD); // *** removed InstrReadF above dh 2/9/20 + assign InstrStall = NextInstrState | MemState | NextMemState; // *** check this, explain better + // temporarily turn off stalls and check it works + //assign DataStall = 0; + //assign InstrStall = 0; + + assign DReady = HREADY & GrantData; // ***unused? 
+ assign IReady = HREADY & InstrReadF & ~GrantData; // maybe unused?*** + +*/ + // Choose ISize based on XLen generate - if (`AHBW == 32) assign ISize = 3'b010; // 32-bit transfers - else assign ISize = 3'b011; // 64-bit transfers + //if (`AHBW == 32) assign ISize = 3'b010; // 32-bit transfers + //else assign ISize = 3'b011; // 64-bit transfers + assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width endgenerate - +/* // drive bus outputs assign HADDR = GrantData ? MemPAdrM[31:0] : InstrPAdrF[31:0]; - assign HWDATA = WriteDataM; + //assign HWDATA = WriteDataW; //flop #(`XLEN) wdreg(HCLK, DWDataM, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN - assign HWRITE = DWriteM; - assign HSIZE = GrantData ? {1'b0, DSizeM} : ISize; + assign HWRITE = MemWriteM; + assign HSIZE = GrantData ? {1'b0, MemSizeM} : ISize; assign HBURST = 3'b000; // Single burst only supported; consider generalizing for cache fillsfHPROT assign HPROT = 4'b0011; // not used; see Section 3.7 - assign HTRANS = IReadF | DReadM | DWriteM ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise + assign HTRANS = InstrReadF | MemReadM | MemWriteM ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise assign HMASTLOCK = 0; // no locking supported - - // Route signals to Instruction and Data Caches - // *** assumes AHBW = XLEN - assign IRData = HRDATAMasked; - assign IReady = HREADY & IReadF & ~GrantData; // maybe unused?*** - assign DRData = HRDATAMasked; - assign DReady = HREADY & GrantData; // ***unused? 
+ */ + // stalls // Stall MEM stage if data is being accessed and bus isn't yet ready diff --git a/wally-pipelined/src/ebu/subwordread.sv b/wally-pipelined/src/ebu/subwordread.sv index 74b1e0aa7..352a33c05 100644 --- a/wally-pipelined/src/ebu/subwordread.sv +++ b/wally-pipelined/src/ebu/subwordread.sv @@ -28,9 +28,9 @@ module subwordread ( // from AHB Interface input logic [`XLEN-1:0] HRDATA, - input logic [31:0] HADDR, - input logic UnsignedLoadM, - input logic [2:0] HSIZE, + input logic [2:0] HADDRD, + //input logic UnsignedLoadM, + input logic [3:0] HSIZED, // to ifu/dmems output logic [`XLEN-1:0] HRDATAMasked ); @@ -42,7 +42,7 @@ module subwordread ( if (`XLEN == 64) begin // ByteMe mux always_comb - case(HADDR[2:0]) + case(HADDRD[2:0]) 3'b000: ByteM = HRDATA[7:0]; 3'b001: ByteM = HRDATA[15:8]; 3'b010: ByteM = HRDATA[23:16]; @@ -55,7 +55,7 @@ module subwordread ( // halfword mux always_comb - case(HADDR[2:1]) + case(HADDRD[2:1]) 2'b00: HalfwordM = HRDATA[15:0]; 2'b01: HalfwordM = HRDATA[31:16]; 2'b10: HalfwordM = HRDATA[47:32]; @@ -65,14 +65,14 @@ module subwordread ( logic [31:0] WordM; always_comb - case(HADDR[2]) + case(HADDRD[2]) 1'b0: WordM = HRDATA[31:0]; 1'b1: WordM = HRDATA[63:32]; endcase // sign extension always_comb - case({UnsignedLoadM, HSIZE[1:0]}) + case({HSIZED[3], HSIZED[1:0]}) // HSIZED[3] indicates unsigned load 3'b000: HRDATAMasked = {{56{ByteM[7]}}, ByteM}; // lb 3'b001: HRDATAMasked = {{48{HalfwordM[15]}}, HalfwordM[15:0]}; // lh 3'b010: HRDATAMasked = {{32{WordM[31]}}, WordM[31:0]}; // lw @@ -85,7 +85,7 @@ module subwordread ( end else begin // 32-bit // byte mux always_comb - case(HADDR[1:0]) + case(HADDRD[1:0]) 2'b00: ByteM = HRDATA[7:0]; 2'b01: ByteM = HRDATA[15:8]; 2'b10: ByteM = HRDATA[23:16]; @@ -94,14 +94,14 @@ module subwordread ( // halfword mux always_comb - case(HADDR[1]) + case(HADDRD[1]) 1'b0: HalfwordM = HRDATA[15:0]; 1'b1: HalfwordM = HRDATA[31:16]; endcase // sign extension always_comb - case({UnsignedLoadM, HSIZE[1:0]}) + 
case({HSIZED[3], HSIZED[1:0]}) 3'b000: HRDATAMasked = {{24{ByteM[7]}}, ByteM}; // lb 3'b001: HRDATAMasked = {{16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh 3'b010: HRDATAMasked = HRDATA; // lw diff --git a/wally-pipelined/src/generic/flop.sv b/wally-pipelined/src/generic/flop.sv index 263a4fb90..7e954a8f8 100644 --- a/wally-pipelined/src/generic/flop.sv +++ b/wally-pipelined/src/generic/flop.sv @@ -47,6 +47,16 @@ module flopr #(parameter WIDTH = 8) ( else q <= #1 d; endmodule +// flop with enable +module flopen #(parameter WIDTH = 8) ( + input logic clk, en, + input logic [WIDTH-1:0] d, + output logic [WIDTH-1:0] q); + + always_ff @(posedge clk) + if (en) q <= #1 d; +endmodule + // flop with enable, asynchronous reset, synchronous clear module flopenrc #(parameter WIDTH = 8) ( input logic clk, reset, clear, en, diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index ad9a00749..3055c4f17 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -34,12 +34,14 @@ module hazard( input logic LoadStallD, MulDivStallD, input logic InstrStall, DataStall, // Stall outputs - output logic StallF, StallD, FlushD, FlushE, FlushM, FlushW + output logic StallF, StallD, StallE, StallM, StallW, + output logic FlushD, FlushE, FlushM, FlushW ); logic BranchFlushDE; - logic StallDCause, StallFCause, StallWCause; - + logic StallFCause, StallDCause, StallECause, StallMCause, StallWCause; + logic FirstUnstalledD, FirstUnstalledE, FirstUnstalledM, FirstUnstalledW; + // stalls and flushes // loads: stall for one cycle if the subsequent instruction depends on the load // branches and jumps: flush the next two instructions if the branch is taken in EXE @@ -54,14 +56,32 @@ module hazard( assign BranchFlushDE = PCSrcE | RetM | TrapM; - assign StallDCause = LoadStallD | MulDivStallD; - assign StallFCause = InstrStall | CSRWritePendingDEM; - assign StallWCause = DataStall; // *** not yet used + // changed 2/22/21 harris 
to turn off stallF when RetM or TrapM + // changed 2/23/21 harris to BranchFlushDEM to solve bug in ECALL about JAL being ignored +// assign StallFCause = /*InstrStall | */ CSRWritePendingDEM; // stall at fetch if unable to get the instruction, +// assign StallFCause = /*InstrStall | */ CSRWritePendingDEM & ~(RetM | TrapM); // stall at fetch if unable to get the instruction, + assign StallFCause = /*InstrStall | */ CSRWritePendingDEM & ~(BranchFlushDE); // stall at fetch if unable to get the instruction, + // or if a CSR will be written and may change system behavior + assign StallDCause = LoadStallD | MulDivStallD; // stall in decode if instruction is a load dependent on previous + assign StallECause = 0; + assign StallMCause = 0; // sDataStall; // not yet used*** + assign StallWCause = DataStall | InstrStall; - assign StallD = StallDCause; + // Each stage stalls if the next stage is stalled or there is a cause to stall this stage. assign StallF = StallD | StallFCause; - assign FlushD = BranchFlushDE | StallFCause; // PCSrcE |InstrStall | CSRWritePendingDEM | RetM | TrapM; - assign FlushE = StallD | BranchFlushDE; //LoadStallD | PCSrcE | RetM | TrapM; - assign FlushM = RetM | TrapM; - assign FlushW = TrapM; + assign StallD = StallE | StallDCause; + assign StallE = StallM | StallECause; + assign StallM = StallW | StallMCause; + assign StallW = StallWCause; + + assign FirstUnstalledD = (~StallD & StallF); + assign FirstUnstalledE = (~StallE & StallD); + assign FirstUnstalledM = (~StallM & StallE); + assign FirstUnstalledW = (~StallW & StallM);; + + // Each stage flushes if the previous stage is the last one stalled (for cause) or the system has reason to flush + assign FlushD = FirstUnstalledD || BranchFlushDE; // PCSrcE |InstrStall | CSRWritePendingDEM | RetM | TrapM; + assign FlushE = FirstUnstalledE || BranchFlushDE; //LoadStallD | PCSrcE | RetM | TrapM; + assign FlushM = FirstUnstalledM || RetM || TrapM; + assign FlushW = FirstUnstalledW | TrapM; endmodule diff 
--git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index 6d5412b94..a20d224ff 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -37,8 +37,8 @@ module controller( input logic IllegalIEUInstrFaultD, output logic IllegalBaseInstrFaultD, // Execute stage control signals - input logic FlushE, - input logic [2:0] FlagsE, + input logic StallE, FlushE, + input logic [2:0] FlagsE, output logic PCSrcE, // for datapath and Hazard Unit output logic [4:0] ALUControlE, output logic ALUSrcAE, ALUSrcBE, @@ -47,14 +47,14 @@ module controller( output logic [2:0] Funct3E, output logic MulDivE, W64E, // Memory stage control signals - input logic FlushM, + input logic StallM, FlushM, input logic DataMisalignedM, output logic [1:0] MemRWM, output logic CSRWriteM, PrivilegedM, output logic [2:0] Funct3M, output logic RegWriteM, // for Hazard Unit // Writeback stage control signals - input logic FlushW, + input logic StallW, FlushW, output logic RegWriteW, // for datapath and Hazard Unit output logic [2:0] ResultSrcW, output logic InstrValidW, @@ -145,7 +145,7 @@ module controller( endcase // Execute stage pipeline control register and logic - floprc #(24) controlregE(clk, reset, FlushE, + flopenrc #(24) controlregE(clk, reset, FlushE, ~StallE, {RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUControlD, ALUSrcAD, ALUSrcBD, TargetSrcD, CSRWriteD, PrivilegedD, Funct3D, W64D, MulDivD, 1'b1}, {RegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUControlE, ALUSrcAE, ALUSrcBE, TargetSrcE, CSRWriteE, PrivilegedE, Funct3E, W64E, MulDivE, InstrValidE}); @@ -168,12 +168,12 @@ module controller( assign MemReadE = MemRWE[1]; // Memory stage pipeline control register - floprc #(12) controlregM(clk, reset, FlushM, + flopenrc #(12) controlregM(clk, reset, FlushM, ~StallM, {RegWriteE, ResultSrcE, MemRWE, CSRWriteE, PrivilegedE, Funct3E, InstrValidE}, {RegWriteM, ResultSrcM, MemRWM, CSRWriteM, PrivilegedM, Funct3M, 
InstrValidM}); // Writeback stage pipeline control register - floprc #(5) controlregW(clk, reset, FlushW, + flopenrc #(5) controlregW(clk, reset, FlushW, ~StallW, {RegWriteM, ResultSrcM, InstrValidM}, {RegWriteW, ResultSrcW, InstrValidW}); diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index 86d9830a2..db060dc2b 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -32,7 +32,7 @@ module datapath ( input logic [2:0] ImmSrcD, input logic [31:0] InstrD, // Execute stage signals - input logic FlushE, + input logic StallE, FlushE, input logic [1:0] ForwardAE, ForwardBE, input logic PCSrcE, input logic [4:0] ALUControlE, @@ -43,13 +43,13 @@ module datapath ( output logic [`XLEN-1:0] PCTargetE, output logic [`XLEN-1:0] SrcAE, SrcBE, // Memory stage signals - input logic FlushM, + input logic StallM, FlushM, input logic [2:0] Funct3M, input logic RetM, TrapM, output logic [`XLEN-1:0] SrcAM, output logic [`XLEN-1:0] WriteDataM, MemAdrM, // Writeback stage signals - input logic FlushW, + input logic StallW, FlushW, input logic RegWriteW, input logic [2:0] ResultSrcW, input logic [`XLEN-1:0] PCLinkW, @@ -85,12 +85,12 @@ module datapath ( extend ext(.InstrD(InstrD[31:7]), .*); // Execute stage pipeline register and logic - floprc #(`XLEN) RD1EReg(clk, reset, FlushE, RD1D, RD1E); - floprc #(`XLEN) RD2EReg(clk, reset, FlushE, RD2D, RD2E); - floprc #(`XLEN) ExtImmEReg(clk, reset, FlushE, ExtImmD, ExtImmE); - floprc #(5) Rs1EReg(clk, reset, FlushE, Rs1D, Rs1E); - floprc #(5) Rs2EReg(clk, reset, FlushE, Rs2D, Rs2E); - floprc #(5) RdEReg(clk, reset, FlushE, RdD, RdE); + flopenrc #(`XLEN) RD1EReg(clk, reset, FlushE, ~StallE, RD1D, RD1E); + flopenrc #(`XLEN) RD2EReg(clk, reset, FlushE, ~StallE, RD2D, RD2E); + flopenrc #(`XLEN) ExtImmEReg(clk, reset, FlushE, ~StallE, ExtImmD, ExtImmE); + flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E); + flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, 
Rs2D, Rs2E); + flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); mux3 #(`XLEN) faemux(RD1E, ResultW, ALUResultM, ForwardAE, PreSrcAE); mux3 #(`XLEN) fbemux(RD2E, ResultW, ALUResultM, ForwardBE, WriteDataE); @@ -101,15 +101,15 @@ module datapath ( assign PCTargetE = ExtImmE + TargetBaseE; // Memory stage pipeline register - floprc #(`XLEN) SrcAMReg(clk, reset, FlushM, SrcAE, SrcAM); - floprc #(`XLEN) ALUResultMReg(clk, reset, FlushM, ALUResultE, ALUResultM); + flopenrc #(`XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM); + flopenrc #(`XLEN) ALUResultMReg(clk, reset, FlushM, ~StallM, ALUResultE, ALUResultM); assign MemAdrM = ALUResultM; - floprc #(`XLEN) WriteDataMReg(clk, reset, FlushM, WriteDataE, WriteDataM); - floprc #(5) RdMEg(clk, reset, FlushM, RdE, RdM); + flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM); + flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM); // Writeback stage pipeline register and logic - floprc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ALUResultM, ALUResultW); - floprc #(5) RdWEg(clk, reset, FlushW, RdM, RdW); + flopenrc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ~StallW, ALUResultM, ALUResultW); + flopenrc #(5) RdWEg(clk, reset, FlushW, ~StallW, RdM, RdW); mux5 #(`XLEN) resultmux(ALUResultW, ReadDataW, PCLinkW, CSRReadValW, MulDivResultW, ResultSrcW, ResultW); endmodule diff --git a/wally-pipelined/src/ieu/forward.sv b/wally-pipelined/src/ieu/forward.sv index 36bc4827b..fd367ae71 100644 --- a/wally-pipelined/src/ieu/forward.sv +++ b/wally-pipelined/src/ieu/forward.sv @@ -30,7 +30,7 @@ module forward( input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, input logic MemReadE, MulDivE, input logic RegWriteM, RegWriteW, - // Forwaring controls + // Forwarding controls output logic [1:0] ForwardAE, ForwardBE, output logic LoadStallD, MulDivStallD ); diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index e95bd6d43..2e5305981 100644 --- 
a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -49,7 +49,8 @@ module ieu ( input logic [`XLEN-1:0] PCLinkW, output logic InstrValidW, // hazards - input logic StallD, FlushD, FlushE, FlushM, FlushW, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, input logic RetM, TrapM, output logic LoadStallD, MulDivStallD, output logic PCSrcE, diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 3a12b330a..3deb64255 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -28,13 +28,16 @@ module ifu ( input logic clk, reset, - input logic StallF, StallD, FlushD, FlushE, FlushM, FlushW, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, // Fetch - input logic [31:0] InstrF, + input logic [`XLEN-1:0] InstrInF, output logic [`XLEN-1:0] PCF, output logic [`XLEN-1:0] InstrPAdrF, + output logic InstrReadF, // Decode - output logic InstrStall, + //output logic InstrStall, + output logic ResolveBranchD, // Execute input logic PCSrcE, input logic [`XLEN-1:0] PCTargetE, @@ -50,7 +53,9 @@ module ifu ( input logic IllegalBaseInstrFaultD, output logic IllegalIEUInstrFaultD, output logic InstrMisalignedFaultM, - output logic [`XLEN-1:0] InstrMisalignedAdrM + output logic [`XLEN-1:0] InstrMisalignedAdrM, + // bogus + input logic [15:0] rd2 ); logic [`XLEN-1:0] UnalignedPCNextF, PCNextF; @@ -59,20 +64,25 @@ module ifu ( logic IllegalCompInstrD; logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkE, PCLinkM; logic CompressedF; - logic [31:0] InstrRawD, InstrE; + logic [31:0] InstrF, InstrRawD, InstrE, InstrW; logic [31:0] nop = 32'h00000013; // instruction for NOP // *** put memory interface on here, InstrF becomes output - assign InstrStall = 0; // *** assign InstrPAdrF = PCF; // *** no MMU + //assign InstrReadF = ~StallD; // *** & ICacheMissF; add later + assign InstrReadF = 1; // *** & 
ICacheMissF; add later assign PrivilegedChangePCM = RetM | TrapM; - assign StallExceptResolveBranchesF = StallF & ~(PCSrcE | PrivilegedChangePCM); + //assign StallExceptResolveBranchesF = StallF & ~(PCSrcE | PrivilegedChangePCM); + + // dh 2/8/2022 keep in instruction fetch stall mode when taking branch + //flopr #(1) rbreg(clk, reset, (PCSrcE | PrivilegedChangePCM), ResolveBranchD); mux3 #(`XLEN) pcmux(PCPlus2or4F, PCTargetE, PrivilegedNextPCM, {PrivilegedChangePCM, PCSrcE}, UnalignedPCNextF); assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment - flopenl #(`XLEN) pcreg(clk, reset, ~StallExceptResolveBranchesF, PCNextF, `RESET_VECTOR, PCF); +// flopenl #(`XLEN) pcreg(clk, reset, ~StallExceptResolveBranchesF, PCNextF, `RESET_VECTOR, PCF); + flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF); // pcadder // add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32 @@ -86,6 +96,15 @@ module ifu ( else PCPlus2or4F = {PCF[`XLEN-1:2], 2'b10}; else PCPlus2or4F = {PCPlusUpperF, PCF[1:0]}; // add 4 + // harris 2/23/21 Add code to fetch instruction split across two words + generate + if (`XLEN==32) begin + assign InstrF = PCF[1] ? {rd2[15:0], InstrInF[31:16]} : InstrInF; + end else begin + assign InstrF = PCF[2] ? (PCF[1] ? {rd2[15:0], InstrInF[63:48]} : InstrInF[63:32]) + : (PCF[1] ? InstrInF[47:16] : InstrInF[31:0]); + end + endgenerate // Decode stage pipeline register and logic flopenl #(32) InstrDReg(clk, reset, ~StallD, (FlushD ? 
nop : InstrF), nop, InstrRawD); @@ -107,25 +126,26 @@ module ifu ( // pipeline misaligned faults to M stage assign BranchMisalignedFaultE = misaligned & PCSrcE; // E-stage (Branch/Jump) misaligned - flopr #(1) InstrMisalginedReg(clk, reset, BranchMisalignedFaultE, BranchMisalignedFaultM); - flopr #(`XLEN) InstrMisalignedAdrReg(clk, reset, PCNextF, InstrMisalignedAdrM); + flopenr #(1) InstrMisalginedReg(clk, reset, ~StallM, BranchMisalignedFaultE, BranchMisalignedFaultM); + flopenr #(`XLEN) InstrMisalignedAdrReg(clk, reset, ~StallM, PCNextF, InstrMisalignedAdrM); assign TrapMisalignedFaultM = misaligned & PrivilegedChangePCM; assign InstrMisalignedFaultM = BranchMisalignedFaultM; // | TrapMisalignedFaultM; *** put this back in without causing a cyclic path - flopr #(32) InstrEReg(clk, reset, FlushE ? nop : InstrD, InstrE); - flopr #(32) InstrMReg(clk, reset, FlushM ? nop : InstrE, InstrM); - flopr #(`XLEN) PCEReg(clk, reset, PCD, PCE); - flopr #(`XLEN) PCMReg(clk, reset, PCE, PCM); - flopr #(`XLEN) PCWReg(clk, reset, PCM, PCW); // *** probably not needed; delete later + flopenr #(32) InstrEReg(clk, reset, ~StallE, FlushE ? nop : InstrD, InstrE); + flopenr #(32) InstrMReg(clk, reset, ~StallM, FlushM ? nop : InstrE, InstrM); + flopenr #(32) InstrWReg(clk, reset, ~StallW, FlushW ? nop : InstrM, InstrW); // just for testbench, delete later + flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, PCD, PCE); + flopenr #(`XLEN) PCMReg(clk, reset, ~StallM, PCE, PCM); + flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); // *** probably not needed; delete later // seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL. 
// either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or // have dedicated adder in Mem stage based on PCM + 2 or 4 // *** redo this - flopr #(`XLEN) PCPDReg(clk, reset, PCPlus2or4F, PCLinkD); - flopr #(`XLEN) PCPEReg(clk, reset, PCLinkD, PCLinkE); - flopr #(`XLEN) PCPMReg(clk, reset, PCLinkE, PCLinkM); - flopr #(`XLEN) PCPWReg(clk, reset, PCLinkM, PCLinkW); + flopenr #(`XLEN) PCPDReg(clk, reset, ~StallD, PCPlus2or4F, PCLinkD); + flopenr #(`XLEN) PCPEReg(clk, reset, ~StallE, PCLinkD, PCLinkE); + flopenr #(`XLEN) PCPMReg(clk, reset, ~StallM, PCLinkE, PCLinkM); + flopenr #(`XLEN) PCPWReg(clk, reset, ~StallW, PCLinkM, PCLinkW); endmodule diff --git a/wally-pipelined/src/privileged/csr.sv b/wally-pipelined/src/privileged/csr.sv index 496b0a080..bee0ba86c 100644 --- a/wally-pipelined/src/privileged/csr.sv +++ b/wally-pipelined/src/privileged/csr.sv @@ -28,7 +28,7 @@ module csr ( input logic clk, reset, - input logic FlushW, + input logic FlushW, StallW, input logic [31:0] InstrM, input logic [`XLEN-1:0] PCM, SrcAM, input logic CSRWriteM, TrapM, MTrapM, STrapM, UTrapM, mretM, sretM, uretM, @@ -102,7 +102,9 @@ module csr ( // merge CSR Reads assign CSRReadValM = CSRUReadValM | CSRSReadValM | CSRMReadValM | CSRCReadValM | CSRNReadValM; - floprc #(`XLEN) CSRValWReg(clk, reset, FlushW, CSRReadValM, CSRReadValW); + // *** add W stall 2/22/21 dh to try fixing memory stalls +// floprc #(`XLEN) CSRValWReg(clk, reset, FlushW, CSRReadValM, CSRReadValW); + flopenrc #(`XLEN) CSRValWReg(clk, reset, FlushW, ~StallW, CSRReadValM, CSRReadValW); // merge illegal accesses: illegal if none of the CSR addresses is legal or privilege is insufficient assign InsufficientCSRPrivilegeM = (CSRAdrM[9:8] == 2'b11 && PrivilegeModeW != `M_MODE) || diff --git a/wally-pipelined/src/privileged/csrsr.sv b/wally-pipelined/src/privileged/csrsr.sv index 6db96dcc5..ecfbecfda 100644 --- a/wally-pipelined/src/privileged/csrsr.sv +++ b/wally-pipelined/src/privileged/csrsr.sv @@ -27,7 
+27,7 @@ `include "wally-config.vh" module csrsr ( - input logic clk, reset, + input logic clk, reset, StallW, input logic WriteMSTATUSM, WriteSSTATUSM, WriteUSTATUSM, input logic TrapM, FloatRegWriteW, input logic [1:0] NextPrivilegeModeM, PrivilegeModeW, @@ -118,7 +118,7 @@ module csrsr ( STATUS_MIE <= 0; // Per Priv 3.3 STATUS_SIE <= `S_SUPPORTED; STATUS_UIE <= `U_SUPPORTED; - end else begin + end else if (~StallW) begin if (WriteMSTATUSM) begin STATUS_SUM_INT <= CSRWriteValM[18]; STATUS_MPRV_INT <= CSRWriteValM[17]; diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index be97b51cd..6b1249720 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// exceptions.sv +// privileged.sv // // Written: David_Harris@hmc.edu 5 January 2021 // Modified: @@ -45,7 +45,7 @@ module privileged ( input logic [`XLEN-1:0] InstrMisalignedAdrM, MemAdrM, input logic [4:0] SetFflagsM, output logic [2:0] FRM_REGW, - input logic FlushD, FlushE, FlushM, StallD + input logic FlushD, FlushE, FlushM, StallD, StallW ); logic [1:0] NextPrivilegeModeM, PrivilegeModeW; @@ -81,8 +81,8 @@ module privileged ( // PrivilegeMode FSM always_comb - if (reset) NextPrivilegeModeM = `M_MODE; // Privilege resets to 11 (Machine Mode) - else if (mretM) NextPrivilegeModeM = STATUS_MPP; + /* if (reset) NextPrivilegeModeM = `M_MODE; // Privilege resets to 11 (Machine Mode) // moved reset to flop + else */ if (mretM) NextPrivilegeModeM = STATUS_MPP; else if (sretM) NextPrivilegeModeM = {1'b0, STATUS_SPP}; else if (uretM) NextPrivilegeModeM = `U_MODE; else if (TrapM) begin // Change privilege based on DELEG registers (see 3.1.8) @@ -96,7 +96,7 @@ module privileged ( else NextPrivilegeModeM = `M_MODE; end else NextPrivilegeModeM = PrivilegeModeW; - flop #(2) privmodereg(clk, NextPrivilegeModeM, PrivilegeModeW); + flopenl #(2) privmodereg(clk, 
reset, ~StallW, NextPrivilegeModeM, `M_MODE, PrivilegeModeW); /////////////////////////////////////////// // decode privileged instructions diff --git a/wally-pipelined/src/uncore/clint.sv b/wally-pipelined/src/uncore/clint.sv index 00804a480..e3fc1ea04 100644 --- a/wally-pipelined/src/uncore/clint.sv +++ b/wally-pipelined/src/uncore/clint.sv @@ -54,6 +54,11 @@ module clint ( assign #2 entry = {HADDR[15:2], 2'b00}; endgenerate + // DH 2/20/21: Eventually allow MTIME to run off a separate clock + // This will require synchronizing MTIME to the system clock + // before it is read or compared to MTIMECMP. + // It will also require synchronizing the write to MTIMECMP. + // Use req and ack signals synchronized across the clock domains. // register access generate diff --git a/wally-pipelined/src/uncore/dtim.sv b/wally-pipelined/src/uncore/dtim.sv index 6a49fcf59..4216b356c 100644 --- a/wally-pipelined/src/uncore/dtim.sv +++ b/wally-pipelined/src/uncore/dtim.sv @@ -36,6 +36,9 @@ module dtim ( ); logic [`XLEN-1:0] RAM[0:65535]; + logic [18:0] HWADDR; + logic [`XLEN-1:0] HREADTim0; + // logic [`XLEN-1:0] write; logic [15:0] entry; logic memread, memwrite; @@ -48,74 +51,56 @@ module dtim ( end else begin if (HREADYTim & HSELTim) begin busycount <= 0; - HREADYTim <= 0; + HREADYTim <= #1 0; end else if (~HREADYTim) begin - if (busycount == 0) begin // TIM latency, for testing purposes - HREADYTim <= 1; - end else + if (busycount == 2) begin // TIM latency, for testing purposes + HREADYTim <= #1 1; + end else begin busycount <= busycount + 1; + end end end - + + /* always_ff @(posedge HCLK, negedge HRESETn) + if (~HRESETn) begin + HREADYTim <= 0; + end else begin + HREADYTim <= HSELTim; // always respond one cycle later + end */ + assign memread = MemRWtim[1]; assign memwrite = MemRWtim[0]; +// always_ff @(posedge HCLK) +// memwrite <= MemRWtim[0]; // delay memwrite to write phase assign HRESPTim = 0; // OK // assign HREADYTim = 1; // Respond immediately; *** extend this + + + 
// Model memory read and write - // word aligned reads generate - if (`XLEN==64) - assign #2 entry = HADDR[18:3]; - else - assign #2 entry = HADDR[17:2]; - endgenerate - assign HREADTim = RAM[entry]; -// assign HREADTim = HREADYTim ? RAM[entry] : ~RAM[entry]; // *** temproary mess up read value before ready - - // write each byte based on the byte mask - // UInstantiate a byte-writable memory here if possible - // and drop tihs masking logic. Otherwise, use the masking - // from dmem - /*generate - - if (`XLEN==64) begin - always_comb begin - write=HREADTim; - if (ByteMaskM[0]) write[7:0] = HWDATA[7:0]; - if (ByteMaskM[1]) write[15:8] = HWDATA[15:8]; - if (ByteMaskM[2]) write[23:16] = HWDATA[23:16]; - if (ByteMaskM[3]) write[31:24] = HWDATA[31:24]; - if (ByteMaskM[4]) write[39:32] = HWDATA[39:32]; - if (ByteMaskM[5]) write[47:40] = HWDATA[47:40]; - if (ByteMaskM[6]) write[55:48] = HWDATA[55:48]; - if (ByteMaskM[7]) write[63:56] = HWDATA[63:56]; - end - always_ff @(posedge clk) - if (memwrite) RAM[HADDR[18:3]] <= write; - end else begin // 32-bit - always_comb begin - write=HREADTim; - if (ByteMaskM[0]) write[7:0] = HWDATA[7:0]; - if (ByteMaskM[1]) write[15:8] = HWDATA[15:8]; - if (ByteMaskM[2]) write[23:16] = HWDATA[23:16]; - if (ByteMaskM[3]) write[31:24] = HWDATA[31:24]; - end - always_ff @(posedge clk) - if (memwrite) RAM[HADDR[17:2]] <= write; + if (`XLEN == 64) begin +// always_ff @(negedge HCLK) +// if (memwrite) RAM[HWADDR[17:3]] <= HWDATA; + always_ff @(posedge HCLK) begin + //if (memwrite) RAM[HADDR[17:3]] <= HWDATA; + HWADDR <= HADDR; + HREADTim0 <= RAM[HADDR[17:3]]; + if (memwrite && HREADYTim) RAM[HWADDR[17:3]] <= HWDATA; + end + end else begin +// always_ff @(negedge HCLK) +// if (memwrite) RAM[HWADDR[17:2]] <= HWDATA; + always_ff @(posedge HCLK) begin + //if (memwrite) RAM[HADDR[17:2]] <= HWDATA; + HWADDR <= HADDR; + HREADTim0 <= RAM[HADDR[17:2]]; + if (memwrite && HREADYTim) RAM[HWADDR[17:2]] <= HWDATA; + end end - endgenerate */ - generate - if 
(`XLEN == 64) - always_ff @(posedge HCLK) begin - if (memwrite) RAM[HADDR[17:3]] <= HWDATA; -// HREADTim <= RAM[HADDR[17:3]]; - end - else - always_ff @(posedge HCLK) begin - if (memwrite) RAM[HADDR[17:2]] <= HWDATA; -// HREADTim <= RAM[HADDR[17:2]]; - end endgenerate + + assign HREADTim = HREADYTim ? HREADTim0 : 'bz; endmodule diff --git a/wally-pipelined/src/uncore/imem.sv b/wally-pipelined/src/uncore/imem.sv index 274be7dbf..09a6c2ce8 100644 --- a/wally-pipelined/src/uncore/imem.sv +++ b/wally-pipelined/src/uncore/imem.sv @@ -28,6 +28,7 @@ module imem ( input logic [`XLEN-1:1] AdrF, output logic [31:0] InstrF, + output logic [15:0] rd2, // bogus, delete when real multicycle fetch works output logic InstrAccessFaultF); /* verilator lint_off UNDRIVEN */ @@ -35,7 +36,7 @@ module imem ( /* verilator lint_on UNDRIVEN */ logic [15:0] adrbits; logic [`XLEN-1:0] rd; - logic [15:0] rd2; +// logic [15:0] rd2; generate if (`XLEN==32) assign adrbits = AdrF[17:2]; diff --git a/wally-pipelined/src/uncore/subwordwrite.sv b/wally-pipelined/src/uncore/subwordwrite.sv index 68c2b0e47..b1e8d683f 100644 --- a/wally-pipelined/src/uncore/subwordwrite.sv +++ b/wally-pipelined/src/uncore/subwordwrite.sv @@ -27,37 +27,35 @@ module subwordwrite ( input logic [`XLEN-1:0] HRDATA, - input logic [31:0] HADDR, - input logic [2:0] HSIZE, + input logic [2:0] HADDRD, + input logic [3:0] HSIZED, input logic [`XLEN-1:0] HWDATAIN, output logic [`XLEN-1:0] HWDATA ); - logic [7:0] ByteM; // *** declare locally to generate as either 4 or 8 bits - logic [15:0] HalfwordM; logic [`XLEN-1:0] WriteDataSubwordDuplicated; - logic [7:0] ByteMaskM; generate if (`XLEN == 64) begin + logic [7:0] ByteMaskM; // Compute write mask always_comb - case(HSIZE[1:0]) - 2'b00: begin ByteMaskM = 8'b00000000; ByteMaskM[HADDR[2:0]] = 1; end // sb - 2'b01: case (HADDR[2:1]) + case(HSIZED[1:0]) + 2'b00: begin ByteMaskM = 8'b00000000; ByteMaskM[HADDRD[2:0]] = 1; end // sb + 2'b01: case (HADDRD[2:1]) 2'b00: ByteMaskM = 
8'b00000011; 2'b01: ByteMaskM = 8'b00001100; 2'b10: ByteMaskM = 8'b00110000; 2'b11: ByteMaskM = 8'b11000000; endcase - 2'b10: if (HADDR[2]) ByteMaskM = 8'b11110000; + 2'b10: if (HADDRD[2]) ByteMaskM = 8'b11110000; else ByteMaskM = 8'b00001111; 2'b11: ByteMaskM = 8'b11111111; endcase // Handle subword writes always_comb - case(HSIZE[1:0]) + case(HSIZED[1:0]) 2'b00: WriteDataSubwordDuplicated = {8{HWDATAIN[7:0]}}; // sb 2'b01: WriteDataSubwordDuplicated = {4{HWDATAIN[15:0]}}; // sh 2'b10: WriteDataSubwordDuplicated = {2{HWDATAIN[31:0]}}; // sw @@ -77,19 +75,20 @@ module subwordwrite ( end end else begin // 32-bit + logic [3:0] ByteMaskM; // Compute write mask always_comb - case(HSIZE[1:0]) - 2'b00: begin ByteMaskM = 8'b0000; ByteMaskM[{1'b0, HADDR[1:0]}] = 1; end // sb - 2'b01: if (HADDR[1]) ByteMaskM = 8'b1100; - else ByteMaskM = 8'b0011; - 2'b10: ByteMaskM = 8'b1111; - default: ByteMaskM = 8'b111; // shouldn't happen + case(HSIZED[1:0]) + 2'b00: begin ByteMaskM = 4'b0000; ByteMaskM[HADDRD[1:0]] = 1; end // sb + 2'b01: if (HADDRD[1]) ByteMaskM = 4'b1100; + else ByteMaskM = 4'b0011; + 2'b10: ByteMaskM = 4'b1111; + default: ByteMaskM = 4'b111; // shouldn't happen endcase // Handle subword writes always_comb - case(HSIZE[1:0]) + case(HSIZED[1:0]) 2'b00: WriteDataSubwordDuplicated = {4{HWDATAIN[7:0]}}; // sb 2'b01: WriteDataSubwordDuplicated = {2{HWDATAIN[15:0]}}; // sh 2'b10: WriteDataSubwordDuplicated = HWDATAIN; // sw diff --git a/wally-pipelined/src/uncore/uartPC16550D.sv b/wally-pipelined/src/uncore/uartPC16550D.sv index 445dca38c..d494e19f2 100644 --- a/wally-pipelined/src/uncore/uartPC16550D.sv +++ b/wally-pipelined/src/uncore/uartPC16550D.sv @@ -6,6 +6,7 @@ // // Purpose: Universial Asynchronous Receiver/ Transmitter with FIFOs // Emulates interface of Texas Instruments PC16550D +// https://media.digikey.com/pdf/Data%20Sheets/Texas%20Instruments%20PDFs/PC16550D.pdf // Compatible with UART in Imperas Virtio model *** // // Compatible with most of PC16550D with 
the following known exceptions: diff --git a/wally-pipelined/src/uncore/uncore.sv b/wally-pipelined/src/uncore/uncore.sv index 170e02c85..d899717e4 100644 --- a/wally-pipelined/src/uncore/uncore.sv +++ b/wally-pipelined/src/uncore/uncore.sv @@ -43,6 +43,10 @@ module uncore ( input logic HREADYEXT, HRESPEXT, output logic [`AHBW-1:0] HRDATA, output logic HREADY, HRESP, + // delayed signals + input logic [2:0] HADDRD, + input logic [3:0] HSIZED, + input logic HWRITED, // bus interface output logic DataAccessFaultM, // peripheral pins @@ -71,7 +75,7 @@ module uncore ( assign HSELUART = PreHSELUART && (HSIZE == 3'b000); // only byte writes to UART are supported // Enable read or write based on decoded address - assign MemRW = {~HWRITE, HWRITE}; + assign MemRW = {~HWRITE, HWRITED}; assign MemRWtim = MemRW & {2{HSELTim}}; assign MemRWclint = MemRW & {2{HSELCLINT}}; assign MemRWgpio = MemRW & {2{HSELGPIO}}; diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 957336f1f..d468283d0 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -29,12 +29,13 @@ module wallypipelinedhart ( input logic clk, reset, output logic [`XLEN-1:0] PCF, - input logic [31:0] InstrF, +// input logic [31:0] InstrF, // Privileged input logic TimerIntM, ExtIntM, SwIntM, input logic InstrAccessFaultF, input logic DataAccessFaultM, // Bus Interface + input logic [15:0] rd2, // bogus, delete when real multicycle fetch works input logic [`AHBW-1:0] HRDATA, input logic HREADY, HRESP, output logic HCLK, HRESETn, @@ -45,11 +46,16 @@ module wallypipelinedhart ( output logic [2:0] HBURST, output logic [3:0] HPROT, output logic [1:0] HTRANS, - output logic HMASTLOCK + output logic HMASTLOCK, + // Delayed signals for subword write + output logic [2:0] HADDRD, + output logic [3:0] HSIZED, + output logic HWRITED ); - logic [1:0] ForwardAE, ForwardBE; - logic StallF, StallD, FlushD, 
FlushE, FlushM, FlushW; +// logic [1:0] ForwardAE, ForwardBE; + logic StallF, StallD, StallE, StallM, StallW; + logic FlushD, FlushE, FlushM, FlushW; logic RetM, TrapM; // new signals that must connect through DP @@ -73,6 +79,7 @@ module wallypipelinedhart ( logic StoreMisalignedFaultM, StoreAccessFaultM; logic [`XLEN-1:0] InstrMisalignedAdrM; logic [`XLEN-1:0] zero = 0; + logic ResolveBranchD; logic PCSrcE; logic CSRWritePendingDEM; @@ -82,26 +89,35 @@ module wallypipelinedhart ( logic FloatRegWriteW; // bus interface to dmem - logic [1:0] MemRWAlignedM; - logic [2:0] Funct3M; + logic MemReadM, MemWriteM; + logic [2:0] Funct3M; logic [`XLEN-1:0] MemAdrM, MemPAdrM, WriteDataM; - logic [`XLEN-1:0] ReadDataM, ReadDataW; + logic [`XLEN-1:0] ReadDataW; logic [`XLEN-1:0] InstrPAdrF; + logic [`XLEN-1:0] InstrRData; + logic InstrReadF; logic DataStall, InstrStall; logic InstrAckD, MemAckW; - ifu ifu(.*); // instruction fetch unit: PC, branch prediction, instruction cache + ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache ieu ieu(.*); // inteber execution unit: integer register file, datapath and controller - dmem dmem(/*.Funct3M(InstrM[14:12]),*/ .*); // data cache unit + dmem dmem(.*); // data cache unit - ahblite ebu( // *** make IRData InstrF - .IReadF(1'b1), .IRData(), //.IReady(), - .DReadM(MemRWAlignedM[1]), .DWriteM(MemRWAlignedM[0]), - .DSizeM(Funct3M[1:0]), .DRData(ReadDataM), //.DReady(), - .UnsignedLoadM(Funct3M[2]), + + ahblite ebu( + //.InstrReadF(1'b0), + //.InstrRData(InstrF), // hook up InstrF later + .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), .*); - //assign InstrF = ReadDataM[31:0]; + +// changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed. 
+// Would need to insertinstruction as InstrD, not InstrF + /*ahblite ebu( + .InstrReadF(1'b0), + .InstrRData(), // hook up InstrF later + .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), + .*); */ muldiv mdu(.*); // multiply and divide unit diff --git a/wally-pipelined/src/wally/wallypipelinedsoc.sv b/wally-pipelined/src/wally/wallypipelinedsoc.sv index 9b0ed2456..7b8883c57 100644 --- a/wally-pipelined/src/wally/wallypipelinedsoc.sv +++ b/wally-pipelined/src/wally/wallypipelinedsoc.sv @@ -64,6 +64,10 @@ module wallypipelinedsoc ( logic InstrAccessFaultF, DataAccessFaultM; logic TimerIntM, SwIntM; // from CLINT logic ExtIntM = 0; // not yet connected + logic [2:0] HADDRD; + logic [3:0] HSIZED; + logic HWRITED; + logic [15:0] rd2; // bogus, delete when real multicycle fetch works // instantiate processor and memories wallypipelinedhart hart(.*); diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index ce36c0364..7ec8fa594 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -35,7 +35,7 @@ module testbench(); logic [`XLEN-1:0] signature[0:10000]; logic [`XLEN-1:0] testadr; string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName; - logic [31:0] InstrW; + //logic [31:0] InstrW; logic [`XLEN-1:0] meminit; string tests64m[] = '{ "rv64m/I-MUL-01", "3000", @@ -90,7 +90,6 @@ string tests64iNOc[] = { "rv64i/I-MISALIGN_JMP-01","2000" }; string tests64i[] = '{ - "rv64i/I-LW-01", "4110", "rv64i/I-ADD-01", "3000", "rv64i/I-ADDI-01", "3000", "rv64i/I-ADDIW-01", "3000", @@ -198,7 +197,6 @@ string tests64iNOc[] = { // "rv32m/I-REMU-01", "2000" }; string tests32ic[] = '{ -// "rv32ic/WALLY-C-ADHOC-01", "2000", "rv32ic/I-C-ADD-01", "2000", "rv32ic/I-C-ADDI-01", "2000", "rv32ic/I-C-AND-01", "2000", @@ -347,9 +345,9 @@ string tests32i[] = { // Track names of instructions instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE, - dut.hart.ifu.InstrD, 
dut.hart.ifu.InstrE, - dut.hart.ifu.InstrM, InstrW, - InstrDName, InstrEName, InstrMName, InstrWName); + dut.hart.ifu.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, + dut.hart.ifu.InstrM, dut.hart.ifu.InstrW, + InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); // initialize tests initial @@ -452,14 +450,16 @@ endmodule module instrTrackerTB( input logic clk, reset, FlushE, - input logic [31:0] InstrD, + input logic [31:0] InstrF, InstrD, input logic [31:0] InstrE, InstrM, - output logic [31:0] InstrW, - output string InstrDName, InstrEName, InstrMName, InstrWName); + input logic [31:0] InstrW, +// output logic [31:0] InstrW, + output string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); // stage Instr to Writeback for visualization - flopr #(32) InstrWReg(clk, reset, InstrM, InstrW); + // flopr #(32) InstrWReg(clk, reset, InstrM, InstrW); + instrNameDecTB fdec(InstrF, InstrFName); instrNameDecTB ddec(InstrD, InstrDName); instrNameDecTB edec(InstrE, InstrEName); instrNameDecTB mdec(InstrM, InstrMName);