diff --git a/wally-pipelined/src/dmem/dmem.sv b/wally-pipelined/src/dmem/dmem.sv index 547663fce..321d38383 100644 --- a/wally-pipelined/src/dmem/dmem.sv +++ b/wally-pipelined/src/dmem/dmem.sv @@ -37,7 +37,7 @@ module dmem ( input logic [2:0] Funct3M, //input logic [`XLEN-1:0] ReadDataW, input logic [`XLEN-1:0] WriteDataM, - input logic AtomicM, + input logic [1:0] AtomicM, output logic [`XLEN-1:0] MemPAdrM, output logic MemReadM, MemWriteM, output logic DataMisalignedM, @@ -98,8 +98,8 @@ module dmem ( logic ReservationValidM, ReservationValidW; logic lrM, scM, WriteAdrMatchM; - assign lrM = MemReadM && AtomicM; - assign scM = MemRWM[0] && AtomicM; + assign lrM = MemReadM && AtomicM[0]; + assign scM = MemRWM[0] && AtomicM[0]; assign WriteAdrMatchM = MemRWM[0] && (MemPAdrM[`XLEN-1:2] == ReservationPAdrW) && ReservationValidW; assign SquashSCM = scM && ~WriteAdrMatchM; always_comb begin // ReservationValidM (next valiue of valid reservation) diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 07918cdb2..2c840a0c7 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -35,6 +35,8 @@ module ahblite ( input logic StallW, FlushW, // Load control input logic UnsignedLoadM, + input logic [1:0] AtomicM, + input logic [6:0] Funct7M, // Signals from Instruction Cache input logic [`XLEN-1:0] InstrPAdrF, // *** rename these to match block diagram input logic InstrReadF, @@ -68,7 +70,7 @@ module ahblite ( logic GrantData; logic [2:0] ISize; - logic [`AHBW-1:0] HRDATAMasked, ReadDataM, ReadDataPreW; + logic [`AHBW-1:0] HRDATAMasked, ReadDataM, ReadDataPreW, WriteData; logic IReady, DReady; logic CaptureDataM; @@ -81,17 +83,23 @@ module ahblite ( // Data accesses have priority over instructions. However, if a data access comes // while an instruction read is occuring, the instruction read finishes before // the data access can take place. - typedef enum {IDLE, MEMREAD, MEMWRITE, INSTRREAD, INSTRREADMEMPENDING} statetype; + typedef enum {IDLE, MEMREAD, MEMWRITE, INSTRREAD, INSTRREADMEMPENDING, ATOMICREAD, ATOMICWRITE} statetype; statetype BusState, NextBusState; flopenl #(.TYPE(statetype)) busreg(HCLK, ~HRESETn, 1'b1, NextBusState, IDLE, BusState); always_comb case (BusState) - IDLE: if (MemReadM) NextBusState = MEMREAD; // Memory has pirority over instructions + IDLE: if (AtomicM[1]) NextBusState = ATOMICREAD; + else if (MemReadM) NextBusState = MEMREAD; // Memory has pirority over instructions else if (MemWriteM) NextBusState = MEMWRITE; else if (InstrReadF) NextBusState = INSTRREAD; else NextBusState = IDLE; + ATOMICREAD: if (~HREADY) NextBusState = ATOMICREAD; + else NextBusState = ATOMICWRITE; + ATOMICWRITE: if (~HREADY) NextBusState = ATOMICWRITE; + else if (InstrReadF) NextBusState = INSTRREAD; + else NextBusState = IDLE; MEMREAD: if (~HREADY) NextBusState = MEMREAD; else if (InstrReadF) NextBusState = INSTRREAD; else NextBusState = IDLE; @@ -107,17 +115,14 @@ module ahblite ( endcase // stall signals - assign #2 DataStall = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || (NextBusState == INSTRREADMEMPENDING); + assign #2 DataStall = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || + (NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE) || + (NextBusState == INSTRREADMEMPENDING); assign #1 InstrStall = (NextBusState == INSTRREAD); - - // DH 2/20/22: A cyclic path presently exists - // HREADY->NextBusState->GrantData->HSIZE->HSELUART->HREADY - // This is because the peripherals assert HREADY on the same cycle - // When memory is working, also fix the peripherals to respond on the subsequent cycle - // and this path should be fixed. - + // bus outputs - assign #1 GrantData = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE); + assign #1 GrantData = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || + (NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE); assign #1 HADDR = (GrantData) ? MemPAdrM[31:0] : InstrPAdrF[31:0]; assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width; ignored on reads anyway assign #1 HSIZE = GrantData ? {1'b0, MemSizeM} : ISize; @@ -125,9 +130,9 @@ module ahblite ( assign HPROT = 4'b0011; // not used; see Section 3.7 assign HTRANS = (NextBusState != IDLE) ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise assign HMASTLOCK = 0; // no locking supported - assign HWRITE = (NextBusState == MEMWRITE); + assign HWRITE = (NextBusState == MEMWRITE) || (NextBusState == ATOMICWRITE); // delay write data by one cycle for - flop #(`XLEN) wdreg(HCLK, WriteDataM, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN + flop #(`XLEN) wdreg(HCLK, WriteData, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN // delay signals for subword writes flop #(3) adrreg(HCLK, HADDR[2:0], HADDRD); flop #(4) sizereg(HCLK, {UnsignedLoadM, HSIZE}, HSIZED); @@ -138,11 +143,24 @@ module ahblite ( assign InstrRData = HRDATA; assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021 - assign CaptureDataM = (BusState == MEMREAD) && (NextBusState != MEMREAD); + assign CaptureDataM = ((BusState == MEMREAD) && (NextBusState != MEMREAD)) || + ((BusState == ATOMICREAD) && (NextBusState == ATOMICWRITE)); + // *** check if this introduces an unnecessary cycle of latency in memory accesses flopenr #(`XLEN) ReadDataPreWReg(clk, reset, CaptureDataM, ReadDataM, ReadDataPreW); // *** this may break when there is no instruction read after data read flopenr #(`XLEN) ReadDataWReg(clk, reset, ~StallW, ReadDataPreW, ReadDataW); + // Extract and sign-extend subwords if necessary subwordread swr(.*); -endmodule + // Handle AMO instructions if applicable + generate + if (`A_SUPPORTED) begin + logic [`XLEN-1:0] AMOResult; + amoalu amoalu(.a(HRDATA), .b(WriteDataM), .funct(Funct7M), .width(MemSizeM), + .result(AMOResult)); + mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, AtomicM[1], WriteData); + end else + assign WriteData = WriteDataM; + endgenerate +endmodule diff --git a/wally-pipelined/src/ebu/amoalu.sv b/wally-pipelined/src/ebu/amoalu.sv new file mode 100644 index 000000000..59d9f6927 --- /dev/null +++ b/wally-pipelined/src/ebu/amoalu.sv @@ -0,0 +1,66 @@ +/////////////////////////////////////////// +// amoalu.sv +// +// Written: David_Harris@hmc.edu 10 March 2021 +// Modified: +// +// Purpose: Performs AMO operations +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" + +module amoalu ( + input logic [`XLEN-1:0] a, b, + input logic [6:0] funct, + input logic [1:0] width, + output logic [`XLEN-1:0] result); + + logic [`XLEN-1:0] y; + + // *** see how synthesis generates this and optimize more structurally if necessary to share hardware + // a single carry chain should be shared for + and the four min/max + // and the same mux can be used to select b for swap. + always_comb + case (funct[6:2]) + 5'b00001: y = b; // amoswap + 5'b00000: y = a + b; // amoadd + 5'b00100: y = a ^ b; // amoxor + 5'b01100: y = a & b; // amoand + 5'b01000: y = a | b; // amoor + 5'b10000: y = (a < b) ? a : b; // amomin + 5'b10100: y = (a >= b) ? a : b; // amomax + 5'b11000: y = ({1'b0, a} < {1'b0, b}) ? a : b; // amominu + 5'b11100: y = ({1'b0, a} >= {1'b0, b}) ? a : b; // amomaxu + default: y = 'bx; // undefined; *** could change to b for efficiency + endcase + + // sign extend if necessary + generate + if (`XLEN == 32) begin + assign result = y; + end else begin // `XLEN = 64 + always_comb + if (width == 2'b10) // sign-extend word-length operations + result = {{32{y[31]}}, y[31:0]}; + else result = y; + end + endgenerate + +endmodule + diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index dc4f16d5b..cf8c3e17e 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -48,7 +48,7 @@ module controller( input logic StallM, FlushM, output logic [1:0] MemRWM, output logic CSRReadM, CSRWriteM, PrivilegedM, - output logic AtomicM, + output logic [1:0] AtomicM, output logic [2:0] Funct3M, output logic RegWriteM, // for Hazard Unit // Writeback stage control signals @@ -65,6 +65,8 @@ module controller( logic [6:0] Funct7D; logic [4:0] Rs1D; + `define CTRLW 23 + // pipelined control signals logic RegWriteD, RegWriteE; logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM; @@ -77,11 +79,11 @@ module controller( logic TargetSrcD, W64D, MulDivD; logic CSRZeroSrcD; logic CSRReadD; - logic AtomicD, AtomicE; + logic [1:0] AtomicD, AtomicE; logic CSRWriteD, CSRWriteE; logic InstrValidE, InstrValidM; logic PrivilegedD, PrivilegedE; - logic [21:0] ControlsD; + logic [`CTRLW-1:0] ControlsD; logic aluc3D; logic subD, sraD, sltD, sltuD; logic BranchTakenE; @@ -100,48 +102,47 @@ module controller( generate always_comb case(OpD) - // *** Atomic p. 132 assembly encodings, defs 48 // RegWrite_ImmSrc_ALUSrc_MemRW_ResultSrc_Branch_ALUOp_Jump_TargetSrc_W64_CSRRead_Privileged_MulDiv_Atomic_Illegal - 7'b0000000: ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_0_0_0_1; // illegal instruction - 7'b0000011: ControlsD = 22'b1_000_01_10_001_0_00_0_0_0_0_0_0_0_0; // lw - 7'b0001111: ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_0_0_0_0; // fence = nop - 7'b0010011: ControlsD = 22'b1_000_01_00_000_0_10_0_0_0_0_0_0_0_0; // I-type ALU - 7'b0010111: ControlsD = 22'b1_100_11_00_000_0_00_0_0_0_0_0_0_0_0; // auipc + 7'b0000000: ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // illegal instruction + 7'b0000011: ControlsD = `CTRLW'b1_000_01_10_001_0_00_0_0_0_0_0_0_00_0; // lw + 7'b0001111: ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_0; // fence = nop + 7'b0010011: ControlsD = `CTRLW'b1_000_01_00_000_0_10_0_0_0_0_0_0_00_0; // I-type ALU + 7'b0010111: ControlsD = `CTRLW'b1_100_11_00_000_0_00_0_0_0_0_0_0_00_0; // auipc 7'b0011011: if (`XLEN == 64) - ControlsD = 22'b1_000_01_00_000_0_10_0_0_1_0_0_0_0_0; // IW-type ALU for RV64i + ControlsD = `CTRLW'b1_000_01_00_000_0_10_0_0_1_0_0_0_00_0; // IW-type ALU for RV64i else - ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_0_0_0_1; // non-implemented instruction - 7'b0100011: ControlsD = 22'b0_001_01_01_000_0_00_0_0_0_0_0_0_0_0; // sw + ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction + 7'b0100011: ControlsD = `CTRLW'b0_001_01_01_000_0_00_0_0_0_0_0_0_00_0; // sw 7'b0101111: if (`A_SUPPORTED) begin if (InstrD[31:27] == 5'b00010) - ControlsD = 22'b1_000_00_10_001_0_00_0_0_0_0_0_0_1_0; // lr + ControlsD = `CTRLW'b1_000_00_10_001_0_00_0_0_0_0_0_0_01_0; // lr else if (InstrD[31:27] == 5'b00011) - ControlsD = 22'b1_101_01_01_101_0_00_0_0_0_0_0_0_1_0; // sc + ControlsD = `CTRLW'b1_101_01_01_101_0_00_0_0_0_0_0_0_01_0; // sc else - ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_0_0_1_0; // other atomic; decode later + ControlsD = `CTRLW'b1_101_00_11_001_0_00_0_0_0_0_0_0_10_0;; // amo end else - ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_0_0_0_1; // non-implemented instruction + ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction 7'b0110011: if (Funct7D == 7'b0000000 || Funct7D == 7'b0100000) - ControlsD = 22'b1_000_00_00_000_0_10_0_0_0_0_0_0_0_0; // R-type + ControlsD = `CTRLW'b1_000_00_00_000_0_10_0_0_0_0_0_0_00_0; // R-type else if (Funct7D == 7'b0000001 && `M_SUPPORTED) - ControlsD = 22'b1_000_00_00_100_0_00_0_0_0_0_0_1_0_0; // Multiply/Divide + ControlsD = `CTRLW'b1_000_00_00_100_0_00_0_0_0_0_0_1_00_0; // Multiply/Divide else - ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_0_0_0_1; // non-implemented instruction - 7'b0110111: ControlsD = 22'b1_100_01_00_000_0_11_0_0_0_0_0_0_0_0; // lui + ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction + 7'b0110111: ControlsD = `CTRLW'b1_100_01_00_000_0_11_0_0_0_0_0_0_00_0; // lui 7'b0111011: if ((Funct7D == 7'b0000000 || Funct7D == 7'b0100000) && `XLEN == 64) - ControlsD = 22'b1_000_00_00_000_0_10_0_0_1_0_0_0_0_0; // R-type W instructions for RV64i + ControlsD = `CTRLW'b1_000_00_00_000_0_10_0_0_1_0_0_0_00_0; // R-type W instructions for RV64i else if (Funct7D == 7'b0000001 && `M_SUPPORTED && `XLEN == 64) - ControlsD = 22'b1_000_00_00_100_0_00_0_0_1_0_0_1_0_0; // W-type Multiply/Divide + ControlsD = `CTRLW'b1_000_00_00_100_0_00_0_0_1_0_0_1_00_0; // W-type Multiply/Divide else - ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_0_0_0_1; // non-implemented instruction - 7'b1100011: ControlsD = 22'b0_010_00_00_000_1_01_0_0_0_0_0_0_0_0; // beq - 7'b1100111: ControlsD = 22'b1_000_00_00_010_0_00_1_1_0_0_0_0_0_0; // jalr - 7'b1101111: ControlsD = 22'b1_011_00_00_010_0_00_1_0_0_0_0_0_0_0; // jal + ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction + 7'b1100011: ControlsD = `CTRLW'b0_010_00_00_000_1_01_0_0_0_0_0_0_00_0; // beq + 7'b1100111: ControlsD = `CTRLW'b1_000_00_00_010_0_00_1_1_0_0_0_0_00_0; // jalr + 7'b1101111: ControlsD = `CTRLW'b1_011_00_00_010_0_00_1_0_0_0_0_0_00_0; // jal 7'b1110011: if (Funct3D == 3'b000) - ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_1_0_0_0; // privileged; decoded further in priveleged modules + ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_1_0_00_0; // privileged; decoded further in priveleged modules else - ControlsD = 22'b1_000_00_00_011_0_00_0_0_0_1_0_0_0_0; // csrs - default: ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_0_0_0_1; // non-implemented instruction + ControlsD = `CTRLW'b1_000_00_00_011_0_00_0_0_0_1_0_0_00_0; // csrs + default: ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction endcase endgenerate @@ -173,7 +174,7 @@ module controller( endcase // Execute stage pipeline control register and logic - flopenrc #(26) controlregE(clk, reset, FlushE, ~StallE, + flopenrc #(27) controlregE(clk, reset, FlushE, ~StallE, {RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUControlD, ALUSrcAD, ALUSrcBD, TargetSrcD, CSRReadD, CSRWriteD, PrivilegedD, Funct3D, W64D, MulDivD, AtomicD, 1'b1}, {RegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUControlE, ALUSrcAE, ALUSrcBE, TargetSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, MulDivE, AtomicE, InstrValidE}); @@ -196,7 +197,7 @@ module controller( assign MemReadE = MemRWE[1]; // Memory stage pipeline control register - flopenrc #(14) controlregM(clk, reset, FlushM, ~StallM, + flopenrc #(15) controlregM(clk, reset, FlushM, ~StallM, {RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, AtomicE, InstrValidE}, {RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, AtomicM, InstrValidM}); diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index d021bca62..f408fbd9d 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -43,7 +43,7 @@ module ieu ( input logic DataAccessFaultM, input logic SquashSCW, output logic [1:0] MemRWM, - output logic AtomicM, + output logic [1:0] AtomicM, output logic [`XLEN-1:0] MemAdrM, WriteDataM, output logic [`XLEN-1:0] SrcAM, output logic [2:0] Funct3M, diff --git a/wally-pipelined/src/uncore/dtim.sv b/wally-pipelined/src/uncore/dtim.sv index 42ae7fbcc..5d3f47e3a 100644 --- a/wally-pipelined/src/uncore/dtim.sv +++ b/wally-pipelined/src/uncore/dtim.sv @@ -59,7 +59,7 @@ module dtim #(parameter BASE=0, RANGE = 65535) ( busycount <= 0; HREADYTim <= #1 0; end else if (~HREADYTim) begin - if (busycount == 2) begin // TIM latency, for testing purposes + if (busycount == 2) begin // TIM latency, for testing purposes. *** test with different values HREADYTim <= #1 1; end else begin busycount <= busycount + 1; diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 3c8b25c37..da4abad24 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -60,7 +60,8 @@ module wallypipelinedhart ( // new signals that must connect through DP logic MulDivE, W64E; - logic CSRReadM, CSRWriteM, PrivilegedM, AtomicM; + logic CSRReadM, CSRWriteM, PrivilegedM; + logic [1:0] AtomicM; logic [`XLEN-1:0] SrcAE, SrcBE; logic [`XLEN-1:0] SrcAM; logic [2:0] Funct3E; @@ -113,6 +114,7 @@ module wallypipelinedhart ( //.InstrReadF(1'b0), //.InstrRData(InstrF), // hook up InstrF later .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), + .Funct7M(InstrM[31:25]), .*); // changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed.