Initial untested implementation of AMO instructions

This commit is contained in:
David Harris 2021-03-11 00:11:31 -05:00
parent 2d1f63b590
commit fe4d288589
7 changed files with 140 additions and 53 deletions

View File

@ -37,7 +37,7 @@ module dmem (
input logic [2:0] Funct3M,
//input logic [`XLEN-1:0] ReadDataW,
input logic [`XLEN-1:0] WriteDataM,
input logic AtomicM,
input logic [1:0] AtomicM,
output logic [`XLEN-1:0] MemPAdrM,
output logic MemReadM, MemWriteM,
output logic DataMisalignedM,
@ -98,8 +98,8 @@ module dmem (
logic ReservationValidM, ReservationValidW;
logic lrM, scM, WriteAdrMatchM;
assign lrM = MemReadM && AtomicM;
assign scM = MemRWM[0] && AtomicM;
assign lrM = MemReadM && AtomicM[0];
assign scM = MemRWM[0] && AtomicM[0];
assign WriteAdrMatchM = MemRWM[0] && (MemPAdrM[`XLEN-1:2] == ReservationPAdrW) && ReservationValidW;
assign SquashSCM = scM && ~WriteAdrMatchM;
always_comb begin // ReservationValidM (next valiue of valid reservation)

View File

@ -35,6 +35,8 @@ module ahblite (
input logic StallW, FlushW,
// Load control
input logic UnsignedLoadM,
input logic [1:0] AtomicM,
input logic [6:0] Funct7M,
// Signals from Instruction Cache
input logic [`XLEN-1:0] InstrPAdrF, // *** rename these to match block diagram
input logic InstrReadF,
@ -68,7 +70,7 @@ module ahblite (
logic GrantData;
logic [2:0] ISize;
logic [`AHBW-1:0] HRDATAMasked, ReadDataM, ReadDataPreW;
logic [`AHBW-1:0] HRDATAMasked, ReadDataM, ReadDataPreW, WriteData;
logic IReady, DReady;
logic CaptureDataM;
@ -81,17 +83,23 @@ module ahblite (
// Data accesses have priority over instructions. However, if a data access comes
// while an instruction read is occuring, the instruction read finishes before
// the data access can take place.
typedef enum {IDLE, MEMREAD, MEMWRITE, INSTRREAD, INSTRREADMEMPENDING} statetype;
typedef enum {IDLE, MEMREAD, MEMWRITE, INSTRREAD, INSTRREADMEMPENDING, ATOMICREAD, ATOMICWRITE} statetype;
statetype BusState, NextBusState;
flopenl #(.TYPE(statetype)) busreg(HCLK, ~HRESETn, 1'b1, NextBusState, IDLE, BusState);
always_comb
case (BusState)
IDLE: if (MemReadM) NextBusState = MEMREAD; // Memory has pirority over instructions
IDLE: if (AtomicM[1]) NextBusState = ATOMICREAD;
else if (MemReadM) NextBusState = MEMREAD; // Memory has pirority over instructions
else if (MemWriteM) NextBusState = MEMWRITE;
else if (InstrReadF) NextBusState = INSTRREAD;
else NextBusState = IDLE;
ATOMICREAD: if (~HREADY) NextBusState = ATOMICREAD;
else NextBusState = ATOMICWRITE;
ATOMICWRITE: if (~HREADY) NextBusState = ATOMICWRITE;
else if (InstrReadF) NextBusState = INSTRREAD;
else NextBusState = IDLE;
MEMREAD: if (~HREADY) NextBusState = MEMREAD;
else if (InstrReadF) NextBusState = INSTRREAD;
else NextBusState = IDLE;
@ -107,17 +115,14 @@ module ahblite (
endcase
// stall signals
assign #2 DataStall = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || (NextBusState == INSTRREADMEMPENDING);
assign #2 DataStall = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) ||
(NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE) ||
(NextBusState == INSTRREADMEMPENDING);
assign #1 InstrStall = (NextBusState == INSTRREAD);
// DH 2/20/22: A cyclic path presently exists
// HREADY->NextBusState->GrantData->HSIZE->HSELUART->HREADY
// This is because the peripherals assert HREADY on the same cycle
// When memory is working, also fix the peripherals to respond on the subsequent cycle
// and this path should be fixed.
// bus outputs
assign #1 GrantData = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE);
assign #1 GrantData = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) ||
(NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE);
assign #1 HADDR = (GrantData) ? MemPAdrM[31:0] : InstrPAdrF[31:0];
assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width; ignored on reads anyway
assign #1 HSIZE = GrantData ? {1'b0, MemSizeM} : ISize;
@ -125,9 +130,9 @@ module ahblite (
assign HPROT = 4'b0011; // not used; see Section 3.7
assign HTRANS = (NextBusState != IDLE) ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise
assign HMASTLOCK = 0; // no locking supported
assign HWRITE = (NextBusState == MEMWRITE);
assign HWRITE = (NextBusState == MEMWRITE) || (NextBusState == ATOMICWRITE);
// delay write data by one cycle for
flop #(`XLEN) wdreg(HCLK, WriteDataM, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN
flop #(`XLEN) wdreg(HCLK, WriteData, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN
// delay signals for subword writes
flop #(3) adrreg(HCLK, HADDR[2:0], HADDRD);
flop #(4) sizereg(HCLK, {UnsignedLoadM, HSIZE}, HSIZED);
@ -138,11 +143,24 @@ module ahblite (
assign InstrRData = HRDATA;
assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021
assign CaptureDataM = (BusState == MEMREAD) && (NextBusState != MEMREAD);
assign CaptureDataM = ((BusState == MEMREAD) && (NextBusState != MEMREAD)) ||
((BusState == ATOMICREAD) && (NextBusState == ATOMICWRITE));
// *** check if this introduces an unnecessary cycle of latency in memory accesses
flopenr #(`XLEN) ReadDataPreWReg(clk, reset, CaptureDataM, ReadDataM, ReadDataPreW); // *** this may break when there is no instruction read after data read
flopenr #(`XLEN) ReadDataWReg(clk, reset, ~StallW, ReadDataPreW, ReadDataW);
// Extract and sign-extend subwords if necessary
subwordread swr(.*);
endmodule
// Handle AMO instructions if applicable
generate
if (`A_SUPPORTED) begin
logic [`XLEN-1:0] AMOResult;
amoalu amoalu(.a(HRDATA), .b(WriteDataM), .funct(Funct7M), .width(MemSizeM),
.result(AMOResult));
mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, AtomicM[1], WriteData);
end else
assign WriteData = WriteDataM;
endgenerate
endmodule

View File

@ -0,0 +1,66 @@
///////////////////////////////////////////
// amoalu.sv
//
// Written: David_Harris@hmc.edu 10 March 2021
// Modified:
//
// Purpose: Performs AMO operations
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
module amoalu (
input logic [`XLEN-1:0] a, b,
input logic [6:0] funct,
input logic [1:0] width,
output logic [`XLEN-1:0] result);
logic [`XLEN-1:0] y;
// *** see how synthesis generates this and optimize more structurally if necessary to share hardware
// a single carry chain should be shared for + and the four min/max
// and the same mux can be used to select b for swap.
always_comb
case (funct[6:2])
5'b00001: y = b; // amoswap
5'b00000: y = a + b; // amoadd
5'b00100: y = a ^ b; // amoxor
5'b01100: y = a & b; // amoand
5'b01000: y = a | b; // amoor
5'b10000: y = (a < b) ? a : b; // amomin
5'b10100: y = (a >= b) ? a : b; // amomax
5'b11000: y = ({1'b0, a} < {1'b0, b}) ? a : b; // amominu
5'b11100: y = ({1'b0, a} >= {1'b0, b}) ? a : b; // amomaxu
default: y = 'bx; // undefined; *** could change to b for efficiency
endcase
// sign extend if necessary
generate
if (`XLEN == 32) begin
assign result = y;
end else begin // `XLEN = 64
always_comb
if (width == 2'b10) // sign-extend word-length operations
result = {{32{y[31]}}, y[31:0]};
else result = y;
end
endgenerate
endmodule

View File

@ -48,7 +48,7 @@ module controller(
input logic StallM, FlushM,
output logic [1:0] MemRWM,
output logic CSRReadM, CSRWriteM, PrivilegedM,
output logic AtomicM,
output logic [1:0] AtomicM,
output logic [2:0] Funct3M,
output logic RegWriteM, // for Hazard Unit
// Writeback stage control signals
@ -65,6 +65,8 @@ module controller(
logic [6:0] Funct7D;
logic [4:0] Rs1D;
`define CTRLW 23
// pipelined control signals
logic RegWriteD, RegWriteE;
logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM;
@ -77,11 +79,11 @@ module controller(
logic TargetSrcD, W64D, MulDivD;
logic CSRZeroSrcD;
logic CSRReadD;
logic AtomicD, AtomicE;
logic [1:0] AtomicD, AtomicE;
logic CSRWriteD, CSRWriteE;
logic InstrValidE, InstrValidM;
logic PrivilegedD, PrivilegedE;
logic [21:0] ControlsD;
logic [`CTRLW-1:0] ControlsD;
logic aluc3D;
logic subD, sraD, sltD, sltuD;
logic BranchTakenE;
@ -100,48 +102,47 @@ module controller(
generate
always_comb
case(OpD)
// *** Atomic p. 132 assembly encodings, defs 48
// RegWrite_ImmSrc_ALUSrc_MemRW_ResultSrc_Branch_ALUOp_Jump_TargetSrc_W64_CSRRead_Privileged_MulDiv_Atomic_Illegal
7'b0000000: ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_0_0_0_1; // illegal instruction
7'b0000011: ControlsD = 22'b1_000_01_10_001_0_00_0_0_0_0_0_0_0_0; // lw
7'b0001111: ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_0_0_0_0; // fence = nop
7'b0010011: ControlsD = 22'b1_000_01_00_000_0_10_0_0_0_0_0_0_0_0; // I-type ALU
7'b0010111: ControlsD = 22'b1_100_11_00_000_0_00_0_0_0_0_0_0_0_0; // auipc
7'b0000000: ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // illegal instruction
7'b0000011: ControlsD = `CTRLW'b1_000_01_10_001_0_00_0_0_0_0_0_0_00_0; // lw
7'b0001111: ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_0; // fence = nop
7'b0010011: ControlsD = `CTRLW'b1_000_01_00_000_0_10_0_0_0_0_0_0_00_0; // I-type ALU
7'b0010111: ControlsD = `CTRLW'b1_100_11_00_000_0_00_0_0_0_0_0_0_00_0; // auipc
7'b0011011: if (`XLEN == 64)
ControlsD = 22'b1_000_01_00_000_0_10_0_0_1_0_0_0_0_0; // IW-type ALU for RV64i
ControlsD = `CTRLW'b1_000_01_00_000_0_10_0_0_1_0_0_0_00_0; // IW-type ALU for RV64i
else
ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_0_0_0_1; // non-implemented instruction
7'b0100011: ControlsD = 22'b0_001_01_01_000_0_00_0_0_0_0_0_0_0_0; // sw
ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction
7'b0100011: ControlsD = `CTRLW'b0_001_01_01_000_0_00_0_0_0_0_0_0_00_0; // sw
7'b0101111: if (`A_SUPPORTED) begin
if (InstrD[31:27] == 5'b00010)
ControlsD = 22'b1_000_00_10_001_0_00_0_0_0_0_0_0_1_0; // lr
ControlsD = `CTRLW'b1_000_00_10_001_0_00_0_0_0_0_0_0_01_0; // lr
else if (InstrD[31:27] == 5'b00011)
ControlsD = 22'b1_101_01_01_101_0_00_0_0_0_0_0_0_1_0; // sc
ControlsD = `CTRLW'b1_101_01_01_101_0_00_0_0_0_0_0_0_01_0; // sc
else
ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_0_0_1_0; // other atomic; decode later
ControlsD = `CTRLW'b1_101_00_11_001_0_00_0_0_0_0_0_0_10_0;; // amo
end else
ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_0_0_0_1; // non-implemented instruction
ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction
7'b0110011: if (Funct7D == 7'b0000000 || Funct7D == 7'b0100000)
ControlsD = 22'b1_000_00_00_000_0_10_0_0_0_0_0_0_0_0; // R-type
ControlsD = `CTRLW'b1_000_00_00_000_0_10_0_0_0_0_0_0_00_0; // R-type
else if (Funct7D == 7'b0000001 && `M_SUPPORTED)
ControlsD = 22'b1_000_00_00_100_0_00_0_0_0_0_0_1_0_0; // Multiply/Divide
ControlsD = `CTRLW'b1_000_00_00_100_0_00_0_0_0_0_0_1_00_0; // Multiply/Divide
else
ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_0_0_0_1; // non-implemented instruction
7'b0110111: ControlsD = 22'b1_100_01_00_000_0_11_0_0_0_0_0_0_0_0; // lui
ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction
7'b0110111: ControlsD = `CTRLW'b1_100_01_00_000_0_11_0_0_0_0_0_0_00_0; // lui
7'b0111011: if ((Funct7D == 7'b0000000 || Funct7D == 7'b0100000) && `XLEN == 64)
ControlsD = 22'b1_000_00_00_000_0_10_0_0_1_0_0_0_0_0; // R-type W instructions for RV64i
ControlsD = `CTRLW'b1_000_00_00_000_0_10_0_0_1_0_0_0_00_0; // R-type W instructions for RV64i
else if (Funct7D == 7'b0000001 && `M_SUPPORTED && `XLEN == 64)
ControlsD = 22'b1_000_00_00_100_0_00_0_0_1_0_0_1_0_0; // W-type Multiply/Divide
ControlsD = `CTRLW'b1_000_00_00_100_0_00_0_0_1_0_0_1_00_0; // W-type Multiply/Divide
else
ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_0_0_0_1; // non-implemented instruction
7'b1100011: ControlsD = 22'b0_010_00_00_000_1_01_0_0_0_0_0_0_0_0; // beq
7'b1100111: ControlsD = 22'b1_000_00_00_010_0_00_1_1_0_0_0_0_0_0; // jalr
7'b1101111: ControlsD = 22'b1_011_00_00_010_0_00_1_0_0_0_0_0_0_0; // jal
ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction
7'b1100011: ControlsD = `CTRLW'b0_010_00_00_000_1_01_0_0_0_0_0_0_00_0; // beq
7'b1100111: ControlsD = `CTRLW'b1_000_00_00_010_0_00_1_1_0_0_0_0_00_0; // jalr
7'b1101111: ControlsD = `CTRLW'b1_011_00_00_010_0_00_1_0_0_0_0_0_00_0; // jal
7'b1110011: if (Funct3D == 3'b000)
ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_1_0_0_0; // privileged; decoded further in priveleged modules
ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_1_0_00_0; // privileged; decoded further in priveleged modules
else
ControlsD = 22'b1_000_00_00_011_0_00_0_0_0_1_0_0_0_0; // csrs
default: ControlsD = 22'b0_000_00_00_000_0_00_0_0_0_0_0_0_0_1; // non-implemented instruction
ControlsD = `CTRLW'b1_000_00_00_011_0_00_0_0_0_1_0_0_00_0; // csrs
default: ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction
endcase
endgenerate
@ -173,7 +174,7 @@ module controller(
endcase
// Execute stage pipeline control register and logic
flopenrc #(26) controlregE(clk, reset, FlushE, ~StallE,
flopenrc #(27) controlregE(clk, reset, FlushE, ~StallE,
{RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUControlD, ALUSrcAD, ALUSrcBD, TargetSrcD, CSRReadD, CSRWriteD, PrivilegedD, Funct3D, W64D, MulDivD, AtomicD, 1'b1},
{RegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUControlE, ALUSrcAE, ALUSrcBE, TargetSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, MulDivE, AtomicE, InstrValidE});
@ -196,7 +197,7 @@ module controller(
assign MemReadE = MemRWE[1];
// Memory stage pipeline control register
flopenrc #(14) controlregM(clk, reset, FlushM, ~StallM,
flopenrc #(15) controlregM(clk, reset, FlushM, ~StallM,
{RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, AtomicE, InstrValidE},
{RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, AtomicM, InstrValidM});

View File

@ -43,7 +43,7 @@ module ieu (
input logic DataAccessFaultM,
input logic SquashSCW,
output logic [1:0] MemRWM,
output logic AtomicM,
output logic [1:0] AtomicM,
output logic [`XLEN-1:0] MemAdrM, WriteDataM,
output logic [`XLEN-1:0] SrcAM,
output logic [2:0] Funct3M,

View File

@ -59,7 +59,7 @@ module dtim #(parameter BASE=0, RANGE = 65535) (
busycount <= 0;
HREADYTim <= #1 0;
end else if (~HREADYTim) begin
if (busycount == 2) begin // TIM latency, for testing purposes
if (busycount == 2) begin // TIM latency, for testing purposes. *** test with different values
HREADYTim <= #1 1;
end else begin
busycount <= busycount + 1;

View File

@ -60,7 +60,8 @@ module wallypipelinedhart (
// new signals that must connect through DP
logic MulDivE, W64E;
logic CSRReadM, CSRWriteM, PrivilegedM, AtomicM;
logic CSRReadM, CSRWriteM, PrivilegedM;
logic [1:0] AtomicM;
logic [`XLEN-1:0] SrcAE, SrcBE;
logic [`XLEN-1:0] SrcAM;
logic [2:0] Funct3E;
@ -113,6 +114,7 @@ module wallypipelinedhart (
//.InstrReadF(1'b0),
//.InstrRData(InstrF), // hook up InstrF later
.MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]),
.Funct7M(InstrM[31:25]),
.*);
// changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed.