cvw/toy/riscvpipelined.sv
2021-11-18 17:15:40 -08:00

571 lines
19 KiB
Systemverilog

// riscvpipelined.sv
// RISC-V pipelined processor
// From Section 7.6 of Digital Design & Computer Architecture: RISC-V Edition
// 27 April 2020
// David_Harris@hmc.edu
// Sarah.Harris@unlv.edu
// run 210
// Expect simulator to print "Simulation succeeded"
// when the value 25 (0x19) is written to address 100 (0x64)
// Pipelined implementation of RISC-V (RV32I)
// User-level Instruction Set Architecture V2.2 (May 7, 2017)
// Implements a subset of the base integer instructions:
// lw, sw
// add, sub, and, or, slt,
// addi, andi, ori, slti
// beq
// jal
// Exceptions, traps, and interrupts not implemented
// little-endian memory
// 31 32-bit registers x1-x31, x0 hardwired to 0
// R-Type instructions
// add, sub, and, or, slt
// INSTR rd, rs1, rs2
// Instr[31:25] = funct7 (funct7b5 & opb5 = 1 for sub, 0 for others)
// Instr[24:20] = rs2
// Instr[19:15] = rs1
// Instr[14:12] = funct3
// Instr[11:7] = rd
// Instr[6:0] = opcode
// I-Type Instructions
// lw, I-type ALU (addi, andi, ori, slti)
// lw: INSTR rd, imm(rs1)
// I-type ALU: INSTR rd, rs1, imm (12-bit signed)
// Instr[31:20] = imm[11:0]
// Instr[24:20] = rs2
// Instr[19:15] = rs1
// Instr[14:12] = funct3
// Instr[11:7] = rd
// Instr[6:0] = opcode
// S-Type Instruction
// sw rs2, imm(rs1) (store rs2 into address specified by rs1 + immm)
// Instr[31:25] = imm[11:5] (offset[11:5])
// Instr[24:20] = rs2 (src)
// Instr[19:15] = rs1 (base)
// Instr[14:12] = funct3
// Instr[11:7] = imm[4:0] (offset[4:0])
// Instr[6:0] = opcode
// B-Type Instruction
// beq rs1, rs2, imm (PCTarget = PC + (signed imm x 2))
// Instr[31:25] = imm[12], imm[10:5]
// Instr[24:20] = rs2
// Instr[19:15] = rs1
// Instr[14:12] = funct3
// Instr[11:7] = imm[4:1], imm[11]
// Instr[6:0] = opcode
// J-Type Instruction
// jal rd, imm (signed imm is multiplied by 2 and added to PC, rd = PC+4)
// Instr[31:12] = imm[20], imm[10:1], imm[11], imm[19:12]
// Instr[11:7] = rd
// Instr[6:0] = opcode
// Instruction opcode funct3 funct7
// add 0110011 000 0000000
// sub 0110011 000 0100000
// and 0110011 111 0000000
// or 0110011 110 0000000
// slt 0110011 010 0000000
// addi 0010011 000 immediate
// andi 0010011 111 immediate
// ori 0010011 110 immediate
// slti 0010011 010 immediate
// beq 1100011 000 immediate
// lw 0000011 010 immediate
// sw 0100011 010 immediate
// jal 1101111 immediate immediate
module testbench();
logic clk;
logic reset;
logic [31:0] WriteData, DataAdr;
logic MemWrite;
// instantiate device to be tested
top dut(clk, reset, WriteData, DataAdr, MemWrite);
// initialize test
initial
begin
reset <= 1; # 22; reset <= 0;
end
// generate clock to sequence tests
always
begin
clk <= 1; # 5; clk <= 0; # 5;
end
// check results
always @(negedge clk)
begin
if(MemWrite) begin
if(DataAdr === 100 & WriteData === 25) begin
$display("Simulation succeeded");
$stop;
end else if (DataAdr !== 96) begin
$display("Simulation failed");
$stop;
end
end
end
endmodule
module top(input logic clk, reset,
output logic [31:0] WriteDataM, DataAdrM,
output logic MemWriteM);
logic [31:0] PCF, InstrF, ReadDataM;
// instantiate processor and memories
riscv riscv(clk, reset, PCF, InstrF, MemWriteM, DataAdrM,
WriteDataM, ReadDataM);
imem imem(PCF, InstrF);
dmem dmem(clk, MemWriteM, DataAdrM, WriteDataM, ReadDataM);
endmodule
module riscv(input logic clk, reset,
output logic [31:0] PCF,
input logic [31:0] InstrF,
output logic MemWriteM,
output logic [31:0] ALUResultM, WriteDataM,
input logic [31:0] ReadDataM);
logic [6:0] opD;
logic [2:0] funct3D;
logic funct7b5D;
logic [1:0] ImmSrcD;
logic ZeroE;
logic PCSrcE;
logic [2:0] ALUControlE;
logic ALUSrcE;
logic ResultSrcEb0;
logic RegWriteM;
logic [1:0] ResultSrcW;
logic RegWriteW;
logic [1:0] ForwardAE, ForwardBE;
logic StallF, StallD, FlushD, FlushE;
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW;
controller c(clk, reset,
opD, funct3D, funct7b5D, ImmSrcD,
FlushE, ZeroE, PCSrcE, ALUControlE, ALUSrcE, ResultSrcEb0,
MemWriteM, RegWriteM,
RegWriteW, ResultSrcW);
datapath dp(clk, reset,
StallF, PCF, InstrF,
opD, funct3D, funct7b5D, StallD, FlushD, ImmSrcD,
FlushE, ForwardAE, ForwardBE, PCSrcE, ALUControlE, ALUSrcE, ZeroE,
MemWriteM, WriteDataM, ALUResultM, ReadDataM,
RegWriteW, ResultSrcW,
Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW);
hazard hu(Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW,
PCSrcE, ResultSrcEb0, RegWriteM, RegWriteW,
ForwardAE, ForwardBE, StallF, StallD, FlushD, FlushE);
endmodule
module controller(input logic clk, reset,
// Decode stage control signals
input logic [6:0] opD,
input logic [2:0] funct3D,
input logic funct7b5D,
output logic [1:0] ImmSrcD,
// Execute stage control signals
input logic FlushE,
input logic ZeroE,
output logic PCSrcE, // for datapath and Hazard Unit
output logic [2:0] ALUControlE,
output logic ALUSrcE,
output logic ResultSrcEb0, // for Hazard Unit
// Memory stage control signals
output logic MemWriteM,
output logic RegWriteM, // for Hazard Unit
// Writeback stage control signals
output logic RegWriteW, // for datapath and Hazard Unit
output logic [1:0] ResultSrcW);
// pipelined control signals
logic RegWriteD, RegWriteE;
logic [1:0] ResultSrcD, ResultSrcE, ResultSrcM;
logic MemWriteD, MemWriteE;
logic JumpD, JumpE;
logic BranchD, BranchE;
logic [1:0] ALUOpD;
logic [2:0] ALUControlD;
logic ALUSrcD;
// Decode stage logic
maindec md(opD, ResultSrcD, MemWriteD, BranchD,
ALUSrcD, RegWriteD, JumpD, ImmSrcD, ALUOpD);
aludec ad(opD[5], funct3D, funct7b5D, ALUOpD, ALUControlD);
// Execute stage pipeline control register and logic
floprc #(10) controlregE(clk, reset, FlushE,
{RegWriteD, ResultSrcD, MemWriteD, JumpD, BranchD, ALUControlD, ALUSrcD},
{RegWriteE, ResultSrcE, MemWriteE, JumpE, BranchE, ALUControlE, ALUSrcE});
assign PCSrcE = (BranchE & ZeroE) | JumpE;
assign ResultSrcEb0 = ResultSrcE[0];
// Memory stage pipeline control register
flopr #(4) controlregM(clk, reset,
{RegWriteE, ResultSrcE, MemWriteE},
{RegWriteM, ResultSrcM, MemWriteM});
// Writeback stage pipeline control register
flopr #(3) controlregW(clk, reset,
{RegWriteM, ResultSrcM},
{RegWriteW, ResultSrcW});
endmodule
module maindec(input logic [6:0] op,
output logic [1:0] ResultSrc,
output logic MemWrite,
output logic Branch, ALUSrc,
output logic RegWrite, Jump,
output logic [1:0] ImmSrc,
output logic [1:0] ALUOp);
logic [10:0] controls;
assign {RegWrite, ImmSrc, ALUSrc, MemWrite,
ResultSrc, Branch, ALUOp, Jump} = controls;
always_comb
case(op)
// RegWrite_ImmSrc_ALUSrc_MemWrite_ResultSrc_Branch_ALUOp_Jump
7'b0000011: controls = 11'b1_00_1_0_01_0_00_0; // lw
7'b0100011: controls = 11'b0_01_1_1_00_0_00_0; // sw
7'b0110011: controls = 11'b1_xx_0_0_00_0_10_0; // R-type
7'b1100011: controls = 11'b0_10_0_0_00_1_01_0; // beq
7'b0010011: controls = 11'b1_00_1_0_00_0_10_0; // I-type ALU
7'b1101111: controls = 11'b1_11_0_0_10_0_00_1; // jal
7'b0000000: controls = 11'b0_00_0_0_00_0_00_0; // need valid values at reset
default: controls = 11'bx_xx_x_x_xx_x_xx_x; // non-implemented instruction
endcase
endmodule
module aludec(input logic opb5,
input logic [2:0] funct3,
input logic funct7b5,
input logic [1:0] ALUOp,
output logic [2:0] ALUControl);
logic RtypeSub;
assign RtypeSub = funct7b5 & opb5; // TRUE for R-type subtract instruction
always_comb
case(ALUOp)
2'b00: ALUControl = 3'b000; // addition
2'b01: ALUControl = 3'b001; // subtraction
default: case(funct3) // R-type or I-type ALU
3'b000: if (RtypeSub)
ALUControl = 3'b001; // sub
else
ALUControl = 3'b000; // add, addi
3'b010: ALUControl = 3'b101; // slt, slti
3'b110: ALUControl = 3'b011; // or, ori
3'b111: ALUControl = 3'b010; // and, andi
default: ALUControl = 3'bxxx; // ???
endcase
endcase
endmodule
module datapath(input logic clk, reset,
// Fetch stage signals
input logic StallF,
output logic [31:0] PCF,
input logic [31:0] InstrF,
// Decode stage signals
output logic [6:0] opD,
output logic [2:0] funct3D,
output logic funct7b5D,
input logic StallD, FlushD,
input logic [1:0] ImmSrcD,
// Execute stage signals
input logic FlushE,
input logic [1:0] ForwardAE, ForwardBE,
input logic PCSrcE,
input logic [2:0] ALUControlE,
input logic ALUSrcE,
output logic ZeroE,
// Memory stage signals
input logic MemWriteM,
output logic [31:0] WriteDataM, ALUResultM,
input logic [31:0] ReadDataM,
// Writeback stage signals
input logic RegWriteW,
input logic [1:0] ResultSrcW,
// Hazard Unit signals
output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E,
output logic [4:0] RdE, RdM, RdW);
// Fetch stage signals
logic [31:0] PCNextF, PCPlus4F;
// Decode stage signals
logic [31:0] InstrD;
logic [31:0] PCD, PCPlus4D;
logic [31:0] RD1D, RD2D;
logic [31:0] ImmExtD;
logic [4:0] RdD;
// Execute stage signals
logic [31:0] RD1E, RD2E;
logic [31:0] PCE, ImmExtE;
logic [31:0] SrcAE, SrcBE;
logic [31:0] ALUResultE;
logic [31:0] WriteDataE;
logic [31:0] PCPlus4E;
logic [31:0] PCTargetE;
// Memory stage signals
logic [31:0] PCPlus4M;
// Writeback stage signals
logic [31:0] ALUResultW;
logic [31:0] ReadDataW;
logic [31:0] PCPlus4W;
logic [31:0] ResultW;
// Fetch stage pipeline register and logic
mux2 #(32) pcmux(PCPlus4F, PCTargetE, PCSrcE, PCNextF);
flopenr #(32) pcreg(clk, reset, ~StallF, PCNextF, PCF);
adder pcadd(PCF, 32'h4, PCPlus4F);
// Decode stage pipeline register and logic
flopenrc #(96) regD(clk, reset, FlushD, ~StallD,
{InstrF, PCF, PCPlus4F},
{InstrD, PCD, PCPlus4D});
assign opD = InstrD[6:0];
assign funct3D = InstrD[14:12];
assign funct7b5D = InstrD[30];
assign Rs1D = InstrD[19:15];
assign Rs2D = InstrD[24:20];
assign RdD = InstrD[11:7];
regfile rf(clk, RegWriteW, Rs1D, Rs2D, RdW, ResultW, RD1D, RD2D);
extend ext(InstrD[31:7], ImmSrcD, ImmExtD);
// Execute stage pipeline register and logic
floprc #(175) regE(clk, reset, FlushE,
{RD1D, RD2D, PCD, Rs1D, Rs2D, RdD, ImmExtD, PCPlus4D},
{RD1E, RD2E, PCE, Rs1E, Rs2E, RdE, ImmExtE, PCPlus4E});
mux3 #(32) faemux(RD1E, ResultW, ALUResultM, ForwardAE, SrcAE);
mux3 #(32) fbemux(RD2E, ResultW, ALUResultM, ForwardBE, WriteDataE);
mux2 #(32) srcbmux(WriteDataE, ImmExtE, ALUSrcE, SrcBE);
alu alu(SrcAE, SrcBE, ALUControlE, ALUResultE, ZeroE);
adder branchadd(ImmExtE, PCE, PCTargetE);
// Memory stage pipeline register
flopr #(101) regM(clk, reset,
{ALUResultE, WriteDataE, RdE, PCPlus4E},
{ALUResultM, WriteDataM, RdM, PCPlus4M});
// Writeback stage pipeline register and logic
flopr #(101) regW(clk, reset,
{ALUResultM, ReadDataM, RdM, PCPlus4M},
{ALUResultW, ReadDataW, RdW, PCPlus4W});
mux3 #(32) resultmux(ALUResultW, ReadDataW, PCPlus4W, ResultSrcW, ResultW);
endmodule
// Hazard Unit: forward, stall, and flush
module hazard(input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW,
input logic PCSrcE, ResultSrcEb0,
input logic RegWriteM, RegWriteW,
output logic [1:0] ForwardAE, ForwardBE,
output logic StallF, StallD, FlushD, FlushE);
logic lwStallD;
// forwarding logic
always_comb begin
ForwardAE = 2'b00;
ForwardBE = 2'b00;
if (Rs1E != 5'b0)
if ((Rs1E == RdM) & RegWriteM) ForwardAE = 2'b10;
else if ((Rs1E == RdW) & RegWriteW) ForwardAE = 2'b01;
if (Rs2E != 5'b0)
if ((Rs2E == RdM) & RegWriteM) ForwardBE = 2'b10;
else if ((Rs2E == RdW) & RegWriteW) ForwardBE = 2'b01;
end
// stalls and flushes
assign lwStallD = ResultSrcEb0 & ((Rs1D == RdE) | (Rs2D == RdE));
assign StallD = lwStallD;
assign StallF = lwStallD;
assign FlushD = PCSrcE;
assign FlushE = lwStallD | PCSrcE;
endmodule
module regfile(input logic clk,
input logic we3,
input logic [ 4:0] a1, a2, a3,
input logic [31:0] wd3,
output logic [31:0] rd1, rd2);
logic [31:0] rf[31:0];
// three ported register file
// read two ports combinationally (A1/RD1, A2/RD2)
// write third port on rising edge of clock (A3/WD3/WE3)
// write occurs on falling edge of clock
// register 0 hardwired to 0
always_ff @(negedge clk)
if (we3) rf[a3] <= wd3;
assign rd1 = (a1 != 0) ? rf[a1] : 0;
assign rd2 = (a2 != 0) ? rf[a2] : 0;
endmodule
module adder(input [31:0] a, b,
output [31:0] y);
assign y = a + b;
endmodule
module extend(input logic [31:7] instr,
input logic [1:0] immsrc,
output logic [31:0] immext);
always_comb
case(immsrc)
// I-type
2'b00: immext = {{20{instr[31]}}, instr[31:20]};
// S-type (stores)
2'b01: immext = {{20{instr[31]}}, instr[31:25], instr[11:7]};
// B-type (branches)
2'b10: immext = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
// J-type (jal)
2'b11: immext = {{12{instr[31]}}, instr[19:12], instr[20], instr[30:21], 1'b0};
default: immext = 32'bx; // undefined
endcase
endmodule
module flopr #(parameter WIDTH = 8)
(input logic clk, reset,
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else q <= d;
endmodule
module flopenr #(parameter WIDTH = 8)
(input logic clk, reset, en,
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else if (en) q <= d;
endmodule
module flopenrc #(parameter WIDTH = 8)
(input logic clk, reset, clear, en,
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else if (en)
if (clear) q <= 0;
else q <= d;
endmodule
module floprc #(parameter WIDTH = 8)
(input logic clk,
input logic reset,
input logic clear,
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else
if (clear) q <= 0;
else q <= d;
endmodule
module mux2 #(parameter WIDTH = 8)
(input logic [WIDTH-1:0] d0, d1,
input logic s,
output logic [WIDTH-1:0] y);
assign y = s ? d1 : d0;
endmodule
module mux3 #(parameter WIDTH = 8)
(input logic [WIDTH-1:0] d0, d1, d2,
input logic [1:0] s,
output logic [WIDTH-1:0] y);
assign y = s[1] ? d2 : (s[0] ? d1 : d0);
endmodule
module imem(input logic [31:0] a,
output logic [31:0] rd);
logic [31:0] RAM[63:0];
initial
$readmemh("riscvtest.txt",RAM);
assign rd = RAM[a[31:2]]; // word aligned
endmodule
module dmem(input logic clk, we,
input logic [31:0] a, wd,
output logic [31:0] rd);
logic [31:0] RAM[63:0];
assign rd = RAM[a[31:2]]; // word aligned
always_ff @(posedge clk)
if (we) RAM[a[31:2]] <= wd;
endmodule
module alu(input logic [31:0] a, b,
input logic [2:0] alucontrol,
output logic [31:0] result,
output logic zero);
logic [31:0] condinvb, sum;
logic v; // overflow
logic isAddSub; // true when is add or subtract operation
assign condinvb = alucontrol[0] ? ~b : b;
assign sum = a + condinvb + alucontrol[0];
assign isAddSub = ~alucontrol[2] & ~alucontrol[1] |
~alucontrol[1] & alucontrol[0];
always_comb
case (alucontrol)
3'b000: result = sum; // add
3'b001: result = sum; // subtract
3'b010: result = a & b; // and
3'b011: result = a | b; // or
3'b100: result = a ^ b; // xor
3'b101: result = sum[31] ^ v; // slt
3'b110: result = a << b[4:0]; // sll
3'b111: result = a >> b[4:0]; // srl
default: result = 32'bx;
endcase
assign zero = (result == 32'b0);
assign v = ~(alucontrol[0] ^ a[31] ^ b[31]) & (a[31] ^ sum[31]) & isAddSub;
endmodule