Merge branch 'main' of github.com:davidharrishmc/riscv-wally into boot

This commit is contained in:
Jacob Pease 2023-01-19 16:59:24 -06:00
commit fbe5c63219
53 changed files with 620 additions and 93249 deletions

View File

@ -1,23 +0,0 @@
# Makefile
# Builds one executable per C source file found in this directory.

CC     = gcc
CFLAGS = -O3
LFLAGS = -L.

# Link against the riscv-isa-sim version of SoftFloat rather than
# the regular version to get RISC-V NaN behavior
IFLAGS = -I$(RISCV)/riscv-isa-sim/softfloat
# LIBS used to be assigned twice, so the earlier "-lm" was silently discarded.
# Keep both in a single assignment, with -lm after the archive.
LIBS   = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a -lm
#IFLAGS = -I../../../addins/SoftFloat-3e/source/include/
#LIBS = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a

SRCS  = $(wildcard *.c)
PROGS = $(patsubst %.c,%,$(SRCS))

# all and clean are commands, not files
.PHONY: all clean

all: $(PROGS)

%: %.c
	$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)

clean:
	rm -f $(PROGS)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,23 +0,0 @@
# fma.do
#
# Compiles fma16 and its testbench, opens the optimized testbench in the
# simulator, adds the top-level testbench signals to the wave window, and runs.
#
# run with vsim -do "do fma.do"
# add -c before -do for batch simulation
# resume automatically when the simulation hits a break
onbreak {resume}
# create library
vlib worklib
# compile the design and testbench as SystemVerilog with lint checks enabled
vlog -lint -sv -work worklib fma16.v testbench.v
# optimize the testbench into testbenchopt, keeping signals accessible (+acc)
vopt +acc worklib.testbench_fma16 -work worklib -o testbenchopt
vsim -lib worklib testbenchopt
# signals shown in the wave window
add wave sim:/testbench_fma16/clk
add wave sim:/testbench_fma16/reset
add wave sim:/testbench_fma16/x
add wave sim:/testbench_fma16/y
add wave sim:/testbench_fma16/z
add wave sim:/testbench_fma16/result
add wave sim:/testbench_fma16/rexpected
# run until the testbench stops the simulation
run -all

View File

@ -1,268 +0,0 @@
// fma16.sv
// David_Harris@hmc.edu 26 February 2022
// 16-bit floating-point multiply-accumulate
// Operation: general purpose multiply, add, fma, with optional negation
// If mul=1, p = x * y. Else p = x.
// If add=1, result = p + z. Else result = p.
// If negr or negz = 1, negate result or z to handle negations and subtractions
// fadd: mul = 0, add = 1, negr = negz = 0
// fsub: mul = 0, add = 1, negr = 0, negz = 1
// fmul: mul = 1, add = 0, negr = 0, negz = 0
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
// Half-precision format parameters
`define FFLEN 16                 // total width of a floating-point operand
`define Nf 10                    // number of fraction bits
`define Ne 5                     // number of exponent bits
`define BIAS 15                  // exponent bias
`define EMIN (-(2**(`Ne-1)-1))
`define EMAX (2**(`Ne-1)-1)
`define NaN 16'h7E00             // 16-bit NaN pattern returned for NaN results
`define INF 15'h7C00             // exponent and fraction of infinity; the sign bit is prepended by the user

// rounding modes *** update
// These are sized to match the 2-bit roundmode port (they were declared 3 bits wide).
`define RZ 2'b00
`define RNE 2'b01
`define RM 2'b10
`define RP 2'b11
// Top level of the 16-bit FMA: unpack operands, multiply, add, then post-process.
module fma16(
  input  logic [`FFLEN-1:0] x, y, z,
  input  logic              mul, add, negr, negz,
  input  logic [1:0]        roundmode, // 00: rz, 01: rne, 10: rm, 11: rp (matches `RZ/`RNE/`RM/`RP)
  output logic [`FFLEN-1:0] result);

  logic [`Nf:0]     xm, ym, zm; // U1.Nf
  logic [`Ne-1:0]   xe, ye, ze; // B_Ne
  logic             xs, ys, zs;
  logic             zs1;        // sign of z as unpacked, before optional negation
  logic [2*`Nf+1:0] pm;         // U2.2Nf
  logic [`Ne:0]     pe;         // B_Ne+1
  logic             ps;         // sign of product
  logic [22:0]      rm;
  logic [`Ne+1:0]   re;
  logic             rs;
  logic             xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan;
  logic [`Ne+1:0]   re2;

  // zs used to be left undriven because the signadj16 instance is commented out.
  // Drive it here as the sign of the addend after the optional negation.
  // NOTE(review): negr is still unused in this module.
  assign zs = zs1 ^ negz;

  unpack16 unpack(x, y, z, xm, ym, zm, xe, ye, ze, xs, ys, zs1, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan); // unpack inputs
  //signadj16 signadj(negr, negz, xs, ys, zs1, ps, zs); // handle negations
  mult16 mult16(mul, xm, ym, xe, ye, xs, ys, pm, pe, ps); // p = x * y
  // add16 applies negz itself (effectivesub = ps ^ zs ^ negz), so it receives the raw sign zs1
  add16 add16(add, pm, zm, pe, ze, ps, zs1, negz, rm, re, re2, rs); // r = z + p
  postproc16 post(roundmode, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan, rm, zm, re, ze, rs, zs, ps, re2, result); // normalize, round, pack
endmodule
// Product stage: p = x * y when mul = 1, otherwise p = x passed through.
//   pm: product significand, U2.2Nf
//   pe: product exponent, one bit wider than the operand exponents
//   ps: product sign
module mult16(
  input  logic             mul,
  input  logic [`Nf:0]     xm, ym,
  input  logic [`Ne-1:0]   xe, ye,
  input  logic             xs, ys,
  output logic [2*`Nf+1:0] pm,
  output logic [`Ne:0]     pe,
  output logic             ps);

  // only multiply if mul = 1
  assign pm = mul ? xm * ym : {1'b0, xm, 10'b0};  // multiply mantissas, or place x in the product format
  assign pe = mul ? xe + ye - `BIAS : {1'b0, xe}; // add exponents, account for bias
  // The sign of y must not leak into the result when the multiplier is bypassed (p = x).
  assign ps = mul ? xs ^ ys : xs;                 // negative if exactly one of X, Y is negative
endmodule
// Addition stage (work in progress): aligns z to the product, adds or subtracts the
// significands, and starts normalization.
// NOTE(review): rm is declared as an output but nothing in this module drives it yet; the
// pass-through and rounding logic below is still commented out. add, prezsticky/zsticky,
// rnormed and rnormed2 are likewise not consumed by any active logic.
module add16(
  input  logic             add,
  input  logic [2*`Nf+1:0] pm, // U2.2Nf
  input  logic [`Nf:0]     zm, // U1.Nf
  input  logic [`Ne:0]     pe, // B_Ne+1
  input  logic [`Ne-1:0]   ze, // B_Ne
  input  logic             ps, zs,
  input  logic             negz,
  output logic [22:0]      rm,
  output logic [`Ne+1:0]   re, // B_Ne+2
  output logic [`Ne+1:0]   re2,
  output logic             rs);

  logic [`Nf*3+7:0]   paligned, zaligned, zalignedaddsub, r, r2, rnormed, rnormed2; // U(Nf+6).(2Nf+2) aligned significands
  logic signed [`Ne:0] ExpDiff;  // Q(Ne+2).0
  logic [`Ne:0]       AlignCnt;  // U(Ne+3) bits to right shift Z for alignment *** check size.
  logic [`Nf-1:0]     prezsticky;
  logic               zsticky;
  logic               effectivesub;
  logic               rs0;
  logic [`Ne:0]       leadingzeros, NormCnt; // *** should parameterize size
  logic [`Ne:0]       re1;

  // Alignment shift
  assign paligned = {{(`Nf+4){1'b0}}, pm, 2'b00}; // constant shift to prepend leading and trailing 0s.
  assign ExpDiff = pe - {1'b0, ze};               // Compute exponent difference as signed number

  always_comb // AlignCount mux; see Muller page 254
    if      (ExpDiff <= (-2*`Nf - 1)) begin AlignCnt = 3*`Nf + 7;          re = {1'b0, pe}; end
    else if (ExpDiff <= 2)            begin AlignCnt = `Nf + 4 - ExpDiff;  re = {1'b0, pe}; end
    else if (ExpDiff <= `Nf+3)        begin AlignCnt = `Nf + 4 - ExpDiff;  re = {2'b0, ze}; end
    else                              begin AlignCnt = 0;                  re = {2'b0, ze}; end

  // Shift Zm right by AlignCnt. Produce 3Nf+8 bits of Zaligned in U(Nf+6).(2Nf+2) and Nf bits becoming sticky
  assign {zaligned, prezsticky} = {zm, {(3*`Nf+7){1'b0}}} >> AlignCnt; //Right shift
  assign zsticky = |prezsticky; // Sticky bit if any of the discarded bits were 1

  // Effective subtraction
  assign effectivesub = ps ^ zs ^ negz;                        // subtract |z| from |p|
  assign zalignedaddsub = effectivesub ? ~zaligned : zaligned; // invert zaligned for subtraction

  // Adder
  assign r = paligned + zalignedaddsub + {{`Nf*3+7{1'b0}}, effectivesub}; // add aligned significands
  assign rs0 = r[`Nf*3+7];    // sign of the initial result
  assign r2 = rs0 ? ~r+1 : r; // invert sum if negative; could optimize with end-around carry?

  // Sign Logic
  assign rs = ps ^ rs0; // flip the sign if necessary

  // Leading zero counter
  lzc lzc(r2, leadingzeros);         // count number of leading zeros in 2Nf+5 lower digits of r2
  assign re1 = pe +2 - leadingzeros; // *** declare, # of bits

  // Normalization shift
  // NOTE(review): re1 is unsigned while `EMIN is negative; confirm that the comparison and
  // the assignment of `EMIN to re2 behave as intended.
  always_comb // NormCount mux
    if (ExpDiff < 3) begin
      if (re1 >= `EMIN) begin NormCnt = `Nf + 3 + leadingzeros; re2 = {1'b0, re1}; end
      else              begin NormCnt = `Nf + 5 + pe - `EMIN;   re2 = `EMIN;       end
    end else begin
      // This branch used to assign re, which is already fully driven by the AlignCount mux
      // above (a second always_comb driver is illegal) and left re2 unassigned, inferring a
      // latch. Drive re2 with the same value instead.
      NormCnt = AlignCnt; re2 = {2'b00, ze};
    end

  assign rnormed = r2 << NormCnt; // *** update sticky

  /* temporarily comment out to start synth
  // One-bit secondary normalization
  if (ExpDiff <= 2) begin rnormed2 = rnormed; re2 = re; end // no secondary normalization
  else begin // *** handle sticky
  if (rnormed[***]) begin rnormed2 = rnormed >> 1; re2 = re+1; end
  else if (rnormed[***-1]) begin rnormed2 = rnormed; re2 = re; end
  else begin rnormed2 = rnormed << 1; re2 = re-1; end
  end
  // round
  assign l = rnormed2[***]; // least significant bit
  assign r = rnormed2[***-1]; // rounding bit
  assign s = ***; // sticky bit
  always_comb
  case (roundmode)
  RZ: roundup = 0;
  RP: roundup = ~rs & (r | s);
  RM: roundup = rs & (r | s);
  RNE: roundup = r & (s | l);
  default: roundup = 0;
  endcase
  assign {re3, rrounded} = {re2, rnormed2[***]} + roundup; // increment if necessary
  */
  // *** need to handle rounding to MAXNUM vs. INFINITY
  // add or pass product through
  /* assign rm = add ? arm : {1'b0, pm};
  assign re = add ? are : {1'b0, pe};
  assign rs = add ? ars : ps; */
endmodule
// Leading-zero counter.
// The module body used to be empty, which left leadingzeros undriven. Following the
// comment at the instantiation site ("count number of leading zeros in 2Nf+5 lower digits
// of r2"), this counts zeros from bit 2Nf+4 of r2 downward. The count equals 2Nf+5 when
// all of those bits are zero. Bits of r2 above 2Nf+4 are ignored.
module lzc(
  input  logic [`Nf*3+7:0] r2,
  output logic [`Ne:0]     leadingzeros
);
  localparam int LZCWIDTH = 2*`Nf + 5; // number of low-order bits examined

  always_comb begin
    leadingzeros = (`Ne+1)'(LZCWIDTH); // default: every examined bit is zero
    // Scan from LSB to MSB so the assignment for the most significant set bit wins.
    for (int i = 0; i < LZCWIDTH; i++)
      if (r2[i]) leadingzeros = (`Ne+1)'(LZCWIDTH - 1 - i);
  end
endmodule
// Post-processing (work in progress): selects special-case results (NaN, infinity, zero
// product) or packs the sign, exponent, and fraction from the adder.
// NOTE(review): roundmode is not used yet, and invalid is computed but not exported.
module postproc16(
  input  logic [1:0]     roundmode,
  input  logic           xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan,
  input  logic [22:0]    rm,
  input  logic [`Nf:0]   zm, // U1.Nf
  input  logic [6:0]     re,
  input  logic [`Ne-1:0] ze, // B_Ne
  input  logic           rs, zs, ps,
  input  logic [`Ne+1:0] re2,
  output logic [15:0]    result);

  logic [9:0] uf, uff;
  logic [6:0] ue;
  logic [6:0] ueb, uebiased;
  logic       invalid;

  // Special cases
  // *** not handling signaling NaN
  // *** also add overflow/underflow/inexact
  always_comb begin
    if (xnan | ynan | znan)                     begin result = `NaN; invalid = 0; end // propagate NANs
    else if ((xinf | yinf) & zinf & (ps ^ zs))  begin result = `NaN; invalid = 1; end // infinity - infinity
    else if (xzero & yinf | xinf & yzero)       begin result = `NaN; invalid = 1; end // zero times infinity
    else if (xinf | yinf)                       begin result = {ps, `INF}; invalid = 0; end // X or Y
    else if (zinf)                              begin result = {zs, `INF}; invalid = 0; end // infinite Z
    else if (xzero | yzero)                     begin result = {zs, ze, zm[`Nf-1:0]}; invalid = 0; end
    else if (re2 >= `EMAX)                      begin result = {rs, `INF}; invalid = 0; end
    else                                        begin result = {rs, re[`Ne-1:0], rm[`Nf-1:0]}; invalid = 0; end
  end

  always_comb
    if (rm[21]) begin // normalization right shift by 1 and bump up exponent;
      ue = re + 7'b1;
      uf = rm[20:11];
    end else begin // no normalization shift needed
      ue = re;
      uf = rm[19:10];
    end

  // overflow
  always_comb begin
    ueb = ue-7'd15;
    if (ue >= 7'd46) begin // overflow
      // These assignments used to be commented out, which inferred latches on uebiased/uff.
      uebiased = 7'd30;
      uff = 10'h3ff;
    end else begin
      uebiased = ue-7'd15;
      uff = uf;
    end
  end

  // result used to be driven a second time here by
  //   assign result = {rs, uebiased[4:0], uff};
  // A variable written by always_comb may not also have a continuous assignment, so the
  // special-case always_comb above is now the only driver of result.
  // NOTE(review): fold ue/uf/uebiased/uff into the final else branch above once
  // normalization and rounding are complete.
  // add special case handling for zeros, NaN, Infinity
endmodule
// Sign adjustment: derives the product sign and the sign of the (optionally negated) addend.
// negr is accepted but not used by this logic.
module signadj16(
  input  logic negr, negz,
  input  logic xs, ys, zs1,
  output logic ps, zs);

  always_comb begin
    zs = negz ^ zs1; // addend sign, flipped when negz is set
    ps = ys ^ xs;    // product sign
  end
endmodule
// Unpacks the three operands into significand, exponent, sign, and zero/inf/NaN flags
// by instantiating one unpacknum16 per operand.
module unpack16(
  input  logic [15:0] x, y, z,
  output logic [10:0] xm, ym, zm,
  output logic [4:0]  xe, ye, ze,
  output logic        xs, ys, zs,
  output logic        xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);

  unpacknum16 upx(.num(x), .m(xm), .e(xe), .s(xs), .zero(xzero), .inf(xinf), .nan(xnan));
  unpacknum16 upy(.num(y), .m(ym), .e(ye), .s(ys), .zero(yzero), .inf(yinf), .nan(ynan));
  unpacknum16 upz(.num(z), .m(zm), .e(ze), .s(zs), .zero(zzero), .inf(zinf), .nan(znan));
endmodule
// Splits one 16-bit operand into its fields and classifies it.
//   m: fraction with a leading 1 always prepended
//   e: exponent field, bias left in place
//   s: sign bit
//   zero/inf/nan: classification from the exponent and fraction fields
module unpacknum16(
  input  logic [15:0] num,
  output logic [10:0] m,
  output logic [4:0]  e,
  output logic        s,
  output logic        zero, inf, nan);

  logic [9:0] frac;    // fraction without leading 1
  logic [4:0] expbits; // biased exponent

  assign s       = num[15];
  assign expbits = num[14:10];
  assign frac    = num[9:0];

  assign e = expbits;      // leave bias in exponent ***
  assign m = {1'b1, frac}; // prepend leading 1 to fraction

  assign nan  = (e == 31 && frac != 0);
  assign inf  = (e == 31 && frac == 0);
  assign zero = (e == 0 && frac == 0);
endmodule

View File

@ -1,24 +0,0 @@
// fma16.sv
// David_Harris@hmc.edu 26 February 2022
// 16-bit floating-point multiply-accumulate
// Operation: general purpose multiply, add, fma, with optional negation
// If mul=1, p = x * y. Else p = x.
// If add=1, result = p + z. Else result = p.
// If negr or negz = 1, negate result or z to handle negations and subtractions
// fadd: mul = 0, add = 1, negr = negz = 0
// fsub: mul = 0, add = 1, negr = 0, negz = 1
// fmul: mul = 1, add = 0, negr = 0, negz = 0
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
// Empty skeleton of the fma16 interface; no logic is implemented and result is undriven.
module fma16(
input logic [15:0] x, y, z,
input logic mul, add, negr, negz,
input logic [1:0] roundmode, // 00: rz, 01: rne, 10: rm, 11: rp (encoding used by the test-vector generators)
output logic [15:0] result);
endmodule

View File

@ -1,240 +0,0 @@
#include <stdio.h>
#include <stdint.h>
#include "softfloat.h"
#include "softfloat_types.h"
// Overlays a SoftFloat float32_t on a native float so that convFloat can
// reinterpret the bits for printing.
typedef union sp {
float32_t v;
float f;
} sp;
// lists of tests, terminated with 0x8000
// Exponent entries are field values that prepTests shifts left by 10; fraction
// entries are OR-ed into the low bits.
uint16_t easyExponents[] = {15, 0x8000};
uint16_t medExponents[] = {1, 14, 15, 16, 20, 30, 0x8000};
uint16_t allExponents[] = {1, 15, 16, 30, 31, 0x8000};
uint16_t easyFracts[] = {0, 0x200, 0x8000}; // 1.0 and 1.1
uint16_t medFracts[] = {0, 0x200, 0x001, 0x3FF, 0x8000};
// NOTE(review): zeros, infs and nans are not referenced by the code visible in this
// file. infs and nans also lack the 0x8000 terminator, and in zeros the terminator value
// coincides with the encoding of -0.
uint16_t zeros[] = {0x0000, 0x8000};
uint16_t infs[] = {0x7C00, 0xFC00};
uint16_t nans[] = {0x7D00, 0x7D01};
// Put SoftFloat into its starting state: no pending exception flags, tininess
// detected before rounding, and rounding toward the smaller magnitude.
void softfloatInit(void) {
  softfloat_exceptionFlags = 0;
  softfloat_detectTininess = softfloat_tininess_beforeRounding;
  softfloat_roundingMode   = softfloat_round_minMag;
}
// Convert a SoftFloat half-precision value to a native float: widen it with
// f16_to_f32, then reinterpret the float32_t bits through the sp union.
float convFloat(float16_t f16) {
  sp widened;
  widened.v = f16_to_f32(f16);
  return widened.f;
}
// Emit one test vector to fptr.
//   x, y, z       operands; y is forced to 1.0 when mul is 0, and z to +0 when add is 0
//   mul, add      operation selects, packed into the op field
//   negp, negz    flip the sign of x (negating the product) or of z before the reference computation
//   roundingMode  code written into the op field (SoftFloat's mode is set by the caller)
//   zeroAllowed, infAllowed, nanAllowed
//                 when 0, vectors with such results are prefixed with a "// skip" comment
// Output line: x_y_z_op_result_flags followed by a human-readable comment.
void genCase(FILE *fptr, float16_t x, float16_t y, float16_t z, int mul, int add, int negp, int negz, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
  float16_t result;
  int op, flagVals;
  char calc[80], flags[80];
  float xf, yf, zf, rf;
  float16_t smallest;

  if (!mul) y.v = 0x3C00; // force y to 1 to avoid multiply
  if (!add) z.v = 0x0000; // force z to 0 to avoid add
  if (negp) x.v ^= 0x8000; // flip sign of x to negate p
  if (negz) z.v ^= 0x8000; // flip sign of z to negate z
  op = (roundingMode << 4) | (mul << 3) | (add << 2) | (negp << 1) | negz;
  // printf("op = %02x rm %d mul %d add %d negp %d negz %d\n", op, roundingMode, mul, add, negp, negz);

  softfloat_exceptionFlags = 0; // clear exceptions
  result = f16_mulAdd(x, y, z);

  // snprintf keeps the text inside the fixed-size buffers
  snprintf(flags, sizeof(flags), "NV: %d OF: %d UF: %d NX: %d",
           (softfloat_exceptionFlags >> 4) % 2,
           (softfloat_exceptionFlags >> 2) % 2,
           (softfloat_exceptionFlags >> 1) % 2,
           (softfloat_exceptionFlags) % 2);
  // pack these four flags into one nibble, discarding DZ flag
  flagVals = (softfloat_exceptionFlags & 0x7) | ((softfloat_exceptionFlags >> 1) & 0x8);

  // convert to floats for printing
  xf = convFloat(x);
  yf = convFloat(y);
  zf = convFloat(z);
  rf = convFloat(result);
  if (mul) {
    if (add) snprintf(calc, sizeof(calc), "%f * %f + %f = %f", xf, yf, zf, rf);
    else     snprintf(calc, sizeof(calc), "%f * %f = %f", xf, yf, rf);
  } else {
    snprintf(calc, sizeof(calc), "%f + %f = %f", xf, zf, rf);
  }

  // omit denorms, which aren't required for this project
  smallest.v = 0x0400;
  float16_t resultmag = result;
  resultmag.v &= 0x7FFF; // take absolute value
  if (f16_lt(resultmag, smallest) && (resultmag.v != 0x0000)) fprintf (fptr, "// skip denorm: ");
  if (resultmag.v == 0x0000 && !zeroAllowed) fprintf(fptr, "// skip zero: ");
  if ((resultmag.v == 0x7C00 || resultmag.v == 0x7BFF) && !infAllowed) fprintf(fptr, "// Skip inf: ");
  if (resultmag.v > 0x7C00 && !nanAllowed) fprintf(fptr, "// Skip NaN: ");
  fprintf(fptr, "%04x_%04x_%04x_%02x_%04x_%01x // %s %s\n", x.v, y.v, z.v, op, result.v, flagVals, calc, flags);
}
// Write the description line to fptr and fill cases[] with every combination of the
// exponents in e and the fractions in f (both lists are terminated by 0x8000).
//   testName  accepted for symmetry with the callers; not used here
//   numCases  receives the number of entries written to cases[]
// The caller must supply a cases[] array large enough for all combinations.
void prepTests(uint16_t *e, uint16_t *f, char *testName, char *desc, float16_t *cases,
               FILE *fptr, int *numCases) {
  int i, j;

  (void)testName;
  // Print desc as data: passing it as the format string would misinterpret any '%' in it.
  fprintf(fptr, "%s\n", desc);

  *numCases = 0;
  for (i = 0; e[i] != 0x8000; i++)
    for (j = 0; f[j] != 0x8000; j++) {
      cases[*numCases].v = f[j] | e[i]<<10;
      *numCases = *numCases + 1;
    }
}
// Generate multiply vectors into work/<testName>.tv: every case is multiplied by every
// case with z = 0. When sgn is 1, each pair is also emitted with the sign of y flipped.
// Reports the error and returns without generating anything if the file cannot be opened.
void genMulTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
  int i, j, k, numCases;
  float16_t x, y, z;
  float16_t cases[100000];
  FILE *fptr;
  char fn[80];

  snprintf(fn, sizeof(fn), "work/%s.tv", testName);
  fptr = fopen(fn, "w");
  if (fptr == NULL) { // e.g. the work directory does not exist
    perror(fn);
    return;
  }
  prepTests(e, f, testName, desc, cases, fptr, &numCases);
  z.v = 0x0000;
  for (i=0; i < numCases; i++) {
    x.v = cases[i].v;
    for (j=0; j<numCases; j++) {
      y.v = cases[j].v;
      for (k=0; k<=sgn; k++) {
        y.v ^= (k<<15); // k = 1 flips the sign bit
        genCase(fptr, x, y, z, 1, 0, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
      }
    }
  }
  fclose(fptr);
}
// Generate add vectors into work/<testName>.tv: every case is added to every case
// (x + z, with y passed as 0 and replaced by genCase). When sgn is 1, each pair is also
// emitted with the sign of z flipped.
// Reports the error and returns without generating anything if the file cannot be opened.
void genAddTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
  int i, j, k, numCases;
  float16_t x, y, z;
  float16_t cases[100000];
  FILE *fptr;
  char fn[80];

  snprintf(fn, sizeof(fn), "work/%s.tv", testName);
  fptr = fopen(fn, "w");
  if (fptr == NULL) { // e.g. the work directory does not exist
    perror(fn);
    return;
  }
  prepTests(e, f, testName, desc, cases, fptr, &numCases);
  y.v = 0x0000;
  for (i=0; i < numCases; i++) {
    x.v = cases[i].v;
    for (j=0; j<numCases; j++) {
      z.v = cases[j].v;
      for (k=0; k<=sgn; k++) {
        z.v ^= (k<<15); // k = 1 flips the sign bit
        genCase(fptr, x, y, z, 0, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
      }
    }
  }
  fclose(fptr);
}
// Generate fused multiply-add vectors into work/<testName>.tv over all (x, y, z) triples
// of cases. When sgn is 1, each triple is also emitted with the sign of z flipped.
// Reports the error and returns without generating anything if the file cannot be opened.
void genFMATests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
  int i, j, k, l, numCases;
  float16_t x, y, z;
  float16_t cases[100000];
  FILE *fptr;
  char fn[80];

  snprintf(fn, sizeof(fn), "work/%s.tv", testName);
  fptr = fopen(fn, "w");
  if (fptr == NULL) { // e.g. the work directory does not exist
    perror(fn);
    return;
  }
  prepTests(e, f, testName, desc, cases, fptr, &numCases);
  for (i=0; i < numCases; i++) {
    x.v = cases[i].v;
    for (j=0; j<numCases; j++) {
      y.v = cases[j].v;
      for (k=0; k<numCases; k++) {
        z.v = cases[k].v;
        for (l=0; l<=sgn; l++) {
          z.v ^= (l<<15); // l = 1 flips the sign bit
          genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
        }
      }
    }
  }
  fclose(fptr);
}
// Generate fused multiply-add vectors into work/<testName>.tv over all (x, y, z) triples
// of cases, with +0 and -0 appended to the case list. When sgn is 1, the sign bits of x,
// y, and z are toggled in nested loops.
// Reports the error and returns without generating anything if the file cannot be opened.
void genSpecialTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
  int i, j, k, sx, sy, sz, numCases;
  float16_t x, y, z;
  float16_t cases[100000];
  FILE *fptr;
  char fn[80];

  snprintf(fn, sizeof(fn), "work/%s.tv", testName);
  fptr = fopen(fn, "w");
  if (fptr == NULL) { // e.g. the work directory does not exist
    perror(fn);
    return;
  }
  prepTests(e, f, testName, desc, cases, fptr, &numCases);
  cases[numCases].v = 0x0000; // add +0 case
  cases[numCases+1].v = 0x8000; // add -0 case
  numCases += 2;
  for (i=0; i < numCases; i++) {
    x.v = cases[i].v;
    for (j=0; j<numCases; j++) {
      y.v = cases[j].v;
      for (k=0; k<numCases; k++) {
        z.v = cases[k].v;
        // The sign toggles accumulate across iterations because x and y are only
        // reloaded in their own outer loops.
        for (sx=0; sx<=sgn; sx++) {
          x.v ^= (sx<<15);
          for (sy=0; sy<=sgn; sy++) {
            y.v ^= (sy<<15);
            for (sz=0; sz<=sgn; sz++) {
              z.v ^= (sz<<15);
              genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
            }
          }
        }
      }
    }
  }
  fclose(fptr);
}
// Generate every test-vector file: directed multiply, add and FMA tests in RZ, then the
// special-case FMA tests in each rounding mode.
int main()
{
  // rounding-mode codes written into the op field of each vector
  enum { kRZ = 0, kRNE = 1, kRM = 2, kRP = 3 };
  // sgn argument: whether sign-flipped variants are generated as well
  enum { kUnsigned = 0, kSigned = 1 };
  // zero/inf/NaN results: kSkip marks them with a "// skip" prefix, kKeep emits them unmarked
  enum { kSkip = 0, kKeep = 1 };

  softfloatInit(); // configure softfloat modes

  // Test cases: multiplication
  genMulTests(easyExponents, easyFracts, kUnsigned, "fmul_0", "// Multiply with exponent of 0, significand of 1.0 and 1.1, RZ", kRZ, kSkip, kSkip, kSkip);
  genMulTests(medExponents, medFracts, kUnsigned, "fmul_1", "// Multiply with various exponents and unsigned fractions, RZ", kRZ, kSkip, kSkip, kSkip);
  genMulTests(medExponents, medFracts, kSigned, "fmul_2", "// Multiply with various exponents and signed fractions, RZ", kRZ, kSkip, kSkip, kSkip);

  // Test cases: addition
  genAddTests(easyExponents, easyFracts, kUnsigned, "fadd_0", "// Add with exponent of 0, significand of 1.0 and 1.1, RZ", kRZ, kSkip, kSkip, kSkip);
  genAddTests(medExponents, medFracts, kUnsigned, "fadd_1", "// Add with various exponents and unsigned fractions, RZ", kRZ, kSkip, kSkip, kSkip);
  genAddTests(medExponents, medFracts, kSigned, "fadd_2", "// Add with various exponents and signed fractions, RZ", kRZ, kSkip, kSkip, kSkip);

  // Test cases: FMA
  genFMATests(easyExponents, easyFracts, kUnsigned, "fma_0", "// FMA with exponent of 0, significand of 1.0 and 1.1, RZ", kRZ, kSkip, kSkip, kSkip);
  genFMATests(medExponents, medFracts, kUnsigned, "fma_1", "// FMA with various exponents and unsigned fractions, RZ", kRZ, kSkip, kSkip, kSkip);
  genFMATests(medExponents, medFracts, kSigned, "fma_2", "// FMA with various exponents and signed fractions, RZ", kRZ, kSkip, kSkip, kSkip);

  // Test cases: Zero, Infinity, NaN
  genSpecialTests(allExponents, medFracts, kSigned, "fma_special_rz", "// FMA with special cases, RZ", kRZ, kKeep, kKeep, kKeep);

  // Full test cases with other rounding modes
  softfloat_roundingMode = softfloat_round_near_even;
  genSpecialTests(allExponents, medFracts, kSigned, "fma_special_rne", "// FMA with special cases, RNE", kRNE, kKeep, kKeep, kKeep);
  softfloat_roundingMode = softfloat_round_min;
  genSpecialTests(allExponents, medFracts, kSigned, "fma_special_rm", "// FMA with special cases, RM", kRM, kKeep, kKeep, kKeep);
  softfloat_roundingMode = softfloat_round_max;
  genSpecialTests(allExponents, medFracts, kSigned, "fma_special_rp", "// FMA with special cases, RP", kRP, kKeep, kKeep, kKeep);

  return 0;
}

View File

@ -1,8 +0,0 @@
#!/bin/bash
# check for warnings in Verilog code
# The verilator lint tool is faster and better than Modelsim so it is best to run this first.
export PATH=$PATH:/usr/local/bin/

# Locate verilator and stop with a clear message if it is missing; previously an empty
# $verilator made the shell try to run "--lint-only" as a command.
verilator=$(which verilator)
if [ -z "$verilator" ]; then
    echo "verilator not found in PATH" >&2
    exit 1
fi

# Quoted so that a script path containing spaces is handled correctly.
# NOTE(review): basepath is not used by the command below.
basepath="$(dirname "$0")/.."

"$verilator" --lint-only --top-module fma16 fma16.v

View File

@ -1,2 +0,0 @@
# Launch vsim and execute fma.do (no -c flag; see the batch variant for command-line mode)
vsim -do "do fma.do"

View File

@ -1 +0,0 @@
# Launch vsim with -c (batch simulation, per the note in fma.do) and execute fma.do
vsim -c -do "do fma.do"

View File

@ -1 +0,0 @@
# Run the synth target of the Makefile in ../../../synthDC with DESIGN set to fma16
make -C ../../../synthDC synth DESIGN=fma16

View File

@ -1,52 +0,0 @@
/* verilator lint_off STMTDLY */
// Self-checking testbench for fma16.
// Loads test vectors from a file, applies one vector after each rising clock edge, and
// compares the DUT result against the expected value on the falling edge.
module testbench_fma16;
reg clk, reset;
reg [15:0] x, y, z, rexpected;
wire [15:0] result;
reg [7:0] ctrl;
reg [3:0] flagsexpected;
reg mul, add, negp, negz;
reg [1:0] roundmode;
reg [31:0] vectornum, errors;
// each vector is 76 bits: x, y, z (16 each), ctrl (8), rexpected (16), flagsexpected (4)
reg [75:0] testvectors[10000:0];
// instantiate device under test
// (positional connection: negp drives the port that fma16 declares as negr)
fma16 dut(x, y, z, mul, add, negp, negz, roundmode, result);
// generate clock
always
begin
clk = 1; #5; clk = 0; #5;
end
// at start of test, load vectors and pulse reset
initial
begin
$readmemh("work/fmul_0.tv", testvectors);
vectornum = 0; errors = 0;
reset = 1; #22; reset = 0;
end
// apply test vectors on rising edge of clk
always @(posedge clk)
begin
#1; {x, y, z, ctrl, rexpected, flagsexpected} = testvectors[vectornum];
// the low six bits of ctrl carry roundmode (2 bits) followed by mul, add, negp, negz
{roundmode, mul, add, negp, negz} = ctrl[5:0];
end
// check results on falling edge of clk
always @(negedge clk)
if (~reset) begin // skip during reset
if (result !== rexpected) begin // check result // *** should also add tests on flags eventually
$display("Error: inputs %h * %h + %h", x, y, z);
$display(" result = %h (%h expected)", result, rexpected);
errors = errors + 1;
end
vectornum = vectornum + 1;
// an all-x entry marks the end of the loaded vectors
if (testvectors[vectornum] === 'x) begin
$display("%d tests completed with %d errors",
vectornum, errors);
$stop;
end
end
endmodule

File diff suppressed because it is too large Load Diff

View File

@ -1,130 +0,0 @@
#!/usr/bin/perl -w
# torturegen.pl
# David_Harris@hmc.edu 19 April 2022
# Convert TestFloat cases into format for fma16 project torture test
# Strip out cases involving denorms
use strict;
# Build the list of input files: one per (operation, rounding mode) pair
my @basenames = ("add", "mul", "mulAdd");
my @roundingmodes = ("rz", "rd", "ru", "rne");
my @names = ();
foreach my $name (@basenames) {
foreach my $mode (@roundingmodes) {
push(@names, "f16_${name}_$mode.tv");
}
}
open(TORTURE, ">work/torture.tv") || die("Can't write torture.tv");
my $datestring = localtime();
print(TORTURE "// Torture tests generated $datestring by $0\n");
# Convert a sample of the lines of each input file into fma16 vector format
foreach my $tv (@names) {
open(TV, "work/$tv") || die("Can't read $tv");
my $type = &getType($tv); # is it mul, add, mulAdd
my $rm = &getRm($tv); # rounding mode
# if ($rm != 0) { next; } # only do rz
print (TORTURE "\n////////// Testcases from $tv of type $type rounding mode $rm\n");
print ("\n////////// Testcases from $tv of type $type rounding mode $rm\n");
my $linecount = 0;
my $babyTorture = 0;
while (<TV>) {
my $line = $_;
$linecount++;
# keep one line out of every $density; the offset by $rm selects different lines per rounding mode
my $density = 10;
if ($type eq "mulAdd") {$density = 500;}
if ($babyTorture) {
$density = 100;
if ($type eq "mulAdd") {$density = 50000;}
}
if ((($linecount + $rm) % $density) != 0) { next }; # too many tests to use
chomp($line); # strip off newline
# input fields are separated by underscores; their meaning depends on the test type
my @parts = split(/_/, $line);
my ($x, $y, $z, $op, $w, $flags);
$x = $parts[0];
if ($type eq "add") { $y = "0000"; } else {$y = $parts[1]};
if ($type eq "mul") { $z = "3CFF"; } elsif ($type eq "add") {$z = $parts[1]} else { $z = $parts[2]};
# op field: rounding mode in the upper nibble, +8 for multiply, +4 for add
$op = $rm << 4;
if ($type eq "mul" || $type eq "mulAdd") { $op = $op + 8; }
if ($type eq "add" || $type eq "mulAdd") { $op = $op + 4; }
my $opname = sprintf("%02x", $op);
if ($type eq "mulAdd") {$w = $parts[3];} else {$w = $parts[2]};
if ($type eq "mulAdd") {$flags = $parts[4];} else {$flags = $parts[3]};
$flags = substr($flags, -1); # take last character
# replace any NaN result with the single pattern 7e00
if (&fpval($w) eq "NaN") { $w = "7e00"; }
my $vec = "${x}_${y}_${z}_${opname}_${w}_${flags}";
my $skip = "";
if (&isdenorm($x) || &isdenorm($y) || &isdenorm($z) || &isdenorm($w)) {
$skip = "Skipped denorm";
}
my $summary = &summary($x, $y, $z, $w, $type);
# skipped cases are written as comments only; the others are written as vectors
if ($skip ne "") {
print TORTURE "// $skip $tv line $linecount $line $summary\n"
}
else { print TORTURE "$vec // $tv line $linecount $line $summary\n";}
}
close(TV);
}
close(TORTURE);
# fpval(hexstring)
# Return a human-readable description of a 16-bit floating-point value given as a hex
# string: "NaN", "INF", 0, "Denorm", or "1.<fraction bits> x 2^<exponent>", prefixed
# with "-" when the sign bit is set (NaN is returned without a sign).
sub fpval {
    my $val = shift;
    $val = hex($val); # convert hex string to number
    my $frac = $val & 0x3FF;
    my $exp = ($val >> 10) & 0x1F;
    my $sign = $val >> 15;
    my $res;
    if ($exp == 31 && $frac != 0) { return "NaN"; }
    elsif ($exp == 31) { $res = "INF"; }
    # test the magnitude only, so that negative zero is reported as -0 rather than -Denorm
    elsif ($exp == 0 && $frac == 0) { $res = 0; }
    elsif ($exp == 0) { $res = "Denorm"; }
    # the fraction field is 10 bits; printing 11 digits inserted a spurious leading 0
    else { $res = sprintf("1.%010b x 2^%d", $frac, $exp-15); }
    if ($sign == 1) { $res = "-$res"; }
    return $res;
}
# summary(x, y, z, w, type)
# Describe the operation as text using fpval for each operand: "x + z = w" for add,
# "x * y = w" for mul, and "x * y + z = w" otherwise.
sub summary {
    my ($x, $y, $z, $w, $type) = @_;
    my ($xtext, $ytext, $ztext, $wtext) = map { &fpval($_) } ($x, $y, $z, $w);
    return "$xtext + $ztext = $wtext" if $type eq "add";
    return "$xtext * $ytext = $wtext" if $type eq "mul";
    return "$xtext * $ytext + $ztext = $wtext";
}
# getType(filename)
# Classify a test-vector file by name. "mulAdd" is checked before "mul" because
# the latter is a substring of the former; anything else is treated as "add".
sub getType {
    my ($file) = @_;
    return "mulAdd" if $file =~ /mulAdd/;
    return "mul"    if $file =~ /mul/;
    return "add";
}
# getRm(filename)
# Map the rounding-mode tag in a file name to its numeric code:
# rz -> 0, rne -> 1, rd -> 2, ru -> 3. Returns the string "bad" if no tag matches.
sub getRm {
    my ($file) = @_;
    return 0 if $file =~ /rz/;
    return 1 if $file =~ /rne/;
    return 2 if $file =~ /rd/;
    return 3 if $file =~ /ru/;
    return "bad";
}
# isdenorm(hexstring)
# Return 1 if the 16-bit value has a zero exponent field and a nonzero fraction,
# otherwise 0. Both +0 (0000) and -0 (8000) are zeros, not denorms.
sub isdenorm {
    my $fp = shift;
    my $val = hex($fp);
    my $expv = ($val >> 10) & 0x1F;
    my $frac = $val & 0x3FF;
    my $denorm = 0;
    # Test the fraction rather than the whole word: the old check ($val != 0) counted
    # the sign bit, so negative zero was skipped as a denorm.
    if ($expv == 0 && $frac != 0) { $denorm = 1; }
    # print("Num $fp Exp $expv Denorm $denorm Done\n");
    return $denorm;
}

View File

@ -1,62 +0,0 @@
# Wave-window setup for testbench_fma16: adds testbench and DUT signals, restores the
# cursors, and configures the display.
# NOTE(review): several /dut/ signals listed here (e.g. XManE, ProdManAl, KillProd) are
# not declared by the fma16 module shown alongside this script; confirm which DUT
# version this list targets.
onerror {resume}
quietly WaveActivateNextPane {} 0
# testbench-level signals
add wave -noupdate /testbench_fma16/clk
add wave -noupdate /testbench_fma16/reset
add wave -noupdate /testbench_fma16/x
add wave -noupdate /testbench_fma16/y
add wave -noupdate /testbench_fma16/z
add wave -noupdate /testbench_fma16/result
add wave -noupdate /testbench_fma16/rexpected
# DUT ports
add wave -noupdate /testbench_fma16/dut/x
add wave -noupdate /testbench_fma16/dut/y
add wave -noupdate /testbench_fma16/dut/z
add wave -noupdate /testbench_fma16/dut/mul
add wave -noupdate /testbench_fma16/dut/add
add wave -noupdate /testbench_fma16/dut/negr
add wave -noupdate /testbench_fma16/dut/negz
add wave -noupdate /testbench_fma16/dut/roundmode
add wave -noupdate /testbench_fma16/dut/result
# DUT internal signals
add wave -noupdate /testbench_fma16/dut/XManE
add wave -noupdate /testbench_fma16/dut/YManE
add wave -noupdate /testbench_fma16/dut/ZManE
add wave -noupdate /testbench_fma16/dut/XExpE
add wave -noupdate /testbench_fma16/dut/YExpE
add wave -noupdate /testbench_fma16/dut/ZExpE
add wave -noupdate /testbench_fma16/dut/PExpE
add wave -noupdate /testbench_fma16/dut/Ne
add wave -noupdate /testbench_fma16/dut/upOneExt
add wave -noupdate /testbench_fma16/dut/XSgnE
add wave -noupdate /testbench_fma16/dut/YSgnE
add wave -noupdate /testbench_fma16/dut/ZSgnE
add wave -noupdate /testbench_fma16/dut/PSgnE
add wave -noupdate /testbench_fma16/dut/ProdManE
add wave -noupdate /testbench_fma16/dut/NfracS
add wave -noupdate /testbench_fma16/dut/ProdManAl
add wave -noupdate /testbench_fma16/dut/ZManExt
add wave -noupdate /testbench_fma16/dut/ZManAl
add wave -noupdate /testbench_fma16/dut/Nfrac
add wave -noupdate /testbench_fma16/dut/res
add wave -noupdate -radix decimal /testbench_fma16/dut/AlignCnt
add wave -noupdate /testbench_fma16/dut/NSamt
add wave -noupdate /testbench_fma16/dut/ZExpGreater
add wave -noupdate /testbench_fma16/dut/ACLess
add wave -noupdate /testbench_fma16/dut/upOne
add wave -noupdate /testbench_fma16/dut/KillProd
# cursor positions and display settings
TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 1} {3746 ns} 1} {{Cursor 2} {4169 ns} 0}
quietly wave cursor active 2
configure wave -namecolwidth 237
configure wave -valuecolwidth 64
configure wave -justifyvalue left
configure wave -signalnamewidth 0
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 1
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ns
update
WaveRestoreZoom {4083 ns} {4235 ns}

View File

@ -5,7 +5,8 @@ WALLYDIR:= $(ROOT)/tests/wally-riscv-arch-test
# IMPERASDIR := $(ROOT)/tests/imperas-riscv-tests # IMPERASDIR := $(ROOT)/tests/imperas-riscv-tests
# ALLDIRS := $(ARCHDIR)/$(SUFFIX) $(WALLYDIR)/$(SUFFIX) $(IMPERASDIR)/$(SUFFIX) # ALLDIRS := $(ARCHDIR)/$(SUFFIX) $(WALLYDIR)/$(SUFFIX) $(IMPERASDIR)/$(SUFFIX)
IMPERASDIR := $(ROOT)/tests/imperas-riscv-tests IMPERASDIR := $(ROOT)/tests/imperas-riscv-tests
ALLDIRS := $(ARCHDIR)/$(SUFFIX) $(WALLYDIR)/$(SUFFIX) #ALLDIRS := $(ARCHDIR)/$(SUFFIX) $(WALLYDIR)/$(SUFFIX)
ALLDIRS := $(ARCHDIR)/$(SUFFIX)
ELFFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf") ELFFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf")
OBJDUMPFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf.objdump") OBJDUMPFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf.objdump")

View File

@ -1,45 +0,0 @@
# wally-pipelined.do
#
# Modification by Oklahoma State University & Harvey Mudd College
# Use with Testbench
# James Stine, 2008; David Harris 2021
# Go Cowboys!!!!!!
#
# Takes 1:10 to run RV64IC tests using gui
# run with vsim -do "do wally-pipelined.do rv64ic riscvarchtest-64m"
# Use this wally-pipelined.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-pipelined.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-pipelined.do -c
# (omit the "-c" to see the GUI while running from the shell)
onbreak {resume}
# create library
# (delete any existing work library first)
if [file exists work] {
vdel -all
}
vlib work
# compile source files
# suppress spurious warnings about
# "Extra checking for conflicts with always_comb done at vopt time"
# because vsim will run vopt
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
# *** modelsim won't take `PA_BITS, but will take other defines for the lengths of DTIM_RANGE and IROM_LEN. For now just live with the warnings.
# $1 (the first script argument) selects the configuration directory under ../config
vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench_imperas.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063
vopt +acc work.testbench -G DEBUG=1 -o workopt
vsim workopt +nowarn3829 -fatal 7
view wave
#-- display input and output signals as hexadecimal values
add log -recursive /*
do wave.do
run -all
noview ../testbench/testbench_imperas.sv
view wave

View File

@ -1,14 +1,13 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// ahbcacheinterface.sv // ahbcacheinterface.sv
// //
// Written: Ross Thompson ross1728@gmail.com August 29, 2022 // Written: Ross Thompson ross1728@gmail.com
// Modified: // Created: August 29, 2022
// Modified: 18 January 2023
// //
// Purpose: Cache/Bus data path. // Purpose: Translates cache bus requests and uncached ieu memory requests into AHB transactions.
// Bus Side logic //
// register the fetch data from the next level of memory. // Documentation: RISC-V System on Chip Design Chapter 9 (Figure 9.8)
// This register should be necessary for timing. There is no register in the uncore or
// ahblite controller between the memories and this cache.
// //
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
// //
@ -30,48 +29,55 @@
`include "wally-config.vh" `include "wally-config.vh"
module ahbcacheinterface #(parameter BEATSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) ( module ahbcacheinterface #(
parameter integer BEATSPERLINE, // Number of AHBW words (beats) in cacheline
parameter integer AHBWLOGBWPL, // Log2 of ^
parameter integer LINELEN, // Number of bits in cacheline
parameter integer LLENPOVERAHBW // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation)
)(
input logic HCLK, HRESETn, input logic HCLK, HRESETn,
// bus interface // bus interface controls
input logic HREADY, input logic HREADY, // AHB peripheral ready
input logic [`AHBW-1:0] HRDATA, output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
output logic [2:0] HSIZE, output logic HWRITE, // AHB 0: Read operation 1: Write operation
output logic [2:0] HBURST, output logic [2:0] HSIZE, // AHB transaction width
output logic [1:0] HTRANS, output logic [2:0] HBURST, // AHB burst length
output logic HWRITE, // bus interface buses
output logic [`PA_BITS-1:0] HADDR, input logic [`AHBW-1:0] HRDATA, // AHB read data
output logic [`AHBW-1:0] HWDATA, output logic [`PA_BITS-1:0] HADDR, // AHB address
output logic [`AHBW/8-1:0] HWSTRB, output logic [`AHBW-1:0] HWDATA, // AHB write data
output logic [LOGWPL-1:0] BeatCount, output logic [`AHBW/8-1:0] HWSTRB, // AHB byte mask
// cache interface // cache interface
input logic [`PA_BITS-1:0] CacheBusAdr, input logic [`PA_BITS-1:0] CacheBusAdr, // Address of cache line
input logic [`LLEN-1:0] CacheReadDataWordM, input logic [`LLEN-1:0] CacheReadDataWordM, // one word of cache line during a writeback
input logic [`LLEN-1:0] WriteDataM, input logic CacheableOrFlushCacheM, // Memory operation is cacheable or flushing D$
input logic CacheableOrFlushCacheM, input logic Cacheable, // Memory operation is cachable
input logic [1:0] CacheBusRW, input logic [1:0] CacheBusRW, // Cache bus operation, 01: writeback, 10: fetch
output logic CacheBusAck, output logic CacheBusAck, // Handshack to $ indicating bus transaction completed
output logic [LINELEN-1:0] FetchBuffer, output logic [LINELEN-1:0] FetchBuffer, // Register to hold beats of cache line as the arrive from bus
input logic Cacheable, output logic [AHBWLOGBWPL-1:0] BeatCount, // Beat position within the cache line in the Address Phase
output logic SelBusBeat, // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr
// lsu/ifu interface
input logic Flush,
input logic [`PA_BITS-1:0] PAdr,
input logic [1:0] BusRW,
input logic Stall,
input logic [2:0] Funct3,
output logic SelBusBeat,
output logic BusStall,
output logic BusCommitted
);
localparam integer LLENPOVERAHBW = `LLEN / `AHBW; // *** fix me duplciated in lsu.
localparam integer BeatCountThreshold = CACHE_ENABLED ? BEATSPERLINE - 1 : 0; // uncached interface
logic [`PA_BITS-1:0] LocalHADDR; input logic [`PA_BITS-1:0] PAdr, // Physical address of uncached memory operation
logic [LOGWPL-1:0] BeatCountDelayed; input logic [`LLEN-1:0] WriteDataM, // IEU write data for uncached store
logic CaptureEn; input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write
logic [`AHBW-1:0] PreHWDATA; input logic [2:0] Funct3, // Size of uncached memory operation
// lsu/ifu interface
input logic Stall, // Core pipeline is stalled
input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting
output logic BusStall, // Bus is busy with an in flight memory operation
output logic BusCommitted); // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
localparam integer BeatCountThreshold = BEATSPERLINE - 1; // Largest beat index
logic [`PA_BITS-1:0] LocalHADDR; // Address after selecting between cached and uncached operation
logic [AHBWLOGBWPL-1:0] BeatCountDelayed; // Beat within the cache line in the second (Data) cache stage
logic CaptureEn; // Enable updating the Fetch buffer with valid data from HRDATA
logic [`AHBW/8-1:0] BusByteMaskM; // Byte enables within a word. For cache request all 1s
logic [`AHBW-1:0] PreHWDATA; // AHB Address phase write data
genvar index; genvar index;
@ -84,7 +90,7 @@ module ahbcacheinterface #(parameter BEATSPERLINE, LINELEN, LOGWPL, CACHE_ENABLE
end end
mux2 #(`PA_BITS) localadrmux(PAdr, CacheBusAdr, Cacheable, LocalHADDR); mux2 #(`PA_BITS) localadrmux(PAdr, CacheBusAdr, Cacheable, LocalHADDR);
assign HADDR = ({{`PA_BITS-LOGWPL{1'b0}}, BeatCount} << $clog2(`AHBW/8)) + LocalHADDR; assign HADDR = ({{`PA_BITS-AHBWLOGBWPL{1'b0}}, BeatCount} << $clog2(`AHBW/8)) + LocalHADDR;
mux2 #(3) sizemux(.d0(Funct3), .d1(`AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable), .y(HSIZE)); mux2 #(3) sizemux(.d0(Funct3), .d1(`AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable), .y(HSIZE));
@ -105,12 +111,11 @@ module ahbcacheinterface #(parameter BEATSPERLINE, LINELEN, LOGWPL, CACHE_ENABLE
// *** bummer need a second byte mask for bus as it is AHBW rather than LLEN. // *** bummer need a second byte mask for bus as it is AHBW rather than LLEN.
// probably can merge by muxing PAdrM's LLEN/8-1 index bit based on HTRANS being != 0. // probably can merge by muxing PAdrM's LLEN/8-1 index bit based on HTRANS being != 0.
logic [`AHBW/8-1:0] BusByteMaskM;
swbytemask #(`AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(`AHBW/8)-1:0]), .ByteMask(BusByteMaskM)); swbytemask #(`AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(`AHBW/8)-1:0]), .ByteMask(BusByteMaskM));
flopen #(`AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[`AHBW/8-1:0], HWSTRB); flopen #(`AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[`AHBW/8-1:0], HWSTRB);
buscachefsm #(BeatCountThreshold, LOGWPL) AHBBuscachefsm( buscachefsm #(BeatCountThreshold, AHBWLOGBWPL) AHBBuscachefsm(
.HCLK, .HRESETn, .Flush, .BusRW, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat, .HCLK, .HRESETn, .Flush, .BusRW, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat,
.CacheBusRW, .CacheBusAck, .BeatCount, .BeatCountDelayed, .CacheBusRW, .CacheBusAck, .BeatCount, .BeatCountDelayed,
.HREADY, .HTRANS, .HWRITE, .HBURST); .HREADY, .HTRANS, .HWRITE, .HBURST);

View File

@ -1,15 +1,14 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// ahbinterface.sv // ahbinterface.sv
// //
// Written: Ross Thompson ross1728@gmail.com August 29, 2022 // Written: Ross Thompson ross1728@gmail.com
// Modified: // Created: August 29, 2022
// Modified: 18 January 2023
// //
// Purpose: Cache/Bus data path. // Purpose: Translates LSU simple memory requests into AHB transactions (NON_SEQ).
// Bus Side logic
// register the fetch data from the next level of memory.
// This register should be necessary for timing. There is no register in the uncore or
// ahblite controller between the memories and this cache.
// //
// Documentation: RISC-V System on Chip Design Chapter 6 (Figure 6.21)
//
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
// //
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
@ -30,25 +29,27 @@
`include "wally-config.vh" `include "wally-config.vh"
module ahbinterface #(parameter LSU = 0) ( // **** modify to use LSU/ifu parameter to control widths of buses module ahbinterface #(
input logic HCLK, HRESETn, parameter LSU = 0 // 1: LSU bus width is `XLEN, 0: IFU bus width is 32 bits
)(
input logic HCLK, HRESETn,
// bus interface // bus interface
input logic HREADY, input logic HREADY, // AHB peripheral ready
input logic [`XLEN-1:0] HRDATA, output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
output logic [1:0] HTRANS, output logic HWRITE, // AHB 0: Read operation 1: Write operation
output logic HWRITE, input logic [`XLEN-1:0] HRDATA, // AHB read data
output logic [`XLEN-1:0] HWDATA, output logic [`XLEN-1:0] HWDATA, // AHB write data
output logic [`XLEN/8-1:0] HWSTRB, output logic [`XLEN/8-1:0] HWSTRB, // AHB byte mask
// lsu/ifu interface // lsu/ifu interface
input logic Flush, input logic Stall, // Core pipeline is stalled
input logic [1:0] BusRW, input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting
input logic [`XLEN/8-1:0] ByteMask, input logic [1:0] BusRW, // Memory operation read/write control: 10: read, 01: write
input logic [`XLEN-1:0] WriteData, input logic [`XLEN/8-1:0] ByteMask, // Bytes enables within a word
input logic Stall, input logic [`XLEN-1:0] WriteData, // IEU write data for a store
output logic BusStall, output logic BusStall, // Bus is busy with an in flight memory operation
output logic BusCommitted, output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
output logic [(LSU ? `XLEN : 32)-1:0] FetchBuffer output logic [(LSU ? `XLEN : 32)-1:0] FetchBuffer // Register to hold HRDATA after arriving from the bus
); );
logic CaptureEn; logic CaptureEn;

View File

@ -1,11 +1,14 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// busfsm.sv // busfsm.sv
// //
// Written: Ross Thompson ross1728@gmail.com December 29, 2021 // Written: Ross Thompson ross1728@gmail.com
// Modified: // Created: December 29, 2021
// Modified: 18 January 2023
// //
// Purpose: Load/Store Unit's interface to BUS for cacheless system // Purpose: Controller for cache to AHB bus interface
// //
// Documentation: RISC-V System on Chip Design Chapter 9 (Figure 9.9)
//
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
// //
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
@ -25,34 +28,40 @@
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" `include "wally-config.vh"
`define BURST_EN 1 `define BURST_EN 1 // Enables burst mode. Disable to show the lost performance.
// HCLK and clk must be the same clock! // HCLK and clk must be the same clock!
module buscachefsm #(parameter integer BeatCountThreshold, LOGWPL) ( module buscachefsm #(
parameter integer BeatCountThreshold, // Largest beat index
parameter integer AHBWLOGBWPL // Log2 of BEATSPERLINE
)(
input logic HCLK, input logic HCLK,
input logic HRESETn, input logic HRESETn,
// IEU interface // IEU interface
input logic Flush, input logic Stall, // Core pipeline is stalled
input logic [1:0] BusRW, input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting
input logic Stall, input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write
output logic BusCommitted, output logic BusStall, // Bus is busy with an in flight memory operation
output logic BusStall, output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
output logic CaptureEn,
// ahb cache interface locals.
// cache interface output logic CaptureEn, // Enable updating the Fetch buffer with valid data from HRDATA
input logic [1:0] CacheBusRW,
output logic CacheBusAck, // cache interface
input logic [1:0] CacheBusRW, // Cache bus operation, 01: writeback, 10: fetch
output logic CacheBusAck, // Handshack to $ indicating bus transaction completed
// lsu interface // lsu interface
output logic [LOGWPL-1:0] BeatCount, BeatCountDelayed, output logic [AHBWLOGBWPL-1:0] BeatCount, // Beat position within the cache line in the Address Phase
output logic SelBusBeat, output logic [AHBWLOGBWPL-1:0] BeatCountDelayed, // Beat within the cache line in the second (Data) cache stage
output logic SelBusBeat, // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr
// BUS interface // BUS interface
input logic HREADY, input logic HREADY, // AHB peripheral ready
output logic [1:0] HTRANS, output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
output logic HWRITE, output logic HWRITE, // AHB 0: Read operation 1: Write operation
output logic [2:0] HBURST output logic [2:0] HBURST // AHB burst length
); );
typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, MEM3, CACHE_FETCH, CACHE_WRITEBACK} busstatetype; typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, MEM3, CACHE_FETCH, CACHE_WRITEBACK} busstatetype;
@ -60,7 +69,7 @@ module buscachefsm #(parameter integer BeatCountThreshold, LOGWPL) (
(* mark_debug = "true" *) busstatetype CurrState, NextState; (* mark_debug = "true" *) busstatetype CurrState, NextState;
logic [LOGWPL-1:0] NextBeatCount; logic [AHBWLOGBWPL-1:0] NextBeatCount;
logic FinalBeatCount; logic FinalBeatCount;
logic [2:0] LocalBurstType; logic [2:0] LocalBurstType;
logic BeatCntEn; logic BeatCntEn;
@ -73,14 +82,14 @@ module buscachefsm #(parameter integer BeatCountThreshold, LOGWPL) (
always_comb begin always_comb begin
case(CurrState) case(CurrState)
ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE; ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE;
else if (HREADY & CacheBusRW[0]) NextState = CACHE_WRITEBACK; else if (HREADY & CacheBusRW[0]) NextState = CACHE_WRITEBACK;
else if (HREADY & CacheBusRW[1]) NextState = CACHE_FETCH; else if (HREADY & CacheBusRW[1]) NextState = CACHE_FETCH;
else NextState = ADR_PHASE; else NextState = ADR_PHASE;
DATA_PHASE: if(HREADY) NextState = MEM3; DATA_PHASE: if(HREADY) NextState = MEM3;
else NextState = DATA_PHASE; else NextState = DATA_PHASE;
MEM3: if(Stall) NextState = MEM3; MEM3: if(Stall) NextState = MEM3;
else NextState = ADR_PHASE; else NextState = ADR_PHASE;
CACHE_FETCH: if(HREADY & FinalBeatCount & CacheBusRW[0]) NextState = CACHE_WRITEBACK; CACHE_FETCH: if(HREADY & FinalBeatCount & CacheBusRW[0]) NextState = CACHE_WRITEBACK;
else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH; else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH;
else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE;
@ -89,17 +98,17 @@ module buscachefsm #(parameter integer BeatCountThreshold, LOGWPL) (
else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH; else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH;
else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE;
else NextState = CACHE_WRITEBACK; else NextState = CACHE_WRITEBACK;
default: NextState = ADR_PHASE; default: NextState = ADR_PHASE;
endcase endcase
end end
// IEU, LSU, and IFU controls // IEU, LSU, and IFU controls
// Used to store data from data phase of AHB. // Used to store data from data phase of AHB.
flopenr #(LOGWPL) BeatCountReg(HCLK, ~HRESETn | BeatCntReset, BeatCntEn, NextBeatCount, BeatCount); flopenr #(AHBWLOGBWPL) BeatCountReg(HCLK, ~HRESETn | BeatCntReset, BeatCntEn, NextBeatCount, BeatCount);
flopenr #(LOGWPL) BeatCountDelayedReg(HCLK, ~HRESETn | BeatCntReset, BeatCntEn, BeatCount, BeatCountDelayed); flopenr #(AHBWLOGBWPL) BeatCountDelayedReg(HCLK, ~HRESETn | BeatCntReset, BeatCntEn, BeatCount, BeatCountDelayed);
assign NextBeatCount = BeatCount + 1'b1; assign NextBeatCount = BeatCount + 1'b1;
assign FinalBeatCount = BeatCountDelayed == BeatCountThreshold[LOGWPL-1:0]; assign FinalBeatCount = BeatCountDelayed == BeatCountThreshold[AHBWLOGBWPL-1:0];
assign BeatCntEn = ((NextState == CACHE_WRITEBACK | NextState == CACHE_FETCH) & HREADY & ~Flush) | assign BeatCntEn = ((NextState == CACHE_WRITEBACK | NextState == CACHE_FETCH) & HREADY & ~Flush) |
(NextState == ADR_PHASE & |CacheBusRW & HREADY); (NextState == ADR_PHASE & |CacheBusRW & HREADY);
assign BeatCntReset = NextState == ADR_PHASE; assign BeatCntReset = NextState == ADR_PHASE;

View File

@ -1,10 +1,13 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// busfsm.sv // busfsm.sv
// //
// Written: Ross Thompson ross1728@gmail.com December 29, 2021 // Written: Ross Thompson ross1728@gmail.com
// Modified: // Created: December 29, 2021
// Modified: 18 January 2023
// //
// Purpose: Load/Store Unit's interface to BUS for cacheless system // Purpose: Simple NON_SEQ (no burst) AHB controller.
//
// Documentation: RISC-V System on Chip Design Chapter 6 (Figure 6.23)
// //
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
// //
@ -32,15 +35,16 @@ module busfsm (
input logic HRESETn, input logic HRESETn,
// IEU interface // IEU interface
input logic Flush, input logic Stall, // Core pipeline is stalled
input logic [1:0] BusRW, input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting
input logic Stall, input logic [1:0] BusRW, // Memory operation read/write control: 10: read, 01: write
output logic BusCommitted, output logic CaptureEn, // Enable updating the Fetch buffer with valid data from HRDATA
output logic BusStall, output logic BusStall, // Bus is busy with an in flight memory operation
output logic CaptureEn, output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
input logic HREADY, // AHB control signals
output logic [1:0] HTRANS, input logic HREADY, // AHB peripheral ready
output logic HWRITE output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ
output logic HWRITE // AHB 0: Read operation 1: Write operation
); );
typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, MEM3} busstatetype; typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, MEM3} busstatetype;
@ -54,13 +58,13 @@ module busfsm (
always_comb begin always_comb begin
case(CurrState) case(CurrState)
ADR_PHASE: if(HREADY & |BusRW) NextState = DATA_PHASE; ADR_PHASE: if(HREADY & |BusRW) NextState = DATA_PHASE;
else NextState = ADR_PHASE; else NextState = ADR_PHASE;
DATA_PHASE: if(HREADY) NextState = MEM3; DATA_PHASE: if(HREADY) NextState = MEM3;
else NextState = DATA_PHASE; else NextState = DATA_PHASE;
MEM3: if(Stall) NextState = MEM3; MEM3: if(Stall) NextState = MEM3;
else NextState = ADR_PHASE; else NextState = ADR_PHASE;
default: NextState = ADR_PHASE; default: NextState = ADR_PHASE;
endcase endcase
end end

View File

@ -1,17 +1,18 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// controller input stage // controller input stage
// //
// Written: Ross Thompson August 31, 2022 // Written: Ross Thompson ross1728@gmail.com
// ross1728@gmail.com // Created: August 31, 2022
// Modified: // Modified: 18 January 2023
// //
// Purpose: AHB multi controller interface to merge LSU and IFU controls. // Purpose: AHB multi controller interface to merge LSU and IFU controls.
// See ARM_HIH0033A_AMBA_AHB-Lite_SPEC 1.0 // See ARM_HIH0033A_AMBA_AHB-Lite_SPEC 1.0
// Arbitrates requests from instruction and data streams // Arbitrates requests from instruction and data streams
// Connects core to peripherals and I/O pins on SOC // Connects core to peripherals and I/O pins on SOC
// Bus width presently matches XLEN // Bus width presently matches XLEN
// Anticipate replacing this with an AXI bus interface to communicate with FPGA DRAM/Flash controllers
// //
// Documentation: RISC-V System on Chip Design Chapter 6 (Figure 6.25)
//
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
// //
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
@ -32,25 +33,29 @@
`include "wally-config.vh" `include "wally-config.vh"
module controllerinputstage #(parameter SAVE_ENABLED = 1) ( module controllerinputstage #(
input logic HCLK, parameter SAVE_ENABLED = 1 // 1: Save manager inputs if Save = 1, 0: Don't save inputs
input logic HRESETn, )(
input logic Save, Restore, Disable, input logic HCLK,
output logic Request, input logic HRESETn,
input logic Save, // Two or more managers requesting (HTRANS != 00) at the same time. Save the non-granted manager inputs
input logic Restore, // Restore a saved manager inputs when it is finally granted
input logic Disable, // Supress HREADY to the non-granted manager
output logic Request, // This manager is making a request
// controller input // controller input
input logic HWRITEIn, input logic [1:0] HTRANSIn, // Manager input. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
input logic [2:0] HSIZEIn, input logic HWRITEIn, // Manager input. AHB 0: Read operation 1: Write operation
input logic [2:0] HBURSTIn, input logic [2:0] HSIZEIn, // Manager input. AHB transaction width
input logic [1:0] HTRANSIn, input logic [2:0] HBURSTIn, // Manager input. AHB burst length
input logic [`PA_BITS-1:0] HADDRIn, input logic [`PA_BITS-1:0] HADDRIn, // Manager input. AHB address
output logic HREADYOut, output logic HREADYOut, // Indicate to manager the peripherial is not busy and another manager does not have priority
// controller output // controller output
output logic HWRITEOut, output logic [1:0] HTRANSOut, // Aribrated manager transaction. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
output logic [2:0] HSIZEOut, output logic HWRITEOut, // Aribrated manager transaction. AHB 0: Read operation 1: Write operation
output logic [2:0] HBURSTOut, output logic [2:0] HSIZEOut, // Aribrated manager transaction. AHB transaction width
output logic [1:0] HTRANSOut, output logic [2:0] HBURSTOut, // Aribrated manager transaction. AHB burst length
output logic [`PA_BITS-1:0] HADDROut, output logic [`PA_BITS-1:0] HADDROut, // Aribrated manager transaction. AHB address
input logic HREADYIn input logic HREADYIn // Peripherial ready
); );
logic HWRITESave; logic HWRITESave;

View File

@ -1,17 +1,18 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// abhmulticontroller // abhmulticontroller
// //
// Written: Ross Thompson August 29, 2022 // Written: Ross Thompson ross1728@gmail.com
// ross1728@gmail.com // Created: August 29, 2022
// Modified: // Modified: 18 January 2023
// //
// Purpose: AHB multi controller interface to merge LSU and IFU controls. // Purpose: AHB multi controller interface to merge LSU and IFU controls.
// See ARM_HIH0033A_AMBA_AHB-Lite_SPEC 1.0 // See ARM_HIH0033A_AMBA_AHB-Lite_SPEC 1.0
// Arbitrates requests from instruction and data streams // Arbitrates requests from instruction and data streams
// Connects core to peripherals and I/O pins on SOC // Connects core to peripherals and I/O pins on SOC
// Bus width presently matches XLEN // Bus width presently matches XLEN
// Anticipate replacing this with an AXI bus interface to communicate with FPGA DRAM/Flash controllers
// //
// Documentation: RISC-V System on Chip Design Chapter 6 (Figures 6.25 and 6.26)
//
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
// //
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
@ -35,42 +36,46 @@
module ebu ( module ebu (
input logic clk, reset, input logic clk, reset,
// Signals from IFU // Signals from IFU
input logic [`PA_BITS-1:0] IFUHADDR, input logic [1:0] IFUHTRANS, // IFU AHB transaction request
input logic [2:0] IFUHSIZE, input logic [2:0] IFUHSIZE, // IFU AHB transaction size
input logic [2:0] IFUHBURST, input logic [2:0] IFUHBURST, // IFU AHB burst length
input logic [1:0] IFUHTRANS, input logic [`PA_BITS-1:0] IFUHADDR, // IFU AHB address
output logic IFUHREADY, output logic IFUHREADY, // AHB peripheral ready gated by possible non-grant
// Signals from LSU // Signals from LSU
input logic [`PA_BITS-1:0] LSUHADDR, input logic [1:0] LSUHTRANS, // LSU AHB transaction request
input logic LSUHWRITE, // LSU AHB transaction direction. 1: write, 0: read
input logic [2:0] LSUHSIZE, // LSU AHB size
input logic [2:0] LSUHBURST, // LSU AHB burst length
input logic [`PA_BITS-1:0] LSUHADDR, // LSU AHB address
input logic [`XLEN-1:0] LSUHWDATA, // initially support AHBW = XLEN input logic [`XLEN-1:0] LSUHWDATA, // initially support AHBW = XLEN
input logic [`XLEN/8-1:0] LSUHWSTRB, input logic [`XLEN/8-1:0] LSUHWSTRB, // AHB byte mask
input logic [2:0] LSUHSIZE, output logic LSUHREADY, // AHB peripheral. Never gated as LSU always has priority
input logic [2:0] LSUHBURST,
input logic [1:0] LSUHTRANS,
input logic LSUHWRITE,
output logic LSUHREADY,
// add LSUHWSTRB ***
// AHB-Lite external signals // AHB-Lite external signals
(* mark_debug = "true" *) input logic HREADY, HRESP, (* mark_debug = "true" *) output logic HCLK, HRESETn,
(* mark_debug = "true" *) output logic HCLK, HRESETn, (* mark_debug = "true" *) input logic HREADY, // AHB peripheral ready
(* mark_debug = "true" *) output logic [`PA_BITS-1:0] HADDR, (* mark_debug = "true" *) input logic HRESP, // AHB peripheral response. 0: OK 1: Error
(* mark_debug = "true" *) output logic [`AHBW-1:0] HWDATA, (* mark_debug = "true" *) output logic [`PA_BITS-1:0] HADDR, // AHB address to peripheral after arbitration
(* mark_debug = "true" *) output logic [`XLEN/8-1:0] HWSTRB, (* mark_debug = "true" *) output logic [`AHBW-1:0] HWDATA, // AHB Write data after arbitration
(* mark_debug = "true" *) output logic HWRITE, (* mark_debug = "true" *) output logic [`XLEN/8-1:0] HWSTRB, // AHB byte write enables after arbitration
(* mark_debug = "true" *) output logic [2:0] HSIZE, (* mark_debug = "true" *) output logic HWRITE, // AHB transaction direction after arbitration
(* mark_debug = "true" *) output logic [2:0] HBURST, (* mark_debug = "true" *) output logic [2:0] HSIZE, // AHB transaction size after arbitration
(* mark_debug = "true" *) output logic [3:0] HPROT, (* mark_debug = "true" *) output logic [2:0] HBURST, // AHB burst length after arbitration
(* mark_debug = "true" *) output logic [1:0] HTRANS, (* mark_debug = "true" *) output logic [3:0] HPROT, // AHB protection. Wally does not use
(* mark_debug = "true" *) output logic HMASTLOCK (* mark_debug = "true" *) output logic [1:0] HTRANS, // AHB transaction request after arbitration
(* mark_debug = "true" *) output logic HMASTLOCK // AHB master lock. Wally does not use
); );
typedef enum logic [1:0] {IDLE, ARBITRATE} statetype; typedef enum logic [1:0] {IDLE, ARBITRATE} statetype;
statetype CurrState, NextState; statetype CurrState, NextState;
logic LSUDisable, LSUSelect; logic LSUDisable;
logic IFUSave, IFURestore, IFUDisable, IFUSelect; logic LSUSelect;
logic both; logic IFUSave;
logic IFURestore;
logic IFUDisable;
logic IFUSelect;
logic both; // Both the LSU and IFU request at the same time
logic [`PA_BITS-1:0] IFUHADDROut; logic [`PA_BITS-1:0] IFUHADDROut;
logic [1:0] IFUHTRANSOut; logic [1:0] IFUHTRANSOut;
@ -84,14 +89,15 @@ module ebu (
logic [2:0] LSUHSIZEOut; logic [2:0] LSUHSIZEOut;
logic LSUHWRITEOut; logic LSUHWRITEOut;
logic IFUReq, LSUReq; logic IFUReq;
logic LSUReq;
logic BeatCntEn; logic BeatCntEn;
logic [4-1:0] NextBeatCount, BeatCount; logic [4-1:0] NextBeatCount, BeatCount; // Position within a burst transfer
logic FinalBeat, FinalBeatD; logic FinalBeat, FinalBeatD; // Indicates the last beat of a burst
logic CntReset; logic CntReset;
logic [3:0] Threshold; logic [3:0] Threshold; // Number of beats derived from HBURST
logic IFUReqD; logic IFUReqD; // 1 cycle delayed IFU request. Part of arbitration
assign HCLK = clk; assign HCLK = clk;
@ -100,14 +106,16 @@ module ebu (
// if two requests come in at once pick one to select and save the others Address phase // if two requests come in at once pick one to select and save the others Address phase
// inputs. Arbitration scheme is LSU always goes first. // inputs. Arbitration scheme is LSU always goes first.
// input stage IFU ////////////////////////////////////////////////////////////////////////////////////////////////////
// input stages and muxing for IFU and LSU
////////////////////////////////////////////////////////////////////////////////////////////////////
controllerinputstage IFUInput(.HCLK, .HRESETn, .Save(IFUSave), .Restore(IFURestore), .Disable(IFUDisable), controllerinputstage IFUInput(.HCLK, .HRESETn, .Save(IFUSave), .Restore(IFURestore), .Disable(IFUDisable),
.Request(IFUReq), .Request(IFUReq),
.HWRITEIn(1'b0), .HSIZEIn(IFUHSIZE), .HBURSTIn(IFUHBURST), .HTRANSIn(IFUHTRANS), .HADDRIn(IFUHADDR), .HWRITEIn(1'b0), .HSIZEIn(IFUHSIZE), .HBURSTIn(IFUHBURST), .HTRANSIn(IFUHTRANS), .HADDRIn(IFUHADDR),
.HWRITEOut(IFUHWRITEOut), .HSIZEOut(IFUHSIZEOut), .HBURSTOut(IFUHBURSTOut), .HREADYOut(IFUHREADY), .HWRITEOut(IFUHWRITEOut), .HSIZEOut(IFUHSIZEOut), .HBURSTOut(IFUHBURSTOut), .HREADYOut(IFUHREADY),
.HTRANSOut(IFUHTRANSOut), .HADDROut(IFUHADDROut), .HREADYIn(HREADY)); .HTRANSOut(IFUHTRANSOut), .HADDROut(IFUHADDROut), .HREADYIn(HREADY));
// input stage LSU
// LSU always has priority so there should never be a need to save and restore the address phase inputs. // LSU always has priority so there should never be a need to save and restore the address phase inputs.
controllerinputstage #(0) LSUInput(.HCLK, .HRESETn, .Save(1'b0), .Restore(1'b0), .Disable(LSUDisable), controllerinputstage #(0) LSUInput(.HCLK, .HRESETn, .Save(1'b0), .Restore(1'b0), .Disable(LSUDisable),
.Request(LSUReq), .Request(LSUReq),
@ -115,7 +123,7 @@ module ebu (
.HWRITEOut(LSUHWRITEOut), .HSIZEOut(LSUHSIZEOut), .HBURSTOut(LSUHBURSTOut), .HWRITEOut(LSUHWRITEOut), .HSIZEOut(LSUHSIZEOut), .HBURSTOut(LSUHBURSTOut),
.HTRANSOut(LSUHTRANSOut), .HADDROut(LSUHADDROut), .HREADYIn(HREADY)); .HTRANSOut(LSUHTRANSOut), .HADDROut(LSUHADDROut), .HREADYIn(HREADY));
// output mux //*** rewrite for general number of controllers. // output mux //*** switch to structural implementation
assign HADDR = LSUSelect ? LSUHADDROut : IFUSelect ? IFUHADDROut : '0; assign HADDR = LSUSelect ? LSUHADDROut : IFUSelect ? IFUHADDROut : '0;
assign HSIZE = LSUSelect ? LSUHSIZEOut : IFUSelect ? IFUHSIZEOut: '0; assign HSIZE = LSUSelect ? LSUHSIZEOut : IFUSelect ? IFUHSIZEOut: '0;
assign HBURST = LSUSelect ? LSUHBURSTOut : IFUSelect ? IFUHBURSTOut : '0; // If doing memory accesses, use LSUburst, else use Instruction burst. assign HBURST = LSUSelect ? LSUHBURSTOut : IFUSelect ? IFUHBURSTOut : '0; // If doing memory accesses, use LSUburst, else use Instruction burst.
@ -129,8 +137,13 @@ module ebu (
assign HWSTRB = LSUHWSTRB; assign HWSTRB = LSUHWSTRB;
// HRDATA is sent to all controllers at the core level. // HRDATA is sent to all controllers at the core level.
////////////////////////////////////////////////////////////////////////////////////////////////////
// Arbitration scheme
// FSM decides if arbitration needed. Arbitration is held until the last beat of // FSM decides if arbitration needed. Arbitration is held until the last beat of
// a burst is completed. // a burst is completed.
////////////////////////////////////////////////////////////////////////////////////////////////////
assign both = LSUReq & IFUReq; assign both = LSUReq & IFUReq;
flopenl #(.TYPE(statetype)) busreg(HCLK, ~HRESETn, 1'b1, NextState, IDLE, CurrState); flopenl #(.TYPE(statetype)) busreg(HCLK, ~HRESETn, 1'b1, NextState, IDLE, CurrState);
always_comb always_comb
@ -142,8 +155,27 @@ module ebu (
default: NextState = IDLE; default: NextState = IDLE;
endcase endcase
// This part is only used when burst mode is supported. // basic arb always selects LSU when both
// Controller needs to count beats. // replace this block for more sophisticated arbitration as needed.
// Controller 0 (IFU)
assign IFUSave = CurrState == IDLE & both;
assign IFURestore = CurrState == ARBITRATE;
assign IFUDisable = CurrState == ARBITRATE;
assign IFUSelect = (NextState == ARBITRATE) ? 1'b0 : IFUReq;
// Controller 1 (LSU)
// When both the IFU and LSU request at the same time, the FSM will go into the arbitrate state.
// Once the LSU request is done the fsm returns to IDLE. To prevent the LSU from regaining
// priority and reissuing the same memory operation, the delayed IFUReqD squashes the LSU request.
// This is necessary because the pipeline is stalled for the entire duration of both transactions,
// and the LSU memory request will still be active.
flopr #(1) ifureqreg(clk, ~HRESETn, IFUReq, IFUReqD);
assign LSUDisable = CurrState == ARBITRATE ? 1'b0 : (IFUReqD & ~(HREADY & FinalBeatD));
assign LSUSelect = NextState == ARBITRATE ? 1'b1: LSUReq;
////////////////////////////////////////////////////////////////////////////////////////////////////
// Burst mode logic
////////////////////////////////////////////////////////////////////////////////////////////////////
flopenr #(4) BeatCountReg(HCLK, ~HRESETn | CntReset | FinalBeat, BeatCntEn, NextBeatCount, BeatCount); flopenr #(4) BeatCountReg(HCLK, ~HRESETn | CntReset | FinalBeat, BeatCntEn, NextBeatCount, BeatCount);
assign NextBeatCount = BeatCount + 1'b1; assign NextBeatCount = BeatCount + 1'b1;
@ -165,17 +197,6 @@ module ebu (
endcase endcase
end end
// basic arb always selects LSU when both
// replace this block for more sophisticated arbitration as needed.
// Controller 0 (IFU)
assign IFUSave = CurrState == IDLE & both;
assign IFURestore = CurrState == ARBITRATE;
assign IFUDisable = CurrState == ARBITRATE;
assign IFUSelect = (NextState == ARBITRATE) ? 1'b0 : IFUReq;
// Controller 1 (LSU)
assign LSUDisable = CurrState == ARBITRATE ? 1'b0 : (IFUReqD & ~(HREADY & FinalBeatD));
assign LSUSelect = NextState == ARBITRATE ? 1'b1: LSUReq;
flopr #(1) ifureqreg(clk, ~HRESETn, IFUReq, IFUReqD);
endmodule endmodule

View File

@ -38,9 +38,10 @@ module ram1p1rwbe #(parameter DEPTH=128, WIDTH=256) (
input logic [WIDTH-1:0] din, input logic [WIDTH-1:0] din,
input logic we, input logic we,
input logic [(WIDTH-1)/8:0] bwe, input logic [(WIDTH-1)/8:0] bwe,
output logic [WIDTH-1:0] dout); output logic [WIDTH-1:0] dout
);
logic [WIDTH-1:0] RAM[DEPTH-1:0]; logic [WIDTH-1:0] RAM[DEPTH-1:0];
// *************************************************************************** // ***************************************************************************
// TRUE SRAM macro // TRUE SRAM macro
@ -64,18 +65,18 @@ module ram1p1rwbe #(parameter DEPTH=128, WIDTH=256) (
integer i; integer i;
// Read // Read
always @(posedge clk) always_ff @(posedge clk)
if(ce) dout <= #1 RAM[addr]; if(ce) dout <= #1 RAM[addr];
// Write divided into part for bytes and part for extra msbs // Write divided into part for bytes and part for extra msbs
if(WIDTH >= 8) if(WIDTH >= 8)
always @(posedge clk) always_ff @(posedge clk)
if (ce & we) if (ce & we)
for(i = 0; i < WIDTH/8; i++) for(i = 0; i < WIDTH/8; i++)
if(bwe[i]) RAM[addr][i*8 +: 8] <= #1 din[i*8 +: 8]; if(bwe[i]) RAM[addr][i*8 +: 8] <= #1 din[i*8 +: 8];
if (WIDTH%8 != 0) // handle msbs if width not a multiple of 8 if (WIDTH%8 != 0) // handle msbs if width not a multiple of 8
always @(posedge clk) always_ff @(posedge clk)
if (ce & we & bwe[WIDTH/8]) if (ce & we & bwe[WIDTH/8])
RAM[addr][WIDTH-1:WIDTH-WIDTH%8] <= #1 din[WIDTH-1:WIDTH-WIDTH%8]; RAM[addr][WIDTH-1:WIDTH-WIDTH%8] <= #1 din[WIDTH-1:WIDTH-WIDTH%8];
end end

View File

@ -36,24 +36,20 @@
`include "wally-config.vh" `include "wally-config.vh"
module ram2p1r1wb module ram2p1r1wb #(parameter DEPTH = 10, WIDTH = 2) (
#(parameter int DEPTH = 10, input logic clk,
parameter int WIDTH = 2 input logic reset,
)
(input logic clk, // port 1 is read only
input logic reset, input logic [DEPTH-1:0] ra1,
output logic [WIDTH-1:0] rd1,
// port 1 is read only input logic ren1,
input logic [DEPTH-1:0] ra1,
output logic [WIDTH-1:0] rd1, // port 2 is write only
input logic ren1, input logic [DEPTH-1:0] wa2,
input logic [WIDTH-1:0] wd2,
// port 2 is write only input logic wen2,
input logic [DEPTH-1:0] wa2, input logic [WIDTH-1:0] bwe2
input logic [WIDTH-1:0] wd2,
input logic wen2,
input logic [WIDTH-1:0] bwe2
); );

View File

@ -32,16 +32,17 @@
`include "wally-config.vh" `include "wally-config.vh"
module ram2p1r1wbefix #(parameter DEPTH=128, WIDTH=256) ( module ram2p1r1wbefix #(parameter DEPTH=128, WIDTH=256) (
input logic clk, input logic clk,
input logic ce1, ce2, input logic ce1, ce2,
input logic [$clog2(DEPTH)-1:0] ra1, input logic [$clog2(DEPTH)-1:0] ra1,
input logic [WIDTH-1:0] wd2, input logic [WIDTH-1:0] wd2,
input logic [$clog2(DEPTH)-1:0] wa2, input logic [$clog2(DEPTH)-1:0] wa2,
input logic we2, input logic we2,
input logic [(WIDTH-1)/8:0] bwe2, input logic [(WIDTH-1)/8:0] bwe2,
output logic [WIDTH-1:0] rd1); output logic [WIDTH-1:0] rd1
);
logic [WIDTH-1:0] mem[DEPTH-1:0]; logic [WIDTH-1:0] mem[DEPTH-1:0];
// *************************************************************************** // ***************************************************************************
// TRUE Smem macro // TRUE Smem macro
@ -53,18 +54,18 @@ module ram2p1r1wbefix #(parameter DEPTH=128, WIDTH=256) (
integer i; integer i;
// Read // Read
always @(posedge clk) always_ff @(posedge clk)
if(ce1) rd1 <= #1 mem[ra1]; if(ce1) rd1 <= #1 mem[ra1];
// Write divided into part for bytes and part for extra msbs // Write divided into part for bytes and part for extra msbs
if(WIDTH >= 8) if(WIDTH >= 8)
always @(posedge clk) always_ff @(posedge clk)
if (ce2 & we2) if (ce2 & we2)
for(i = 0; i < WIDTH/8; i++) for(i = 0; i < WIDTH/8; i++)
if(bwe2[i]) mem[wa2][i*8 +: 8] <= #1 wd2[i*8 +: 8]; if(bwe2[i]) mem[wa2][i*8 +: 8] <= #1 wd2[i*8 +: 8];
if (WIDTH%8 != 0) // handle msbs if width not a multiple of 8 if (WIDTH%8 != 0) // handle msbs if width not a multiple of 8
always @(posedge clk) always_ff @(posedge clk)
if (ce2 & we2 & bwe2[WIDTH/8]) if (ce2 & we2 & bwe2[WIDTH/8])
mem[wa2][WIDTH-1:WIDTH-WIDTH%8] <= #1 wd2[WIDTH-1:WIDTH-WIDTH%8]; mem[wa2][WIDTH-1:WIDTH-WIDTH%8] <= #1 wd2[WIDTH-1:WIDTH-WIDTH%8];

View File

@ -34,8 +34,10 @@ module datapath (
// Decode stage signals // Decode stage signals
input logic [2:0] ImmSrcD, // Selects type of immediate extension input logic [2:0] ImmSrcD, // Selects type of immediate extension
input logic [31:0] InstrD, // Instruction in Decode stage input logic [31:0] InstrD, // Instruction in Decode stage
input logic [2:0] Funct3E, // Funct3 field of instruction in Execute stage
// Execute stage signals // Execute stage signals
input logic [`XLEN-1:0] PCE, // PC in Execute stage
input logic [`XLEN-1:0] PCLinkE, // PC + 4 (of instruction in Execute stage)
input logic [2:0] Funct3E, // Funct3 field of instruction in Execute stage
input logic StallE, FlushE, // Stall, flush Execute stage input logic StallE, FlushE, // Stall, flush Execute stage
input logic [1:0] ForwardAE, ForwardBE, // Forward ALU operands from later stages input logic [1:0] ForwardAE, ForwardBE, // Forward ALU operands from later stages
input logic [2:0] ALUControlE, // Indicate operation ALU performs input logic [2:0] ALUControlE, // Indicate operation ALU performs
@ -43,8 +45,6 @@ module datapath (
input logic ALUResultSrcE, // Selects result to pass on to Memory stage input logic ALUResultSrcE, // Selects result to pass on to Memory stage
input logic JumpE, // Is a jump (j) instruction input logic JumpE, // Is a jump (j) instruction
input logic BranchSignedE, // Branch comparison operands are signed (if it's a branch) input logic BranchSignedE, // Branch comparison operands are signed (if it's a branch)
input logic [`XLEN-1:0] PCE, // PC in Execute stage
input logic [`XLEN-1:0] PCLinkE, // PC + 4 (of instruction in Execute stage)
output logic [1:0] FlagsE, // Comparison flags ({eq, lt}) output logic [1:0] FlagsE, // Comparison flags ({eq, lt})
output logic [`XLEN-1:0] IEUAdrE, // Address computed by ALU output logic [`XLEN-1:0] IEUAdrE, // Address computed by ALU
output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU sources before the mux chooses between them and PCE to put in srcA/B output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU sources before the mux chooses between them and PCE to put in srcA/B
@ -77,9 +77,9 @@ module datapath (
logic [`XLEN-1:0] R1E, R2E; // Source operands read from register file logic [`XLEN-1:0] R1E, R2E; // Source operands read from register file
logic [`XLEN-1:0] ImmExtE; // Extended immediate in Execute stage logic [`XLEN-1:0] ImmExtE; // Extended immediate in Execute stage
logic [`XLEN-1:0] SrcAE, SrcBE; // ALU operands logic [`XLEN-1:0] SrcAE, SrcBE; // ALU operands
logic [`XLEN-1:0] ALUResultE, AltResultE, IEUResultE; // ALU result, Alternative result (ImmExtE or PC+4), computed address *** According to Figure 4.12, IEUResultE should be called IEUAdrE logic [`XLEN-1:0] ALUResultE, AltResultE, IEUResultE; // ALU result, Alternative result (ImmExtE or PC+4), result of execution stage
// Memory stage signals // Memory stage signals
logic [`XLEN-1:0] IEUResultM; // Address computed by ALU *** According to Figure 4.12, IEUResultM should be called IEUAdrM logic [`XLEN-1:0] IEUResultM; // Result from execution stage
logic [`XLEN-1:0] IFResultM; // Result from either IEU or single-cycle FPU op writing an integer register logic [`XLEN-1:0] IFResultM; // Result from either IEU or single-cycle FPU op writing an integer register
// Writeback stage signals // Writeback stage signals
logic [`XLEN-1:0] SCResultW; // Store Conditional result logic [`XLEN-1:0] SCResultW; // Store Conditional result

View File

@ -37,6 +37,7 @@ module ieu (
// Execute stage signals // Execute stage signals
input logic [`XLEN-1:0] PCE, // PC input logic [`XLEN-1:0] PCE, // PC
input logic [`XLEN-1:0] PCLinkE, // PC + 4 input logic [`XLEN-1:0] PCLinkE, // PC + 4
output logic PCSrcE, // Select next PC (between PC+4 and IEUAdrE)
input logic FWriteIntE, FCvtIntE, // FPU writes to integer register file, FPU converts float to int input logic FWriteIntE, FCvtIntE, // FPU writes to integer register file, FPU converts float to int
output logic [`XLEN-1:0] IEUAdrE, // Memory address output logic [`XLEN-1:0] IEUAdrE, // Memory address
output logic IntDivE, W64E, // Integer divide, RV64 W-type instruction output logic IntDivE, W64E, // Integer divide, RV64 W-type instruction
@ -66,29 +67,28 @@ module ieu (
input logic FlushD, FlushE, FlushM, FlushW, // Flush signals input logic FlushD, FlushE, FlushM, FlushW, // Flush signals
output logic FCvtIntStallD, LoadStallD, // Stall causes from IEU to hazard unit output logic FCvtIntStallD, LoadStallD, // Stall causes from IEU to hazard unit
output logic MDUStallD, CSRRdStallD, StoreStallD, output logic MDUStallD, CSRRdStallD, StoreStallD,
output logic PCSrcE, // Select next PC (between PC+4 and IEUAdrE)
output logic CSRReadM, CSRWriteM, PrivilegedM,// CSR read, CSR write, is privileged instruction output logic CSRReadM, CSRWriteM, PrivilegedM,// CSR read, CSR write, is privileged instruction
output logic CSRWriteFenceM // CSR write or fence instruction needs to flush subsequent instructions output logic CSRWriteFenceM // CSR write or fence instruction needs to flush subsequent instructions
); );
logic [2:0] ImmSrcD; // Select type of immediate extension logic [2:0] ImmSrcD; // Select type of immediate extension
logic [1:0] FlagsE; // Comparison flags ({eq, lt}) logic [1:0] FlagsE; // Comparison flags ({eq, lt})
logic [2:0] ALUControlE; // ALU Control logic [2:0] ALUControlE; // ALU control indicates function to perform
logic ALUSrcAE, ALUSrcBE; // ALU source operands logic ALUSrcAE, ALUSrcBE; // ALU source operands
logic [2:0] ResultSrcW; // Source of result in Writeback stage logic [2:0] ResultSrcW; // Selects result in Writeback stage
logic ALUResultSrcE; // ALU result logic ALUResultSrcE; // Selects ALU result to pass on to Memory stage
logic SCE; // Store Conditional instruction logic SCE; // Store Conditional instruction
logic FWriteIntM; // FPU writing to integer register file logic FWriteIntM; // FPU writing to integer register file
logic IntDivW; // Integer divide instruction logic IntDivW; // Integer divide instruction
// forwarding signals // Forwarding signals
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E; // Source and destination registers logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E; // Source and destination registers
logic [1:0] ForwardAE, ForwardBE; // Select signals for forwarding multiplexers logic [1:0] ForwardAE, ForwardBE; // Select signals for forwarding multiplexers
logic RegWriteM, RegWriteW; // Register will be written in Memory, Writeback stages logic RegWriteM, RegWriteW; // Register will be written in Memory, Writeback stages
logic MemReadE, CSRReadE; // Load, CSRRead instruction logic MemReadE, CSRReadE; // Load, CSRRead instruction
logic JumpE; // Jump instruction logic JumpE; // Jump instruction
logic BranchSignedE; // Branch does signed comparison on operands logic BranchSignedE; // Branch does signed comparison on operands
logic MDUE; // Multiply/divide instruction logic MDUE; // Multiply/divide instruction
controller c( controller c(
.clk, .reset, .StallD, .FlushD, .InstrD, .ImmSrcD, .clk, .reset, .StallD, .FlushD, .InstrD, .ImmSrcD,

View File

@ -1,13 +1,12 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// bpred.sv // bpred.sv
// //
// Written: Ross Thomposn // Written: Ross Thomposn ross1728@gmail.com
// Email: ross1728@gmail.com // Created: 12 February 2021
// Created: February 12, 2021 // Modified: 19 January 2023
// Modified:
// //
// Purpose: Branch prediction unit // Purpose: Branch direction prediction and jump/branch target prediction.
// Produces a branch prediction based on branch history. // Prediction made during the fetch stage and corrected in the execution stage.
// //
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
// //
@ -35,30 +34,29 @@ module bpred (
input logic FlushD, FlushE, FlushM, FlushW, input logic FlushD, FlushE, FlushM, FlushW,
// Fetch stage // Fetch stage
// the prediction // the prediction
input logic [31:0] InstrD, // Decompressed decode stage instruction input logic [31:0] InstrD, // Decompressed decode stage instruction. Used to decode instruction class
input logic [`XLEN-1:0] PCNextF, // Next Fetch Address input logic [`XLEN-1:0] PCNextF, // Next Fetch Address
input logic [`XLEN-1:0] PCPlus2or4F, // PCF+2/4 input logic [`XLEN-1:0] PCPlus2or4F, // PCF+2/4
output logic [`XLEN-1:0] PCNext1F, // Branch Predictor predicted or corrected fetch address on miss prediction output logic [`XLEN-1:0] PCNext1F, // Branch Predictor predicted or corrected fetch address on miss prediction
output logic [`XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage. output logic [`XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage.
// Update Predictor // Update Predictor
input logic [`XLEN-1:0] PCF, // Fetch stage instruction address. input logic [`XLEN-1:0] PCF, // Fetch stage instruction address.
input logic [`XLEN-1:0] PCD, // Decode stage instruction address. Also the address the branch predictor took. input logic [`XLEN-1:0] PCD, // Decode stage instruction address. Also the address the branch predictor took.
input logic [`XLEN-1:0] PCE, // Execution stage instruction address. input logic [`XLEN-1:0] PCE, // Execution stage instruction address.
input logic [`XLEN-1:0] PCM, // Memory stage instruction address. input logic [`XLEN-1:0] PCM, // Memory stage instruction address.
// *** after reviewing the compressed instruction set I am leaning towards having the btb predict the instruction class. // Branch and jump outcome
// *** the specifics of how this is encode is subject to change. input logic PCSrcE, // Executation stage branch is taken
input logic PCSrcE, // Executation stage branch is taken input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address
input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br
output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br
// Report branch prediction status // Report branch prediction status
output logic BPPredWrongE, // Prediction is wrong. output logic BPPredWrongE, // Prediction is wrong.
output logic DirPredictionWrongM, // Prediction direction is wrong. output logic DirPredictionWrongM, // Prediction direction is wrong.
output logic BTBPredPCWrongM, // Prediction target wrong. output logic BTBPredPCWrongM, // Prediction target wrong.
output logic RASPredPCWrongM, // RAS prediction is wrong. output logic RASPredPCWrongM, // RAS prediction is wrong.
output logic PredictionInstrClassWrongM // Class prediction is wrong. output logic PredictionInstrClassWrongM // Class prediction is wrong.
); );

View File

@ -1,10 +1,15 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// decompress.sv // decompress.sv
// //
// Written: David_Harris@hmc.edu 9 January 2021 // Written: David_Harris@hmc.edu
// Modified: // Created: 9 January 2021
// Modified: 18 January 2023
// //
// Purpose: Expand 16-bit compressed instructions to 32 bits // Purpose: Expand 16-bit compressed instructions to 32 bits
//
// Documentation: RISC-V System on Chip Design Chapter 11 (Section 11.3.1)
// RISC-V Specification 13 Dec 2019 Chapter 16 pg. 97
// *** probably need more documentation in this file since the book is very light on decompression.
// //
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
// //
@ -27,9 +32,10 @@
`include "wally-config.vh" `include "wally-config.vh"
module decompress ( module decompress (
input logic [31:0] InstrRawD, input logic [31:0] InstrRawD, // 32-bit instruction or raw un decompress instruction
output logic [31:0] InstrD, output logic [31:0] InstrD, // Decompressed instruction
output logic IllegalCompInstrD); output logic IllegalCompInstrD // Invalid decompressed instruction
);
logic [15:0] instr16; logic [15:0] instr16;
logic [4:0] rds1, rs2, rs1p, rs2p, rds1p, rdp; logic [4:0] rds1, rs2, rs1p, rs2p, rds1p, rdp;

View File

@ -121,11 +121,10 @@ module ifu (
// Spill Support // Spill Support
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
if(`C_SUPPORTED) begin : SpillSupport if(`C_SUPPORTED) begin : Spill
spillsupport #(`ICACHE) spillsupport(.clk, .reset, .StallF, .Flush(FlushD), .PCF, .PCPlus4F, .PCNextF, .InstrRawF(InstrRawF), spill #(`ICACHE) spill(.clk, .reset, .StallD, .FlushD, .PCF, .PCPlus4F, .PCNextF, .InstrRawF,
.InstrDAPageFaultF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCFSpill, .InstrDAPageFaultF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCFSpill, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF);
.SelNextSpillF, .PostSpillInstrRawF, .CompressedF); end else begin : NoSpill
end else begin : NoSpillSupport
assign PCNextFSpill = PCNextF; assign PCNextFSpill = PCNextF;
assign PCFSpill = PCF; assign PCFSpill = PCF;
assign PostSpillInstrRawF = InstrRawF; assign PostSpillInstrRawF = InstrRawF;
@ -189,9 +188,11 @@ module ifu (
assign IgnoreRequest = ITLBMissF | FlushD; assign IgnoreRequest = ITLBMissF | FlushD;
// The IROM uses untranslated addresses, so it is not compatible with virtual memory. // The IROM uses untranslated addresses, so it is not compatible with virtual memory.
if (`IROM_SUPPORTED) begin : irom if (`IROM_SUPPORTED) begin : irom
logic IROMce;
assign IROMce = ~GatedStallD | reset;
assign IFURWF = 2'b10; assign IFURWF = 2'b10;
irom irom(.clk, .ce(~GatedStallD | reset), .Adr(PCNextFSpill[`XLEN-1:0]), .ReadData(IROMInstrF)); irom irom(.clk, .ce(IROMce), .Adr(PCNextFSpill[`XLEN-1:0]), .IROMInstrF);
end else begin end else begin
assign IFURWF = 2'b10; assign IFURWF = 2'b10;
end end
@ -201,6 +202,7 @@ module ifu (
localparam integer LOGBWPL = `ICACHE ? $clog2(WORDSPERLINE) : 1; localparam integer LOGBWPL = `ICACHE ? $clog2(WORDSPERLINE) : 1;
if(`ICACHE) begin : icache if(`ICACHE) begin : icache
localparam integer LINELEN = `ICACHE ? `ICACHE_LINELENINBITS : `XLEN; localparam integer LINELEN = `ICACHE ? `ICACHE_LINELENINBITS : `XLEN;
localparam integer LLENPOVERAHBW = `LLEN / `AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation)
logic [LINELEN-1:0] FetchBuffer; logic [LINELEN-1:0] FetchBuffer;
logic [`PA_BITS-1:0] ICacheBusAdr; logic [`PA_BITS-1:0] ICacheBusAdr;
logic ICacheBusAck; logic ICacheBusAck;
@ -226,7 +228,7 @@ module ifu (
.NextAdr(PCNextFSpill[11:0]), .NextAdr(PCNextFSpill[11:0]),
.PAdr(PCPF), .PAdr(PCPF),
.CacheCommitted(CacheCommittedF), .InvalidateCache(InvalidateICacheM)); .CacheCommitted(CacheCommittedF), .InvalidateCache(InvalidateICacheM));
ahbcacheinterface #(WORDSPERLINE, LINELEN, LOGBWPL, `ICACHE) ahbcacheinterface #(WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW)
ahbcacheinterface(.HCLK(clk), .HRESETn(~reset), ahbcacheinterface(.HCLK(clk), .HRESETn(~reset),
.HRDATA, .HRDATA,
.Flush(FlushD), .CacheBusRW, .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), .HWSTRB(), .Flush(FlushD), .CacheBusRW, .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), .HWSTRB(),

View File

@ -1,8 +1,9 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// irom.sv // irom.sv
// //
// Written: Ross Thompson ross1728@gmail.com January 30, 2022 // Written: Ross Thompson ross1728@gmail.com
// Modified: // Created: 30 January 2022
// Modified: 18 January 2023
// //
// Purpose: simple instruction ROM // Purpose: simple instruction ROM
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
@ -26,23 +27,30 @@
`include "wally-config.vh" `include "wally-config.vh"
module irom( module irom(
input logic clk, ce, input logic clk,
input logic [`XLEN-1:0] Adr, input logic ce, // Chip Enable. 0: Holds IROMInstrF constant
output logic [31:0] ReadData input logic [`XLEN-1:0] Adr, // PCNextFSpill
output logic [31:0] IROMInstrF // Instruction read data
); );
localparam ADDR_WDITH = $clog2(`IROM_RANGE/8); localparam ADDR_WDITH = $clog2(`IROM_RANGE/8);
localparam OFFSET = $clog2(`XLEN/8); localparam OFFSET = $clog2(`XLEN/8);
logic [`XLEN-1:0] ReadDataFull; logic [`XLEN-1:0] IROMInstrFFull;
logic [31:0] RawIROMInstrF;
rom1p1r #(ADDR_WDITH, `XLEN) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataFull)); logic [1:0] AdrD;
if (`XLEN == 32) assign ReadData = ReadDataFull; flopen #(2) AdrReg(clk, ce, Adr[2:1], AdrD);
// have to delay Adr[OFFSET-1] by 1 cycle
rom1p1r #(ADDR_WDITH, `XLEN) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(IROMInstrFFull));
if (`XLEN == 32) assign RawIROMInstrF = IROMInstrFFull;
else begin else begin
logic AdrD; // IROM is aligned to XLEN words, but instructions are 32 bits. Select between the two
flopen #(1) AdrReg(clk, ce, Adr[OFFSET-1], AdrD); // haves. Adr is the Next PCF not PCF so we delay 1 cycle.
assign ReadData = AdrD ? ReadDataFull[63:32] : ReadDataFull[31:0]; assign RawIROMInstrF = AdrD[1] ? IROMInstrFFull[63:32] : IROMInstrFFull[31:0];
end end
// If the memory address is only 2-byte aligned (delayed Adr[1] is set), return the upper 2 bytes in the lower 2 bytes.
// The spill logic will handle merging the two together.
assign IROMInstrF = AdrD[0] ? {16'b0, RawIROMInstrF[31:16]} : RawIROMInstrF;
endmodule endmodule

View File

@ -123,8 +123,8 @@ module speculativegshare
assign GHRNextD = FlushD ? {GHRNextE, GHRNextE[0]} : {DirPredictionF[1], GHRF, GHRF[0]}; assign GHRNextD = FlushD ? {GHRNextE, GHRNextE[0]} : {DirPredictionF[1], GHRF, GHRF[0]};
flopenr #(k+2) GHRDReg(clk, reset, (~StallD) | FlushD, GHRNextD, OldGHRD); flopenr #(k+2) GHRDReg(clk, reset, (~StallD) | FlushD, GHRNextD, OldGHRD);
assign GHRD = WrongPredInstrClassD[0] & BranchInstrD ? {DirPredictionD[1], OldGHRD[k:1]} : // shift right assign GHRD = WrongPredInstrClassD[0] & BranchInstrD ? {DirPredictionD[1], OldGHRD[k:1]} : // shift right
WrongPredInstrClassD[0] & ~BranchInstrD ? OldGHRD[k-2:-1] : // shift left WrongPredInstrClassD[0] & ~BranchInstrD ? OldGHRD[k-1:-1] : // shift left
OldGHRD; OldGHRD[k:0];
assign GHRNextE = FlushE ? GHRNextM : GHRD; assign GHRNextE = FlushE ? GHRNextM : GHRD;
flopenr #(k+1) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, OldGHRE); flopenr #(k+1) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, OldGHRE);

112
pipelined/src/ifu/spill.sv Normal file
View File

@ -0,0 +1,112 @@
///////////////////////////////////////////
// spill.sv
//
// Written: Ross Thompson ross1728@gmail.com
// Created: 28 January 2022
// Modified: 19 January 2023
//
// Purpose: allows the IFU to make extra memory request if instruction address crosses
// cache line boundaries or if instruction address without a cache crosses
// XLEN/8 boundary.
//
// Documentation: RISC-V System on Chip Design Chapter 11 (Figure 11.5)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// spill: lets the IFU fetch an instruction whose two halfwords sit on either side of a fetch
// boundary. When PCF points at the last halfword before the boundary (SpillF), a two-state FSM
// saves the low 16 bits of the first fetch, redirects the fetch address to PCF+2, and then
// presents {second fetch [15:0], saved first half} as the merged 32-bit instruction.
module spill #(
parameter CACHE_ENABLED // Changes spill threshold to 1 if there is no cache
)(input logic clk,
input logic reset,
input logic StallD, FlushD,
input logic [`XLEN-1:0] PCF, // 2 byte aligned PC in Fetch stage
input logic [`XLEN-1:2] PCPlus4F, // PCF + 4
input logic [`XLEN-1:0] PCNextF, // The next PCF
input logic [31:0] InstrRawF, // Instruction from the IROM, I$, or bus. Used to check if the instruction is compressed
input logic IFUCacheBusStallD, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched
input logic ITLBMissF, // ITLB miss, ignore memory request
input logic InstrDAPageFaultF, // Ignore memory request if the hptw supports writes and a DA page fault occurs (hptw is still active)
output logic [`XLEN-1:0] PCNextFSpill, // The next PCF for one of the two memory addresses of the spill
output logic [`XLEN-1:0] PCFSpill, // PCF for one of the two memory addresses of the spill
output logic SelNextSpillF, // During the transition between the two spill operations, the IFU should stall the pipeline
output logic [31:0] PostSpillInstrRawF,// The final 32 bit instruction after merging the two spilled fetches into 1 instruction
output logic CompressedF); // The fetched instruction is compressed
// Spill threshold occurs when all the cache offset PC bits are 1 (except [0]). Without a cache this is just PCF[1]
// With a cache, SPILLTHRESHOLD is the number of 32-bit words per I$ line; SpillF below ANDs PCF[$clog2(SPILLTHRESHOLD)+1:1].
localparam integer SPILLTHRESHOLD = CACHE_ENABLED ? `ICACHE_LINELENINBITS/32 : 1;
logic [`XLEN-1:0] PCPlus2F; // PCF + 2, address of the second half of a spilled instruction
logic TakeSpillF; // SpillF qualified by no stall, no ITLB miss and no pending DA page fault
logic SpillF; // PCF points at the last halfword before the spill boundary
logic SelSpillF; // FSM is in STATE_SPILL: PCFSpill presents PCF+2
logic SpillSaveF; // Capture enable for the first halfword register
logic [15:0] InstrFirstHalf; // Low 16 bits of the first fetch, held for the merge
// Only two states are used; the 2-bit encoding leaves two unused values, which the default arm of the next-state case sends to STATE_READY.
typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype;
(* mark_debug = "true" *) statetype CurrState, NextState;
////////////////////////////////////////////////////////////////////////////////////////////////////
// PC logic
////////////////////////////////////////////////////////////////////////////////////////////////////
// compute PCF+2 from the raw PC+4
// PCF[1] == 0: PCF+2 stays in the same word, so just set bit 1.
// PCF[1] == 1: PCF+2 is the start of the next word, i.e. PCPlus4F with the low two bits cleared.
mux2 #(`XLEN) pcplus2mux(.d0({PCF[`XLEN-1:2], 2'b10}), .d1({PCPlus4F, 2'b00}), .s(PCF[1]), .y(PCPlus2F));
// select between PCNextF and PCF+2
// FlushD masks the select so that a flush always passes PCNextF through.
mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelNextSpillF & ~FlushD), .y(PCNextFSpill));
// select between PCF and PCF+2
mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCFSpill));
////////////////////////////////////////////////////////////////////////////////////////////////////
// Detect spill
////////////////////////////////////////////////////////////////////////////////////////////////////
// Reduction AND: true only when every selected PCF offset bit is 1.
assign SpillF = &PCF[$clog2(SPILLTHRESHOLD)+1:1];
// Start the spill only once the first fetch is not stalled and the request is not being ignored.
assign TakeSpillF = SpillF & ~IFUCacheBusStallD & ~(ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF));
// State register: reset or FlushD forces STATE_READY; there is no enable, so it updates every clock.
always_ff @(posedge clk)
if (reset | FlushD) CurrState <= #1 STATE_READY;
else CurrState <= #1 NextState;
// Next-state logic: READY -> SPILL on TakeSpillF; remain in SPILL while the second fetch or the Decode stage is stalled.
always_comb begin
case (CurrState)
STATE_READY: if (TakeSpillF) NextState = STATE_SPILL;
else NextState = STATE_READY;
STATE_SPILL: if(IFUCacheBusStallD | StallD) NextState = STATE_SPILL;
else NextState = STATE_READY;
default: NextState = STATE_READY;
endcase
end
assign SelSpillF = (CurrState == STATE_SPILL);
// == binds tighter than &, so each term is (state compare) & condition.
// Asserted when a spill is being started, and while the second fetch is still stalled.
assign SelNextSpillF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallD);
// Save the first half only when the spill starts and no flush is in progress.
assign SpillSaveF = (CurrState == STATE_READY) & TakeSpillF & ~FlushD;
////////////////////////////////////////////////////////////////////////////////////////////////////
// Merge spilled instruction
////////////////////////////////////////////////////////////////////////////////////////////////////
// save the first 2 bytes
flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalf);
// merge together
// When SpillF is set, the current fetch's low halfword becomes the upper 16 bits and the saved halfword the lower 16 bits.
// NOTE(review): the select is SpillF, not the FSM state, so the merged value is also presented during the first fetch, before InstrFirstHalf is captured; presumably that value is not consumed because the IFU stalls on SelNextSpillF -- confirm.
mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalf}, SpillF, PostSpillInstrRawF);
// An instruction is compressed when its two low bits are anything other than 2'b11.
assign CompressedF = PostSpillInstrRawF[1:0] != 2'b11;
endmodule

View File

@ -1,98 +0,0 @@
///////////////////////////////////////////
// spillsupport.sv
//
// Written: Ross Thompson ross1728@gmail.com January 28, 2022
// Modified:
//
// Purpose: allows the IFU to make extra memory request if instruction address crosses
// cache line boundaries or if instruction address without a cache crosses
// XLEN/8 boundary.
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// spillsupport: predecessor of the spill module. Detects when PCF points at the last halfword
// before a fetch boundary (SpillF), saves one halfword of the first fetch, redirects the fetch
// address to PCF+2, and merges the saved halfword with the low halfword of the second fetch.
module spillsupport #(parameter CACHE_ENABLED)
(input logic clk,
input logic reset,
input logic StallF, Flush, // Flush forces the FSM back to STATE_READY and blocks the halfword save
input logic [`XLEN-1:0] PCF, // PC in Fetch stage
input logic [`XLEN-1:2] PCPlus4F, // Upper bits of PCF + 4 (low two bits are not carried)
input logic [`XLEN-1:0] PCNextF, // The next PCF
input logic [31:0] InstrRawF, // Raw fetched instruction word
input logic IFUCacheBusStallD, // Fetch is stalled; holds off the start of a spill and holds STATE_SPILL
input logic ITLBMissF, // Blocks the start of a spill
input logic InstrDAPageFaultF, // Blocks the start of a spill when `HPTW_WRITES_SUPPORTED is set
output logic [`XLEN-1:0] PCNextFSpill, // PCNextF, or PCF+2 while SelNextSpillF is set and Flush is clear
output logic [`XLEN-1:0] PCFSpill, // PCF, or PCF+2 while in STATE_SPILL
output logic SelNextSpillF, // Selects PCF+2 as the next fetch address
output logic [31:0] PostSpillInstrRawF, // InstrRawF, or the merged instruction when SpillF is set
output logic CompressedF); // Low two bits of PostSpillInstrRawF are not 2'b11
// Number of 32-bit words per I$ line with a cache, otherwise 1 (SpillF then reduces to PCF[1]).
localparam integer SPILLTHRESHOLD = CACHE_ENABLED ? `ICACHE_LINELENINBITS/32 : 1;
logic [`XLEN-1:0] PCPlus2F;
logic TakeSpillF;
logic SpillF;
logic SelSpillF, SpillSaveF;
logic [15:0] SpillDataLine0, SavedInstr;
// Only two states are used; unused encodings fall to the default arm of the next-state case.
typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype;
(* mark_debug = "true" *) statetype CurrState, NextState;
// compute PCF+2
// PCF[1] == 0: set bit 1 of PCF. PCF[1] == 1: PCF+2 is the next word, i.e. PCPlus4F with 2'b00 appended.
mux2 #(`XLEN) pcplus2mux(.d0({PCF[`XLEN-1:2], 2'b10}), .d1({PCPlus4F, 2'b00}), .s(PCF[1]), .y(PCPlus2F));
// select between PCNextF and PCF+2
mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelNextSpillF & ~Flush), .y(PCNextFSpill));
// select between PCF and PCF+2
mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCFSpill));
// Reduction AND: true only when every selected PCF offset bit is 1.
assign SpillF = &PCF[$clog2(SPILLTHRESHOLD)+1:1];
// Start the spill only when the fetch is not stalled and the request is not being ignored.
assign TakeSpillF = SpillF & ~IFUCacheBusStallD & ~(ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF));
// State register: reset or Flush forces STATE_READY.
always_ff @(posedge clk)
if (reset | Flush) CurrState <= #1 STATE_READY;
else CurrState <= #1 NextState;
// Next-state logic: READY -> SPILL on TakeSpillF; remain in SPILL while IFUCacheBusStallD or StallF is set.
always_comb begin
case (CurrState)
STATE_READY: if (TakeSpillF) NextState = STATE_SPILL;
else NextState = STATE_READY;
STATE_SPILL: if(IFUCacheBusStallD | StallF) NextState = STATE_SPILL;
else NextState = STATE_READY;
default: NextState = STATE_READY;
endcase
end
assign SelSpillF = (CurrState == STATE_SPILL);
// == binds tighter than &, so each term is (state compare) & condition.
assign SelNextSpillF = (CurrState == STATE_READY & TakeSpillF) |
(CurrState == STATE_SPILL & IFUCacheBusStallD);
assign SpillSaveF = (CurrState == STATE_READY) & TakeSpillF;
// Halfword to save from the first fetch: the low half with a cache, the high half without one.
assign SavedInstr = CACHE_ENABLED ? InstrRawF[15:0] : InstrRawF[31:16];
// Capture the saved halfword when the spill starts, unless a flush is in progress.
flopenr #(16) SpillInstrReg(.clk(clk),
.en(SpillSaveF & ~Flush),
.reset(reset),
.d(SavedInstr),
.q(SpillDataLine0));
// When SpillF is set, the current fetch's low halfword becomes the upper 16 bits and the saved halfword the lower 16 bits.
mux2 #(32) postspillmux(.d0(InstrRawF), .d1({InstrRawF[15:0], SpillDataLine0}), .s(SpillF),
.y(PostSpillInstrRawF));
// An instruction is compressed when its two low bits are anything other than 2'b11.
assign CompressedF = PostSpillInstrRawF[1:0] != 2'b11;
endmodule

View File

@ -1,11 +1,14 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// amoalu.sv // amoalu.sv
// //
// Written: David_Harris@hmc.edu 10 March 2021 // Written: David_Harris@hmc.edu
// Modified: // Created: 10 March 2021
// Modified: 18 January 2023
// //
// Purpose: Performs AMO operations // Purpose: Performs AMO operations
// //
// Documentation: RISC-V System on Chip Design Chapter 14 (Figure ***)
//
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
// //
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
@ -26,13 +29,12 @@
`include "wally-config.vh" `include "wally-config.vh"
// *** this should probably be moved into the LSU because it is instantiated in the D$
module amoalu ( module amoalu (
input logic [`XLEN-1:0] srca, srcb, input logic [`XLEN-1:0] ReadDataM, // LSU's ReadData
input logic [6:0] funct, input logic [`XLEN-1:0] IHWriteDataM, // LSU's WriteData
input logic [1:0] width, input logic [6:0] LSUFunct7M, // ALU Operation
output logic [`XLEN-1:0] result input logic [2:0] LSUFunct3M, // Memory access width
output logic [`XLEN-1:0] AMOResult // ALU output
); );
logic [`XLEN-1:0] a, b, y; logic [`XLEN-1:0] a, b, y;
@ -41,7 +43,7 @@ module amoalu (
// a single carry chain should be shared for + and the four min/max // a single carry chain should be shared for + and the four min/max
// and the same mux can be used to select b for swap. // and the same mux can be used to select b for swap.
always_comb always_comb
case (funct[6:2]) case (LSUFunct7M[6:2])
5'b00001: y = b; // amoswap 5'b00001: y = b; // amoswap
5'b00000: y = a + b; // amoadd 5'b00000: y = a + b; // amoadd
5'b00100: y = a ^ b; // amoxor 5'b00100: y = a ^ b; // amoxor
@ -56,19 +58,19 @@ module amoalu (
// sign extend if necessary // sign extend if necessary
if (`XLEN == 32) begin:sext if (`XLEN == 32) begin:sext
assign a = srca; assign a = ReadDataM;
assign b = srcb; assign b = IHWriteDataM;
assign result = y; assign AMOResult = y;
end else begin:sext // `XLEN = 64 end else begin:sext // `XLEN = 64
always_comb always_comb
if (width == 2'b10) begin // sign-extend word-length operations if (LSUFunct3M[1:0] == 2'b10) begin // sign-extend word-length operations
a = {{32{srca[31]}}, srca[31:0]}; a = {{32{ReadDataM[31]}}, ReadDataM[31:0]};
b = {{32{srcb[31]}}, srcb[31:0]}; b = {{32{IHWriteDataM[31]}}, IHWriteDataM[31:0]};
result = {{32{y[31]}}, y[31:0]}; AMOResult = {{32{y[31]}}, y[31:0]};
end else begin end else begin
a = srca; a = ReadDataM;
b = srcb; b = IHWriteDataM;
result = y; AMOResult = y;
end end
end end
endmodule endmodule

View File

@ -1,10 +1,13 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// atomic.sv // atomic.sv
// //
// Written: Ross Thompson ross1728@gmail.com January 31, 2022 // Written: Ross Thompson ross1728@gmail.com
// Modified: // Created: 31 January 2022
// Modified: 18 January 2023
// //
// Purpose: atomic data path. // Purpose: Wrapper for amoalu and lrsc
//
// Documentation: RISC-V System on Chip Design Chapter 14 (Figure ***)
// //
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
// //
@ -28,25 +31,25 @@
module atomic ( module atomic (
input logic clk, input logic clk,
input logic reset, StallW, input logic reset,
input logic [`XLEN-1:0] ReadDataM, input logic StallW,
input logic [`XLEN-1:0] IHWriteDataM, input logic [`XLEN-1:0] ReadDataM, // LSU ReadData XLEN because FPU does not issue atomic memory operation from FPU registers
input logic [`PA_BITS-1:0] PAdrM, input logic [`XLEN-1:0] IHWriteDataM, // LSU WriteData XLEN because FPU does not issue atomic memory operation from FPU registers
input logic [6:0] LSUFunct7M, input logic [`PA_BITS-1:0] PAdrM, // Physical memory address
input logic [2:0] LSUFunct3M, input logic [6:0] LSUFunct7M, // AMO alu operation gated by HPTW
input logic [1:0] LSUAtomicM, input logic [2:0] LSUFunct3M, // IEU or HPTW memory operation size
input logic [1:0] PreLSURWM, input logic [1:0] LSUAtomicM, // 10: AMO operation, select AMOResult as the writedata output, 01: LR/SC operation
input logic IgnoreRequest, input logic [1:0] PreLSURWM, // IEU or HPTW Read/Write signal
output logic [`XLEN-1:0] IMAWriteDataM, input logic IgnoreRequest, // On FlushM or TLB miss ignore memory operation
output logic SquashSCW, output logic [`XLEN-1:0] IMAWriteDataM, // IEU, HPTW, or AMO write data
output logic [1:0] LSURWM output logic SquashSCW, // Store conditional failed disable write to GPR
output logic [1:0] LSURWM // IEU or HPTW Read/Write signal gated by LR/SC
); );
logic [`XLEN-1:0] AMOResult; logic [`XLEN-1:0] AMOResult;
logic MemReadM; logic MemReadM;
amoalu amoalu(.srca(ReadDataM), .srcb(IHWriteDataM), .funct(LSUFunct7M), .width(LSUFunct3M[1:0]), amoalu amoalu(.ReadDataM, .IHWriteDataM, .LSUFunct7M, .LSUFunct3M, .AMOResult);
.result(AMOResult));
mux2 #(`XLEN) wdmux(IHWriteDataM, AMOResult, LSUAtomicM[1], IMAWriteDataM); mux2 #(`XLEN) wdmux(IHWriteDataM, AMOResult, LSUAtomicM[1], IMAWriteDataM);
assign MemReadM = PreLSURWM[1] & ~IgnoreRequest; assign MemReadM = PreLSURWM[1] & ~IgnoreRequest;

View File

@ -1,10 +1,14 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// dtim.sv // dtim.sv
// //
// Written: Ross Thompson ross1728@gmail.com January 30, 2022 // Written: Ross Thompson ross1728@gmail.com
// Modified: // Created: 30 January 2022
// Modified: 18 January 2023
//
// Purpose: tightly integrated memory into the LSU.
//
// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.12)
// //
// Purpose: simple memory with bus or cache.
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
// //
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
@ -26,14 +30,15 @@
`include "wally-config.vh" `include "wally-config.vh"
module dtim( module dtim(
input logic clk, ce, input logic clk,
input logic [1:0] MemRWM, input logic FlushW,
input logic [`PA_BITS-1:0] Adr, input logic ce, // Chip Enable. 0: Holds ReadDataWordM
input logic FlushW, input logic [1:0] MemRWM, // Read/Write control
input logic [`LLEN-1:0] WriteDataM, input logic [`PA_BITS-1:0] DTIMAdr, // No stall: Execution stage memory address. Stall: Memory stage memory address
input logic [`LLEN/8-1:0] ByteMaskM, input logic [`LLEN-1:0] WriteDataM, // Write data from IEU
output logic [`LLEN-1:0] ReadDataWordM input logic [`LLEN/8-1:0] ByteMaskM, // Selects which bytes within a word to write
); output logic [`LLEN-1:0] ReadDataWordM // Read data before subword selection
);
logic we; logic we;
@ -43,6 +48,6 @@ module dtim(
assign we = MemRWM[0] & ~FlushW; // have to ignore write if Trap. assign we = MemRWM[0] & ~FlushW; // have to ignore write if Trap.
ram1p1rwbe #(.DEPTH(`DTIM_RANGE/8), .WIDTH(`LLEN)) ram1p1rwbe #(.DEPTH(`DTIM_RANGE/8), .WIDTH(`LLEN))
ram(.clk, .ce, .we, .bwe(ByteMaskM), .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataWordM), .din(WriteDataM)); ram(.clk, .ce, .we, .bwe(ByteMaskM), .addr(DTIMAdr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataWordM), .din(WriteDataM));
endmodule endmodule

View File

@ -1,8 +1,9 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// endianswap.sv // endianswap.sv
// //
// Written: David_Harris@hmc.edu 7 May 2022 // Written: David_Harris@hmc.edu
// Modified: // Created: 7 May 2022
// Modified: 18 January 2023
// //
// Purpose: Swap byte order for Big-Endian accesses // Purpose: Swap byte order for Big-Endian accesses
// //

View File

@ -1,11 +1,14 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// lrsc.sv // lrsc.sv
// //
// Written: David_Harris@hmc.edu 17 July 2021 // Written: David_Harris@hmc.edu
// Modified: // Created: 17 July 2021
// Modified: 18 January 2023
// //
// Purpose: Load Reserved / Store Conditional unit // Purpose: Load Reserved / Store Conditional unit
// Track the reservation and squash the store if it fails // Track the reservation and squash the store if it fails
//
// Documentation: RISC-V System on Chip Design Chapter 14 (Figure ***)
// //
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
// //

View File

@ -131,7 +131,7 @@ module lsu (
logic LSULoadAccessFaultM; // Load access fault logic LSULoadAccessFaultM; // Load access fault
logic LSUStoreAmoAccessFaultM; // Store access fault logic LSUStoreAmoAccessFaultM; // Store access fault
logic IgnoreRequestTLB; // On either ITLB or DTLB miss, ignore miss so HPTW can handle logic IgnoreRequestTLB; // On either ITLB or DTLB miss, ignore miss so HPTW can handle
logic IgnoreRequest; // On FlushM, ignore TLB miss logic IgnoreRequest; // On FlushM or TLB miss ignore memory operation
logic SelDTIM; // Select DTIM rather than bus or D$ logic SelDTIM; // Select DTIM rather than bus or D$
@ -232,17 +232,19 @@ module lsu (
// **** fix ReadDataWordM to be LLEN. ByteMask is wrong length. // **** fix ReadDataWordM to be LLEN. ByteMask is wrong length.
// **** create config to support DTIM with floating point. // **** create config to support DTIM with floating point.
dtim dtim(.clk, .ce(~GatedStallW), .MemRWM(DTIMMemRWM), dtim dtim(.clk, .ce(~GatedStallW), .MemRWM(DTIMMemRWM),
.Adr(DTIMAdr), .FlushW, .WriteDataM(LSUWriteDataM), .DTIMAdr, .FlushW, .WriteDataM(LSUWriteDataM),
.ReadDataWordM(DTIMReadDataWordM[`XLEN-1:0]), .ByteMaskM(ByteMaskM[`XLEN/8-1:0])); .ReadDataWordM(DTIMReadDataWordM[`XLEN-1:0]), .ByteMaskM(ByteMaskM[`XLEN/8-1:0]));
end else begin end else begin
end end
if (`BUS) begin : bus if (`BUS) begin : bus
localparam integer LLENWORDSPERLINE = `DCACHE ? `DCACHE_LINELENINBITS/`LLEN : 1; // Number of LLEN words in cacheline
localparam integer LLENLOGBWPL = `DCACHE ? $clog2(LLENWORDSPERLINE) : 1; // Log2 of ^
localparam integer BEATSPERLINE = `DCACHE ? `DCACHE_LINELENINBITS/`AHBW : 1; // Number of AHBW words (beats) in cacheline
localparam integer AHBWLOGBWPL = `DCACHE ? $clog2(BEATSPERLINE) : 1; // Log2 of ^
if(`DCACHE) begin : dcache if(`DCACHE) begin : dcache
localparam integer LINELEN = `DCACHE ? `DCACHE_LINELENINBITS : `XLEN; // Number of bytes in cacheline localparam integer LLENWORDSPERLINE = `DCACHE_LINELENINBITS/`LLEN; // Number of LLEN words in cacheline
localparam integer LLENLOGBWPL = $clog2(LLENWORDSPERLINE); // Log2 of ^
localparam integer BEATSPERLINE = `DCACHE_LINELENINBITS/`AHBW; // Number of AHBW words (beats) in cacheline
localparam integer AHBWLOGBWPL = $clog2(BEATSPERLINE); // Log2 of ^
localparam integer LINELEN = `DCACHE_LINELENINBITS; // Number of bits in cacheline
localparam integer LLENPOVERAHBW = `LLEN / `AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation)
logic [LINELEN-1:0] FetchBuffer; // Temporary buffer to hold partially fetched cacheline logic [LINELEN-1:0] FetchBuffer; // Temporary buffer to hold partially fetched cacheline
logic [`PA_BITS-1:0] DCacheBusAdr; // Cacheline address to fetch or writeback. logic [`PA_BITS-1:0] DCacheBusAdr; // Cacheline address to fetch or writeback.
logic [AHBWLOGBWPL-1:0] BeatCount; // Position within a cacheline. ahbcacheinterface to cache logic [AHBWLOGBWPL-1:0] BeatCount; // Position within a cacheline. ahbcacheinterface to cache
@ -250,7 +252,6 @@ module lsu (
logic SelBusBeat; // ahbcacheinterface selects position in cacheline with BeatCount logic SelBusBeat; // ahbcacheinterface selects position in cacheline with BeatCount
logic [1:0] CacheBusRW; // Cache sends request to ahbcacheinterface logic [1:0] CacheBusRW; // Cache sends request to ahbcacheinterface
logic [1:0] BusRW; // Uncached bus memory access logic [1:0] BusRW; // Uncached bus memory access
localparam integer LLENPOVERAHBW = `LLEN / `AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation)
logic CacheableOrFlushCacheM; // Memory address is cacheable or operation is a cache flush logic CacheableOrFlushCacheM; // Memory address is cacheable or operation is a cache flush
logic [1:0] CacheRWM; // Cache read (10), write (01), AMO (11) logic [1:0] CacheRWM; // Cache read (10), write (01), AMO (11)
logic [1:0] CacheAtomicM; // Cache AMO logic [1:0] CacheAtomicM; // Cache AMO
@ -272,7 +273,7 @@ module lsu (
.FetchBuffer, .CacheBusRW, .FetchBuffer, .CacheBusRW,
.CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0)); .CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0));
ahbcacheinterface #(.BEATSPERLINE(BEATSPERLINE), .LINELEN(LINELEN), .LOGWPL(AHBWLOGBWPL), .CACHE_ENABLED(`DCACHE)) ahbcacheinterface( ahbcacheinterface #(.BEATSPERLINE(BEATSPERLINE), .AHBWLOGBWPL(AHBWLOGBWPL), .LINELEN(LINELEN), .LLENPOVERAHBW(LLENPOVERAHBW)) ahbcacheinterface(
.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HCLK(clk), .HRESETn(~reset), .Flush(FlushW),
.HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB),
.HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY),

View File

@ -1,11 +1,14 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// subwordread.sv // subwordread.sv
// //
// Written: David_Harris@hmc.edu 9 January 2021 // Written: David_Harris@hmc.edu
// Modified: // Created: 9 January 2021
// Modified: 18 January 2023
// //
// Purpose: Extract subwords and sign extend for reads // Purpose: Extract subwords and sign extend for reads
// //
// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9)
//
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
// //
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University

View File

@ -1,11 +1,14 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// subwordwrite.sv // subwordwrite.sv
// //
// Written: David_Harris@hmc.edu 9 January 2021 // Written: David_Harris@hmc.edu
// Modified: // Created: 9 January 2021
// Modified: 18 January 2023
// //
// Purpose: Masking and muxing for subword writes // Purpose: Masking and muxing for subword writes
// //
// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9)
//
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
// //
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University

View File

@ -1,11 +1,14 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// swbytemask.sv // swbytemask.sv
// //
// Written: David_Harris@hmc.edu 9 January 2021 // Written: David_Harris@hmc.edu
// Modified: // Created: 9 January 2021
// Modified: 18 January 2023
// //
// Purpose: On-chip RAM, external to core // Purpose: On-chip RAM, external to core
// //
// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9)
//
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
// //
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University

View File

@ -98,7 +98,6 @@ module hptw (
logic [2:0] HPTWSize; // 32 or 64 bit access logic [2:0] HPTWSize; // 32 or 64 bit access
(* mark_debug = "true" *) statetype WalkerState, NextWalkerState, InitialWalkerState; (* mark_debug = "true" *) statetype WalkerState, NextWalkerState, InitialWalkerState;
// map hptw access faults onto either the original LSU load/store fault or instruction access fault // map hptw access faults onto either the original LSU load/store fault or instruction access fault
assign LoadAccessFaultM = WalkerState == IDLE ? LSULoadAccessFaultM : (LSULoadAccessFaultM | LSUStoreAmoAccessFaultM) & DTLBWalk & MemRWM[1] & ~MemRWM[0]; assign LoadAccessFaultM = WalkerState == IDLE ? LSULoadAccessFaultM : (LSULoadAccessFaultM | LSUStoreAmoAccessFaultM) & DTLBWalk & MemRWM[1] & ~MemRWM[0];
assign StoreAmoAccessFaultM = WalkerState == IDLE ? LSUStoreAmoAccessFaultM : (LSULoadAccessFaultM | LSUStoreAmoAccessFaultM) & DTLBWalk & MemRWM[0]; assign StoreAmoAccessFaultM = WalkerState == IDLE ? LSUStoreAmoAccessFaultM : (LSULoadAccessFaultM | LSUStoreAmoAccessFaultM) & DTLBWalk & MemRWM[0];
@ -189,13 +188,13 @@ module hptw (
// FSM to track PageType based on the levels of the page table traversed // FSM to track PageType based on the levels of the page table traversed
flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType); flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType);
always_comb always_comb
case (WalkerState) case (WalkerState)
L3_RD: NextPageType = 2'b11; // terapage L3_RD: NextPageType = 2'b11; // terapage
L2_RD: NextPageType = 2'b10; // gigapage L2_RD: NextPageType = 2'b10; // gigapage
L1_RD: NextPageType = 2'b01; // megapage L1_RD: NextPageType = 2'b01; // megapage
L0_RD: NextPageType = 2'b00; // kilopage L0_RD: NextPageType = 2'b00; // kilopage
default: NextPageType = PageType; default: NextPageType = PageType;
endcase endcase
// HPTWAdr muxing // HPTWAdr muxing
if (`XLEN==32) begin // RV32 if (`XLEN==32) begin // RV32

View File

@ -107,16 +107,10 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) (
.Cacheable, .Idempotent, .SelTIM, .Cacheable, .Idempotent, .SelTIM,
.PMAInstrAccessFaultF, .PMALoadAccessFaultM, .PMAStoreAmoAccessFaultM); .PMAInstrAccessFaultF, .PMALoadAccessFaultM, .PMAStoreAmoAccessFaultM);
if (`PMP_ENTRIES > 0) // instantiate PMP pmpchecker pmpchecker(.PhysicalAddress, .PrivilegeModeW,
pmpchecker pmpchecker(.PhysicalAddress, .PrivilegeModeW, .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW,
.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, .ExecuteAccessF, .WriteAccessM, .ReadAccessM,
.ExecuteAccessF, .WriteAccessM, .ReadAccessM, .PMPInstrAccessFaultF, .PMPLoadAccessFaultM, .PMPStoreAmoAccessFaultM);
.PMPInstrAccessFaultF, .PMPLoadAccessFaultM, .PMPStoreAmoAccessFaultM);
else begin
assign PMPInstrAccessFaultF = 0;
assign PMPLoadAccessFaultM = 0;
assign PMPStoreAmoAccessFaultM = 0;
end
// Access faults // Access faults
// If TLB miss and translating we want to not have faults from the PMA and PMP checkers. // If TLB miss and translating we want to not have faults from the PMA and PMP checkers.

View File

@ -49,28 +49,34 @@ module pmpchecker (
output logic PMPStoreAmoAccessFaultM output logic PMPStoreAmoAccessFaultM
); );
// Bit i is high when the address falls in PMP region i if (`PMP_ENTRIES > 0) begin
logic EnforcePMP; // Bit i is high when the address falls in PMP region i
logic [`PMP_ENTRIES-1:0] Match; // physical address matches one of the pmp ranges logic EnforcePMP;
logic [`PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address. logic [`PMP_ENTRIES-1:0] Match; // physical address matches one of the pmp ranges
logic [`PMP_ENTRIES-1:0] Active; // PMP register i is non-null logic [`PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address.
logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set logic [`PMP_ENTRIES-1:0] Active; // PMP register i is non-null
logic [`PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set
logic [`PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i]
pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0]( pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0](
.PhysicalAddress, .PhysicalAddress,
.PMPCfg(PMPCFG_ARRAY_REGW), .PMPCfg(PMPCFG_ARRAY_REGW),
.PMPAdr(PMPADDR_ARRAY_REGW), .PMPAdr(PMPADDR_ARRAY_REGW),
.PAgePMPAdrIn({PAgePMPAdr[`PMP_ENTRIES-2:0], 1'b1}), .PAgePMPAdrIn({PAgePMPAdr[`PMP_ENTRIES-2:0], 1'b1}),
.PAgePMPAdrOut(PAgePMPAdr), .PAgePMPAdrOut(PAgePMPAdr),
.Match, .Active, .L, .X, .W, .R); .Match, .Active, .L, .X, .W, .R);
priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // combine the match signal from all the address decoders to find the first one that matches. priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // combine the match signal from all the address decoders to find the first one that matches.
// Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region
assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |(L & FirstMatch) : |Active; assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |(L & FirstMatch) : |Active;
assign PMPInstrAccessFaultF = EnforcePMP & ExecuteAccessF & ~|(X & FirstMatch) ; assign PMPInstrAccessFaultF = EnforcePMP & ExecuteAccessF & ~|(X & FirstMatch) ;
assign PMPStoreAmoAccessFaultM = EnforcePMP & WriteAccessM & ~|(W & FirstMatch) ; assign PMPStoreAmoAccessFaultM = EnforcePMP & WriteAccessM & ~|(W & FirstMatch) ;
assign PMPLoadAccessFaultM = EnforcePMP & ReadAccessM & ~|(R & FirstMatch) ; assign PMPLoadAccessFaultM = EnforcePMP & ReadAccessM & ~|(R & FirstMatch) ;
end else begin
assign PMPInstrAccessFaultF = 0;
assign PMPStoreAmoAccessFaultM = 0;
assign PMPLoadAccessFaultM = 0;
end
endmodule endmodule

View File

@ -63,12 +63,8 @@ module tlbcontrol #(parameter ITLB = 0) (
// Determine whether TLB is being used // Determine whether TLB is being used
assign TLBAccess = ReadAccess | WriteAccess; assign TLBAccess = ReadAccess | WriteAccess;
if (`XLEN==64) // Check whether upper bits of 64-bit virtual addressses are all equal // Check that upper bits are legal (all 0s or all 1s)
vm64check vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequalPageFault); vm64check vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequalPageFault);
else begin
assign SV39Mode = 0;
assign UpperBitsUnequalPageFault = 0;
end
// unswizzle useful PTE bits // unswizzle useful PTE bits
assign {PTE_D, PTE_A} = PTEAccessBits[7:6]; assign {PTE_D, PTE_A} = PTEAccessBits[7:6];

View File

@ -35,12 +35,16 @@ module vm64check (
output logic UpperBitsUnequalPageFault output logic UpperBitsUnequalPageFault
); );
logic eq_63_47, eq_46_38; if (`XLEN == 64) begin
assign SV39Mode = (SATP_MODE == `SV39);
assign SV39Mode = (SATP_MODE == `SV39); // page fault if upper bits aren't all the same
logic eq_63_47, eq_46_38;
// page fault if upper bits aren't all the same assign eq_46_38 = &(VAdr[46:38]) | ~|(VAdr[46:38]);
assign eq_46_38 = &(VAdr[46:38]) | ~|(VAdr[46:38]); assign eq_63_47 = &(VAdr[63:47]) | ~|(VAdr[63:47]);
assign eq_63_47 = &(VAdr[63:47]) | ~|(VAdr[63:47]); assign UpperBitsUnequalPageFault = SV39Mode ? ~(eq_63_47 & eq_46_38) : ~eq_63_47;
assign UpperBitsUnequalPageFault = SV39Mode ? ~(eq_63_47 & eq_46_38) : ~eq_63_47; end else begin
assign SV39Mode = 0;
assign UpperBitsUnequalPageFault = 0;
end
endmodule endmodule

View File

@ -8,12 +8,10 @@
// Based on RISC-V spec (https://github.com/riscv/riscv-plic-spec/blob/master/riscv-plic.adoc) // Based on RISC-V spec (https://github.com/riscv/riscv-plic-spec/blob/master/riscv-plic.adoc)
// With clarifications from ROA's existing implementation (https://roalogic.github.io/plic/docs/AHB-Lite_PLIC_Datasheet.pdf) // With clarifications from ROA's existing implementation (https://roalogic.github.io/plic/docs/AHB-Lite_PLIC_Datasheet.pdf)
// Supports only 1 target core and only a global threshold. // Supports only 1 target core and only a global threshold.
// // This PLIC implementation serves as both the PLIC Gateways and PLIC Core.
// Documentation: RISC-V System on Chip Design Chapter 15 // It assumes interrupt sources are level-triggered wires.
// //
// *** Big questions: // Documentation: RISC-V System on Chip Design Chapter 15
// Do we detect requests as level-triggered or edge-trigged?
// If edge-triggered, do we want to allow 1 source to be able to make a number of repeated requests?
// //
// A component of the CORE-V-WALLY configurable RISC-V project. // A component of the CORE-V-WALLY configurable RISC-V project.
// //
@ -111,7 +109,7 @@ module plic_apb (
if (memwrite) if (memwrite)
casez(entry) casez(entry)
24'h0000??: intPriority[entry[7:2]] <= #1 Din[2:0]; 24'h0000??: intPriority[entry[7:2]] <= #1 Din[2:0];
`ifdef PLIC_NUM_SRC_LT_32 // *** switch to a generate for loop so as to deprecate PLIC_NUM_SRC_LT_32 and allow up to 1023 sources `ifdef PLIC_NUM_SRC_LT_32 // eventually switch to a generate for loop so as to deprecate PLIC_NUM_SRC_LT_32 and allow up to 1023 sources
24'h002000: intEn[0][`N:1] <= #1 Din[`N:1]; 24'h002000: intEn[0][`N:1] <= #1 Din[`N:1];
24'h002080: intEn[1][`N:1] <= #1 Din[`N:1]; 24'h002080: intEn[1][`N:1] <= #1 Din[`N:1];
`endif `endif
@ -172,8 +170,7 @@ module plic_apb (
end end
// pending interrupt requests // pending interrupt requests
assign nextIntPending = (intPending | requests) & ~intInProgress; // dh changed back 7/9/22 see if Buildroot still boots. Confirmed to boot successfully. assign nextIntPending = (intPending | requests) & ~intInProgress;
//assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion
flopr #(`N) intPendingFlop(PCLK,~PRESETn,nextIntPending,intPending); flopr #(`N) intPendingFlop(PCLK,~PRESETn,nextIntPending,intPending);
// context-dependent signals // context-dependent signals
@ -248,7 +245,7 @@ module plic_apb (
end end
end end
// is the max priority > threshold? // is the max priority > threshold?
// *** would it be any better to first priority encode maxPriority into binary and then ">" with threshold? // would it be any better to first priority encode maxPriority into binary and then ">" with threshold?
assign MExtInt = |(threshMask[0] & priorities_with_irqs[0]); assign MExtInt = |(threshMask[0] & priorities_with_irqs[0]);
assign SExtInt = |(threshMask[1] & priorities_with_irqs[1]); assign SExtInt = |(threshMask[1] & priorities_with_irqs[1]);
endmodule endmodule

View File

@ -1,251 +0,0 @@
`include "wally-config.vh"
`define NUM_REGS 32
`define NUM_CSRS 4096
`define PRINT_PC_INSTR 1
`define PRINT_MOST 1
`define PRINT_ALL 0
module rvviTrace #(
parameter int ILEN = `XLEN, // Instruction length in bits
parameter int XLEN = `XLEN, // GPR length in bits
parameter int FLEN = `FLEN, // FPR length in bits
parameter int VLEN = 0, // Vector register size in bits
parameter int NHART = 1, // Number of harts reported
parameter int RETIRE = 1) // Number of instructions that can retire during valid event
();
localparam NUMREGS = `E_SUPPORTED ? 16 : 32;
// wally specific signals
logic reset;
logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW;
logic [`XLEN-1:0] InstrRawD, InstrRawE, InstrRawM, InstrRawW;
logic InstrValidM, InstrValidW;
logic StallE, StallM, StallW;
logic FlushD, FlushE, FlushM, FlushW;
logic TrapM, TrapW;
logic IntrF, IntrD, IntrE, IntrM, IntrW;
logic HaltM, HaltW;
logic [1:0] PrivilegeModeW;
logic [`XLEN-1:0] rf[NUMREGS];
logic [NUMREGS-1:0] rf_wb;
logic [4:0] rf_a3;
logic rf_we3;
logic [`XLEN-1:0] frf[32];
logic [`NUM_REGS-1:0] frf_wb;
logic [4:0] frf_a4;
logic frf_we4;
logic [`XLEN-1:0] CSRArray [logic[11:0]];
logic CSRWriteM, CSRWriteW;
logic [11:0] CSRAdrM, CSRAdrW;
// tracer signals
logic clk;
logic valid;
logic [63:0] order [(NHART-1):0][(RETIRE-1):0];
logic [ILEN-1:0] insn [(NHART-1):0][(RETIRE-1):0];
logic intr [(NHART-1):0][(RETIRE-1):0];
logic [(XLEN-1):0] pc_rdata [(NHART-1):0][(RETIRE-1):0];
logic [(XLEN-1):0] pc_wdata [(NHART-1):0][(RETIRE-1):0];
logic trap [(NHART-1):0][(RETIRE-1):0];
logic halt [(NHART-1):0][(RETIRE-1):0];
logic [1:0] mode [(NHART-1):0][(RETIRE-1):0];
logic [1:0] ixl [(NHART-1):0][(RETIRE-1):0];
logic [`NUM_REGS-1:0][(XLEN-1):0] x_wdata [(NHART-1):0][(RETIRE-1):0];
logic [`NUM_REGS-1:0] x_wb [(NHART-1):0][(RETIRE-1):0];
logic [`NUM_REGS-1:0][(XLEN-1):0] f_wdata [(NHART-1):0][(RETIRE-1):0];
logic [`NUM_REGS-1:0] f_wb [(NHART-1):0][(RETIRE-1):0];
logic [4095:0][(XLEN-1):0] csr [(NHART-1):0][(RETIRE-1):0];
logic [4095:0] csr_wb [(NHART-1):0][(RETIRE-1):0];
logic lrsc_cancel[(NHART-1):0][(RETIRE-1):0];
assign clk = testbench.dut.clk;
// assign InstrValidF = testbench.dut.core.ieu.InstrValidF; // not needed yet
assign InstrValidD = testbench.dut.core.ieu.c.InstrValidD;
assign InstrValidE = testbench.dut.core.ieu.c.InstrValidE;
assign InstrValidM = testbench.dut.core.ieu.InstrValidM;
assign InstrRawD = testbench.dut.core.ifu.InstrRawD;
assign PCNextF = testbench.dut.core.ifu.PCNextF;
assign PCF = testbench.dut.core.ifu.PCF;
assign PCD = testbench.dut.core.ifu.PCD;
assign PCE = testbench.dut.core.ifu.PCE;
assign PCM = testbench.dut.core.ifu.PCM;
assign reset = testbench.reset;
assign StallE = testbench.dut.core.StallE;
assign StallM = testbench.dut.core.StallM;
assign StallW = testbench.dut.core.StallW;
assign FlushD = testbench.dut.core.FlushD;
assign FlushE = testbench.dut.core.FlushE;
assign FlushM = testbench.dut.core.FlushM;
assign FlushW = testbench.dut.core.FlushW;
assign TrapM = testbench.dut.core.TrapM;
assign HaltM = testbench.DCacheFlushStart;
assign PrivilegeModeW = testbench.dut.core.priv.priv.privmode.PrivilegeModeW;
assign STATUS_SXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_SXL;
assign STATUS_UXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_UXL;
always_comb begin
// machine CSRs
// *** missing PMP and performance counters.
CSRArray[12'h300] = testbench.dut.core.priv.priv.csr.csrm.MSTATUS_REGW;
CSRArray[12'h310] = testbench.dut.core.priv.priv.csr.csrm.MSTATUSH_REGW;
CSRArray[12'h305] = testbench.dut.core.priv.priv.csr.csrm.MTVEC_REGW;
CSRArray[12'h341] = testbench.dut.core.priv.priv.csr.csrm.MEPC_REGW;
CSRArray[12'h306] = testbench.dut.core.priv.priv.csr.csrm.MCOUNTEREN_REGW;
CSRArray[12'h320] = testbench.dut.core.priv.priv.csr.csrm.MCOUNTINHIBIT_REGW;
CSRArray[12'h302] = testbench.dut.core.priv.priv.csr.csrm.MEDELEG_REGW;
CSRArray[12'h303] = testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW;
CSRArray[12'h344] = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW;
CSRArray[12'h304] = testbench.dut.core.priv.priv.csr.csrm.MIE_REGW;
CSRArray[12'h301] = testbench.dut.core.priv.priv.csr.csrm.MISA_REGW;
CSRArray[12'hF14] = testbench.dut.core.priv.priv.csr.csrm.MHARTID_REGW;
CSRArray[12'h340] = testbench.dut.core.priv.priv.csr.csrm.MSCRATCH_REGW;
CSRArray[12'h342] = testbench.dut.core.priv.priv.csr.csrm.MCAUSE_REGW;
CSRArray[12'h343] = testbench.dut.core.priv.priv.csr.csrm.MTVAL_REGW;
CSRArray[12'hF11] = 0;
CSRArray[12'hF12] = 0;
CSRArray[12'hF13] = `XLEN'h100;
CSRArray[12'hF15] = 0;
CSRArray[12'h34A] = 0;
// MCYCLE and MINSTRET
CSRArray[12'hB00] = testbench.dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[0];
CSRArray[12'hB02] = testbench.dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[2];
// supervisor CSRs
CSRArray[12'h100] = testbench.dut.core.priv.priv.csr.csrs.SSTATUS_REGW;
CSRArray[12'h104] = testbench.dut.core.priv.priv.csr.csrm.MIE_REGW & 12'h222;
CSRArray[12'h105] = testbench.dut.core.priv.priv.csr.csrs.STVEC_REGW;
CSRArray[12'h141] = testbench.dut.core.priv.priv.csr.csrs.SEPC_REGW;
CSRArray[12'h106] = testbench.dut.core.priv.priv.csr.csrs.SCOUNTEREN_REGW;
CSRArray[12'h180] = testbench.dut.core.priv.priv.csr.csrs.SATP_REGW;
CSRArray[12'h140] = testbench.dut.core.priv.priv.csr.csrs.csrs.SSCRATCH_REGW;
CSRArray[12'h143] = testbench.dut.core.priv.priv.csr.csrs.csrs.STVAL_REGW;
CSRArray[12'h142] = testbench.dut.core.priv.priv.csr.csrs.csrs.SCAUSE_REGW;
CSRArray[12'h144] = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW & & 12'h222 & testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW;
// user CSRs
CSRArray[12'h001] = testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW;
CSRArray[12'h002] = testbench.dut.core.priv.priv.csr.csru.FRM_REGW;
CSRArray[12'h003] = {testbench.dut.core.priv.priv.csr.csru.FRM_REGW, testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW};
end
// Integer register file view for the tracer.
// rf[0] is tied to zero; rf[1..NUMREGS-1] mirror the core's register file array.
genvar index;
assign rf[0] = '0;
for(index = 1; index < NUMREGS; index += 1)
  assign rf[index] = testbench.dut.core.ieu.dp.regf.rf[index];

// Write port of the integer register file: destination index and write enable.
assign rf_a3  = testbench.dut.core.ieu.dp.regf.a3;
assign rf_we3 = testbench.dut.core.ieu.dp.regf.we3;

// Write-back mask: all zeros unless rf_we3 is set, in which case only bit
// rf_a3 is set. Blocking assignments are used because this is combinational
// logic; nonblocking assignments inside always_comb defer the update to the
// NBA region and are flagged by lint tools.
always_comb begin
  rf_wb = '0;
  if(rf_we3)
    rf_wb[rf_a3] = 1'b1;
end
// Floating-point register file view for the tracer: every entry
// 0..NUMREGS-1 mirrors the FPU register file array.
for(index = 0; index < NUMREGS; index += 1)
  assign frf[index] = testbench.dut.core.fpu.fpu.fregfile.rf[index];

// Write port of the floating-point register file: destination index and write enable.
assign frf_a4  = testbench.dut.core.fpu.fpu.fregfile.a4;
assign frf_we4 = testbench.dut.core.fpu.fpu.fregfile.we4;

// Write-back mask: all zeros unless frf_we4 is set, in which case only bit
// frf_a4 is set. Blocking assignments are used because this is combinational
// logic; nonblocking assignments inside always_comb defer the update to the
// NBA region and are flagged by lint tools.
always_comb begin
  frf_wb = '0;
  if(frf_we4)
    frf_wb[frf_a4] = 1'b1;
end
// Memory-stage CSR address and CSR write strobe, probed from the core's CSR unit.
assign CSRAdrM = testbench.dut.core.priv.priv.csr.CSRAdrM;
assign CSRWriteM = testbench.dut.core.priv.priv.csr.CSRWriteM;
// pipeline to writeback stage
// NOTE(review): flopenrc arguments appear to be (clk, reset, clear, enable, d, q);
// confirm against the flopenrc definition, which is not visible here.
// Each register is cleared by its stage's Flush signal and enabled by the
// inverse of its stage's Stall signal, unless noted otherwise.
// Raw instruction carried D -> E -> M -> W (instantiated `XLEN bits wide).
flopenrc #(`XLEN) InstrRawEReg (clk, reset, FlushE, ~StallE, InstrRawD, InstrRawE);
flopenrc #(`XLEN) InstrRawMReg (clk, reset, FlushM, ~StallM, InstrRawE, InstrRawM);
flopenrc #(`XLEN) InstrRawWReg (clk, reset, FlushW, ~StallW, InstrRawM, InstrRawW);
// PC and instruction-valid carried M -> W.
// Note: the instance named InstrValidMReg is the one that produces InstrValidW.
flopenrc #(`XLEN) PCWReg (clk, reset, FlushW, ~StallW, PCM, PCW);
flopenrc #(1) InstrValidMReg (clk, reset, FlushW, ~StallW, InstrValidM, InstrValidW);
// Trap and halt carried M -> W with the clear input tied low, so FlushW does not clear them.
flopenrc #(1) TrapWReg (clk, reset, 1'b0, ~StallW, TrapM, TrapW);
flopenrc #(1) HaltWReg (clk, reset, 1'b0, ~StallW, HaltM, HaltW);
// Intr chain: IntrF is loaded from TrapM (clear tied low, enabled by ~StallF)
// and then shifted F -> D -> E -> M -> W alongside the instruction stream.
// NOTE(review): presumably this tags the first instruction fetched after a trap — confirm.
flopenrc #(1) IntrFReg (clk, reset, 1'b0, ~StallF, TrapM, IntrF);
flopenrc #(1) IntrDReg (clk, reset, FlushD, ~StallD, IntrF, IntrD);
flopenrc #(1) IntrEReg (clk, reset, FlushE, ~StallE, IntrD, IntrE);
flopenrc #(1) IntrMReg (clk, reset, FlushM, ~StallM, IntrE, IntrM);
flopenrc #(1) IntrWReg (clk, reset, FlushW, ~StallW, IntrM, IntrW);
// CSR address (12 bits) and CSR write strobe carried M -> W.
flopenrc #(12) CSRAdrWReg (clk, reset, FlushW, ~StallW, CSRAdrM, CSRAdrW);
flopenrc #(1) CSRWriteWReg (clk, reset, FlushW, ~StallW, CSRWriteM, CSRWriteW);
// Initially connecting the writeback stage signals, but may need to use M stage
// and gate on ~FlushW.
// valid: the W-stage instruction is valid and W is neither stalled nor flushed this cycle.
assign valid = InstrValidW & ~StallW & ~FlushW;
// order: taken from the CSRArray entry at address 0xB02.
assign order[0][0] = CSRArray[12'hB02];
// Instruction word, its PC, and the trap/halt/intr flags, all from the W-stage registers.
assign insn[0][0] = InstrRawW;
assign pc_rdata[0][0] = PCW;
assign trap[0][0] = TrapW;
assign halt[0][0] = HaltW;
assign intr[0][0] = IntrW;
assign mode[0][0] = PrivilegeModeW;
// ixl: 2'b10 when PrivilegeModeW is 2'b11, STATUS_SXL when it is 2'b01,
// and STATUS_UXL for any other mode value.
assign ixl[0][0] = PrivilegeModeW == 2'b11 ? 2'b10 :
PrivilegeModeW == 2'b01 ? STATUS_SXL : STATUS_UXL;
// pc_wdata: priority select walking from W back toward F; the first stage whose
// Flush signal is low supplies the PC of the stage feeding it (PCM, PCE, PCD, PCF).
// If FlushW, FlushM, FlushE and FlushD are all high, PCNextF is used.
assign pc_wdata[0][0] = ~FlushW ? PCM :
~FlushM ? PCE :
~FlushE ? PCD :
~FlushD ? PCF : PCNextF;
// Fan the integer and floating-point register values and their write-back
// mask bits out to the per-register x_*/f_* trace outputs.
for(index = 0; index < `NUM_REGS; index += 1) begin
  assign x_wdata[0][0][index] = rf[index];
  assign x_wb[0][0][index]    = rf_wb[index];
  assign f_wdata[0][0][index] = frf[index];
  assign f_wb[0][0][index]    = frf_wb[index];
end

// CSR write-back mask: all zeros unless CSRWriteW is set, in which case only
// bit CSRAdrW is set. Blocking assignments are used because this is
// combinational logic; nonblocking assignments inside always_comb defer the
// update to the NBA region and are flagged by lint tools.
always_comb begin
  csr_wb[0][0] = '0;
  if(CSRWriteW)
    csr_wb[0][0][CSRAdrW] = 1'b1;
end
// Expand CSRArray into the dense csr[0][0][] output: each of the `NUM_CSRS
// addresses gets the CSRArray value when an entry exists at that address,
// and zero otherwise.
integer index3;
always_comb begin
  for(index3 = 0; index3 < `NUM_CSRS; index3 += 1)
    csr[0][0][index3] = CSRArray.exists(index3) ? CSRArray[index3] : '0;
end
// *** implementation only cancel? so sc does not clear?
// lrsc_cancel is tied to zero.
assign lrsc_cancel[0][0] = '0;
// Trace printing. On each rising clock edge where valid is set, exactly one of
// three verbosity levels is printed, selected by the PRINT_* macros:
//   PRINT_PC_INSTR only         : order, PC and instruction
//   PRINT_MOST without PRINT_ALL: the above plus trap/halt/intr/mode/ixl/pc_wdata and
//                                 the x, f and csr entries addressed by rf_a3, frf_a4, CSRAdrW
//   PRINT_ALL                   : the summary line followed by every x and f register
integer index2;
always_ff @(posedge clk) begin
if(valid) begin
if(`PRINT_PC_INSTR & !(`PRINT_ALL | `PRINT_MOST))
$display("order = %08d, PC = %08x, insn = %08x", order[0][0], pc_rdata[0][0], insn[0][0]);
else if(`PRINT_MOST & !`PRINT_ALL)
$display("order = %08d, PC = %010x, insn = %08x, trap = %1d, halt = %1d, intr = %1d, mode = %1x, ixl = %1x, pc_wdata = %010x, x%02d = %016x, f%02d = %016x, csr%03x = %016x",
order[0][0], pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], intr[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0], rf_a3, x_wdata[0][0][rf_a3], frf_a4, f_wdata[0][0][frf_a4], CSRAdrW, csr[0][0][CSRAdrW]);
else if(`PRINT_ALL) begin
$display("order = %08d, PC = %08x, insn = %08x, trap = %1d, halt = %1d, intr = %1d, mode = %1x, ixl = %1x, pc_wdata = %08x",
order[0][0], pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], intr[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0]);
// Dump all NUM_REGS integer registers, then all NUM_REGS floating-point registers.
for(index2 = 0; index2 < `NUM_REGS; index2 += 1) begin
$display("x%02d = %08x", index2, x_wdata[0][0][index2]);
end
for(index2 = 0; index2 < `NUM_REGS; index2 += 1) begin
$display("f%02d = %08x", index2, f_wdata[0][0][index2]);
end
end
end
// Checked on every clock edge, independent of valid: stop the simulation once HaltW is set.
if(HaltW) $stop();
end
endmodule

View File

@ -155,7 +155,7 @@ module testbench;
`define MCOUNTEREN `CSR_BASE.csrm.mcounteren.MCOUNTERENreg.q `define MCOUNTEREN `CSR_BASE.csrm.mcounteren.MCOUNTERENreg.q
`define SCOUNTEREN `CSR_BASE.csrs.csrs.SCOUNTERENreg.q `define SCOUNTEREN `CSR_BASE.csrs.csrs.SCOUNTERENreg.q
`define MSCRATCH `CSR_BASE.csrm.MSCRATCHreg.q `define MSCRATCH `CSR_BASE.csrm.MSCRATCHreg.q
`define SSCRATCH `CSR_BASE.csrs.csrs.csrs.SSCRATCHreg.q `define SSCRATCH `CSR_BASE.csrs.csrs.SSCRATCHreg.q
`define MTVEC `CSR_BASE.csrm.MTVECreg.q `define MTVEC `CSR_BASE.csrm.MTVECreg.q
`define STVEC `CSR_BASE.csrs.csrs.STVECreg.q `define STVEC `CSR_BASE.csrs.csrs.STVECreg.q
`define SATP `CSR_BASE.csrs.csrs.genblk1.SATPreg.q `define SATP `CSR_BASE.csrs.csrs.genblk1.SATPreg.q

View File

@ -692,55 +692,3 @@ task automatic updateProgramAddrLabelArray;
$fclose(ProgramAddrMapFP); $fclose(ProgramAddrMapFP);
endtask endtask
// Register-count and CSR-count constants. They are not referenced by the
// rvviTrace module below.
`define NUM_REGS 32
`define NUM_CSRS 4096

// rvviTrace: minimal instruction tracer for the testbench.
// Probes the core hierarchically, carries the raw instruction and PC down to
// the writeback stage, and prints one "PC = ..., insn = ..." line per
// instruction that is valid in W.
module rvviTrace();

  // wally specific signals
  logic             reset;
  logic [`XLEN-1:0] PCM, PCW;
  logic [`XLEN-1:0] InstrRawD, InstrRawE, InstrRawM, InstrRawW;
  logic             InstrValidM, InstrValidW;
  logic             StallE, StallM, StallW;
  logic             FlushE, FlushM, FlushW;

  // tracer signals
  logic             clk;
  logic             valid;
  logic [`XLEN-1:0] insn;
  logic [`XLEN-1:0] pc_rdata;

  // Hierarchical probes into the testbench and the core.
  assign clk         = testbench.dut.clk;
  assign reset       = testbench.reset;
  assign InstrValidM = testbench.dut.core.ieu.InstrValidM;
  assign InstrRawD   = testbench.dut.core.ifu.InstrRawD;
  assign PCM         = testbench.dut.core.ifu.PCM;
  assign StallE      = testbench.dut.core.StallE;
  assign StallM      = testbench.dut.core.StallM;
  assign StallW      = testbench.dut.core.StallW;
  assign FlushE      = testbench.dut.core.FlushE;
  assign FlushM      = testbench.dut.core.FlushM;
  assign FlushW      = testbench.dut.core.FlushW;

  // pipeline to writeback stage
  // NOTE(review): flopenrc arguments appear to be (clk, reset, clear, enable, d, q);
  // confirm against the flopenrc definition, which is not visible here.
  flopenrc #(`XLEN) InstrRawEReg (clk, reset, FlushE, ~StallE, InstrRawD, InstrRawE);
  flopenrc #(`XLEN) InstrRawMReg (clk, reset, FlushM, ~StallM, InstrRawE, InstrRawM);
  flopenrc #(`XLEN) InstrRawWReg (clk, reset, FlushW, ~StallW, InstrRawM, InstrRawW);
  flopenrc #(`XLEN) PCWReg (clk, reset, FlushW, ~StallW, PCM, PCW);
  // Note: the instance named InstrValidMReg is the one that produces InstrValidW.
  flopenrc #(1) InstrValidMReg (clk, reset, FlushW, ~StallW, InstrValidM, InstrValidW);

  // Report an instruction only in a cycle where W is not stalled. The W
  // registers above are enabled by ~StallW, so while W is stalled they keep
  // presenting the same instruction; without this gating it would be printed
  // again on every stalled cycle.
  assign valid    = InstrValidW & ~StallW;
  assign insn     = InstrRawW;
  assign pc_rdata = PCW;

  always_ff @(posedge clk) begin
    if(valid) begin
      $display("PC = %x, insn = %x", pc_rdata, insn);
    end
  end
endmodule