Merge branch 'main' of github.com:davidharrishmc/riscv-wally into boot

This commit is contained in:
Jacob Pease 2023-01-19 16:59:24 -06:00
commit fbe5c63219
53 changed files with 620 additions and 93249 deletions

View File

@ -1,23 +0,0 @@
# Makefile
CC = gcc
CFLAGS = -O3
LIBS = -lm
LFLAGS = -L.
# Link against the riscv-isa-sim version of SoftFloat rather than
# the regular version to get RISC-V NaN behavior
IFLAGS = -I$(RISCV)/riscv-isa-sim/softfloat
LIBS = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a
#IFLAGS = -I../../../addins/SoftFloat-3e/source/include/
#LIBS = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a
SRCS = $(wildcard *.c)
PROGS = $(patsubst %.c,%,$(SRCS))
all: $(PROGS)
%: %.c
$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)
clean:
rm -f $(PROGS)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,23 +0,0 @@
# fma.do
#
# run with vsim -do "do fma.do"
# add -c before -do for batch simulation
onbreak {resume}
# create library
vlib worklib
vlog -lint -sv -work worklib fma16.v testbench.v
vopt +acc worklib.testbench_fma16 -work worklib -o testbenchopt
vsim -lib worklib testbenchopt
add wave sim:/testbench_fma16/clk
add wave sim:/testbench_fma16/reset
add wave sim:/testbench_fma16/x
add wave sim:/testbench_fma16/y
add wave sim:/testbench_fma16/z
add wave sim:/testbench_fma16/result
add wave sim:/testbench_fma16/rexpected
run -all

View File

@ -1,268 +0,0 @@
// fma16.sv
// David_Harris@hmc.edu 26 February 2022
// 16-bit floating-point multiply-accumulate
// Operation: general purpose multiply, add, fma, with optional negation
// If mul=1, p = x * y. Else p = x.
// If add=1, result = p + z. Else result = p.
// If negr or negz = 1, negate result or z to handle negations and subtractions
// fadd: mul = 0, add = 1, negr = negz = 0
// fsub: mul = 0, add = 1, negr = 0, negz = 1
// fmul: mul = 1, add = 0, negr = 0, negz = 0
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
`define FFLEN 16
`define Nf 10
`define Ne 5
`define BIAS 15
`define EMIN (-(2**(`Ne-1)-1))
`define EMAX (2**(`Ne-1)-1)
`define NaN 16'h7E00
`define INF 15'h7C00
// rounding modes *** update
`define RZ 3'b00
`define RNE 3'b01
`define RM 3'b10
`define RP 3'b11
module fma16(
input logic [`FFLEN-1:0] x, y, z,
input logic mul, add, negr, negz,
input logic [1:0] roundmode, // 00: rz, 01: rne, 10: rp, 11: rn
output logic [`FFLEN-1:0] result);
logic [`Nf:0] xm, ym, zm; // U1.Nf
logic [`Ne-1:0] xe, ye, ze; // B_Ne
logic xs, ys, zs;
logic zs1; // sign before optional negation
logic [2*`Nf+1:0] pm; // U2.2Nf
logic [`Ne:0] pe; // B_Ne+1
logic ps; // sign of product
logic [22:0] rm;
logic [`Ne+1:0] re;
logic rs;
logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan;
logic [`Ne+1:0] re2;
unpack16 unpack(x, y, z, xm, ym, zm, xe, ye, ze, xs, ys, zs1, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan); // unpack inputs
//signadj16 signadj(negr, negz, xs, ys, zs1, ps, zs); // handle negations
mult16 mult16(mul, xm, ym, xe, ye, xs, ys, pm, pe, ps); // p = x * y
add16 add16(add, pm, zm, pe, ze, ps, zs, negz, rm, re, re2, rs); // r = z + p
postproc16 post(roundmode, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan, rm, zm, re, ze, rs, zs, ps, re2, result); // normalize, round, pack
endmodule
module mult16(
input logic mul,
input logic [`Nf:0] xm, ym,
input logic [`Ne-1:0] xe, ye,
input logic xs, ys,
output logic [2*`Nf+1:0] pm,
output logic [`Ne:0] pe,
output logic ps);
// only multiply if mul = 1
assign pm = mul ? xm * ym : {1'b0, xm, 10'b0}; // multiply mantiassas
assign pe = mul ? xe + ye - `BIAS : {1'b0, xe}; // add exponents, account for bias
assign ps = xs ^ ys; // negative if X xor Y are negative
endmodule
module add16(
input logic add,
input logic [2*`Nf+1:0] pm, // U2.2Nf
input logic [`Nf:0] zm, // U1.Nf
input logic [`Ne:0] pe, // B_Ne+1
input logic [`Ne-1:0] ze, // B_Ne
input logic ps, zs,
input logic negz,
output logic [22:0] rm,
output logic [`Ne+1:0] re, // B_Ne+2
output logic [`Ne+1:0] re2,
output logic rs);
logic [`Nf*3+7:0] paligned, zaligned, zalignedaddsub, r, r2, rnormed, rnormed2; // U(Nf+6).(2Nf+2) aligned significands
logic signed [`Ne:0] ExpDiff; // Q(Ne+2).0
logic [`Ne:0] AlignCnt; // U(Ne+3) bits to right shift Z for alignment *** check size.
logic [`Nf-1:0] prezsticky;
logic zsticky;
logic effectivesub;
logic rs0;
logic [`Ne:0] leadingzeros, NormCnt; // *** should paramterize size
logic [`Ne:0] re1;
// Alignment shift
assign paligned = {{(`Nf+4){1'b0}}, pm, 2'b00}; // constant shift to prepend leading and trailing 0s.
assign ExpDiff = pe - {1'b0, ze}; // Compute exponent difference as signed number
always_comb // AlignCount mux; see Muller page 254
if (ExpDiff <= (-2*`Nf - 1)) begin AlignCnt = 3*`Nf + 7; re = {1'b0, pe}; end
else if (ExpDiff <= 2) begin AlignCnt = `Nf + 4 - ExpDiff; re = {1'b0, pe}; end
else if (ExpDiff <= `Nf+3) begin AlignCnt = `Nf + 4 - ExpDiff; re = {2'b0, ze}; end
else begin AlignCnt = 0; re = {2'b0, ze}; end
// Shift Zm right by AlignCnt. Produce 3Nf+8 bits of Zaligned in U(Nf+6).(2Nf+2) and Nf bits becoming sticky
assign {zaligned, prezsticky} = {zm, {(3*`Nf+7){1'b0}}} >> AlignCnt; //Right shift
assign zsticky = |prezsticky; // Sticky bit if any of the discarded bits were 1
// Effective subtraction
assign effectivesub = ps ^ zs ^ negz; // subtract |z| from |p|
assign zalignedaddsub = effectivesub ? ~zaligned : zaligned; // invert zaligned for subtraction
// Adder
assign r = paligned + zalignedaddsub + {{`Nf*3+7{1'b0}}, effectivesub}; // add aligned significands
assign rs0 = r[`Nf*3+7]; // sign of the initial result
assign r2 = rs0 ? ~r+1 : r; // invert sum if negative; could optimize with end-around carry?
// Sign Logic
assign rs = ps ^ rs0; // flip the sign if necessary
// Leading zero counter
lzc lzc(r2, leadingzeros); // count number of leading zeros in 2Nf+5 lower digits of r2
assign re1 = pe +2 - leadingzeros; // *** declare, # of bits
// Normalization shift
always_comb // NormCount mux
if (ExpDiff < 3) begin
if (re1 >= `EMIN) begin NormCnt = `Nf + 3 + leadingzeros; re2 = {1'b0, re1}; end
else begin NormCnt = `Nf + 5 + pe - `EMIN; re2 = `EMIN; end
end else begin NormCnt = AlignCnt; re = {2'b00, ze}; end
assign rnormed = r2 << NormCnt; // *** update sticky
/* temporarily comment out to start synth
// One-bit secondary normalization
if (ExpDiff <= 2) begin rnormed2 = rnormed; re2 = re; end // no secondary normalization
else begin // *** handle sticky
if (rnormed[***]) begin rnormed2 = rnormed >> 1; re2 = re+1; end
else if (rnormed[***-1]) begin rnormed2 = rnormed; re2 = re; end
else begin rnormed2 = rnormed << 1; re2 = re-1; end
end
// round
assign l = rnormed2[***]; // least significant bit
assign r = rnormed2[***-1]; // rounding bit
assign s = ***; // sticky bit
always_comb
case (roundmode)
RZ: roundup = 0;
RP: roundup = ~rs & (r | s);
RM: roundup = rs & (r | s);
RNE: roundup = r & (s | l);
default: roundup = 0;
endcase
assign {re3, rrounded} = {re2, rnormed2[***]} + roundup; // increment if necessary
*/
// *** need to handle rounding to MAXNUM vs. INFINITY
// add or pass product through
/* assign rm = add ? arm : {1'b0, pm};
assign re = add ? are : {1'b0, pe};
assign rs = add ? ars : ps; */
endmodule
module lzc(
input logic [`Nf*3+7:0] r2,
output logic [`Ne:0] leadingzeros
);
endmodule
module postproc16(
input logic [1:0] roundmode,
input logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan,
input logic [22:0] rm,
input logic [`Nf:0] zm, // U1.Nf
input logic [6:0] re,
input logic [`Ne-1:0] ze, // B_Ne
input logic rs, zs, ps,
input logic [`Ne+1:0] re2,
output logic [15:0] result);
logic [9:0] uf, uff;
logic [6:0] ue;
logic [6:0] ueb, uebiased;
logic invalid;
// Special cases
// *** not handling signaling NaN
// *** also add overflow/underflow/inexact
always_comb begin
if (xnan | ynan | znan) begin result = `NaN; invalid = 0; end // propagate NANs
else if ((xinf | yinf) & zinf & (ps ^ zs)) begin result = `NaN; invalid = 1; end // infinity - infinity
else if (xzero & yinf | xinf & yzero) begin result = `NaN; invalid = 1; end // zero times infinity
else if (xinf | yinf) begin result = {ps, `INF}; invalid = 0; end // X or Y
else if (zinf) begin result = {zs, `INF}; invalid = 0; end // infinite Z
else if (xzero | yzero) begin result = {zs, ze, zm[`Nf-1:0]}; invalid = 0; end
else if (re2 >= `EMAX) begin result = {rs, `INF}; invalid = 0; end
else begin result = {rs, re[`Ne-1:0], rm[`Nf-1:0]}; invalid = 0; end
end
always_comb
if (rm[21]) begin // normalization right shift by 1 and bump up exponent;
ue = re + 7'b1;
uf = rm[20:11];
end else begin // no normalization shift needed
ue = re;
uf = rm[19:10];
end
// overflow
always_comb begin
ueb = ue-7'd15;
if (ue >= 7'd46) begin // overflow
/* uebiased = 7'd30;
uff = 10'h3ff; */
end else begin
uebiased = ue-7'd15;
uff = uf;
end
end
assign result = {rs, uebiased[4:0], uff};
// add special case handling for zeros, NaN, Infinity
endmodule
module signadj16(
input logic negr, negz,
input logic xs, ys, zs1,
output logic ps, zs);
assign ps = xs ^ ys; // sign of product
assign zs = zs1 ^ negz; // sign of addend
endmodule
module unpack16(
input logic [15:0] x, y, z,
output logic [10:0] xm, ym, zm,
output logic [4:0] xe, ye, ze,
output logic xs, ys, zs,
output logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);
unpacknum16 upx(x, xm, xe, xs, xzero, xinf, xnan);
unpacknum16 upy(y, ym, ye, ys, yzero, yinf, ynan);
unpacknum16 upz(z, zm, ze, zs, zzero, zinf, znan);
endmodule
module unpacknum16(
input logic [15:0] num,
output logic [10:0] m,
output logic [4:0] e,
output logic s,
output logic zero, inf, nan);
logic [9:0] f; // fraction without leading 1
logic [4:0] eb; // biased exponent
assign {s, eb, f} = num; // pull bit fields out of floating-point number
assign m = {1'b1, f}; // prepend leading 1 to fraction
assign e = eb; // leave bias in exponent ***
assign zero = (e == 0 && f == 0);
assign inf = (e == 31 && f == 0);
assign nan = (e == 31 && f != 0);
endmodule

View File

@ -1,24 +0,0 @@
// fma16.sv
// David_Harris@hmc.edu 26 February 2022
// 16-bit floating-point multiply-accumulate
// Operation: general purpose multiply, add, fma, with optional negation
// If mul=1, p = x * y. Else p = x.
// If add=1, result = p + z. Else result = p.
// If negr or negz = 1, negate result or z to handle negations and subtractions
// fadd: mul = 0, add = 1, negr = negz = 0
// fsub: mul = 0, add = 1, negr = 0, negz = 1
// fmul: mul = 1, add = 0, negr = 0, negz = 0
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
module fma16(
input logic [15:0] x, y, z,
input logic mul, add, negr, negz,
input logic [1:0] roundmode, // 00: rz, 01: rne, 10: rp, 11: rn
output logic [15:0] result);
endmodule

View File

@ -1,240 +0,0 @@
#include <stdio.h>
#include <stdint.h>
#include "softfloat.h"
#include "softfloat_types.h"
typedef union sp {
float32_t v;
float f;
} sp;
// lists of tests, terminated with 0x8000
uint16_t easyExponents[] = {15, 0x8000};
uint16_t medExponents[] = {1, 14, 15, 16, 20, 30, 0x8000};
uint16_t allExponents[] = {1, 15, 16, 30, 31, 0x8000};
uint16_t easyFracts[] = {0, 0x200, 0x8000}; // 1.0 and 1.1
uint16_t medFracts[] = {0, 0x200, 0x001, 0x3FF, 0x8000};
uint16_t zeros[] = {0x0000, 0x8000};
uint16_t infs[] = {0x7C00, 0xFC00};
uint16_t nans[] = {0x7D00, 0x7D01};
void softfloatInit(void) {
softfloat_roundingMode = softfloat_round_minMag;
softfloat_exceptionFlags = 0;
softfloat_detectTininess = softfloat_tininess_beforeRounding;
}
float convFloat(float16_t f16) {
float32_t f32;
float res;
sp r;
f32 = f16_to_f32(f16);
r.v = f32;
res = r.f;
return res;
}
void genCase(FILE *fptr, float16_t x, float16_t y, float16_t z, int mul, int add, int negp, int negz, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
float16_t result;
int op, flagVals;
char calc[80], flags[80];
float32_t x32, y32, z32, r32;
float xf, yf, zf, rf;
float16_t smallest;
if (!mul) y.v = 0x3C00; // force y to 1 to avoid multiply
if (!add) z.v = 0x0000; // force z to 0 to avoid add
if (negp) x.v ^= 0x8000; // flip sign of x to negate p
if (negz) z.v ^= 0x8000; // flip sign of z to negate z
op = roundingMode << 4 | mul<<3 | add<<2 | negp<<1 | negz;
// printf("op = %02x rm %d mul %d add %d negp %d negz %d\n", op, roundingMode, mul, add, negp, negz);
softfloat_exceptionFlags = 0; // clear exceptions
result = f16_mulAdd(x, y, z);
sprintf(flags, "NV: %d OF: %d UF: %d NX: %d",
(softfloat_exceptionFlags >> 4) % 2,
(softfloat_exceptionFlags >> 2) % 2,
(softfloat_exceptionFlags >> 1) % 2,
(softfloat_exceptionFlags) % 2);
// pack these four flags into one nibble, discarding DZ flag
flagVals = softfloat_exceptionFlags & 0x7 | ((softfloat_exceptionFlags >> 1) & 0x8);
// convert to floats for printing
xf = convFloat(x);
yf = convFloat(y);
zf = convFloat(z);
rf = convFloat(result);
if (mul)
if (add) sprintf(calc, "%f * %f + %f = %f", xf, yf, zf, rf);
else sprintf(calc, "%f * %f = %f", xf, yf, rf);
else sprintf(calc, "%f + %f = %f", xf, zf, rf);
// omit denorms, which aren't required for this project
smallest.v = 0x0400;
float16_t resultmag = result;
resultmag.v &= 0x7FFF; // take absolute value
if (f16_lt(resultmag, smallest) && (resultmag.v != 0x0000)) fprintf (fptr, "// skip denorm: ");
if (resultmag.v == 0x0000 && !zeroAllowed) fprintf(fptr, "// skip zero: ");
if ((resultmag.v == 0x7C00 || resultmag.v == 0x7BFF) && !infAllowed) fprintf(fptr, "// Skip inf: ");
if (resultmag.v > 0x7C00 && !nanAllowed) fprintf(fptr, "// Skip NaN: ");
fprintf(fptr, "%04x_%04x_%04x_%02x_%04x_%01x // %s %s\n", x.v, y.v, z.v, op, result.v, flagVals, calc, flags);
}
void prepTests(uint16_t *e, uint16_t *f, char *testName, char *desc, float16_t *cases,
FILE *fptr, int *numCases) {
int i, j;
fprintf(fptr, desc); fprintf(fptr, "\n");
*numCases=0;
for (i=0; e[i] != 0x8000; i++)
for (j=0; f[j] != 0x8000; j++) {
cases[*numCases].v = f[j] | e[i]<<10;
*numCases = *numCases + 1;
}
}
void genMulTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
int i, j, k, numCases;
float16_t x, y, z;
float16_t cases[100000];
FILE *fptr;
char fn[80];
sprintf(fn, "work/%s.tv", testName);
fptr = fopen(fn, "w");
prepTests(e, f, testName, desc, cases, fptr, &numCases);
z.v = 0x0000;
for (i=0; i < numCases; i++) {
x.v = cases[i].v;
for (j=0; j<numCases; j++) {
y.v = cases[j].v;
for (k=0; k<=sgn; k++) {
y.v ^= (k<<15);
genCase(fptr, x, y, z, 1, 0, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
}
}
}
fclose(fptr);
}
void genAddTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
int i, j, k, numCases;
float16_t x, y, z;
float16_t cases[100000];
FILE *fptr;
char fn[80];
sprintf(fn, "work/%s.tv", testName);
fptr = fopen(fn, "w");
prepTests(e, f, testName, desc, cases, fptr, &numCases);
y.v = 0x0000;
for (i=0; i < numCases; i++) {
x.v = cases[i].v;
for (j=0; j<numCases; j++) {
z.v = cases[j].v;
for (k=0; k<=sgn; k++) {
z.v ^= (k<<15);
genCase(fptr, x, y, z, 0, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
}
}
}
fclose(fptr);
}
void genFMATests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
int i, j, k, l, numCases;
float16_t x, y, z;
float16_t cases[100000];
FILE *fptr;
char fn[80];
sprintf(fn, "work/%s.tv", testName);
fptr = fopen(fn, "w");
prepTests(e, f, testName, desc, cases, fptr, &numCases);
for (i=0; i < numCases; i++) {
x.v = cases[i].v;
for (j=0; j<numCases; j++) {
y.v = cases[j].v;
for (k=0; k<numCases; k++) {
z.v = cases[k].v;
for (l=0; l<=sgn; l++) {
z.v ^= (l<<15);
genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
}
}
}
}
fclose(fptr);
}
void genSpecialTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
int i, j, k, sx, sy, sz, numCases;
float16_t x, y, z;
float16_t cases[100000];
FILE *fptr;
char fn[80];
sprintf(fn, "work/%s.tv", testName);
fptr = fopen(fn, "w");
prepTests(e, f, testName, desc, cases, fptr, &numCases);
cases[numCases].v = 0x0000; // add +0 case
cases[numCases+1].v = 0x8000; // add -0 case
numCases += 2;
for (i=0; i < numCases; i++) {
x.v = cases[i].v;
for (j=0; j<numCases; j++) {
y.v = cases[j].v;
for (k=0; k<numCases; k++) {
z.v = cases[k].v;
for (sx=0; sx<=sgn; sx++) {
x.v ^= (sx<<15);
for (sy=0; sy<=sgn; sy++) {
y.v ^= (sy<<15);
for (sz=0; sz<=sgn; sz++) {
z.v ^= (sz<<15);
genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
}
}
}
}
}
}
fclose(fptr);
}
int main()
{
softfloatInit(); // configure softfloat modes
// Test cases: multiplication
genMulTests(easyExponents, easyFracts, 0, "fmul_0", "// Multiply with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
genMulTests(medExponents, medFracts, 0, "fmul_1", "// Multiply with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
genMulTests(medExponents, medFracts, 1, "fmul_2", "// Multiply with various exponents and signed fractions, RZ", 0, 0, 0, 0);
// Test cases: addition
genAddTests(easyExponents, easyFracts, 0, "fadd_0", "// Add with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
genAddTests(medExponents, medFracts, 0, "fadd_1", "// Add with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
genAddTests(medExponents, medFracts, 1, "fadd_2", "// Add with various exponents and signed fractions, RZ", 0, 0, 0, 0);
// Test cases: FMA
genFMATests(easyExponents, easyFracts, 0, "fma_0", "// FMA with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
genFMATests(medExponents, medFracts, 0, "fma_1", "// FMA with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
genFMATests(medExponents, medFracts, 1, "fma_2", "// FMA with various exponents and signed fractions, RZ", 0, 0, 0, 0);
// Test cases: Zero, Infinity, NaN
genSpecialTests(allExponents, medFracts, 1, "fma_special_rz", "// FMA with special cases, RZ", 0, 1, 1, 1);
// Full test cases with other rounding modes
softfloat_roundingMode = softfloat_round_near_even;
genSpecialTests(allExponents, medFracts, 1, "fma_special_rne", "// FMA with special cases, RNE", 1, 1, 1, 1);
softfloat_roundingMode = softfloat_round_min;
genSpecialTests(allExponents, medFracts, 1, "fma_special_rm", "// FMA with special cases, RM", 2, 1, 1, 1);
softfloat_roundingMode = softfloat_round_max;
genSpecialTests(allExponents, medFracts, 1, "fma_special_rp", "// FMA with special cases, RP", 3, 1, 1, 1);
return 0;
}

View File

@ -1,8 +0,0 @@
#!/bin/bash
# check for warnings in Verilog code
# The verilator lint tool is faster and better than Modelsim so it is best to run this first.
export PATH=$PATH:/usr/local/bin/
verilator=`which verilator`
basepath=$(dirname $0)/..
$verilator --lint-only --top-module fma16 fma16.v

View File

@ -1,2 +0,0 @@
vsim -do "do fma.do"

View File

@ -1 +0,0 @@
vsim -c -do "do fma.do"

View File

@ -1 +0,0 @@
make -C ../../../synthDC synth DESIGN=fma16

View File

@ -1,52 +0,0 @@
/* verilator lint_off STMTDLY */
module testbench_fma16;
reg clk, reset;
reg [15:0] x, y, z, rexpected;
wire [15:0] result;
reg [7:0] ctrl;
reg [3:0] flagsexpected;
reg mul, add, negp, negz;
reg [1:0] roundmode;
reg [31:0] vectornum, errors;
reg [75:0] testvectors[10000:0];
// instantiate device under test
fma16 dut(x, y, z, mul, add, negp, negz, roundmode, result);
// generate clock
always
begin
clk = 1; #5; clk = 0; #5;
end
// at start of test, load vectors and pulse reset
initial
begin
$readmemh("work/fmul_0.tv", testvectors);
vectornum = 0; errors = 0;
reset = 1; #22; reset = 0;
end
// apply test vectors on rising edge of clk
always @(posedge clk)
begin
#1; {x, y, z, ctrl, rexpected, flagsexpected} = testvectors[vectornum];
{roundmode, mul, add, negp, negz} = ctrl[5:0];
end
// check results on falling edge of clk
always @(negedge clk)
if (~reset) begin // skip during reset
if (result !== rexpected) begin // check result // *** should also add tests on flags eventually
$display("Error: inputs %h * %h + %h", x, y, z);
$display(" result = %h (%h expected)", result, rexpected);
errors = errors + 1;
end
vectornum = vectornum + 1;
if (testvectors[vectornum] === 'x) begin
$display("%d tests completed with %d errors",
vectornum, errors);
$stop;
end
end
endmodule

File diff suppressed because it is too large Load Diff

View File

@ -1,130 +0,0 @@
#!/usr/bin/perl -w
# torturegen.pl
# David_Harris@hmc.edu 19 April 2022
# Convert TestFloat cases into format for fma16 project torture test
# Strip out cases involving denorms
use strict;
my @basenames = ("add", "mul", "mulAdd");
my @roundingmodes = ("rz", "rd", "ru", "rne");
my @names = ();
foreach my $name (@basenames) {
foreach my $mode (@roundingmodes) {
push(@names, "f16_${name}_$mode.tv");
}
}
open(TORTURE, ">work/torture.tv") || die("Can't write torture.tv");
my $datestring = localtime();
print(TORTURE "// Torture tests generated $datestring by $0\n");
foreach my $tv (@names) {
open(TV, "work/$tv") || die("Can't read $tv");
my $type = &getType($tv); # is it mul, add, mulAdd
my $rm = &getRm($tv); # rounding mode
# if ($rm != 0) { next; } # only do rz
print (TORTURE "\n////////// Testcases from $tv of type $type rounding mode $rm\n");
print ("\n////////// Testcases from $tv of type $type rounding mode $rm\n");
my $linecount = 0;
my $babyTorture = 0;
while (<TV>) {
my $line = $_;
$linecount++;
my $density = 10;
if ($type eq "mulAdd") {$density = 500;}
if ($babyTorture) {
$density = 100;
if ($type eq "mulAdd") {$density = 50000;}
}
if ((($linecount + $rm) % $density) != 0) { next }; # too many tests to use
chomp($line); # strip off newline
my @parts = split(/_/, $line);
my ($x, $y, $z, $op, $w, $flags);
$x = $parts[0];
if ($type eq "add") { $y = "0000"; } else {$y = $parts[1]};
if ($type eq "mul") { $z = "3CFF"; } elsif ($type eq "add") {$z = $parts[1]} else { $z = $parts[2]};
$op = $rm << 4;
if ($type eq "mul" || $type eq "mulAdd") { $op = $op + 8; }
if ($type eq "add" || $type eq "mulAdd") { $op = $op + 4; }
my $opname = sprintf("%02x", $op);
if ($type eq "mulAdd") {$w = $parts[3];} else {$w = $parts[2]};
if ($type eq "mulAdd") {$flags = $parts[4];} else {$flags = $parts[3]};
$flags = substr($flags, -1); # take last character
if (&fpval($w) eq "NaN") { $w = "7e00"; }
my $vec = "${x}_${y}_${z}_${opname}_${w}_${flags}";
my $skip = "";
if (&isdenorm($x) || &isdenorm($y) || &isdenorm($z) || &isdenorm($w)) {
$skip = "Skipped denorm";
}
my $summary = &summary($x, $y, $z, $w, $type);
if ($skip ne "") {
print TORTURE "// $skip $tv line $linecount $line $summary\n"
}
else { print TORTURE "$vec // $tv line $linecount $line $summary\n";}
}
close(TV);
}
close(TORTURE);
sub fpval {
my $val = shift;
$val = hex($val); # convert hex string to number
my $frac = $val & 0x3FF;
my $exp = ($val >> 10) & 0x1F;
my $sign = $val >> 15;
my $res;
if ($exp == 31 && $frac != 0) { return "NaN"; }
elsif ($exp == 31) { $res = "INF"; }
elsif ($val == 0) { $res = 0; }
elsif ($exp == 0) { $res = "Denorm"; }
else { $res = sprintf("1.%011b x 2^%d", $frac, $exp-15); }
if ($sign == 1) { $res = "-$res"; }
return $res;
}
sub summary {
my $x = shift; my $y = shift; my $z = shift; my $w = shift; my $type = shift;
my $xv = &fpval($x);
my $yv = &fpval($y);
my $zv = &fpval($z);
my $wv = &fpval($w);
if ($type eq "add") { return "$xv + $zv = $wv"; }
elsif ($type eq "mul") { return "$xv * $yv = $wv"; }
else {return "$xv * $yv + $zv = $wv"; }
}
sub getType {
my $tv = shift;
if ($tv =~ /mulAdd/) { return("mulAdd"); }
elsif ($tv =~ /mul/) { return "mul"; }
else { return "add"; }
}
sub getRm {
my $tv = shift;
if ($tv =~ /rz/) { return 0; }
elsif ($tv =~ /rne/) { return 1; }
elsif ($tv =~ /rd/) {return 2; }
elsif ($tv =~ /ru/) { return 3; }
else { return "bad"; }
}
sub isdenorm {
my $fp = shift;
my $val = hex($fp);
my $expv = $val >> 10;
$expv = $expv & 0x1F;
my $denorm = 0;
if ($expv == 0 && $val != 0) { $denorm = 1;}
# my $e0 = ($expv == 0);
# my $vn0 = ($val != 0);
# my $denorm = 0; #($exp == 0 && $val != 0); # denorm exponent but not all zero
# print("Num $fp Exp $expv Denorm $denorm Done\n");
return $denorm;
}

View File

@ -1,62 +0,0 @@
onerror {resume}
quietly WaveActivateNextPane {} 0
add wave -noupdate /testbench_fma16/clk
add wave -noupdate /testbench_fma16/reset
add wave -noupdate /testbench_fma16/x
add wave -noupdate /testbench_fma16/y
add wave -noupdate /testbench_fma16/z
add wave -noupdate /testbench_fma16/result
add wave -noupdate /testbench_fma16/rexpected
add wave -noupdate /testbench_fma16/dut/x
add wave -noupdate /testbench_fma16/dut/y
add wave -noupdate /testbench_fma16/dut/z
add wave -noupdate /testbench_fma16/dut/mul
add wave -noupdate /testbench_fma16/dut/add
add wave -noupdate /testbench_fma16/dut/negr
add wave -noupdate /testbench_fma16/dut/negz
add wave -noupdate /testbench_fma16/dut/roundmode
add wave -noupdate /testbench_fma16/dut/result
add wave -noupdate /testbench_fma16/dut/XManE
add wave -noupdate /testbench_fma16/dut/YManE
add wave -noupdate /testbench_fma16/dut/ZManE
add wave -noupdate /testbench_fma16/dut/XExpE
add wave -noupdate /testbench_fma16/dut/YExpE
add wave -noupdate /testbench_fma16/dut/ZExpE
add wave -noupdate /testbench_fma16/dut/PExpE
add wave -noupdate /testbench_fma16/dut/Ne
add wave -noupdate /testbench_fma16/dut/upOneExt
add wave -noupdate /testbench_fma16/dut/XSgnE
add wave -noupdate /testbench_fma16/dut/YSgnE
add wave -noupdate /testbench_fma16/dut/ZSgnE
add wave -noupdate /testbench_fma16/dut/PSgnE
add wave -noupdate /testbench_fma16/dut/ProdManE
add wave -noupdate /testbench_fma16/dut/NfracS
add wave -noupdate /testbench_fma16/dut/ProdManAl
add wave -noupdate /testbench_fma16/dut/ZManExt
add wave -noupdate /testbench_fma16/dut/ZManAl
add wave -noupdate /testbench_fma16/dut/Nfrac
add wave -noupdate /testbench_fma16/dut/res
add wave -noupdate -radix decimal /testbench_fma16/dut/AlignCnt
add wave -noupdate /testbench_fma16/dut/NSamt
add wave -noupdate /testbench_fma16/dut/ZExpGreater
add wave -noupdate /testbench_fma16/dut/ACLess
add wave -noupdate /testbench_fma16/dut/upOne
add wave -noupdate /testbench_fma16/dut/KillProd
TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 1} {3746 ns} 1} {{Cursor 2} {4169 ns} 0}
quietly wave cursor active 2
configure wave -namecolwidth 237
configure wave -valuecolwidth 64
configure wave -justifyvalue left
configure wave -signalnamewidth 0
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 1
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ns
update
WaveRestoreZoom {4083 ns} {4235 ns}

View File

@ -5,7 +5,8 @@ WALLYDIR:= $(ROOT)/tests/wally-riscv-arch-test
# IMPERASDIR := $(ROOT)/tests/imperas-riscv-tests
# ALLDIRS := $(ARCHDIR)/$(SUFFIX) $(WALLYDIR)/$(SUFFIX) $(IMPERASDIR)/$(SUFFIX)
IMPERASDIR := $(ROOT)/tests/imperas-riscv-tests
ALLDIRS := $(ARCHDIR)/$(SUFFIX) $(WALLYDIR)/$(SUFFIX)
#ALLDIRS := $(ARCHDIR)/$(SUFFIX) $(WALLYDIR)/$(SUFFIX)
ALLDIRS := $(ARCHDIR)/$(SUFFIX)
ELFFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf")
OBJDUMPFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf.objdump")

View File

@ -1,45 +0,0 @@
# wally-pipelined.do
#
# Modification by Oklahoma State University & Harvey Mudd College
# Use with Testbench
# James Stine, 2008; David Harris 2021
# Go Cowboys!!!!!!
#
# Takes 1:10 to run RV64IC tests using gui
# run with vsim -do "do wally-pipelined.do rv64ic riscvarchtest-64m"
# Use this wally-pipelined.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-pipelined.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-pipelined.do -c
# (omit the "-c" to see the GUI while running from the shell)
onbreak {resume}
# create library
if [file exists work] {
vdel -all
}
vlib work
# compile source files
# suppress spurious warnngs about
# "Extra checking for conflicts with always_comb done at vopt time"
# because vsim will run vopt
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
# *** modelsim won't take `PA_BITS, but will take other defines for the lengths of DTIM_RANGE and IROM_LEN. For now just live with the warnings.
vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench_imperas.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063
vopt +acc work.testbench -G DEBUG=1 -o workopt
vsim workopt +nowarn3829 -fatal 7
view wave
#-- display input and output signals as hexidecimal values
add log -recursive /*
do wave.do
run -all
noview ../testbench/testbench_imperas.sv
view wave

View File

@ -1,14 +1,13 @@
///////////////////////////////////////////
// ahbcacheinterface.sv
//
// Written: Ross Thompson ross1728@gmail.com August 29, 2022
// Modified:
// Written: Ross Thompson ross1728@gmail.com
// Created: August 29, 2022
// Modified: 18 January 2023
//
// Purpose: Cache/Bus data path.
// Bus Side logic
// register the fetch data from the next level of memory.
// This register should be necessary for timing. There is no register in the uncore or
// ahblite controller between the memories and this cache.
// Purpose: Translates cache bus requests and uncached ieu memory requests into AHB transactions.
//
// Documentation: RISC-V System on Chip Design Chapter 9 (Figure 9.8)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
@ -30,48 +29,55 @@
`include "wally-config.vh"
module ahbcacheinterface #(parameter BEATSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) (
module ahbcacheinterface #(
parameter integer BEATSPERLINE, // Number of AHBW words (beats) in cacheline
parameter integer AHBWLOGBWPL, // Log2 of ^
parameter integer LINELEN, // Number of bits in cacheline
parameter integer LLENPOVERAHBW // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation)
)(
input logic HCLK, HRESETn,
// bus interface
input logic HREADY,
input logic [`AHBW-1:0] HRDATA,
output logic [2:0] HSIZE,
output logic [2:0] HBURST,
output logic [1:0] HTRANS,
output logic HWRITE,
output logic [`PA_BITS-1:0] HADDR,
output logic [`AHBW-1:0] HWDATA,
output logic [`AHBW/8-1:0] HWSTRB,
output logic [LOGWPL-1:0] BeatCount,
// bus interface controls
input logic HREADY, // AHB peripheral ready
output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
output logic HWRITE, // AHB 0: Read operation 1: Write operation
output logic [2:0] HSIZE, // AHB transaction width
output logic [2:0] HBURST, // AHB burst length
// bus interface buses
input logic [`AHBW-1:0] HRDATA, // AHB read data
output logic [`PA_BITS-1:0] HADDR, // AHB address
output logic [`AHBW-1:0] HWDATA, // AHB write data
output logic [`AHBW/8-1:0] HWSTRB, // AHB byte mask
// cache interface
input logic [`PA_BITS-1:0] CacheBusAdr,
input logic [`LLEN-1:0] CacheReadDataWordM,
input logic [`LLEN-1:0] WriteDataM,
input logic CacheableOrFlushCacheM,
input logic [1:0] CacheBusRW,
output logic CacheBusAck,
output logic [LINELEN-1:0] FetchBuffer,
input logic Cacheable,
// lsu/ifu interface
input logic Flush,
input logic [`PA_BITS-1:0] PAdr,
input logic [1:0] BusRW,
input logic Stall,
input logic [2:0] Funct3,
output logic SelBusBeat,
output logic BusStall,
output logic BusCommitted
);
localparam integer LLENPOVERAHBW = `LLEN / `AHBW; // *** fix me duplciated in lsu.
input logic [`PA_BITS-1:0] CacheBusAdr, // Address of cache line
input logic [`LLEN-1:0] CacheReadDataWordM, // one word of cache line during a writeback
input logic CacheableOrFlushCacheM, // Memory operation is cacheable or flushing D$
input logic Cacheable, // Memory operation is cachable
input logic [1:0] CacheBusRW, // Cache bus operation, 01: writeback, 10: fetch
output logic CacheBusAck, // Handshack to $ indicating bus transaction completed
output logic [LINELEN-1:0] FetchBuffer, // Register to hold beats of cache line as the arrive from bus
output logic [AHBWLOGBWPL-1:0] BeatCount, // Beat position within the cache line in the Address Phase
output logic SelBusBeat, // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr
localparam integer BeatCountThreshold = CACHE_ENABLED ? BEATSPERLINE - 1 : 0;
logic [`PA_BITS-1:0] LocalHADDR;
logic [LOGWPL-1:0] BeatCountDelayed;
logic CaptureEn;
logic [`AHBW-1:0] PreHWDATA;
// uncached interface
input logic [`PA_BITS-1:0] PAdr, // Physical address of uncached memory operation
input logic [`LLEN-1:0] WriteDataM, // IEU write data for uncached store
input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write
input logic [2:0] Funct3, // Size of uncached memory operation
// lsu/ifu interface
input logic Stall, // Core pipeline is stalled
input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting
output logic BusStall, // Bus is busy with an in flight memory operation
output logic BusCommitted); // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
localparam integer BeatCountThreshold = BEATSPERLINE - 1; // Largest beat index
logic [`PA_BITS-1:0] LocalHADDR; // Address after selecting between cached and uncached operation
logic [AHBWLOGBWPL-1:0] BeatCountDelayed; // Beat within the cache line in the second (Data) cache stage
logic CaptureEn; // Enable updating the Fetch buffer with valid data from HRDATA
logic [`AHBW/8-1:0] BusByteMaskM; // Byte enables within a word. For cache request all 1s
logic [`AHBW-1:0] PreHWDATA; // AHB Address phase write data
genvar index;
@ -84,7 +90,7 @@ module ahbcacheinterface #(parameter BEATSPERLINE, LINELEN, LOGWPL, CACHE_ENABLE
end
mux2 #(`PA_BITS) localadrmux(PAdr, CacheBusAdr, Cacheable, LocalHADDR);
assign HADDR = ({{`PA_BITS-LOGWPL{1'b0}}, BeatCount} << $clog2(`AHBW/8)) + LocalHADDR;
assign HADDR = ({{`PA_BITS-AHBWLOGBWPL{1'b0}}, BeatCount} << $clog2(`AHBW/8)) + LocalHADDR;
mux2 #(3) sizemux(.d0(Funct3), .d1(`AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable), .y(HSIZE));
@ -105,12 +111,11 @@ module ahbcacheinterface #(parameter BEATSPERLINE, LINELEN, LOGWPL, CACHE_ENABLE
// *** bummer need a second byte mask for bus as it is AHBW rather than LLEN.
// probably can merge by muxing PAdrM's LLEN/8-1 index bit based on HTRANS being != 0.
logic [`AHBW/8-1:0] BusByteMaskM;
swbytemask #(`AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(`AHBW/8)-1:0]), .ByteMask(BusByteMaskM));
flopen #(`AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[`AHBW/8-1:0], HWSTRB);
buscachefsm #(BeatCountThreshold, LOGWPL) AHBBuscachefsm(
buscachefsm #(BeatCountThreshold, AHBWLOGBWPL) AHBBuscachefsm(
.HCLK, .HRESETn, .Flush, .BusRW, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat,
.CacheBusRW, .CacheBusAck, .BeatCount, .BeatCountDelayed,
.HREADY, .HTRANS, .HWRITE, .HBURST);

View File

@ -1,15 +1,14 @@
///////////////////////////////////////////
// ahbinterface.sv
//
// Written: Ross Thompson ross1728@gmail.com August 29, 2022
// Modified:
// Written: Ross Thompson ross1728@gmail.com
// Created: August 29, 2022
// Modified: 18 January 2023
//
// Purpose: Cache/Bus data path.
// Bus Side logic
// register the fetch data from the next level of memory.
// This register should be necessary for timing. There is no register in the uncore or
// ahblite controller between the memories and this cache.
// Purpose: Translates LSU simple memory requests into AHB transactions (NON_SEQ).
//
// Documentation: RISC-V System on Chip Design Chapter 6 (Figure 6.21)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
@ -30,25 +29,27 @@
`include "wally-config.vh"
module ahbinterface #(parameter LSU = 0) ( // **** modify to use LSU/ifu parameter to control widths of buses
input logic HCLK, HRESETn,
module ahbinterface #(
parameter LSU = 0 // 1: LSU bus width is `XLEN, 0: IFU bus width is 32 bits
)(
input logic HCLK, HRESETn,
// bus interface
input logic HREADY,
input logic [`XLEN-1:0] HRDATA,
output logic [1:0] HTRANS,
output logic HWRITE,
output logic [`XLEN-1:0] HWDATA,
output logic [`XLEN/8-1:0] HWSTRB,
input logic HREADY, // AHB peripheral ready
output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
output logic HWRITE, // AHB 0: Read operation 1: Write operation
input logic [`XLEN-1:0] HRDATA, // AHB read data
output logic [`XLEN-1:0] HWDATA, // AHB write data
output logic [`XLEN/8-1:0] HWSTRB, // AHB byte mask
// lsu/ifu interface
input logic Flush,
input logic [1:0] BusRW,
input logic [`XLEN/8-1:0] ByteMask,
input logic [`XLEN-1:0] WriteData,
input logic Stall,
output logic BusStall,
output logic BusCommitted,
output logic [(LSU ? `XLEN : 32)-1:0] FetchBuffer
input logic Stall, // Core pipeline is stalled
input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting
input logic [1:0] BusRW, // Memory operation read/write control: 10: read, 01: write
input logic [`XLEN/8-1:0] ByteMask, // Bytes enables within a word
input logic [`XLEN-1:0] WriteData, // IEU write data for a store
output logic BusStall, // Bus is busy with an in flight memory operation
output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
output logic [(LSU ? `XLEN : 32)-1:0] FetchBuffer // Register to hold HRDATA after arriving from the bus
);
logic CaptureEn;

View File

@ -1,11 +1,14 @@
///////////////////////////////////////////
// busfsm.sv
//
// Written: Ross Thompson ross1728@gmail.com December 29, 2021
// Modified:
// Written: Ross Thompson ross1728@gmail.com
// Created: December 29, 2021
// Modified: 18 January 2023
//
// Purpose: Load/Store Unit's interface to BUS for cacheless system
// Purpose: Controller for cache to AHB bus interface
//
// Documentation: RISC-V System on Chip Design Chapter 9 (Figure 9.9)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
@ -25,34 +28,40 @@
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
`define BURST_EN 1
`define BURST_EN 1 // Enables burst mode. Disable to show the lost performance.
// HCLK and clk must be the same clock!
module buscachefsm #(parameter integer BeatCountThreshold, LOGWPL) (
module buscachefsm #(
parameter integer BeatCountThreshold, // Largest beat index
parameter integer AHBWLOGBWPL // Log2 of BEATSPERLINE
)(
input logic HCLK,
input logic HRESETn,
// IEU interface
input logic Flush,
input logic [1:0] BusRW,
input logic Stall,
output logic BusCommitted,
output logic BusStall,
output logic CaptureEn,
// cache interface
input logic [1:0] CacheBusRW,
output logic CacheBusAck,
input logic Stall, // Core pipeline is stalled
input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting
input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write
output logic BusStall, // Bus is busy with an in flight memory operation
output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
// ahb cache interface locals.
output logic CaptureEn, // Enable updating the Fetch buffer with valid data from HRDATA
// cache interface
input logic [1:0] CacheBusRW, // Cache bus operation, 01: writeback, 10: fetch
output logic CacheBusAck, // Handshack to $ indicating bus transaction completed
// lsu interface
output logic [LOGWPL-1:0] BeatCount, BeatCountDelayed,
output logic SelBusBeat,
output logic [AHBWLOGBWPL-1:0] BeatCount, // Beat position within the cache line in the Address Phase
output logic [AHBWLOGBWPL-1:0] BeatCountDelayed, // Beat within the cache line in the second (Data) cache stage
output logic SelBusBeat, // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr
// BUS interface
input logic HREADY,
output logic [1:0] HTRANS,
output logic HWRITE,
output logic [2:0] HBURST
input logic HREADY, // AHB peripheral ready
output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
output logic HWRITE, // AHB 0: Read operation 1: Write operation
output logic [2:0] HBURST // AHB burst length
);
typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, MEM3, CACHE_FETCH, CACHE_WRITEBACK} busstatetype;
@ -60,7 +69,7 @@ module buscachefsm #(parameter integer BeatCountThreshold, LOGWPL) (
(* mark_debug = "true" *) busstatetype CurrState, NextState;
logic [LOGWPL-1:0] NextBeatCount;
logic [AHBWLOGBWPL-1:0] NextBeatCount;
logic FinalBeatCount;
logic [2:0] LocalBurstType;
logic BeatCntEn;
@ -73,14 +82,14 @@ module buscachefsm #(parameter integer BeatCountThreshold, LOGWPL) (
always_comb begin
case(CurrState)
ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE;
ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE;
else if (HREADY & CacheBusRW[0]) NextState = CACHE_WRITEBACK;
else if (HREADY & CacheBusRW[1]) NextState = CACHE_FETCH;
else NextState = ADR_PHASE;
DATA_PHASE: if(HREADY) NextState = MEM3;
else NextState = DATA_PHASE;
else NextState = DATA_PHASE;
MEM3: if(Stall) NextState = MEM3;
else NextState = ADR_PHASE;
else NextState = ADR_PHASE;
CACHE_FETCH: if(HREADY & FinalBeatCount & CacheBusRW[0]) NextState = CACHE_WRITEBACK;
else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH;
else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE;
@ -89,17 +98,17 @@ module buscachefsm #(parameter integer BeatCountThreshold, LOGWPL) (
else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH;
else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE;
else NextState = CACHE_WRITEBACK;
default: NextState = ADR_PHASE;
default: NextState = ADR_PHASE;
endcase
end
// IEU, LSU, and IFU controls
// Used to store data from data phase of AHB.
flopenr #(LOGWPL) BeatCountReg(HCLK, ~HRESETn | BeatCntReset, BeatCntEn, NextBeatCount, BeatCount);
flopenr #(LOGWPL) BeatCountDelayedReg(HCLK, ~HRESETn | BeatCntReset, BeatCntEn, BeatCount, BeatCountDelayed);
flopenr #(AHBWLOGBWPL) BeatCountReg(HCLK, ~HRESETn | BeatCntReset, BeatCntEn, NextBeatCount, BeatCount);
flopenr #(AHBWLOGBWPL) BeatCountDelayedReg(HCLK, ~HRESETn | BeatCntReset, BeatCntEn, BeatCount, BeatCountDelayed);
assign NextBeatCount = BeatCount + 1'b1;
assign FinalBeatCount = BeatCountDelayed == BeatCountThreshold[LOGWPL-1:0];
assign FinalBeatCount = BeatCountDelayed == BeatCountThreshold[AHBWLOGBWPL-1:0];
assign BeatCntEn = ((NextState == CACHE_WRITEBACK | NextState == CACHE_FETCH) & HREADY & ~Flush) |
(NextState == ADR_PHASE & |CacheBusRW & HREADY);
assign BeatCntReset = NextState == ADR_PHASE;

View File

@ -1,10 +1,13 @@
///////////////////////////////////////////
// busfsm.sv
//
// Written: Ross Thompson ross1728@gmail.com December 29, 2021
// Modified:
// Written: Ross Thompson ross1728@gmail.com
// Created: December 29, 2021
// Modified: 18 January 2023
//
// Purpose: Load/Store Unit's interface to BUS for cacheless system
// Purpose: Simple NON_SEQ (no burst) AHB controller.
//
// Documentation: RISC-V System on Chip Design Chapter 6 (Figure 6.23)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
@ -32,15 +35,16 @@ module busfsm (
input logic HRESETn,
// IEU interface
input logic Flush,
input logic [1:0] BusRW,
input logic Stall,
output logic BusCommitted,
output logic BusStall,
output logic CaptureEn,
input logic HREADY,
output logic [1:0] HTRANS,
output logic HWRITE
input logic Stall, // Core pipeline is stalled
input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting
input logic [1:0] BusRW, // Memory operation read/write control: 10: read, 01: write
output logic CaptureEn, // Enable updating the Fetch buffer with valid data from HRDATA
output logic BusStall, // Bus is busy with an in flight memory operation
output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
// AHB control signals
input logic HREADY, // AHB peripheral ready
output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ
output logic HWRITE // AHB 0: Read operation 1: Write operation
);
typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, MEM3} busstatetype;
@ -54,13 +58,13 @@ module busfsm (
always_comb begin
case(CurrState)
ADR_PHASE: if(HREADY & |BusRW) NextState = DATA_PHASE;
else NextState = ADR_PHASE;
DATA_PHASE: if(HREADY) NextState = MEM3;
else NextState = DATA_PHASE;
MEM3: if(Stall) NextState = MEM3;
else NextState = ADR_PHASE;
default: NextState = ADR_PHASE;
ADR_PHASE: if(HREADY & |BusRW) NextState = DATA_PHASE;
else NextState = ADR_PHASE;
DATA_PHASE: if(HREADY) NextState = MEM3;
else NextState = DATA_PHASE;
MEM3: if(Stall) NextState = MEM3;
else NextState = ADR_PHASE;
default: NextState = ADR_PHASE;
endcase
end

View File

@ -1,17 +1,18 @@
///////////////////////////////////////////
// controller input stage
//
// Written: Ross Thompson August 31, 2022
// ross1728@gmail.com
// Modified:
// Written: Ross Thompson ross1728@gmail.com
// Created: August 31, 2022
// Modified: 18 January 2023
//
// Purpose: AHB multi controller interface to merge LSU and IFU controls.
// See ARM_HIH0033A_AMBA_AHB-Lite_SPEC 1.0
// Arbitrates requests from instruction and data streams
// Connects core to peripherals and I/O pins on SOC
// Bus width presently matches XLEN
// Anticipate replacing this with an AXI bus interface to communicate with FPGA DRAM/Flash controllers
//
// Documentation: RISC-V System on Chip Design Chapter 6 (Figure 6.25)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
@ -32,25 +33,29 @@
`include "wally-config.vh"
module controllerinputstage #(parameter SAVE_ENABLED = 1) (
input logic HCLK,
input logic HRESETn,
input logic Save, Restore, Disable,
output logic Request,
module controllerinputstage #(
parameter SAVE_ENABLED = 1 // 1: Save manager inputs if Save = 1, 0: Don't save inputs
)(
input logic HCLK,
input logic HRESETn,
input logic Save, // Two or more managers requesting (HTRANS != 00) at the same time. Save the non-granted manager inputs
input logic Restore, // Restore a saved manager inputs when it is finally granted
input logic Disable, // Supress HREADY to the non-granted manager
output logic Request, // This manager is making a request
// controller input
input logic HWRITEIn,
input logic [2:0] HSIZEIn,
input logic [2:0] HBURSTIn,
input logic [1:0] HTRANSIn,
input logic [`PA_BITS-1:0] HADDRIn,
output logic HREADYOut,
input logic [1:0] HTRANSIn, // Manager input. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
input logic HWRITEIn, // Manager input. AHB 0: Read operation 1: Write operation
input logic [2:0] HSIZEIn, // Manager input. AHB transaction width
input logic [2:0] HBURSTIn, // Manager input. AHB burst length
input logic [`PA_BITS-1:0] HADDRIn, // Manager input. AHB address
output logic HREADYOut, // Indicate to manager the peripherial is not busy and another manager does not have priority
// controller output
output logic HWRITEOut,
output logic [2:0] HSIZEOut,
output logic [2:0] HBURSTOut,
output logic [1:0] HTRANSOut,
output logic [`PA_BITS-1:0] HADDROut,
input logic HREADYIn
output logic [1:0] HTRANSOut, // Aribrated manager transaction. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
output logic HWRITEOut, // Aribrated manager transaction. AHB 0: Read operation 1: Write operation
output logic [2:0] HSIZEOut, // Aribrated manager transaction. AHB transaction width
output logic [2:0] HBURSTOut, // Aribrated manager transaction. AHB burst length
output logic [`PA_BITS-1:0] HADDROut, // Aribrated manager transaction. AHB address
input logic HREADYIn // Peripherial ready
);
logic HWRITESave;

View File

@ -1,17 +1,18 @@
///////////////////////////////////////////
// abhmulticontroller
//
// Written: Ross Thompson August 29, 2022
// ross1728@gmail.com
// Modified:
// Written: Ross Thompson ross1728@gmail.com
// Created: August 29, 2022
// Modified: 18 January 2023
//
// Purpose: AHB multi controller interface to merge LSU and IFU controls.
// See ARM_HIH0033A_AMBA_AHB-Lite_SPEC 1.0
// Arbitrates requests from instruction and data streams
// Connects core to peripherals and I/O pins on SOC
// Bus width presently matches XLEN
// Anticipate replacing this with an AXI bus interface to communicate with FPGA DRAM/Flash controllers
//
// Documentation: RISC-V System on Chip Design Chapter 6 (Figures 6.25 and 6.26)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
@ -35,42 +36,46 @@
module ebu (
input logic clk, reset,
// Signals from IFU
input logic [`PA_BITS-1:0] IFUHADDR,
input logic [2:0] IFUHSIZE,
input logic [2:0] IFUHBURST,
input logic [1:0] IFUHTRANS,
output logic IFUHREADY,
input logic [1:0] IFUHTRANS, // IFU AHB transaction request
input logic [2:0] IFUHSIZE, // IFU AHB transaction size
input logic [2:0] IFUHBURST, // IFU AHB burst length
input logic [`PA_BITS-1:0] IFUHADDR, // IFU AHB address
output logic IFUHREADY, // AHB peripheral ready gated by possible non-grant
// Signals from LSU
input logic [`PA_BITS-1:0] LSUHADDR,
input logic [1:0] LSUHTRANS, // LSU AHB transaction request
input logic LSUHWRITE, // LSU AHB transaction direction. 1: write, 0: read
input logic [2:0] LSUHSIZE, // LSU AHB size
input logic [2:0] LSUHBURST, // LSU AHB burst length
input logic [`PA_BITS-1:0] LSUHADDR, // LSU AHB address
input logic [`XLEN-1:0] LSUHWDATA, // initially support AHBW = XLEN
input logic [`XLEN/8-1:0] LSUHWSTRB,
input logic [2:0] LSUHSIZE,
input logic [2:0] LSUHBURST,
input logic [1:0] LSUHTRANS,
input logic LSUHWRITE,
output logic LSUHREADY,
// add LSUHWSTRB ***
input logic [`XLEN/8-1:0] LSUHWSTRB, // AHB byte mask
output logic LSUHREADY, // AHB peripheral. Never gated as LSU always has priority
// AHB-Lite external signals
(* mark_debug = "true" *) input logic HREADY, HRESP,
(* mark_debug = "true" *) output logic HCLK, HRESETn,
(* mark_debug = "true" *) output logic [`PA_BITS-1:0] HADDR,
(* mark_debug = "true" *) output logic [`AHBW-1:0] HWDATA,
(* mark_debug = "true" *) output logic [`XLEN/8-1:0] HWSTRB,
(* mark_debug = "true" *) output logic HWRITE,
(* mark_debug = "true" *) output logic [2:0] HSIZE,
(* mark_debug = "true" *) output logic [2:0] HBURST,
(* mark_debug = "true" *) output logic [3:0] HPROT,
(* mark_debug = "true" *) output logic [1:0] HTRANS,
(* mark_debug = "true" *) output logic HMASTLOCK
(* mark_debug = "true" *) output logic HCLK, HRESETn,
(* mark_debug = "true" *) input logic HREADY, // AHB peripheral ready
(* mark_debug = "true" *) input logic HRESP, // AHB peripheral response. 0: OK 1: Error
(* mark_debug = "true" *) output logic [`PA_BITS-1:0] HADDR, // AHB address to peripheral after arbitration
(* mark_debug = "true" *) output logic [`AHBW-1:0] HWDATA, // AHB Write data after arbitration
(* mark_debug = "true" *) output logic [`XLEN/8-1:0] HWSTRB, // AHB byte write enables after arbitration
(* mark_debug = "true" *) output logic HWRITE, // AHB transaction direction after arbitration
(* mark_debug = "true" *) output logic [2:0] HSIZE, // AHB transaction size after arbitration
(* mark_debug = "true" *) output logic [2:0] HBURST, // AHB burst length after arbitration
(* mark_debug = "true" *) output logic [3:0] HPROT, // AHB protection. Wally does not use
(* mark_debug = "true" *) output logic [1:0] HTRANS, // AHB transaction request after arbitration
(* mark_debug = "true" *) output logic HMASTLOCK // AHB master lock. Wally does not use
);
typedef enum logic [1:0] {IDLE, ARBITRATE} statetype;
statetype CurrState, NextState;
logic LSUDisable, LSUSelect;
logic IFUSave, IFURestore, IFUDisable, IFUSelect;
logic both;
logic LSUDisable;
logic LSUSelect;
logic IFUSave;
logic IFURestore;
logic IFUDisable;
logic IFUSelect;
logic both; // Both the LSU and IFU request at the same time
logic [`PA_BITS-1:0] IFUHADDROut;
logic [1:0] IFUHTRANSOut;
@ -84,14 +89,15 @@ module ebu (
logic [2:0] LSUHSIZEOut;
logic LSUHWRITEOut;
logic IFUReq, LSUReq;
logic IFUReq;
logic LSUReq;
logic BeatCntEn;
logic [4-1:0] NextBeatCount, BeatCount;
logic FinalBeat, FinalBeatD;
logic [4-1:0] NextBeatCount, BeatCount; // Position within a burst transfer
logic FinalBeat, FinalBeatD; // Indicates the last beat of a burst
logic CntReset;
logic [3:0] Threshold;
logic IFUReqD;
logic [3:0] Threshold; // Number of beats derived from HBURST
logic IFUReqD; // 1 cycle delayed IFU request. Part of arbitration
assign HCLK = clk;
@ -100,14 +106,16 @@ module ebu (
// if two requests come in at once pick one to select and save the others Address phase
// inputs. Abritration scheme is LSU always goes first.
// input stage IFU
////////////////////////////////////////////////////////////////////////////////////////////////////
// input stages and muxing for IFU and LSU
////////////////////////////////////////////////////////////////////////////////////////////////////
controllerinputstage IFUInput(.HCLK, .HRESETn, .Save(IFUSave), .Restore(IFURestore), .Disable(IFUDisable),
.Request(IFUReq),
.HWRITEIn(1'b0), .HSIZEIn(IFUHSIZE), .HBURSTIn(IFUHBURST), .HTRANSIn(IFUHTRANS), .HADDRIn(IFUHADDR),
.HWRITEOut(IFUHWRITEOut), .HSIZEOut(IFUHSIZEOut), .HBURSTOut(IFUHBURSTOut), .HREADYOut(IFUHREADY),
.HTRANSOut(IFUHTRANSOut), .HADDROut(IFUHADDROut), .HREADYIn(HREADY));
// input stage LSU
// LSU always has priority so there should never be a need to save and restore the address phase inputs.
controllerinputstage #(0) LSUInput(.HCLK, .HRESETn, .Save(1'b0), .Restore(1'b0), .Disable(LSUDisable),
.Request(LSUReq),
@ -115,7 +123,7 @@ module ebu (
.HWRITEOut(LSUHWRITEOut), .HSIZEOut(LSUHSIZEOut), .HBURSTOut(LSUHBURSTOut),
.HTRANSOut(LSUHTRANSOut), .HADDROut(LSUHADDROut), .HREADYIn(HREADY));
// output mux //*** rewrite for general number of controllers.
// output mux //*** switch to structural implementation
assign HADDR = LSUSelect ? LSUHADDROut : IFUSelect ? IFUHADDROut : '0;
assign HSIZE = LSUSelect ? LSUHSIZEOut : IFUSelect ? IFUHSIZEOut: '0;
assign HBURST = LSUSelect ? LSUHBURSTOut : IFUSelect ? IFUHBURSTOut : '0; // If doing memory accesses, use LSUburst, else use Instruction burst.
@ -129,8 +137,13 @@ module ebu (
assign HWSTRB = LSUHWSTRB;
// HRDATA is sent to all controllers at the core level.
////////////////////////////////////////////////////////////////////////////////////////////////////
// Aribtration scheme
// FSM decides if arbitration needed. Arbitration is held until the last beat of
// a burst is completed.
////////////////////////////////////////////////////////////////////////////////////////////////////
assign both = LSUReq & IFUReq;
flopenl #(.TYPE(statetype)) busreg(HCLK, ~HRESETn, 1'b1, NextState, IDLE, CurrState);
always_comb
@ -142,8 +155,27 @@ module ebu (
default: NextState = IDLE;
endcase
// This part is only used when burst mode is supported.
// Controller needs to count beats.
// basic arb always selects LSU when both
// replace this block for more sophisticated arbitration as needed.
// Controller 0 (IFU)
assign IFUSave = CurrState == IDLE & both;
assign IFURestore = CurrState == ARBITRATE;
assign IFUDisable = CurrState == ARBITRATE;
assign IFUSelect = (NextState == ARBITRATE) ? 1'b0 : IFUReq;
// Controller 1 (LSU)
// When both the IFU and LSU request at the same time, the FSM will go into the arbitrate state.
// Once the LSU request is done the fsm returns to IDLE. To prevent the LSU from regaining
// priority and re issuing the same memroy operation, the delayed IFUReqD squashes the LSU request.
// This is necessary because the pipeline is stalled for the entire duration of both transactions,
// and the LSU memory request will stil be active.
flopr #(1) ifureqreg(clk, ~HRESETn, IFUReq, IFUReqD);
assign LSUDisable = CurrState == ARBITRATE ? 1'b0 : (IFUReqD & ~(HREADY & FinalBeatD));
assign LSUSelect = NextState == ARBITRATE ? 1'b1: LSUReq;
////////////////////////////////////////////////////////////////////////////////////////////////////
// Burst mode logic
////////////////////////////////////////////////////////////////////////////////////////////////////
flopenr #(4) BeatCountReg(HCLK, ~HRESETn | CntReset | FinalBeat, BeatCntEn, NextBeatCount, BeatCount);
assign NextBeatCount = BeatCount + 1'b1;
@ -165,17 +197,6 @@ module ebu (
endcase
end
// basic arb always selects LSU when both
// replace this block for more sophisticated arbitration as needed.
// Controller 0 (IFU)
assign IFUSave = CurrState == IDLE & both;
assign IFURestore = CurrState == ARBITRATE;
assign IFUDisable = CurrState == ARBITRATE;
assign IFUSelect = (NextState == ARBITRATE) ? 1'b0 : IFUReq;
// Controller 1 (LSU)
assign LSUDisable = CurrState == ARBITRATE ? 1'b0 : (IFUReqD & ~(HREADY & FinalBeatD));
assign LSUSelect = NextState == ARBITRATE ? 1'b1: LSUReq;
flopr #(1) ifureqreg(clk, ~HRESETn, IFUReq, IFUReqD);
endmodule

View File

@ -38,9 +38,10 @@ module ram1p1rwbe #(parameter DEPTH=128, WIDTH=256) (
input logic [WIDTH-1:0] din,
input logic we,
input logic [(WIDTH-1)/8:0] bwe,
output logic [WIDTH-1:0] dout);
output logic [WIDTH-1:0] dout
);
logic [WIDTH-1:0] RAM[DEPTH-1:0];
logic [WIDTH-1:0] RAM[DEPTH-1:0];
// ***************************************************************************
// TRUE SRAM macro
@ -64,18 +65,18 @@ module ram1p1rwbe #(parameter DEPTH=128, WIDTH=256) (
integer i;
// Read
always @(posedge clk)
always_ff @(posedge clk)
if(ce) dout <= #1 RAM[addr];
// Write divided into part for bytes and part for extra msbs
if(WIDTH >= 8)
always @(posedge clk)
always_ff @(posedge clk)
if (ce & we)
for(i = 0; i < WIDTH/8; i++)
if(bwe[i]) RAM[addr][i*8 +: 8] <= #1 din[i*8 +: 8];
if (WIDTH%8 != 0) // handle msbs if width not a multiple of 8
always @(posedge clk)
always_ff @(posedge clk)
if (ce & we & bwe[WIDTH/8])
RAM[addr][WIDTH-1:WIDTH-WIDTH%8] <= #1 din[WIDTH-1:WIDTH-WIDTH%8];
end

View File

@ -36,24 +36,20 @@
`include "wally-config.vh"
module ram2p1r1wb
#(parameter int DEPTH = 10,
parameter int WIDTH = 2
)
module ram2p1r1wb #(parameter DEPTH = 10, WIDTH = 2) (
input logic clk,
input logic reset,
(input logic clk,
input logic reset,
// port 1 is read only
input logic [DEPTH-1:0] ra1,
output logic [WIDTH-1:0] rd1,
input logic ren1,
// port 2 is write only
input logic [DEPTH-1:0] wa2,
input logic [WIDTH-1:0] wd2,
input logic wen2,
input logic [WIDTH-1:0] bwe2
// port 1 is read only
input logic [DEPTH-1:0] ra1,
output logic [WIDTH-1:0] rd1,
input logic ren1,
// port 2 is write only
input logic [DEPTH-1:0] wa2,
input logic [WIDTH-1:0] wd2,
input logic wen2,
input logic [WIDTH-1:0] bwe2
);

View File

@ -32,16 +32,17 @@
`include "wally-config.vh"
module ram2p1r1wbefix #(parameter DEPTH=128, WIDTH=256) (
input logic clk,
input logic ce1, ce2,
input logic [$clog2(DEPTH)-1:0] ra1,
input logic [WIDTH-1:0] wd2,
input logic [$clog2(DEPTH)-1:0] wa2,
input logic we2,
input logic [(WIDTH-1)/8:0] bwe2,
output logic [WIDTH-1:0] rd1);
input logic clk,
input logic ce1, ce2,
input logic [$clog2(DEPTH)-1:0] ra1,
input logic [WIDTH-1:0] wd2,
input logic [$clog2(DEPTH)-1:0] wa2,
input logic we2,
input logic [(WIDTH-1)/8:0] bwe2,
output logic [WIDTH-1:0] rd1
);
logic [WIDTH-1:0] mem[DEPTH-1:0];
logic [WIDTH-1:0] mem[DEPTH-1:0];
// ***************************************************************************
// TRUE Smem macro
@ -53,18 +54,18 @@ module ram2p1r1wbefix #(parameter DEPTH=128, WIDTH=256) (
integer i;
// Read
always @(posedge clk)
always_ff @(posedge clk)
if(ce1) rd1 <= #1 mem[ra1];
// Write divided into part for bytes and part for extra msbs
if(WIDTH >= 8)
always @(posedge clk)
always_ff @(posedge clk)
if (ce2 & we2)
for(i = 0; i < WIDTH/8; i++)
if(bwe2[i]) mem[wa2][i*8 +: 8] <= #1 wd2[i*8 +: 8];
if (WIDTH%8 != 0) // handle msbs if width not a multiple of 8
always @(posedge clk)
always_ff @(posedge clk)
if (ce2 & we2 & bwe2[WIDTH/8])
mem[wa2][WIDTH-1:WIDTH-WIDTH%8] <= #1 wd2[WIDTH-1:WIDTH-WIDTH%8];

View File

@ -34,8 +34,10 @@ module datapath (
// Decode stage signals
input logic [2:0] ImmSrcD, // Selects type of immediate extension
input logic [31:0] InstrD, // Instruction in Decode stage
input logic [2:0] Funct3E, // Funct3 field of instruction in Execute stage
// Execute stage signals
input logic [`XLEN-1:0] PCE, // PC in Execute stage
input logic [`XLEN-1:0] PCLinkE, // PC + 4 (of instruction in Execute stage)
input logic [2:0] Funct3E, // Funct3 field of instruction in Execute stage
input logic StallE, FlushE, // Stall, flush Execute stage
input logic [1:0] ForwardAE, ForwardBE, // Forward ALU operands from later stages
input logic [2:0] ALUControlE, // Indicate operation ALU performs
@ -43,8 +45,6 @@ module datapath (
input logic ALUResultSrcE, // Selects result to pass on to Memory stage
input logic JumpE, // Is a jump (j) instruction
input logic BranchSignedE, // Branch comparison operands are signed (if it's a branch)
input logic [`XLEN-1:0] PCE, // PC in Execute stage
input logic [`XLEN-1:0] PCLinkE, // PC + 4 (of instruction in Execute stage)
output logic [1:0] FlagsE, // Comparison flags ({eq, lt})
output logic [`XLEN-1:0] IEUAdrE, // Address computed by ALU
output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU sources before the mux chooses between them and PCE to put in srcA/B
@ -77,9 +77,9 @@ module datapath (
logic [`XLEN-1:0] R1E, R2E; // Source operands read from register file
logic [`XLEN-1:0] ImmExtE; // Extended immediate in Execute stage
logic [`XLEN-1:0] SrcAE, SrcBE; // ALU operands
logic [`XLEN-1:0] ALUResultE, AltResultE, IEUResultE; // ALU result, Alternative result (ImmExtE or PC+4), computed address *** According to Figure 4.12, IEUResultE should be called IEUAdrE
logic [`XLEN-1:0] ALUResultE, AltResultE, IEUResultE; // ALU result, Alternative result (ImmExtE or PC+4), result of execution stage
// Memory stage signals
logic [`XLEN-1:0] IEUResultM; // Address computed by ALU *** According to Figure 4.12, IEUResultM should be called IEUAdrM
logic [`XLEN-1:0] IEUResultM; // Result from execution stage
logic [`XLEN-1:0] IFResultM; // Result from either IEU or single-cycle FPU op writing an integer register
// Writeback stage signals
logic [`XLEN-1:0] SCResultW; // Store Conditional result

View File

@ -37,6 +37,7 @@ module ieu (
// Execute stage signals
input logic [`XLEN-1:0] PCE, // PC
input logic [`XLEN-1:0] PCLinkE, // PC + 4
output logic PCSrcE, // Select next PC (between PC+4 and IEUAdrE)
input logic FWriteIntE, FCvtIntE, // FPU writes to integer register file, FPU converts float to int
output logic [`XLEN-1:0] IEUAdrE, // Memory address
output logic IntDivE, W64E, // Integer divide, RV64 W-type instruction
@ -66,29 +67,28 @@ module ieu (
input logic FlushD, FlushE, FlushM, FlushW, // Flush signals
output logic FCvtIntStallD, LoadStallD, // Stall causes from IEU to hazard unit
output logic MDUStallD, CSRRdStallD, StoreStallD,
output logic PCSrcE, // Select next PC (between PC+4 and IEUAdrE)
output logic CSRReadM, CSRWriteM, PrivilegedM,// CSR read, CSR write, is privileged instruction
output logic CSRWriteFenceM // CSR write or fence instruction needs to flush subsequent instructions
);
logic [2:0] ImmSrcD; // Select type of immediate extension
logic [1:0] FlagsE; // Comparison flags ({eq, lt})
logic [2:0] ALUControlE; // ALU Control
logic ALUSrcAE, ALUSrcBE; // ALU source operands
logic [2:0] ResultSrcW; // Source of result in Writeback stage
logic ALUResultSrcE; // ALU result
logic SCE; // Store Conditional instruction
logic FWriteIntM; // FPU writing to integer register file
logic IntDivW; // Integer divide instruction
logic [2:0] ImmSrcD; // Select type of immediate extension
logic [1:0] FlagsE; // Comparison flags ({eq, lt})
logic [2:0] ALUControlE; // ALU control indicates function to perform
logic ALUSrcAE, ALUSrcBE; // ALU source operands
logic [2:0] ResultSrcW; // Selects result in Writeback stage
logic ALUResultSrcE; // Selects ALU result to pass on to Memory stage
logic SCE; // Store Conditional instruction
logic FWriteIntM; // FPU writing to integer register file
logic IntDivW; // Integer divide instruction
// forwarding signals
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E; // Source and destination registers
logic [1:0] ForwardAE, ForwardBE; // Select signals for forwarding multiplexers
logic RegWriteM, RegWriteW; // Register will be written in Memory, Writeback stages
logic MemReadE, CSRReadE; // Load, CSRRead instruction
logic JumpE; // Jump instruction
logic BranchSignedE; // Branch does signed comparison on operands
logic MDUE; // Multiply/divide instruction
// Forwarding signals
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E; // Source and destination registers
logic [1:0] ForwardAE, ForwardBE; // Select signals for forwarding multiplexers
logic RegWriteM, RegWriteW; // Register will be written in Memory, Writeback stages
logic MemReadE, CSRReadE; // Load, CSRRead instruction
logic JumpE; // Jump instruction
logic BranchSignedE; // Branch does signed comparison on operands
logic MDUE; // Multiply/divide instruction
controller c(
.clk, .reset, .StallD, .FlushD, .InstrD, .ImmSrcD,

View File

@ -1,13 +1,12 @@
///////////////////////////////////////////
// bpred.sv
//
// Written: Ross Thomposn
// Email: ross1728@gmail.com
// Created: February 12, 2021
// Modified:
// Written: Ross Thomposn ross1728@gmail.com
// Created: 12 February 2021
// Modified: 19 January 2023
//
// Purpose: Branch prediction unit
// Produces a branch prediction based on branch history.
// Purpose: Branch direction prediction and jump/branch target prediction.
// Prediction made during the fetch stage and corrected in the execution stage.
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
@ -35,30 +34,29 @@ module bpred (
input logic FlushD, FlushE, FlushM, FlushW,
// Fetch stage
// the prediction
input logic [31:0] InstrD, // Decompressed decode stage instruction
input logic [`XLEN-1:0] PCNextF, // Next Fetch Address
input logic [`XLEN-1:0] PCPlus2or4F, // PCF+2/4
output logic [`XLEN-1:0] PCNext1F, // Branch Predictor predicted or corrected fetch address on miss prediction
output logic [`XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage.
input logic [31:0] InstrD, // Decompressed decode stage instruction. Used to decode instruction class
input logic [`XLEN-1:0] PCNextF, // Next Fetch Address
input logic [`XLEN-1:0] PCPlus2or4F, // PCF+2/4
output logic [`XLEN-1:0] PCNext1F, // Branch Predictor predicted or corrected fetch address on miss prediction
output logic [`XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage.
// Update Predictor
input logic [`XLEN-1:0] PCF, // Fetch stage instruction address.
input logic [`XLEN-1:0] PCD, // Decode stage instruction address. Also the address the branch predictor took.
input logic [`XLEN-1:0] PCE, // Execution stage instruction address.
input logic [`XLEN-1:0] PCM, // Memory stage instruction address.
input logic [`XLEN-1:0] PCF, // Fetch stage instruction address.
input logic [`XLEN-1:0] PCD, // Decode stage instruction address. Also the address the branch predictor took.
input logic [`XLEN-1:0] PCE, // Execution stage instruction address.
input logic [`XLEN-1:0] PCM, // Memory stage instruction address.
// *** after reviewing the compressed instruction set I am leaning towards having the btb predict the instruction class.
// *** the specifics of how this is encode is subject to change.
input logic PCSrcE, // Executation stage branch is taken
input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br
// Branch and jump outcome
input logic PCSrcE, // Executation stage branch is taken
input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br
// Report branch prediction status
output logic BPPredWrongE, // Prediction is wrong.
output logic DirPredictionWrongM, // Prediction direction is wrong.
output logic BTBPredPCWrongM, // Prediction target wrong.
output logic RASPredPCWrongM, // RAS prediction is wrong.
output logic BPPredWrongE, // Prediction is wrong.
output logic DirPredictionWrongM, // Prediction direction is wrong.
output logic BTBPredPCWrongM, // Prediction target wrong.
output logic RASPredPCWrongM, // RAS prediction is wrong.
output logic PredictionInstrClassWrongM // Class prediction is wrong.
);

View File

@ -1,10 +1,15 @@
///////////////////////////////////////////
// decompress.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
// Written: David_Harris@hmc.edu
// Created: 9 January 2021
// Modified: 18 January 2023
//
// Purpose: Expand 16-bit compressed instructions to 32 bits
//
// Documentation: RISC-V System on Chip Design Chapter 11 (Section 11.3.1)
// RISC-V Specification 13 Dec 2019 Chapter 16 pg. 97
// *** probably need more documentation in this file since the book is very light on decompression.
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
@ -27,9 +32,10 @@
`include "wally-config.vh"
module decompress (
input logic [31:0] InstrRawD,
output logic [31:0] InstrD,
output logic IllegalCompInstrD);
input logic [31:0] InstrRawD, // 32-bit instruction or raw un decompress instruction
output logic [31:0] InstrD, // Decompressed instruction
output logic IllegalCompInstrD // Invalid decompressed instruction
);
logic [15:0] instr16;
logic [4:0] rds1, rs2, rs1p, rs2p, rds1p, rdp;

View File

@ -121,11 +121,10 @@ module ifu (
// Spill Support
/////////////////////////////////////////////////////////////////////////////////////////////
if(`C_SUPPORTED) begin : SpillSupport
spillsupport #(`ICACHE) spillsupport(.clk, .reset, .StallF, .Flush(FlushD), .PCF, .PCPlus4F, .PCNextF, .InstrRawF(InstrRawF),
.InstrDAPageFaultF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCFSpill,
.SelNextSpillF, .PostSpillInstrRawF, .CompressedF);
end else begin : NoSpillSupport
if(`C_SUPPORTED) begin : Spill
spill #(`ICACHE) spill(.clk, .reset, .StallD, .FlushD, .PCF, .PCPlus4F, .PCNextF, .InstrRawF,
.InstrDAPageFaultF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCFSpill, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF);
end else begin : NoSpill
assign PCNextFSpill = PCNextF;
assign PCFSpill = PCF;
assign PostSpillInstrRawF = InstrRawF;
@ -189,9 +188,11 @@ module ifu (
assign IgnoreRequest = ITLBMissF | FlushD;
// The IROM uses untranslated addresses, so it is not compatible with virtual memory.
if (`IROM_SUPPORTED) begin : irom
if (`IROM_SUPPORTED) begin : irom
logic IROMce;
assign IROMce = ~GatedStallD | reset;
assign IFURWF = 2'b10;
irom irom(.clk, .ce(~GatedStallD | reset), .Adr(PCNextFSpill[`XLEN-1:0]), .ReadData(IROMInstrF));
irom irom(.clk, .ce(IROMce), .Adr(PCNextFSpill[`XLEN-1:0]), .IROMInstrF);
end else begin
assign IFURWF = 2'b10;
end
@ -201,6 +202,7 @@ module ifu (
localparam integer LOGBWPL = `ICACHE ? $clog2(WORDSPERLINE) : 1;
if(`ICACHE) begin : icache
localparam integer LINELEN = `ICACHE ? `ICACHE_LINELENINBITS : `XLEN;
localparam integer LLENPOVERAHBW = `LLEN / `AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation)
logic [LINELEN-1:0] FetchBuffer;
logic [`PA_BITS-1:0] ICacheBusAdr;
logic ICacheBusAck;
@ -226,7 +228,7 @@ module ifu (
.NextAdr(PCNextFSpill[11:0]),
.PAdr(PCPF),
.CacheCommitted(CacheCommittedF), .InvalidateCache(InvalidateICacheM));
ahbcacheinterface #(WORDSPERLINE, LINELEN, LOGBWPL, `ICACHE)
ahbcacheinterface #(WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW)
ahbcacheinterface(.HCLK(clk), .HRESETn(~reset),
.HRDATA,
.Flush(FlushD), .CacheBusRW, .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), .HWSTRB(),

View File

@ -1,8 +1,9 @@
///////////////////////////////////////////
// irom.sv
//
// Written: Ross Thompson ross1728@gmail.com January 30, 2022
// Modified:
// Written: Ross Thompson ross1728@gmail.com
// Created: 30 January 2022
// Modified: 18 January 2023
//
// Purpose: simple instruction ROM
// A component of the CORE-V-WALLY configurable RISC-V project.
@ -26,23 +27,30 @@
`include "wally-config.vh"
module irom(
input logic clk, ce,
input logic [`XLEN-1:0] Adr,
output logic [31:0] ReadData
input logic clk,
input logic ce, // Chip Enable. 0: Holds IROMInstrF constant
input logic [`XLEN-1:0] Adr, // PCNextFSpill
output logic [31:0] IROMInstrF // Instruction read data
);
localparam ADDR_WDITH = $clog2(`IROM_RANGE/8);
localparam OFFSET = $clog2(`XLEN/8);
logic [`XLEN-1:0] ReadDataFull;
logic [`XLEN-1:0] IROMInstrFFull;
logic [31:0] RawIROMInstrF;
rom1p1r #(ADDR_WDITH, `XLEN) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataFull));
if (`XLEN == 32) assign ReadData = ReadDataFull;
// have to delay Ardr[OFFSET-1] by 1 cycle
logic [1:0] AdrD;
flopen #(2) AdrReg(clk, ce, Adr[2:1], AdrD);
rom1p1r #(ADDR_WDITH, `XLEN) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(IROMInstrFFull));
if (`XLEN == 32) assign RawIROMInstrF = IROMInstrFFull;
else begin
logic AdrD;
flopen #(1) AdrReg(clk, ce, Adr[OFFSET-1], AdrD);
assign ReadData = AdrD ? ReadDataFull[63:32] : ReadDataFull[31:0];
// IROM is aligned to XLEN words, but instructions are 32 bits. Select between the two
// haves. Adr is the Next PCF not PCF so we delay 1 cycle.
assign RawIROMInstrF = AdrD[1] ? IROMInstrFFull[63:32] : IROMInstrFFull[31:0];
end
// If the memory addres is aligned to 2 bytes return the upper 2 bytes in the lower 2 bytes.
// The spill logic will handle merging the two together.
assign IROMInstrF = AdrD[0] ? {16'b0, RawIROMInstrF[31:16]} : RawIROMInstrF;
endmodule

View File

@ -123,8 +123,8 @@ module speculativegshare
assign GHRNextD = FlushD ? {GHRNextE, GHRNextE[0]} : {DirPredictionF[1], GHRF, GHRF[0]};
flopenr #(k+2) GHRDReg(clk, reset, (~StallD) | FlushD, GHRNextD, OldGHRD);
assign GHRD = WrongPredInstrClassD[0] & BranchInstrD ? {DirPredictionD[1], OldGHRD[k:1]} : // shift right
WrongPredInstrClassD[0] & ~BranchInstrD ? OldGHRD[k-2:-1] : // shift left
OldGHRD;
WrongPredInstrClassD[0] & ~BranchInstrD ? OldGHRD[k-1:-1] : // shift left
OldGHRD[k:0];
assign GHRNextE = FlushE ? GHRNextM : GHRD;
flopenr #(k+1) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, OldGHRE);

112
pipelined/src/ifu/spill.sv Normal file
View File

@ -0,0 +1,112 @@
///////////////////////////////////////////
// spill.sv
//
// Written: Ross Thompson ross1728@gmail.com
// Created: 28 January 2022
// Modified: 19 January 2023
//
// Purpose: allows the IFU to make extra memory request if instruction address crosses
// cache line boundaries or if instruction address without a cache crosses
// XLEN/8 boundary.
//
// Documentation: RISC-V System on Chip Design Chapter 11 (Figure 11.5)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module spill #(
parameter CACHE_ENABLED // Changes spill threshold to 1 if there is no cache
)(input logic clk,
input logic reset,
input logic StallD, FlushD,
input logic [`XLEN-1:0] PCF, // 2 byte aligned PC in Fetch stage
input logic [`XLEN-1:2] PCPlus4F, // PCF + 4
input logic [`XLEN-1:0] PCNextF, // The next PCF
input logic [31:0] InstrRawF, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed
input logic IFUCacheBusStallD, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched
input logic ITLBMissF, // ITLB miss, ignore memory request
input logic InstrDAPageFaultF, // Ignore memory request if the hptw support write and a DA page fault occurs (hptw is still active)
output logic [`XLEN-1:0] PCNextFSpill, // The next PCF for one of the two memory addresses of the spill
output logic [`XLEN-1:0] PCFSpill, // PCF for one of the two memory addresses of the spill
output logic SelNextSpillF, // During the transition between the two spill operations, the IFU should stall the pipeline
output logic [31:0] PostSpillInstrRawF,// The final 32 bit instruction after merging the two spilled fetches into 1 instruction
output logic CompressedF); // The fetched instruction is compressed
// Spill threshold occurs when all the cache offset PC bits are 1 (except [0]). Without a cache this is just PCF[1]
localparam integer SPILLTHRESHOLD = CACHE_ENABLED ? `ICACHE_LINELENINBITS/32 : 1;
logic [`XLEN-1:0] PCPlus2F;
logic TakeSpillF;
logic SpillF;
logic SelSpillF;
logic SpillSaveF;
logic [15:0] InstrFirstHalf;
typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype;
(* mark_debug = "true" *) statetype CurrState, NextState;
////////////////////////////////////////////////////////////////////////////////////////////////////
// PC logic
////////////////////////////////////////////////////////////////////////////////////////////////////
// compute PCF+2 from the raw PC+4
mux2 #(`XLEN) pcplus2mux(.d0({PCF[`XLEN-1:2], 2'b10}), .d1({PCPlus4F, 2'b00}), .s(PCF[1]), .y(PCPlus2F));
// select between PCNextF and PCF+2
mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelNextSpillF & ~FlushD), .y(PCNextFSpill));
// select between PCF and PCF+2
mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCFSpill));
////////////////////////////////////////////////////////////////////////////////////////////////////
// Detect spill
////////////////////////////////////////////////////////////////////////////////////////////////////
assign SpillF = &PCF[$clog2(SPILLTHRESHOLD)+1:1];
assign TakeSpillF = SpillF & ~IFUCacheBusStallD & ~(ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF));
always_ff @(posedge clk)
if (reset | FlushD) CurrState <= #1 STATE_READY;
else CurrState <= #1 NextState;
always_comb begin
case (CurrState)
STATE_READY: if (TakeSpillF) NextState = STATE_SPILL;
else NextState = STATE_READY;
STATE_SPILL: if(IFUCacheBusStallD | StallD) NextState = STATE_SPILL;
else NextState = STATE_READY;
default: NextState = STATE_READY;
endcase
end
assign SelSpillF = (CurrState == STATE_SPILL);
assign SelNextSpillF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallD);
assign SpillSaveF = (CurrState == STATE_READY) & TakeSpillF & ~FlushD;
////////////////////////////////////////////////////////////////////////////////////////////////////
// Merge spilled instruction
////////////////////////////////////////////////////////////////////////////////////////////////////
// save the first 2 bytes
flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalf);
// merge together
mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalf}, SpillF, PostSpillInstrRawF);
assign CompressedF = PostSpillInstrRawF[1:0] != 2'b11;
endmodule

View File

@ -1,98 +0,0 @@
///////////////////////////////////////////
// spillsupport.sv
//
// Written: Ross Thompson ross1728@gmail.com January 28, 2022
// Modified:
//
// Purpose: allows the IFU to make extra memory request if instruction address crosses
// cache line boundaries or if instruction address without a cache crosses
// XLEN/8 boundary.
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module spillsupport #(parameter CACHE_ENABLED)
(input logic clk,
input logic reset,
input logic StallF, Flush,
input logic [`XLEN-1:0] PCF,
input logic [`XLEN-1:2] PCPlus4F,
input logic [`XLEN-1:0] PCNextF,
input logic [31:0] InstrRawF,
input logic IFUCacheBusStallD,
input logic ITLBMissF,
input logic InstrDAPageFaultF,
output logic [`XLEN-1:0] PCNextFSpill,
output logic [`XLEN-1:0] PCFSpill,
output logic SelNextSpillF,
output logic [31:0] PostSpillInstrRawF,
output logic CompressedF);
localparam integer SPILLTHRESHOLD = CACHE_ENABLED ? `ICACHE_LINELENINBITS/32 : 1;
logic [`XLEN-1:0] PCPlus2F;
logic TakeSpillF;
logic SpillF;
logic SelSpillF, SpillSaveF;
logic [15:0] SpillDataLine0, SavedInstr;
typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype;
(* mark_debug = "true" *) statetype CurrState, NextState;
// compute PCF+2
mux2 #(`XLEN) pcplus2mux(.d0({PCF[`XLEN-1:2], 2'b10}), .d1({PCPlus4F, 2'b00}), .s(PCF[1]), .y(PCPlus2F));
// select between PCNextF and PCF+2
mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelNextSpillF & ~Flush), .y(PCNextFSpill));
// select between PCF and PCF+2
mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCFSpill));
assign SpillF = &PCF[$clog2(SPILLTHRESHOLD)+1:1];
assign TakeSpillF = SpillF & ~IFUCacheBusStallD & ~(ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF));
always_ff @(posedge clk)
if (reset | Flush) CurrState <= #1 STATE_READY;
else CurrState <= #1 NextState;
always_comb begin
case (CurrState)
STATE_READY: if (TakeSpillF) NextState = STATE_SPILL;
else NextState = STATE_READY;
STATE_SPILL: if(IFUCacheBusStallD | StallF) NextState = STATE_SPILL;
else NextState = STATE_READY;
default: NextState = STATE_READY;
endcase
end
assign SelSpillF = (CurrState == STATE_SPILL);
assign SelNextSpillF = (CurrState == STATE_READY & TakeSpillF) |
(CurrState == STATE_SPILL & IFUCacheBusStallD);
assign SpillSaveF = (CurrState == STATE_READY) & TakeSpillF;
assign SavedInstr = CACHE_ENABLED ? InstrRawF[15:0] : InstrRawF[31:16];
flopenr #(16) SpillInstrReg(.clk(clk),
.en(SpillSaveF & ~Flush),
.reset(reset),
.d(SavedInstr),
.q(SpillDataLine0));
mux2 #(32) postspillmux(.d0(InstrRawF), .d1({InstrRawF[15:0], SpillDataLine0}), .s(SpillF),
.y(PostSpillInstrRawF));
assign CompressedF = PostSpillInstrRawF[1:0] != 2'b11;
endmodule

View File

@ -1,11 +1,14 @@
///////////////////////////////////////////
// amoalu.sv
//
// Written: David_Harris@hmc.edu 10 March 2021
// Modified:
// Written: David_Harris@hmc.edu
// Created: 10 March 2021
// Modified: 18 January 2023
//
// Purpose: Performs AMO operations
//
// Documentation: RISC-V System on Chip Design Chapter 14 (Figure ***)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
@ -26,13 +29,12 @@
`include "wally-config.vh"
// *** this should probably be moved into the LSU because it is instantiated in the D$
module amoalu (
input logic [`XLEN-1:0] srca, srcb,
input logic [6:0] funct,
input logic [1:0] width,
output logic [`XLEN-1:0] result
input logic [`XLEN-1:0] ReadDataM, // LSU's ReadData
input logic [`XLEN-1:0] IHWriteDataM, // LSU's WriteData
input logic [6:0] LSUFunct7M, // ALU Operation
input logic [2:0] LSUFunct3M, // Memoy access width
output logic [`XLEN-1:0] AMOResult // ALU output
);
logic [`XLEN-1:0] a, b, y;
@ -41,7 +43,7 @@ module amoalu (
// a single carry chain should be shared for + and the four min/max
// and the same mux can be used to select b for swap.
always_comb
case (funct[6:2])
case (LSUFunct7M[6:2])
5'b00001: y = b; // amoswap
5'b00000: y = a + b; // amoadd
5'b00100: y = a ^ b; // amoxor
@ -56,19 +58,19 @@ module amoalu (
// sign extend if necessary
if (`XLEN == 32) begin:sext
assign a = srca;
assign b = srcb;
assign result = y;
assign a = ReadDataM;
assign b = IHWriteDataM;
assign AMOResult = y;
end else begin:sext // `XLEN = 64
always_comb
if (width == 2'b10) begin // sign-extend word-length operations
a = {{32{srca[31]}}, srca[31:0]};
b = {{32{srcb[31]}}, srcb[31:0]};
result = {{32{y[31]}}, y[31:0]};
if (LSUFunct3M[1:0] == 2'b10) begin // sign-extend word-length operations
a = {{32{ReadDataM[31]}}, ReadDataM[31:0]};
b = {{32{IHWriteDataM[31]}}, IHWriteDataM[31:0]};
AMOResult = {{32{y[31]}}, y[31:0]};
end else begin
a = srca;
b = srcb;
result = y;
a = ReadDataM;
b = IHWriteDataM;
AMOResult = y;
end
end
endmodule

View File

@ -1,10 +1,13 @@
///////////////////////////////////////////
// atomic.sv
//
// Written: Ross Thompson ross1728@gmail.com January 31, 2022
// Modified:
// Written: Ross Thompson ross1728@gmail.com
// Created: 31 January 2022
// Modified: 18 January 2023
//
// Purpose: atomic data path.
// Purpose: Wrapper for amoalu and lrsc
//
// Documentation: RISC-V System on Chip Design Chapter 14 (Figure ***)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
@ -28,25 +31,25 @@
module atomic (
input logic clk,
input logic reset, StallW,
input logic [`XLEN-1:0] ReadDataM,
input logic [`XLEN-1:0] IHWriteDataM,
input logic [`PA_BITS-1:0] PAdrM,
input logic [6:0] LSUFunct7M,
input logic [2:0] LSUFunct3M,
input logic [1:0] LSUAtomicM,
input logic [1:0] PreLSURWM,
input logic IgnoreRequest,
output logic [`XLEN-1:0] IMAWriteDataM,
output logic SquashSCW,
output logic [1:0] LSURWM
input logic reset,
input logic StallW,
input logic [`XLEN-1:0] ReadDataM, // LSU ReadData XLEN because FPU does not issue atomic memory operation from FPU registers
input logic [`XLEN-1:0] IHWriteDataM, // LSU WriteData XLEN because FPU does not issue atomic memory operation from FPU registers
input logic [`PA_BITS-1:0] PAdrM, // Physical memory address
input logic [6:0] LSUFunct7M, // AMO alu operation gated by HPTW
input logic [2:0] LSUFunct3M, // IEU or HPTW memory operation size
input logic [1:0] LSUAtomicM, // 10: AMO operation, select AMOResult as the writedata output, 01: LR/SC operation
input logic [1:0] PreLSURWM, // IEU or HPTW Read/Write signal
input logic IgnoreRequest, // On FlushM or TLB miss ignore memory operation
output logic [`XLEN-1:0] IMAWriteDataM, // IEU, HPTW, or AMO write data
output logic SquashSCW, // Store conditional failed disable write to GPR
output logic [1:0] LSURWM // IEU or HPTW Read/Write signal gated by LR/SC
);
logic [`XLEN-1:0] AMOResult;
logic MemReadM;
amoalu amoalu(.srca(ReadDataM), .srcb(IHWriteDataM), .funct(LSUFunct7M), .width(LSUFunct3M[1:0]),
.result(AMOResult));
amoalu amoalu(.ReadDataM, .IHWriteDataM, .LSUFunct7M, .LSUFunct3M, .AMOResult);
mux2 #(`XLEN) wdmux(IHWriteDataM, AMOResult, LSUAtomicM[1], IMAWriteDataM);
assign MemReadM = PreLSURWM[1] & ~IgnoreRequest;

View File

@ -1,10 +1,14 @@
///////////////////////////////////////////
// dtim.sv
//
// Written: Ross Thompson ross1728@gmail.com January 30, 2022
// Modified:
// Written: Ross Thompson ross1728@gmail.com
// Created: 30 January 2022
// Modified: 18 January 2023
//
// Purpose: tightly integrated memory into the LSU.
//
// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.12)
//
// Purpose: simple memory with bus or cache.
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
@ -26,14 +30,15 @@
`include "wally-config.vh"
module dtim(
input logic clk, ce,
input logic [1:0] MemRWM,
input logic [`PA_BITS-1:0] Adr,
input logic FlushW,
input logic [`LLEN-1:0] WriteDataM,
input logic [`LLEN/8-1:0] ByteMaskM,
output logic [`LLEN-1:0] ReadDataWordM
);
input logic clk,
input logic FlushW,
input logic ce, // Chip Enable. 0: Holds ReadDataWordM
input logic [1:0] MemRWM, // Read/Write control
input logic [`PA_BITS-1:0] DTIMAdr, // No stall: Execution stage memory address. Stall: Memory stage memory address
input logic [`LLEN-1:0] WriteDataM, // Write data from IEU
input logic [`LLEN/8-1:0] ByteMaskM, // Selects which bytes within a word to write
output logic [`LLEN-1:0] ReadDataWordM // Read data before subword selection
);
logic we;
@ -43,6 +48,6 @@ module dtim(
assign we = MemRWM[0] & ~FlushW; // have to ignore write if Trap.
ram1p1rwbe #(.DEPTH(`DTIM_RANGE/8), .WIDTH(`LLEN))
ram(.clk, .ce, .we, .bwe(ByteMaskM), .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataWordM), .din(WriteDataM));
ram(.clk, .ce, .we, .bwe(ByteMaskM), .addr(DTIMAdr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataWordM), .din(WriteDataM));
endmodule

View File

@ -1,8 +1,9 @@
///////////////////////////////////////////
// endianswap.sv
//
// Written: David_Harris@hmc.edu 7 May 2022
// Modified:
// Written: David_Harris@hmc.edu
// Created: 7 May 2022
// Modified: 18 January 2023
//
// Purpose: Swap byte order for Big-Endian accesses
//

View File

@ -1,11 +1,14 @@
///////////////////////////////////////////
// lrsc.sv
//
// Written: David_Harris@hmc.edu 17 July 2021
// Modified:
// Written: David_Harris@hmc.edu
// Created: 17 July 2021
// Modified: 18 January 2023
//
// Purpose: Load Reserved / Store Conditional unit
// Track the reservation and squash the store if it fails
//
// Documentation: RISC-V System on Chip Design Chapter 14 (Figure ***)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//

View File

@ -131,7 +131,7 @@ module lsu (
logic LSULoadAccessFaultM; // Load acces fault
logic LSUStoreAmoAccessFaultM; // Store access fault
logic IgnoreRequestTLB; // On either ITLB or DTLB miss, ignore miss so HPTW can handle
logic IgnoreRequest; // On FlushM, ignore TLB miss
logic IgnoreRequest; // On FlushM or TLB miss ignore memory operation
logic SelDTIM; // Select DTIM rather than bus or D$
@ -232,17 +232,19 @@ module lsu (
// **** fix ReadDataWordM to be LLEN. ByteMask is wrong length.
// **** create config to support DTIM with floating point.
dtim dtim(.clk, .ce(~GatedStallW), .MemRWM(DTIMMemRWM),
.Adr(DTIMAdr), .FlushW, .WriteDataM(LSUWriteDataM),
.DTIMAdr, .FlushW, .WriteDataM(LSUWriteDataM),
.ReadDataWordM(DTIMReadDataWordM[`XLEN-1:0]), .ByteMaskM(ByteMaskM[`XLEN/8-1:0]));
end else begin
end
if (`BUS) begin : bus
localparam integer LLENWORDSPERLINE = `DCACHE ? `DCACHE_LINELENINBITS/`LLEN : 1; // Number of LLEN words in cacheline
localparam integer LLENLOGBWPL = `DCACHE ? $clog2(LLENWORDSPERLINE) : 1; // Log2 of ^
localparam integer BEATSPERLINE = `DCACHE ? `DCACHE_LINELENINBITS/`AHBW : 1; // Number of AHBW words (beats) in cacheline
localparam integer AHBWLOGBWPL = `DCACHE ? $clog2(BEATSPERLINE) : 1; // Log2 of ^
if(`DCACHE) begin : dcache
localparam integer LINELEN = `DCACHE ? `DCACHE_LINELENINBITS : `XLEN; // Number of bytes in cacheline
localparam integer LLENWORDSPERLINE = `DCACHE_LINELENINBITS/`LLEN; // Number of LLEN words in cacheline
localparam integer LLENLOGBWPL = $clog2(LLENWORDSPERLINE); // Log2 of ^
localparam integer BEATSPERLINE = `DCACHE_LINELENINBITS/`AHBW; // Number of AHBW words (beats) in cacheline
localparam integer AHBWLOGBWPL = $clog2(BEATSPERLINE); // Log2 of ^
localparam integer LINELEN = `DCACHE_LINELENINBITS; // Number of bits in cacheline
localparam integer LLENPOVERAHBW = `LLEN / `AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation)
logic [LINELEN-1:0] FetchBuffer; // Temporary buffer to hold partially fetched cacheline
logic [`PA_BITS-1:0] DCacheBusAdr; // Cacheline address to fetch or writeback.
logic [AHBWLOGBWPL-1:0] BeatCount; // Position within a cacheline. ahbcacheinterface to cache
@ -250,7 +252,6 @@ module lsu (
logic SelBusBeat; // ahbcacheinterface selects postion in cacheline with BeatCount
logic [1:0] CacheBusRW; // Cache sends request to ahbcacheinterface
logic [1:0] BusRW; // Uncached bus memory access
localparam integer LLENPOVERAHBW = `LLEN / `AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation)
logic CacheableOrFlushCacheM; // Memory address is cacheable or operation is a cache flush
logic [1:0] CacheRWM; // Cache read (10), write (01), AMO (11)
logic [1:0] CacheAtomicM; // Cache AMO
@ -272,7 +273,7 @@ module lsu (
.FetchBuffer, .CacheBusRW,
.CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0));
ahbcacheinterface #(.BEATSPERLINE(BEATSPERLINE), .LINELEN(LINELEN), .LOGWPL(AHBWLOGBWPL), .CACHE_ENABLED(`DCACHE)) ahbcacheinterface(
ahbcacheinterface #(.BEATSPERLINE(BEATSPERLINE), .AHBWLOGBWPL(AHBWLOGBWPL), .LINELEN(LINELEN), .LLENPOVERAHBW(LLENPOVERAHBW)) ahbcacheinterface(
.HCLK(clk), .HRESETn(~reset), .Flush(FlushW),
.HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB),
.HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY),

View File

@ -1,11 +1,14 @@
///////////////////////////////////////////
// subwordread.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
// Written: David_Harris@hmc.edu
// Created: 9 January 2021
// Modified: 18 January 2023
//
// Purpose: Extract subwords and sign extend for reads
//
// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University

View File

@ -1,11 +1,14 @@
///////////////////////////////////////////
// subwordwrite.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
// Written: David_Harris@hmc.edu
// Created: 9 January 2021
// Modified: 18 January 2023
//
// Purpose: Masking and muxing for subword writes
//
// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University

View File

@ -1,11 +1,14 @@
///////////////////////////////////////////
// swbytemask.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
// Written: David_Harris@hmc.edu
// Created: 9 January 2021
// Modified: 18 January 2023
//
// Purpose: On-chip RAM, external to core
//
// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University

View File

@ -98,7 +98,6 @@ module hptw (
logic [2:0] HPTWSize; // 32 or 64 bit access
(* mark_debug = "true" *) statetype WalkerState, NextWalkerState, InitialWalkerState;
// map hptw access faults onto either the original LSU load/store fault or instruction access fault
assign LoadAccessFaultM = WalkerState == IDLE ? LSULoadAccessFaultM : (LSULoadAccessFaultM | LSUStoreAmoAccessFaultM) & DTLBWalk & MemRWM[1] & ~MemRWM[0];
assign StoreAmoAccessFaultM = WalkerState == IDLE ? LSUStoreAmoAccessFaultM : (LSULoadAccessFaultM | LSUStoreAmoAccessFaultM) & DTLBWalk & MemRWM[0];
@ -189,13 +188,13 @@ module hptw (
// FSM to track PageType based on the levels of the page table traversed
flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType);
always_comb
case (WalkerState)
L3_RD: NextPageType = 2'b11; // terapage
L2_RD: NextPageType = 2'b10; // gigapage
L1_RD: NextPageType = 2'b01; // megapage
L0_RD: NextPageType = 2'b00; // kilopage
default: NextPageType = PageType;
endcase
case (WalkerState)
L3_RD: NextPageType = 2'b11; // terapage
L2_RD: NextPageType = 2'b10; // gigapage
L1_RD: NextPageType = 2'b01; // megapage
L0_RD: NextPageType = 2'b00; // kilopage
default: NextPageType = PageType;
endcase
// HPTWAdr muxing
if (`XLEN==32) begin // RV32

View File

@ -107,16 +107,10 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) (
.Cacheable, .Idempotent, .SelTIM,
.PMAInstrAccessFaultF, .PMALoadAccessFaultM, .PMAStoreAmoAccessFaultM);
if (`PMP_ENTRIES > 0) // instantiate PMP
pmpchecker pmpchecker(.PhysicalAddress, .PrivilegeModeW,
.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW,
.ExecuteAccessF, .WriteAccessM, .ReadAccessM,
.PMPInstrAccessFaultF, .PMPLoadAccessFaultM, .PMPStoreAmoAccessFaultM);
else begin
assign PMPInstrAccessFaultF = 0;
assign PMPLoadAccessFaultM = 0;
assign PMPStoreAmoAccessFaultM = 0;
end
pmpchecker pmpchecker(.PhysicalAddress, .PrivilegeModeW,
.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW,
.ExecuteAccessF, .WriteAccessM, .ReadAccessM,
.PMPInstrAccessFaultF, .PMPLoadAccessFaultM, .PMPStoreAmoAccessFaultM);
// Access faults
// If TLB miss and translating we want to not have faults from the PMA and PMP checkers.

View File

@ -49,28 +49,34 @@ module pmpchecker (
output logic PMPStoreAmoAccessFaultM
);
// Bit i is high when the address falls in PMP region i
logic EnforcePMP;
logic [`PMP_ENTRIES-1:0] Match; // physical address matches one of the pmp ranges
logic [`PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address.
logic [`PMP_ENTRIES-1:0] Active; // PMP register i is non-null
logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set
logic [`PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i]
if (`PMP_ENTRIES > 0) begin
// Bit i is high when the address falls in PMP region i
logic EnforcePMP;
logic [`PMP_ENTRIES-1:0] Match; // physical address matches one of the pmp ranges
logic [`PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address.
logic [`PMP_ENTRIES-1:0] Active; // PMP register i is non-null
logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set
logic [`PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i]
pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0](
.PhysicalAddress,
.PMPCfg(PMPCFG_ARRAY_REGW),
.PMPAdr(PMPADDR_ARRAY_REGW),
.PAgePMPAdrIn({PAgePMPAdr[`PMP_ENTRIES-2:0], 1'b1}),
.PAgePMPAdrOut(PAgePMPAdr),
.Match, .Active, .L, .X, .W, .R);
pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0](
.PhysicalAddress,
.PMPCfg(PMPCFG_ARRAY_REGW),
.PMPAdr(PMPADDR_ARRAY_REGW),
.PAgePMPAdrIn({PAgePMPAdr[`PMP_ENTRIES-2:0], 1'b1}),
.PAgePMPAdrOut(PAgePMPAdr),
.Match, .Active, .L, .X, .W, .R);
priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // combine the match signal from all the adress decoders to find the first one that matches.
priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // combine the match signal from all the adress decoders to find the first one that matches.
// Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region
assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |(L & FirstMatch) : |Active;
// Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region
assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |(L & FirstMatch) : |Active;
assign PMPInstrAccessFaultF = EnforcePMP & ExecuteAccessF & ~|(X & FirstMatch) ;
assign PMPStoreAmoAccessFaultM = EnforcePMP & WriteAccessM & ~|(W & FirstMatch) ;
assign PMPLoadAccessFaultM = EnforcePMP & ReadAccessM & ~|(R & FirstMatch) ;
assign PMPInstrAccessFaultF = EnforcePMP & ExecuteAccessF & ~|(X & FirstMatch) ;
assign PMPStoreAmoAccessFaultM = EnforcePMP & WriteAccessM & ~|(W & FirstMatch) ;
assign PMPLoadAccessFaultM = EnforcePMP & ReadAccessM & ~|(R & FirstMatch) ;
end else begin
assign PMPInstrAccessFaultF = 0;
assign PMPStoreAmoAccessFaultM = 0;
assign PMPLoadAccessFaultM = 0;
end
endmodule

View File

@ -63,12 +63,8 @@ module tlbcontrol #(parameter ITLB = 0) (
// Determine whether TLB is being used
assign TLBAccess = ReadAccess | WriteAccess;
if (`XLEN==64) // Check whether upper bits of 64-bit virtual addressses are all equal
vm64check vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequalPageFault);
else begin
assign SV39Mode = 0;
assign UpperBitsUnequalPageFault = 0;
end
// Check that upper bits are legal (all 0s or all 1s)
vm64check vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequalPageFault);
// unswizzle useful PTE bits
assign {PTE_D, PTE_A} = PTEAccessBits[7:6];

View File

@ -35,12 +35,16 @@ module vm64check (
output logic UpperBitsUnequalPageFault
);
logic eq_63_47, eq_46_38;
if (`XLEN == 64) begin
assign SV39Mode = (SATP_MODE == `SV39);
assign SV39Mode = (SATP_MODE == `SV39);
// page fault if upper bits aren't all the same
assign eq_46_38 = &(VAdr[46:38]) | ~|(VAdr[46:38]);
assign eq_63_47 = &(VAdr[63:47]) | ~|(VAdr[63:47]);
assign UpperBitsUnequalPageFault = SV39Mode ? ~(eq_63_47 & eq_46_38) : ~eq_63_47;
// page fault if upper bits aren't all the same
logic eq_63_47, eq_46_38;
assign eq_46_38 = &(VAdr[46:38]) | ~|(VAdr[46:38]);
assign eq_63_47 = &(VAdr[63:47]) | ~|(VAdr[63:47]);
assign UpperBitsUnequalPageFault = SV39Mode ? ~(eq_63_47 & eq_46_38) : ~eq_63_47;
end else begin
assign SV39Mode = 0;
assign UpperBitsUnequalPageFault = 0;
end
endmodule

View File

@ -8,12 +8,10 @@
// Based on RISC-V spec (https://github.com/riscv/riscv-plic-spec/blob/master/riscv-plic.adoc)
// With clarifications from ROA's existing implementation (https://roalogic.github.io/plic/docs/AHB-Lite_PLIC_Datasheet.pdf)
// Supports only 1 target core and only a global threshold.
//
// Documentation: RISC-V System on Chip Design Chapter 15
// This PLIC implementation serves as both the PLIC Gateways and PLIC Core.
// It assumes interrupt sources are level-triggered wires.
//
// *** Big questions:
// Do we detect requests as level-triggered or edge-trigged?
// If edge-triggered, do we want to allow 1 source to be able to make a number of repeated requests?
// Documentation: RISC-V System on Chip Design Chapter 15
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
@ -111,7 +109,7 @@ module plic_apb (
if (memwrite)
casez(entry)
24'h0000??: intPriority[entry[7:2]] <= #1 Din[2:0];
`ifdef PLIC_NUM_SRC_LT_32 // *** switch to a generate for loop so as to deprecate PLIC_NUM_SRC_LT_32 and allow up to 1023 sources
`ifdef PLIC_NUM_SRC_LT_32 // eventually switch to a generate for loop so as to deprecate PLIC_NUM_SRC_LT_32 and allow up to 1023 sources
24'h002000: intEn[0][`N:1] <= #1 Din[`N:1];
24'h002080: intEn[1][`N:1] <= #1 Din[`N:1];
`endif
@ -172,8 +170,7 @@ module plic_apb (
end
// pending interrupt requests
assign nextIntPending = (intPending | requests) & ~intInProgress; // dh changed back 7/9/22 see if Buildroot still boots. Confirmed to boot successfully.
//assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion
assign nextIntPending = (intPending | requests) & ~intInProgress;
flopr #(`N) intPendingFlop(PCLK,~PRESETn,nextIntPending,intPending);
// context-dependent signals
@ -248,7 +245,7 @@ module plic_apb (
end
end
// is the max priority > threshold?
// *** would it be any better to first priority encode maxPriority into binary and then ">" with threshold?
// would it be any better to first priority encode maxPriority into binary and then ">" with threshold?
assign MExtInt = |(threshMask[0] & priorities_with_irqs[0]);
assign SExtInt = |(threshMask[1] & priorities_with_irqs[1]);
endmodule

View File

@ -1,251 +0,0 @@
`include "wally-config.vh"
`define NUM_REGS 32
`define NUM_CSRS 4096
`define PRINT_PC_INSTR 1
`define PRINT_MOST 1
`define PRINT_ALL 0
module rvviTrace #(
parameter int ILEN = `XLEN, // Instruction length in bits
parameter int XLEN = `XLEN, // GPR length in bits
parameter int FLEN = `FLEN, // FPR length in bits
parameter int VLEN = 0, // Vector register size in bits
parameter int NHART = 1, // Number of harts reported
parameter int RETIRE = 1) // Number of instructions that can retire during valid event
();
localparam NUMREGS = `E_SUPPORTED ? 16 : 32;
// wally specific signals
logic reset;
logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW;
logic [`XLEN-1:0] InstrRawD, InstrRawE, InstrRawM, InstrRawW;
logic InstrValidM, InstrValidW;
logic StallE, StallM, StallW;
logic FlushD, FlushE, FlushM, FlushW;
logic TrapM, TrapW;
logic IntrF, IntrD, IntrE, IntrM, IntrW;
logic HaltM, HaltW;
logic [1:0] PrivilegeModeW;
logic [`XLEN-1:0] rf[NUMREGS];
logic [NUMREGS-1:0] rf_wb;
logic [4:0] rf_a3;
logic rf_we3;
logic [`XLEN-1:0] frf[32];
logic [`NUM_REGS-1:0] frf_wb;
logic [4:0] frf_a4;
logic frf_we4;
logic [`XLEN-1:0] CSRArray [logic[11:0]];
logic CSRWriteM, CSRWriteW;
logic [11:0] CSRAdrM, CSRAdrW;
// tracer signals
logic clk;
logic valid;
logic [63:0] order [(NHART-1):0][(RETIRE-1):0];
logic [ILEN-1:0] insn [(NHART-1):0][(RETIRE-1):0];
logic intr [(NHART-1):0][(RETIRE-1):0];
logic [(XLEN-1):0] pc_rdata [(NHART-1):0][(RETIRE-1):0];
logic [(XLEN-1):0] pc_wdata [(NHART-1):0][(RETIRE-1):0];
logic trap [(NHART-1):0][(RETIRE-1):0];
logic halt [(NHART-1):0][(RETIRE-1):0];
logic [1:0] mode [(NHART-1):0][(RETIRE-1):0];
logic [1:0] ixl [(NHART-1):0][(RETIRE-1):0];
logic [`NUM_REGS-1:0][(XLEN-1):0] x_wdata [(NHART-1):0][(RETIRE-1):0];
logic [`NUM_REGS-1:0] x_wb [(NHART-1):0][(RETIRE-1):0];
logic [`NUM_REGS-1:0][(XLEN-1):0] f_wdata [(NHART-1):0][(RETIRE-1):0];
logic [`NUM_REGS-1:0] f_wb [(NHART-1):0][(RETIRE-1):0];
logic [4095:0][(XLEN-1):0] csr [(NHART-1):0][(RETIRE-1):0];
logic [4095:0] csr_wb [(NHART-1):0][(RETIRE-1):0];
logic lrsc_cancel[(NHART-1):0][(RETIRE-1):0];
assign clk = testbench.dut.clk;
// assign InstrValidF = testbench.dut.core.ieu.InstrValidF; // not needed yet
assign InstrValidD = testbench.dut.core.ieu.c.InstrValidD;
assign InstrValidE = testbench.dut.core.ieu.c.InstrValidE;
assign InstrValidM = testbench.dut.core.ieu.InstrValidM;
assign InstrRawD = testbench.dut.core.ifu.InstrRawD;
assign PCNextF = testbench.dut.core.ifu.PCNextF;
assign PCF = testbench.dut.core.ifu.PCF;
assign PCD = testbench.dut.core.ifu.PCD;
assign PCE = testbench.dut.core.ifu.PCE;
assign PCM = testbench.dut.core.ifu.PCM;
assign reset = testbench.reset;
assign StallE = testbench.dut.core.StallE;
assign StallM = testbench.dut.core.StallM;
assign StallW = testbench.dut.core.StallW;
assign FlushD = testbench.dut.core.FlushD;
assign FlushE = testbench.dut.core.FlushE;
assign FlushM = testbench.dut.core.FlushM;
assign FlushW = testbench.dut.core.FlushW;
assign TrapM = testbench.dut.core.TrapM;
assign HaltM = testbench.DCacheFlushStart;
assign PrivilegeModeW = testbench.dut.core.priv.priv.privmode.PrivilegeModeW;
assign STATUS_SXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_SXL;
assign STATUS_UXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_UXL;
always_comb begin
// machine CSRs
// *** missing PMP and performance counters.
CSRArray[12'h300] = testbench.dut.core.priv.priv.csr.csrm.MSTATUS_REGW;
CSRArray[12'h310] = testbench.dut.core.priv.priv.csr.csrm.MSTATUSH_REGW;
CSRArray[12'h305] = testbench.dut.core.priv.priv.csr.csrm.MTVEC_REGW;
CSRArray[12'h341] = testbench.dut.core.priv.priv.csr.csrm.MEPC_REGW;
CSRArray[12'h306] = testbench.dut.core.priv.priv.csr.csrm.MCOUNTEREN_REGW;
CSRArray[12'h320] = testbench.dut.core.priv.priv.csr.csrm.MCOUNTINHIBIT_REGW;
CSRArray[12'h302] = testbench.dut.core.priv.priv.csr.csrm.MEDELEG_REGW;
CSRArray[12'h303] = testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW;
CSRArray[12'h344] = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW;
CSRArray[12'h304] = testbench.dut.core.priv.priv.csr.csrm.MIE_REGW;
CSRArray[12'h301] = testbench.dut.core.priv.priv.csr.csrm.MISA_REGW;
CSRArray[12'hF14] = testbench.dut.core.priv.priv.csr.csrm.MHARTID_REGW;
CSRArray[12'h340] = testbench.dut.core.priv.priv.csr.csrm.MSCRATCH_REGW;
CSRArray[12'h342] = testbench.dut.core.priv.priv.csr.csrm.MCAUSE_REGW;
CSRArray[12'h343] = testbench.dut.core.priv.priv.csr.csrm.MTVAL_REGW;
CSRArray[12'hF11] = 0;
CSRArray[12'hF12] = 0;
CSRArray[12'hF13] = `XLEN'h100;
CSRArray[12'hF15] = 0;
CSRArray[12'h34A] = 0;
// MCYCLE and MINSTRET
CSRArray[12'hB00] = testbench.dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[0];
CSRArray[12'hB02] = testbench.dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[2];
// supervisor CSRs
CSRArray[12'h100] = testbench.dut.core.priv.priv.csr.csrs.SSTATUS_REGW;
CSRArray[12'h104] = testbench.dut.core.priv.priv.csr.csrm.MIE_REGW & 12'h222;
CSRArray[12'h105] = testbench.dut.core.priv.priv.csr.csrs.STVEC_REGW;
CSRArray[12'h141] = testbench.dut.core.priv.priv.csr.csrs.SEPC_REGW;
CSRArray[12'h106] = testbench.dut.core.priv.priv.csr.csrs.SCOUNTEREN_REGW;
CSRArray[12'h180] = testbench.dut.core.priv.priv.csr.csrs.SATP_REGW;
CSRArray[12'h140] = testbench.dut.core.priv.priv.csr.csrs.csrs.SSCRATCH_REGW;
CSRArray[12'h143] = testbench.dut.core.priv.priv.csr.csrs.csrs.STVAL_REGW;
CSRArray[12'h142] = testbench.dut.core.priv.priv.csr.csrs.csrs.SCAUSE_REGW;
CSRArray[12'h144] = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW & & 12'h222 & testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW;
// user CSRs
CSRArray[12'h001] = testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW;
CSRArray[12'h002] = testbench.dut.core.priv.priv.csr.csru.FRM_REGW;
CSRArray[12'h003] = {testbench.dut.core.priv.priv.csr.csru.FRM_REGW, testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW};
end
genvar index;
assign rf[0] = '0;
for(index = 1; index < NUMREGS; index += 1)
assign rf[index] = testbench.dut.core.ieu.dp.regf.rf[index];
assign rf_a3 = testbench.dut.core.ieu.dp.regf.a3;
assign rf_we3 = testbench.dut.core.ieu.dp.regf.we3;
always_comb begin
rf_wb <= '0;
if(rf_we3)
rf_wb[rf_a3] <= 1'b1;
end
for(index = 0; index < NUMREGS; index += 1)
assign frf[index] = testbench.dut.core.fpu.fpu.fregfile.rf[index];
assign frf_a4 = testbench.dut.core.fpu.fpu.fregfile.a4;
assign frf_we4 = testbench.dut.core.fpu.fpu.fregfile.we4;
always_comb begin
frf_wb <= '0;
if(frf_we4)
frf_wb[frf_a4] <= 1'b1;
end
assign CSRAdrM = testbench.dut.core.priv.priv.csr.CSRAdrM;
assign CSRWriteM = testbench.dut.core.priv.priv.csr.CSRWriteM;
// pipeline to writeback stage
flopenrc #(`XLEN) InstrRawEReg (clk, reset, FlushE, ~StallE, InstrRawD, InstrRawE);
flopenrc #(`XLEN) InstrRawMReg (clk, reset, FlushM, ~StallM, InstrRawE, InstrRawM);
flopenrc #(`XLEN) InstrRawWReg (clk, reset, FlushW, ~StallW, InstrRawM, InstrRawW);
flopenrc #(`XLEN) PCWReg (clk, reset, FlushW, ~StallW, PCM, PCW);
flopenrc #(1) InstrValidMReg (clk, reset, FlushW, ~StallW, InstrValidM, InstrValidW);
flopenrc #(1) TrapWReg (clk, reset, 1'b0, ~StallW, TrapM, TrapW);
flopenrc #(1) HaltWReg (clk, reset, 1'b0, ~StallW, HaltM, HaltW);
flopenrc #(1) IntrFReg (clk, reset, 1'b0, ~StallF, TrapM, IntrF);
flopenrc #(1) IntrDReg (clk, reset, FlushD, ~StallD, IntrF, IntrD);
flopenrc #(1) IntrEReg (clk, reset, FlushE, ~StallE, IntrD, IntrE);
flopenrc #(1) IntrMReg (clk, reset, FlushM, ~StallM, IntrE, IntrM);
flopenrc #(1) IntrWReg (clk, reset, FlushW, ~StallW, IntrM, IntrW);
flopenrc #(12) CSRAdrWReg (clk, reset, FlushW, ~StallW, CSRAdrM, CSRAdrW);
flopenrc #(1) CSRWriteWReg (clk, reset, FlushW, ~StallW, CSRWriteM, CSRWriteW);
// Initially connecting the writeback stage signals, but may need to use M stage
// and gate on ~FlushW.
assign valid = InstrValidW & ~StallW & ~FlushW;
assign order[0][0] = CSRArray[12'hB02];
assign insn[0][0] = InstrRawW;
assign pc_rdata[0][0] = PCW;
assign trap[0][0] = TrapW;
assign halt[0][0] = HaltW;
assign intr[0][0] = IntrW;
assign mode[0][0] = PrivilegeModeW;
assign ixl[0][0] = PrivilegeModeW == 2'b11 ? 2'b10 :
PrivilegeModeW == 2'b01 ? STATUS_SXL : STATUS_UXL;
assign pc_wdata[0][0] = ~FlushW ? PCM :
~FlushM ? PCE :
~FlushE ? PCD :
~FlushD ? PCF : PCNextF;
for(index = 0; index < `NUM_REGS; index += 1) begin
assign x_wdata[0][0][index] = rf[index];
assign x_wb[0][0][index] = rf_wb[index];
assign f_wdata[0][0][index] = frf[index];
assign f_wb[0][0][index] = frf_wb[index];
end
always_comb begin
csr_wb[0][0] <= '0;
if(CSRWriteW)
csr_wb[0][0][CSRAdrW] <= 1'b1;
end
integer index3;
always_comb begin
for(index3 = 0; index3 < `NUM_CSRS; index3 += 1) begin
if(CSRArray.exists(index3))
csr[0][0][index3] = CSRArray[index3];
else
csr[0][0][index3] = '0;
end
end
// *** implementation only cancel? so sc does not clear?
assign lrsc_cancel[0][0] = '0;
integer index2;
always_ff @(posedge clk) begin
if(valid) begin
if(`PRINT_PC_INSTR & !(`PRINT_ALL | `PRINT_MOST))
$display("order = %08d, PC = %08x, insn = %08x", order[0][0], pc_rdata[0][0], insn[0][0]);
else if(`PRINT_MOST & !`PRINT_ALL)
$display("order = %08d, PC = %010x, insn = %08x, trap = %1d, halt = %1d, intr = %1d, mode = %1x, ixl = %1x, pc_wdata = %010x, x%02d = %016x, f%02d = %016x, csr%03x = %016x",
order[0][0], pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], intr[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0], rf_a3, x_wdata[0][0][rf_a3], frf_a4, f_wdata[0][0][frf_a4], CSRAdrW, csr[0][0][CSRAdrW]);
else if(`PRINT_ALL) begin
$display("order = %08d, PC = %08x, insn = %08x, trap = %1d, halt = %1d, intr = %1d, mode = %1x, ixl = %1x, pc_wdata = %08x",
order[0][0], pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], intr[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0]);
for(index2 = 0; index2 < `NUM_REGS; index2 += 1) begin
$display("x%02d = %08x", index2, x_wdata[0][0][index2]);
end
for(index2 = 0; index2 < `NUM_REGS; index2 += 1) begin
$display("f%02d = %08x", index2, f_wdata[0][0][index2]);
end
end
end
if(HaltW) $stop();
end
endmodule

View File

@ -155,7 +155,7 @@ module testbench;
`define MCOUNTEREN `CSR_BASE.csrm.mcounteren.MCOUNTERENreg.q
`define SCOUNTEREN `CSR_BASE.csrs.csrs.SCOUNTERENreg.q
`define MSCRATCH `CSR_BASE.csrm.MSCRATCHreg.q
`define SSCRATCH `CSR_BASE.csrs.csrs.csrs.SSCRATCHreg.q
`define SSCRATCH `CSR_BASE.csrs.csrs.SSCRATCHreg.q
`define MTVEC `CSR_BASE.csrm.MTVECreg.q
`define STVEC `CSR_BASE.csrs.csrs.STVECreg.q
`define SATP `CSR_BASE.csrs.csrs.genblk1.SATPreg.q

View File

@ -692,55 +692,3 @@ task automatic updateProgramAddrLabelArray;
$fclose(ProgramAddrMapFP);
endtask
`define NUM_REGS 32
`define NUM_CSRS 4096
module rvviTrace();
// wally specific signals
logic reset;
logic [`XLEN-1:0] PCM, PCW;
logic [`XLEN-1:0] InstrRawD, InstrRawE, InstrRawM, InstrRawW;
logic InstrValidM, InstrValidW;
logic StallE, StallM, StallW;
logic FlushE, FlushM, FlushW;
// tracer signals
logic clk;
logic valid;
logic [`XLEN-1:0] insn;
logic [`XLEN-1:0 ] pc_rdata;
assign clk = testbench.dut.clk;
assign InstrValidM = testbench.dut.core.ieu.InstrValidM;
assign InstrRawD = testbench.dut.core.ifu.InstrRawD;
assign PCM = testbench.dut.core.ifu.PCM;
assign reset = testbench.reset;
assign StallE = testbench.dut.core.StallE;
assign StallM = testbench.dut.core.StallM;
assign StallW = testbench.dut.core.StallW;
assign FlushE = testbench.dut.core.FlushE;
assign FlushM = testbench.dut.core.FlushM;
assign FlushW = testbench.dut.core.FlushW;
// pipeline to writeback stage
flopenrc #(`XLEN) InstrRawEReg (clk, reset, FlushE, ~StallE, InstrRawD, InstrRawE);
flopenrc #(`XLEN) InstrRawMReg (clk, reset, FlushM, ~StallM, InstrRawE, InstrRawM);
flopenrc #(`XLEN) InstrRawWReg (clk, reset, FlushW, ~StallW, InstrRawM, InstrRawW);
flopenrc #(`XLEN) PCWReg (clk, reset, FlushW, ~StallW, PCM, PCW);
flopenrc #(1) InstrValidMReg (clk, reset, FlushW, ~StallW, InstrValidM, InstrValidW);
assign valid = InstrValidW;
assign insn = InstrRawW;
assign pc_rdata = PCW;
always_ff @(posedge clk) begin
if(valid) begin
$display("PC = %x, insn = %x", pc_rdata, insn);
end
end
endmodule