Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

This commit is contained in:
Thomas Fleming 2021-05-03 17:38:13 -04:00
commit 3f7061d557
15 changed files with 365 additions and 478 deletions

View File

@ -1,9 +1,11 @@
#!/bin/bash
# check for warnings in Verilog code
# The verilator lint tool is faster and better than Modelsim so it is best to run this first.
basepath=$(dirname $0)
for config in rv64ic rv32ic; do
echo "$config linting..."
if !(verilator --lint-only "$@" --top-module wallypipelinedsoc "-Iconfig/$config" src/*/*.sv); then
if !(verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/$config" $basepath/src/*/*.sv); then
echo "Exiting after $config lint due to errors or warnings"
exit 1
fi

View File

@ -36,6 +36,11 @@ configs = [
cmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do ../config/rv64ic rv64ic\n!",
grepstr="All tests ran without failures"
),
Config(
name="lints",
cmd="../lint-wally > {}",
grepstr="All lints run with no errors or warnings"
),
]
import multiprocessing, os

View File

@ -3,6 +3,8 @@ quietly virtual function -install /testbench/dut/hart/ifu/icache/cachemem -env /
quietly WaveActivateNextPane {} 0
add wave -noupdate /testbench/clk
add wave -noupdate /testbench/reset
add wave -noupdate /testbench/memfilename
add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE
add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE
@ -19,13 +21,13 @@ add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/DataStall
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/BPPredWrongE
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/RetM
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/TrapM
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/LoadStallD
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/DataStall
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/MulDivStallD
add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF
add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushD
add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushE
@ -36,25 +38,25 @@ add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbe
add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE
add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM
add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW
add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPPredF
add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBValidF
add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPInstrClassF
add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBPredPCF
add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/RASPCF
add wave -noupdate -expand -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePC
add wave -noupdate -expand -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdateEN
add wave -noupdate -expand -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePrediction
add wave -noupdate -expand -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateEN
add wave -noupdate -expand -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdatePC
add wave -noupdate -expand -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateTarget
add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/TargetWrongE
add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/FallThroughWrongE
add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionPCWrongE
add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/InstrClassE
add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionInstrClassWrongE
add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredClassNonCFIWrongE
add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE
add wave -noupdate -expand -group Bpred /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPPredF
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBValidF
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPInstrClassF
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBPredPCF
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/RASPCF
add wave -noupdate -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePC
add wave -noupdate -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdateEN
add wave -noupdate -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePrediction
add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateEN
add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdatePC
add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateTarget
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/TargetWrongE
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/FallThroughWrongE
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionPCWrongE
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/InstrClassE
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionInstrClassWrongE
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredClassNonCFIWrongE
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE
add wave -noupdate -group Bpred /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE
add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrFName
add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrD
add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrE
@ -112,8 +114,6 @@ add wave -noupdate -group dcache /testbench/dut/hart/MemPAdrM
add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemAccessM
add wave -noupdate -group dcache /testbench/dut/hart/dmem/AtomicMaskedM
add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemAckW
add wave -noupdate -group dcache /testbench/dut/hart/dmem/genblk1/lrM
add wave -noupdate -group dcache /testbench/dut/hart/dmem/genblk1/scM
add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D
add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D
add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E
@ -203,7 +203,6 @@ add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbenc
add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/AlignedInstrRawD
add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FlushDLastCyclen
add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/InstrRawD
add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCNextPF
add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF
add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF
add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPFinalF
@ -223,10 +222,11 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED
add wave -noupdate /testbench/dut/hart/dmem/genblk1/scM
add wave -noupdate /testbench/dut/hart/ifu/icache/PCTagF
add wave -noupdate /testbench/dut/hart/ifu/icache/cachemem/OldReadPAdr
TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 2} {12215488 ns} 0} {{Cursor 4} {22127 ns} 0}
quietly wave cursor active 2
WaveRestoreCursors {{Cursor 2} {9951515 ns} 0} {{Cursor 4} {1318991 ns} 0}
quietly wave cursor active 1
configure wave -namecolwidth 250
configure wave -valuecolwidth 513
configure wave -justifyvalue left
@ -241,4 +241,4 @@ configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ns
update
WaveRestoreZoom {21993 ns} {22181 ns}
WaveRestoreZoom {9951431 ns} {9951599 ns}

View File

@ -125,128 +125,6 @@ module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par
assign DataValid = DataValidBit && (DataTag == ReadTag);
endmodule
// Read-only direct-mapped cache memory with a read-enable input.
// Stores NUMLINES lines of LINESIZE bits plus one tag and one valid bit per line.
// A lookup presents a physical address split across ReadUpperPAdr/ReadLowerAdr and
// gets back a 32-bit DataWord and a DataValid hit indication.
// NOTE(review): flopenr and Sram1Read1Write are defined elsewhere; the comments below
// describe only how they are wired here, not their internal timing.
module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) (
// Pipeline stuff
input logic clk,
input logic reset,
// Passed to ReadPAdrFlop below - presumably the enable that lets a new read address be captured (confirm against flopenr)
input logic re,
// If flush is high, invalidate the entire cache
input logic flush,
// Select which address to read (split into upper bits and lower 12 bits for efficiency's sake)
input logic [`XLEN-1:12] ReadUpperPAdr,
input logic [11:0] ReadLowerAdr,
// Write new data to the cache
input logic WriteEnable,
input logic [LINESIZE-1:0] WriteLine,
input logic [`XLEN-1:0] WritePAdr,
// Output the word, as well as if it is valid
output logic [31:0] DataWord, // *** was WORDSIZE-1
output logic DataValid
);
// Various compile-time constants
// Address layout, from the least significant bit upward:
//   [WORDWIDTH-1:0]        byte within a word
//   [OFFSETEND:OFFSETBEGIN] word offset within a line
//   [SETEND:SETBEGIN]       set (line) index
//   [TAGEND:TAGBEGIN]       tag
localparam integer WORDWIDTH = $clog2(WORDSIZE/8);
localparam integer OFFSETWIDTH = $clog2(LINESIZE/WORDSIZE);
localparam integer SETWIDTH = $clog2(NUMLINES);
localparam integer TAGWIDTH = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH;
localparam integer OFFSETBEGIN = WORDWIDTH;
localparam integer OFFSETEND = OFFSETBEGIN+OFFSETWIDTH-1;
localparam integer SETBEGIN = OFFSETEND+1;
localparam integer SETEND = SETBEGIN + SETWIDTH - 1;
localparam integer TAGBEGIN = SETEND + 1;
localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1;
// Machinery to read from and write to the correct addresses in memory
// ReadPAdr is the address presented now; OldReadPAdr is the copy held by ReadPAdrFlop
logic [`XLEN-1:0] ReadPAdr;
logic [`XLEN-1:0] OldReadPAdr;
logic [OFFSETWIDTH-1:0] ReadOffset, WriteOffset;
logic [SETWIDTH-1:0] ReadSet, WriteSet;
logic [TAGWIDTH-1:0] ReadTag, WriteTag;
logic [LINESIZE-1:0] ReadLine;
logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed;
// Machinery to check if a given read is valid and is the desired value
logic [TAGWIDTH-1:0] DataTag;
logic [NUMLINES-1:0] ValidOut;
logic DataValidBit;
// Hold the read address in a flop so it can be compared against the tag and used
// to pick the output word
flopenr #(`XLEN) ReadPAdrFlop(clk, reset, re, ReadPAdr, OldReadPAdr);
// Assign the read and write addresses in cache memory
// The set index comes from the current address (ReadPAdr), while the offset and tag
// come from the flopped address (OldReadPAdr).
// NOTE(review): this presumably lines up with a one-cycle read latency in
// Sram1Read1Write - confirm against that module.
always_comb begin
ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN];
ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
ReadSet = ReadPAdr[SETEND:SETBEGIN];
ReadTag = OldReadPAdr[TAGEND:TAGBEGIN];
WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN];
WriteSet = WritePAdr[SETEND:SETBEGIN];
WriteTag = WritePAdr[TAGEND:TAGBEGIN];
end
// Depth is number of bits in one "word" of the memory, width is number of such words
// Line storage: one LINESIZE-bit entry per set. The .* connects any remaining ports by name.
Sram1Read1Write #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem (
.*,
.ReadAddr(ReadSet),
.ReadData(ReadLine),
.WriteAddr(WriteSet),
.WriteData(WriteLine)
);
// Tag storage: one TAGWIDTH-bit entry per set, read and written at the same indices as cachemem
Sram1Read1Write #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags (
.*,
.ReadAddr(ReadSet),
.ReadData(DataTag),
.WriteAddr(WriteSet),
.WriteData(WriteTag)
);
// Pick the right bits coming out the read line
//assign DataWord = ReadLineTransformed[ReadOffset];
//logic [31:0] tempRD;
// OldReadPAdr[4:1] selects a 32-bit window that starts on any 16-bit boundary of the line.
// The bit ranges are hard-coded for a 256-bit line, independent of the LINESIZE parameter.
// At index 15 only 16 bits remain in the line, so the upper half of DataWord is zero-filled.
always_comb begin
case (OldReadPAdr[4:1])
0: DataWord = ReadLine[31:0];
1: DataWord = ReadLine[47:16];
2: DataWord = ReadLine[63:32];
3: DataWord = ReadLine[79:48];
4: DataWord = ReadLine[95:64];
5: DataWord = ReadLine[111:80];
6: DataWord = ReadLine[127:96];
7: DataWord = ReadLine[143:112];
8: DataWord = ReadLine[159:128];
9: DataWord = ReadLine[175:144];
10: DataWord = ReadLine[191:160];
11: DataWord = ReadLine[207:176];
12: DataWord = ReadLine[223:192];
13: DataWord = ReadLine[239:208];
14: DataWord = ReadLine[255:224];
15: DataWord = {16'b0, ReadLine[255:240]};
endcase
end
// Split the line into WORDSIZE-bit words.
// Within this module ReadLineTransformed is referenced only by the commented-out
// DataWord assignment above.
genvar i;
generate
for (i=0; i < LINESIZE/WORDSIZE; i++) begin
assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE];
end
endgenerate
// Correctly handle the valid bits
// reset or flush clears every valid bit; otherwise a write marks its set valid.
// flush is tested in the reset branch but is not in the sensitivity list, so it only
// takes effect on a clock edge (or a reset edge).
// DataValidBit is updated on every triggering edge, outside the if/else, with the
// valid bit of the set currently being read.
always_ff @(posedge clk, posedge reset) begin
if (reset || flush) begin
ValidOut <= {NUMLINES{1'b0}};
end else begin
if (WriteEnable) begin
ValidOut[WriteSet] <= 1;
end
end
DataValidBit <= ValidOut[ReadSet];
end
// A hit requires both a set valid bit and a stored tag equal to the tag of the flopped read address
assign DataValid = DataValidBit && (DataTag == ReadTag);
endmodule
// Write-through direct-mapped memory
module wtdirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) (

21
wally-pipelined/src/cache/sram1rw.sv vendored Normal file
View File

@ -0,0 +1,21 @@
// Single-port synchronous SRAM model: one shared address serves both the
// registered read and the enabled write.
// Naming note: DEPTH is the number of bits in one stored word and WIDTH is the
// number of such words, so Addr is $clog2(WIDTH) bits wide.
// Both actions use nonblocking assignments on the rising clock edge, so reading
// the address that is being written in the same cycle returns the old contents.
module sram1rw #(parameter DEPTH=128, parameter WIDTH=256) (
  input  logic                     clk,
  // shared read/write address
  input  logic [$clog2(WIDTH)-1:0] Addr,
  // read data, registered on the clock edge
  output logic [DEPTH-1:0]         ReadData,
  // write data and its enable
  input  logic [DEPTH-1:0]         WriteData,
  input  logic                     WriteEnable
);

  // WIDTH words of DEPTH bits each
  logic [WIDTH-1:0][DEPTH-1:0] StoredData;

  always_ff @(posedge clk) begin
    // Statement order is irrelevant here: both assignments are nonblocking
    if (WriteEnable) StoredData[Addr] <= WriteData;
    ReadData <= StoredData[Addr];
  end
endmodule

View File

@ -1,90 +1,93 @@
module add3comp2(a, b, c, carry, sum);
/////////////////////////////////////////////////////////////////////////////
//look into different implementations of the compressors?
// //***breaks lint with warnings like: %Warning-UNOPTFLAT: Example path: src/fpu/compressors.sv:37: ASSIGNW
// //%Warning-UNOPTFLAT: Example path: src/fpu/compressors.sv:32: wallypipelinedsoc.hart.fpu.fma1.multiply.genblk5[0].add4.cout
// module add3comp2(a, b, c, carry, sum);
// /////////////////////////////////////////////////////////////////////////////
// //look into different implementations of the compressors?
parameter BITS = 4;
input logic [BITS-1:0] a;
input logic [BITS-1:0] b;
input logic [BITS-1:0] c;
output logic [BITS-1:0] carry;
output logic [BITS-1:0] sum;
genvar i;
// parameter BITS = 4;
// input logic [BITS-1:0] a;
// input logic [BITS-1:0] b;
// input logic [BITS-1:0] c;
// output logic [BITS-1:0] carry;
// output logic [BITS-1:0] sum;
// genvar i;
generate
for(i= 0; i<BITS; i=i+1) begin
sng3comp2 add0(a[i], b[i], c[i], carry[i], sum[i]);
end
endgenerate
// generate
// for(i= 0; i<BITS; i=i+1) begin
// sng3comp2 add0(a[i], b[i], c[i], carry[i], sum[i]);
// end
// endgenerate
endmodule
// endmodule
module add4comp2(a, b, c, d, carry, sum);
/////////////////////////////////////////////////////////////////////////////
// module add4comp2(a, b, c, d, carry, sum);
// /////////////////////////////////////////////////////////////////////////////
parameter BITS = 4;
input logic [BITS-1:0] a;
input logic [BITS-1:0] b;
input logic [BITS-1:0] c;
input logic [BITS-1:0] d;
output logic [BITS:0] carry;
output logic [BITS-1:0] sum;
// parameter BITS = 4;
// input logic [BITS-1:0] a;
// input logic [BITS-1:0] b;
// input logic [BITS-1:0] c;
// input logic [BITS-1:0] d;
// output logic [BITS:0] carry;
// output logic [BITS-1:0] sum;
logic [BITS-1:0] cout;
logic carryTmp;
genvar i;
// logic [BITS-1:0] cout;
// logic carryTmp;
// genvar i;
sng4comp2 add0(a[0], b[0], c[0], d[0], 1'b0, cout[0], carry[0], sum[0]);
// sng4comp2 add0(a[0], b[0], c[0], d[0], 1'b0, cout[0], carry[0], sum[0]);
generate
for(i= 1; i<BITS-1; i=i+1) begin
sng4comp2 add1(a[i], b[i], c[i], d[i], cout[i-1], cout[i], carry[i], sum[i]);
end
endgenerate
// generate
// for(i= 1; i<BITS-1; i=i+1) begin
// sng4comp2 add1(a[i], b[i], c[i], d[i], cout[i-1], cout[i], carry[i], sum[i]);
// end
// endgenerate
sng4comp2 add2(a[BITS-1], b[BITS-1], c[BITS-1], d[BITS-1], cout[BITS-2], cout[BITS-1], carryTmp, sum[BITS-1]);
// sng4comp2 add2(a[BITS-1], b[BITS-1], c[BITS-1], d[BITS-1], cout[BITS-2], cout[BITS-1], carryTmp, sum[BITS-1]);
assign carry[BITS-1] = carryTmp & cout[BITS-1];
assign carry[BITS] = carryTmp ^ cout[BITS-1];
// assign carry[BITS-1] = carryTmp & cout[BITS-1];
// assign carry[BITS] = carryTmp ^ cout[BITS-1];
endmodule
// endmodule
module sng3comp2(a, b, c, carry, sum);
/////////////////////////////////////////////////////////////////////////////
//look into different implementations of the compressors?
// module sng3comp2(a, b, c, carry, sum);
// /////////////////////////////////////////////////////////////////////////////
// //look into different implementations of the compressors?
input logic a;
input logic b;
input logic c;
output logic carry;
output logic sum;
// input logic a;
// input logic b;
// input logic c;
// output logic carry;
// output logic sum;
logic axorb;
// logic axorb;
assign axorb = a ^ b;
assign sum = axorb ^ c;
// assign axorb = a ^ b;
// assign sum = axorb ^ c;
assign carry = axorb ? c : a;
// assign carry = axorb ? c : a;
endmodule
// endmodule
module sng4comp2(a, b, c, d, cin, cout, carry, sum);
/////////////////////////////////////////////////////////////////////////////
//look into pass gate 4:2 counters?
// module sng4comp2(a, b, c, d, cin, cout, carry, sum);
// /////////////////////////////////////////////////////////////////////////////
// //look into pass gate 4:2 counters?
input logic a;
input logic b;
input logic c;
input logic d;
input logic cin;
output logic cout;
output logic carry;
output logic sum;
// input logic a;
// input logic b;
// input logic c;
// input logic d;
// input logic cin;
// output logic cout;
// output logic carry;
// output logic sum;
logic TmpSum;
// logic TmpSum;
sng3comp2 add1(.carry(cout), .sum(TmpSum),.*);
sng3comp2 add2(.a(TmpSum), .b(d), .c(cin), .*);
// sng3comp2 add1(.carry(cout), .sum(TmpSum),.*);
// sng3comp2 add2(.a(TmpSum), .b(d), .c(cin), .*);
endmodule
// endmodule

View File

@ -97,6 +97,9 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
logic sticky;
logic [12:0] de0;
logic isAdd;
logic wsign;
logic [51:0] wman;
logic [10:0] wexp;
assign isAdd = 1;
@ -118,17 +121,19 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
add add(.*);
lza lza(.*);
normalize normalize(.zexp(ReadData3M[62:52]),.*);
round round(.xman(ReadData1M[51:0]), .yman(ReadData2M[51:0]),.zman(ReadData3M[51:0]), .wman(FmaResultM[51:0]),.wsign(FmaResultM[63]),.*);
round round(.xman(ReadData1M[51:0]), .yman(ReadData2M[51:0]),.zman(ReadData3M[51:0]),.*);
// Instantiate exponent datapath
expgen2 expgen2(.xexp(ReadData1M[62:52]),.yexp(ReadData2M[62:52]),.zexp(ReadData3M[62:52]),.wexp(FmaResultM[62:52]),.*);
expgen2 expgen2(.xexp(ReadData1M[62:52]),.yexp(ReadData2M[62:52]),.zexp(ReadData3M[62:52]),.*);
// Instantiate control logic
sign sign(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.wsign(FmaResultM[63]),.*);
sign sign(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.*);
flag2 flag2(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.vbits(v[1:0]),.*);
assign FmaResultM = {wsign,wexp,wman};
endmodule

View File

@ -159,7 +159,8 @@ module fpu (
logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE;
logic AddConvertE;
logic [63:0] AddFloat1E, AddFloat2E;
logic [10:0] AddExp1DenormE, AddExp2DenormE, AddExponentE;
logic [11:0] AddExp1DenormE, AddExp2DenormE;
logic [10:0] AddExponentE;
logic [63:0] AddOp1E, AddOp2E;
logic [2:0] AddRmE;
logic [3:0] AddOpTypeE;
@ -317,7 +318,8 @@ module fpu (
logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM;
logic AddConvertM, AddSignM;
logic [63:0] AddFloat1M, AddFloat2M;
logic [10:0] AddExp1DenormM, AddExp2DenormM, AddExponentM;
logic [11:0] AddExp1DenormM, AddExp2DenormM;
logic [10:0] AddExponentM;
logic [63:0] AddOp1M, AddOp2M;
logic [2:0] AddRmM;
logic [3:0] AddOpTypeM;
@ -380,8 +382,8 @@ module fpu (
flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignM);
flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M);
flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M);
flopenrc #(11) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM);
flopenrc #(11) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM);
flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM);
flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM);
flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM);
flopenrc #(64) EMRegAdd21(clk, reset, PipeClearEM, PipeEnableEM, AddOp1E, AddOp1M);
flopenrc #(64) EMRegAdd22(clk, reset, PipeClearEM, PipeEnableEM, AddOp2E, AddOp2M);

View File

@ -39,7 +39,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
input [63:0] AddSumM, AddSumTcM;
input [63:0] AddFloat1M;
input [63:0] AddFloat2M;
input [10:0] AddExp1DenormM, AddExp2DenormM;
input [11:0] AddExp1DenormM, AddExp2DenormM;
input [10:0] AddExponentM, AddExpPostSumM; //exp_pre;
//input exp_valid;
input [3:0] AddSelInvM;
@ -85,7 +85,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
//AddExponentM value pre-rounding with considerations for denormalized
//cases/conversion cases
assign exp_pre = AddDenormInM ?
((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM : AddExp1DenormM))
((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM[10:0] : AddExp1DenormM[10:0]))
: (AddConvertM ? 11'b10000111100 : AddExponentM);

View File

@ -26,81 +26,83 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
// wire [105:0] acc
genvar i;
assign xExt = {1'b0,~(xdenormE|xzeroE),xman};
assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0};
// assign xExt = {1'b0,~(xdenormE|xzeroE),xman};
// assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0};
generate
for(i=0; i<27; i=i+1) begin
booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i]));
end
endgenerate
// generate
// for(i=0; i<27; i=i+1) begin
// booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i]));
// end
// endgenerate
assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]};
assign acc[1] = {49'b01,~e[1],pp[1],add1[0]};
assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0};
assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0};
assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0};
assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0};
assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0};
assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0};
assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0};
assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0};
assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0};
assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0};
assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0};
assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0};
assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0};
assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0};
assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0};
assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0};
assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0};
assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0};
assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0};
assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0};
assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0};
assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0};
assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0};
assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0};
assign acc[26] = {pp[26],add1[25], 50'b0};
// assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]};
// assign acc[1] = {49'b01,~e[1],pp[1],add1[0]};
// assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0};
// assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0};
// assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0};
// assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0};
// assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0};
// assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0};
// assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0};
// assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0};
// assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0};
// assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0};
// assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0};
// assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0};
// assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0};
// assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0};
// assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0};
// assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0};
// assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0};
// assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0};
// assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0};
// assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0};
// assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0};
// assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0};
// assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0};
// assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0};
// assign acc[26] = {pp[26],add1[25], 50'b0};
//***breaks lint with warnings like: %Warning-UNOPTFLAT: Example path: src/fpu/multiply.sv:86: ASSIGNW
// %Warning-UNOPTFLAT: Example path: src/fpu/multiply.sv:22: wallypipelinedsoc.hart.fpu.fma1.multiply.lv3add
//*** resize adders
generate
for(i=0; i<9; i=i+1) begin
add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]),
.carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1]));
assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0};
end
endgenerate
// generate
// for(i=0; i<9; i=i+1) begin
// add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]),
// .carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1]));
// assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0};
// end
// endgenerate
generate
for(i=0; i<6; i=i+1) begin
add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]),
.carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1]));
assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0};
end
endgenerate
// generate
// for(i=0; i<6; i=i+1) begin
// add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]),
// .carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1]));
// assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0};
// end
// endgenerate
generate
for(i=0; i<4; i=i+1) begin
add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]),
.carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1]));
assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0};
end
endgenerate
// generate
// for(i=0; i<4; i=i+1) begin
// add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]),
// .carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1]));
// assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0};
// end
// endgenerate
generate
for(i=0; i<2; i=i+1) begin
add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
.carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0};
end
endgenerate
// generate
// for(i=0; i<2; i=i+1) begin
// add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
// .carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
// assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0};
// end
// endgenerate
add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
.carry(carryTmp[21]), .sum(tmpsE));
assign sE = tmpsE[105:0];
assign rE = {carryTmp[21][104:0], 1'b0};
// add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
// .carry(carryTmp[21]), .sum(tmpsE));
// assign sE = tmpsE[105:0];
// assign rE = {carryTmp[21][104:0], 1'b0};
// assign rE = 0;
// assign sE = acc[0] +
// acc[1] +
@ -130,7 +132,7 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
// acc[25] +
// acc[26];
// assign sE = {53'b0,~(xdenormE|xzeroE),xman} * {53'b0,~(ydenormE|yzeroE),yman};
// assign rE = 0;
assign sE = {53'b0,~(xdenormE|xzeroE),xman} * {53'b0,~(ydenormE|yzeroE),yman};
assign rE = 0;
endmodule

View File

@ -56,6 +56,10 @@ module round(v, sticky, FrmM, wsign,
// 0xx - do nothing
// 100 - tie - plus1 if v[2] = 1
// 101/110/111 - plus1
//***causes lint warning: %Warning-UNOPTFLAT: Example path: src/fpu/round.sv:59: ALWAYS
// %Warning-UNOPTFLAT: Example path: src/fpu/round.sv:42: wallypipelinedsoc.hart.fpu.fma2.round.plus1
always_comb begin
case (FrmM)
3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2])));//round to nearest even
@ -66,12 +70,6 @@ module round(v, sticky, FrmM, wsign,
default: plus1 = 1'bx;
endcase
end
// assign plus1 = (rn & v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2]))) |
// (rp & ~wsign) |
// (rm & wsign);
//assign plus1 = rn && ((v[1] && v[0]) || (v[2] && (v[1]))) ||
// rp && ~wsign && (v[1] || v[0]) ||
// rm && wsign && (v[1] || v[0]);
// Compute rounded result
assign v1 = v[53:2] + 1;

View File

@ -27,26 +27,24 @@
module icache(
// Basic pipeline stuff
input logic clk, reset,
input logic StallF, StallD,
input logic FlushD,
// Upper bits of physical address for PC
input logic [`XLEN-1:12] UpperPCNextPF,
// Lower 12 bits of virtual PC address, since it's faster this way
input logic [11:0] LowerPCNextF,
input logic clk, reset,
input logic StallF, StallD,
input logic FlushD,
input logic [`XLEN-1:0] PCNextF,
input logic [`XLEN-1:0] PCPF,
// Data read in from the ebu unit
input logic [`XLEN-1:0] InstrInF,
input logic InstrAckF,
input logic [`XLEN-1:0] InstrInF,
input logic InstrAckF,
// Read requested from the ebu unit
output logic [`XLEN-1:0] InstrPAdrF,
output logic InstrReadF,
output logic [`XLEN-1:0] InstrPAdrF,
output logic InstrReadF,
// High if the instruction currently in the fetch stage is compressed
output logic CompressedF,
output logic CompressedF,
// High if the icache is requesting a stall
output logic ICacheStallF,
output logic ICacheStallF,
// The raw (not decompressed) instruction that was requested
// If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros
output logic [31:0] InstrRawD
output logic [31:0] InstrRawD
);
// Configuration parameters
@ -56,12 +54,10 @@ module icache(
// Input signals to cache memory
logic FlushMem;
logic [`XLEN-1:12] ICacheMemReadUpperPAdr;
logic [11:0] ICacheMemReadLowerAdr;
logic ICacheMemWriteEnable;
logic [ICACHELINESIZE-1:0] ICacheMemWriteData;
logic [`XLEN-1:0] ICacheMemWritePAdr;
logic EndFetchState;
logic [`XLEN-1:0] PCTagF, PCNextIndexF;
// Output signals from cache memory
logic [31:0] ICacheMemReadData;
logic ICacheMemReadValid;
@ -71,13 +67,9 @@ module icache(
cachemem(
.*,
// Stall it if the pipeline is stalled, unless we're stalling it and we're ending our stall
.re(ICacheReadEn),
.flush(FlushMem),
.ReadUpperPAdr(ICacheMemReadUpperPAdr),
.ReadLowerAdr(ICacheMemReadLowerAdr),
.WriteEnable(ICacheMemWriteEnable),
.WriteLine(ICacheMemWriteData),
.WritePAdr(ICacheMemWritePAdr),
.DataWord(ICacheMemReadData),
.DataValid(ICacheMemReadValid)
);
@ -96,22 +88,19 @@ module icachecontroller #(parameter LINESIZE = 256) (
// Input the address to read
// The upper bits of the physical pc
input logic [`XLEN-1:12] UpperPCNextPF,
// The lower bits of the virtual pc
input logic [11:0] LowerPCNextF,
input logic [`XLEN-1:0] PCNextF,
input logic [`XLEN-1:0] PCPF,
// Signals to/from cache memory
// The read coming out of it
input logic [31:0] ICacheMemReadData,
input logic ICacheMemReadValid,
// The address at which we want to search the cache memory
output logic [`XLEN-1:12] ICacheMemReadUpperPAdr,
output logic [11:0] ICacheMemReadLowerAdr,
output logic [`XLEN-1:0] PCTagF,
output logic [`XLEN-1:0] PCNextIndexF,
output logic ICacheReadEn,
// Load data into the cache
output logic ICacheMemWriteEnable,
output logic [LINESIZE-1:0] ICacheMemWriteData,
output logic [`XLEN-1:0] ICacheMemWritePAdr,
// Outputs to rest of ifu
// High if the instruction in the fetch stage is compressed
@ -198,7 +187,7 @@ module icachecontroller #(parameter LINESIZE = 256) (
logic [LOGWPL:0] FetchCount, NextFetchCount;
logic [`XLEN-1:0] PCPreFinalF, PCPFinalF, PCSpillF, PCNextPF;
logic [`XLEN-1:0] PCPreFinalF, PCPFinalF, PCSpillF;
logic [`XLEN-1:OFFSETWIDTH] PCPTrunkF;
@ -215,159 +204,46 @@ module icachecontroller #(parameter LINESIZE = 256) (
//logic FlushDLastCycleN;
//logic PCPMisalignedF;
localparam [31:0] NOP = 32'h13;
logic [`XLEN-1:0] PCPF;
//logic [`XLEN-1:0] PCPF;
logic reset_q;
logic [1:0] PCMux_q;
// Misaligned signals
//logic [`XLEN:0] MisalignedInstrRawF;
//logic MisalignedStall;
// Cache fault signals
//logic FaultStall;
assign PCNextPF = {UpperPCNextPF, LowerPCNextF};
flopenl #(`XLEN) PCPFFlop(clk, reset, SavePC & ~StallF, PCPFinalF, `RESET_VECTOR, PCPF);
//flopenl #(`XLEN) PCPFFlop(clk, reset, SavePC & ~StallF, PCPFinalF, `RESET_VECTOR, PCPF);
// on spill we want to get the first 2 bytes of the next cache block.
// the spill only occurs if the PCPF mod BlockByteLength == -2. Therefore we can
// simply add 2 to land on the next cache block.
assign PCSpillF = PCPF + 2'b10;
// now we have to select between these three PCs
assign PCPreFinalF = PCMux[0] | StallF ? PCPF : PCNextPF; // *** don't like the stallf
//assign PCPreFinalF = PCMux[0] ? PCPF : PCNextPF; // *** don't like the stallf
assign PCPreFinalF = PCMux[0] | StallF ? PCPF : PCNextF; // *** don't like the stallf
assign PCPFinalF = PCMux[1] ? PCSpillF : PCPreFinalF;
// this mux needs to be delayed 1 cycle as it occurs 1 pipeline stage later.
// *** read enable may not be necessary.
flopenr #(2) PCMuxReg(.clk(clk),
.reset(reset),
.en(ICacheReadEn),
.d(PCMux),
.q(PCMux_q));
assign PCTagF = PCMux_q[1] ? PCSpillF : PCPF;
assign PCNextIndexF = PCPFinalF;
// truncate the offset from PCPF for memory address generation
assign PCPTrunkF = PCPFinalF[`XLEN-1:OFFSETWIDTH];
assign PCPTrunkF = PCTagF[`XLEN-1:OFFSETWIDTH];
// Detect if the instruction is compressed
assign CompressedF = FinalInstrRawF[1:0] != 2'b11;
// Handle happy path (data in cache, reads aligned)
/* -----\/----- EXCLUDED -----\/-----
generate
if (`XLEN == 32) begin
assign AlignedInstrRawF = PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData;
//assign PCPMisalignedF = PCPF[1] && ~CompressedF;
end else begin
assign AlignedInstrRawF = PCPF[2]
? (PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData[63:32])
: (PCPF[1] ? ICacheMemReadData[47:16] : ICacheMemReadData[31:0]);
//assign PCPMisalignedF = PCPF[2] && PCPF[1] && ~CompressedF;
end
endgenerate
-----/\----- EXCLUDED -----/\----- */
//flopenr #(32) AlignedInstrRawDFlop(clk, reset, ~StallD, AlignedInstrRawF, AlignedInstrRawD);
//flopr #(1) FlushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCycleN | ~StallF), FlushDLastCycleN);
//mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCycleN, InstrRawD);
// Stall for faults or misaligned reads
/* -----\/----- EXCLUDED -----\/-----
always_comb begin
assign ICacheStallF = FaultStall | MisalignedStall;
end
-----/\----- EXCLUDED -----/\----- */
// Handle misaligned, noncompressed reads
/* -----\/----- EXCLUDED -----\/-----
logic MisalignedState, NextMisalignedState;
logic [15:0] MisalignedHalfInstrF;
logic [15:0] UpperHalfWord;
-----/\----- EXCLUDED -----/\----- */
/* -----\/----- EXCLUDED -----\/-----
flopenr #(16) MisalignedHalfInstrFlop(clk, reset, ~FaultStall & (PCPMisalignedF & MisalignedState), AlignedInstrRawF[15:0], MisalignedHalfInstrF);
flopenr #(1) MisalignedStateFlop(clk, reset, ~FaultStall, NextMisalignedState, MisalignedState);
-----/\----- EXCLUDED -----/\----- */
// When doing a misaligned read, swizzle the bits correctly
/* -----\/----- EXCLUDED -----\/-----
generate
if (`XLEN == 32) begin
assign UpperHalfWord = ICacheMemReadData[31:16];
end else begin
assign UpperHalfWord = ICacheMemReadData[63:48];
end
endgenerate
always_comb begin
if (MisalignedState) begin
assign MisalignedInstrRawF = {16'b0, UpperHalfWord};
end else begin
assign MisalignedInstrRawF = {ICacheMemReadData[15:0], MisalignedHalfInstrF};
end
end
-----/\----- EXCLUDED -----/\----- */
// Manage internal state and stall when necessary
/* -----\/----- EXCLUDED -----\/-----
always_comb begin
assign MisalignedStall = PCPMisalignedF & MisalignedState;
assign NextMisalignedState = ~PCPMisalignedF | ~MisalignedState;
end
-----/\----- EXCLUDED -----/\----- */
// Pick the correct address to read
/* -----\/----- EXCLUDED -----\/-----
generate
if (`XLEN == 32) begin
assign ICacheMemReadLowerAdr = {LowerPCNextF[11:2] + (PCPMisalignedF & ~MisalignedState), 2'b00};
end else begin
assign ICacheMemReadLowerAdr = {LowerPCNextF[11:3] + (PCPMisalignedF & ~MisalignedState), 3'b00};
end
endgenerate
-----/\----- EXCLUDED -----/\----- */
// TODO Handle reading instructions that cross page boundaries
//assign ICacheMemReadUpperPAdr = UpperPCNextPF;
// Handle cache faults
/* -----\/----- EXCLUDED -----\/-----
logic FetchState, BeginFetchState;
logic [LOGWPL:0] FetchWordNum, NextFetchWordNum;
logic [`XLEN-1:0] LineAlignedPCPF;
flopr #(1) FetchStateFlop(clk, reset, BeginFetchState | (FetchState & ~EndFetchState), FetchState);
flopr #(LOGWPL+1) FetchWordNumFlop(clk, reset, NextFetchWordNum, FetchWordNum);
// Enter the fetch state when we hit a cache fault
always_comb begin
BeginFetchState = ~ICacheMemReadValid & ~FetchState & (FetchWordNum == 0);
end
// Exit the fetch state once the cache line has been loaded
flopr #(1) EndFetchStateFlop(clk, reset, ICacheMemWriteEnable, EndFetchState);
// Machinery to request the correct addresses from main memory
always_comb begin
InstrReadF = FetchState & ~EndFetchState & ~ICacheMemWriteEnable; // next stage logic
LineAlignedPCPF = {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}}; // the fetch address for abh?
InstrPAdrF = LineAlignedPCPF + FetchWordNum*(`XLEN/8); // ?
NextFetchWordNum = FetchState ? FetchWordNum+InstrAckF : {LOGWPL+1{1'b0}}; // convert to enable
end
// Write to cache memory when we have the line here
always_comb begin
ICacheMemWritePAdr = LineAlignedPCPF;
ICacheMemWriteEnable = FetchWordNum == {1'b1, {LOGWPL{1'b0}}} & FetchState & ~EndFetchState;
end
// Stall the pipeline while loading a new line from memory
always_comb begin
FaultStall = FetchState | ~ICacheMemReadValid;
end
-----/\----- EXCLUDED -----/\----- */
// the FSM is always running, do not stall.
flopr #(5) stateReg(.clk(clk),
.reset(reset),
@ -638,12 +514,6 @@ module icachecontroller #(parameter LINESIZE = 256) (
flopr #(1) flushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCyclen | ~StallF), FlushDLastCyclen);
mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCyclen, InstrRawD);
//assign InstrRawD = AlignedInstrRawD;
assign {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr} = PCPFinalF;
assign ICacheMemWritePAdr = PCPFinalF;
endmodule

View File

@ -0,0 +1,102 @@
`include "wally-config.vh"
// rodirectmappedmemre
//
// Direct-mapped cache memory with separate "tag" and "index" address inputs.
//   * PCNextIndexF[SETEND:SETBEGIN] addresses the data SRAM, the tag SRAM and
//     the valid-bit array.
//   * PCTagF[TAGEND:TAGBEGIN] is the tag written alongside a new line and the
//     tag compared against the stored tag to produce DataValid.
//   * The low bits of PCTagF (bit 1 upward) pick the 16-bit-aligned, 32-bit
//     window of the line that is driven onto DataWord.
//
// Parameters:
//   NUMLINES - number of cache lines (sets)
//   LINESIZE - bits per cache line; the output select assumes a power of two
//              that is at least 32
//   WORDSIZE - bits per word, used for the address field layout
module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) (
    // Pipeline stuff
    input  logic                clk,
    input  logic                reset,
    // If flush is high, invalidate the entire cache
    input  logic                flush,
    // Select which address to read (split into a tag address and an index address)
    input  logic [`XLEN-1:0]    PCTagF,       // physical tag address
    input  logic [`XLEN-1:0]    PCNextIndexF,
    // Write new data to the cache
    input  logic                WriteEnable,
    input  logic [LINESIZE-1:0] WriteLine,
    // Output the word, as well as if it is valid
    output logic [31:0]         DataWord,     // *** was WORDSIZE-1
    output logic                DataValid
);

    // Various compile-time constants: address field layout, LSB to MSB:
    // [byte-in-word | word offset in line | set index | tag]
    localparam integer WORDWIDTH   = $clog2(WORDSIZE/8);
    localparam integer OFFSETWIDTH = $clog2(LINESIZE/WORDSIZE);
    localparam integer SETWIDTH    = $clog2(NUMLINES);
    localparam integer TAGWIDTH    = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH;

    localparam integer OFFSETBEGIN = WORDWIDTH;
    localparam integer OFFSETEND   = OFFSETBEGIN+OFFSETWIDTH-1;
    localparam integer SETBEGIN    = OFFSETEND+1;
    localparam integer SETEND      = SETBEGIN + SETWIDTH - 1;
    localparam integer TAGBEGIN    = SETEND + 1;
    localparam integer TAGEND      = TAGBEGIN + TAGWIDTH - 1;

    // Number of address bits needed to pick one 16-bit halfword within a line
    localparam integer HWORDSELWIDTH = $clog2(LINESIZE/16);

    // Machinery to read from and write to the correct addresses in memory
    logic [LINESIZE-1:0]                        ReadLine;
    logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed;

    // Machinery to check if a given read is valid and is the desired value
    logic [TAGWIDTH-1:0] DataTag;
    logic [NUMLINES-1:0] ValidOut;
    logic                DataValidBit;

    // Halfword-granular output select
    logic [HWORDSELWIDTH-1:0] HalfwordSel;
    logic [LINESIZE+15:0]     ReadLinePadded;

    // Depth is number of bits in one "word" of the memory, width is number of such words
    // NOTE(review): .* relies on sram1rw port names matching signals declared
    // here (sram1rw is defined elsewhere) - confirm against its port list.
    sram1rw #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem (
        .*,
        .Addr(PCNextIndexF[SETEND:SETBEGIN]),
        .ReadData(ReadLine),
        .WriteData(WriteLine)
    );

    sram1rw #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags (
        .*,
        .Addr(PCNextIndexF[SETEND:SETBEGIN]),
        .ReadData(DataTag),
        .WriteData(PCTagF[TAGEND:TAGBEGIN])
    );

    // Pick the right bits coming out the read line.
    // The 32-bit window starts at halfword HalfwordSel of the line. The line is
    // padded with 16 zero bits on top so that selecting the last halfword
    // yields {16'b0, <last halfword>} instead of reading past the line.
    // For LINESIZE = 256 this is PCTagF[4:1] choosing ReadLine[31:0],
    // ReadLine[47:16], ... , {16'b0, ReadLine[255:240]}.
    assign HalfwordSel    = PCTagF[HWORDSELWIDTH:1];
    assign ReadLinePadded = {16'b0, ReadLine};
    assign DataWord       = ReadLinePadded[HalfwordSel*16 +: 32];

    // Word-wise view of the line that was read; no logic in this module consumes it.
    genvar i;
    generate
        for (i=0; i < LINESIZE/WORDSIZE; i++) begin
            assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE];
        end
    endgenerate

    // Correctly handle the valid bits:
    //   reset or flush - every line becomes invalid
    //   WriteEnable    - the indexed line becomes valid
    // DataValidBit is a registered copy of the indexed line's valid bit.
    // NOTE(review): flush shares the branch with the asynchronous reset but is
    // not in the sensitivity list, so it only takes effect on a clock edge; and
    // DataValidBit is assigned outside the reset branch, so it has no reset
    // value of its own - confirm both are intended.
    always_ff @(posedge clk, posedge reset) begin
        if (reset || flush) begin
            ValidOut <= {NUMLINES{1'b0}};
        end else begin
            if (WriteEnable) begin
                ValidOut[PCNextIndexF[SETEND:SETBEGIN]] <= 1'b1;
            end
        end
        DataValidBit <= ValidOut[PCNextIndexF[SETEND:SETBEGIN]];
    end

    // A hit requires the line to be valid and the stored tag to match
    assign DataValid = DataValidBit && (DataTag == PCTagF[TAGEND:TAGBEGIN]);
endmodule

View File

@ -105,11 +105,9 @@ module ifu (
// jarred 2021-03-14 Add instruction cache block to remove rd2
assign PCNextPF = PCNextF; // Temporary workaround until iTLB is live
icache icache(
.*,
.UpperPCNextPF(PCNextPF[`XLEN-1:12]),
.LowerPCNextF(PCNextPF[11:0])
);
icache icache(.*);
assign PrivilegedChangePCM = RetM | TrapM;

View File

@ -30,13 +30,14 @@ module testbench();
parameter DEBUG = 0;
parameter TESTSBP = 0;
parameter TESTSPERIPH = 0 ; // set to 0 for regression
localparam MAXSIGLEN = 1000000;
logic clk;
logic reset;
int test, i, errors, totalerrors;
logic [31:0] sig32[0:10000];
logic [`XLEN-1:0] signature[0:10000];
logic [31:0] sig32[0:MAXSIGLEN];
logic [`XLEN-1:0] signature[0:MAXSIGLEN];
logic [`XLEN-1:0] testadr;
string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
logic [31:0] InstrW;
@ -602,7 +603,7 @@ string tests32f[] = '{
$display("Code ended with ecall with gp = 1");
#60; // give time for instructions in pipeline to finish
// clear signature to prevent contamination from previous tests
for(i=0; i<10000; i=i+1) begin
for(i=0; i<MAXSIGLEN; i=i+1) begin
sig32[i] = 'bx;
end
@ -610,7 +611,7 @@ string tests32f[] = '{
signame = {"../../imperas-riscv-tests/work/", tests[test], ".signature.output"};
$readmemh(signame, sig32);
i = 0;
while (i < 10000) begin
while (i < MAXSIGLEN) begin
if (`XLEN == 32) begin
signature[i] = sig32[i];
i = i+1;