Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

2025-02-11 06:05:49 +00:00 · 2021-05-03 17:38:13 -04:00 · 2021-05-03 17:38:13 -04:00 · 3f7061d557
commit 3f7061d557
parent 86a93d77b4 a21b84e2ad
15 changed files with 365 additions and 478 deletions
--- a/wally-pipelined/lint-wally
+++ b/wally-pipelined/lint-wally
@ -1,9 +1,11 @@
+#!/bin/bash
 # check for warnings in Verilog code
 # The verilator lint tool is faster and better than Modelsim so it is best to run this first.

+basepath=$(dirname $0)
 for config in rv64ic rv32ic; do
    echo "$config linting..."
-    if !(verilator --lint-only "$@" --top-module wallypipelinedsoc "-Iconfig/$config" src/*/*.sv); then
+    if !(verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/$config" $basepath/src/*/*.sv); then
        echo "Exiting after $config lint due to errors or warnings"
        exit 1
    fi
--- a/wally-pipelined/regression/regression-wally.py
+++ b/wally-pipelined/regression/regression-wally.py
@ -36,6 +36,11 @@ configs = [
        cmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do ../config/rv64ic rv64ic\n!",
        grepstr="All tests ran without failures"
    ),
+    Config(
+        name="lints",
+        cmd="../lint-wally > {}",
+        grepstr="All lints run with no errors or warnings"
+    ),
 ]

 import multiprocessing, os
--- a/wally-pipelined/regression/wave.do
+++ b/wally-pipelined/regression/wave.do
@ -3,6 +3,8 @@ quietly virtual function -install /testbench/dut/hart/ifu/icache/cachemem -env /
 quietly WaveActivateNextPane {} 0
 add wave -noupdate /testbench/clk
 add wave -noupdate /testbench/reset
+add wave -noupdate /testbench/memfilename
+add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName
 add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE
 add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName
 add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE
@ -19,13 +21,13 @@ add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap
 add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM
 add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM
 add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/DataStall
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD
+add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/BPPredWrongE
+add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM
+add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/RetM
+add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/TrapM
+add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/LoadStallD
+add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/DataStall
+add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/MulDivStallD
 add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF
 add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushD
 add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushE
@ -36,25 +38,25 @@ add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbe
 add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE
 add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM
 add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW
-add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPPredF
-add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBValidF
-add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPInstrClassF
-add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBPredPCF
-add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/RASPCF
-add wave -noupdate -expand -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePC
-add wave -noupdate -expand -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdateEN
-add wave -noupdate -expand -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePrediction
-add wave -noupdate -expand -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateEN
-add wave -noupdate -expand -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdatePC
-add wave -noupdate -expand -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateTarget
-add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/TargetWrongE
-add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/FallThroughWrongE
-add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionPCWrongE
-add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/InstrClassE
-add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionInstrClassWrongE
-add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredClassNonCFIWrongE
-add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE
-add wave -noupdate -expand -group Bpred /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE
+add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPPredF
+add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBValidF
+add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPInstrClassF
+add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBPredPCF
+add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/RASPCF
+add wave -noupdate -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePC
+add wave -noupdate -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdateEN
+add wave -noupdate -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePrediction
+add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateEN
+add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdatePC
+add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateTarget
+add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/TargetWrongE
+add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/FallThroughWrongE
+add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionPCWrongE
+add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/InstrClassE
+add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionInstrClassWrongE
+add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredClassNonCFIWrongE
+add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE
+add wave -noupdate -group Bpred /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE
 add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrFName
 add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrD
 add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrE
@ -112,8 +114,6 @@ add wave -noupdate -group dcache /testbench/dut/hart/MemPAdrM
 add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemAccessM
 add wave -noupdate -group dcache /testbench/dut/hart/dmem/AtomicMaskedM
 add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemAckW
-add wave -noupdate -group dcache /testbench/dut/hart/dmem/genblk1/lrM
-add wave -noupdate -group dcache /testbench/dut/hart/dmem/genblk1/scM
 add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D
 add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D
 add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E
@ -203,7 +203,6 @@ add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbenc
 add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/AlignedInstrRawD
 add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FlushDLastCyclen
 add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/InstrRawD
-add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCNextPF
 add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF
 add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF
 add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPFinalF
@ -223,10 +222,11 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED
-add wave -noupdate /testbench/dut/hart/dmem/genblk1/scM
+add wave -noupdate /testbench/dut/hart/ifu/icache/PCTagF
+add wave -noupdate /testbench/dut/hart/ifu/icache/cachemem/OldReadPAdr
 TreeUpdate [SetDefaultTree]
-WaveRestoreCursors {{Cursor 2} {12215488 ns} 0} {{Cursor 4} {22127 ns} 0}
-quietly wave cursor active 2
+WaveRestoreCursors {{Cursor 2} {9951515 ns} 0} {{Cursor 4} {1318991 ns} 0}
+quietly wave cursor active 1
 configure wave -namecolwidth 250
 configure wave -valuecolwidth 513
 configure wave -justifyvalue left
@ -241,4 +241,4 @@ configure wave -griddelta 40
 configure wave -timeline 0
 configure wave -timelineunits ns
 update
-WaveRestoreZoom {21993 ns} {22181 ns}
+WaveRestoreZoom {9951431 ns} {9951599 ns}
--- a/wally-pipelined/src/cache/dmapped.sv
+++ b/wally-pipelined/src/cache/dmapped.sv
@ -125,128 +125,6 @@ module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par
    assign DataValid = DataValidBit && (DataTag == ReadTag);
 endmodule

-module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) (
-    // Pipeline stuff
-    input  logic clk,
-    input  logic reset,
-    input  logic re,
-    // If flush is high, invalidate the entire cache
-    input  logic flush,
-    // Select which address to read (broken for efficiency's sake)
-    input  logic [`XLEN-1:12]   ReadUpperPAdr,
-    input  logic [11:0]         ReadLowerAdr,
-    // Write new data to the cache
-    input  logic                WriteEnable,
-    input  logic [LINESIZE-1:0] WriteLine,
-    input  logic [`XLEN-1:0]    WritePAdr,
-    // Output the word, as well as if it is valid
-    output logic [31:0] DataWord, // *** was WORDSIZE-1
-    output logic                DataValid
-);
-
-    // Various compile-time constants
-    localparam integer WORDWIDTH = $clog2(WORDSIZE/8);
-    localparam integer OFFSETWIDTH = $clog2(LINESIZE/WORDSIZE);
-    localparam integer SETWIDTH = $clog2(NUMLINES);
-    localparam integer TAGWIDTH = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH;
-
-    localparam integer OFFSETBEGIN = WORDWIDTH;
-    localparam integer OFFSETEND = OFFSETBEGIN+OFFSETWIDTH-1;
-    localparam integer SETBEGIN = OFFSETEND+1;
-    localparam integer SETEND = SETBEGIN + SETWIDTH - 1;
-    localparam integer TAGBEGIN = SETEND + 1;
-    localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1;
-
-    // Machinery to read from and write to the correct addresses in memory
-    logic [`XLEN-1:0]       ReadPAdr;
-    logic [`XLEN-1:0]       OldReadPAdr;
-    logic [OFFSETWIDTH-1:0] ReadOffset, WriteOffset;
-    logic [SETWIDTH-1:0]    ReadSet, WriteSet;
-    logic [TAGWIDTH-1:0]    ReadTag, WriteTag;
-    logic [LINESIZE-1:0]    ReadLine;
-    logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed;
-
-    // Machinery to check if a given read is valid and is the desired value
-    logic [TAGWIDTH-1:0]    DataTag;
-    logic [NUMLINES-1:0]    ValidOut;
-    logic                   DataValidBit;
-
-    flopenr #(`XLEN) ReadPAdrFlop(clk, reset, re, ReadPAdr, OldReadPAdr);
-
-    // Assign the read and write addresses in cache memory
-    always_comb begin
-        ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN];
-        ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
-        ReadSet = ReadPAdr[SETEND:SETBEGIN];
-        ReadTag = OldReadPAdr[TAGEND:TAGBEGIN];
-
-        WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN];
-        WriteSet = WritePAdr[SETEND:SETBEGIN];
-        WriteTag = WritePAdr[TAGEND:TAGBEGIN];
-    end
-
-    // Depth is number of bits in one "word" of the memory, width is number of such words
-    Sram1Read1Write #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem (
-        .*,
-        .ReadAddr(ReadSet),
-        .ReadData(ReadLine),
-        .WriteAddr(WriteSet),
-        .WriteData(WriteLine)
-    );
-    Sram1Read1Write #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags (
-        .*,
-        .ReadAddr(ReadSet),
-        .ReadData(DataTag),
-        .WriteAddr(WriteSet),
-        .WriteData(WriteTag)
-    );
-
-    // Pick the right bits coming out the read line
-    //assign DataWord = ReadLineTransformed[ReadOffset];
-  //logic [31:0] tempRD;
-  always_comb begin
-    case (OldReadPAdr[4:1])
-      0: DataWord = ReadLine[31:0];
-      1: DataWord = ReadLine[47:16];
-      2: DataWord = ReadLine[63:32];
-      3: DataWord = ReadLine[79:48];
-
-      4: DataWord = ReadLine[95:64];
-      5: DataWord = ReadLine[111:80];
-      6: DataWord = ReadLine[127:96];
-      7: DataWord = ReadLine[143:112];      
-
-      8: DataWord = ReadLine[159:128];      
-      9: DataWord = ReadLine[175:144];      
-      10: DataWord = ReadLine[191:160];      
-      11: DataWord = ReadLine[207:176];
-
-      12: DataWord = ReadLine[223:192];
-      13: DataWord = ReadLine[239:208];
-      14: DataWord = ReadLine[255:224];
-      15: DataWord = {16'b0, ReadLine[255:240]};
-    endcase
-  end
-    genvar i;
-    generate
-        for (i=0; i < LINESIZE/WORDSIZE; i++) begin
-            assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE];
-        end
-    endgenerate
-
-    // Correctly handle the valid bits
-    always_ff @(posedge clk, posedge reset) begin
-        if (reset || flush) begin
-            ValidOut <= {NUMLINES{1'b0}};
-        end else begin
-            if (WriteEnable) begin
-                ValidOut[WriteSet] <= 1;
-            end
-        end
-        DataValidBit <= ValidOut[ReadSet];
-    end
-    assign DataValid = DataValidBit && (DataTag == ReadTag);
-endmodule

 // Write-through direct-mapped memory
 module wtdirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) (
--- a/wally-pipelined/src/cache/sram1rw.sv
+++ b/wally-pipelined/src/cache/sram1rw.sv
@ -0,0 +1,21 @@
+// Single-port synchronous RAM (one shared address for read and write). DEPTH is number of bits in one "word" of the memory, WIDTH is number of such words
+module sram1rw #(parameter DEPTH=128, WIDTH=256) (
+    input logic 		    clk,
+    // Addr selects the word for BOTH the read and the write; ReadData is registered on the rising edge of clk
+    input logic [$clog2(WIDTH)-1:0] Addr,
+    output logic [DEPTH-1:0] 	    ReadData,
+  
+    // Write path: WriteData is stored at Addr on the rising edge of clk when WriteEnable is high
+    input logic [DEPTH-1:0] 	    WriteData,
+    input logic 		    WriteEnable
+);
+
+    logic [WIDTH-1:0][DEPTH-1:0] StoredData; // WIDTH words of DEPTH bits each
+
+    always_ff @(posedge clk) begin
+        ReadData <= StoredData[Addr]; // nonblocking: on a write cycle this captures the word stored before the write
+        if (WriteEnable) begin
+            StoredData[Addr] <= WriteData;
+        end
+    end
+endmodule
--- a/wally-pipelined/src/fpu/compressors.sv
+++ b/wally-pipelined/src/fpu/compressors.sv
@ -1,90 +1,93 @@
-module add3comp2(a, b, c, carry, sum); 
-/////////////////////////////////////////////////////////////////////////////
-//look into diffrent implementations of the compressors?
+// //***breaks lint with warnings like: %Warning-UNOPTFLAT:      Example path: src/fpu/compressors.sv:37:  ASSIGNW
+// //%Warning-UNOPTFLAT:      Example path: src/fpu/compressors.sv:32:  wallypipelinedsoc.hart.fpu.fma1.multiply.genblk5[0].add4.cout
+
+// module add3comp2(a, b, c, carry, sum); 
+// /////////////////////////////////////////////////////////////////////////////
+// //look into different implementations of the compressors?
    
-    parameter BITS = 4;
-	input logic 		[BITS-1:0]		a;
-	input logic		[BITS-1:0]		b;
-	input logic		[BITS-1:0]    	c;
-    output logic      [BITS-1:0]      carry;
-	output logic		[BITS-1:0]		sum;
-    genvar i;
+//     parameter BITS = 4;
+// 	input logic 		[BITS-1:0]		a;
+// 	input logic		[BITS-1:0]		b;
+// 	input logic		[BITS-1:0]    	c;
+//     output logic      [BITS-1:0]      carry;
+// 	output logic		[BITS-1:0]		sum;
+//     genvar i;

-    generate
-        for(i= 0; i<BITS; i=i+1) begin
-            sng3comp2 add0(a[i], b[i], c[i], carry[i], sum[i]);
-        end
-    endgenerate
+//     generate
+//         for(i= 0; i<BITS; i=i+1) begin
+//             sng3comp2 add0(a[i], b[i], c[i], carry[i], sum[i]);
+//         end
+//     endgenerate

-endmodule
+// endmodule

-module add4comp2(a, b, c, d, carry, sum); 
-/////////////////////////////////////////////////////////////////////////////
+// module add4comp2(a, b, c, d, carry, sum); 
+// /////////////////////////////////////////////////////////////////////////////
    
-    parameter BITS = 4;
-	input logic 		[BITS-1:0]		a;
-	input logic		[BITS-1:0]		b;
-	input logic		[BITS-1:0]    	c;
-	input logic		[BITS-1:0]    	d;
-    output logic      [BITS:0]      carry;
-	output logic		[BITS-1:0]		sum;
+//     parameter BITS = 4;
+// 	input logic 		[BITS-1:0]		a;
+// 	input logic		[BITS-1:0]		b;
+// 	input logic		[BITS-1:0]    	c;
+// 	input logic		[BITS-1:0]    	d;
+//     output logic      [BITS:0]      carry;
+// 	output logic		[BITS-1:0]		sum;

-    logic       [BITS-1:0]      cout;
-    logic                       carryTmp;
-    genvar i;
+//     logic       [BITS-1:0]      cout;
+//     logic                       carryTmp;
+//     genvar i;


-    sng4comp2 add0(a[0], b[0], c[0], d[0], 1'b0, cout[0], carry[0], sum[0]);
+//     sng4comp2 add0(a[0], b[0], c[0], d[0], 1'b0, cout[0], carry[0], sum[0]);

-    generate
-        for(i= 1; i<BITS-1; i=i+1) begin
-            sng4comp2 add1(a[i], b[i], c[i], d[i], cout[i-1], cout[i], carry[i], sum[i]);
-        end
-    endgenerate
+//     generate
+//         for(i= 1; i<BITS-1; i=i+1) begin
+//             sng4comp2 add1(a[i], b[i], c[i], d[i], cout[i-1], cout[i], carry[i], sum[i]);
+//         end
+//     endgenerate


-    sng4comp2 add2(a[BITS-1], b[BITS-1], c[BITS-1], d[BITS-1], cout[BITS-2], cout[BITS-1], carryTmp, sum[BITS-1]);
+//     sng4comp2 add2(a[BITS-1], b[BITS-1], c[BITS-1], d[BITS-1], cout[BITS-2], cout[BITS-1], carryTmp, sum[BITS-1]);

-    assign carry[BITS-1] = carryTmp & cout[BITS-1];
-    assign carry[BITS] = carryTmp ^ cout[BITS-1];
+//     assign carry[BITS-1] = carryTmp & cout[BITS-1];
+//     assign carry[BITS] = carryTmp ^ cout[BITS-1];

-endmodule
+// endmodule

-module sng3comp2(a, b, c, carry, sum); 
-/////////////////////////////////////////////////////////////////////////////
-//look into diffrent implementations of the compressors?
+// module sng3comp2(a, b, c, carry, sum); 
+// /////////////////////////////////////////////////////////////////////////////
+// //look into different implementations of the compressors?
    
-	input logic 				a;
-	input logic				b;
-	input logic		       	c;
-    output logic              carry;
-	output logic				sum;
+// 	input logic 				a;
+// 	input logic				b;
+// 	input logic		       	c;
+//     output logic              carry;
+// 	output logic				sum;
    
-    logic               axorb;
+//     logic               axorb;

-    assign axorb = a ^ b;
-    assign sum = axorb ^ c;
+//     assign axorb = a ^ b;
+//     assign sum = axorb ^ c;

-    assign carry = axorb ? c : a;
+//     assign carry = axorb ? c : a;

-endmodule
+// endmodule

-module sng4comp2(a, b, c, d, cin, cout, carry, sum); 
-/////////////////////////////////////////////////////////////////////////////
-//look into pass gate 4:2 counters?
+// module sng4comp2(a, b, c, d, cin, cout, carry, sum); 
+// /////////////////////////////////////////////////////////////////////////////
+// //look into pass gate 4:2 counters?
    
-	input logic 				a;
-	input logic				b;
-	input logic		       	c;
-    input logic               d;
-    input logic               cin;
-    output logic              cout;
-    output logic              carry;
-	output logic				sum;
+// 	input logic 				a;
+// 	input logic				b;
+// 	input logic		       	c;
+//     input logic               d;
+//     input logic               cin;
+//     output logic              cout;
+//     output logic              carry;
+// 	output logic				sum;
    
-    logic               TmpSum;
+//     logic               TmpSum;

-    sng3comp2 add1(.carry(cout), .sum(TmpSum),.*);
-    sng3comp2 add2(.a(TmpSum), .b(d), .c(cin), .*);
+//     sng3comp2 add1(.carry(cout), .sum(TmpSum),.*);
+//     sng3comp2 add2(.a(TmpSum), .b(d), .c(cin), .*);

-endmodule
+// endmodule
--- a/wally-pipelined/src/fpu/fma2.sv
+++ b/wally-pipelined/src/fpu/fma2.sv
@ -97,6 +97,9 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
 	logic					sticky;
 	logic			[12:0]		de0;
 	logic					isAdd;
+	logic					wsign;
+	logic 			[51:0]		wman;
+	logic 			[10:0]		wexp;

 	assign isAdd = 1;

@ -118,17 +121,19 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
 	add				add(.*);
 	lza				lza(.*);
 	normalize		normalize(.zexp(ReadData3M[62:52]),.*); 
-	round			round(.xman(ReadData1M[51:0]), .yman(ReadData2M[51:0]),.zman(ReadData3M[51:0]), .wman(FmaResultM[51:0]),.wsign(FmaResultM[63]),.*);
+	round			round(.xman(ReadData1M[51:0]), .yman(ReadData2M[51:0]),.zman(ReadData3M[51:0]),.*);

 // Instantiate exponent datapath

-	expgen2			expgen2(.xexp(ReadData1M[62:52]),.yexp(ReadData2M[62:52]),.zexp(ReadData3M[62:52]),.wexp(FmaResultM[62:52]),.*);
+	expgen2			expgen2(.xexp(ReadData1M[62:52]),.yexp(ReadData2M[62:52]),.zexp(ReadData3M[62:52]),.*);


 // Instantiate control logic
 
-sign				sign(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.wsign(FmaResultM[63]),.*); 
+sign				sign(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.*); 
 flag2				flag2(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.vbits(v[1:0]),.*); 

+assign FmaResultM = {wsign,wexp,wman};
+
 endmodule

--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@ -159,7 +159,8 @@ module fpu (
  logic                    AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE;
  logic                    AddConvertE;
  logic [63:0]             AddFloat1E, AddFloat2E;
-  logic [10:0]             AddExp1DenormE, AddExp2DenormE, AddExponentE;
+  logic [11:0]             AddExp1DenormE, AddExp2DenormE;
+  logic [10:0]             AddExponentE;
  logic [63:0]             AddOp1E, AddOp2E;
  logic [2:0]              AddRmE;
  logic [3:0]              AddOpTypeE;
@ -317,7 +318,8 @@ module fpu (
  logic                    AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM;
  logic                    AddConvertM, AddSignM;
  logic [63:0]             AddFloat1M, AddFloat2M;
-  logic [10:0]             AddExp1DenormM, AddExp2DenormM, AddExponentM;
+  logic [11:0]             AddExp1DenormM, AddExp2DenormM;
+  logic [10:0]             AddExponentM;
  logic [63:0]             AddOp1M, AddOp2M;
  logic [2:0]              AddRmM;
  logic [3:0]              AddOpTypeM;
@ -380,8 +382,8 @@ module fpu (
  flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignM); 
  flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M); 
  flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M); 
-  flopenrc #(11) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); 
-  flopenrc #(11) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); 
+  flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); 
+  flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); 
  flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM); 
  flopenrc #(64) EMRegAdd21(clk, reset, PipeClearEM, PipeEnableEM, AddOp1E, AddOp1M); 
  flopenrc #(64) EMRegAdd22(clk, reset, PipeClearEM, PipeEnableEM, AddOp2E, AddOp2M); 
--- a/wally-pipelined/src/fpu/fpuaddcvt2.sv
+++ b/wally-pipelined/src/fpu/fpuaddcvt2.sv
@ -39,7 +39,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
   input [63:0] AddSumM, AddSumTcM;
   input [63:0] 	 AddFloat1M; 
   input [63:0] 	 AddFloat2M;
-   input [10:0]	 AddExp1DenormM, AddExp2DenormM;
+   input [11:0]	 AddExp1DenormM, AddExp2DenormM;
   input [10:0] 	 AddExponentM, AddExpPostSumM; //exp_pre;
   //input		 exp_valid;
   input [3:0] 	 AddSelInvM;
@ -85,7 +85,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
   //AddExponentM value pre-rounding with considerations for denormalized
   //cases/conversion cases
   assign exp_pre       = AddDenormInM ?
-                          ((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM : AddExp1DenormM))
+                          ((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM[10:0] : AddExp1DenormM[10:0]))
                          : (AddConvertM ? 11'b10000111100 : AddExponentM);


--- a/wally-pipelined/src/fpu/multiply.sv
+++ b/wally-pipelined/src/fpu/multiply.sv
@ -26,81 +26,83 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
     // wire [105:0] acc
    genvar i;	

-	assign xExt = {1'b0,~(xdenormE|xzeroE),xman};
-	assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0};
+	// assign xExt = {1'b0,~(xdenormE|xzeroE),xman};
+	// assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0};
    
-     generate
-        for(i=0; i<27; i=i+1) begin
-            booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i]));
-        end
-     endgenerate
+    //  generate
+    //     for(i=0; i<27; i=i+1) begin
+    //         booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i]));
+    //     end
+    //  endgenerate

-    assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]}; 
-    assign acc[1] = {49'b01,~e[1],pp[1],add1[0]}; 
-    assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0};
-    assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0};
-    assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0};
-    assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0};
-    assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0};
-    assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0};
-    assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0};
-    assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0};
-    assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0};
-    assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0};
-    assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0};
-    assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0};
-    assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0};
-    assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0};
-    assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0};
-    assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0};
-    assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0};
-    assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0};
-    assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0};
-    assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0};
-    assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0};
-    assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0};
-    assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0};
-    assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0};
-    assign acc[26] = {pp[26],add1[25], 50'b0};
+    // assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]}; 
+    // assign acc[1] = {49'b01,~e[1],pp[1],add1[0]}; 
+    // assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0};
+    // assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0};
+    // assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0};
+    // assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0};
+    // assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0};
+    // assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0};
+    // assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0};
+    // assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0};
+    // assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0};
+    // assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0};
+    // assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0};
+    // assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0};
+    // assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0};
+    // assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0};
+    // assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0};
+    // assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0};
+    // assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0};
+    // assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0};
+    // assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0};
+    // assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0};
+    // assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0};
+    // assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0};
+    // assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0};
+    // assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0};
+    // assign acc[26] = {pp[26],add1[25], 50'b0};

+//***breaks lint with warnings like: %Warning-UNOPTFLAT:      Example path: src/fpu/multiply.sv:86:  ASSIGNW
+// %Warning-UNOPTFLAT:      Example path: src/fpu/multiply.sv:22:  wallypipelinedsoc.hart.fpu.fma1.multiply.lv3add
    //*** resize adders
-     generate
-        for(i=0; i<9; i=i+1) begin
-            add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]), 
-                                           .carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1]));
-            assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0};
-        end
-     endgenerate
+    //  generate
+    //     for(i=0; i<9; i=i+1) begin
+    //         add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]), 
+    //                                        .carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1]));
+    //         assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0};
+    //     end
+    //  endgenerate

-     generate
-        for(i=0; i<6; i=i+1) begin
-            add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]), 
-                                           .carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1]));
-            assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0};
-        end
-     endgenerate
+    //  generate
+    //     for(i=0; i<6; i=i+1) begin
+    //         add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]), 
+    //                                        .carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1]));
+    //         assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0};
+    //     end
+    //  endgenerate

-    generate
-        for(i=0; i<4; i=i+1) begin
-            add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]), 
-                                            .carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1]));
-            assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0};
-        end
-    endgenerate
+    // generate
+    //     for(i=0; i<4; i=i+1) begin
+    //         add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]), 
+    //                                         .carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1]));
+    //         assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0};
+    //     end
+    // endgenerate


-    generate
-        for(i=0; i<2; i=i+1) begin
-            add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
-                                            .carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
-            assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0};
-        end
-    endgenerate
+    // generate
+    //     for(i=0; i<2; i=i+1) begin
+    //         add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
+    //                                         .carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
+    //         assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0};
+    //     end
+    // endgenerate

-    add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
-                                    .carry(carryTmp[21]), .sum(tmpsE));
-    assign sE = tmpsE[105:0];
-    assign rE = {carryTmp[21][104:0], 1'b0};
+    // add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
+    //                                 .carry(carryTmp[21]), .sum(tmpsE));
+    // assign sE = tmpsE[105:0];
+    // assign rE = {carryTmp[21][104:0], 1'b0};
 		// assign rE = 0;
 		// assign sE = acc[0] +
 		// 		   acc[1] +
@ -130,7 +132,7 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
 		// 		   acc[25] +
 		// 		   acc[26];

-			// assign sE = {53'b0,~(xdenormE|xzeroE),xman}  *  {53'b0,~(ydenormE|yzeroE),yman};
-			// assign rE = 0;
+			assign sE = {53'b0,~(xdenormE|xzeroE),xman}  *  {53'b0,~(ydenormE|yzeroE),yman};
+			assign rE = 0;
 endmodule

--- a/wally-pipelined/src/fpu/round.sv
+++ b/wally-pipelined/src/fpu/round.sv
@ -56,6 +56,10 @@ module round(v, sticky, FrmM, wsign,
 	//	0xx - do nothing
 	//	100 - tie - plus1 if v[2] = 1
 	//	101/110/111 - plus1
+
+	//***causes lint warning: %Warning-UNOPTFLAT:      Example path: src/fpu/round.sv:59:  ALWAYS
+// %Warning-UNOPTFLAT:      Example path: src/fpu/round.sv:42:  wallypipelinedsoc.hart.fpu.fma2.round.plus1
+
 	always_comb begin
 		case (FrmM)
 			3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2])));//round to nearest even
@ -66,12 +70,6 @@ module round(v, sticky, FrmM, wsign,
 			default: plus1 = 1'bx;
 		endcase
 	end
-	// assign plus1 = (rn & v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2]))) |
-	// 	       (rp & ~wsign) |
-	// 	       (rm & wsign);
-	//assign plus1 = rn && ((v[1] && v[0]) || (v[2] && (v[1]))) ||
-	//				 rp && ~wsign && (v[1] || v[0]) ||
-	//				 rm && wsign && (v[1] || v[0]);

 	// Compute rounded result 
    assign v1 = v[53:2] + 1;
--- a/wally-pipelined/src/ifu/icache.sv
+++ b/wally-pipelined/src/ifu/icache.sv
@ -27,26 +27,24 @@

 module icache(
  // Basic pipeline stuff
-  input  logic              clk, reset,
-  input  logic              StallF, StallD,
-  input  logic              FlushD,
-  // Upper bits of physical address for PC
-  input  logic [`XLEN-1:12] UpperPCNextPF,
-  // Lower 12 bits of virtual PC address, since it's faster this way
-  input  logic [11:0]       LowerPCNextF,
+  input logic 		   clk, reset,
+  input logic 		   StallF, StallD,
+  input logic 		   FlushD,
+  input logic [`XLEN-1:0]  PCNextF,
+  input logic [`XLEN-1:0]  PCPF,	      
  // Data read in from the ebu unit
-  input  logic [`XLEN-1:0]  InstrInF,
-  input  logic              InstrAckF,
+  input logic [`XLEN-1:0]  InstrInF,
+  input logic 		   InstrAckF,
  // Read requested from the ebu unit
-  output logic [`XLEN-1:0]  InstrPAdrF,
-  output logic              InstrReadF,
+  output logic [`XLEN-1:0] InstrPAdrF,
+  output logic 		   InstrReadF,
  // High if the instruction currently in the fetch stage is compressed
-  output logic              CompressedF,
+  output logic 		   CompressedF,
  // High if the icache is requesting a stall
-  output logic              ICacheStallF,
+  output logic 		   ICacheStallF,
  // The raw (not decompressed) instruction that was requested
  // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros
-  output logic [31:0]       InstrRawD
+  output logic [31:0] 	   InstrRawD
 );

    // Configuration parameters
@ -56,12 +54,10 @@ module icache(

    // Input signals to cache memory
    logic                       FlushMem;
-    logic [`XLEN-1:12]          ICacheMemReadUpperPAdr;
-    logic [11:0]                ICacheMemReadLowerAdr;
    logic                       ICacheMemWriteEnable;
    logic [ICACHELINESIZE-1:0]  ICacheMemWriteData;
-    logic [`XLEN-1:0]           ICacheMemWritePAdr;
    logic                       EndFetchState;
+    logic [`XLEN-1:0]           PCTagF, PCNextIndexF;  
    // Output signals from cache memory
    logic [31:0]   ICacheMemReadData;
    logic               ICacheMemReadValid;
@ -71,13 +67,9 @@ module icache(
  cachemem(
        .*,
        // Stall it if the pipeline is stalled, unless we're stalling it and we're ending our stall
-        .re(ICacheReadEn),
        .flush(FlushMem),
-        .ReadUpperPAdr(ICacheMemReadUpperPAdr),
-        .ReadLowerAdr(ICacheMemReadLowerAdr),
        .WriteEnable(ICacheMemWriteEnable),
        .WriteLine(ICacheMemWriteData),
-        .WritePAdr(ICacheMemWritePAdr),
        .DataWord(ICacheMemReadData),
        .DataValid(ICacheMemReadValid)
    );
@ -96,22 +88,19 @@ module icachecontroller #(parameter LINESIZE = 256) (

    // Input the address to read
    // The upper bits of the physical pc
-    input logic [`XLEN-1:12] 	UpperPCNextPF,
-    // The lower bits of the virtual pc
-    input logic [11:0] 		LowerPCNextF,
-
+    input logic [`XLEN-1:0] 	PCNextF,
+    input logic [`XLEN-1:0] 	PCPF,
    // Signals to/from cache memory
    // The read coming out of it
    input logic [31:0] 		ICacheMemReadData,
    input logic 		ICacheMemReadValid,
    // The address at which we want to search the cache memory
-    output logic [`XLEN-1:12] 	ICacheMemReadUpperPAdr,
-    output logic [11:0] 	ICacheMemReadLowerAdr,
+    output logic [`XLEN-1:0] 	PCTagF,
+    output logic [`XLEN-1:0]    PCNextIndexF,						     
    output logic 		ICacheReadEn,
    // Load data into the cache
    output logic 		ICacheMemWriteEnable,
    output logic [LINESIZE-1:0] ICacheMemWriteData,
-    output logic [`XLEN-1:0] 	ICacheMemWritePAdr,

    // Outputs to rest of ifu
    // High if the instruction in the fetch stage is compressed
@ -198,7 +187,7 @@ module icachecontroller #(parameter LINESIZE = 256) (
  
  logic [LOGWPL:0] 	     FetchCount, NextFetchCount;

-  logic [`XLEN-1:0] 	     PCPreFinalF, PCPFinalF, PCSpillF, PCNextPF;
+  logic [`XLEN-1:0] 	     PCPreFinalF, PCPFinalF, PCSpillF;
  logic [`XLEN-1:OFFSETWIDTH] PCPTrunkF;

  
@ -215,159 +204,46 @@ module icachecontroller #(parameter LINESIZE = 256) (
    //logic           FlushDLastCycleN;
    //logic           PCPMisalignedF;
  localparam [31:0]  	     NOP = 32'h13;
-  logic [`XLEN-1:0] 	     PCPF;
+  //logic [`XLEN-1:0] 	     PCPF;

  logic 		     reset_q;
+  logic [1:0] 		     PCMux_q;
+  
  
    // Misaligned signals
    //logic [`XLEN:0] MisalignedInstrRawF;
    //logic           MisalignedStall;
    // Cache fault signals
    //logic           FaultStall;
-
-  assign PCNextPF = {UpperPCNextPF, LowerPCNextF};
  
-  flopenl #(`XLEN) PCPFFlop(clk, reset, SavePC & ~StallF, PCPFinalF, `RESET_VECTOR, PCPF);
+  //flopenl #(`XLEN) PCPFFlop(clk, reset, SavePC & ~StallF, PCPFinalF, `RESET_VECTOR, PCPF);
  // on spill we want to get the first 2 bytes of the next cache block.
  // the spill only occurs if the PCPF mod BlockByteLength == -2.  Therefore we can
  // simply add 2 to land on the next cache block.
  assign PCSpillF = PCPF + 2'b10;

  // now we have to select between these three PCs
-  assign PCPreFinalF = PCMux[0] | StallF ? PCPF : PCNextPF; // *** don't like the stallf
-  //assign PCPreFinalF = PCMux[0] ? PCPF : PCNextPF; // *** don't like the stallf 
+  assign PCPreFinalF = PCMux[0] | StallF ? PCPF : PCNextF; // *** don't like the stallf
  assign PCPFinalF = PCMux[1] ? PCSpillF : PCPreFinalF;
+
+  // this mux needs to be delayed 1 cycle as it occurs 1 pipeline stage later.
+  // *** read enable may not be necessary.
+  flopenr #(2) PCMuxReg(.clk(clk),
+			.reset(reset),
+			.en(ICacheReadEn),
+			.d(PCMux),
+			.q(PCMux_q));
  
-  
+  assign PCTagF = PCMux_q[1] ? PCSpillF : PCPF;
+  assign PCNextIndexF = PCPFinalF;
  
  // truncate the offset from PCPF for memory address generation
-  assign PCPTrunkF = PCPFinalF[`XLEN-1:OFFSETWIDTH];
+  assign PCPTrunkF = PCTagF[`XLEN-1:OFFSETWIDTH];
  
    // Detect if the instruction is compressed
  assign CompressedF = FinalInstrRawF[1:0] != 2'b11;


-    // Handle happy path (data in cache, reads aligned)
-/* -----\/----- EXCLUDED -----\/-----
-
-    generate
-        if (`XLEN == 32) begin
-            assign AlignedInstrRawF = PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData;
-            //assign PCPMisalignedF = PCPF[1] && ~CompressedF;
-        end else begin
-            assign AlignedInstrRawF = PCPF[2]
-                ? (PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData[63:32])
-                : (PCPF[1] ? ICacheMemReadData[47:16] : ICacheMemReadData[31:0]);
-            //assign PCPMisalignedF = PCPF[2] && PCPF[1] && ~CompressedF;
-        end
-    endgenerate
- -----/\----- EXCLUDED -----/\----- */
-
-    //flopenr #(32) AlignedInstrRawDFlop(clk, reset, ~StallD, AlignedInstrRawF, AlignedInstrRawD);
-    //flopr   #(1)  FlushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCycleN | ~StallF), FlushDLastCycleN);
-
-    //mux2    #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCycleN, InstrRawD);
-
-    // Stall for faults or misaligned reads
-/* -----\/----- EXCLUDED -----\/-----
-    always_comb begin
-        assign ICacheStallF = FaultStall | MisalignedStall;
-    end
- -----/\----- EXCLUDED -----/\----- */
-
-
-    // Handle misaligned, noncompressed reads
-
-/* -----\/----- EXCLUDED -----\/-----
-    logic           MisalignedState, NextMisalignedState;
-    logic [15:0]    MisalignedHalfInstrF;
-    logic [15:0]    UpperHalfWord;
- -----/\----- EXCLUDED -----/\----- */
-
-/* -----\/----- EXCLUDED -----\/-----
-    flopenr #(16) MisalignedHalfInstrFlop(clk, reset, ~FaultStall & (PCPMisalignedF & MisalignedState), AlignedInstrRawF[15:0], MisalignedHalfInstrF);
-    flopenr #(1)  MisalignedStateFlop(clk, reset, ~FaultStall, NextMisalignedState, MisalignedState);
- -----/\----- EXCLUDED -----/\----- */
-
-    // When doing a misaligned read, swizzle the bits correctly
-/* -----\/----- EXCLUDED -----\/-----
-    generate
-        if (`XLEN == 32) begin
-            assign UpperHalfWord = ICacheMemReadData[31:16];
-        end else begin
-            assign UpperHalfWord = ICacheMemReadData[63:48];
-        end
-    endgenerate
-    always_comb begin
-        if (MisalignedState) begin
-            assign MisalignedInstrRawF = {16'b0, UpperHalfWord};
-        end else begin
-            assign MisalignedInstrRawF = {ICacheMemReadData[15:0], MisalignedHalfInstrF};
-        end
-    end
- -----/\----- EXCLUDED -----/\----- */
-
-    // Manage internal state and stall when necessary
-/* -----\/----- EXCLUDED -----\/-----
-    always_comb begin
-        assign MisalignedStall = PCPMisalignedF & MisalignedState;
-        assign NextMisalignedState = ~PCPMisalignedF | ~MisalignedState;
-    end
- -----/\----- EXCLUDED -----/\----- */
-
-    // Pick the correct address to read
-/* -----\/----- EXCLUDED -----\/-----
-    generate
-        if (`XLEN == 32) begin
-            assign ICacheMemReadLowerAdr = {LowerPCNextF[11:2] + (PCPMisalignedF & ~MisalignedState), 2'b00};
-        end else begin
-            assign ICacheMemReadLowerAdr = {LowerPCNextF[11:3] + (PCPMisalignedF & ~MisalignedState), 3'b00};
-        end
-    endgenerate
- -----/\----- EXCLUDED -----/\----- */
-    // TODO Handle reading instructions that cross page boundaries
-    //assign ICacheMemReadUpperPAdr = UpperPCNextPF;
-
-
-    // Handle cache faults
-
-
-/* -----\/----- EXCLUDED -----\/-----
-    logic               FetchState, BeginFetchState;
-    logic [LOGWPL:0]    FetchWordNum, NextFetchWordNum;
-    logic [`XLEN-1:0]   LineAlignedPCPF;
-
-    flopr #(1) FetchStateFlop(clk, reset, BeginFetchState | (FetchState & ~EndFetchState), FetchState);
-    flopr #(LOGWPL+1) FetchWordNumFlop(clk, reset, NextFetchWordNum, FetchWordNum);
-
-
-    // Enter the fetch state when we hit a cache fault
-    always_comb begin
-        BeginFetchState = ~ICacheMemReadValid & ~FetchState & (FetchWordNum == 0);
-    end
-    // Exit the fetch state once the cache line has been loaded
-    flopr #(1) EndFetchStateFlop(clk, reset, ICacheMemWriteEnable, EndFetchState);
-
-    // Machinery to request the correct addresses from main memory
-    always_comb begin
-        InstrReadF = FetchState & ~EndFetchState & ~ICacheMemWriteEnable; // next stage logic
-        LineAlignedPCPF = {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}}; // the fetch address for abh?
-        InstrPAdrF = LineAlignedPCPF + FetchWordNum*(`XLEN/8); // ?
-        NextFetchWordNum = FetchState ? FetchWordNum+InstrAckF : {LOGWPL+1{1'b0}}; // convert to enable
-    end
-
-    // Write to cache memory when we have the line here
-    always_comb begin
-        ICacheMemWritePAdr = LineAlignedPCPF;
-        ICacheMemWriteEnable = FetchWordNum == {1'b1, {LOGWPL{1'b0}}} & FetchState & ~EndFetchState;
-    end
-
-    // Stall the pipeline while loading a new line from memory
-    always_comb begin
-        FaultStall = FetchState | ~ICacheMemReadValid;
-    end
- -----/\----- EXCLUDED -----/\----- */
-
  // the FSM is always runing, do not stall.
  flopr #(5) stateReg(.clk(clk),
 		      .reset(reset),
@ -638,12 +514,6 @@ module icachecontroller #(parameter LINESIZE = 256) (
    flopr   #(1)  flushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCyclen | ~StallF), FlushDLastCyclen);
  mux2    #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCyclen, InstrRawD);
  //assign InstrRawD = AlignedInstrRawD;
-  
-  
-  assign {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr} = PCPFinalF;

-  assign ICacheMemWritePAdr = PCPFinalF;
-
-  
  
 endmodule
--- a/wally-pipelined/src/ifu/icacheMem.sv
+++ b/wally-pipelined/src/ifu/icacheMem.sv
@ -0,0 +1,102 @@
+`include "wally-config.vh"
+// Direct-mapped cache storage: data and tag arrays indexed by PCNextIndexF set bits; hit check against PCTagF tag bits.
+module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) (
+    // Clock and reset (reset also appears in the always_ff sensitivity list below)
+    input logic 	       clk,
+    input logic 	       reset,
+    // If flush is high, invalidate the entire cache (all ValidOut bits are cleared)
+    input logic 	       flush,
+													    
+    // Address inputs: PCTagF supplies the tag field and the word select, PCNextIndexF supplies the set index
+    input logic [`XLEN-1:0]    PCTagF, // physical tag address
+    input logic [`XLEN-1:0]    PCNextIndexF, // only bits [SETEND:SETBEGIN] are used in this module
+    // Write new data to the cache
+    input logic 	       WriteEnable, // also marks the indexed line valid (see always_ff below)
+    input logic [LINESIZE-1:0] WriteLine,
+    // Output the word, as well as if it is valid
+    output logic [31:0]        DataWord, // *** was WORDSIZE-1
+    output logic 	       DataValid
+);
+
+    // Address field layout, low bits to high bits: byte-in-word | word offset | set index | tag
+    localparam integer WORDWIDTH = $clog2(WORDSIZE/8);
+    localparam integer OFFSETWIDTH = $clog2(LINESIZE/WORDSIZE);
+    localparam integer SETWIDTH = $clog2(NUMLINES);
+    localparam integer TAGWIDTH = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH;
+
+    localparam integer OFFSETBEGIN = WORDWIDTH;
+    localparam integer OFFSETEND = OFFSETBEGIN+OFFSETWIDTH-1;
+    localparam integer SETBEGIN = OFFSETEND+1;
+    localparam integer SETEND = SETBEGIN + SETWIDTH - 1;
+    localparam integer TAGBEGIN = SETEND + 1;
+    localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1;
+
+    // Machinery to read from and write to the correct addresses in memory
+    logic [LINESIZE-1:0]    ReadLine;
+    logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed; // ReadLine as WORDSIZE-bit words; not read anywhere in this module
+
+    // Machinery to check if a given read is valid and is the desired value
+    logic [TAGWIDTH-1:0]    DataTag;
+    logic [NUMLINES-1:0]    ValidOut; // one valid bit per cache line
+    logic                   DataValidBit; // registered copy of the indexed valid bit
+
+    // Depth is number of bits in one "word" of the memory, width is number of such words
+    sram1rw #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem (
+        .*, // remaining ports connect implicitly by name; sram1rw's port list is not visible here
+        .Addr(PCNextIndexF[SETEND:SETBEGIN]),
+        .ReadData(ReadLine),
+        .WriteData(WriteLine)
+    );
+    sram1rw #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags (
+        .*, // remaining ports connect implicitly by name; sram1rw's port list is not visible here
+        .Addr(PCNextIndexF[SETEND:SETBEGIN]),
+        .ReadData(DataTag),
+        .WriteData(PCTagF[TAGEND:TAGBEGIN])
+    );
+
+    // Select a 32-bit window out of the read line at 16-bit granularity, chosen by PCTagF[4:1]
+    //assign DataWord = ReadLineTransformed[ReadOffset];
+  //logic [31:0] tempRD;
+  always_comb begin
+    case (PCTagF[4:1]) // bit indices below are hard-coded up to 255, i.e. for a 256-bit ReadLine
+      0: DataWord = ReadLine[31:0];
+      1: DataWord = ReadLine[47:16];
+      2: DataWord = ReadLine[63:32];
+      3: DataWord = ReadLine[79:48];
+
+      4: DataWord = ReadLine[95:64];
+      5: DataWord = ReadLine[111:80];
+      6: DataWord = ReadLine[127:96];
+      7: DataWord = ReadLine[143:112];      
+
+      8: DataWord = ReadLine[159:128];      
+      9: DataWord = ReadLine[175:144];      
+      10: DataWord = ReadLine[191:160];      
+      11: DataWord = ReadLine[207:176];
+
+      12: DataWord = ReadLine[223:192];
+      13: DataWord = ReadLine[239:208];
+      14: DataWord = ReadLine[255:224];
+      15: DataWord = {16'b0, ReadLine[255:240]}; // last halfword of the line; upper 16 bits are zero-filled
+    endcase
+  end
+    genvar i;
+    generate
+        for (i=0; i < LINESIZE/WORDSIZE; i++) begin
+            assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE];
+        end
+    endgenerate
+
+    // Valid bits: all cleared on reset or flush, otherwise the indexed line's bit is set when WriteEnable is high
+    always_ff @(posedge clk, posedge reset) begin
+        if (reset || flush) begin
+            ValidOut <= {NUMLINES{1'b0}};
+        end else begin
+            if (WriteEnable) begin
+                ValidOut[PCNextIndexF[SETEND:SETBEGIN]] <= 1;
+            end
+        end
+        DataValidBit <= ValidOut[PCNextIndexF[SETEND:SETBEGIN]]; // NOTE(review): assigned outside the reset/flush branch, so it is not cleared directly by reset - confirm intended
+    end
+    assign DataValid = DataValidBit && (DataTag == PCTagF[TAGEND:TAGBEGIN]); // hit: valid bit set and stored tag equals the PCTagF tag field
+endmodule
--- a/wally-pipelined/src/ifu/ifu.sv
+++ b/wally-pipelined/src/ifu/ifu.sv
@ -105,11 +105,9 @@ module ifu (

  // jarred 2021-03-14 Add instrution cache block to remove rd2
  assign PCNextPF = PCNextF; // Temporary workaround until iTLB is live
-  icache icache(
-    .*,
-    .UpperPCNextPF(PCNextPF[`XLEN-1:12]),
-    .LowerPCNextF(PCNextPF[11:0])
-  );
+  icache icache(.*);
+  
+

  assign PrivilegedChangePCM = RetM | TrapM;

--- a/wally-pipelined/testbench/testbench-imperas.sv
+++ b/wally-pipelined/testbench/testbench-imperas.sv
@ -30,13 +30,14 @@ module testbench();
  parameter DEBUG = 0;
  parameter TESTSBP = 0;
  parameter TESTSPERIPH = 0 ; // set to 0 for regression
+  localparam MAXSIGLEN = 1000000;
  
  logic        clk;
  logic        reset;

  int test, i, errors, totalerrors;
-  logic [31:0] sig32[0:10000];
-  logic [`XLEN-1:0] signature[0:10000];
+  logic [31:0] sig32[0:MAXSIGLEN];
+  logic [`XLEN-1:0] signature[0:MAXSIGLEN];
  logic [`XLEN-1:0] testadr;
  string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
  logic [31:0] InstrW;
@ -602,7 +603,7 @@ string tests32f[] = '{
        $display("Code ended with ecall with gp = 1");
        #60; // give time for instructions in pipeline to finish
        // clear signature to prevent contamination from previous tests
-        for(i=0; i<10000; i=i+1) begin
+        for(i=0; i<MAXSIGLEN; i=i+1) begin
          sig32[i] = 'bx;
        end

@ -610,7 +611,7 @@ string tests32f[] = '{
        signame = {"../../imperas-riscv-tests/work/", tests[test], ".signature.output"};
        $readmemh(signame, sig32);
        i = 0;
-        while (i < 10000) begin
+        while (i < MAXSIGLEN) begin
          if (`XLEN == 32) begin
            signature[i] = sig32[i];
            i = i+1;