From fdf4954a207ae8207a726ac1a41425bab24d406c Mon Sep 17 00:00:00 2001
From: Ross Thompson <stephen.thompson.37@us.af.mil>
Date: Mon, 3 May 2021 09:04:48 -0500
Subject: [PATCH 1/7] Added the function name back to wave.do

---
 wally-pipelined/regression/wave.do | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do
index 428bf5b97..94e955fe0 100644
--- a/wally-pipelined/regression/wave.do
+++ b/wally-pipelined/regression/wave.do
@@ -3,6 +3,7 @@ quietly virtual function -install /testbench/dut/hart/ifu/icache/cachemem -env /
 quietly WaveActivateNextPane {} 0
 add wave -noupdate /testbench/clk
 add wave -noupdate /testbench/reset
+add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName
 add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE
 add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName
 add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE
@@ -112,8 +113,6 @@ add wave -noupdate -group dcache /testbench/dut/hart/MemPAdrM
 add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemAccessM
 add wave -noupdate -group dcache /testbench/dut/hart/dmem/AtomicMaskedM
 add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemAckW
-add wave -noupdate -group dcache /testbench/dut/hart/dmem/genblk1/lrM
-add wave -noupdate -group dcache /testbench/dut/hart/dmem/genblk1/scM
 add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D
 add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D
 add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E
@@ -223,9 +222,8 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED
-add wave -noupdate /testbench/dut/hart/dmem/genblk1/scM
 TreeUpdate [SetDefaultTree]
-WaveRestoreCursors {{Cursor 2} {12215488 ns} 0} {{Cursor 4} {22127 ns} 0}
+WaveRestoreCursors {{Cursor 2} {5792261 ns} 0} {{Cursor 4} {1318991 ns} 0}
 quietly wave cursor active 2
 configure wave -namecolwidth 250
 configure wave -valuecolwidth 513
@@ -241,4 +239,4 @@ configure wave -griddelta 40
 configure wave -timeline 0
 configure wave -timelineunits ns
 update
-WaveRestoreZoom {21993 ns} {22181 ns}
+WaveRestoreZoom {1311008 ns} {1321254 ns}

From 12b978fec2ac20021c3b62c5228dd32b16f64764 Mon Sep 17 00:00:00 2001
From: Ross Thompson <stephen.thompson.37@us.af.mil>
Date: Mon, 3 May 2021 12:03:17 -0500
Subject: [PATCH 2/7] Eliminated the extra register and fixed the icache ports.
 Still need to support the physical tag check and write in icache memory.
 Still need to reduce the icache to a 1-port SRAM. I would like to refactor
 the icache code.

---
 wally-pipelined/regression/wave.do   |  59 +++++-----
 wally-pipelined/src/cache/dmapped.sv | 122 --------------------
 wally-pipelined/src/ifu/icache.sv    | 164 +++------------------------
 wally-pipelined/src/ifu/icacheMem.sv | 124 ++++++++++++++++++++
 wally-pipelined/src/ifu/ifu.sv       |   8 +-
 5 files changed, 175 insertions(+), 302 deletions(-)
 create mode 100644 wally-pipelined/src/ifu/icacheMem.sv

diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do
index 94e955fe0..962f25811 100644
--- a/wally-pipelined/regression/wave.do
+++ b/wally-pipelined/regression/wave.do
@@ -3,6 +3,7 @@ quietly virtual function -install /testbench/dut/hart/ifu/icache/cachemem -env /
 quietly WaveActivateNextPane {} 0
 add wave -noupdate /testbench/clk
 add wave -noupdate /testbench/reset
+add wave -noupdate /testbench/memfilename
 add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName
 add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE
 add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName
@@ -20,13 +21,13 @@ add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap
 add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM
 add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM
 add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/DataStall
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD
+add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/BPPredWrongE
+add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM
+add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/RetM
+add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/TrapM
+add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/LoadStallD
+add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/DataStall
+add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/MulDivStallD
 add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF
 add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushD
 add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushE
@@ -37,25 +38,25 @@ add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbe
 add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE
 add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM
 add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW
-add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPPredF
-add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBValidF
-add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPInstrClassF
-add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBPredPCF
-add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/RASPCF
-add wave -noupdate -expand -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePC
-add wave -noupdate -expand -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdateEN
-add wave -noupdate -expand -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePrediction
-add wave -noupdate -expand -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateEN
-add wave -noupdate -expand -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdatePC
-add wave -noupdate -expand -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateTarget
-add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/TargetWrongE
-add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/FallThroughWrongE
-add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionPCWrongE
-add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/InstrClassE
-add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionInstrClassWrongE
-add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredClassNonCFIWrongE
-add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE
-add wave -noupdate -expand -group Bpred /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE
+add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPPredF
+add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBValidF
+add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPInstrClassF
+add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBPredPCF
+add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/RASPCF
+add wave -noupdate -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePC
+add wave -noupdate -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdateEN
+add wave -noupdate -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePrediction
+add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateEN
+add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdatePC
+add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateTarget
+add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/TargetWrongE
+add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/FallThroughWrongE
+add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionPCWrongE
+add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/InstrClassE
+add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionInstrClassWrongE
+add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredClassNonCFIWrongE
+add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE
+add wave -noupdate -group Bpred /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE
 add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrFName
 add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrD
 add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrE
@@ -223,8 +224,8 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED
 TreeUpdate [SetDefaultTree]
-WaveRestoreCursors {{Cursor 2} {5792261 ns} 0} {{Cursor 4} {1318991 ns} 0}
-quietly wave cursor active 2
+WaveRestoreCursors {{Cursor 2} {5796691 ns} 0} {{Cursor 4} {1318991 ns} 0}
+quietly wave cursor active 1
 configure wave -namecolwidth 250
 configure wave -valuecolwidth 513
 configure wave -justifyvalue left
@@ -239,4 +240,4 @@ configure wave -griddelta 40
 configure wave -timeline 0
 configure wave -timelineunits ns
 update
-WaveRestoreZoom {1311008 ns} {1321254 ns}
+WaveRestoreZoom {5795108 ns} {5798036 ns}
diff --git a/wally-pipelined/src/cache/dmapped.sv b/wally-pipelined/src/cache/dmapped.sv
index 34864d393..f40da412a 100644
--- a/wally-pipelined/src/cache/dmapped.sv
+++ b/wally-pipelined/src/cache/dmapped.sv
@@ -125,128 +125,6 @@ module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par
     assign DataValid = DataValidBit && (DataTag == ReadTag);
 endmodule
 
-module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) (
-    // Pipeline stuff
-    input  logic clk,
-    input  logic reset,
-    input  logic re,
-    // If flush is high, invalidate the entire cache
-    input  logic flush,
-    // Select which address to read (broken for efficiency's sake)
-    input  logic [`XLEN-1:12]   ReadUpperPAdr,
-    input  logic [11:0]         ReadLowerAdr,
-    // Write new data to the cache
-    input  logic                WriteEnable,
-    input  logic [LINESIZE-1:0] WriteLine,
-    input  logic [`XLEN-1:0]    WritePAdr,
-    // Output the word, as well as if it is valid
-    output logic [31:0] DataWord, // *** was WORDSIZE-1
-    output logic                DataValid
-);
-
-    // Various compile-time constants
-    localparam integer WORDWIDTH = $clog2(WORDSIZE/8);
-    localparam integer OFFSETWIDTH = $clog2(LINESIZE/WORDSIZE);
-    localparam integer SETWIDTH = $clog2(NUMLINES);
-    localparam integer TAGWIDTH = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH;
-
-    localparam integer OFFSETBEGIN = WORDWIDTH;
-    localparam integer OFFSETEND = OFFSETBEGIN+OFFSETWIDTH-1;
-    localparam integer SETBEGIN = OFFSETEND+1;
-    localparam integer SETEND = SETBEGIN + SETWIDTH - 1;
-    localparam integer TAGBEGIN = SETEND + 1;
-    localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1;
-
-    // Machinery to read from and write to the correct addresses in memory
-    logic [`XLEN-1:0]       ReadPAdr;
-    logic [`XLEN-1:0]       OldReadPAdr;
-    logic [OFFSETWIDTH-1:0] ReadOffset, WriteOffset;
-    logic [SETWIDTH-1:0]    ReadSet, WriteSet;
-    logic [TAGWIDTH-1:0]    ReadTag, WriteTag;
-    logic [LINESIZE-1:0]    ReadLine;
-    logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed;
-
-    // Machinery to check if a given read is valid and is the desired value
-    logic [TAGWIDTH-1:0]    DataTag;
-    logic [NUMLINES-1:0]    ValidOut;
-    logic                   DataValidBit;
-
-    flopenr #(`XLEN) ReadPAdrFlop(clk, reset, re, ReadPAdr, OldReadPAdr);
-
-    // Assign the read and write addresses in cache memory
-    always_comb begin
-        ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN];
-        ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
-        ReadSet = ReadPAdr[SETEND:SETBEGIN];
-        ReadTag = OldReadPAdr[TAGEND:TAGBEGIN];
-
-        WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN];
-        WriteSet = WritePAdr[SETEND:SETBEGIN];
-        WriteTag = WritePAdr[TAGEND:TAGBEGIN];
-    end
-
-    // Depth is number of bits in one "word" of the memory, width is number of such words
-    Sram1Read1Write #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem (
-        .*,
-        .ReadAddr(ReadSet),
-        .ReadData(ReadLine),
-        .WriteAddr(WriteSet),
-        .WriteData(WriteLine)
-    );
-    Sram1Read1Write #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags (
-        .*,
-        .ReadAddr(ReadSet),
-        .ReadData(DataTag),
-        .WriteAddr(WriteSet),
-        .WriteData(WriteTag)
-    );
-
-    // Pick the right bits coming out the read line
-    //assign DataWord = ReadLineTransformed[ReadOffset];
-  //logic [31:0] tempRD;
-  always_comb begin
-    case (OldReadPAdr[4:1])
-      0: DataWord = ReadLine[31:0];
-      1: DataWord = ReadLine[47:16];
-      2: DataWord = ReadLine[63:32];
-      3: DataWord = ReadLine[79:48];
-
-      4: DataWord = ReadLine[95:64];
-      5: DataWord = ReadLine[111:80];
-      6: DataWord = ReadLine[127:96];
-      7: DataWord = ReadLine[143:112];      
-
-      8: DataWord = ReadLine[159:128];      
-      9: DataWord = ReadLine[175:144];      
-      10: DataWord = ReadLine[191:160];      
-      11: DataWord = ReadLine[207:176];
-
-      12: DataWord = ReadLine[223:192];
-      13: DataWord = ReadLine[239:208];
-      14: DataWord = ReadLine[255:224];
-      15: DataWord = {16'b0, ReadLine[255:240]};
-    endcase
-  end
-    genvar i;
-    generate
-        for (i=0; i < LINESIZE/WORDSIZE; i++) begin
-            assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE];
-        end
-    endgenerate
-
-    // Correctly handle the valid bits
-    always_ff @(posedge clk, posedge reset) begin
-        if (reset || flush) begin
-            ValidOut <= {NUMLINES{1'b0}};
-        end else begin
-            if (WriteEnable) begin
-                ValidOut[WriteSet] <= 1;
-            end
-        end
-        DataValidBit <= ValidOut[ReadSet];
-    end
-    assign DataValid = DataValidBit && (DataTag == ReadTag);
-endmodule
 
 // Write-through direct-mapped memory
 module wtdirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) (
diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv
index f524be384..5821b6559 100644
--- a/wally-pipelined/src/ifu/icache.sv
+++ b/wally-pipelined/src/ifu/icache.sv
@@ -27,26 +27,24 @@
 
 module icache(
   // Basic pipeline stuff
-  input  logic              clk, reset,
-  input  logic              StallF, StallD,
-  input  logic              FlushD,
-  // Upper bits of physical address for PC
-  input  logic [`XLEN-1:12] UpperPCNextPF,
-  // Lower 12 bits of virtual PC address, since it's faster this way
-  input  logic [11:0]       LowerPCNextF,
+  input logic 		   clk, reset,
+  input logic 		   StallF, StallD,
+  input logic 		   FlushD,
+  input logic [`XLEN-1:0]  PCNextF,
+  input logic [`XLEN-1:0]  PCPF,	      
   // Data read in from the ebu unit
-  input  logic [`XLEN-1:0]  InstrInF,
-  input  logic              InstrAckF,
+  input logic [`XLEN-1:0]  InstrInF,
+  input logic 		   InstrAckF,
   // Read requested from the ebu unit
-  output logic [`XLEN-1:0]  InstrPAdrF,
-  output logic              InstrReadF,
+  output logic [`XLEN-1:0] InstrPAdrF,
+  output logic 		   InstrReadF,
   // High if the instruction currently in the fetch stage is compressed
-  output logic              CompressedF,
+  output logic 		   CompressedF,
   // High if the icache is requesting a stall
-  output logic              ICacheStallF,
+  output logic 		   ICacheStallF,
   // The raw (not decompressed) instruction that was requested
   // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros
-  output logic [31:0]       InstrRawD
+  output logic [31:0] 	   InstrRawD
 );
 
     // Configuration parameters
@@ -96,10 +94,8 @@ module icachecontroller #(parameter LINESIZE = 256) (
 
     // Input the address to read
     // The upper bits of the physical pc
-    input logic [`XLEN-1:12] 	UpperPCNextPF,
-    // The lower bits of the virtual pc
-    input logic [11:0] 		LowerPCNextF,
-
+    input logic [`XLEN-1:0] 	PCNextF,
+    input logic [`XLEN-1:0]     PCPF,
     // Signals to/from cache memory
     // The read coming out of it
     input logic [31:0] 		ICacheMemReadData,
@@ -198,7 +194,7 @@ module icachecontroller #(parameter LINESIZE = 256) (
   
   logic [LOGWPL:0] 	     FetchCount, NextFetchCount;
 
-  logic [`XLEN-1:0] 	     PCPreFinalF, PCPFinalF, PCSpillF, PCNextPF;
+  logic [`XLEN-1:0] 	     PCPreFinalF, PCPFinalF, PCSpillF;
   logic [`XLEN-1:OFFSETWIDTH] PCPTrunkF;
 
   
@@ -215,7 +211,7 @@ module icachecontroller #(parameter LINESIZE = 256) (
     //logic           FlushDLastCycleN;
     //logic           PCPMisalignedF;
   localparam [31:0]  	     NOP = 32'h13;
-  logic [`XLEN-1:0] 	     PCPF;
+  //logic [`XLEN-1:0] 	     PCPF;
 
   logic 		     reset_q;
   
@@ -224,18 +220,15 @@ module icachecontroller #(parameter LINESIZE = 256) (
     //logic           MisalignedStall;
     // Cache fault signals
     //logic           FaultStall;
-
-  assign PCNextPF = {UpperPCNextPF, LowerPCNextF};
   
-  flopenl #(`XLEN) PCPFFlop(clk, reset, SavePC & ~StallF, PCPFinalF, `RESET_VECTOR, PCPF);
+  //flopenl #(`XLEN) PCPFFlop(clk, reset, SavePC & ~StallF, PCPFinalF, `RESET_VECTOR, PCPF);
   // on spill we want to get the first 2 bytes of the next cache block.
   // the spill only occurs if the PCPF mod BlockByteLength == -2.  Therefore we can
   // simply add 2 to land on the next cache block.
   assign PCSpillF = PCPF + 2'b10;
 
   // now we have to select between these three PCs
-  assign PCPreFinalF = PCMux[0] | StallF ? PCPF : PCNextPF; // *** don't like the stallf
-  //assign PCPreFinalF = PCMux[0] ? PCPF : PCNextPF; // *** don't like the stallf 
+  assign PCPreFinalF = PCMux[0] | StallF ? PCPF : PCNextF; // *** don't like the stallf
   assign PCPFinalF = PCMux[1] ? PCSpillF : PCPreFinalF;
   
   
@@ -247,127 +240,6 @@ module icachecontroller #(parameter LINESIZE = 256) (
   assign CompressedF = FinalInstrRawF[1:0] != 2'b11;
 
 
-    // Handle happy path (data in cache, reads aligned)
-/* -----\/----- EXCLUDED -----\/-----
-
-    generate
-        if (`XLEN == 32) begin
-            assign AlignedInstrRawF = PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData;
-            //assign PCPMisalignedF = PCPF[1] && ~CompressedF;
-        end else begin
-            assign AlignedInstrRawF = PCPF[2]
-                ? (PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData[63:32])
-                : (PCPF[1] ? ICacheMemReadData[47:16] : ICacheMemReadData[31:0]);
-            //assign PCPMisalignedF = PCPF[2] && PCPF[1] && ~CompressedF;
-        end
-    endgenerate
- -----/\----- EXCLUDED -----/\----- */
-
-    //flopenr #(32) AlignedInstrRawDFlop(clk, reset, ~StallD, AlignedInstrRawF, AlignedInstrRawD);
-    //flopr   #(1)  FlushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCycleN | ~StallF), FlushDLastCycleN);
-
-    //mux2    #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCycleN, InstrRawD);
-
-    // Stall for faults or misaligned reads
-/* -----\/----- EXCLUDED -----\/-----
-    always_comb begin
-        assign ICacheStallF = FaultStall | MisalignedStall;
-    end
- -----/\----- EXCLUDED -----/\----- */
-
-
-    // Handle misaligned, noncompressed reads
-
-/* -----\/----- EXCLUDED -----\/-----
-    logic           MisalignedState, NextMisalignedState;
-    logic [15:0]    MisalignedHalfInstrF;
-    logic [15:0]    UpperHalfWord;
- -----/\----- EXCLUDED -----/\----- */
-
-/* -----\/----- EXCLUDED -----\/-----
-    flopenr #(16) MisalignedHalfInstrFlop(clk, reset, ~FaultStall & (PCPMisalignedF & MisalignedState), AlignedInstrRawF[15:0], MisalignedHalfInstrF);
-    flopenr #(1)  MisalignedStateFlop(clk, reset, ~FaultStall, NextMisalignedState, MisalignedState);
- -----/\----- EXCLUDED -----/\----- */
-
-    // When doing a misaligned read, swizzle the bits correctly
-/* -----\/----- EXCLUDED -----\/-----
-    generate
-        if (`XLEN == 32) begin
-            assign UpperHalfWord = ICacheMemReadData[31:16];
-        end else begin
-            assign UpperHalfWord = ICacheMemReadData[63:48];
-        end
-    endgenerate
-    always_comb begin
-        if (MisalignedState) begin
-            assign MisalignedInstrRawF = {16'b0, UpperHalfWord};
-        end else begin
-            assign MisalignedInstrRawF = {ICacheMemReadData[15:0], MisalignedHalfInstrF};
-        end
-    end
- -----/\----- EXCLUDED -----/\----- */
-
-    // Manage internal state and stall when necessary
-/* -----\/----- EXCLUDED -----\/-----
-    always_comb begin
-        assign MisalignedStall = PCPMisalignedF & MisalignedState;
-        assign NextMisalignedState = ~PCPMisalignedF | ~MisalignedState;
-    end
- -----/\----- EXCLUDED -----/\----- */
-
-    // Pick the correct address to read
-/* -----\/----- EXCLUDED -----\/-----
-    generate
-        if (`XLEN == 32) begin
-            assign ICacheMemReadLowerAdr = {LowerPCNextF[11:2] + (PCPMisalignedF & ~MisalignedState), 2'b00};
-        end else begin
-            assign ICacheMemReadLowerAdr = {LowerPCNextF[11:3] + (PCPMisalignedF & ~MisalignedState), 3'b00};
-        end
-    endgenerate
- -----/\----- EXCLUDED -----/\----- */
-    // TODO Handle reading instructions that cross page boundaries
-    //assign ICacheMemReadUpperPAdr = UpperPCNextPF;
-
-
-    // Handle cache faults
-
-
-/* -----\/----- EXCLUDED -----\/-----
-    logic               FetchState, BeginFetchState;
-    logic [LOGWPL:0]    FetchWordNum, NextFetchWordNum;
-    logic [`XLEN-1:0]   LineAlignedPCPF;
-
-    flopr #(1) FetchStateFlop(clk, reset, BeginFetchState | (FetchState & ~EndFetchState), FetchState);
-    flopr #(LOGWPL+1) FetchWordNumFlop(clk, reset, NextFetchWordNum, FetchWordNum);
-
-
-    // Enter the fetch state when we hit a cache fault
-    always_comb begin
-        BeginFetchState = ~ICacheMemReadValid & ~FetchState & (FetchWordNum == 0);
-    end
-    // Exit the fetch state once the cache line has been loaded
-    flopr #(1) EndFetchStateFlop(clk, reset, ICacheMemWriteEnable, EndFetchState);
-
-    // Machinery to request the correct addresses from main memory
-    always_comb begin
-        InstrReadF = FetchState & ~EndFetchState & ~ICacheMemWriteEnable; // next stage logic
-        LineAlignedPCPF = {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}}; // the fetch address for abh?
-        InstrPAdrF = LineAlignedPCPF + FetchWordNum*(`XLEN/8); // ?
-        NextFetchWordNum = FetchState ? FetchWordNum+InstrAckF : {LOGWPL+1{1'b0}}; // convert to enable
-    end
-
-    // Write to cache memory when we have the line here
-    always_comb begin
-        ICacheMemWritePAdr = LineAlignedPCPF;
-        ICacheMemWriteEnable = FetchWordNum == {1'b1, {LOGWPL{1'b0}}} & FetchState & ~EndFetchState;
-    end
-
-    // Stall the pipeline while loading a new line from memory
-    always_comb begin
-        FaultStall = FetchState | ~ICacheMemReadValid;
-    end
- -----/\----- EXCLUDED -----/\----- */
-
   // the FSM is always runing, do not stall.
   flopr #(5) stateReg(.clk(clk),
 		      .reset(reset),
diff --git a/wally-pipelined/src/ifu/icacheMem.sv b/wally-pipelined/src/ifu/icacheMem.sv
new file mode 100644
index 000000000..345e5e453
--- /dev/null
+++ b/wally-pipelined/src/ifu/icacheMem.sv
@@ -0,0 +1,124 @@
+`include "wally-config.vh"
+
+module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) (
+    // Pipeline stuff
+    input  logic clk,
+    input  logic reset,
+    input  logic re,
+    // If flush is high, invalidate the entire cache
+    input  logic flush,
+    // Select which address to read (broken for efficiency's sake)
+    input  logic [`XLEN-1:12]   ReadUpperPAdr, // physical address Must come one cycle later
+    input  logic [11:0]         ReadLowerAdr, // virtual address
+    // Write new data to the cache
+    input  logic                WriteEnable,
+    input  logic [LINESIZE-1:0] WriteLine,
+    input  logic [`XLEN-1:0]    WritePAdr,
+    // Output the word, as well as if it is valid
+    output logic [31:0] DataWord, // *** was WORDSIZE-1
+    output logic                DataValid
+);
+
+    // Various compile-time constants
+    localparam integer WORDWIDTH = $clog2(WORDSIZE/8);
+    localparam integer OFFSETWIDTH = $clog2(LINESIZE/WORDSIZE);
+    localparam integer SETWIDTH = $clog2(NUMLINES);
+    localparam integer TAGWIDTH = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH;
+
+    localparam integer OFFSETBEGIN = WORDWIDTH;
+    localparam integer OFFSETEND = OFFSETBEGIN+OFFSETWIDTH-1;
+    localparam integer SETBEGIN = OFFSETEND+1;
+    localparam integer SETEND = SETBEGIN + SETWIDTH - 1;
+    localparam integer TAGBEGIN = SETEND + 1;
+    localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1;
+
+    // Machinery to read from and write to the correct addresses in memory
+    logic [`XLEN-1:0]       ReadPAdr;
+    logic [`XLEN-1:0]       OldReadPAdr;
+    logic [OFFSETWIDTH-1:0] ReadOffset, WriteOffset;
+    logic [SETWIDTH-1:0]    ReadSet, WriteSet;
+    logic [TAGWIDTH-1:0]    ReadTag, WriteTag;
+    logic [LINESIZE-1:0]    ReadLine;
+    logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed;
+
+    // Machinery to check if a given read is valid and is the desired value
+    logic [TAGWIDTH-1:0]    DataTag;
+    logic [NUMLINES-1:0]    ValidOut;
+    logic                   DataValidBit;
+
+    flopenr #(`XLEN) ReadPAdrFlop(clk, reset, re, ReadPAdr, OldReadPAdr);
+
+    // Assign the read and write addresses in cache memory
+    always_comb begin
+        ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN];
+        ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
+        ReadSet = ReadPAdr[SETEND:SETBEGIN];
+        ReadTag = OldReadPAdr[TAGEND:TAGBEGIN];
+
+        WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN];
+        WriteSet = WritePAdr[SETEND:SETBEGIN];
+        WriteTag = WritePAdr[TAGEND:TAGBEGIN];
+    end
+
+    // Depth is number of bits in one "word" of the memory, width is number of such words
+    Sram1Read1Write #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem (
+        .*,
+        .ReadAddr(ReadSet),
+        .ReadData(ReadLine),
+        .WriteAddr(WriteSet),
+        .WriteData(WriteLine)
+    );
+    Sram1Read1Write #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags (
+        .*,
+        .ReadAddr(ReadSet),
+        .ReadData(DataTag),
+        .WriteAddr(WriteSet),
+        .WriteData(WriteTag)
+    );
+
+    // Pick the right bits coming out the read line
+    //assign DataWord = ReadLineTransformed[ReadOffset];
+  //logic [31:0] tempRD;
+  always_comb begin
+    case (OldReadPAdr[4:1])
+      0: DataWord = ReadLine[31:0];
+      1: DataWord = ReadLine[47:16];
+      2: DataWord = ReadLine[63:32];
+      3: DataWord = ReadLine[79:48];
+
+      4: DataWord = ReadLine[95:64];
+      5: DataWord = ReadLine[111:80];
+      6: DataWord = ReadLine[127:96];
+      7: DataWord = ReadLine[143:112];      
+
+      8: DataWord = ReadLine[159:128];      
+      9: DataWord = ReadLine[175:144];      
+      10: DataWord = ReadLine[191:160];      
+      11: DataWord = ReadLine[207:176];
+
+      12: DataWord = ReadLine[223:192];
+      13: DataWord = ReadLine[239:208];
+      14: DataWord = ReadLine[255:224];
+      15: DataWord = {16'b0, ReadLine[255:240]};
+    endcase
+  end
+    genvar i;
+    generate
+        for (i=0; i < LINESIZE/WORDSIZE; i++) begin
+            assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE];
+        end
+    endgenerate
+
+    // Correctly handle the valid bits
+    always_ff @(posedge clk, posedge reset) begin
+        if (reset || flush) begin
+            ValidOut <= {NUMLINES{1'b0}};
+        end else begin
+            if (WriteEnable) begin
+                ValidOut[WriteSet] <= 1;
+            end
+        end
+        DataValidBit <= ValidOut[ReadSet];
+    end
+    assign DataValid = DataValidBit && (DataTag == ReadTag);
+endmodule
diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv
index 371205051..25fc478de 100644
--- a/wally-pipelined/src/ifu/ifu.sv
+++ b/wally-pipelined/src/ifu/ifu.sv
@@ -105,11 +105,9 @@ module ifu (
 
   // jarred 2021-03-14 Add instrution cache block to remove rd2
   assign PCNextPF = PCNextF; // Temporary workaround until iTLB is live
-  icache icache(
-    .*,
-    .UpperPCNextPF(PCNextPF[`XLEN-1:12]),
-    .LowerPCNextF(PCNextPF[11:0])
-  );
+  icache icache(.*);
+  
+
 
   assign PrivilegedChangePCM = RetM | TrapM;
 

From 3f05e319541e02942051a047a7f917788d1503c8 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Mon, 3 May 2021 19:17:09 +0000
Subject: [PATCH 3/7] fpu warnings fixed/commented

---
 wally-pipelined/src/fpu/compressors.sv | 135 ++++++++++++------------
 wally-pipelined/src/fpu/fma2.sv        |  11 +-
 wally-pipelined/src/fpu/fpu.sv         |  10 +-
 wally-pipelined/src/fpu/fpuaddcvt2.sv  |   4 +-
 wally-pipelined/src/fpu/multiply.sv    | 138 +++++++++++++------------
 wally-pipelined/src/fpu/round.sv       |  10 +-
 6 files changed, 159 insertions(+), 149 deletions(-)

diff --git a/wally-pipelined/src/fpu/compressors.sv b/wally-pipelined/src/fpu/compressors.sv
index 0c2bece86..1e975e43c 100644
--- a/wally-pipelined/src/fpu/compressors.sv
+++ b/wally-pipelined/src/fpu/compressors.sv
@@ -1,90 +1,93 @@
-module add3comp2(a, b, c, carry, sum); 
-/////////////////////////////////////////////////////////////////////////////
-//look into diffrent implementations of the compressors?
+// //***breaks lint with warnings like: %Warning-UNOPTFLAT:      Example path: src/fpu/compressors.sv:37:  ASSIGNW
+// //%Warning-UNOPTFLAT:      Example path: src/fpu/compressors.sv:32:  wallypipelinedsoc.hart.fpu.fma1.multiply.genblk5[0].add4.cout
+
+// module add3comp2(a, b, c, carry, sum); 
+// /////////////////////////////////////////////////////////////////////////////
+// //look into different implementations of the compressors?
     
-    parameter BITS = 4;
-	input logic 		[BITS-1:0]		a;
-	input logic		[BITS-1:0]		b;
-	input logic		[BITS-1:0]    	c;
-    output logic      [BITS-1:0]      carry;
-	output logic		[BITS-1:0]		sum;
-    genvar i;
+//     parameter BITS = 4;
+// 	input logic 		[BITS-1:0]		a;
+// 	input logic		[BITS-1:0]		b;
+// 	input logic		[BITS-1:0]    	c;
+//     output logic      [BITS-1:0]      carry;
+// 	output logic		[BITS-1:0]		sum;
+//     genvar i;
 
-    generate
-        for(i= 0; i<BITS; i=i+1) begin
-            sng3comp2 add0(a[i], b[i], c[i], carry[i], sum[i]);
-        end
-    endgenerate
+//     generate
+//         for(i= 0; i<BITS; i=i+1) begin
+//             sng3comp2 add0(a[i], b[i], c[i], carry[i], sum[i]);
+//         end
+//     endgenerate
 
-endmodule
+// endmodule
 
-module add4comp2(a, b, c, d, carry, sum); 
-/////////////////////////////////////////////////////////////////////////////
+// module add4comp2(a, b, c, d, carry, sum); 
+// /////////////////////////////////////////////////////////////////////////////
     
-    parameter BITS = 4;
-	input logic 		[BITS-1:0]		a;
-	input logic		[BITS-1:0]		b;
-	input logic		[BITS-1:0]    	c;
-	input logic		[BITS-1:0]    	d;
-    output logic      [BITS:0]      carry;
-	output logic		[BITS-1:0]		sum;
+//     parameter BITS = 4;
+// 	input logic 		[BITS-1:0]		a;
+// 	input logic		[BITS-1:0]		b;
+// 	input logic		[BITS-1:0]    	c;
+// 	input logic		[BITS-1:0]    	d;
+//     output logic      [BITS:0]      carry;
+// 	output logic		[BITS-1:0]		sum;
 
-    logic       [BITS-1:0]      cout;
-    logic                       carryTmp;
-    genvar i;
+//     logic       [BITS-1:0]      cout;
+//     logic                       carryTmp;
+//     genvar i;
 
 
-    sng4comp2 add0(a[0], b[0], c[0], d[0], 1'b0, cout[0], carry[0], sum[0]);
+//     sng4comp2 add0(a[0], b[0], c[0], d[0], 1'b0, cout[0], carry[0], sum[0]);
 
-    generate
-        for(i= 1; i<BITS-1; i=i+1) begin
-            sng4comp2 add1(a[i], b[i], c[i], d[i], cout[i-1], cout[i], carry[i], sum[i]);
-        end
-    endgenerate
+//     generate
+//         for(i= 1; i<BITS-1; i=i+1) begin
+//             sng4comp2 add1(a[i], b[i], c[i], d[i], cout[i-1], cout[i], carry[i], sum[i]);
+//         end
+//     endgenerate
 
 
-    sng4comp2 add2(a[BITS-1], b[BITS-1], c[BITS-1], d[BITS-1], cout[BITS-2], cout[BITS-1], carryTmp, sum[BITS-1]);
+//     sng4comp2 add2(a[BITS-1], b[BITS-1], c[BITS-1], d[BITS-1], cout[BITS-2], cout[BITS-1], carryTmp, sum[BITS-1]);
 
-    assign carry[BITS-1] = carryTmp & cout[BITS-1];
-    assign carry[BITS] = carryTmp ^ cout[BITS-1];
+//     assign carry[BITS-1] = carryTmp & cout[BITS-1];
+//     assign carry[BITS] = carryTmp ^ cout[BITS-1];
 
-endmodule
+// endmodule
 
-module sng3comp2(a, b, c, carry, sum); 
-/////////////////////////////////////////////////////////////////////////////
-//look into diffrent implementations of the compressors?
+// module sng3comp2(a, b, c, carry, sum); 
+// /////////////////////////////////////////////////////////////////////////////
+// //look into different implementations of the compressors?
     
-	input logic 				a;
-	input logic				b;
-	input logic		       	c;
-    output logic              carry;
-	output logic				sum;
+// 	input logic 				a;
+// 	input logic				b;
+// 	input logic		       	c;
+//     output logic              carry;
+// 	output logic				sum;
     
-    logic               axorb;
+//     logic               axorb;
 
-    assign axorb = a ^ b;
-    assign sum = axorb ^ c;
+//     assign axorb = a ^ b;
+//     assign sum = axorb ^ c;
 
-    assign carry = axorb ? c : a;
+//     assign carry = axorb ? c : a;
 
-endmodule
+// endmodule
 
-module sng4comp2(a, b, c, d, cin, cout, carry, sum); 
-/////////////////////////////////////////////////////////////////////////////
-//look into pass gate 4:2 counters?
+// module sng4comp2(a, b, c, d, cin, cout, carry, sum); 
+// /////////////////////////////////////////////////////////////////////////////
+// //look into pass gate 4:2 counters?
     
-	input logic 				a;
-	input logic				b;
-	input logic		       	c;
-    input logic               d;
-    input logic               cin;
-    output logic              cout;
-    output logic              carry;
-	output logic				sum;
+// 	input logic 				a;
+// 	input logic				b;
+// 	input logic		       	c;
+//     input logic               d;
+//     input logic               cin;
+//     output logic              cout;
+//     output logic              carry;
+// 	output logic				sum;
     
-    logic               TmpSum;
+//     logic               TmpSum;
 
-    sng3comp2 add1(.carry(cout), .sum(TmpSum),.*);
-    sng3comp2 add2(.a(TmpSum), .b(d), .c(cin), .*);
+//     sng3comp2 add1(.carry(cout), .sum(TmpSum),.*);
+//     sng3comp2 add2(.a(TmpSum), .b(d), .c(cin), .*);
 
-endmodule
\ No newline at end of file
+// endmodule
\ No newline at end of file
diff --git a/wally-pipelined/src/fpu/fma2.sv b/wally-pipelined/src/fpu/fma2.sv
index 2ff976623..8ff107fff 100644
--- a/wally-pipelined/src/fpu/fma2.sv
+++ b/wally-pipelined/src/fpu/fma2.sv
@@ -97,6 +97,9 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
 	logic					sticky;
 	logic			[12:0]		de0;
 	logic					isAdd;
+	logic					wsign;
+	logic 			[51:0]		wman;
+	logic 			[10:0]		wexp;
 
 	assign isAdd = 1;
 
@@ -118,17 +121,19 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
 	add				add(.*);
 	lza				lza(.*);
 	normalize		normalize(.zexp(ReadData3M[62:52]),.*); 
-	round			round(.xman(ReadData1M[51:0]), .yman(ReadData2M[51:0]),.zman(ReadData3M[51:0]), .wman(FmaResultM[51:0]),.wsign(FmaResultM[63]),.*);
+	round			round(.xman(ReadData1M[51:0]), .yman(ReadData2M[51:0]),.zman(ReadData3M[51:0]),.*);
 
 // Instantiate exponent datapath
 
-	expgen2			expgen2(.xexp(ReadData1M[62:52]),.yexp(ReadData2M[62:52]),.zexp(ReadData3M[62:52]),.wexp(FmaResultM[62:52]),.*);
+	expgen2			expgen2(.xexp(ReadData1M[62:52]),.yexp(ReadData2M[62:52]),.zexp(ReadData3M[62:52]),.*);
 
 
 // Instantiate control logic
  
-sign				sign(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.wsign(FmaResultM[63]),.*); 
+sign				sign(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.*); 
 flag2				flag2(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.vbits(v[1:0]),.*); 
 
+assign FmaResultM = {wsign,wexp,wman};
+
 endmodule
 
diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv
index 71cb8c54c..7fb77e261 100755
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@@ -159,7 +159,8 @@ module fpu (
   logic                    AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE;
   logic                    AddConvertE;
   logic [63:0]             AddFloat1E, AddFloat2E;
-  logic [10:0]             AddExp1DenormE, AddExp2DenormE, AddExponentE;
+  logic [11:0]             AddExp1DenormE, AddExp2DenormE;
+  logic [10:0]             AddExponentE;
   logic [63:0]             AddOp1E, AddOp2E;
   logic [2:0]              AddRmE;
   logic [3:0]              AddOpTypeE;
@@ -317,7 +318,8 @@ module fpu (
   logic                    AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM;
   logic                    AddConvertM, AddSignM;
   logic [63:0]             AddFloat1M, AddFloat2M;
-  logic [10:0]             AddExp1DenormM, AddExp2DenormM, AddExponentM;
+  logic [11:0]             AddExp1DenormM, AddExp2DenormM;
+  logic [10:0]             AddExponentM;
   logic [63:0]             AddOp1M, AddOp2M;
   logic [2:0]              AddRmM;
   logic [3:0]              AddOpTypeM;
@@ -380,8 +382,8 @@ module fpu (
   flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignM); 
   flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M); 
   flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M); 
-  flopenrc #(11) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); 
-  flopenrc #(11) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); 
+  flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); 
+  flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); 
   flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM); 
   flopenrc #(64) EMRegAdd21(clk, reset, PipeClearEM, PipeEnableEM, AddOp1E, AddOp1M); 
   flopenrc #(64) EMRegAdd22(clk, reset, PipeClearEM, PipeEnableEM, AddOp2E, AddOp2M); 
diff --git a/wally-pipelined/src/fpu/fpuaddcvt2.sv b/wally-pipelined/src/fpu/fpuaddcvt2.sv
index 5c25cccc7..e040d2d2f 100755
--- a/wally-pipelined/src/fpu/fpuaddcvt2.sv
+++ b/wally-pipelined/src/fpu/fpuaddcvt2.sv
@@ -39,7 +39,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
    input [63:0] AddSumM, AddSumTcM;
    input [63:0] 	 AddFloat1M; 
    input [63:0] 	 AddFloat2M;
-   input [10:0]	 AddExp1DenormM, AddExp2DenormM;
+   input [11:0]	 AddExp1DenormM, AddExp2DenormM;
    input [10:0] 	 AddExponentM, AddExpPostSumM; //exp_pre;
    //input		 exp_valid;
    input [3:0] 	 AddSelInvM;
@@ -85,7 +85,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
    //AddExponentM value pre-rounding with considerations for denormalized
    //cases/conversion cases
    assign exp_pre       = AddDenormInM ?
-                          ((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM : AddExp1DenormM))
+                          ((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM[10:0] : AddExp1DenormM[10:0]))
                           : (AddConvertM ? 11'b10000111100 : AddExponentM);
 
 
diff --git a/wally-pipelined/src/fpu/multiply.sv b/wally-pipelined/src/fpu/multiply.sv
index ecdfeca30..1771188c6 100644
--- a/wally-pipelined/src/fpu/multiply.sv
+++ b/wally-pipelined/src/fpu/multiply.sv
@@ -26,81 +26,83 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
      // wire [105:0] acc
     genvar i;	
 
-	assign xExt = {1'b0,~(xdenormE|xzeroE),xman};
-	assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0};
+	// assign xExt = {1'b0,~(xdenormE|xzeroE),xman};
+	// assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0};
     
-     generate
-        for(i=0; i<27; i=i+1) begin
-            booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i]));
-        end
-     endgenerate
+    //  generate
+    //     for(i=0; i<27; i=i+1) begin
+    //         booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i]));
+    //     end
+    //  endgenerate
 
-    assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]}; 
-    assign acc[1] = {49'b01,~e[1],pp[1],add1[0]}; 
-    assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0};
-    assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0};
-    assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0};
-    assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0};
-    assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0};
-    assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0};
-    assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0};
-    assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0};
-    assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0};
-    assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0};
-    assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0};
-    assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0};
-    assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0};
-    assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0};
-    assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0};
-    assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0};
-    assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0};
-    assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0};
-    assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0};
-    assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0};
-    assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0};
-    assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0};
-    assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0};
-    assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0};
-    assign acc[26] = {pp[26],add1[25], 50'b0};
+    // assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]}; 
+    // assign acc[1] = {49'b01,~e[1],pp[1],add1[0]}; 
+    // assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0};
+    // assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0};
+    // assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0};
+    // assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0};
+    // assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0};
+    // assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0};
+    // assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0};
+    // assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0};
+    // assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0};
+    // assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0};
+    // assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0};
+    // assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0};
+    // assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0};
+    // assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0};
+    // assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0};
+    // assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0};
+    // assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0};
+    // assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0};
+    // assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0};
+    // assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0};
+    // assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0};
+    // assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0};
+    // assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0};
+    // assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0};
+    // assign acc[26] = {pp[26],add1[25], 50'b0};
 
+//***breaks lint with warnings like: %Warning-UNOPTFLAT:      Example path: src/fpu/multiply.sv:86:  ASSIGNW
+// %Warning-UNOPTFLAT:      Example path: src/fpu/multiply.sv:22:  wallypipelinedsoc.hart.fpu.fma1.multiply.lv3add
     //*** resize adders
-     generate
-        for(i=0; i<9; i=i+1) begin
-            add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]), 
-                                           .carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1]));
-            assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0};
-        end
-     endgenerate
+    //  generate
+    //     for(i=0; i<9; i=i+1) begin
+    //         add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]), 
+    //                                        .carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1]));
+    //         assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0};
+    //     end
+    //  endgenerate
 
-     generate
-        for(i=0; i<6; i=i+1) begin
-            add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]), 
-                                           .carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1]));
-            assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0};
-        end
-     endgenerate
+    //  generate
+    //     for(i=0; i<6; i=i+1) begin
+    //         add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]), 
+    //                                        .carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1]));
+    //         assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0};
+    //     end
+    //  endgenerate
 
-    generate
-        for(i=0; i<4; i=i+1) begin
-            add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]), 
-                                            .carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1]));
-            assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0};
-        end
-    endgenerate
+    // generate
+    //     for(i=0; i<4; i=i+1) begin
+    //         add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]), 
+    //                                         .carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1]));
+    //         assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0};
+    //     end
+    // endgenerate
 
 
-    generate
-        for(i=0; i<2; i=i+1) begin
-            add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
-                                            .carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
-            assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0};
-        end
-    endgenerate
+    // generate
+    //     for(i=0; i<2; i=i+1) begin
+    //         add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
+    //                                         .carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
+    //         assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0};
+    //     end
+    // endgenerate
 
-    add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
-                                    .carry(carryTmp[21]), .sum(tmpsE));
-    assign sE = tmpsE[105:0];
-    assign rE = {carryTmp[21][104:0], 1'b0};
+    // add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
+    //                                 .carry(carryTmp[21]), .sum(tmpsE));
+    // assign sE = tmpsE[105:0];
+    // assign rE = {carryTmp[21][104:0], 1'b0};
 		// assign rE = 0;
 		// assign sE = acc[0] +
 		// 		   acc[1] +
@@ -130,7 +132,7 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
 		// 		   acc[25] +
 		// 		   acc[26];
 
-			// assign sE = {53'b0,~(xdenormE|xzeroE),xman}  *  {53'b0,~(ydenormE|yzeroE),yman};
-			// assign rE = 0;
+			assign sE = {53'b0,~(xdenormE|xzeroE),xman}  *  {53'b0,~(ydenormE|yzeroE),yman};
+			assign rE = 0;
 endmodule
 
diff --git a/wally-pipelined/src/fpu/round.sv b/wally-pipelined/src/fpu/round.sv
index 34ecbb025..e56af7c70 100644
--- a/wally-pipelined/src/fpu/round.sv
+++ b/wally-pipelined/src/fpu/round.sv
@@ -56,6 +56,10 @@ module round(v, sticky, FrmM, wsign,
 	//	0xx - do nothing
 	//	100 - tie - plus1 if v[2] = 1
 	//	101/110/111 - plus1
+
+	//***causes lint warning: %Warning-UNOPTFLAT:      Example path: src/fpu/round.sv:59:  ALWAYS
+// %Warning-UNOPTFLAT:      Example path: src/fpu/round.sv:42:  wallypipelinedsoc.hart.fpu.fma2.round.plus1
+
 	always_comb begin
 		case (FrmM)
 			3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2])));//round to nearest even
@@ -66,12 +70,6 @@ module round(v, sticky, FrmM, wsign,
 			default: plus1 = 1'bx;
 		endcase
 	end
-	// assign plus1 = (rn & v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2]))) |
-	// 	       (rp & ~wsign) |
-	// 	       (rm & wsign);
-	//assign plus1 = rn && ((v[1] && v[0]) || (v[2] && (v[1]))) ||
-	//				 rp && ~wsign && (v[1] || v[0]) ||
-	//				 rm && wsign && (v[1] || v[0]);
 
 	// Compute rounded result 
     assign v1 = v[53:2] + 1;

From 699a8f3ac31c03a22c00b0306eaeda29508dd7af Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Mon, 3 May 2021 15:29:20 -0400
Subject: [PATCH 4/7] Extended maximum signature length to 1M

---
 wally-pipelined/testbench/testbench-imperas.sv | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv
index 76989c6d4..debbd96c0 100644
--- a/wally-pipelined/testbench/testbench-imperas.sv
+++ b/wally-pipelined/testbench/testbench-imperas.sv
@@ -30,13 +30,14 @@ module testbench();
   parameter DEBUG = 0;
   parameter TESTSBP = 0;
   parameter TESTSPERIPH = 0 ; // set to 0 for regression
+  localparam MAXSIGLEN = 1000000;
   
   logic        clk;
   logic        reset;
 
   int test, i, errors, totalerrors;
-  logic [31:0] sig32[0:10000];
-  logic [`XLEN-1:0] signature[0:10000];
+  logic [31:0] sig32[0:MAXSIGLEN];
+  logic [`XLEN-1:0] signature[0:MAXSIGLEN];
   logic [`XLEN-1:0] testadr;
   string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
   logic [31:0] InstrW;
@@ -602,7 +603,7 @@ string tests32f[] = '{
         $display("Code ended with ecall with gp = 1");
         #60; // give time for instructions in pipeline to finish
         // clear signature to prevent contamination from previous tests
-        for(i=0; i<10000; i=i+1) begin
+        for(i=0; i<MAXSIGLEN; i=i+1) begin
           sig32[i] = 'bx;
         end
 
@@ -610,7 +611,7 @@ string tests32f[] = '{
         signame = {"../../imperas-riscv-tests/work/", tests[test], ".signature.output"};
         $readmemh(signame, sig32);
         i = 0;
-        while (i < 10000) begin
+        while (i < MAXSIGLEN) begin
           if (`XLEN == 32) begin
             signature[i] = sig32[i];
             i = i+1;

From 7185905f7bbe6aafc106d03ee3dbb54a864dd86a Mon Sep 17 00:00:00 2001
From: Ross Thompson <stephen.thompson.37@us.af.mil>
Date: Mon, 3 May 2021 14:36:09 -0500
Subject: [PATCH 5/7] Reduced icache to 1 port memory.

---
 wally-pipelined/regression/wave.do   |  7 ++--
 wally-pipelined/src/cache/sram1rw.sv | 21 ++++++++++
 wally-pipelined/src/ifu/icache.sv    | 34 ++++++++--------
 wally-pipelined/src/ifu/icacheMem.sv | 60 +++++++++-------------------
 4 files changed, 60 insertions(+), 62 deletions(-)
 create mode 100644 wally-pipelined/src/cache/sram1rw.sv

diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do
index 962f25811..ec9194a01 100644
--- a/wally-pipelined/regression/wave.do
+++ b/wally-pipelined/regression/wave.do
@@ -203,7 +203,6 @@ add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbenc
 add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/AlignedInstrRawD
 add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FlushDLastCyclen
 add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/InstrRawD
-add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCNextPF
 add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF
 add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF
 add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPFinalF
@@ -223,8 +222,10 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED
+add wave -noupdate /testbench/dut/hart/ifu/icache/PCTagF
+add wave -noupdate /testbench/dut/hart/ifu/icache/cachemem/OldReadPAdr
 TreeUpdate [SetDefaultTree]
-WaveRestoreCursors {{Cursor 2} {5796691 ns} 0} {{Cursor 4} {1318991 ns} 0}
+WaveRestoreCursors {{Cursor 2} {9951515 ns} 0} {{Cursor 4} {1318991 ns} 0}
 quietly wave cursor active 1
 configure wave -namecolwidth 250
 configure wave -valuecolwidth 513
@@ -240,4 +241,4 @@ configure wave -griddelta 40
 configure wave -timeline 0
 configure wave -timelineunits ns
 update
-WaveRestoreZoom {5795108 ns} {5798036 ns}
+WaveRestoreZoom {9951431 ns} {9951599 ns}
diff --git a/wally-pipelined/src/cache/sram1rw.sv b/wally-pipelined/src/cache/sram1rw.sv
new file mode 100644
index 000000000..a74593881
--- /dev/null
+++ b/wally-pipelined/src/cache/sram1rw.sv
@@ -0,0 +1,21 @@
+// Single-port synchronous memory (one Addr shared by read and write). NOTE: DEPTH is the number of bits in one stored word; WIDTH is the number of words.
+module sram1rw #(parameter DEPTH=128, WIDTH=256) (
+    input logic 		    clk,
+    // Address used for both the read and the write; $clog2(WIDTH) bits select one of WIDTH words
+    input logic [$clog2(WIDTH)-1:0] Addr,
+    output logic [DEPTH-1:0] 	    ReadData,
+  
+    // Write side: when WriteEnable is high, WriteData is stored at Addr on the rising clock edge
+    input logic [DEPTH-1:0] 	    WriteData,
+    input logic 		    WriteEnable
+);
+
+    logic [WIDTH-1:0][DEPTH-1:0] StoredData;
+    // Registered read on every rising edge (there is no read enable), plus an optional write to the same address.
+    always_ff @(posedge clk) begin
+        ReadData <= StoredData[Addr]; // nonblocking: on a simultaneous write this captures the OLD word
+        if (WriteEnable) begin
+            StoredData[Addr] <= WriteData;
+        end
+    end
+endmodule
diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv
index 5821b6559..f6890d7ff 100644
--- a/wally-pipelined/src/ifu/icache.sv
+++ b/wally-pipelined/src/ifu/icache.sv
@@ -54,12 +54,10 @@ module icache(
 
     // Input signals to cache memory
     logic                       FlushMem;
-    logic [`XLEN-1:12]          ICacheMemReadUpperPAdr;
-    logic [11:0]                ICacheMemReadLowerAdr;
     logic                       ICacheMemWriteEnable;
     logic [ICACHELINESIZE-1:0]  ICacheMemWriteData;
-    logic [`XLEN-1:0]           ICacheMemWritePAdr;
     logic                       EndFetchState;
+    logic [`XLEN-1:0]           PCTagF, PCNextIndexF;  
     // Output signals from cache memory
     logic [31:0]   ICacheMemReadData;
     logic               ICacheMemReadValid;
@@ -69,13 +67,9 @@ module icache(
   cachemem(
         .*,
         // Stall it if the pipeline is stalled, unless we're stalling it and we're ending our stall
-        .re(ICacheReadEn),
         .flush(FlushMem),
-        .ReadUpperPAdr(ICacheMemReadUpperPAdr),
-        .ReadLowerAdr(ICacheMemReadLowerAdr),
         .WriteEnable(ICacheMemWriteEnable),
         .WriteLine(ICacheMemWriteData),
-        .WritePAdr(ICacheMemWritePAdr),
         .DataWord(ICacheMemReadData),
         .DataValid(ICacheMemReadValid)
     );
@@ -95,19 +89,18 @@ module icachecontroller #(parameter LINESIZE = 256) (
     // Input the address to read
     // The upper bits of the physical pc
     input logic [`XLEN-1:0] 	PCNextF,
-    input logic [`XLEN-1:0]     PCPF,
+    input logic [`XLEN-1:0] 	PCPF,
     // Signals to/from cache memory
     // The read coming out of it
     input logic [31:0] 		ICacheMemReadData,
     input logic 		ICacheMemReadValid,
     // The address at which we want to search the cache memory
-    output logic [`XLEN-1:12] 	ICacheMemReadUpperPAdr,
-    output logic [11:0] 	ICacheMemReadLowerAdr,
+    output logic [`XLEN-1:0] 	PCTagF,
+    output logic [`XLEN-1:0]    PCNextIndexF,						     
     output logic 		ICacheReadEn,
     // Load data into the cache
     output logic 		ICacheMemWriteEnable,
     output logic [LINESIZE-1:0] ICacheMemWriteData,
-    output logic [`XLEN-1:0] 	ICacheMemWritePAdr,
 
     // Outputs to rest of ifu
     // High if the instruction in the fetch stage is compressed
@@ -214,6 +207,8 @@ module icachecontroller #(parameter LINESIZE = 256) (
   //logic [`XLEN-1:0] 	     PCPF;
 
   logic 		     reset_q;
+  logic [1:0] 		     PCMux_q;
+  
   
     // Misaligned signals
     //logic [`XLEN:0] MisalignedInstrRawF;
@@ -230,8 +225,17 @@ module icachecontroller #(parameter LINESIZE = 256) (
   // now we have to select between these three PCs
   assign PCPreFinalF = PCMux[0] | StallF ? PCPF : PCNextF; // *** don't like the stallf
   assign PCPFinalF = PCMux[1] ? PCSpillF : PCPreFinalF;
+
+  // this mux needs to be delayed 1 cycle as it occurs 1 pipeline stage later.
+  // *** read enable may not be necessary.
+  flopenr #(2) PCMuxReg(.clk(clk),
+			.reset(reset),
+			.en(ICacheReadEn),
+			.d(PCMux),
+			.q(PCMux_q));
   
-  
+  assign PCTagF = PCMux_q[1] ? PCSpillF : PCPF;
+  assign PCNextIndexF = PCPFinalF;
   
   // truncate the offset from PCPF for memory address generation
   assign PCPTrunkF = PCPFinalF[`XLEN-1:OFFSETWIDTH];
@@ -510,12 +514,6 @@ module icachecontroller #(parameter LINESIZE = 256) (
     flopr   #(1)  flushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCyclen | ~StallF), FlushDLastCyclen);
   mux2    #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCyclen, InstrRawD);
   //assign InstrRawD = AlignedInstrRawD;
-  
-  
-  assign {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr} = PCPFinalF;
 
-  assign ICacheMemWritePAdr = PCPFinalF;
-
-  
   
 endmodule
diff --git a/wally-pipelined/src/ifu/icacheMem.sv b/wally-pipelined/src/ifu/icacheMem.sv
index 345e5e453..de83eb568 100644
--- a/wally-pipelined/src/ifu/icacheMem.sv
+++ b/wally-pipelined/src/ifu/icacheMem.sv
@@ -2,21 +2,20 @@
 
 module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) (
     // Pipeline stuff
-    input  logic clk,
-    input  logic reset,
-    input  logic re,
+    input logic 	       clk,
+    input logic 	       reset,
     // If flush is high, invalidate the entire cache
-    input  logic flush,
+    input logic 	       flush,
+													    
     // Select which address to read (broken for efficiency's sake)
-    input  logic [`XLEN-1:12]   ReadUpperPAdr, // physical address Must come one cycle later
-    input  logic [11:0]         ReadLowerAdr, // virtual address
+    input logic [`XLEN-1:0]    PCTagF, // physical tag address
+    input logic [`XLEN-1:0]    PCNextIndexF,
     // Write new data to the cache
-    input  logic                WriteEnable,
-    input  logic [LINESIZE-1:0] WriteLine,
-    input  logic [`XLEN-1:0]    WritePAdr,
+    input logic 	       WriteEnable,
+    input logic [LINESIZE-1:0] WriteLine,
     // Output the word, as well as if it is valid
-    output logic [31:0] DataWord, // *** was WORDSIZE-1
-    output logic                DataValid
+    output logic [31:0]        DataWord, // *** was WORDSIZE-1
+    output logic 	       DataValid
 );
 
     // Various compile-time constants
@@ -33,11 +32,6 @@ module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, p
     localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1;
 
     // Machinery to read from and write to the correct addresses in memory
-    logic [`XLEN-1:0]       ReadPAdr;
-    logic [`XLEN-1:0]       OldReadPAdr;
-    logic [OFFSETWIDTH-1:0] ReadOffset, WriteOffset;
-    logic [SETWIDTH-1:0]    ReadSet, WriteSet;
-    logic [TAGWIDTH-1:0]    ReadTag, WriteTag;
     logic [LINESIZE-1:0]    ReadLine;
     logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed;
 
@@ -46,41 +40,25 @@ module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, p
     logic [NUMLINES-1:0]    ValidOut;
     logic                   DataValidBit;
 
-    flopenr #(`XLEN) ReadPAdrFlop(clk, reset, re, ReadPAdr, OldReadPAdr);
-
-    // Assign the read and write addresses in cache memory
-    always_comb begin
-        ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN];
-        ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
-        ReadSet = ReadPAdr[SETEND:SETBEGIN];
-        ReadTag = OldReadPAdr[TAGEND:TAGBEGIN];
-
-        WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN];
-        WriteSet = WritePAdr[SETEND:SETBEGIN];
-        WriteTag = WritePAdr[TAGEND:TAGBEGIN];
-    end
-
     // Depth is number of bits in one "word" of the memory, width is number of such words
-    Sram1Read1Write #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem (
+    sram1rw #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem (
         .*,
-        .ReadAddr(ReadSet),
+        .Addr(PCNextIndexF[SETEND:SETBEGIN]),
         .ReadData(ReadLine),
-        .WriteAddr(WriteSet),
         .WriteData(WriteLine)
     );
-    Sram1Read1Write #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags (
+    sram1rw #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags (
         .*,
-        .ReadAddr(ReadSet),
+        .Addr(PCNextIndexF[SETEND:SETBEGIN]),
         .ReadData(DataTag),
-        .WriteAddr(WriteSet),
-        .WriteData(WriteTag)
+        .WriteData(PCTagF[TAGEND:TAGBEGIN])
     );
 
     // Pick the right bits coming out the read line
     //assign DataWord = ReadLineTransformed[ReadOffset];
   //logic [31:0] tempRD;
   always_comb begin
-    case (OldReadPAdr[4:1])
+    case (PCTagF[4:1])
       0: DataWord = ReadLine[31:0];
       1: DataWord = ReadLine[47:16];
       2: DataWord = ReadLine[63:32];
@@ -115,10 +93,10 @@ module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, p
             ValidOut <= {NUMLINES{1'b0}};
         end else begin
             if (WriteEnable) begin
-                ValidOut[WriteSet] <= 1;
+                ValidOut[PCNextIndexF[SETEND:SETBEGIN]] <= 1;
             end
         end
-        DataValidBit <= ValidOut[ReadSet];
+        DataValidBit <= ValidOut[PCNextIndexF[SETEND:SETBEGIN]];
     end
-    assign DataValid = DataValidBit && (DataTag == ReadTag);
+    assign DataValid = DataValidBit && (DataTag == PCTagF[TAGEND:TAGBEGIN]);
 endmodule

From e09ac73eaf907a429a10f05210ae5e71dc5be71a Mon Sep 17 00:00:00 2001
From: Ross Thompson <stephen.thompson.37@us.af.mil>
Date: Mon, 3 May 2021 14:51:25 -0500
Subject: [PATCH 6/7] Removed combinational loops between icache and PMA
 checker.

---
 wally-pipelined/src/ifu/icache.sv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv
index f6890d7ff..2b8ee703e 100644
--- a/wally-pipelined/src/ifu/icache.sv
+++ b/wally-pipelined/src/ifu/icache.sv
@@ -238,7 +238,7 @@ module icachecontroller #(parameter LINESIZE = 256) (
   assign PCNextIndexF = PCPFinalF;
   
   // truncate the offset from PCPF for memory address generation
-  assign PCPTrunkF = PCPFinalF[`XLEN-1:OFFSETWIDTH];
+  assign PCPTrunkF = PCTagF[`XLEN-1:OFFSETWIDTH];
   
     // Detect if the instruction is compressed
   assign CompressedF = FinalInstrRawF[1:0] != 2'b11;

From a21b84e2ad395dc202a25dfb22dab4ff73273d3f Mon Sep 17 00:00:00 2001
From: Jarred Allen <jaallen@g.hmc.edu>
Date: Mon, 3 May 2021 17:32:05 -0400
Subject: [PATCH 7/7] Add lint to regression

---
 wally-pipelined/lint-wally                     | 4 +++-
 wally-pipelined/regression/regression-wally.py | 5 +++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/wally-pipelined/lint-wally b/wally-pipelined/lint-wally
index 59d6bf153..ed67dbab6 100755
--- a/wally-pipelined/lint-wally
+++ b/wally-pipelined/lint-wally
@@ -1,9 +1,11 @@
+#!/bin/bash
 # check for warnings in Verilog code
 # The verilator lint tool is faster and better than Modelsim so it is best to run this first.
 
+basepath=$(dirname "$0")
 for config in rv64ic rv32ic; do
     echo "$config linting..."
-    if !(verilator --lint-only "$@" --top-module wallypipelinedsoc "-Iconfig/$config" src/*/*.sv); then
+    if !(verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/$config" "$basepath"/src/*/*.sv); then
         echo "Exiting after $config lint due to errors or warnings"
         exit 1
     fi
diff --git a/wally-pipelined/regression/regression-wally.py b/wally-pipelined/regression/regression-wally.py
index 2b272e3b4..aa64424c7 100755
--- a/wally-pipelined/regression/regression-wally.py
+++ b/wally-pipelined/regression/regression-wally.py
@@ -36,6 +36,11 @@ configs = [
         cmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do ../config/rv64ic rv64ic\n!",
         grepstr="All tests ran without failures"
     ),
+    Config(
+        name="lints",
+        cmd="../lint-wally > {}",
+        grepstr="All lints run with no errors or warnings"
+    ),
 ]
 
 import multiprocessing, os