From fdf4954a207ae8207a726ac1a41425bab24d406c Mon Sep 17 00:00:00 2001 From: Ross Thompson <stephen.thompson.37@us.af.mil> Date: Mon, 3 May 2021 09:04:48 -0500 Subject: [PATCH 1/7] Added back in function name to wave.do --- wally-pipelined/regression/wave.do | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 428bf5b97..94e955fe0 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -3,6 +3,7 @@ quietly virtual function -install /testbench/dut/hart/ifu/icache/cachemem -env / quietly WaveActivateNextPane {} 0 add wave -noupdate /testbench/clk add wave -noupdate /testbench/reset +add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE @@ -112,8 +113,6 @@ add wave -noupdate -group dcache /testbench/dut/hart/MemPAdrM add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemAccessM add wave -noupdate -group dcache /testbench/dut/hart/dmem/AtomicMaskedM add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemAckW -add wave -noupdate -group dcache /testbench/dut/hart/dmem/genblk1/lrM -add wave -noupdate -group dcache /testbench/dut/hart/dmem/genblk1/scM add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E @@ -223,9 +222,8 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED -add wave -noupdate /testbench/dut/hart/dmem/genblk1/scM TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 2} {12215488 ns} 0} {{Cursor 4} {22127 ns} 0} +WaveRestoreCursors {{Cursor 2} {5792261 ns} 0} {{Cursor 4} {1318991 ns} 0} quietly wave cursor active 2 configure wave -namecolwidth 250 configure wave -valuecolwidth 513 @@ -241,4 +239,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {21993 ns} {22181 ns} +WaveRestoreZoom {1311008 ns} {1321254 ns} From 12b978fec2ac20021c3b62c5228dd32b16f64764 Mon Sep 17 00:00:00 2001 From: Ross Thompson <stephen.thompson.37@us.af.mil> Date: Mon, 3 May 2021 12:03:17 -0500 Subject: [PATCH 2/7] Eliminated extra register and fixed ports to icache. Still need to support physical tag check and write in icache memory. Still need to reduce to 1 port SRAM in icache. I would like to refactor the icache code. --- wally-pipelined/regression/wave.do | 59 +++++----- wally-pipelined/src/cache/dmapped.sv | 122 -------------------- wally-pipelined/src/ifu/icache.sv | 164 +++------------------------ wally-pipelined/src/ifu/icacheMem.sv | 124 ++++++++++++++++++++ wally-pipelined/src/ifu/ifu.sv | 8 +- 5 files changed, 175 insertions(+), 302 deletions(-) create mode 100644 wally-pipelined/src/ifu/icacheMem.sv diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 94e955fe0..962f25811 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -3,6 +3,7 @@ quietly virtual function -install /testbench/dut/hart/ifu/icache/cachemem -env / quietly WaveActivateNextPane {} 0 add wave -noupdate /testbench/clk add wave -noupdate /testbench/reset +add wave -noupdate /testbench/memfilename add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName @@ -20,13 +21,13 @@ add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/DataStall -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD +add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/BPPredWrongE +add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM +add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/RetM +add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/TrapM +add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/LoadStallD +add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/DataStall +add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/MulDivStallD add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushD add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushE @@ -37,25 +38,25 @@ add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbe add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW -add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPPredF -add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBValidF -add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPInstrClassF -add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBPredPCF -add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/RASPCF -add wave -noupdate -expand -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePC -add wave -noupdate -expand -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdateEN -add wave -noupdate -expand -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePrediction -add wave -noupdate -expand -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateEN -add wave -noupdate -expand -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdatePC -add wave -noupdate -expand -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateTarget -add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/TargetWrongE -add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/FallThroughWrongE -add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionPCWrongE -add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/InstrClassE -add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionInstrClassWrongE -add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredClassNonCFIWrongE -add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE -add wave -noupdate -expand -group Bpred /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE +add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPPredF +add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBValidF +add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPInstrClassF +add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBPredPCF +add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/RASPCF +add wave -noupdate -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePC +add wave -noupdate -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdateEN +add wave -noupdate -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePrediction +add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateEN +add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdatePC +add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateTarget +add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/TargetWrongE +add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/FallThroughWrongE +add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionPCWrongE +add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/InstrClassE +add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionInstrClassWrongE +add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredClassNonCFIWrongE +add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE +add wave -noupdate -group Bpred /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrFName add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrD add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrE @@ -223,8 +224,8 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 2} {5792261 ns} 0} {{Cursor 4} {1318991 ns} 0} -quietly wave cursor active 2 +WaveRestoreCursors {{Cursor 2} {5796691 ns} 0} {{Cursor 4} {1318991 ns} 0} +quietly wave cursor active 1 configure wave -namecolwidth 250 configure wave -valuecolwidth 513 configure wave -justifyvalue left @@ -239,4 +240,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {1311008 ns} {1321254 ns} +WaveRestoreZoom {5795108 ns} {5798036 ns} diff --git a/wally-pipelined/src/cache/dmapped.sv b/wally-pipelined/src/cache/dmapped.sv index 34864d393..f40da412a 100644 --- a/wally-pipelined/src/cache/dmapped.sv +++ b/wally-pipelined/src/cache/dmapped.sv @@ -125,128 +125,6 @@ module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par assign DataValid = DataValidBit && (DataTag == ReadTag); endmodule -module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) ( - // Pipeline stuff - input logic clk, - input logic reset, - input logic re, - // If flush is high, invalidate the entire cache - input logic flush, - // Select which address to read (broken for efficiency's sake) - input logic [`XLEN-1:12] ReadUpperPAdr, - input logic [11:0] ReadLowerAdr, - // Write new data to the cache - input logic WriteEnable, - input logic [LINESIZE-1:0] WriteLine, - input logic [`XLEN-1:0] WritePAdr, - // Output the word, as well as if it is valid - output logic [31:0] DataWord, // *** was WORDSIZE-1 - output logic DataValid -); - - // Various compile-time constants - localparam integer WORDWIDTH = $clog2(WORDSIZE/8); - localparam integer OFFSETWIDTH = $clog2(LINESIZE/WORDSIZE); - localparam integer SETWIDTH = $clog2(NUMLINES); - localparam integer TAGWIDTH = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH; - - localparam integer OFFSETBEGIN = WORDWIDTH; - localparam integer OFFSETEND = OFFSETBEGIN+OFFSETWIDTH-1; - localparam integer SETBEGIN = OFFSETEND+1; - localparam integer SETEND = SETBEGIN + SETWIDTH - 1; - localparam integer TAGBEGIN = SETEND + 1; - localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1; - - // Machinery to read from and write to the correct addresses in memory - logic [`XLEN-1:0] ReadPAdr; - logic [`XLEN-1:0] OldReadPAdr; - logic [OFFSETWIDTH-1:0] ReadOffset, WriteOffset; - logic [SETWIDTH-1:0] ReadSet, WriteSet; - logic [TAGWIDTH-1:0] ReadTag, WriteTag; - logic [LINESIZE-1:0] ReadLine; - logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed; - - // Machinery to check if a given read is valid and is the desired value - logic [TAGWIDTH-1:0] DataTag; - logic [NUMLINES-1:0] ValidOut; - logic DataValidBit; - - flopenr #(`XLEN) ReadPAdrFlop(clk, reset, re, ReadPAdr, OldReadPAdr); - - // Assign the read and write addresses in cache memory - always_comb begin - ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN]; - ReadPAdr = {ReadUpperPAdr, ReadLowerAdr}; - ReadSet = ReadPAdr[SETEND:SETBEGIN]; - ReadTag = OldReadPAdr[TAGEND:TAGBEGIN]; - - WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN]; - WriteSet = WritePAdr[SETEND:SETBEGIN]; - WriteTag = WritePAdr[TAGEND:TAGBEGIN]; - end - - // Depth is number of bits in one "word" of the memory, width is number of such words - Sram1Read1Write #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem ( - .*, - .ReadAddr(ReadSet), - .ReadData(ReadLine), - .WriteAddr(WriteSet), - .WriteData(WriteLine) - ); - Sram1Read1Write #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags ( - .*, - .ReadAddr(ReadSet), - .ReadData(DataTag), - .WriteAddr(WriteSet), - .WriteData(WriteTag) - ); - - // Pick the right bits coming out the read line - //assign DataWord = ReadLineTransformed[ReadOffset]; - //logic [31:0] tempRD; - always_comb begin - case (OldReadPAdr[4:1]) - 0: DataWord = ReadLine[31:0]; - 1: DataWord = ReadLine[47:16]; - 2: DataWord = ReadLine[63:32]; - 3: DataWord = ReadLine[79:48]; - - 4: DataWord = ReadLine[95:64]; - 5: DataWord = ReadLine[111:80]; - 6: DataWord = ReadLine[127:96]; - 7: DataWord = ReadLine[143:112]; - - 8: DataWord = ReadLine[159:128]; - 9: DataWord = ReadLine[175:144]; - 10: DataWord = ReadLine[191:160]; - 11: DataWord = ReadLine[207:176]; - - 12: DataWord = ReadLine[223:192]; - 13: DataWord = ReadLine[239:208]; - 14: DataWord = ReadLine[255:224]; - 15: DataWord = {16'b0, ReadLine[255:240]}; - endcase - end - genvar i; - generate - for (i=0; i < LINESIZE/WORDSIZE; i++) begin - assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE]; - end - endgenerate - - // Correctly handle the valid bits - always_ff @(posedge clk, posedge reset) begin - if (reset || flush) begin - ValidOut <= {NUMLINES{1'b0}}; - end else begin - if (WriteEnable) begin - ValidOut[WriteSet] <= 1; - end - end - DataValidBit <= ValidOut[ReadSet]; - end - assign DataValid = DataValidBit && (DataTag == ReadTag); -endmodule // Write-through direct-mapped memory module wtdirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) ( diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index f524be384..5821b6559 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -27,26 +27,24 @@ module icache( // Basic pipeline stuff - input logic clk, reset, - input logic StallF, StallD, - input logic FlushD, - // Upper bits of physical address for PC - input logic [`XLEN-1:12] UpperPCNextPF, - // Lower 12 bits of virtual PC address, since it's faster this way - input logic [11:0] LowerPCNextF, + input logic clk, reset, + input logic StallF, StallD, + input logic FlushD, + input logic [`XLEN-1:0] PCNextF, + input logic [`XLEN-1:0] PCPF, // Data read in from the ebu unit - input logic [`XLEN-1:0] InstrInF, - input logic InstrAckF, + input logic [`XLEN-1:0] InstrInF, + input logic InstrAckF, // Read requested from the ebu unit - output logic [`XLEN-1:0] InstrPAdrF, - output logic InstrReadF, + output logic [`XLEN-1:0] InstrPAdrF, + output logic InstrReadF, // High if the instruction currently in the fetch stage is compressed - output logic CompressedF, + output logic CompressedF, // High if the icache is requesting a stall - output logic ICacheStallF, + output logic ICacheStallF, // The raw (not decompressed) instruction that was requested // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros - output logic [31:0] InstrRawD + output logic [31:0] InstrRawD ); // Configuration parameters @@ -96,10 +94,8 @@ module icachecontroller #(parameter LINESIZE = 256) ( // Input the address to read // The upper bits of the physical pc - input logic [`XLEN-1:12] UpperPCNextPF, - // The lower bits of the virtual pc - input logic [11:0] LowerPCNextF, - + input logic [`XLEN-1:0] PCNextF, + input logic [`XLEN-1:0] PCPF, // Signals to/from cache memory // The read coming out of it input logic [31:0] ICacheMemReadData, @@ -198,7 +194,7 @@ module icachecontroller #(parameter LINESIZE = 256) ( logic [LOGWPL:0] FetchCount, NextFetchCount; - logic [`XLEN-1:0] PCPreFinalF, PCPFinalF, PCSpillF, PCNextPF; + logic [`XLEN-1:0] PCPreFinalF, PCPFinalF, PCSpillF; logic [`XLEN-1:OFFSETWIDTH] PCPTrunkF; @@ -215,7 +211,7 @@ module icachecontroller #(parameter LINESIZE = 256) ( //logic FlushDLastCycleN; //logic PCPMisalignedF; localparam [31:0] NOP = 32'h13; - logic [`XLEN-1:0] PCPF; + //logic [`XLEN-1:0] PCPF; logic reset_q; @@ -224,18 +220,15 @@ module icachecontroller #(parameter LINESIZE = 256) ( //logic MisalignedStall; // Cache fault signals //logic FaultStall; - - assign PCNextPF = {UpperPCNextPF, LowerPCNextF}; - flopenl #(`XLEN) PCPFFlop(clk, reset, SavePC & ~StallF, PCPFinalF, `RESET_VECTOR, PCPF); + //flopenl #(`XLEN) PCPFFlop(clk, reset, SavePC & ~StallF, PCPFinalF, `RESET_VECTOR, PCPF); // on spill we want to get the first 2 bytes of the next cache block. // the spill only occurs if the PCPF mod BlockByteLength == -2. Therefore we can // simply add 2 to land on the next cache block. assign PCSpillF = PCPF + 2'b10; // now we have to select between these three PCs - assign PCPreFinalF = PCMux[0] | StallF ? PCPF : PCNextPF; // *** don't like the stallf - //assign PCPreFinalF = PCMux[0] ? PCPF : PCNextPF; // *** don't like the stallf + assign PCPreFinalF = PCMux[0] | StallF ? PCPF : PCNextF; // *** don't like the stallf assign PCPFinalF = PCMux[1] ? PCSpillF : PCPreFinalF; @@ -247,127 +240,6 @@ module icachecontroller #(parameter LINESIZE = 256) ( assign CompressedF = FinalInstrRawF[1:0] != 2'b11; - // Handle happy path (data in cache, reads aligned) -/* -----\/----- EXCLUDED -----\/----- - - generate - if (`XLEN == 32) begin - assign AlignedInstrRawF = PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData; - //assign PCPMisalignedF = PCPF[1] && ~CompressedF; - end else begin - assign AlignedInstrRawF = PCPF[2] - ? (PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData[63:32]) - : (PCPF[1] ? ICacheMemReadData[47:16] : ICacheMemReadData[31:0]); - //assign PCPMisalignedF = PCPF[2] && PCPF[1] && ~CompressedF; - end - endgenerate - -----/\----- EXCLUDED -----/\----- */ - - //flopenr #(32) AlignedInstrRawDFlop(clk, reset, ~StallD, AlignedInstrRawF, AlignedInstrRawD); - //flopr #(1) FlushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCycleN | ~StallF), FlushDLastCycleN); - - //mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCycleN, InstrRawD); - - // Stall for faults or misaligned reads -/* -----\/----- EXCLUDED -----\/----- - always_comb begin - assign ICacheStallF = FaultStall | MisalignedStall; - end - -----/\----- EXCLUDED -----/\----- */ - - - // Handle misaligned, noncompressed reads - -/* -----\/----- EXCLUDED -----\/----- - logic MisalignedState, NextMisalignedState; - logic [15:0] MisalignedHalfInstrF; - logic [15:0] UpperHalfWord; - -----/\----- EXCLUDED -----/\----- */ - -/* -----\/----- EXCLUDED -----\/----- - flopenr #(16) MisalignedHalfInstrFlop(clk, reset, ~FaultStall & (PCPMisalignedF & MisalignedState), AlignedInstrRawF[15:0], MisalignedHalfInstrF); - flopenr #(1) MisalignedStateFlop(clk, reset, ~FaultStall, NextMisalignedState, MisalignedState); - -----/\----- EXCLUDED -----/\----- */ - - // When doing a misaligned read, swizzle the bits correctly -/* -----\/----- EXCLUDED -----\/----- - generate - if (`XLEN == 32) begin - assign UpperHalfWord = ICacheMemReadData[31:16]; - end else begin - assign UpperHalfWord = ICacheMemReadData[63:48]; - end - endgenerate - always_comb begin - if (MisalignedState) begin - assign MisalignedInstrRawF = {16'b0, UpperHalfWord}; - end else begin - assign MisalignedInstrRawF = {ICacheMemReadData[15:0], MisalignedHalfInstrF}; - end - end - -----/\----- EXCLUDED -----/\----- */ - - // Manage internal state and stall when necessary -/* -----\/----- EXCLUDED -----\/----- - always_comb begin - assign MisalignedStall = PCPMisalignedF & MisalignedState; - assign NextMisalignedState = ~PCPMisalignedF | ~MisalignedState; - end - -----/\----- EXCLUDED -----/\----- */ - - // Pick the correct address to read -/* -----\/----- EXCLUDED -----\/----- - generate - if (`XLEN == 32) begin - assign ICacheMemReadLowerAdr = {LowerPCNextF[11:2] + (PCPMisalignedF & ~MisalignedState), 2'b00}; - end else begin - assign ICacheMemReadLowerAdr = {LowerPCNextF[11:3] + (PCPMisalignedF & ~MisalignedState), 3'b00}; - end - endgenerate - -----/\----- EXCLUDED -----/\----- */ - // TODO Handle reading instructions that cross page boundaries - //assign ICacheMemReadUpperPAdr = UpperPCNextPF; - - - // Handle cache faults - - -/* -----\/----- EXCLUDED -----\/----- - logic FetchState, BeginFetchState; - logic [LOGWPL:0] FetchWordNum, NextFetchWordNum; - logic [`XLEN-1:0] LineAlignedPCPF; - - flopr #(1) FetchStateFlop(clk, reset, BeginFetchState | (FetchState & ~EndFetchState), FetchState); - flopr #(LOGWPL+1) FetchWordNumFlop(clk, reset, NextFetchWordNum, FetchWordNum); - - - // Enter the fetch state when we hit a cache fault - always_comb begin - BeginFetchState = ~ICacheMemReadValid & ~FetchState & (FetchWordNum == 0); - end - // Exit the fetch state once the cache line has been loaded - flopr #(1) EndFetchStateFlop(clk, reset, ICacheMemWriteEnable, EndFetchState); - - // Machinery to request the correct addresses from main memory - always_comb begin - InstrReadF = FetchState & ~EndFetchState & ~ICacheMemWriteEnable; // next stage logic - LineAlignedPCPF = {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}}; // the fetch address for abh? - InstrPAdrF = LineAlignedPCPF + FetchWordNum*(`XLEN/8); // ? - NextFetchWordNum = FetchState ? FetchWordNum+InstrAckF : {LOGWPL+1{1'b0}}; // convert to enable - end - - // Write to cache memory when we have the line here - always_comb begin - ICacheMemWritePAdr = LineAlignedPCPF; - ICacheMemWriteEnable = FetchWordNum == {1'b1, {LOGWPL{1'b0}}} & FetchState & ~EndFetchState; - end - - // Stall the pipeline while loading a new line from memory - always_comb begin - FaultStall = FetchState | ~ICacheMemReadValid; - end - -----/\----- EXCLUDED -----/\----- */ - // the FSM is always runing, do not stall. flopr #(5) stateReg(.clk(clk), .reset(reset), diff --git a/wally-pipelined/src/ifu/icacheMem.sv b/wally-pipelined/src/ifu/icacheMem.sv new file mode 100644 index 000000000..345e5e453 --- /dev/null +++ b/wally-pipelined/src/ifu/icacheMem.sv @@ -0,0 +1,124 @@ +`include "wally-config.vh" + +module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) ( + // Pipeline stuff + input logic clk, + input logic reset, + input logic re, + // If flush is high, invalidate the entire cache + input logic flush, + // Select which address to read (broken for efficiency's sake) + input logic [`XLEN-1:12] ReadUpperPAdr, // physical address Must come one cycle later + input logic [11:0] ReadLowerAdr, // virtual address + // Write new data to the cache + input logic WriteEnable, + input logic [LINESIZE-1:0] WriteLine, + input logic [`XLEN-1:0] WritePAdr, + // Output the word, as well as if it is valid + output logic [31:0] DataWord, // *** was WORDSIZE-1 + output logic DataValid +); + + // Various compile-time constants + localparam integer WORDWIDTH = $clog2(WORDSIZE/8); + localparam integer OFFSETWIDTH = $clog2(LINESIZE/WORDSIZE); + localparam integer SETWIDTH = $clog2(NUMLINES); + localparam integer TAGWIDTH = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH; + + localparam integer OFFSETBEGIN = WORDWIDTH; + localparam integer OFFSETEND = OFFSETBEGIN+OFFSETWIDTH-1; + localparam integer SETBEGIN = OFFSETEND+1; + localparam integer SETEND = SETBEGIN + SETWIDTH - 1; + localparam integer TAGBEGIN = SETEND + 1; + localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1; + + // Machinery to read from and write to the correct addresses in memory + logic [`XLEN-1:0] ReadPAdr; + logic [`XLEN-1:0] OldReadPAdr; + logic [OFFSETWIDTH-1:0] ReadOffset, WriteOffset; + logic [SETWIDTH-1:0] ReadSet, WriteSet; + logic [TAGWIDTH-1:0] ReadTag, WriteTag; + logic [LINESIZE-1:0] ReadLine; + logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed; + + // Machinery to check if a given read is valid and is the desired value + logic [TAGWIDTH-1:0] DataTag; + logic [NUMLINES-1:0] ValidOut; + logic DataValidBit; + + flopenr #(`XLEN) ReadPAdrFlop(clk, reset, re, ReadPAdr, OldReadPAdr); + + // Assign the read and write addresses in cache memory + always_comb begin + ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN]; + ReadPAdr = {ReadUpperPAdr, ReadLowerAdr}; + ReadSet = ReadPAdr[SETEND:SETBEGIN]; + ReadTag = OldReadPAdr[TAGEND:TAGBEGIN]; + + WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN]; + WriteSet = WritePAdr[SETEND:SETBEGIN]; + WriteTag = WritePAdr[TAGEND:TAGBEGIN]; + end + + // Depth is number of bits in one "word" of the memory, width is number of such words + Sram1Read1Write #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem ( + .*, + .ReadAddr(ReadSet), + .ReadData(ReadLine), + .WriteAddr(WriteSet), + .WriteData(WriteLine) + ); + Sram1Read1Write #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags ( + .*, + .ReadAddr(ReadSet), + .ReadData(DataTag), + .WriteAddr(WriteSet), + .WriteData(WriteTag) + ); + + // Pick the right bits coming out the read line + //assign DataWord = ReadLineTransformed[ReadOffset]; + //logic [31:0] tempRD; + always_comb begin + case (OldReadPAdr[4:1]) + 0: DataWord = ReadLine[31:0]; + 1: DataWord = ReadLine[47:16]; + 2: DataWord = ReadLine[63:32]; + 3: DataWord = ReadLine[79:48]; + + 4: DataWord = ReadLine[95:64]; + 5: DataWord = ReadLine[111:80]; + 6: DataWord = ReadLine[127:96]; + 7: DataWord = ReadLine[143:112]; + + 8: DataWord = ReadLine[159:128]; + 9: DataWord = ReadLine[175:144]; + 10: DataWord = ReadLine[191:160]; + 11: DataWord = ReadLine[207:176]; + + 12: DataWord = ReadLine[223:192]; + 13: DataWord = ReadLine[239:208]; + 14: DataWord = ReadLine[255:224]; + 15: DataWord = {16'b0, ReadLine[255:240]}; + endcase + end + genvar i; + generate + for (i=0; i < LINESIZE/WORDSIZE; i++) begin + assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE]; + end + endgenerate + + // Correctly handle the valid bits + always_ff @(posedge clk, posedge reset) begin + if (reset || flush) begin + ValidOut <= {NUMLINES{1'b0}}; + end else begin + if (WriteEnable) begin + ValidOut[WriteSet] <= 1; + end + end + DataValidBit <= ValidOut[ReadSet]; + end + assign DataValid = DataValidBit && (DataTag == ReadTag); +endmodule diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 371205051..25fc478de 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -105,11 +105,9 @@ module ifu ( // jarred 2021-03-14 Add instrution cache block to remove rd2 assign PCNextPF = PCNextF; // Temporary workaround until iTLB is live - icache icache( - .*, - .UpperPCNextPF(PCNextPF[`XLEN-1:12]), - .LowerPCNextF(PCNextPF[11:0]) - ); + icache icache(.*); + + assign PrivilegedChangePCM = RetM | TrapM; From 3f05e319541e02942051a047a7f917788d1503c8 Mon Sep 17 00:00:00 2001 From: Katherine Parry <kparry4@gmail.com> Date: Mon, 3 May 2021 19:17:09 +0000 Subject: [PATCH 3/7] fpu warnings fixed/commented --- wally-pipelined/src/fpu/compressors.sv | 135 ++++++++++++------------ wally-pipelined/src/fpu/fma2.sv | 11 +- wally-pipelined/src/fpu/fpu.sv | 10 +- wally-pipelined/src/fpu/fpuaddcvt2.sv | 4 +- wally-pipelined/src/fpu/multiply.sv | 138 +++++++++++++------------ wally-pipelined/src/fpu/round.sv | 10 +- 6 files changed, 159 insertions(+), 149 deletions(-) diff --git a/wally-pipelined/src/fpu/compressors.sv b/wally-pipelined/src/fpu/compressors.sv index 0c2bece86..1e975e43c 100644 --- a/wally-pipelined/src/fpu/compressors.sv +++ b/wally-pipelined/src/fpu/compressors.sv @@ -1,90 +1,93 @@ -module add3comp2(a, b, c, carry, sum); -///////////////////////////////////////////////////////////////////////////// -//look into diffrent implementations of the compressors? +// //***breaks lint with warnings like: %Warning-UNOPTFLAT: Example path: src/fpu/compressors.sv:37: ASSIGNW +// //%Warning-UNOPTFLAT: Example path: src/fpu/compressors.sv:32: wallypipelinedsoc.hart.fpu.fma1.multiply.genblk5[0].add4.cout + +// module add3comp2(a, b, c, carry, sum); +// ///////////////////////////////////////////////////////////////////////////// +// //look into diffrent implementations of the compressors? - parameter BITS = 4; - input logic [BITS-1:0] a; - input logic [BITS-1:0] b; - input logic [BITS-1:0] c; - output logic [BITS-1:0] carry; - output logic [BITS-1:0] sum; - genvar i; +// parameter BITS = 4; +// input logic [BITS-1:0] a; +// input logic [BITS-1:0] b; +// input logic [BITS-1:0] c; +// output logic [BITS-1:0] carry; +// output logic [BITS-1:0] sum; +// genvar i; - generate - for(i= 0; i<BITS; i=i+1) begin - sng3comp2 add0(a[i], b[i], c[i], carry[i], sum[i]); - end - endgenerate +// generate +// for(i= 0; i<BITS; i=i+1) begin +// sng3comp2 add0(a[i], b[i], c[i], carry[i], sum[i]); +// end +// endgenerate -endmodule +// endmodule -module add4comp2(a, b, c, d, carry, sum); -///////////////////////////////////////////////////////////////////////////// +// module add4comp2(a, b, c, d, carry, sum); +// ///////////////////////////////////////////////////////////////////////////// - parameter BITS = 4; - input logic [BITS-1:0] a; - input logic [BITS-1:0] b; - input logic [BITS-1:0] c; - input logic [BITS-1:0] d; - output logic [BITS:0] carry; - output logic [BITS-1:0] sum; +// parameter BITS = 4; +// input logic [BITS-1:0] a; +// input logic [BITS-1:0] b; +// input logic [BITS-1:0] c; +// input logic [BITS-1:0] d; +// output logic [BITS:0] carry; +// output logic [BITS-1:0] sum; - logic [BITS-1:0] cout; - logic carryTmp; - genvar i; +// logic [BITS-1:0] cout; +// logic carryTmp; +// genvar i; - sng4comp2 add0(a[0], b[0], c[0], d[0], 1'b0, cout[0], carry[0], sum[0]); +// sng4comp2 add0(a[0], b[0], c[0], d[0], 1'b0, cout[0], carry[0], sum[0]); - generate - for(i= 1; i<BITS-1; i=i+1) begin - sng4comp2 add1(a[i], b[i], c[i], d[i], cout[i-1], cout[i], carry[i], sum[i]); - end - endgenerate +// generate +// for(i= 1; i<BITS-1; i=i+1) begin +// sng4comp2 add1(a[i], b[i], c[i], d[i], cout[i-1], cout[i], carry[i], sum[i]); +// end +// endgenerate - sng4comp2 add2(a[BITS-1], b[BITS-1], c[BITS-1], d[BITS-1], cout[BITS-2], cout[BITS-1], carryTmp, sum[BITS-1]); +// sng4comp2 add2(a[BITS-1], b[BITS-1], c[BITS-1], d[BITS-1], cout[BITS-2], cout[BITS-1], carryTmp, sum[BITS-1]); - assign carry[BITS-1] = carryTmp & cout[BITS-1]; - assign carry[BITS] = carryTmp ^ cout[BITS-1]; +// assign carry[BITS-1] = carryTmp & cout[BITS-1]; +// assign carry[BITS] = carryTmp ^ cout[BITS-1]; -endmodule +// endmodule -module sng3comp2(a, b, c, carry, sum); -///////////////////////////////////////////////////////////////////////////// -//look into diffrent implementations of the compressors? +// module sng3comp2(a, b, c, carry, sum); +// ///////////////////////////////////////////////////////////////////////////// +// //look into diffrent implementations of the compressors? - input logic a; - input logic b; - input logic c; - output logic carry; - output logic sum; +// input logic a; +// input logic b; +// input logic c; +// output logic carry; +// output logic sum; - logic axorb; +// logic axorb; - assign axorb = a ^ b; - assign sum = axorb ^ c; +// assign axorb = a ^ b; +// assign sum = axorb ^ c; - assign carry = axorb ? c : a; +// assign carry = axorb ? c : a; -endmodule +// endmodule -module sng4comp2(a, b, c, d, cin, cout, carry, sum); -///////////////////////////////////////////////////////////////////////////// -//look into pass gate 4:2 counters? +// module sng4comp2(a, b, c, d, cin, cout, carry, sum); +// ///////////////////////////////////////////////////////////////////////////// +// //look into pass gate 4:2 counters? - input logic a; - input logic b; - input logic c; - input logic d; - input logic cin; - output logic cout; - output logic carry; - output logic sum; +// input logic a; +// input logic b; +// input logic c; +// input logic d; +// input logic cin; +// output logic cout; +// output logic carry; +// output logic sum; - logic TmpSum; +// logic TmpSum; - sng3comp2 add1(.carry(cout), .sum(TmpSum),.*); - sng3comp2 add2(.a(TmpSum), .b(d), .c(cin), .*); +// sng3comp2 add1(.carry(cout), .sum(TmpSum),.*); +// sng3comp2 add2(.a(TmpSum), .b(d), .c(cin), .*); -endmodule \ No newline at end of file +// endmodule \ No newline at end of file diff --git a/wally-pipelined/src/fpu/fma2.sv b/wally-pipelined/src/fpu/fma2.sv index 2ff976623..8ff107fff 100644 --- a/wally-pipelined/src/fpu/fma2.sv +++ b/wally-pipelined/src/fpu/fma2.sv @@ -97,6 +97,9 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM, logic sticky; logic [12:0] de0; logic isAdd; + logic wsign; + logic [51:0] wman; + logic [10:0] wexp; assign isAdd = 1; @@ -118,17 +121,19 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM, add add(.*); lza lza(.*); normalize normalize(.zexp(ReadData3M[62:52]),.*); - round round(.xman(ReadData1M[51:0]), .yman(ReadData2M[51:0]),.zman(ReadData3M[51:0]), .wman(FmaResultM[51:0]),.wsign(FmaResultM[63]),.*); + round round(.xman(ReadData1M[51:0]), .yman(ReadData2M[51:0]),.zman(ReadData3M[51:0]),.*); // Instantiate exponent datapath - expgen2 expgen2(.xexp(ReadData1M[62:52]),.yexp(ReadData2M[62:52]),.zexp(ReadData3M[62:52]),.wexp(FmaResultM[62:52]),.*); + expgen2 expgen2(.xexp(ReadData1M[62:52]),.yexp(ReadData2M[62:52]),.zexp(ReadData3M[62:52]),.*); // Instantiate control logic -sign sign(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.wsign(FmaResultM[63]),.*); +sign sign(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.*); flag2 flag2(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.vbits(v[1:0]),.*); +assign FmaResultM = {wsign,wexp,wman}; + endmodule diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 71cb8c54c..7fb77e261 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -159,7 +159,8 @@ module fpu ( logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE; logic AddConvertE; logic [63:0] AddFloat1E, AddFloat2E; - logic [10:0] AddExp1DenormE, AddExp2DenormE, AddExponentE; + logic [11:0] AddExp1DenormE, AddExp2DenormE; + logic [10:0] AddExponentE; logic [63:0] AddOp1E, AddOp2E; logic [2:0] AddRmE; logic [3:0] AddOpTypeE; @@ -317,7 +318,8 @@ module fpu ( logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM; logic AddConvertM, AddSignM; logic [63:0] AddFloat1M, AddFloat2M; - logic [10:0] AddExp1DenormM, AddExp2DenormM, AddExponentM; + logic [11:0] AddExp1DenormM, AddExp2DenormM; + logic [10:0] AddExponentM; logic [63:0] AddOp1M, AddOp2M; logic [2:0] AddRmM; logic [3:0] AddOpTypeM; @@ -380,8 +382,8 @@ module fpu ( flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignM); flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M); flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M); - flopenrc #(11) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); - flopenrc #(11) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); + flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); + flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM); flopenrc #(64) EMRegAdd21(clk, reset, PipeClearEM, PipeEnableEM, AddOp1E, AddOp1M); flopenrc #(64) EMRegAdd22(clk, reset, PipeClearEM, PipeEnableEM, AddOp2E, AddOp2M); diff --git a/wally-pipelined/src/fpu/fpuaddcvt2.sv b/wally-pipelined/src/fpu/fpuaddcvt2.sv index 5c25cccc7..e040d2d2f 100755 --- a/wally-pipelined/src/fpu/fpuaddcvt2.sv +++ b/wally-pipelined/src/fpu/fpuaddcvt2.sv @@ -39,7 +39,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel input [63:0] AddSumM, AddSumTcM; input [63:0] AddFloat1M; input [63:0] AddFloat2M; - input [10:0] AddExp1DenormM, AddExp2DenormM; + input [11:0] AddExp1DenormM, AddExp2DenormM; input [10:0] AddExponentM, AddExpPostSumM; //exp_pre; //input exp_valid; input [3:0] AddSelInvM; @@ -85,7 +85,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel //AddExponentM value pre-rounding with considerations for denormalized //cases/conversion cases assign exp_pre = AddDenormInM ? - ((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM : AddExp1DenormM)) + ((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM[10:0] : AddExp1DenormM[10:0])) : (AddConvertM ? 11'b10000111100 : AddExponentM); diff --git a/wally-pipelined/src/fpu/multiply.sv b/wally-pipelined/src/fpu/multiply.sv index ecdfeca30..1771188c6 100644 --- a/wally-pipelined/src/fpu/multiply.sv +++ b/wally-pipelined/src/fpu/multiply.sv @@ -26,81 +26,83 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE); // wire [105:0] acc genvar i; - assign xExt = {1'b0,~(xdenormE|xzeroE),xman}; - assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0}; + // assign xExt = {1'b0,~(xdenormE|xzeroE),xman}; + // assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0}; - generate - for(i=0; i<27; i=i+1) begin - booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i])); - end - endgenerate + // generate + // for(i=0; i<27; i=i+1) begin + // booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i])); + // end + // endgenerate - assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]}; - assign acc[1] = {49'b01,~e[1],pp[1],add1[0]}; - assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0}; - assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0}; - assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0}; - assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0}; - assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0}; - assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0}; - assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0}; - assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0}; - assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0}; - assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0}; - assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0}; - assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0}; - assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0}; - assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0}; - assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0}; - assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0}; - assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0}; - assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0}; - assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0}; - assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0}; - assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0}; - assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0}; - assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0}; - assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0}; - assign acc[26] = {pp[26],add1[25], 50'b0}; + // assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]}; + // assign acc[1] = {49'b01,~e[1],pp[1],add1[0]}; + // assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0}; + // assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0}; + // assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0}; + // assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0}; + // assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0}; + // assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0}; + // assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0}; + // assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0}; + // assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0}; + // assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0}; + // assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0}; + // assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0}; + // assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0}; + // assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0}; + // assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0}; + // assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0}; + // assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0}; + // assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0}; + // assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0}; + // assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0}; + // assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0}; + // assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0}; + // assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0}; + // assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0}; + // assign acc[26] = {pp[26],add1[25], 50'b0}; +//***breaks lint with warnings like: %Warning-UNOPTFLAT: Example path: src/fpu/multiply.sv:86: ASSIGNW +// %Warning-UNOPTFLAT: Example path: src/fpu/multiply.sv:22: wallypipelinedsoc.hart.fpu.fma1.multiply.lv3add //*** resize adders - generate - for(i=0; i<9; i=i+1) begin - add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]), - .carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1])); - assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0}; - end - endgenerate + // generate + // for(i=0; i<9; i=i+1) begin + // add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]), + // .carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1])); + // assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0}; + // end + // endgenerate - generate - for(i=0; i<6; i=i+1) begin - add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]), - .carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1])); - assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0}; - end - endgenerate + // generate + // for(i=0; i<6; i=i+1) begin + // add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]), + // .carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1])); + // assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0}; + // end + // endgenerate - generate - for(i=0; i<4; i=i+1) begin - add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]), - .carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1])); - assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0}; - end - endgenerate + // generate + // for(i=0; i<4; i=i+1) begin + // add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]), + // .carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1])); + // assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0}; + // end + // endgenerate - generate - for(i=0; i<2; i=i+1) begin - add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]), - .carry(carryTmp[i+19]), .sum(lv4add[i*2+1])); - assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0}; - end - endgenerate + // generate + // for(i=0; i<2; i=i+1) begin + // add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]), + // .carry(carryTmp[i+19]), .sum(lv4add[i*2+1])); + // assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0}; + // end + // endgenerate - add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) , - .carry(carryTmp[21]), .sum(tmpsE)); - assign sE = tmpsE[105:0]; - assign rE = {carryTmp[21][104:0], 1'b0}; + // add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) , + // .carry(carryTmp[21]), .sum(tmpsE)); + // assign sE = tmpsE[105:0]; + // assign rE = {carryTmp[21][104:0], 1'b0}; // assign rE = 0; // assign sE = acc[0] + // acc[1] + @@ -130,7 +132,7 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE); // acc[25] + // acc[26]; - // assign sE = {53'b0,~(xdenormE|xzeroE),xman} * {53'b0,~(ydenormE|yzeroE),yman}; - // assign rE = 0; + assign sE = {53'b0,~(xdenormE|xzeroE),xman} * {53'b0,~(ydenormE|yzeroE),yman}; + assign rE = 0; endmodule diff --git a/wally-pipelined/src/fpu/round.sv b/wally-pipelined/src/fpu/round.sv index 34ecbb025..e56af7c70 100644 --- a/wally-pipelined/src/fpu/round.sv +++ b/wally-pipelined/src/fpu/round.sv @@ -56,6 +56,10 @@ module round(v, sticky, FrmM, wsign, // 0xx - do nothing // 100 - tie - plus1 if v[2] = 1 // 101/110/111 - plus1 + + //***causes lint warning: %Warning-UNOPTFLAT: Example path: src/fpu/round.sv:59: ALWAYS +// %Warning-UNOPTFLAT: Example path: src/fpu/round.sv:42: wallypipelinedsoc.hart.fpu.fma2.round.plus1 + always_comb begin case (FrmM) 3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2])));//round to nearest even @@ -66,12 +70,6 @@ module round(v, sticky, FrmM, wsign, default: plus1 = 1'bx; endcase end - // assign plus1 = (rn & v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2]))) | - // (rp & ~wsign) | - // (rm & wsign); - //assign plus1 = rn && ((v[1] && v[0]) || (v[2] && (v[1]))) || - // rp && ~wsign && (v[1] || v[0]) || - // rm && wsign && (v[1] || v[0]); // Compute rounded result assign v1 = v[53:2] + 1; From 699a8f3ac31c03a22c00b0306eaeda29508dd7af Mon Sep 17 00:00:00 2001 From: David Harris <david_harris@hmc.edu> Date: Mon, 3 May 2021 15:29:20 -0400 Subject: [PATCH 4/7] Extended maximum signature length to 1M --- wally-pipelined/testbench/testbench-imperas.sv | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 76989c6d4..debbd96c0 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -30,13 +30,14 @@ module testbench(); parameter DEBUG = 0; parameter TESTSBP = 0; parameter TESTSPERIPH = 0 ; // set to 0 for regression + localparam MAXSIGLEN = 1000000; logic clk; logic reset; int test, i, errors, totalerrors; - logic [31:0] sig32[0:10000]; - logic [`XLEN-1:0] signature[0:10000]; + logic [31:0] sig32[0:MAXSIGLEN]; + logic [`XLEN-1:0] signature[0:MAXSIGLEN]; logic [`XLEN-1:0] testadr; string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName; logic [31:0] InstrW; @@ -602,7 +603,7 @@ string tests32f[] = '{ $display("Code ended with ecall with gp = 1"); #60; // give time for instructions in pipeline to finish // clear signature to prevent contamination from previous tests - for(i=0; i<10000; i=i+1) begin + for(i=0; i<MAXSIGLEN; i=i+1) begin sig32[i] = 'bx; end @@ -610,7 +611,7 @@ string tests32f[] = '{ signame = {"../../imperas-riscv-tests/work/", tests[test], ".signature.output"}; $readmemh(signame, sig32); i = 0; - while (i < 10000) begin + while (i < MAXSIGLEN) begin if (`XLEN == 32) begin signature[i] = sig32[i]; i = i+1; From 7185905f7bbe6aafc106d03ee3dbb54a864dd86a Mon Sep 17 00:00:00 2001 From: Ross Thompson <stephen.thompson.37@us.af.mil> Date: Mon, 3 May 2021 14:36:09 -0500 Subject: [PATCH 5/7] Reduced icache to 1 port memory. --- wally-pipelined/regression/wave.do | 7 ++-- wally-pipelined/src/cache/sram1rw.sv | 21 ++++++++++ wally-pipelined/src/ifu/icache.sv | 34 ++++++++-------- wally-pipelined/src/ifu/icacheMem.sv | 60 +++++++++------------------- 4 files changed, 60 insertions(+), 62 deletions(-) create mode 100644 wally-pipelined/src/cache/sram1rw.sv diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 962f25811..ec9194a01 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -203,7 +203,6 @@ add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbenc add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/AlignedInstrRawD add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FlushDLastCyclen add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/InstrRawD -add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCNextPF add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPFinalF @@ -223,8 +222,10 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED +add wave -noupdate /testbench/dut/hart/ifu/icache/PCTagF +add wave -noupdate /testbench/dut/hart/ifu/icache/cachemem/OldReadPAdr TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 2} {5796691 ns} 0} {{Cursor 4} {1318991 ns} 0} +WaveRestoreCursors {{Cursor 2} {9951515 ns} 0} {{Cursor 4} {1318991 ns} 0} quietly wave cursor active 1 configure wave -namecolwidth 250 configure wave -valuecolwidth 513 @@ -240,4 +241,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {5795108 ns} {5798036 ns} +WaveRestoreZoom {9951431 ns} {9951599 ns} diff --git a/wally-pipelined/src/cache/sram1rw.sv b/wally-pipelined/src/cache/sram1rw.sv new file mode 100644 index 000000000..a74593881 --- /dev/null +++ b/wally-pipelined/src/cache/sram1rw.sv @@ -0,0 +1,21 @@ +// Depth is number of bits in one "word" of the memory, width is number of such words +module sram1rw #(parameter DEPTH=128, WIDTH=256) ( + input logic clk, + // port 1 is read only + input logic [$clog2(WIDTH)-1:0] Addr, + output logic [DEPTH-1:0] ReadData, + + // port 2 is write only + input logic [DEPTH-1:0] WriteData, + input logic WriteEnable +); + + logic [WIDTH-1:0][DEPTH-1:0] StoredData; + + always_ff @(posedge clk) begin + ReadData <= StoredData[Addr]; + if (WriteEnable) begin + StoredData[Addr] <= WriteData; + end + end +endmodule diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index 5821b6559..f6890d7ff 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -54,12 +54,10 @@ module icache( // Input signals to cache memory logic FlushMem; - logic [`XLEN-1:12] ICacheMemReadUpperPAdr; - logic [11:0] ICacheMemReadLowerAdr; logic ICacheMemWriteEnable; logic [ICACHELINESIZE-1:0] ICacheMemWriteData; - logic [`XLEN-1:0] ICacheMemWritePAdr; logic EndFetchState; + logic [`XLEN-1:0] PCTagF, PCNextIndexF; // Output signals from cache memory logic [31:0] ICacheMemReadData; logic ICacheMemReadValid; @@ -69,13 +67,9 @@ module icache( cachemem( .*, // Stall it if the pipeline is stalled, unless we're stalling it and we're ending our stall - .re(ICacheReadEn), .flush(FlushMem), - .ReadUpperPAdr(ICacheMemReadUpperPAdr), - .ReadLowerAdr(ICacheMemReadLowerAdr), .WriteEnable(ICacheMemWriteEnable), .WriteLine(ICacheMemWriteData), - .WritePAdr(ICacheMemWritePAdr), .DataWord(ICacheMemReadData), .DataValid(ICacheMemReadValid) ); @@ -95,19 +89,18 @@ module icachecontroller #(parameter LINESIZE = 256) ( // Input the address to read // The upper bits of the physical pc input logic [`XLEN-1:0] PCNextF, - input logic [`XLEN-1:0] PCPF, + input logic [`XLEN-1:0] PCPF, // Signals to/from cache memory // The read coming out of it input logic [31:0] ICacheMemReadData, input logic ICacheMemReadValid, // The address at which we want to search the cache memory - output logic [`XLEN-1:12] ICacheMemReadUpperPAdr, - output logic [11:0] ICacheMemReadLowerAdr, + output logic [`XLEN-1:0] PCTagF, + output logic [`XLEN-1:0] PCNextIndexF, output logic ICacheReadEn, // Load data into the cache output logic ICacheMemWriteEnable, output logic [LINESIZE-1:0] ICacheMemWriteData, - output logic [`XLEN-1:0] ICacheMemWritePAdr, // Outputs to rest of ifu // High if the instruction in the fetch stage is compressed @@ -214,6 +207,8 @@ module icachecontroller #(parameter LINESIZE = 256) ( //logic [`XLEN-1:0] PCPF; logic reset_q; + logic [1:0] PCMux_q; + // Misaligned signals //logic [`XLEN:0] MisalignedInstrRawF; @@ -230,8 +225,17 @@ module icachecontroller #(parameter LINESIZE = 256) ( // now we have to select between these three PCs assign PCPreFinalF = PCMux[0] | StallF ? PCPF : PCNextF; // *** don't like the stallf assign PCPFinalF = PCMux[1] ? PCSpillF : PCPreFinalF; + + // this mux needs to be delayed 1 cycle as it occurs 1 pipeline stage later. + // *** read enable may not be necessary. + flopenr #(2) PCMuxReg(.clk(clk), + .reset(reset), + .en(ICacheReadEn), + .d(PCMux), + .q(PCMux_q)); - + assign PCTagF = PCMux_q[1] ? PCSpillF : PCPF; + assign PCNextIndexF = PCPFinalF; // truncate the offset from PCPF for memory address generation assign PCPTrunkF = PCPFinalF[`XLEN-1:OFFSETWIDTH]; @@ -510,12 +514,6 @@ module icachecontroller #(parameter LINESIZE = 256) ( flopr #(1) flushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCyclen | ~StallF), FlushDLastCyclen); mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCyclen, InstrRawD); //assign InstrRawD = AlignedInstrRawD; - - - assign {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr} = PCPFinalF; - assign ICacheMemWritePAdr = PCPFinalF; - - endmodule diff --git a/wally-pipelined/src/ifu/icacheMem.sv b/wally-pipelined/src/ifu/icacheMem.sv index 345e5e453..de83eb568 100644 --- a/wally-pipelined/src/ifu/icacheMem.sv +++ b/wally-pipelined/src/ifu/icacheMem.sv @@ -2,21 +2,20 @@ module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) ( // Pipeline stuff - input logic clk, - input logic reset, - input logic re, + input logic clk, + input logic reset, // If flush is high, invalidate the entire cache - input logic flush, + input logic flush, + // Select which address to read (broken for efficiency's sake) - input logic [`XLEN-1:12] ReadUpperPAdr, // physical address Must come one cycle later - input logic [11:0] ReadLowerAdr, // virtual address + input logic [`XLEN-1:0] PCTagF, // physical tag address + input logic [`XLEN-1:0] PCNextIndexF, // Write new data to the cache - input logic WriteEnable, - input logic [LINESIZE-1:0] WriteLine, - input logic [`XLEN-1:0] WritePAdr, + input logic WriteEnable, + input logic [LINESIZE-1:0] WriteLine, // Output the word, as well as if it is valid - output logic [31:0] DataWord, // *** was WORDSIZE-1 - output logic DataValid + output logic [31:0] DataWord, // *** was WORDSIZE-1 + output logic DataValid ); // Various compile-time constants @@ -33,11 +32,6 @@ module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, p localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1; // Machinery to read from and write to the correct addresses in memory - logic [`XLEN-1:0] ReadPAdr; - logic [`XLEN-1:0] OldReadPAdr; - logic [OFFSETWIDTH-1:0] ReadOffset, WriteOffset; - logic [SETWIDTH-1:0] ReadSet, WriteSet; - logic [TAGWIDTH-1:0] ReadTag, WriteTag; logic [LINESIZE-1:0] ReadLine; logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed; @@ -46,41 +40,25 @@ module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, p logic [NUMLINES-1:0] ValidOut; logic DataValidBit; - flopenr #(`XLEN) ReadPAdrFlop(clk, reset, re, ReadPAdr, OldReadPAdr); - - // Assign the read and write addresses in cache memory - always_comb begin - ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN]; - ReadPAdr = {ReadUpperPAdr, ReadLowerAdr}; - ReadSet = ReadPAdr[SETEND:SETBEGIN]; - ReadTag = OldReadPAdr[TAGEND:TAGBEGIN]; - - WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN]; - WriteSet = WritePAdr[SETEND:SETBEGIN]; - WriteTag = WritePAdr[TAGEND:TAGBEGIN]; - end - // Depth is number of bits in one "word" of the memory, width is number of such words - Sram1Read1Write #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem ( + sram1rw #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem ( .*, - .ReadAddr(ReadSet), + .Addr(PCNextIndexF[SETEND:SETBEGIN]), .ReadData(ReadLine), - .WriteAddr(WriteSet), .WriteData(WriteLine) ); - Sram1Read1Write #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags ( + sram1rw #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags ( .*, - .ReadAddr(ReadSet), + .Addr(PCNextIndexF[SETEND:SETBEGIN]), .ReadData(DataTag), - .WriteAddr(WriteSet), - .WriteData(WriteTag) + .WriteData(PCTagF[TAGEND:TAGBEGIN]) ); // Pick the right bits coming out the read line //assign DataWord = ReadLineTransformed[ReadOffset]; //logic [31:0] tempRD; always_comb begin - case (OldReadPAdr[4:1]) + case (PCTagF[4:1]) 0: DataWord = ReadLine[31:0]; 1: DataWord = ReadLine[47:16]; 2: DataWord = ReadLine[63:32]; @@ -115,10 +93,10 @@ module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, p ValidOut <= {NUMLINES{1'b0}}; end else begin if (WriteEnable) begin - ValidOut[WriteSet] <= 1; + ValidOut[PCNextIndexF[SETEND:SETBEGIN]] <= 1; end end - DataValidBit <= ValidOut[ReadSet]; + DataValidBit <= ValidOut[PCNextIndexF[SETEND:SETBEGIN]]; end - assign DataValid = DataValidBit && (DataTag == ReadTag); + assign DataValid = DataValidBit && (DataTag == PCTagF[TAGEND:TAGBEGIN]); endmodule From e09ac73eaf907a429a10f05210ae5e71dc5be71a Mon Sep 17 00:00:00 2001 From: Ross Thompson <stephen.thompson.37@us.af.mil> Date: Mon, 3 May 2021 14:51:25 -0500 Subject: [PATCH 6/7] Removed combinational loops between icache and PMA checker. --- wally-pipelined/src/ifu/icache.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index f6890d7ff..2b8ee703e 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -238,7 +238,7 @@ module icachecontroller #(parameter LINESIZE = 256) ( assign PCNextIndexF = PCPFinalF; // truncate the offset from PCPF for memory address generation - assign PCPTrunkF = PCPFinalF[`XLEN-1:OFFSETWIDTH]; + assign PCPTrunkF = PCTagF[`XLEN-1:OFFSETWIDTH]; // Detect if the instruction is compressed assign CompressedF = FinalInstrRawF[1:0] != 2'b11; From a21b84e2ad395dc202a25dfb22dab4ff73273d3f Mon Sep 17 00:00:00 2001 From: Jarred Allen <jaallen@g.hmc.edu> Date: Mon, 3 May 2021 17:32:05 -0400 Subject: [PATCH 7/7] Add lint to regression --- wally-pipelined/lint-wally | 4 +++- wally-pipelined/regression/regression-wally.py | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/wally-pipelined/lint-wally b/wally-pipelined/lint-wally index 59d6bf153..ed67dbab6 100755 --- a/wally-pipelined/lint-wally +++ b/wally-pipelined/lint-wally @@ -1,9 +1,11 @@ +#!/bin/bash # check for warnings in Verilog code # The verilator lint tool is faster and better than Modelsim so it is best to run this first. +basepath=$(dirname $0) for config in rv64ic rv32ic; do echo "$config linting..." - if !(verilator --lint-only "$@" --top-module wallypipelinedsoc "-Iconfig/$config" src/*/*.sv); then + if !(verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/$config" $basepath/src/*/*.sv); then echo "Exiting after $config lint due to errors or warnings" exit 1 fi diff --git a/wally-pipelined/regression/regression-wally.py b/wally-pipelined/regression/regression-wally.py index 2b272e3b4..aa64424c7 100755 --- a/wally-pipelined/regression/regression-wally.py +++ b/wally-pipelined/regression/regression-wally.py @@ -36,6 +36,11 @@ configs = [ cmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do ../config/rv64ic rv64ic\n!", grepstr="All tests ran without failures" ), + Config( + name="lints", + cmd="../lint-wally > {}", + grepstr="All lints run with no errors or warnings" + ), ] import multiprocessing, os