diff --git a/pipelined/regression/fpga-wave.do b/pipelined/regression/fpga-wave.do index 32ca64ee4..0d3ac2ea8 100644 --- a/pipelined/regression/fpga-wave.do +++ b/pipelined/regression/fpga-wave.do @@ -224,7 +224,7 @@ add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbe add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM -add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/LSUBusBuffer add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} @@ -326,7 +326,7 @@ add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut add wave -noupdate -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine -add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData +add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/LSUBusBuffer add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr @@ -482,7 +482,7 @@ add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/FinalInstrRawF add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusAdr add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CacheBusAck -add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusWriteData +add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/LSUBusBuffer add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/ITLBMissF add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress diff --git a/pipelined/regression/linux-wave.do b/pipelined/regression/linux-wave.do index 10f98251e..00c623935 100644 --- a/pipelined/regression/linux-wave.do +++ b/pipelined/regression/linux-wave.do @@ -189,7 +189,7 @@ add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/FinalInstrRawF add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusAdr add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CacheBusAck -add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusWriteData +add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/LSUBusBuffer add wave -noupdate -group ifu -expand -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite add wave -noupdate -group ifu -expand -group itlb /testbench/dut/core/ifu/ITLBMissF add wave -noupdate -group ifu -expand -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress @@ -227,7 +227,7 @@ add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM -add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/LSUBusBuffer add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} @@ -331,7 +331,7 @@ add wave -noupdate -expand -group lsu -expand -group dcache -group status /testb add wave -noupdate -expand -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/LSUBusBuffer add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr diff --git a/pipelined/regression/wave.do b/pipelined/regression/wave.do index a9a9feee9..1653cef83 100644 --- a/pipelined/regression/wave.do +++ b/pipelined/regression/wave.do @@ -222,7 +222,7 @@ add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM -add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/LSUBusBuffer add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} @@ -317,7 +317,7 @@ add wave -noupdate -expand -group lsu -expand -group dcache -group status -color add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/LSUBusBuffer add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataWord add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay @@ -443,7 +443,7 @@ add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/FinalInstrRawF add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusAdr add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CacheBusAck -add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusWriteData +add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/LSUBusBuffer add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/VictimWay add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/SetDirtyWay add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/SetValidWay diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 6c19436e0..fd71e6526 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -31,37 +31,36 @@ `include "wally-config.vh" module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, DCACHE) ( - input logic clk, - input logic reset, + input logic clk, + input logic reset, // cpu side - input logic CPUBusy, - input logic [1:0] RW, - input logic [1:0] Atomic, - input logic FlushCache, - input logic InvalidateCache, - input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits. - input logic [`PA_BITS-1:0] PAdr, // physical address + input logic CPUBusy, + input logic [1:0] RW, + input logic [1:0] Atomic, + input logic FlushCache, + input logic InvalidateCache, + input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits. + input logic [`PA_BITS-1:0] PAdr, // physical address input logic [(WORDLEN-1)/8:0] ByteMask, - input logic [WORDLEN-1:0] FinalWriteData, - output logic CacheCommitted, - output logic CacheStall, + input logic [WORDLEN-1:0] FinalWriteData, + output logic CacheCommitted, + output logic CacheStall, // to performance counters to cpu - output logic CacheMiss, - output logic CacheAccess, + output logic CacheMiss, + output logic CacheAccess, // lsu control - input logic IgnoreRequestTLB, - input logic IgnoreRequestTrapM, - input logic TrapM, - input logic Cacheable, + input logic IgnoreRequestTLB, + input logic TrapM, + input logic Cacheable, // Bus fsm interface - output logic CacheFetchLine, - output logic CacheWriteLine, - input logic CacheBusAck, - input logic [LOGBWPL-1:0] WordCount, - input logic LSUBusWriteCrit, - output logic [`PA_BITS-1:0] CacheBusAdr, - input logic [LINELEN-1:0] CacheBusWriteData, - output logic [WORDLEN-1:0] ReadDataWord); + output logic CacheFetchLine, + output logic CacheWriteLine, + input logic CacheBusAck, + input logic SelLSUBusWord, + input logic [LOGBWPL-1:0] WordCount, + input logic [LINELEN-1:0] LSUBusBuffer, + output logic [`PA_BITS-1:0] CacheBusAdr, + output logic [WORDLEN-1:0] ReadDataWord); // Cache parameters localparam LINEBYTELEN = LINELEN/8; @@ -147,11 +146,11 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE // like to fix this. if(DCACHE) mux2 #(LOGBWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]), - .d1(WordCount), .s(LSUBusWriteCrit), + .d1(WordCount), .s(SelLSUBusWord), .y(WordOffsetAddr)); else assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]; - mux2 #(LINELEN) EarlyReturnMux(ReadDataLineCache, CacheBusWriteData, SelBusBuffer, ReadDataLine); + mux2 #(LINELEN) EarlyReturnMux(ReadDataLineCache, LSUBusBuffer, SelBusBuffer, ReadDataLine); subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL) subcachelineread( .PAdr(WordOffsetAddr), @@ -174,10 +173,10 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE for(index = 0; index < LINELEN/8; index++) begin mux2 #(8) WriteDataMux(.d0(FinalWriteDataDup[8*index+7:8*index]), - .d1(CacheBusWriteData[8*index+7:8*index]), .s(LineByteMux[index]), .y(CacheWriteData[8*index+7:8*index])); + .d1(LSUBusBuffer[8*index+7:8*index]), .s(LineByteMux[index]), .y(CacheWriteData[8*index+7:8*index])); end //mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), -// .d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData)); +// .d1(LSUBusBuffer), .s(SetValid), .y(CacheWriteData)); mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d2({VictimTag, FlushAdr, {OFFSETLEN{1'b0}}}), @@ -214,7 +213,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE assign CacheRW = Cacheable ? RW : 2'b00; assign CacheAtomic = Cacheable ? Atomic : 2'b00; cachefsm cachefsm(.clk, .reset, .CacheFetchLine, .CacheWriteLine, .CacheBusAck, - .CacheRW, .CacheAtomic, .CPUBusy, .IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM, + .CacheRW, .CacheAtomic, .CPUBusy, .IgnoreRequestTLB, .TrapM, .CacheHit, .VictimDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdr, .ClearValid, .ClearDirty, .SetDirty, diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index a1c785dc7..4cf17db99 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -42,7 +42,6 @@ module cachefsm input logic CPUBusy, // interlock fsm input logic IgnoreRequestTLB, - input logic IgnoreRequestTrapM, input logic TrapM, // Bus inputs input logic CacheBusAck, @@ -90,6 +89,7 @@ module cachefsm STATE_MISS_FETCH_WDV, STATE_MISS_EVICT_DIRTY, STATE_MISS_WRITE_CACHE_LINE, + STATE_MISS_READ_DELAY, // required for back to back reads. structural hazard on writting SRAM // flush cache STATE_FLUSH, STATE_FLUSH_CHECK, @@ -98,12 +98,12 @@ module cachefsm (* mark_debug = "true" *) statetype CurrState, NextState; logic IgnoreRequest; - assign IgnoreRequest = IgnoreRequestTLB | IgnoreRequestTrapM; + assign IgnoreRequest = IgnoreRequestTLB | TrapM; // if the command is used in the READY state then the cache needs to be able to supress - // using both IgnoreRequestTLB and IgnoreRequestTrapM. Otherwise we can just use IgnoreRequestTLB. + // using both IgnoreRequestTLB and DCacheTrapM. Otherwise we can just use IgnoreRequestTLB. - assign DoFlush = FlushCache & ~IgnoreRequestTrapM; // do NOT suppress flush on DTLBMissM. Does not depend on address translation. + assign DoFlush = FlushCache & ~TrapM; // do NOT suppress flush on DTLBMissM. Does not depend on address translation. assign AMO = CacheAtomic[1] & (&CacheRW); assign DoAMO = AMO & ~IgnoreRequest; assign DoRead = CacheRW[1] & ~IgnoreRequest; @@ -139,7 +139,11 @@ module cachefsm STATE_MISS_FETCH_WDV: if(CacheBusAck & ~VictimDirty) NextState = STATE_MISS_WRITE_CACHE_LINE; else if(CacheBusAck & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY; else NextState = STATE_MISS_FETCH_WDV; - STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_READY; + //STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_READY; + STATE_MISS_WRITE_CACHE_LINE: if(~(AMO | CacheRW[0])) NextState = STATE_MISS_READ_DELAY; + else NextState = STATE_READY; + STATE_MISS_READ_DELAY: if(CPUBusy) NextState = STATE_MISS_READ_DELAY; + else NextState = STATE_READY; STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; else NextState = STATE_MISS_EVICT_DIRTY; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. @@ -195,7 +199,7 @@ module cachefsm (CurrState == STATE_FLUSH_CHECK & VictimDirty); // **** can this be simplified? assign SelAdr = (CurrState == STATE_READY & (IgnoreRequestTLB & ~TrapM)) | // Ignore Request is needed on TLB miss. - // use the raw requests as we don't want IgnoreRequestTrapM in the critical path + // use the raw requests as we don't want DCacheTrapM in the critical path (CurrState == STATE_READY & ((AMO | CacheRW[0]) & CacheHit)) | // changes if store delay hazard removed (CurrState == STATE_READY & (DoAnyMiss)) | (CurrState == STATE_MISS_FETCH_WDV) | @@ -203,7 +207,7 @@ module cachefsm (CurrState == STATE_MISS_WRITE_CACHE_LINE) | resetDelay; - assign SelBusBuffer = CurrState == STATE_MISS_WRITE_CACHE_LINE; + assign SelBusBuffer = CurrState == STATE_MISS_WRITE_CACHE_LINE | CurrState == STATE_MISS_READ_DELAY; assign SRAMEnable = (CurrState == STATE_READY & ~CPUBusy | CacheStall) | (CurrState != STATE_READY) | reset; endmodule // cachefsm diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv index 636552f06..a9b584130 100644 --- a/pipelined/src/fpu/srt.sv +++ b/pipelined/src/fpu/srt.sv @@ -253,28 +253,5 @@ module divinteration ( endmodule -///////// -// csa // -///////// -module csa #(parameter N=69) ( - input logic [N-1:0] in1, in2, in3, - input logic cin, - output logic [N-1:0] out1, out2 -); - - // This block adds in1, in2, in3, and cin to produce - // a result out1 / out2 in carry-save redundant form. - // cin is just added to the least significant bit and - // is Startuired to handle adding a negative divisor. - // Fortunately, the carry (out2) is shifted left by one - // bit, leaving room in the least significant bit to - // insert cin. - - assign out1 = in1 ^ in2 ^ in3; - assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | - (in2[N-2:0] & in3[N-2:0]), cin}; -endmodule - - diff --git a/pipelined/src/generic/csa.sv b/pipelined/src/generic/csa.sv new file mode 100644 index 000000000..9251b3467 --- /dev/null +++ b/pipelined/src/generic/csa.sv @@ -0,0 +1,45 @@ +/////////////////////////////////////////// +// csa.sv +// +// Written: Katherine Parry and David_Harris@hmc.edu 21 August 2022 +// Modified: +// +// Purpose: 3:2 carry-save adder +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +module csa #(parameter N=16) ( + input logic [N-1:0] x, y, z, + input logic cin, + output logic [N-1:0] s, c +); + + // This block adds x, y, z, and cin to produce + // a result s / c in carry-save redundant form. + // cin is just added to the least significant bit + // s + c = x + y + z + cin + + assign s = x ^ y ^ z; + assign c = {x[N-2:0] & (y[N-2:0] | z[N-2:0]) | + (y[N-2:0] & z[N-2:0]), cin}; +endmodule diff --git a/pipelined/src/ieu/comparator.sv b/pipelined/src/ieu/comparator.sv index c7c004dff..e30dc5fd2 100644 --- a/pipelined/src/ieu/comparator.sv +++ b/pipelined/src/ieu/comparator.sv @@ -30,6 +30,29 @@ `include "wally-config.vh" +// This comparator is best +module comparator_dc_flip #(parameter WIDTH=64) ( + input logic [WIDTH-1:0] a, b, + input logic sgnd, + output logic [1:0] flags); + + logic eq, lt, ltu; + logic [WIDTH-1:0] af, bf; + + // For signed numbers, flip most significant bit + assign af = {a[WIDTH-1] ^ sgnd, a[WIDTH-2:0]}; + assign bf = {b[WIDTH-1] ^ sgnd, b[WIDTH-2:0]}; + + // behavioral description gives best results + assign eq = (a == b); + assign lt = (af < bf); + assign flags = {eq, lt}; +endmodule + +/* + +Other comparators evaluated + module donedet #(parameter WIDTH=64) ( input logic [WIDTH-1:0] a, b, output logic eq); @@ -80,24 +103,6 @@ module comparator #(parameter WIDTH=64) ( assign flags = {eq, lt, ltu}; endmodule -// This comparator is best -module comparator_dc_flip #(parameter WIDTH=64) ( - input logic [WIDTH-1:0] a, b, - input logic sgnd, - output logic [1:0] flags); - - logic eq, lt, ltu; - logic [WIDTH-1:0] af, bf; - - // For signed numbers, flip most significant bit - assign af = {a[WIDTH-1] ^ sgnd, a[WIDTH-2:0]}; - assign bf = {b[WIDTH-1] ^ sgnd, b[WIDTH-2:0]}; - - // behavioral description gives best results - assign eq = (a == b); - assign lt = (af < bf); - assign flags = {eq, lt}; -endmodule module comparator2 #(parameter WIDTH=64) ( input logic clk, reset, @@ -106,7 +111,7 @@ module comparator2 #(parameter WIDTH=64) ( logic eq, lt, ltu; - /* verilator lint_off UNOPTFLAT */ + /* verilator lint_off UNOPTFLAT / // prefix implementation localparam levels=$clog2(WIDTH); genvar i; @@ -133,7 +138,7 @@ module comparator2 #(parameter WIDTH=64) ( // A < B signed if less than unsigned and msb is not < unsigned, or if A negative and B positive assign lt2 = ltu2 & ~l[0][WIDTH-1] | a[WIDTH-1] & ~b[WIDTH-1]; assign flags = {eq2, lt2, ltu2}; - /* verilator lint_on UNOPTFLAT */ + /* verilator lint_on UNOPTFLAT / endmodule @@ -143,7 +148,7 @@ module comparator_prefix #(parameter WIDTH=64) ( logic eq, lt, ltu; - /* verilator lint_off UNOPTFLAT */ + /* verilator lint_off UNOPTFLAT // prefix implementation localparam levels=$clog2(WIDTH); genvar i; @@ -170,7 +175,7 @@ module comparator_prefix #(parameter WIDTH=64) ( // A < B signed if less than unsigned and msb is not < unsigned, or if A negative and B positive assign lt2 = ltu2 & ~l[0][WIDTH-1] | a[WIDTH-1] & ~b[WIDTH-1]; assign flags = {eq2, lt2, ltu2}; - /* verilator lint_on UNOPTFLAT */ + /* verilator lint_on UNOPTFLAT / endmodule @@ -317,3 +322,4 @@ module stinecomp64 (FCC, A, B, Sel); assign FCC = {LT, EQ}; endmodule // comp64 +*/ \ No newline at end of file diff --git a/pipelined/src/ieu/datapath.sv b/pipelined/src/ieu/datapath.sv index df711695e..d6cabb442 100644 --- a/pipelined/src/ieu/datapath.sv +++ b/pipelined/src/ieu/datapath.sv @@ -123,6 +123,7 @@ module datapath ( flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW); flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW); + // *** simplify WriteDataE in this merge // floating point interactions: fcvt, fp stores if (`F_SUPPORTED&(`LLEN>`XLEN)) begin:fpmux logic [`XLEN-1:0] IFCvtResultW; diff --git a/pipelined/src/ieu/forward.sv b/pipelined/src/ieu/forward.sv index ab938b37a..07bd89d3d 100644 --- a/pipelined/src/ieu/forward.sv +++ b/pipelined/src/ieu/forward.sv @@ -58,7 +58,7 @@ module forward( // Stall on dependent operations that finish in Mem Stage and can't bypass in time assign MatchDE = (Rs1D == RdE) | (Rs2D == RdE); // Decode-stage instruction source depends on result from execute stage instruction - assign FPUStallD = FWriteIntE & MatchDE; + assign FPUStallD = 0; // FWriteIntE & MatchDE; // FPU to Integer transfers have single-cycle latency assign LoadStallD = (MemReadE|SCE) & MatchDE; assign MDUStallD = MDUE & MatchDE; assign CSRRdStallD = CSRReadE & MatchDE; diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index b4b160289..b8e636c5f 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -197,25 +197,25 @@ module ifu ( localparam integer WORDSPERLINE = (CACHE_ENABLED) ? `ICACHE_LINELENINBITS/`XLEN : 1; localparam integer LINELEN = (CACHE_ENABLED) ? `ICACHE_LINELENINBITS : `XLEN; localparam integer LOGBWPL = (`DMEM == `MEM_CACHE) ? $clog2(WORDSPERLINE) : 1; - logic [LINELEN-1:0] ICacheBusWriteData; + logic [LINELEN-1:0] ILSUBusBuffer; logic [`PA_BITS-1:0] ICacheBusAdr; logic ICacheBusAck; logic SelUncachedAdr; busdp #(WORDSPERLINE, LINELEN, LOGBWPL, CACHE_ENABLED) busdp(.clk, .reset, - .LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusInit(IFUBusInit), .LSUBusWrite(), .LSUBusWriteCrit(), + .LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusInit(IFUBusInit), .LSUBusWrite(), .SelLSUBusWord(), .LSUBusRead(IFUBusRead), .LSUBusSize(), .LSUBurstType(IFUBurstType), .LSUTransType(IFUTransType), .LSUTransComplete(IFUTransComplete), .LSUFunct3M(3'b010), .LSUBusAdr(IFUBusAdr), .DCacheBusAdr(ICacheBusAdr), .WordCount(), .DCacheFetchLine(ICacheFetchLine), .DCacheWriteLine(1'b0), .DCacheBusAck(ICacheBusAck), - .DCacheBusWriteData(ICacheBusWriteData), .LSUPAdrM(PCPF), + .DLSUBusBuffer(ILSUBusBuffer), .LSUPAdrM(PCPF), .SelUncachedAdr, .IgnoreRequest(ITLBMissF), .LSURWM(2'b10), .CPUBusy, .CacheableM(CacheableF), .BusStall, .BusCommittedM()); - mux2 #(32) UnCachedDataMux(.d0(FinalInstrRawF), .d1(ICacheBusWriteData[32-1:0]), + mux2 #(32) UnCachedDataMux(.d0(FinalInstrRawF), .d1(ILSUBusBuffer[32-1:0]), .s(SelUncachedAdr), .y(AllInstrRawF[31:0])); @@ -223,14 +223,14 @@ module ifu ( cache #(.LINELEN(`ICACHE_LINELENINBITS), .NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS), .NUMWAYS(`ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .DCACHE(0)) - icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0), - .CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck), + icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM, + .LSUBusBuffer(ILSUBusBuffer), .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), .CacheFetchLine(ICacheFetchLine), .CacheWriteLine(), .ReadDataWord(FinalInstrRawF), .Cacheable(CacheableF), .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), - .ByteMask('0), .WordCount('0), .LSUBusWriteCrit('0), + .ByteMask('0), .WordCount('0), .SelLSUBusWord('0), .FinalWriteData('0), .RW(2'b10), .Atomic('0), .FlushCache('0), diff --git a/pipelined/src/lsu/busdp.sv b/pipelined/src/lsu/busdp.sv index 5139efdc1..b241d75f1 100644 --- a/pipelined/src/lsu/busdp.sv +++ b/pipelined/src/lsu/busdp.sv @@ -55,7 +55,7 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) input logic DCacheFetchLine, input logic DCacheWriteLine, output logic DCacheBusAck, - output logic [LINELEN-1:0] DCacheBusWriteData, //*** change name. + output logic [LINELEN-1:0] DLSUBusBuffer, //*** change name. output logic SelUncachedAdr, // lsu interface @@ -64,7 +64,7 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) input logic [1:0] LSURWM, input logic CPUBusy, input logic CacheableM, - output logic LSUBusWriteCrit, + output logic SelLSUBusWord, output logic BusStall, output logic BusCommittedM); @@ -73,14 +73,14 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) logic [LOGWPL-1:0] WordCountDelayed; - // *** implement flops as an array if feasbile; DCacheBusWriteData might be a problem - // *** better name than DCacheBusWriteData + // *** implement flops as an array if feasbile; DLSUBusBuffer might be a problem + // *** better name than DLSUBusBuffer genvar index; for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer logic [WORDSPERLINE-1:0] CaptureWord; assign CaptureWord[index] = LSUBusAck & LSUBusRead & (index == WordCountDelayed); flopen #(`XLEN) fb(.clk, .en(CaptureWord[index]), .d(LSUBusHRDATA), - .q(DCacheBusWriteData[(index+1)*`XLEN-1:index*`XLEN])); + .q(DLSUBusBuffer[(index+1)*`XLEN-1:index*`XLEN])); end mux2 #(`PA_BITS) localadrmux(DCacheBusAdr, LSUPAdrM, SelUncachedAdr, LocalLSUBusAdr); assign LSUBusAdr = ({{`PA_BITS-LOGWPL{1'b0}}, WordCount} << $clog2(`XLEN/8)) + LocalLSUBusAdr; @@ -89,6 +89,6 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) busfsm #(WordCountThreshold, LOGWPL, CACHE_ENABLED) busfsm( .clk, .reset, .IgnoreRequest, .LSURWM, .DCacheFetchLine, .DCacheWriteLine, - .LSUBusAck, .LSUBusInit, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .LSUBusWriteCrit, .LSUBusRead, + .LSUBusAck, .LSUBusInit, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .SelLSUBusWord, .LSUBusRead, .LSUBurstType, .LSUTransType, .LSUTransComplete, .DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount, .WordCountDelayed); endmodule diff --git a/pipelined/src/lsu/busfsm.sv b/pipelined/src/lsu/busfsm.sv index 75c4d006b..00c561423 100644 --- a/pipelined/src/lsu/busfsm.sv +++ b/pipelined/src/lsu/busfsm.sv @@ -47,7 +47,7 @@ module busfsm #(parameter integer WordCountThreshold, output logic BusStall, output logic LSUBusWrite, - output logic LSUBusWriteCrit, + output logic SelLSUBusWord, output logic LSUBusRead, output logic [2:0] LSUBurstType, output logic LSUTransComplete, @@ -166,7 +166,7 @@ module busfsm #(parameter integer WordCountThreshold, assign UnCachedLSUBusWrite = (BusCurrState == STATE_BUS_READY & UnCachedAccess & LSURWM[0] & ~IgnoreRequest) | (BusCurrState == STATE_BUS_UNCACHED_WRITE); assign LSUBusWrite = UnCachedLSUBusWrite | (BusCurrState == STATE_BUS_WRITE); - assign LSUBusWriteCrit = (BusCurrState == STATE_BUS_READY & UnCachedAccess & LSURWM[0]) | + assign SelLSUBusWord = (BusCurrState == STATE_BUS_READY & UnCachedAccess & LSURWM[0]) | (BusCurrState == STATE_BUS_UNCACHED_WRITE) | (BusCurrState == STATE_BUS_WRITE); diff --git a/pipelined/src/lsu/interlockfsm.sv b/pipelined/src/lsu/interlockfsm.sv index a6cf2846e..7f0dd395d 100644 --- a/pipelined/src/lsu/interlockfsm.sv +++ b/pipelined/src/lsu/interlockfsm.sv @@ -46,8 +46,7 @@ module interlockfsm( output logic InterlockStall, output logic SelReplayMemE, output logic SelHPTW, - output logic IgnoreRequestTLB, - output logic IgnoreRequestTrapM); + output logic IgnoreRequestTLB); logic ToITLBMiss; logic ToITLBMissNoReplay; @@ -96,38 +95,13 @@ module interlockfsm( endcase end // always_comb - // *** change test to not propagate xs so that we can return to excluded code - // might have changed name to WALLY-MMU-SV39? - - // signal to CPU it needs to wait on HPTW. - /* -----\/----- EXCLUDED -----\/----- - // this code has a problem with imperas64mmu as it reads in an invalid uninitalized instruction. InterlockStall becomes x and it propagates - // everywhere. The case statement below implements the same logic but any x on the inputs will resolve to 0. - // Note this will cause a problem for post synthesis gate simulation. - assign InterlockStall = (InterlockCurrState == STATE_T0_READY & (DTLBMissOrDAFaultM | ITLBMissOrDAFaultF)) | - (InterlockCurrState == STATE_T3_DTLB_MISS) | (InterlockCurrState == STATE_T4_ITLB_MISS) | - (InterlockCurrState == STATE_T5_ITLB_MISS) | (InterlockCurrState == STATE_T7_DITLB_MISS); - - -----/\----- EXCLUDED -----/\----- */ - - always_comb begin - InterlockStall = 1'b0; - case(InterlockCurrState) - STATE_T0_READY: if((DTLBMissOrDAFaultM | ITLBMissOrDAFaultF) & ~TrapM) InterlockStall = 1'b1; - STATE_T3_DTLB_MISS: InterlockStall = 1'b1; - STATE_T4_ITLB_MISS: InterlockStall = 1'b1; - STATE_T5_ITLB_MISS: InterlockStall = 1'b1; - STATE_T7_DITLB_MISS: InterlockStall = 1'b1; - default: InterlockStall = 1'b0; - endcase - end - + assign InterlockStall = (InterlockCurrState == STATE_T0_READY & (DTLBMissOrDAFaultM | ITLBMissOrDAFaultF) & ~TrapM) | + (InterlockCurrState == STATE_T3_DTLB_MISS) | (InterlockCurrState == STATE_T4_ITLB_MISS) | + (InterlockCurrState == STATE_T5_ITLB_MISS) | (InterlockCurrState == STATE_T7_DITLB_MISS); assign SelReplayMemE = (InterlockCurrState == STATE_T1_REPLAY & DCacheStallM) | (InterlockCurrState == STATE_T3_DTLB_MISS & DTLBWriteM) | (InterlockCurrState == STATE_T5_ITLB_MISS & ITLBWriteF); assign SelHPTW = (InterlockCurrState == STATE_T3_DTLB_MISS) | (InterlockCurrState == STATE_T4_ITLB_MISS) | (InterlockCurrState == STATE_T5_ITLB_MISS) | (InterlockCurrState == STATE_T7_DITLB_MISS); assign IgnoreRequestTLB = (InterlockCurrState == STATE_T0_READY & (ITLBMissOrDAFaultF | DTLBMissOrDAFaultM)); - assign IgnoreRequestTrapM = (InterlockCurrState == STATE_T0_READY & (TrapM)) | - ((InterlockCurrState == STATE_T1_REPLAY) & (TrapM)); endmodule diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 08d217e17..b278306a1 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -107,9 +107,9 @@ module lsu ( logic CacheableM; logic BusStall; logic InterlockStall; - logic IgnoreRequestTLB, IgnoreRequestTrapM; + logic IgnoreRequestTLB; logic BusCommittedM, DCacheCommittedM; - logic LSUBusWriteCrit; + logic SelLSUBusWord; logic DataDAPageFaultM; logic [`XLEN-1:0] LSUWriteDataM; logic [`XLEN-1:0] WriteDataM; @@ -136,10 +136,10 @@ module lsu ( .ReadDataM(ReadDataM[`XLEN-1:0]), .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M, .IEUAdrExtM, .PTE, .LSUWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, .IEUAdrE, .LSUAdrE, .PreLSUPAdrM, .CPUBusy, .InterlockStall, .SelHPTW, - .IgnoreRequestTLB, .IgnoreRequestTrapM); + .IgnoreRequestTLB); end else begin assign {InterlockStall, SelHPTW, PTE, PageType, DTLBWriteM, ITLBWriteF, IgnoreRequestTLB} = '0; - assign IgnoreRequestTrapM = TrapM; assign CPUBusy = StallW; assign PreLSURWM = MemRWM; + assign CPUBusy = StallW; assign PreLSURWM = MemRWM; assign LSUAdrE = IEUAdrE[11:0]; assign PreLSUPAdrM = IEUAdrExtM; assign LSUFunct3M = Funct3M; assign LSUFunct7M = Funct7M; assign LSUAtomicM = AtomicM; @@ -197,7 +197,7 @@ module lsu ( logic [`LLEN-1:0] ReadDataWordMuxM; logic IgnoreRequest; logic SelUncachedAdr; - assign IgnoreRequest = IgnoreRequestTLB | IgnoreRequestTrapM; + assign IgnoreRequest = IgnoreRequestTLB | TrapM; if (`DMEM == `MEM_TIM) begin : dtim // *** directly instantiate RAM or ROM here. Instantiate SRAM1P1RW. @@ -212,7 +212,7 @@ module lsu ( localparam integer WORDSPERLINE = (CACHE_ENABLED) ? `DCACHE_LINELENINBITS/`XLEN : 1; localparam integer LINELEN = (CACHE_ENABLED) ? `DCACHE_LINELENINBITS : `XLEN; localparam integer LOGBWPL = (CACHE_ENABLED) ? $clog2(WORDSPERLINE) : 1; - logic [LINELEN-1:0] DCacheBusWriteData; + logic [LINELEN-1:0] DLSUBusBuffer; logic [`PA_BITS-1:0] DCacheBusAdr; logic DCacheWriteLine; logic DCacheFetchLine; @@ -222,13 +222,13 @@ module lsu ( busdp #(WORDSPERLINE, LINELEN, LOGBWPL, CACHE_ENABLED) busdp( .clk, .reset, .LSUBusHRDATA, .LSUBusAck, .LSUBusInit, .LSUBusWrite, .LSUBusRead, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete, - .WordCount, .LSUBusWriteCrit, + .WordCount, .SelLSUBusWord, .LSUFunct3M, .LSUBusAdr, .DCacheBusAdr, .DCacheFetchLine, - .DCacheWriteLine, .DCacheBusAck, .DCacheBusWriteData, .LSUPAdrM, + .DCacheWriteLine, .DCacheBusAck, .DLSUBusBuffer, .LSUPAdrM, .SelUncachedAdr, .IgnoreRequest, .LSURWM, .CPUBusy, .CacheableM, .BusStall, .BusCommittedM); - mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DCacheBusWriteData[`XLEN-1:0]}), + mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DLSUBusBuffer[`XLEN-1:0]}), .s(SelUncachedAdr), .y(ReadDataWordMuxM)); mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(IEUWriteDataM), .s(SelUncachedAdr), .y(LSUBusHWDATA)); @@ -240,14 +240,14 @@ module lsu ( assign FinalWriteDataM = {{`LLEN-`XLEN{1'b0}}, IEUWriteDataM}; cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( - .clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM), + .clk, .reset, .CPUBusy, .SelLSUBusWord, .RW(LSURWM), .Atomic(LSUAtomicM), .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), .ByteMask(FinalByteMaskM), .WordCount, .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM), .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), - .IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM), + .IgnoreRequestTLB, .TrapM, .CacheCommitted(DCacheCommittedM), .CacheBusAdr(DCacheBusAdr), .ReadDataWord(ReadDataWordM), - .CacheBusWriteData(DCacheBusWriteData), .CacheFetchLine(DCacheFetchLine), + .LSUBusBuffer(DLSUBusBuffer), .CacheFetchLine(DCacheFetchLine), .CacheWriteLine(DCacheWriteLine), .CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0)); end else begin : passthrough diff --git a/pipelined/src/lsu/lsuvirtmen.sv b/pipelined/src/lsu/lsuvirtmen.sv index 751c2c928..748aa3df0 100644 --- a/pipelined/src/lsu/lsuvirtmen.sv +++ b/pipelined/src/lsu/lsuvirtmen.sv @@ -65,8 +65,7 @@ module lsuvirtmem( output logic InterlockStall, output logic CPUBusy, output logic SelHPTW, - output logic IgnoreRequestTLB, - output logic IgnoreRequestTrapM); + output logic IgnoreRequestTLB); logic AnyCPUReqM; @@ -87,7 +86,7 @@ module lsuvirtmem( interlockfsm interlockfsm ( .clk, .reset, .MemRWM, .AtomicM, .ITLBMissOrDAFaultF, .ITLBWriteF, .DTLBMissOrDAFaultM, .DTLBWriteM, .TrapM, .DCacheStallM, - .InterlockStall, .SelReplayMemE, .SelHPTW, .IgnoreRequestTLB, .IgnoreRequestTrapM); + .InterlockStall, .SelReplayMemE, .SelHPTW, .IgnoreRequestTLB); hptw hptw( .clk, .reset, .SATP_REGW, .PCF, .IEUAdrExtM, .MemRWM, .AtomicM, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, diff --git a/pipelined/src/wally/wallypipelinedsocwrapper.v b/pipelined/src/wally/wallypipelinedsocwrapper.v index 2a25f476f..faf762285 100644 --- a/pipelined/src/wally/wallypipelinedsocwrapper.v +++ b/pipelined/src/wally/wallypipelinedsocwrapper.v @@ -42,7 +42,7 @@ module wallypipelinedsocwrapper ( output HCLK, HRESETn, output [31:0] HADDR, output [`AHBW-1:0] HWDATA, - output logic [`XLEN/8-1:0] HWSTRB, + output [`XLEN/8-1:0] HWSTRB, output HWRITE, output [2:0] HSIZE, output [2:0] HBURST, diff --git a/pipelined/testbench/testbench-fpga.sv b/pipelined/testbench/testbench-fpga.sv index 5997180f4..2a161d97e 100644 --- a/pipelined/testbench/testbench-fpga.sv +++ b/pipelined/testbench/testbench-fpga.sv @@ -56,6 +56,7 @@ logic [3:0] dummy; logic HREADYEXT, HRESPEXT; logic [31:0] HADDR; logic [`AHBW-1:0] HWDATA; + logic [`XLEN/8-1:0] HWSTRB; logic HWRITE; logic [2:0] HSIZE; logic [2:0] HBURST; @@ -105,7 +106,7 @@ logic [3:0] dummy; assign UARTSin = 1; wallypipelinedsoc dut(.clk, .reset_ext, .reset, .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT, - .HCLK, .HRESETn, .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST, .HPROT, + .HCLK, .HRESETn, .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST, .HPROT, .HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn, .UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK); @@ -135,14 +136,15 @@ logic [3:0] dummy; if (TEST == "coremark") // read test vectors into memory - pathname = tvpaths[tests[0].atoi()]; + //pathname = tvpaths[tests[0].atoi()]; + pathname = "../../tests/testsBP/fpga-test-sdc/bin/"; /* if (tests[0] == `IMPERASTEST) pathname = tvpaths[0]; else pathname = tvpaths[1]; */ - memfilename = {pathname, tests[test], ".elf.memfile"}; + memfilename = "../../tests/testsBP/fpga-test-sdc/bin/fpga-test-sdc.memfile"; romfilename = {"../../tests/testsBP/fpga-test-sdc/bin/fpga-test-sdc.memfile"}; sdcfilename = {"../testbench/sdc/ramdisk2.hex"}; - //$readmemh(romfilename, dut.wallypipelinedsoc.uncore.bootrom.bootrom.memory.RAM); + $readmemh(romfilename, dut.wallypipelinedsoc.uncore.bootrom.bootrom.memory.RAM); $readmemh(sdcfilename, sdcard.FLASHmem); ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"}; @@ -162,7 +164,7 @@ logic [3:0] dummy; always @(negedge clk) begin if (TEST == "coremark") - if (dut.core.priv.priv.ecallM) begin + if (dut.core.priv.priv.EcallFaultM) begin $display("Benchmark: coremark is done."); $stop; end @@ -285,11 +287,21 @@ logic [3:0] dummy; .done(DCacheFlushDone)); // initialize the branch predictor - if (`BPRED_ENABLED == 1) - initial begin - $readmemb(`TWO_BIT_PRELOAD, dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem); - $readmemb(`BTB_PRELOAD, dut.core.ifu.bpred.bpred.TargetPredictor.memory.mem); - end + if (`BPRED_ENABLED == 1) + begin + genvar adrindex; + + // Initializing all zeroes into the branch predictor memory. + for(adrindex = 0; adrindex < 1024; adrindex++) begin + initial begin + force dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem[adrindex] = 0; + force dut.core.ifu.bpred.bpred.TargetPredictor.memory.mem[adrindex] = 0; + #1; + release dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem[adrindex]; + release dut.core.ifu.bpred.bpred.TargetPredictor.memory.mem[adrindex]; + end + end + end endmodule module riscvassertions; @@ -342,8 +354,13 @@ module DCacheFlushFSM localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES; localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS; localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN; - localparam integer numwords = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN; - localparam integer lognumlines = $clog2(numlines); + localparam integer linelen = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN; + localparam integer sramlen = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].SRAMLEN; + localparam integer cachesramwords = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].NUMSRAM; + +//testbench.dut.core.lsu.bus.dcache.dcache.CacheWays.NUMSRAM; + localparam integer numwords = sramlen/`XLEN; + localparam integer lognumlines = $clog2(numlines); localparam integer loglinebytelen = $clog2(linebytelen); localparam integer lognumways = $clog2(numways); localparam integer tagstart = lognumlines + loglinebytelen; @@ -351,65 +368,71 @@ module DCacheFlushFSM genvar index, way, cacheWord; - logic [`XLEN-1:0] CacheData [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0]; - for(index = 0; index < numlines; index++) begin - for(way = 0; way < numways; way++) begin - for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin - copyShadow #(.tagstart(tagstart), - .loglinebytelen(loglinebytelen)) - copyShadow(.clk, - .start, - .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]), - .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]), - .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]), - .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]), - .index(index), - .cacheWord(cacheWord), - .CacheData(CacheData[way][index][cacheWord]), - .CacheAdr(CacheAdr[way][index][cacheWord]), - .CacheTag(CacheTag[way][index][cacheWord]), - .CacheValid(CacheValid[way][index][cacheWord]), - .CacheDirty(CacheDirty[way][index][cacheWord])); - end - end + logic [sramlen-1:0] CacheData [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + logic [sramlen-1:0] cacheline; + logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + logic CacheValid [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + logic CacheDirty [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + for(index = 0; index < numlines; index++) begin + for(way = 0; way < numways; way++) begin + for(cacheWord = 0; cacheWord < cachesramwords; cacheWord++) begin + copyShadow #(.tagstart(tagstart), + .loglinebytelen(loglinebytelen), .sramlen(sramlen)) + copyShadow(.clk, + .start, + .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]), + .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]), + .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]), + .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]), + .index(index), + .cacheWord(cacheWord), + .CacheData(CacheData[way][index][cacheWord]), + .CacheAdr(CacheAdr[way][index][cacheWord]), + .CacheTag(CacheTag[way][index][cacheWord]), + .CacheValid(CacheValid[way][index][cacheWord]), + .CacheDirty(CacheDirty[way][index][cacheWord])); + end end + end - integer i, j, k; + integer i, j, k, l; - always @(posedge clk) begin - if (start) begin #1 - #1 - for(i = 0; i < numlines; i++) begin - for(j = 0; j < numways; j++) begin - for(k = 0; k < numwords; k++) begin - if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin - ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k]; - end - end - end - end - end - end - - - end + always @(posedge clk) begin + if (start) begin #1 + #1 + for(i = 0; i < numlines; i++) begin + for(j = 0; j < numways; j++) begin + for(l = 0; l < cachesramwords; l++) begin + if (CacheValid[j][i][l] & CacheDirty[j][i][l]) begin + for(k = 0; k < numwords; k++) begin + //cacheline = CacheData[j][i][0]; + // does not work with modelsim + // # ** Error: ../testbench/testbench.sv(483): Range must be bounded by constant expressions. + // see https://verificationacademy.com/forums/systemverilog/range-must-be-bounded-constant-expressions + //ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = cacheline[`XLEN*(k+1)-1:`XLEN*k]; + ShadowRAM[(CacheAdr[j][i][l] >> $clog2(`XLEN/8)) + k] = CacheData[j][i][l][`XLEN*k +: `XLEN]; + end + end + end + end + end + end + end + end flop #(1) doneReg(.clk, .d(start), .q(done)); endmodule module copyShadow - #(parameter tagstart, loglinebytelen) + #(parameter tagstart, loglinebytelen, sramlen) (input logic clk, input logic start, input logic [`PA_BITS-1:tagstart] tag, input logic valid, dirty, - input logic [`XLEN-1:0] data, + input logic [sramlen-1:0] data, input logic [32-1:0] index, input logic [32-1:0] cacheWord, - output logic [`XLEN-1:0] CacheData, + output logic [sramlen-1:0] CacheData, output logic [`PA_BITS-1:0] CacheAdr, output logic [`XLEN-1:0] CacheTag, output logic CacheValid, @@ -422,9 +445,8 @@ module copyShadow CacheValid = valid; CacheDirty = dirty; CacheData = data; - CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(`XLEN/8)); + CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(sramlen/8)); end end -endmodule - +endmodule