diff --git a/bin/elf2hex.sh b/bin/elf2hex.sh new file mode 100755 index 000000000..b68439465 --- /dev/null +++ b/bin/elf2hex.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +# james.stine@okstate.edu 4 Jan 2022 +# Script to run elf2hex for memfile for +# Imperas and riscv-arch-test benchmarks + +for file in work/rv64i_m/*/*.elf ; do + memfile=${file%.elf}.elf.memfile + echo riscv64-unknown-elf-elf2hex --bit-width 64 --input "$file" --output "$memfile" + riscv64-unknown-elf-elf2hex --bit-width 64 --input "$file" --output "$memfile" +done + +for file in work/rv32i_m/*/*.elf ; do + memfile=${file%.elf}.elf.memfile + echo riscv64-unknown-elf-elf2hex --bit-width 32 --input "$file" --output "$memfile" + riscv64-unknown-elf-elf2hex --bit-width 32 --input "$file" --output "$memfile" +done diff --git a/pipelined/config/buildroot/wally-config.vh b/pipelined/config/buildroot/wally-config.vh index 53ea11538..b0d85e517 100644 --- a/pipelined/config/buildroot/wally-config.vh +++ b/pipelined/config/buildroot/wally-config.vh @@ -62,14 +62,14 @@ `define DTLB_ENTRIES 32 // Cache configuration. Sizes should be a power of two -// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines `define DCACHE_NUMWAYS 4 `define DCACHE_WAYSIZEINBYTES 4096 -`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_LINELENINBITS 256 `define DCACHE_REPLBITS 3 `define ICACHE_NUMWAYS 4 `define ICACHE_WAYSIZEINBYTES 4096 -`define ICACHE_BLOCKLENINBITS 256 +`define ICACHE_LINELENINBITS 256 // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 diff --git a/pipelined/config/busybear/wally-config.vh b/pipelined/config/busybear/wally-config.vh index fc2868ecc..d9c61a852 100644 --- a/pipelined/config/busybear/wally-config.vh +++ b/pipelined/config/busybear/wally-config.vh @@ -60,14 +60,14 @@ `define DTLB_ENTRIES 32 // Cache configuration. 
Sizes should be a power of two -// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines `define DCACHE_NUMWAYS 4 `define DCACHE_WAYSIZEINBYTES 2048 -`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_LINELENINBITS 256 `define DCACHE_REPLBITS 3 `define ICACHE_NUMWAYS 1 `define ICACHE_WAYSIZEINBYTES 4096 -`define ICACHE_BLOCKLENINBITS 256 +`define ICACHE_LINELENINBITS 256 // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 diff --git a/pipelined/config/coremark/wally-config.vh b/pipelined/config/coremark/wally-config.vh index fa089a3d9..944c3a4a5 100644 --- a/pipelined/config/coremark/wally-config.vh +++ b/pipelined/config/coremark/wally-config.vh @@ -62,14 +62,14 @@ `define DTLB_ENTRIES 32 // Cache configuration. Sizes should be a power of two -// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines `define DCACHE_NUMWAYS 4 `define DCACHE_WAYSIZEINBYTES 2048 -`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_LINELENINBITS 256 `define DCACHE_REPLBITS 3 `define ICACHE_NUMWAYS 1 `define ICACHE_WAYSIZEINBYTES 4096 -`define ICACHE_BLOCKLENINBITS 256 +`define ICACHE_LINELENINBITS 256 // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 diff --git a/pipelined/config/coremark_bare/wally-config.vh b/pipelined/config/coremark_bare/wally-config.vh index b7061fd52..90d4a5e9f 100644 --- a/pipelined/config/coremark_bare/wally-config.vh +++ b/pipelined/config/coremark_bare/wally-config.vh @@ -63,13 +63,13 @@ `define DTLB_ENTRIES 32 // Cache configuration. 
Sizes should be a power of two -// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines `define DCACHE_NUMWAYS 4 `define DCACHE_WAYSIZEINBYTES 4096 -`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_LINELENINBITS 256 `define ICACHE_NUMWAYS 4 `define ICACHE_WAYSIZEINBYTES 4096 -`define ICACHE_BLOCKLENINBITS 256 +`define ICACHE_LINELENINBITS 256 // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 diff --git a/pipelined/config/fpga/wally-config.vh b/pipelined/config/fpga/wally-config.vh index c2ef2446b..b3c87cc78 100644 --- a/pipelined/config/fpga/wally-config.vh +++ b/pipelined/config/fpga/wally-config.vh @@ -62,14 +62,14 @@ `define DTLB_ENTRIES 32 // Cache configuration. Sizes should be a power of two -// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines `define DCACHE_NUMWAYS 4 `define DCACHE_WAYSIZEINBYTES 4096 -`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_LINELENINBITS 256 `define DCACHE_REPLBITS 3 `define ICACHE_NUMWAYS 4 `define ICACHE_WAYSIZEINBYTES 4096 -`define ICACHE_BLOCKLENINBITS 256 +`define ICACHE_LINELENINBITS 256 // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 diff --git a/pipelined/config/rv32gc/wally-config.vh b/pipelined/config/rv32gc/wally-config.vh index 06499180b..adda1c6c3 100644 --- a/pipelined/config/rv32gc/wally-config.vh +++ b/pipelined/config/rv32gc/wally-config.vh @@ -61,14 +61,14 @@ `define DTLB_ENTRIES 32 // Cache configuration. 
Sizes should be a power of two -// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines `define DCACHE_NUMWAYS 4 `define DCACHE_WAYSIZEINBYTES 4096 -`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_LINELENINBITS 256 `define DCACHE_REPLBITS 3 `define ICACHE_NUMWAYS 4 `define ICACHE_WAYSIZEINBYTES 4096 -`define ICACHE_BLOCKLENINBITS 256 +`define ICACHE_LINELENINBITS 256 // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 diff --git a/pipelined/config/rv32ic/wally-config.vh b/pipelined/config/rv32ic/wally-config.vh index ade57a71a..abce4e271 100644 --- a/pipelined/config/rv32ic/wally-config.vh +++ b/pipelined/config/rv32ic/wally-config.vh @@ -61,14 +61,14 @@ `define DTLB_ENTRIES 0 // Cache configuration. Sizes should be a power of two -// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines `define DCACHE_NUMWAYS 4 `define DCACHE_WAYSIZEINBYTES 4096 -`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_LINELENINBITS 256 `define DCACHE_REPLBITS 3 `define ICACHE_NUMWAYS 4 `define ICACHE_WAYSIZEINBYTES 4096 -`define ICACHE_BLOCKLENINBITS 256 +`define ICACHE_LINELENINBITS 256 // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 diff --git a/pipelined/config/rv64BP/wally-config.vh b/pipelined/config/rv64BP/wally-config.vh index d29349012..0a3ebc9c1 100644 --- a/pipelined/config/rv64BP/wally-config.vh +++ b/pipelined/config/rv64BP/wally-config.vh @@ -63,14 +63,14 @@ `define DTLB_ENTRIES 32 // Cache configuration. 
Sizes should be a power of two -// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines `define DCACHE_NUMWAYS 4 `define DCACHE_WAYSIZEINBYTES 4096 -`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_LINELENINBITS 256 `define DCACHE_REPLBITS 3 `define ICACHE_NUMWAYS 4 `define ICACHE_WAYSIZEINBYTES 4096 -`define ICACHE_BLOCKLENINBITS 256 +`define ICACHE_LINELENINBITS 256 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 64 diff --git a/pipelined/config/rv64gc/wally-config.vh b/pipelined/config/rv64gc/wally-config.vh index 6f7dbd886..7f8638e27 100644 --- a/pipelined/config/rv64gc/wally-config.vh +++ b/pipelined/config/rv64gc/wally-config.vh @@ -62,14 +62,14 @@ `define DTLB_ENTRIES 32 // Cache configuration. Sizes should be a power of two -// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines `define DCACHE_NUMWAYS 4 `define DCACHE_WAYSIZEINBYTES 4096 -`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_LINELENINBITS 256 `define DCACHE_REPLBITS 3 `define ICACHE_NUMWAYS 4 `define ICACHE_WAYSIZEINBYTES 4096 -`define ICACHE_BLOCKLENINBITS 256 +`define ICACHE_LINELENINBITS 256 // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 diff --git a/pipelined/config/rv64ic/wally-config.vh b/pipelined/config/rv64ic/wally-config.vh index b4b93ed03..4957a0e8c 100644 --- a/pipelined/config/rv64ic/wally-config.vh +++ b/pipelined/config/rv64ic/wally-config.vh @@ -62,14 +62,14 @@ `define DTLB_ENTRIES 32 // Cache configuration. 
Sizes should be a power of two -// typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines `define DCACHE_NUMWAYS 4 `define DCACHE_WAYSIZEINBYTES 4096 -`define DCACHE_BLOCKLENINBITS 256 +`define DCACHE_LINELENINBITS 256 `define DCACHE_REPLBITS 3 `define ICACHE_NUMWAYS 4 `define ICACHE_WAYSIZEINBYTES 4096 -`define ICACHE_BLOCKLENINBITS 256 +`define ICACHE_LINELENINBITS 256 // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 diff --git a/pipelined/fpu-testfloat/FMA/tbgen/tb.sv b/pipelined/fpu-testfloat/FMA/tbgen/tb.sv index 9091af1ad..621429aa4 100644 --- a/pipelined/fpu-testfloat/FMA/tbgen/tb.sv +++ b/pipelined/fpu-testfloat/FMA/tbgen/tb.sv @@ -153,7 +153,7 @@ fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZEx .FmtM(FmtE), .FrmM(FrmE), .FMAFlgM, .FMAResM); - // generate clock + // produce clock always begin clk = 1; #5; clk = 0; #5;
diff --git a/pipelined/ppa/ppa.sv b/pipelined/ppa/ppa.sv
index 84ae7d971..8ff8bdcc0 100644
--- a/pipelined/ppa/ppa.sv
+++ b/pipelined/ppa/ppa.sv
@@ -45,14 +45,16 @@
 endmodule
 
 module INVX2(input logic a, output logic y);
-   generate
-      if (LIB == SKY130)
-         sky130_osu_sc_12T_ms__inv_2 inv(a, y);
-      else if (LIB == SKL90)
-         scc9gena_inv_2 inv(a, y)
-      else if (LIB == GF14)
-         INV_X2N_A10P5PP84TSL_C14(a, y)
-   endgenerate
+   // Inverter wrapper: instantiates the inverter cell of the library selected
+   // by LIB. LIB, SKY130, SKL90 and GF14 are not declared in this hunk --
+   // presumably parameters/macros defined elsewhere in the file (confirm).
+   // Every cell is connected by position as (a, y) and is named "inv".
+   if (LIB == SKY130)
+      sky130_osu_sc_12T_ms__inv_2 inv(a, y);
+   else if (LIB == SKL90)
+      scc9gena_inv_2 inv(a, y);
+   else if (LIB == GF14)
+      INV_X2N_A10P5PP84TSL_C14 inv(a, y);
 endmodule
 
 module driver #(parameter WDITH=1) (
diff --git a/pipelined/regression/Makefile b/pipelined/regression/Makefile index 1b3279195..b323bdb97 100644 --- a/pipelined/regression/Makefile +++ b/pipelined/regression/Makefile @@ -3,11 +3,16 @@ make all: make -C ../../addins/riscv-arch-test make -C ../../addins/riscv-arch-test XLEN=32 exe2memfile.pl ../../addins/riscv-arch-test/work/*/*/*.elf + # extractFunctionRadix. 
*** # Build wally-riscv-arch-test make -C ../../tests/wally-riscv-arch-test/ make -C ../../tests/wally-riscv-arch-test/ XLEN=32 exe2memfile.pl ../../tests/wally-riscv-arch-test/work/*/*/*.elf + # ***extractFunctionRadix + # *** use elf2hex + + # *** add optional imperas tests # Link Linux test vectors (fix this later***) #cd ../../tests/linux-testgen/linux-testvectors/;./tvLinker.sh diff --git a/pipelined/regression/regression-wally.py b/pipelined/regression/regression-wally.py index 5d503498d..d740784d4 100755 --- a/pipelined/regression/regression-wally.py +++ b/pipelined/regression/regression-wally.py @@ -51,7 +51,7 @@ tc = TestCase( grepstr="400100000 instructions") configs.append(tc) -tests64gc = ["arch64i", "arch64priv", "arch64c", "arch64m", "arch64d", "imperas64i", "imperas64f", "imperas64d", "imperas64p", "imperas64m", "imperas64a", "imperas64c", "wally64priv", "imperas64mmu"] # "wally64i", #, "testsBP64"] +tests64gc = ["arch64i", "arch64priv", "arch64c", "arch64m", "arch64d", "imperas64i", "imperas64f", "imperas64d", "imperas64p", "imperas64m", "imperas64a", "imperas64c", "wally64priv"] # , "imperas64mmu" "wally64i", #, "testsBP64"] for test in tests64gc: tc = TestCase( name=test, @@ -59,7 +59,7 @@ for test in tests64gc: cmd="vsim > {} -c < 1) begin:vict + cachereplacementpolicy #(NUMWAYS, INDEXLEN, OFFSETLEN, NUMLINES) + cachereplacementpolicy(.clk, .reset, + .WayHit, + .VictimWay, + .LsuPAdrM(LsuPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), + .RAdr, + .LRUWriteEn); + end else begin:vict + assign VictimWay = 1'b1; // one hot. + end + + assign CacheHit = | WayHit; + assign VictimDirty = | VictimDirtyWay; + + + // ReadDataLineWayMaskedM is a 2d array of cache line len by number of ways. + // Need to OR together each way in a bitwise manner. + // Final part of the AO Mux. First is the AND in the cacheway. 
+ or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWayMasked), .y(ReadDataLineM)); + or_rows #(NUMWAYS, TAGLEN) VictimTagAOMux(.a(VictimTagWay), .y(VictimTag)); + + + // Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can + // easily build a variable input mux. + // *** consider using a limited range shift to do this final muxing. + genvar index; + if(DCACHE == 1) begin: readdata + for (index = 0; index < WORDSPERLINE; index++) begin:readdatalinesetsmux + assign ReadDataLineSets[index] = ReadDataLineM[((index+1)*`XLEN)-1: (index*`XLEN)]; + end + // variable input mux + assign ReadDataWord = ReadDataLineSets[LsuPAdrM[LOGWPL + LOGXLENBYTES - 1 : LOGXLENBYTES]]; + end else begin: readdata + logic [31:0] ReadLineSetsF [LINELEN/16-1:0]; + logic [31:0] FinalInstrRawF; + for(index = 0; index < LINELEN / 16 - 1; index++) + assign ReadLineSetsF[index] = ReadDataLineM[((index+1)*16)+16-1 : (index*16)]; + assign ReadLineSetsF[LINELEN/16-1] = {16'b0, ReadDataLineM[LINELEN-1:LINELEN-16]}; + assign FinalInstrRawF = ReadLineSetsF[LsuPAdrM[$clog2(LINELEN / 32) + 1 : 1]]; + if (`XLEN == 64) assign ReadDataWord = {32'b0, FinalInstrRawF}; + else assign ReadDataWord = FinalInstrRawF; + end + + // Write Path CPU (IEU) side + + onehotdecoder #(LOGWPL) + adrdec(.bin(LsuPAdrM[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), + .decoded(MemPAdrDecodedW)); + + assign SRAMWordEnable = SRAMLineWriteEnableM ? '1 : MemPAdrDecodedW; + + assign SRAMLineWayWriteEnableM = SRAMLineWriteEnableM ? VictimWay : '0; + + mux2 #(NUMWAYS) WriteEnableMux(.d0(SRAMWordWriteEnableM ? 
WayHit : '0), + .d1(SRAMLineWayWriteEnableM), + .s(SRAMLineWriteEnableM), + .y(SRAMWayWriteEnable)); + + + + mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), + .d1(CacheMemWriteData), + .s(SRAMLineWriteEnableM), + .y(SRAMWriteData)); + + + mux3 #(`PA_BITS) BaseAdrMux(.d0({LsuPAdrM[`PA_BITS-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}), + .d1({VictimTag, LsuPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}), + .d2({VictimTag, FlushAdrQ, {{OFFSETLEN}{1'b0}}}), + .s({SelFlush, SelEvict}), + .y(CacheBusAdr)); + + + // flush address and way generation. + // increment on 2nd to last way + flopenr #(INDEXLEN) + FlushAdrReg(.clk, + .reset(reset | FlushAdrCntRst), + .en(FlushAdrCntEn & FlushWay[NUMWAYS-2]), + .d(FlushAdrP1), + .q(FlushAdr)); + assign FlushAdrP1 = FlushAdr + 1'b1; + + flopenr #(INDEXLEN) + FlushAdrQReg(.clk, + .reset(reset | FlushAdrCntRst), + .en(FlushAdrCntEn), + .d(FlushAdr), + .q(FlushAdrQ)); + + flopenl #(NUMWAYS) + FlushWayReg(.clk, + .load(reset | FlushWayCntRst), + .en(FlushWayCntEn), + .val({{NUMWAYS-1{1'b0}}, 1'b1}), + .d(NextFlushWay), + .q(FlushWay)); + + assign VDWriteEnableWay = FlushWay & {NUMWAYS{VDWriteEnable}}; + + assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]}; + + assign FlushAdrFlag = FlushAdr == FlushAdrThreshold[INDEXLEN-1:0] & FlushWay[NUMWAYS-1]; + + // controller + // *** fixme + logic CacheableM; + + assign CacheableM = 1; + + + cachefsm cachefsm(.clk, .reset, .CacheFetchLine, .CacheWriteLine, .CacheBusAck, + .RW, .Atomic, .CPUBusy, .CacheableM, .IgnoreRequest, + .CacheHit, .VictimDirty, .CacheStall, .CacheCommitted, + .CacheMiss, .CacheAccess, .SelAdrM, .SetValid, + .ClearValid, .SetDirty, .ClearDirty, .SRAMWordWriteEnableM, + .SRAMLineWriteEnableM, .SelEvict, .SelFlush, + .FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst, + .FlushWayCntRst, .FlushAdrFlag, .FlushCache, + .VDWriteEnable, .LRUWriteEn); + + +endmodule // dcache diff --git a/pipelined/src/cache/cachefsm.sv 
b/pipelined/src/cache/cachefsm.sv new file mode 100644 index 000000000..77b5efb09 --- /dev/null +++ b/pipelined/src/cache/cachefsm.sv @@ -0,0 +1,398 @@ +/////////////////////////////////////////// +// dcache (data cache) fsm +// +// Written: ross1728@gmail.com August 25, 2021 +// Implements the L1 data cache fsm +// +// Purpose: Controller (module cachefsm) for the cache: ready/hit handling, miss fetch and dirty-victim eviction, CPU-busy hold, and flush write-back +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +`include "wally-config.vh" + +module cachefsm + (input logic clk, + input logic reset, + // inputs from IEU + input logic [1:0] RW, + input logic [1:0] Atomic, + input logic FlushCache, + // hazard inputs + input logic CPUBusy, + input logic CacheableM, + // interlock fsm + input logic IgnoreRequest, + // Bus inputs + input logic CacheBusAck, + // dcache internals + input logic CacheHit, + input logic VictimDirty, + input logic FlushAdrFlag, + + // hazard outputs + output logic CacheStall, + // counter outputs + output logic CacheMiss, + output logic CacheAccess, + // Bus outputs + output logic CacheCommitted, + output logic CacheWriteLine, + output logic CacheFetchLine, + + // dcache internals + output logic [1:0] SelAdrM, + output logic SetValid, + output logic ClearValid, + output logic SetDirty, + output logic ClearDirty, + output logic SRAMWordWriteEnableM, + output logic SRAMLineWriteEnableM, + output logic SelEvict, + output logic LRUWriteEn, + output logic SelFlush, + output logic FlushAdrCntEn, + output logic FlushWayCntEn, + output logic FlushAdrCntRst, + output logic FlushWayCntRst, + output logic VDWriteEnable + + ); + + logic AnyCPUReqM; + + typedef enum {STATE_READY, + + STATE_MISS_FETCH_WDV, + STATE_MISS_FETCH_DONE, + STATE_MISS_EVICT_DIRTY, + STATE_MISS_WRITE_CACHE_LINE, + STATE_MISS_READ_WORD, + STATE_MISS_READ_WORD_DELAY, + STATE_MISS_WRITE_WORD, + + STATE_CPU_BUSY, + STATE_CPU_BUSY_FINISH_AMO, + + STATE_FLUSH, + STATE_FLUSH_WRITE_BACK, + STATE_FLUSH_CLEAR_DIRTY} statetype; + + (* mark_debug = "true" *) statetype CurrState, NextState; + + assign AnyCPUReqM = |RW | (|Atomic); + + // outputs for the performance counters. CacheAccess is asserted only while CurrState is STATE_READY; CacheMiss is CacheAccess without a CacheHit. 
+ assign CacheAccess = AnyCPUReqM & CacheableM & CurrState == STATE_READY; + assign CacheMiss = CacheAccess & CacheableM & ~CacheHit; + + always_ff @(posedge clk) + if (reset) CurrState <= #1 STATE_READY; + else CurrState <= #1 NextState; + + // next state logic and some state outputs. Every output is given a default first; each state overrides only what it changes. 
+ always_comb begin + CacheStall = 1'b0; + SelAdrM = 2'b00; + SetValid = 1'b0; + ClearValid = 1'b0; + SetDirty = 1'b0; + ClearDirty = 1'b0; + SRAMWordWriteEnableM = 1'b0; + SRAMLineWriteEnableM = 1'b0; + SelEvict = 1'b0; + LRUWriteEn = 1'b0; + SelFlush = 1'b0; + FlushAdrCntEn = 1'b0; + FlushWayCntEn = 1'b0; + FlushAdrCntRst = 1'b0; + FlushWayCntRst = 1'b0; + VDWriteEnable = 1'b0; + NextState = STATE_READY; + CacheFetchLine = 1'b0; + CacheWriteLine = 1'b0; + + case (CurrState) + STATE_READY: begin + + CacheStall = 1'b0; + SelAdrM = 2'b00; + SRAMWordWriteEnableM = 1'b0; + SetDirty = 1'b0; + LRUWriteEn = 1'b0; + + // TLB Miss + if(IgnoreRequest) begin + // the LSU arbiter has not yet selected the PTW. + // The CPU needs to be stalled until that happens. + // If we set CacheStall for 1 cycle before going to + // PTW ready the CPU will stall. + // The page table walker asserts its control 1 cycle + // after the TLBs miss. + SelAdrM = 2'b01; + NextState = STATE_READY; + end + + // Flush dcache to next level of memory + else if(FlushCache) begin + NextState = STATE_FLUSH; + CacheStall = 1'b1; + SelAdrM = 2'b10; + FlushAdrCntRst = 1'b1; + FlushWayCntRst = 1'b1; + end + + // amo hit + else if(Atomic[1] & (&RW) & CacheableM & CacheHit) begin + SelAdrM = 2'b01; + CacheStall = 1'b0; + + if(CPUBusy) begin + NextState = STATE_CPU_BUSY_FINISH_AMO; + SelAdrM = 2'b01; + end + else begin + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + LRUWriteEn = 1'b1; + NextState = STATE_READY; + end + end + // read hit valid cached + else if(RW[1] & CacheableM & CacheHit) begin + CacheStall = 1'b0; + LRUWriteEn = 1'b1; + + if(CPUBusy) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b01; + end + else begin + NextState = STATE_READY; + end + end + // write hit valid cached + else if (RW[0] & CacheableM & CacheHit) begin + SelAdrM = 2'b01; + CacheStall = 1'b0; + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + LRUWriteEn = 1'b1; + + if(CPUBusy) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 
2'b01; + end + else begin + NextState = STATE_READY; + end + end + // read or write miss valid cached + else if((|RW) & CacheableM & ~CacheHit) begin + NextState = STATE_MISS_FETCH_WDV; + CacheStall = 1'b1; + CacheFetchLine = 1'b1; + end + else NextState = STATE_READY; + end + + STATE_MISS_FETCH_WDV: begin + CacheStall = 1'b1; + SelAdrM = 2'b01; + + if (CacheBusAck) begin + NextState = STATE_MISS_FETCH_DONE; + end else begin + NextState = STATE_MISS_FETCH_WDV; + end + end + + STATE_MISS_FETCH_DONE: begin + CacheStall = 1'b1; + SelAdrM = 2'b01; + if(VictimDirty) begin + NextState = STATE_MISS_EVICT_DIRTY; + CacheWriteLine = 1'b1; + end else begin + NextState = STATE_MISS_WRITE_CACHE_LINE; + end + end + + STATE_MISS_WRITE_CACHE_LINE: begin + SRAMLineWriteEnableM = 1'b1; + CacheStall = 1'b1; + NextState = STATE_MISS_READ_WORD; + SelAdrM = 2'b01; + SetValid = 1'b1; + ClearDirty = 1'b1; + //LRUWriteEn = 1'b1; // Do not update LRU on SRAM fetch update. Wait for subsequent read/write + end + + STATE_MISS_READ_WORD: begin + SelAdrM = 2'b01; + CacheStall = 1'b1; + if (RW[0] & ~Atomic[1]) begin // handles stores and amo write. + NextState = STATE_MISS_WRITE_WORD; + end else begin + NextState = STATE_MISS_READ_WORD_DELAY; + // delay state is required as the read signal RW[1] is still high when we + // return to the ready state because the cache is stalling the cpu. 
+ end + end + + STATE_MISS_READ_WORD_DELAY: begin + //SelAdrM = 2'b01; + SRAMWordWriteEnableM = 1'b0; + SetDirty = 1'b0; + LRUWriteEn = 1'b0; + if(&RW & Atomic[1]) begin // amo write + SelAdrM = 2'b01; + if(CPUBusy) begin + NextState = STATE_CPU_BUSY_FINISH_AMO; + end + else begin + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + LRUWriteEn = 1'b1; + NextState = STATE_READY; + end + end else begin + LRUWriteEn = 1'b1; + if(CPUBusy) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b01; + end + else begin + NextState = STATE_READY; + end + end + end + + STATE_MISS_WRITE_WORD: begin + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + SelAdrM = 2'b01; + LRUWriteEn = 1'b1; + if(CPUBusy) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b01; + end + else begin + NextState = STATE_READY; + end + end + + STATE_MISS_EVICT_DIRTY: begin + CacheStall = 1'b1; + SelAdrM = 2'b01; + SelEvict = 1'b1; + if(CacheBusAck) begin + NextState = STATE_MISS_WRITE_CACHE_LINE; + end else begin + NextState = STATE_MISS_EVICT_DIRTY; + end + end + + + STATE_CPU_BUSY: begin + SelAdrM = 2'b00; + if(CPUBusy) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b01; + end + else begin + NextState = STATE_READY; + end + end + + STATE_CPU_BUSY_FINISH_AMO: begin + SelAdrM = 2'b01; + SRAMWordWriteEnableM = 1'b0; + SetDirty = 1'b0; + LRUWriteEn = 1'b0; + if(CPUBusy) begin + NextState = STATE_CPU_BUSY_FINISH_AMO; + end + else begin + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + LRUWriteEn = 1'b1; + NextState = STATE_READY; + end + end + + STATE_FLUSH: begin + CacheStall = 1'b1; + SelAdrM = 2'b10; + SelFlush = 1'b1; + FlushAdrCntEn = 1'b1; + FlushWayCntEn = 1'b1; + if(VictimDirty) begin + NextState = STATE_FLUSH_WRITE_BACK; + FlushAdrCntEn = 1'b0; + FlushWayCntEn = 1'b0; + CacheWriteLine = 1'b1; + end else if (FlushAdrFlag) begin + NextState = STATE_READY; + CacheStall = 1'b0; + FlushAdrCntEn = 1'b0; + FlushWayCntEn = 1'b0; + end else begin + NextState = STATE_FLUSH; + end + end + + 
STATE_FLUSH_WRITE_BACK: begin + CacheStall = 1'b1; + SelAdrM = 2'b10; + SelFlush = 1'b1; + if(CacheBusAck) begin + NextState = STATE_FLUSH_CLEAR_DIRTY; + end else begin + NextState = STATE_FLUSH_WRITE_BACK; + end + end + + STATE_FLUSH_CLEAR_DIRTY: begin + CacheStall = 1'b1; + ClearDirty = 1'b1; + VDWriteEnable = 1'b1; + SelFlush = 1'b1; + SelAdrM = 2'b10; + FlushAdrCntEn = 1'b0; + FlushWayCntEn = 1'b0; + if(FlushAdrFlag) begin + NextState = STATE_READY; + CacheStall = 1'b0; + SelAdrM = 2'b00; + end else begin + NextState = STATE_FLUSH; + FlushAdrCntEn = 1'b1; + FlushWayCntEn = 1'b1; + end + end + + default: begin + NextState = STATE_READY; + end + endcase + end + + assign CacheCommitted = CurrState != STATE_READY; + +endmodule // cachefsm + diff --git a/pipelined/src/cache/cachereplacementpolicy.sv b/pipelined/src/cache/cachereplacementpolicy.sv index 10e642a34..71206eb3d 100644 --- a/pipelined/src/cache/cachereplacementpolicy.sv +++ b/pipelined/src/cache/cachereplacementpolicy.sv @@ -40,7 +40,7 @@ module cachereplacementpolicy logic [NUMWAYS-2:0] LRUEn, LRUMask; logic [$clog2(NUMWAYS)-1:0] EncVicWay; logic [NUMWAYS-2:0] ReplacementBits [NUMLINES-1:0]; - logic [NUMWAYS-2:0] BlockReplacementBits; + logic [NUMWAYS-2:0] LineReplacementBits; logic [NUMWAYS-2:0] NewReplacement; logic [NUMWAYS-2:0] NewReplacementD; @@ -69,112 +69,109 @@ module cachereplacementpolicy end /* verilator lint_on BLKLOOPINIT */ - assign BlockReplacementBits = ReplacementBits[RAdrD]; + assign LineReplacementBits = ReplacementBits[RAdrD]; genvar index; - generate - if(NUMWAYS == 2) begin : TwoWay - - assign LRUEn[0] = 1'b0; + if(NUMWAYS == 2) begin : TwoWay + + assign LRUEn[0] = 1'b0; - assign NewReplacement[0] = WayHit[1]; + assign NewReplacement[0] = WayHit[1]; - assign VictimWay[1] = ~BlockReplacementBits[0]; - assign VictimWay[0] = BlockReplacementBits[0]; - - end else if (NUMWAYS == 4) begin : FourWay + assign VictimWay[1] = ~LineReplacementBits[0]; + assign VictimWay[0] = 
LineReplacementBits[0]; + + end else if (NUMWAYS == 4) begin : FourWay - // VictimWay is a function only of the current value of the LRU. - // binary encoding - //assign VictimWay[0] = BlockReplacementBits[2] ? BlockReplacementBits[1] : BlockReplacementBits[0]; - //assign VictimWay[1] = BlockReplacementBits[2]; + // VictimWay is a function only of the current value of the LRU. + // binary encoding + //assign VictimWay[0] = LineReplacementBits[2] ? LineReplacementBits[1] : LineReplacementBits[0]; + //assign VictimWay[1] = LineReplacementBits[2]; - // 1 hot encoding - //| WayHit | LRU 2 | LRU 1 | LRU 0 | - //|--------+-------+-------+-------| - //| 0000 | - | - | - | - //| 0001 | 1 | - | 1 | - //| 0010 | 1 | - | 0 | - //| 0100 | 0 | 1 | - | - //| 1000 | 0 | 0 | - | + // 1 hot encoding + //| WayHit | LRU 2 | LRU 1 | LRU 0 | + //|--------+-------+-------+-------| + //| 0000 | - | - | - | + //| 0001 | 1 | - | 1 | + //| 0010 | 1 | - | 0 | + //| 0100 | 0 | 1 | - | + //| 1000 | 0 | 0 | - | - assign VictimWay[0] = ~BlockReplacementBits[2] & ~BlockReplacementBits[0]; - assign VictimWay[1] = ~BlockReplacementBits[2] & BlockReplacementBits[0]; - assign VictimWay[2] = BlockReplacementBits[2] & ~BlockReplacementBits[1]; - assign VictimWay[3] = BlockReplacementBits[2] & BlockReplacementBits[1]; + assign VictimWay[0] = ~LineReplacementBits[2] & ~LineReplacementBits[0]; + assign VictimWay[1] = ~LineReplacementBits[2] & LineReplacementBits[0]; + assign VictimWay[2] = LineReplacementBits[2] & ~LineReplacementBits[1]; + assign VictimWay[3] = LineReplacementBits[2] & LineReplacementBits[1]; - // New LRU bits which are updated is function only of the WayHit. - // However the not updated bits come from the old LRU. - assign LRUEn[2] = |WayHit; - assign LRUEn[1] = WayHit[3] | WayHit[2]; - assign LRUEn[0] = WayHit[1] | WayHit[0]; + // New LRU bits which are updated is function only of the WayHit. + // However the not updated bits come from the old LRU. 
+ assign LRUEn[2] = |WayHit; + assign LRUEn[1] = WayHit[3] | WayHit[2]; + assign LRUEn[0] = WayHit[1] | WayHit[0]; - assign LRUMask[2] = WayHit[1] | WayHit[0]; - assign LRUMask[1] = WayHit[2]; - assign LRUMask[0] = WayHit[0]; - + assign LRUMask[2] = WayHit[1] | WayHit[0]; + assign LRUMask[1] = WayHit[2]; + assign LRUMask[0] = WayHit[0]; + /* -----\/----- EXCLUDED -----\/----- - // selects - assign LRUEn[2] = 1'b1; - assign LRUEn[1] = WayHit[3]; - assign LRUEn[0] = WayHit[3] | WayHit[2]; + // selects + assign LRUEn[2] = 1'b1; + assign LRUEn[1] = WayHit[3]; + assign LRUEn[0] = WayHit[3] | WayHit[2]; - // mask - assign LRUMask[0] = WayHit[1]; - assign LRUMask[1] = WayHit[3]; - assign LRUMask[2] = WayHit[3] | WayHit[2]; - -----/\----- EXCLUDED -----/\----- */ + // mask + assign LRUMask[0] = WayHit[1]; + assign LRUMask[1] = WayHit[3]; + assign LRUMask[2] = WayHit[3] | WayHit[2]; +-----/\----- EXCLUDED -----/\----- */ - for(index = 0; index < NUMWAYS-1; index++) - assign NewReplacement[index] = LRUEn[index] ? LRUMask[index] : BlockReplacementBits[index]; + for(index = 0; index < NUMWAYS-1; index++) +assign NewReplacement[index] = LRUEn[index] ? LRUMask[index] : LineReplacementBits[index]; /* -----\/----- EXCLUDED -----\/----- - assign EncVicWay[1] = BlockReplacementBits[2]; - assign EncVicWay[0] = BlockReplacementBits[2] ? BlockReplacementBits[0] : BlockReplacementBits[1]; + assign EncVicWay[1] = LineReplacementBits[2]; + assign EncVicWay[0] = LineReplacementBits[2] ? 
LineReplacementBits[0] : LineReplacementBits[1]; - onehotdecoder #(2) - waydec(.bin(EncVicWay), - .decoded({VictimWay[0], VictimWay[1], VictimWay[2], VictimWay[3]})); - -----/\----- EXCLUDED -----/\----- */ + onehotdecoder #(2) + waydec(.bin(EncVicWay), + .decoded({VictimWay[0], VictimWay[1], VictimWay[2], VictimWay[3]})); +-----/\----- EXCLUDED -----/\----- */ - end else if (NUMWAYS == 8) begin : EightWay + end else if (NUMWAYS == 8) begin : EightWay - // selects - assign LRUEn[6] = 1'b1; - assign LRUEn[5] = WayHit[7] | WayHit[6] | WayHit[5] | WayHit[4]; - assign LRUEn[4] = WayHit[7] | WayHit[6]; - assign LRUEn[3] = WayHit[5] | WayHit[4]; - assign LRUEn[2] = WayHit[3] | WayHit[2] | WayHit[1] | WayHit[0]; - assign LRUEn[1] = WayHit[3] | WayHit[2]; - assign LRUEn[0] = WayHit[1] | WayHit[0]; + // selects + assign LRUEn[6] = 1'b1; + assign LRUEn[5] = WayHit[7] | WayHit[6] | WayHit[5] | WayHit[4]; + assign LRUEn[4] = WayHit[7] | WayHit[6]; + assign LRUEn[3] = WayHit[5] | WayHit[4]; + assign LRUEn[2] = WayHit[3] | WayHit[2] | WayHit[1] | WayHit[0]; + assign LRUEn[1] = WayHit[3] | WayHit[2]; + assign LRUEn[0] = WayHit[1] | WayHit[0]; - // mask - assign LRUMask[6] = WayHit[7] | WayHit[6] | WayHit[5] | WayHit[4]; - assign LRUMask[5] = WayHit[7] | WayHit[6]; - assign LRUMask[4] = WayHit[7]; - assign LRUMask[3] = WayHit[5]; - assign LRUMask[2] = WayHit[3] | WayHit[2]; - assign LRUMask[1] = WayHit[2]; - assign LRUMask[0] = WayHit[0]; + // mask + assign LRUMask[6] = WayHit[7] | WayHit[6] | WayHit[5] | WayHit[4]; + assign LRUMask[5] = WayHit[7] | WayHit[6]; + assign LRUMask[4] = WayHit[7]; + assign LRUMask[3] = WayHit[5]; + assign LRUMask[2] = WayHit[3] | WayHit[2]; + assign LRUMask[1] = WayHit[2]; + assign LRUMask[0] = WayHit[0]; - for(index = 0; index < NUMWAYS-1; index++) - assign NewReplacement[index] = LRUEn[index] ? LRUMask[index] : BlockReplacementBits[index]; + for(index = 0; index < NUMWAYS-1; index++) +assign NewReplacement[index] = LRUEn[index] ? 
LRUMask[index] : LineReplacementBits[index]; - assign EncVicWay[2] = BlockReplacementBits[6]; - assign EncVicWay[1] = BlockReplacementBits[6] ? BlockReplacementBits[5] : BlockReplacementBits[2]; - assign EncVicWay[0] = BlockReplacementBits[6] ? BlockReplacementBits[5] ? BlockReplacementBits[4] : BlockReplacementBits[3] : - BlockReplacementBits[2] ? BlockReplacementBits[1] : BlockReplacementBits[0]; - + assign EncVicWay[2] = LineReplacementBits[6]; + assign EncVicWay[1] = LineReplacementBits[6] ? LineReplacementBits[5] : LineReplacementBits[2]; + assign EncVicWay[0] = LineReplacementBits[6] ? LineReplacementBits[5] ? LineReplacementBits[4] : LineReplacementBits[3] : + LineReplacementBits[2] ? LineReplacementBits[1] : LineReplacementBits[0]; + - onehotdecoder #(3) - waydec(.bin(EncVicWay), - .decoded({VictimWay[0], VictimWay[1], VictimWay[2], VictimWay[3], - VictimWay[4], VictimWay[5], VictimWay[6], VictimWay[7]})); - end - endgenerate - + onehotdecoder #(3) + waydec(.bin(EncVicWay), + .decoded({VictimWay[0], VictimWay[1], VictimWay[2], VictimWay[3], + VictimWay[4], VictimWay[5], VictimWay[6], VictimWay[7]})); + end endmodule diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 158b6252c..f16d7194b 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -25,7 +25,7 @@ `include "wally-config.vh" -module cacheway #(parameter NUMLINES=512, parameter BLOCKLEN = 256, TAGLEN = 26, +module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, parameter OFFSETLEN = 5, parameter INDEXLEN = 9, parameter DIRTY_BITS = 1) (input logic clk, input logic reset, @@ -34,9 +34,9 @@ module cacheway #(parameter NUMLINES=512, parameter BLOCKLEN = 256, TAGLEN = 26, input logic [`PA_BITS-1:0] PAdr, input logic WriteEnable, input logic VDWriteEnable, - input logic [BLOCKLEN/`XLEN-1:0] WriteWordEnable, + input logic [LINELEN/`XLEN-1:0] WriteWordEnable, input logic TagWriteEnable, - input logic [BLOCKLEN-1:0] 
WriteData, + input logic [LINELEN-1:0] WriteData, input logic SetValid, input logic ClearValid, input logic SetDirty, @@ -47,7 +47,7 @@ module cacheway #(parameter NUMLINES=512, parameter BLOCKLEN = 256, TAGLEN = 26, input logic SelFlush, input logic FlushWay, - output logic [BLOCKLEN-1:0] ReadDataLineWayMasked, + output logic [LINELEN-1:0] ReadDataLineWayMasked, output logic WayHit, output logic VictimDirtyWay, output logic [TAGLEN-1:0] VictimTagWay @@ -55,7 +55,7 @@ module cacheway #(parameter NUMLINES=512, parameter BLOCKLEN = 256, TAGLEN = 26, logic [NUMLINES-1:0] ValidBits; logic [NUMLINES-1:0] DirtyBits; - logic [BLOCKLEN-1:0] ReadDataBlockWay; + logic [LINELEN-1:0] ReadDataLineWay; logic [TAGLEN-1:0] ReadTag; logic Valid; logic Dirty; @@ -72,16 +72,13 @@ module cacheway #(parameter NUMLINES=512, parameter BLOCKLEN = 256, TAGLEN = 26, genvar words; - - generate - for(words = 0; words < BLOCKLEN/`XLEN; words++) begin : word - sram1rw #(.DEPTH(`XLEN), .WIDTH(NUMLINES)) - CacheDataMem(.clk(clk), .Addr(RAdr), - .ReadData(ReadDataBlockWay[(words+1)*`XLEN-1:words*`XLEN] ), - .WriteData(WriteData[(words+1)*`XLEN-1:words*`XLEN]), - .WriteEnable(WriteEnable & WriteWordEnable[words])); - end - endgenerate + for(words = 0; words < LINELEN/`XLEN; words++) begin: word + sram1rw #(.DEPTH(`XLEN), .WIDTH(NUMLINES)) + CacheDataMem(.clk(clk), .Addr(RAdr), + .ReadData(ReadDataLineWay[(words+1)*`XLEN-1:words*`XLEN] ), + .WriteData(WriteData[(words+1)*`XLEN-1:words*`XLEN]), + .WriteEnable(WriteEnable & WriteWordEnable[words])); + end sram1rw #(.DEPTH(TAGLEN), .WIDTH(NUMLINES)) CacheTagMem(.clk(clk), @@ -93,7 +90,7 @@ module cacheway #(parameter NUMLINES=512, parameter BLOCKLEN = 256, TAGLEN = 26, assign WayHit = Valid & (ReadTag == PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]); assign SelectedWay = SelFlush ? FlushWay : SelEvict ? VictimWay : WayHit; - assign ReadDataLineWayMasked = SelectedWay ? ReadDataBlockWay : '0; // first part of AO mux. 
+ assign ReadDataLineWayMasked = SelectedWay ? ReadDataLineWay : '0; // first part of AO mux. assign VictimDirtyWay = SelFlush ? FlushWay & Dirty & Valid : VictimWay & Dirty & Valid; @@ -123,27 +120,21 @@ module cacheway #(parameter NUMLINES=512, parameter BLOCKLEN = 256, TAGLEN = 26, assign Valid = ValidBits[RAdrD]; - generate - if(DIRTY_BITS) begin:dirty - always_ff @(posedge clk) begin - if (reset) - DirtyBits <= {NUMLINES{1'b0}}; - else if (SetDirtyD & (WriteEnableD | VDWriteEnableD)) DirtyBits[RAdrD] <= 1'b1; - else if (ClearDirtyD & (WriteEnableD | VDWriteEnableD)) DirtyBits[RAdrD] <= 1'b0; - end - - always_ff @(posedge clk) begin - SetDirtyD <= SetDirty; - ClearDirtyD <= ClearDirty; - end - - assign Dirty = DirtyBits[RAdrD]; - - end else begin:dirty - assign Dirty = 1'b0; + // Dirty bits + if(DIRTY_BITS) begin:dirty + always_ff @(posedge clk) begin + if (reset) DirtyBits <= {NUMLINES{1'b0}}; + else if (SetDirtyD & (WriteEnableD | VDWriteEnableD)) DirtyBits[RAdrD] <= 1'b1; + else if (ClearDirtyD & (WriteEnableD | VDWriteEnableD)) DirtyBits[RAdrD] <= 1'b0; end - endgenerate - + always_ff @(posedge clk) begin + SetDirtyD <= SetDirty; + ClearDirtyD <= ClearDirty; + end + assign Dirty = DirtyBits[RAdrD]; + end else begin:dirty + assign Dirty = 1'b0; + end endmodule // DCacheMemWay diff --git a/pipelined/src/cache/dcache.sv b/pipelined/src/cache/dcache.sv index 3862fe541..dd0b7d7ed 100644 --- a/pipelined/src/cache/dcache.sv +++ b/pipelined/src/cache/dcache.sv @@ -25,71 +25,70 @@ `include "wally-config.vh" -module dcache +module dcache #(parameter integer LINELEN, + parameter integer NUMLINES, + parameter integer NUMWAYS) (input logic clk, - input logic reset, - input logic CPUBusy, + input logic reset, + input logic CPUBusy, // mmu - input logic CacheableM, + input logic CacheableM, // cpu side - input logic [1:0] LsuRWM, - input logic [1:0] LsuAtomicM, - input logic FlushDCacheM, - input logic [11:0] LsuAdrE, // virtual address, but we only use the lower 12 bits. 
- input logic [`PA_BITS-1:0] LsuPAdrM, // physical address - input logic [11:0] PreLsuPAdrM, // physical or virtual address - input logic [`XLEN-1:0] FinalWriteDataM, - output logic [`XLEN-1:0] ReadDataWordM, - output logic DCacheCommittedM, + input logic [1:0] LsuRWM, + input logic [1:0] LsuAtomicM, + input logic FlushDCacheM, + input logic [11:0] LsuAdrE, // virtual address, but we only use the lower 12 bits. + input logic [`PA_BITS-1:0] LsuPAdrM, // physical address + input logic [11:0] PreLsuPAdrM, // physical or virtual address + input logic [`XLEN-1:0] FinalWriteDataM, + output logic [`XLEN-1:0] ReadDataWordM, + output logic DCacheCommittedM, // Bus fsm interface - input logic IgnoreRequest, - output logic DCacheFetchLine, - output logic DCacheWriteLine, + input logic IgnoreRequest, + output logic DCacheFetchLine, + output logic DCacheWriteLine, - input logic DCacheBusAck, - output logic [`PA_BITS-1:0] DCacheBusAdr, + input logic DCacheBusAck, + output logic [`PA_BITS-1:0] DCacheBusAdr, - input logic [`DCACHE_BLOCKLENINBITS-1:0] DCacheMemWriteData, - output logic [`XLEN-1:0] ReadDataBlockSetsM [(`DCACHE_BLOCKLENINBITS/`XLEN)-1:0], + input logic [LINELEN-1:0] DCacheMemWriteData, + output logic [`XLEN-1:0] ReadDataLineSetsM [(LINELEN/`XLEN)-1:0], - output logic DCacheStall, + output logic DCacheStall, // to performance counters - output logic DCacheMiss, - output logic DCacheAccess + output logic DCacheMiss, + output logic DCacheAccess ); - localparam integer BLOCKLEN = `DCACHE_BLOCKLENINBITS; - localparam integer NUMLINES = `DCACHE_WAYSIZEINBYTES*8/BLOCKLEN; - localparam integer NUMWAYS = `DCACHE_NUMWAYS; - localparam integer BLOCKBYTELEN = BLOCKLEN/8; - localparam integer OFFSETLEN = $clog2(BLOCKBYTELEN); + localparam integer LINEBYTELEN = LINELEN/8; + localparam integer OFFSETLEN = $clog2(LINEBYTELEN); localparam integer INDEXLEN = $clog2(NUMLINES); localparam integer TAGLEN = `PA_BITS - OFFSETLEN - INDEXLEN; - localparam integer WORDSPERLINE = 
BLOCKLEN/`XLEN; + localparam integer WORDSPERLINE = LINELEN/`XLEN; localparam integer LOGWPL = $clog2(WORDSPERLINE); localparam integer LOGXLENBYTES = $clog2(`XLEN/8); - localparam integer FlushAdrThreshold = NUMLINES - 1; + localparam integer FlushAdrThreshold = NUMLINES; logic [1:0] SelAdrM; logic [INDEXLEN-1:0] RAdr; - logic [BLOCKLEN-1:0] SRAMWriteData; + logic [LINELEN-1:0] SRAMWriteData; logic SetValid, ClearValid; logic SetDirty, ClearDirty; - logic [BLOCKLEN-1:0] ReadDataLineWayMasked [NUMWAYS-1:0]; + logic [LINELEN-1:0] ReadDataLineWayMasked [NUMWAYS-1:0]; logic [NUMWAYS-1:0] WayHit; logic CacheHit; - logic [BLOCKLEN-1:0] ReadDataLineM; + logic [LINELEN-1:0] ReadDataLineM; logic [WORDSPERLINE-1:0] SRAMWordEnable; logic SRAMWordWriteEnableM; - logic SRAMBlockWriteEnableM; - logic [NUMWAYS-1:0] SRAMBlockWayWriteEnableM; + logic SRAMLineWriteEnableM; + logic [NUMWAYS-1:0] SRAMLineWayWriteEnableM; logic [NUMWAYS-1:0] SRAMWayWriteEnable; @@ -104,6 +103,7 @@ module dcache logic [INDEXLEN-1:0] FlushAdr; logic [INDEXLEN-1:0] FlushAdrP1; + logic [INDEXLEN-1:0] FlushAdrQ; logic FlushAdrCntEn; logic FlushAdrCntRst; logic FlushAdrFlag; @@ -127,15 +127,15 @@ module dcache .d2(FlushAdr), .s(SelAdrM), .y(RAdr)); - - cacheway #(.NUMLINES(NUMLINES), .BLOCKLEN(BLOCKLEN), .TAGLEN(TAGLEN), + + cacheway #(.NUMLINES(NUMLINES), .LINELEN(LINELEN), .TAGLEN(TAGLEN), .OFFSETLEN(OFFSETLEN), .INDEXLEN(INDEXLEN)) MemWay[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr(LsuPAdrM), .WriteEnable(SRAMWayWriteEnable), .VDWriteEnable(VDWriteEnableWay), .WriteWordEnable(SRAMWordEnable), - .TagWriteEnable(SRAMBlockWayWriteEnableM), + .TagWriteEnable(SRAMLineWayWriteEnableM), .WriteData(SRAMWriteData), .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelEvict, .VictimWay, .FlushWay, .SelFlush, @@ -143,28 +143,26 @@ module dcache .WayHit, .VictimDirtyWay, .VictimTagWay, .InvalidateAll(1'b0)); - generate - if(NUMWAYS > 1) begin:vict - cachereplacementpolicy #(NUMWAYS, INDEXLEN, OFFSETLEN, NUMLINES) - 
cachereplacementpolicy(.clk, .reset, - .WayHit, - .VictimWay, - .LsuPAdrM(LsuPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), - .RAdr, - .LRUWriteEn); - end else begin:vict - assign VictimWay = 1'b1; // one hot. - end - endgenerate + if(NUMWAYS > 1) begin:vict + cachereplacementpolicy #(NUMWAYS, INDEXLEN, OFFSETLEN, NUMLINES) + cachereplacementpolicy(.clk, .reset, + .WayHit, + .VictimWay, + .LsuPAdrM(LsuPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), + .RAdr, + .LRUWriteEn); + end else begin:vict + assign VictimWay = 1'b1; // one hot. + end assign CacheHit = | WayHit; assign VictimDirty = | VictimDirtyWay; - // ReadDataLineWayMaskedM is a 2d array of cache block len by number of ways. + // ReadDataLineWayMaskedM is a 2d array of cache line len by number of ways. // Need to OR together each way in a bitwise manner. // Final part of the AO Mux. First is the AND in the cacheway. - or_rows #(NUMWAYS, BLOCKLEN) ReadDataAOMux(.a(ReadDataLineWayMasked), .y(ReadDataLineM)); + or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWayMasked), .y(ReadDataLineM)); or_rows #(NUMWAYS, TAGLEN) VictimTagAOMux(.a(VictimTagWay), .y(VictimTag)); @@ -172,15 +170,12 @@ module dcache // easily build a variable input mux. // *** consider using a limited range shift to do this final muxing. 
genvar index; - generate - for (index = 0; index < WORDSPERLINE; index++) begin:readdatablocksetsmux - assign ReadDataBlockSetsM[index] = ReadDataLineM[((index+1)*`XLEN)-1: (index*`XLEN)]; - end - endgenerate - + for (index = 0; index < WORDSPERLINE; index++) + assign ReadDataLineSetsM[index] = ReadDataLineM[((index+1)*`XLEN)-1: (index*`XLEN)]; + // variable input mux - assign ReadDataWordM = ReadDataBlockSetsM[LsuPAdrM[LOGWPL + LOGXLENBYTES - 1 : LOGXLENBYTES]]; + assign ReadDataWordM = ReadDataLineSetsM[LsuPAdrM[LOGWPL + LOGXLENBYTES - 1 : LOGXLENBYTES]]; // Write Path CPU (IEU) side @@ -188,39 +183,46 @@ module dcache adrdec(.bin(LsuPAdrM[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedW)); - assign SRAMWordEnable = SRAMBlockWriteEnableM ? '1 : MemPAdrDecodedW; + assign SRAMWordEnable = SRAMLineWriteEnableM ? '1 : MemPAdrDecodedW; - assign SRAMBlockWayWriteEnableM = SRAMBlockWriteEnableM ? VictimWay : '0; + assign SRAMLineWayWriteEnableM = SRAMLineWriteEnableM ? VictimWay : '0; mux2 #(NUMWAYS) WriteEnableMux(.d0(SRAMWordWriteEnableM ? WayHit : '0), - .d1(SRAMBlockWayWriteEnableM), - .s(SRAMBlockWriteEnableM), + .d1(SRAMLineWayWriteEnableM), + .s(SRAMLineWriteEnableM), .y(SRAMWayWriteEnable)); - mux2 #(BLOCKLEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteDataM}}), + mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteDataM}}), .d1(DCacheMemWriteData), - .s(SRAMBlockWriteEnableM), + .s(SRAMLineWriteEnableM), .y(SRAMWriteData)); mux3 #(`PA_BITS) BaseAdrMux(.d0({LsuPAdrM[`PA_BITS-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}), .d1({VictimTag, LsuPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}), - .d2({VictimTag, FlushAdr, {{OFFSETLEN}{1'b0}}}), + .d2({VictimTag, FlushAdrQ, {{OFFSETLEN}{1'b0}}}), .s({SelFlush, SelEvict}), .y(DCacheBusAdr)); // flush address and way generation. 
+ // increment on 2nd to last way flopenr #(INDEXLEN) FlushAdrReg(.clk, .reset(reset | FlushAdrCntRst), - .en(FlushAdrCntEn & FlushWay[NUMWAYS-1]), + .en(FlushAdrCntEn & FlushWay[NUMWAYS-2]), .d(FlushAdrP1), .q(FlushAdr)); assign FlushAdrP1 = FlushAdr + 1'b1; + flopenr #(INDEXLEN) + FlushAdrQReg(.clk, + .reset(reset | FlushAdrCntRst), + .en(FlushAdrCntEn), + .d(FlushAdr), + .q(FlushAdrQ)); flopenl #(NUMWAYS) FlushWayReg(.clk, @@ -243,7 +245,7 @@ module dcache .CacheHit, .VictimDirty, .DCacheStall, .DCacheCommittedM, .DCacheMiss, .DCacheAccess, .SelAdrM, .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SRAMWordWriteEnableM, - .SRAMBlockWriteEnableM, .SelEvict, .SelFlush, + .SRAMLineWriteEnableM, .SelEvict, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst, .FlushWayCntRst, .FlushAdrFlag, .FlushDCacheM, .VDWriteEnable, .LRUWriteEn); diff --git a/pipelined/src/cache/dcachefsm.sv b/pipelined/src/cache/dcachefsm.sv index 4069eae22..6e1e53b57 100644 --- a/pipelined/src/cache/dcachefsm.sv +++ b/pipelined/src/cache/dcachefsm.sv @@ -61,7 +61,7 @@ module dcachefsm output logic SetDirty, output logic ClearDirty, output logic SRAMWordWriteEnableM, - output logic SRAMBlockWriteEnableM, + output logic SRAMLineWriteEnableM, output logic SelEvict, output logic LRUWriteEn, output logic SelFlush, @@ -80,7 +80,7 @@ module dcachefsm STATE_MISS_FETCH_WDV, STATE_MISS_FETCH_DONE, STATE_MISS_EVICT_DIRTY, - STATE_MISS_WRITE_CACHE_BLOCK, + STATE_MISS_WRITE_CACHE_LINE, STATE_MISS_READ_WORD, STATE_MISS_READ_WORD_DELAY, STATE_MISS_WRITE_WORD, @@ -113,7 +113,7 @@ module dcachefsm SetDirty = 1'b0; ClearDirty = 1'b0; SRAMWordWriteEnableM = 1'b0; - SRAMBlockWriteEnableM = 1'b0; + SRAMLineWriteEnableM = 1'b0; SelEvict = 1'b0; LRUWriteEn = 1'b0; SelFlush = 1'b0; @@ -228,12 +228,12 @@ module dcachefsm NextState = STATE_MISS_EVICT_DIRTY; DCacheWriteLine = 1'b1; end else begin - NextState = STATE_MISS_WRITE_CACHE_BLOCK; + NextState = STATE_MISS_WRITE_CACHE_LINE; end end - 
STATE_MISS_WRITE_CACHE_BLOCK: begin - SRAMBlockWriteEnableM = 1'b1; + STATE_MISS_WRITE_CACHE_LINE: begin + SRAMLineWriteEnableM = 1'b1; DCacheStall = 1'b1; NextState = STATE_MISS_READ_WORD; SelAdrM = 2'b01; @@ -301,7 +301,7 @@ module dcachefsm SelAdrM = 2'b01; SelEvict = 1'b1; if(DCacheBusAck) begin - NextState = STATE_MISS_WRITE_CACHE_BLOCK; + NextState = STATE_MISS_WRITE_CACHE_LINE; end else begin NextState = STATE_MISS_EVICT_DIRTY; end diff --git a/pipelined/src/cache/icache.sv b/pipelined/src/cache/icache.sv index b54accf57..9cee03c97 100644 --- a/pipelined/src/cache/icache.sv +++ b/pipelined/src/cache/icache.sv @@ -25,31 +25,32 @@ `include "wally-config.vh" -module icache +module icache #(parameter integer LINELEN, + parameter integer NUMLINES, + parameter integer NUMWAYS) ( // Basic pipeline stuff - input logic clk, reset, - input logic CPUBusy, + input logic clk, reset, + input logic CPUBusy, // mmu - //input logic CacheableF, - input logic [1:0] IfuRWF, + input logic [1:0] IfuRWF, // cpu side - input logic InvalidateICacheM, - input logic [11:0] PCNextF, - input logic [`PA_BITS-1:0] PCPF, - input logic [`XLEN-1:0] PCF, + input logic InvalidateICacheM, + input logic [11:0] PCNextF, + input logic [`PA_BITS-1:0] PCPF, + input logic [`XLEN-1:0] PCF, // bus fsm interface - input logic IgnoreRequest, - input logic [`ICACHE_BLOCKLENINBITS-1:0] ICacheMemWriteData, - output logic ICacheFetchLine, + input logic IgnoreRequest, + input logic [LINELEN-1:0] ICacheMemWriteData, + output logic ICacheFetchLine, - (* mark_debug = "true" *) input logic ICacheBusAck, + (* mark_debug = "true" *) input logic ICacheBusAck, (* mark_debug = "true" *) output logic [`PA_BITS-1:0] ICacheBusAdr, // High if the icache is requesting a stall - output logic ICacheStallF, + output logic ICacheStallF, // The raw (not decompressed) instruction that was requested // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros @@ -57,25 +58,23 @@ module icache ); // 
Configuration parameters - localparam integer BLOCKLEN = `ICACHE_BLOCKLENINBITS; - localparam integer NUMLINES = `ICACHE_WAYSIZEINBYTES*8/`ICACHE_BLOCKLENINBITS; - localparam integer BLOCKBYTELEN = BLOCKLEN/8; + localparam integer LINEBYTELEN = LINELEN/8; - localparam integer OFFSETLEN = $clog2(BLOCKBYTELEN); + localparam integer OFFSETLEN = $clog2(LINEBYTELEN); localparam integer INDEXLEN = $clog2(NUMLINES); localparam integer TAGLEN = `PA_BITS - OFFSETLEN - INDEXLEN; // *** not used? - localparam WORDSPERLINE = BLOCKLEN/`XLEN; + localparam WORDSPERLINE = LINELEN/`XLEN; localparam LOGWPL = $clog2(WORDSPERLINE); - localparam integer NUMWAYS = `ICACHE_NUMWAYS; + // Input signals to cache memory logic ICacheMemWriteEnable; // Output signals from cache memory - logic [BLOCKLEN-1:0] ReadLineF; + logic [LINELEN-1:0] ReadLineF; logic SelAdr; logic [INDEXLEN-1:0] RAdr; logic [NUMWAYS-1:0] VictimWay; @@ -84,9 +83,9 @@ module icache logic hit; - logic [BLOCKLEN-1:0] ReadDataLineWayMasked [NUMWAYS-1:0]; + logic [LINELEN-1:0] ReadDataLineWayMasked [NUMWAYS-1:0]; - logic [31:0] ReadLineSetsF [`ICACHE_BLOCKLENINBITS/16-1:0]; + logic [31:0] ReadLineSetsF [LINELEN/16-1:0]; logic [NUMWAYS-1:0] SRAMWayWriteEnable; @@ -98,13 +97,13 @@ module icache .y(RAdr)); - cacheway #(.NUMLINES(NUMLINES), .BLOCKLEN(BLOCKLEN), .TAGLEN(TAGLEN), + cacheway #(.NUMLINES(NUMLINES), .LINELEN(LINELEN), .TAGLEN(TAGLEN), .OFFSETLEN(OFFSETLEN), .INDEXLEN(INDEXLEN), .DIRTY_BITS(0)) MemWay[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr(PCPF), .WriteEnable(SRAMWayWriteEnable), .VDWriteEnable(1'b0), - .WriteWordEnable({{(BLOCKLEN/`XLEN){1'b1}}}), + .WriteWordEnable({{(LINELEN/`XLEN){1'b1}}}), .TagWriteEnable(SRAMWayWriteEnable), .WriteData(ICacheMemWriteData), .SetValid(ICacheMemWriteEnable), @@ -115,36 +114,31 @@ module icache .VictimDirtyWay(), .VictimTagWay(), .InvalidateAll(InvalidateICacheM)); - generate - if(NUMWAYS > 1) begin:vict - cachereplacementpolicy #(NUMWAYS, INDEXLEN, OFFSETLEN, NUMLINES) - 
cachereplacementpolicy(.clk, .reset, - .WayHit, - .VictimWay, - .LsuPAdrM(PCPF[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), - .RAdr, - .LRUWriteEn); - end else begin:vict - assign VictimWay = 1'b1; // one hot. - end - endgenerate + if(NUMWAYS > 1) begin:vict + cachereplacementpolicy #(NUMWAYS, INDEXLEN, OFFSETLEN, NUMLINES) + cachereplacementpolicy(.clk, .reset, + .WayHit, + .VictimWay, + .LsuPAdrM(PCPF[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), + .RAdr, + .LRUWriteEn); + end else begin:vict + assign VictimWay = 1'b1; // one hot. + end assign hit = | WayHit; - // ReadDataLineWayMasked is a 2d array of cache block len by number of ways. + // ReadDataLineWayMasked is a 2d array of cache line len by number of ways. // Need to OR together each way in a bitwise manner. // Final part of the AO Mux. First is the AND in the cacheway. - or_rows #(NUMWAYS, BLOCKLEN) ReadDataAOMux(.a(ReadDataLineWayMasked), .y(ReadLineF)); + or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWayMasked), .y(ReadLineF)); genvar index; - generate - for(index = 0; index < BLOCKLEN / 16 - 1; index++) begin:readlinesetsmux + for(index = 0; index < LINELEN / 16 - 1; index++) assign ReadLineSetsF[index] = ReadLineF[((index+1)*16)+16-1 : (index*16)]; - end - assign ReadLineSetsF[BLOCKLEN/16-1] = {16'b0, ReadLineF[BLOCKLEN-1:BLOCKLEN-16]}; - endgenerate + assign ReadLineSetsF[LINELEN/16-1] = {16'b0, ReadLineF[LINELEN-1:LINELEN-16]}; - assign FinalInstrRawF = ReadLineSetsF[PCPF[$clog2(BLOCKLEN / 32) + 1 : 1]]; + assign FinalInstrRawF = ReadLineSetsF[PCPF[$clog2(LINELEN / 32) + 1 : 1]]; assign ICacheBusAdr = {PCPF[`PA_BITS-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}; diff --git a/pipelined/src/cache/icachefsm.sv b/pipelined/src/cache/icachefsm.sv index ead8afd84..aea8a7377 100644 --- a/pipelined/src/cache/icachefsm.sv +++ b/pipelined/src/cache/icachefsm.sv @@ -59,8 +59,8 @@ module icachefsm STATE_MISS_FETCH_WDV, // aligned miss, issue read to AHB and wait for data. 
STATE_MISS_FETCH_DONE, // write data into SRAM/LUT - STATE_MISS_READ, // read block 1 from SRAM/LUT - STATE_MISS_READ_DELAY, // read block 1 from SRAM/LUT + STATE_MISS_READ, // read line 1 from SRAM/LUT + STATE_MISS_READ_DELAY, // read line 1 from SRAM/LUT STATE_CPU_BUSY } statetype; diff --git a/pipelined/src/ebu/amoalu.sv b/pipelined/src/ebu/amoalu.sv index e8a77d603..5d3a137ab 100644 --- a/pipelined/src/ebu/amoalu.sv +++ b/pipelined/src/ebu/amoalu.sv @@ -56,25 +56,22 @@ module amoalu ( endcase // sign extend if necessary - generate - if (`XLEN == 32) begin:sext - assign a = srca; - assign b = srcb; - assign result = y; - end else begin:sext // `XLEN = 64 - always_comb - if (width == 2'b10) begin // sign-extend word-length operations - // *** it would be more efficient to look at carry out of bit 31 to determine comparisons than do this big mux on and b - a = {{32{srca[31]}}, srca[31:0]}; - b = {{32{srcb[31]}}, srcb[31:0]}; - result = {{32{y[31]}}, y[31:0]}; - end else begin - a = srca; - b = srcb; - result = y; - end - end - endgenerate - + if (`XLEN == 32) begin:sext + assign a = srca; + assign b = srcb; + assign result = y; + end else begin:sext // `XLEN = 64 + always_comb + if (width == 2'b10) begin // sign-extend word-length operations + // *** it would be more efficient to look at carry out of bit 31 to determine comparisons than do this big mux on a and b + a = {{32{srca[31]}}, srca[31:0]}; + b = {{32{srcb[31]}}, srcb[31:0]}; + result = {{32{y[31]}}, y[31:0]}; + end else begin + a = srca; + b = srcb; + result = y; + end + end endmodule diff --git a/pipelined/src/fpu/cvtfp.sv b/pipelined/src/fpu/cvtfp.sv index 52c441481..0b91b82e4 100644 --- a/pipelined/src/fpu/cvtfp.sv +++ b/pipelined/src/fpu/cvtfp.sv @@ -157,7 +157,7 @@ module cvtfp ( // Result Selection /////////////////////////////////////////////////////////////////////////////// - generate if(`IEEE754) begin + if(`IEEE754) begin // select the double to single precision result assign DSRes = XNaNE ?
{XSgnE, {8{1'b1}}, 1'b1, XManE[50:29]} : Underflow & ~Denorm ? {XSgnE, 30'b0, CalcPlus1&(|FrmE[1:0]|Shift)} : @@ -178,8 +178,6 @@ module cvtfp ( // select the final result based on the opperation assign CvtFpResE = FmtE ? {{32{1'b1}},DSRes} : {XSgnE&~XNaNE, SDExp, SDFrac[51]|XNaNE, SDFrac[50:0]&{51{~XNaNE}}}; end - endgenerate - endmodule // fpadd diff --git a/pipelined/src/fpu/divconv_pipe.sv b/pipelined/src/fpu/divconv_pipe.sv index 240000c28..7727e69f2 100755 --- a/pipelined/src/fpu/divconv_pipe.sv +++ b/pipelined/src/fpu/divconv_pipe.sv @@ -174,3 +174,20 @@ module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, r flopenr #(60) regk (clk, reset, regs_pipe2, {qp_out0[59:35], (qp_out0[34:6] & {29{~P_pipe}}), 6'h0}, qp0); endmodule // divconv + +// *** rewrote behaviorally dh 5 Jan 2021 for speed +module csa #(parameter WIDTH=8) ( + input logic [WIDTH-1:0] a, b, c, + output logic [WIDTH-1:0] sum, carry); + + assign sum = a ^ b ^ c; + assign carry = (a & (b | c)) | (b & c); +/* + logic [WIDTH:0] carry_temp; + genvar i; + for (i=0;i 32) + if (instr16[6:5] == 2'b00) + InstrD = {7'b0100000, rs2p, rds1p, 3'b000, rds1p, 7'b0111011}; // c.subw + else if (instr16[6:5] == 2'b01) + InstrD = {7'b0000000, rs2p, rds1p, 3'b000, rds1p, 7'b0111011}; // c.addw + else begin // reserved IllegalCompInstrD = 1; InstrD = {16'b0, instr16}; // preserve instruction for mtval on trap end - 5'b00001: InstrD = {immCLD, rs1p, 3'b011, rdp, 7'b0000111}; // c.fld - 5'b00010: InstrD = {immCL, rs1p, 3'b010, rdp, 7'b0000011}; // c.lw - 5'b00011: if (`XLEN==32) - InstrD = {immCL, rs1p, 3'b010, rdp, 7'b0000111}; // c.flw + else begin // illegal instruction + IllegalCompInstrD = 1; + InstrD = {16'b0, instr16}; // preserve instruction for mtval on trap + end + 5'b01101: InstrD = {immCJ, 5'b00000, 7'b1101111}; // c.j + 5'b01110: InstrD = {immCB[11:5], 5'b00000, rs1p, 3'b000, immCB[4:0], 7'b1100011}; // c.beqz + 5'b01111: InstrD = {immCB[11:5], 5'b00000, rs1p, 3'b001, immCB[4:0], 
7'b1100011}; // c.bnez + 5'b10000: InstrD = {6'b000000, immSH, rds1, 3'b001, rds1, 7'b0010011}; // c.slli + 5'b10001: InstrD = {immCILSPD, 5'b00010, 3'b011, rds1, 7'b0000111}; // c.fldsp + 5'b10010: InstrD = {immCILSP, 5'b00010, 3'b010, rds1, 7'b0000011}; // c.lwsp + 5'b10011: if (`XLEN == 32) + InstrD = {immCILSP, 5'b00010, 3'b010, rds1, 7'b0000111}; // c.flwsp + else + InstrD = {immCILSPD, 5'b00010, 3'b011, rds1, 7'b0000011}; // c.ldsp + 5'b10100: if (instr16[12] == 0) + if (instr16[6:2] == 5'b00000) + InstrD = {7'b0000000, 5'b00000, rds1, 3'b000, 5'b00000, 7'b1100111}; // c.jr else - InstrD = {immCLD, rs1p, 3'b011, rdp, 7'b0000011}; // c.ld; - 5'b00101: InstrD = {immCSD[11:5], rs2p, rs1p, 3'b011, immCSD[4:0], 7'b0100111}; // c.fsd - 5'b00110: InstrD = {immCS[11:5], rs2p, rs1p, 3'b010, immCS[4:0], 7'b0100011}; // c.sw - 5'b00111: if (`XLEN==32) - InstrD = {immCS[11:5], rs2p, rs1p, 3'b010, immCS[4:0], 7'b0100111}; // c.fsw - else - InstrD = {immCSD[11:5], rs2p, rs1p, 3'b011, immCSD[4:0], 7'b0100011}; //c.sd - 5'b01000: InstrD = {immCI, rds1, 3'b000, rds1, 7'b0010011}; // c.addi - 5'b01001: if (`XLEN==32) - InstrD = {immCJ, 5'b00001, 7'b1101111}; // c.jal - else - InstrD = {immCI, rds1, 3'b000, rds1, 7'b0011011}; // c.addiw - 5'b01010: InstrD = {immCI, 5'b00000, 3'b000, rds1, 7'b0010011}; // c.li - 5'b01011: if (rds1 != 5'b00010) - InstrD = {immCILUI, rds1, 7'b0110111}; // c.lui - else - InstrD = {immCIASP, rds1, 3'b000, rds1, 7'b0010011}; // c.addi16sp - 5'b01100: if (instr16[11:10] == 2'b00) - InstrD = {6'b000000, immSH, rds1p, 3'b101, rds1p, 7'b0010011}; // c.srli - else if (instr16[11:10] == 2'b01) - InstrD = {6'b010000, immSH, rds1p, 3'b101, rds1p, 7'b0010011}; // c.srai - else if (instr16[11:10] == 2'b10) - InstrD = {immCI, rds1p, 3'b111, rds1p, 7'b0010011}; // c.andi - else if (instr16[12:10] == 3'b011) - if (instr16[6:5] == 2'b00) - InstrD = {7'b0100000, rs2p, rds1p, 3'b000, rds1p, 7'b0110011}; // c.sub - else if (instr16[6:5] == 2'b01) - InstrD = 
{7'b0000000, rs2p, rds1p, 3'b100, rds1p, 7'b0110011}; // c.xor - else if (instr16[6:5] == 2'b10) - InstrD = {7'b0000000, rs2p, rds1p, 3'b110, rds1p, 7'b0110011}; // c.or - else // if (instr16[6:5] == 2'b11) - InstrD = {7'b0000000, rs2p, rds1p, 3'b111, rds1p, 7'b0110011}; // c.and - else if (instr16[12:10] == 3'b111 & `XLEN > 32) - if (instr16[6:5] == 2'b00) - InstrD = {7'b0100000, rs2p, rds1p, 3'b000, rds1p, 7'b0111011}; // c.subw - else if (instr16[6:5] == 2'b01) - InstrD = {7'b0000000, rs2p, rds1p, 3'b000, rds1p, 7'b0111011}; // c.addw - else begin // reserved - IllegalCompInstrD = 1; - InstrD = {16'b0, instr16}; // preserve instruction for mtval on trap - end - else begin // illegal instruction - IllegalCompInstrD = 1; - InstrD = {16'b0, instr16}; // preserve instruction for mtval on trap - end - 5'b01101: InstrD = {immCJ, 5'b00000, 7'b1101111}; // c.j - 5'b01110: InstrD = {immCB[11:5], 5'b00000, rs1p, 3'b000, immCB[4:0], 7'b1100011}; // c.beqz - 5'b01111: InstrD = {immCB[11:5], 5'b00000, rs1p, 3'b001, immCB[4:0], 7'b1100011}; // c.bnez - 5'b10000: InstrD = {6'b000000, immSH, rds1, 3'b001, rds1, 7'b0010011}; // c.slli - 5'b10001: InstrD = {immCILSPD, 5'b00010, 3'b011, rds1, 7'b0000111}; // c.fldsp - 5'b10010: InstrD = {immCILSP, 5'b00010, 3'b010, rds1, 7'b0000011}; // c.lwsp - 5'b10011: if (`XLEN == 32) - InstrD = {immCILSP, 5'b00010, 3'b010, rds1, 7'b0000111}; // c.flwsp - else - InstrD = {immCILSPD, 5'b00010, 3'b011, rds1, 7'b0000011}; // c.ldsp - 5'b10100: if (instr16[12] == 0) - if (instr16[6:2] == 5'b00000) - InstrD = {7'b0000000, 5'b00000, rds1, 3'b000, 5'b00000, 7'b1100111}; // c.jr + InstrD = {7'b0000000, rs2, 5'b00000, 3'b000, rds1, 7'b0110011}; // c.mv + else + if (rs2 == 5'b00000) + if (rds1 == 5'b00000) + InstrD = {12'b1, 5'b00000, 3'b000, 5'b00000, 7'b1110011}; // c.ebreak else - InstrD = {7'b0000000, rs2, 5'b00000, 3'b000, rds1, 7'b0110011}; // c.mv + InstrD = {12'b0, rds1, 3'b000, 5'b00001, 7'b1100111}; // c.jalr else - if (rs2 == 5'b00000) - if 
(rds1 == 5'b00000) - InstrD = {12'b1, 5'b00000, 3'b000, 5'b00000, 7'b1110011}; // c.ebreak - else - InstrD = {12'b0, rds1, 3'b000, 5'b00001, 7'b1100111}; // c.jalr - else - InstrD = {7'b0000000, rs2, rds1, 3'b000, rds1, 7'b0110011}; // c.add - 5'b10101: InstrD = {immCSSD[11:5], rs2, 5'b00010, 3'b011, immCSSD[4:0], 7'b0100111}; // c.fsdsp - 5'b10110: InstrD = {immCSS[11:5], rs2, 5'b00010, 3'b010, immCSS[4:0], 7'b0100011}; // c.swsp - 5'b10111: if (`XLEN==32) - InstrD = {immCSS[11:5], rs2, 5'b00010, 3'b010, immCSS[4:0], 7'b0100111}; // c.fswsp - else - InstrD = {immCSSD[11:5], rs2, 5'b00010, 3'b011, immCSSD[4:0], 7'b0100011}; // c.sdsp - default: begin // illegal instruction - IllegalCompInstrD = 1; - InstrD = {16'b0, instr16}; // preserve instruction for mtval on trap - end - endcase - end - end - endgenerate + InstrD = {7'b0000000, rs2, rds1, 3'b000, rds1, 7'b0110011}; // c.add + 5'b10101: InstrD = {immCSSD[11:5], rs2, 5'b00010, 3'b011, immCSSD[4:0], 7'b0100111}; // c.fsdsp + 5'b10110: InstrD = {immCSS[11:5], rs2, 5'b00010, 3'b010, immCSS[4:0], 7'b0100011}; // c.swsp + 5'b10111: if (`XLEN==32) + InstrD = {immCSS[11:5], rs2, 5'b00010, 3'b010, immCSS[4:0], 7'b0100111}; // c.fswsp + else + InstrD = {immCSSD[11:5], rs2, 5'b00010, 3'b011, immCSSD[4:0], 7'b0100011}; // c.sdsp + default: begin // illegal instruction + IllegalCompInstrD = 1; + InstrD = {16'b0, instr16}; // preserve instruction for mtval on trap + end + endcase + end + end endmodule diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 8a0c36d75..b7cc68088 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -111,12 +111,11 @@ module ifu ( logic [31:0] PostSpillInstrRawF; - generate if(`C_SUPPORTED) begin : SpillSupport logic [`XLEN-1:0] PCFp2; logic Spill; logic SelSpill, SpillSave; - logic [15:0] SpillDataBlock0; + logic [15:0] SpillDataLine0; // this exists only if there are compressed instructions. 
assign PCFp2 = PCF + `XLEN'b10; @@ -124,7 +123,7 @@ module ifu ( assign PCNextFMux = SelNextSpill ? PCFp2[11:0] : PCNextF[11:0]; assign PCFMux = SelSpill ? PCFp2 : PCF; - assign Spill = &PCF[$clog2(`ICACHE_BLOCKLENINBITS/32)+1:1]; + assign Spill = &PCF[$clog2(`ICACHE_LINELENINBITS/32)+1:1]; typedef enum {STATE_SPILL_READY, STATE_SPILL_SPILL} statetype; (* mark_debug = "true" *) statetype CurrState, NextState; @@ -154,19 +153,18 @@ module ifu ( .en(SpillSave), .reset(reset), .d(InstrRawF[15:0]), - .q(SpillDataBlock0)); + .q(SpillDataLine0)); - assign PostSpillInstrRawF = Spill ? {InstrRawF[15:0], SpillDataBlock0} : InstrRawF; + assign PostSpillInstrRawF = Spill ? {InstrRawF[15:0], SpillDataLine0} : InstrRawF; assign CompressedF = PostSpillInstrRawF[1:0] != 2'b11; // end of spill support - end else begin : NoSpillSupport // block: SpillSupport + end else begin : NoSpillSupport // block: SpillSupport assign PCNextFMux = PCNextF[11:0]; assign PCFMux = PCF; assign SelNextSpill = 0; assign PostSpillInstrRawF = InstrRawF; end - endgenerate assign PCFExt = {2'b00, PCFMux}; @@ -220,30 +218,30 @@ module ifu ( // 2. cache // `MEM_ICACHE // 3. wire pass-through - localparam integer WORDSPERLINE = `MEM_ICACHE ? `ICACHE_BLOCKLENINBITS/`XLEN : 1; + localparam integer WORDSPERLINE = `MEM_ICACHE ? `ICACHE_LINELENINBITS/`XLEN : 1; localparam integer LOGWPL = `MEM_ICACHE ? $clog2(WORDSPERLINE) : 1; - localparam integer BLOCKLEN = `MEM_ICACHE ? `ICACHE_BLOCKLENINBITS : `XLEN; + localparam integer LINELEN = `MEM_ICACHE ? `ICACHE_LINELENINBITS : `XLEN; localparam integer WordCountThreshold = `MEM_ICACHE ?
WORDSPERLINE - 1 : 0; - localparam integer BLOCKBYTELEN = BLOCKLEN/8; - localparam integer OFFSETLEN = $clog2(BLOCKBYTELEN); + localparam integer LINEBYTELEN = LINELEN/8; + localparam integer OFFSETLEN = $clog2(LINEBYTELEN); logic [LOGWPL-1:0] WordCount; - logic [BLOCKLEN-1:0] ICacheMemWriteData; + logic [LINELEN-1:0] ICacheMemWriteData; logic ICacheBusAck; logic [`PA_BITS-1:0] LocalIfuBusAdr; logic [`PA_BITS-1:0] ICacheBusAdr; logic SelUncachedAdr; - - - - generate if(`MEM_ICACHE) begin : icache logic [1:0] IfuRWF; assign IfuRWF = CacheableF ? 2'b10 : 2'b00; - icache icache(.clk, .reset, .CPUBusy, .IgnoreRequest, .ICacheMemWriteData , .ICacheBusAck, +/* -----\/----- EXCLUDED -----\/----- + icache #(.LINELEN(`ICACHE_LINELENINBITS), + .NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS), + .NUMWAYS(`ICACHE_NUMWAYS)) + icache(.clk, .reset, .CPUBusy, .IgnoreRequest, .ICacheMemWriteData , .ICacheBusAck, .ICacheBusAdr, .ICacheStallF, .FinalInstrRawF, .ICacheFetchLine, .IfuRWF(IfuRWF), //aways read @@ -251,15 +249,38 @@ module ifu ( .PCPF(PCPF), .PCF(PCFMux), .InvalidateICacheM); + -----/\----- EXCLUDED -----/\----- */ - end else begin : passthrough + logic [`XLEN-1:0] FinalInstrRawF_FIXME; + + cache #(.LINELEN(`ICACHE_LINELENINBITS), + .NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS), + .NUMWAYS(`ICACHE_NUMWAYS), .DCACHE(0)) + icache(.clk, .reset, .CPUBusy, .IgnoreRequest, .CacheMemWriteData(ICacheMemWriteData) , .CacheBusAck(ICacheBusAck), + .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), .ReadDataWord(FinalInstrRawF_FIXME), + .CacheFetchLine(ICacheFetchLine), + .CacheWriteLine(), + .ReadDataLineSets(), + .CacheMiss(), + .CacheAccess(), + .FinalWriteData('0), + .RW(IfuRWF), //aways read + .Atomic(2'b00), + .FlushCache(1'b0), + .LsuAdrE(PCNextFMux), // fixme + .LsuPAdrM(PCPF), // fixme + .PreLsuPAdrM(PCFMux[11:0]), //fixme + .CacheCommitted(), + .InvalidateCacheM(InvalidateICacheM)); + + assign FinalInstrRawF = FinalInstrRawF_FIXME[31:0]; + end else 
begin assign ICacheFetchLine = 0; assign ICacheBusAdr = 0; //assign CompressedF = 0; //? assign ICacheStallF = 0; assign FinalInstrRawF = 0; end - endgenerate // select between dcache and direct from the BUS. Always selected if no dcache. // handled in the busfsm. @@ -270,14 +291,12 @@ module ifu ( // always present genvar index; - generate - for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer - flopen #(`XLEN) fb(.clk(clk), - .en(IfuBusAck & IfuBusRead & (index == WordCount)), - .d(IfuBusHRDATA), - .q(ICacheMemWriteData[(index+1)*`XLEN-1:index*`XLEN])); - end - endgenerate + for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer + flopen #(`XLEN) fb(.clk(clk), + .en(IfuBusAck & IfuBusRead & (index == WordCount)), + .d(IfuBusHRDATA), + .q(ICacheMemWriteData[(index+1)*`XLEN-1:index*`XLEN])); + end assign LocalIfuBusAdr = SelUncachedAdr ? PCPF : ICacheBusAdr; assign IfuBusAdr = ({{`PA_BITS-LOGWPL{1'b0}}, WordCount} << $clog2(`XLEN/8)) + LocalIfuBusAdr; @@ -351,25 +370,23 @@ module ifu ( flopenl #(`XLEN) pcreg(clk, reset, ~StallF & ~ICacheStallF, PCNextF, `RESET_VECTOR, PCF); // branch and jump predictor - generate - if (`BPRED_ENABLED == 1) begin : bpred - bpred bpred(.clk, .reset, - .StallF, .StallD, .StallE, - .FlushF, .FlushD, .FlushE, - .PCNextF, .BPPredPCF, .SelBPPredF, .PCE, .PCSrcE, .IEUAdrE, - .PCD, .PCLinkE, .InstrClassE, .BPPredWrongE, .BPPredDirWrongE, - .BTBPredPCWrongE, .RASPredPCWrongE, .BPPredClassNonCFIWrongE); - - end else begin : bpred - assign BPPredPCF = {`XLEN{1'b0}}; - assign SelBPPredF = 1'b0; - assign BPPredWrongE = PCSrcE; - assign BPPredDirWrongE = 1'b0; - assign BTBPredPCWrongE = 1'b0; - assign RASPredPCWrongE = 1'b0; - assign BPPredClassNonCFIWrongE = 1'b0; - end - endgenerate + if (`BPRED_ENABLED == 1) begin : bpred + bpred bpred(.clk, .reset, + .StallF, .StallD, .StallE, + .FlushF, .FlushD, .FlushE, + .PCNextF, .BPPredPCF, .SelBPPredF, .PCE, .PCSrcE, .IEUAdrE, + .PCD, .PCLinkE, .InstrClassE, .BPPredWrongE, 
.BPPredDirWrongE, + .BTBPredPCWrongE, .RASPredPCWrongE, .BPPredClassNonCFIWrongE); + + end else begin : bpred + assign BPPredPCF = {`XLEN{1'b0}}; + assign SelBPPredF = 1'b0; + assign BPPredWrongE = PCSrcE; + assign BPPredDirWrongE = 1'b0; + assign BTBPredPCWrongE = 1'b0; + assign RASPredPCWrongE = 1'b0; + assign BPPredClassNonCFIWrongE = 1'b0; + end // The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE. assign PCCorrectE = PCSrcE ? IEUAdrE : PCLinkE; diff --git a/pipelined/src/ifu/localHistoryPredictor.sv b/pipelined/src/ifu/localHistoryPredictor.sv index 493d11246..2fda26d6d 100644 --- a/pipelined/src/ifu/localHistoryPredictor.sv +++ b/pipelined/src/ifu/localHistoryPredictor.sv @@ -66,16 +66,10 @@ module localHistoryPredictor // .BitWEN1(2'b11)); genvar index; - generate - for (index = 0; index < 2**m; index = index +1) begin:localhist - - flopenr #(k) LocalHistoryRegister(.clk(clk), - .reset(reset), - .en(UpdateEN & (index == UpdatePCIndex)), - .d(LHRFNext), - .q(LHRNextF[index])); - end - endgenerate + for (index = 0; index < 2**m; index = index +1) begin:localhist + flopenr #(k) LocalHistoryRegister(.clk, .reset, .en(UpdateEN & (index == UpdatePCIndex)), + .d(LHRFNext), .q(LHRNextF[index])); + end // need to forward when updating to the same address as reading. 
// first we compare to see if the update and lookup addreses are the same diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 2575774a6..ea9266de1 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -120,75 +120,73 @@ module lsu flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); assign IEUAdrExtM = {2'b00, IEUAdrM}; - generate - if(`MEM_VIRTMEM) begin : MEM_VIRTMEM - logic AnyCPUReqM; - logic [`PA_BITS-1:0] HPTWAdr; - logic HPTWRead; - logic [2:0] HPTWSize; - logic SelReplayCPURequest; + if(`MEM_VIRTMEM) begin : MEM_VIRTMEM + logic AnyCPUReqM; + logic [`PA_BITS-1:0] HPTWAdr; + logic HPTWRead; + logic [2:0] HPTWSize; + logic SelReplayCPURequest; - assign AnyCPUReqM = (|MemRWM) | (|AtomicM); + assign AnyCPUReqM = (|MemRWM) | (|AtomicM); - interlockfsm interlockfsm (.clk, .reset, .AnyCPUReqM, .ITLBMissF, .ITLBWriteF, - .DTLBMissM, .DTLBWriteM, .ExceptionM, .PendingInterruptM, .DCacheStall, - .InterlockStall, .SelReplayCPURequest, .SelHPTW, - .IgnoreRequest); - - hptw hptw(.clk, .reset, .SATP_REGW, .PCF, .IEUAdrM, - .ITLBMissF(ITLBMissF & ~PendingInterruptM), - .DTLBMissM(DTLBMissM & ~PendingInterruptM), - .MemRWM, .PTE, .PageType, .ITLBWriteF, .DTLBWriteM, - .HPTWReadPTE(ReadDataM), - .DCacheStall, .HPTWAdr, .HPTWRead, .HPTWSize, .AnyCPUReqM); + interlockfsm interlockfsm (.clk, .reset, .AnyCPUReqM, .ITLBMissF, .ITLBWriteF, + .DTLBMissM, .DTLBWriteM, .ExceptionM, .PendingInterruptM, .DCacheStall, + .InterlockStall, .SelReplayCPURequest, .SelHPTW, + .IgnoreRequest); + + hptw hptw(.clk, .reset, .SATP_REGW, .PCF, .IEUAdrM, + .ITLBMissF(ITLBMissF & ~PendingInterruptM), + .DTLBMissM(DTLBMissM & ~PendingInterruptM), + .MemRWM, .PTE, .PageType, .ITLBWriteF, .DTLBWriteM, + .HPTWReadPTE(ReadDataM), + .DCacheStall, .HPTWAdr, .HPTWRead, .HPTWSize, .AnyCPUReqM); - // arbiter between IEU and hptw - - // multiplex the outputs to LSU - mux2 #(2) rwmux(MemRWM, {HPTWRead, 1'b0}, SelHPTW, PreLsuRWM); - mux2 #(3) 
sizemux(Funct3M, HPTWSize, SelHPTW, LsuFunct3M); - mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LsuAtomicM); - mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, PreLsuAdrE); - mux2 #(`PA_BITS) lsupadrmux(IEUAdrExtM[`PA_BITS-1:0], HPTWAdr, SelHPTW, PreLsuPAdrM); + // arbiter between IEU and hptw + + // multiplex the outputs to LSU + mux2 #(2) rwmux(MemRWM, {HPTWRead, 1'b0}, SelHPTW, PreLsuRWM); + mux2 #(3) sizemux(Funct3M, HPTWSize, SelHPTW, LsuFunct3M); + mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LsuAtomicM); + mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, PreLsuAdrE); + mux2 #(`PA_BITS) lsupadrmux(IEUAdrExtM[`PA_BITS-1:0], HPTWAdr, SelHPTW, PreLsuPAdrM); - // always block interrupts when using the hardware page table walker. - assign CPUBusy = StallW & ~SelHPTW; - - // It is not possible to pipeline hptw as the following load will depend on the previous load's - // data. Therefore we don't need a pipeline register - //flop #(`PA_BITS) HPTWAdrMReg(clk, HPTWAdr, HPTWAdrM); // delay HPTWAdrM by a cycle + // always block interrupts when using the hardware page table walker. + assign CPUBusy = StallW & ~SelHPTW; + + // It is not possible to pipeline hptw as the following load will depend on the previous load's + // data. Therefore we don't need a pipeline register + //flop #(`PA_BITS) HPTWAdrMReg(clk, HPTWAdr, HPTWAdrM); // delay HPTWAdrM by a cycle - // Specify which type of page fault is occurring - assign DTLBLoadPageFaultM = DTLBPageFaultM & PreLsuRWM[1]; - assign DTLBStorePageFaultM = DTLBPageFaultM & PreLsuRWM[0]; + // Specify which type of page fault is occurring + assign DTLBLoadPageFaultM = DTLBPageFaultM & PreLsuRWM[1]; + assign DTLBStorePageFaultM = DTLBPageFaultM & PreLsuRWM[0]; - // When replaying CPU memory request after PTW select the IEUAdrM for correct address. - assign LsuAdrE = SelReplayCPURequest ? IEUAdrM[11:0] : PreLsuAdrE; + // When replaying CPU memory request after PTW select the IEUAdrM for correct address. 
+ assign LsuAdrE = SelReplayCPURequest ? IEUAdrM[11:0] : PreLsuAdrE; - end // if (`MEM_VIRTMEM) - else begin - assign InterlockStall = 1'b0; - - assign LsuAdrE = PreLsuAdrE; - assign SelHPTW = 1'b0; - assign IgnoreRequest = 1'b0; + end // if (`MEM_VIRTMEM) + else begin + assign InterlockStall = 1'b0; + + assign LsuAdrE = PreLsuAdrE; + assign SelHPTW = 1'b0; + assign IgnoreRequest = 1'b0; - assign PTE = '0; - assign PageType = '0; - assign DTLBWriteM = 1'b0; - assign ITLBWriteF = 1'b0; - - assign PreLsuRWM = MemRWM; - assign LsuFunct3M = Funct3M; - assign LsuAtomicM = AtomicM; - assign PreLsuAdrE = IEUAdrE[11:0]; - assign PreLsuPAdrM = IEUAdrExtM; - assign CPUBusy = StallW; - - assign DTLBLoadPageFaultM = 1'b0; - assign DTLBStorePageFaultM = 1'b0; - end - endgenerate + assign PTE = '0; + assign PageType = '0; + assign DTLBWriteM = 1'b0; + assign ITLBWriteF = 1'b0; + + assign PreLsuRWM = MemRWM; + assign LsuFunct3M = Funct3M; + assign LsuAtomicM = AtomicM; + assign PreLsuAdrE = IEUAdrE[11:0]; + assign PreLsuPAdrM = IEUAdrExtM; + assign CPUBusy = StallW; + + assign DTLBLoadPageFaultM = 1'b0; + assign DTLBStorePageFaultM = 1'b0; + end // **** look into this confusing signal. // This signal is confusing. CommittedM tells the CPU's trap unit the current instruction @@ -200,72 +198,66 @@ module lsu // to flush the memory operation at that time. 
assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; - generate - if(`ZICSR_SUPPORTED == 1) begin : dmmu - logic DataMisalignedM; + if(`ZICSR_SUPPORTED == 1) begin : dmmu + logic DataMisalignedM; - mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) - dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, - .PrivilegeModeW, .DisableTranslation(SelHPTW), - .PAdr(PreLsuPAdrM), - .VAdr(IEUAdrM), - .Size(LsuFunct3M[1:0]), - .PTE, - .PageTypeWriteVal(PageType), - .TLBWrite(DTLBWriteM), - .TLBFlush(DTLBFlushM), - .PhysicalAddress(LsuPAdrM), - .TLBMiss(DTLBMissM), - .Cacheable(CacheableM), - .Idempotent(), .AtomicAllowed(), - .TLBPageFault(DTLBPageFaultM), - .InstrAccessFaultF(), .LoadAccessFaultM, .StoreAccessFaultM, - .AtomicAccessM(1'b0), .ExecuteAccessF(1'b0), /// atomicaccessm is probably a bug - .WriteAccessM(PreLsuRWM[0]), .ReadAccessM(PreLsuRWM[1]), - .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW - ); // *** the pma/pmp instruction access faults don't really matter here. is it possible to parameterize which outputs exist? + mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) + dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, + .PrivilegeModeW, .DisableTranslation(SelHPTW), + .PAdr(PreLsuPAdrM), + .VAdr(IEUAdrM), + .Size(LsuFunct3M[1:0]), + .PTE, + .PageTypeWriteVal(PageType), + .TLBWrite(DTLBWriteM), + .TLBFlush(DTLBFlushM), + .PhysicalAddress(LsuPAdrM), + .TLBMiss(DTLBMissM), + .Cacheable(CacheableM), + .Idempotent(), .AtomicAllowed(), + .TLBPageFault(DTLBPageFaultM), + .InstrAccessFaultF(), .LoadAccessFaultM, .StoreAccessFaultM, + .AtomicAccessM(1'b0), .ExecuteAccessF(1'b0), /// atomicaccessm is probably a bug + .WriteAccessM(PreLsuRWM[0]), .ReadAccessM(PreLsuRWM[1]), + .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW + ); // *** the pma/pmp instruction access faults don't really matter here. is it possible to parameterize which outputs exist? 
- // Determine if an Unaligned access is taking place - // hptw guarantees alignment, only check inputs from IEU. - always_comb - case(Funct3M[1:0]) - 2'b00: DataMisalignedM = 0; // lb, sb, lbu - 2'b01: DataMisalignedM = IEUAdrM[0]; // lh, sh, lhu - 2'b10: DataMisalignedM = IEUAdrM[1] | IEUAdrM[0]; // lw, sw, flw, fsw, lwu - 2'b11: DataMisalignedM = |IEUAdrM[2:0]; // ld, sd, fld, fsd - endcase + // Determine if an Unaligned access is taking place + // hptw guarantees alignment, only check inputs from IEU. + always_comb + case(Funct3M[1:0]) + 2'b00: DataMisalignedM = 0; // lb, sb, lbu + 2'b01: DataMisalignedM = IEUAdrM[0]; // lh, sh, lhu + 2'b10: DataMisalignedM = IEUAdrM[1] | IEUAdrM[0]; // lw, sw, flw, fsw, lwu + 2'b11: DataMisalignedM = |IEUAdrM[2:0]; // ld, sd, fld, fsd + endcase - // If the CPU's (not HPTW's) request is a page fault. - assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1]; - assign StoreMisalignedFaultM = DataMisalignedM & MemRWM[0]; - - end else begin - assign LsuPAdrM = PreLsuPAdrM; - assign DTLBMissM = 0; - assign CacheableM = 1; - assign DTLBPageFaultM = 0; - assign LoadAccessFaultM = 0; - assign StoreAccessFaultM = 0; - assign LoadMisalignedFaultM = 0; - assign StoreMisalignedFaultM = 0; - end - endgenerate + // If the CPU's (not HPTW's) request is a page fault. + assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1]; + assign StoreMisalignedFaultM = DataMisalignedM & MemRWM[0]; + + end else begin + assign LsuPAdrM = PreLsuPAdrM; + assign DTLBMissM = 0; + assign CacheableM = 1; + assign DTLBPageFaultM = 0; + assign LoadAccessFaultM = 0; + assign StoreAccessFaultM = 0; + assign LoadMisalignedFaultM = 0; + assign StoreMisalignedFaultM = 0; + end assign LSUStall = DCacheStall | InterlockStall | BusStall; - - // Move generate from lrsc to outside this module. 
// use PreLsu as prefix for lrsc - generate - if (`A_SUPPORTED) begin:lrsc - assign MemReadM = PreLsuRWM[1] & ~(IgnoreRequest) & ~DTLBMissM; - lrsc lrsc(.clk, .reset, .FlushW, .CPUBusy, .MemReadM, .PreLsuRWM, .LsuAtomicM, .LsuPAdrM, - .SquashSCW, .LsuRWM); - end else begin:lrsc - assign SquashSCW = 0; - assign LsuRWM = PreLsuRWM; - end - endgenerate + if (`A_SUPPORTED) begin:lrsc + assign MemReadM = PreLsuRWM[1] & ~(IgnoreRequest) & ~DTLBMissM; + lrsc lrsc(.clk, .reset, .FlushW, .CPUBusy, .MemReadM, .PreLsuRWM, .LsuAtomicM, .LsuPAdrM, + .SquashSCW, .LsuRWM); + end else begin:lrsc + assign SquashSCW = 0; + assign LsuRWM = PreLsuRWM; + end // conditional @@ -273,20 +265,20 @@ module lsu // 2. cache `MEM_DCACHE // 3. wire pass-through - localparam integer WORDSPERLINE = `MEM_DCACHE ? `DCACHE_BLOCKLENINBITS/`XLEN : 1; + localparam integer WORDSPERLINE = `MEM_DCACHE ? `DCACHE_LINELENINBITS/`XLEN : 1; localparam integer LOGWPL = `MEM_DCACHE ? $clog2(WORDSPERLINE) : 1; - localparam integer BLOCKLEN = `MEM_DCACHE ? `DCACHE_BLOCKLENINBITS : `XLEN; + localparam integer LINELEN = `MEM_DCACHE ? `DCACHE_LINELENINBITS : `XLEN; localparam integer WordCountThreshold = `MEM_DCACHE ? 
WORDSPERLINE - 1 : 0; - localparam integer BLOCKBYTELEN = BLOCKLEN/8; - localparam integer OFFSETLEN = $clog2(BLOCKBYTELEN); + localparam integer LINEBYTELEN = LINELEN/8; + localparam integer OFFSETLEN = $clog2(LINEBYTELEN); // temp logic [`XLEN-1:0] FinalAMOWriteDataM, FinalWriteDataM; (* mark_debug = "true" *) logic [`XLEN-1:0] PreLsuBusHWDATA; logic [`XLEN-1:0] ReadDataWordM; - logic [BLOCKLEN-1:0] DCacheMemWriteData; + logic [LINELEN-1:0] DCacheMemWriteData; // keep logic [`XLEN-1:0] ReadDataWordMuxM; @@ -294,7 +286,7 @@ module lsu logic [`PA_BITS-1:0] DCacheBusAdr; - logic [`XLEN-1:0] ReadDataBlockSetsM [WORDSPERLINE-1:0]; + logic [`XLEN-1:0] ReadDataLineSetsM [WORDSPERLINE-1:0]; @@ -304,27 +296,28 @@ module lsu logic SelUncachedAdr; - generate - if(`MEM_DCACHE) begin : dcache - dcache dcache(.clk, .reset, .CPUBusy, - .LsuRWM, .FlushDCacheM, .LsuAtomicM, .LsuAdrE, .LsuPAdrM, .PreLsuPAdrM(PreLsuPAdrM[11:0]), // still don't like this name PreLsuPAdrM, not always physical - .FinalWriteDataM, .ReadDataWordM, .DCacheStall, - .DCacheMiss, .DCacheAccess, - .IgnoreRequest, .CacheableM, .DCacheCommittedM, - .DCacheBusAdr, .ReadDataBlockSetsM, .DCacheMemWriteData, - .DCacheFetchLine, .DCacheWriteLine,.DCacheBusAck); - end else begin : passthrough - assign ReadDataWordM = 0; - assign DCacheStall = 0; - assign DCacheMiss = 1; - assign DCacheAccess = CacheableM; - assign DCacheCommittedM = 0; - assign DCacheWriteLine = 0; - assign DCacheFetchLine = 0; - assign DCacheBusAdr = 0; - assign ReadDataBlockSetsM[0] = 0; - end - endgenerate + if(`MEM_DCACHE) begin : dcache + cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), + .NUMWAYS(`DCACHE_NUMWAYS), .DCACHE(1)) + dcache(.clk, .reset, .CPUBusy, + .RW(CacheableM ? LsuRWM : 2'b00), .FlushCache(FlushDCacheM), .Atomic(CacheableM ? 
LsuAtomicM : 2'b00), + .LsuAdrE, .LsuPAdrM, .PreLsuPAdrM(PreLsuPAdrM[11:0]), // still don't like this name PreLsuPAdrM, not always physical + .FinalWriteData(FinalWriteDataM), .ReadDataWord(ReadDataWordM), .CacheStall(DCacheStall), + .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), + .IgnoreRequest, .CacheCommitted(DCacheCommittedM), + .CacheBusAdr(DCacheBusAdr), .ReadDataLineSets(ReadDataLineSetsM), .CacheMemWriteData(DCacheMemWriteData), + .CacheFetchLine(DCacheFetchLine), .CacheWriteLine(DCacheWriteLine), .CacheBusAck(DCacheBusAck), .InvalidateCacheM(1'b0)); + end else begin : passthrough + assign ReadDataWordM = 0; + assign DCacheStall = 0; + assign DCacheMiss = 1; + assign DCacheAccess = CacheableM; + assign DCacheCommittedM = 0; + assign DCacheWriteLine = 0; + assign DCacheFetchLine = 0; + assign DCacheBusAdr = 0; + assign ReadDataLineSetsM[0] = 0; + end // select between dcache and direct from the BUS. Always selected if no dcache. @@ -340,15 +333,13 @@ module lsu .Funct3M(LsuFunct3M), .ReadDataM); - generate - if (`A_SUPPORTED) begin : amo - logic [`XLEN-1:0] AMOResult; - amoalu amoalu(.srca(ReadDataM), .srcb(WriteDataM), .funct(Funct7M), .width(LsuFunct3M[1:0]), - .result(AMOResult)); - mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, LsuAtomicM[1], FinalAMOWriteDataM); - end else - assign FinalAMOWriteDataM = WriteDataM; - endgenerate + if (`A_SUPPORTED) begin : amo + logic [`XLEN-1:0] AMOResult; + amoalu amoalu(.srca(ReadDataM), .srcb(WriteDataM), .funct(Funct7M), .width(LsuFunct3M[1:0]), + .result(AMOResult)); + mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, LsuAtomicM[1], FinalAMOWriteDataM); + end else + assign FinalAMOWriteDataM = WriteDataM; // this might only get instantiated if there is a dcache or dtim. // There is a copy in the ebu. 
@@ -365,24 +356,20 @@ module lsu logic [LOGWPL-1:0] WordCount; genvar index; - generate - for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer - flopen #(`XLEN) fb(.clk(clk), - .en(LsuBusAck & LsuBusRead & (index == WordCount)), - .d(LsuBusHRDATA), - .q(DCacheMemWriteData[(index+1)*`XLEN-1:index*`XLEN])); - end - endgenerate + for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer + flopen #(`XLEN) fb(.clk, + .en(LsuBusAck & LsuBusRead & (index == WordCount)), + .d(LsuBusHRDATA), + .q(DCacheMemWriteData[(index+1)*`XLEN-1:index*`XLEN])); + end assign LocalLsuBusAdr = SelUncachedAdr ? LsuPAdrM : DCacheBusAdr ; assign LsuBusAdr = ({{`PA_BITS-LOGWPL{1'b0}}, WordCount} << $clog2(`XLEN/8)) + LocalLsuBusAdr; - assign PreLsuBusHWDATA = ReadDataBlockSetsM[WordCount]; + assign PreLsuBusHWDATA = ReadDataLineSetsM[WordCount]; assign LsuBusHWDATA = SelUncachedAdr ? WriteDataM : PreLsuBusHWDATA; // *** why is this not FinalWriteDataM? which does not work. - generate - if (`XLEN == 32) assign LsuBusSize = SelUncachedAdr ? LsuFunct3M : 3'b010; - else assign LsuBusSize = SelUncachedAdr ? LsuFunct3M : 3'b011; - endgenerate; + if (`XLEN == 32) assign LsuBusSize = SelUncachedAdr ? LsuFunct3M : 3'b010; + else assign LsuBusSize = SelUncachedAdr ? LsuFunct3M : 3'b011; busfsm #(WordCountThreshold, LOGWPL, `MEM_DCACHE) busfsm(.clk, .reset, .IgnoreRequest, .LsuRWM, .DCacheFetchLine, .DCacheWriteLine, diff --git a/pipelined/src/lsu/subwordread.sv b/pipelined/src/lsu/subwordread.sv index 8d787cdd3..1e0f380c6 100644 --- a/pipelined/src/lsu/subwordread.sv +++ b/pipelined/src/lsu/subwordread.sv @@ -38,77 +38,75 @@ module subwordread // Funct3M[2] is the unsigned bit. mask upper bits. // Funct3M[1:0] is the size of the memory access. 
- generate - if (`XLEN == 64) begin:swrmux - // ByteMe mux - always_comb - case(LsuPAdrM[2:0]) - 3'b000: ByteM = ReadDataWordMuxM[7:0]; - 3'b001: ByteM = ReadDataWordMuxM[15:8]; - 3'b010: ByteM = ReadDataWordMuxM[23:16]; - 3'b011: ByteM = ReadDataWordMuxM[31:24]; - 3'b100: ByteM = ReadDataWordMuxM[39:32]; - 3'b101: ByteM = ReadDataWordMuxM[47:40]; - 3'b110: ByteM = ReadDataWordMuxM[55:48]; - 3'b111: ByteM = ReadDataWordMuxM[63:56]; - endcase + if (`XLEN == 64) begin:swrmux + // ByteMe mux + always_comb + case(LsuPAdrM[2:0]) + 3'b000: ByteM = ReadDataWordMuxM[7:0]; + 3'b001: ByteM = ReadDataWordMuxM[15:8]; + 3'b010: ByteM = ReadDataWordMuxM[23:16]; + 3'b011: ByteM = ReadDataWordMuxM[31:24]; + 3'b100: ByteM = ReadDataWordMuxM[39:32]; + 3'b101: ByteM = ReadDataWordMuxM[47:40]; + 3'b110: ByteM = ReadDataWordMuxM[55:48]; + 3'b111: ByteM = ReadDataWordMuxM[63:56]; + endcase + + // halfword mux + always_comb + case(LsuPAdrM[2:1]) + 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; + 2'b01: HalfwordM = ReadDataWordMuxM[31:16]; + 2'b10: HalfwordM = ReadDataWordMuxM[47:32]; + 2'b11: HalfwordM = ReadDataWordMuxM[63:48]; + endcase - // halfword mux - always_comb - case(LsuPAdrM[2:1]) - 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; - 2'b01: HalfwordM = ReadDataWordMuxM[31:16]; - 2'b10: HalfwordM = ReadDataWordMuxM[47:32]; - 2'b11: HalfwordM = ReadDataWordMuxM[63:48]; - endcase - - logic [31:0] WordM; - - always_comb - case(LsuPAdrM[2]) - 1'b0: WordM = ReadDataWordMuxM[31:0]; - 1'b1: WordM = ReadDataWordMuxM[63:32]; - endcase - - // sign extension - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{56{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{48{HalfwordM[15]}}, HalfwordM[15:0]}; // lh - 3'b010: ReadDataM = {{32{WordM[31]}}, WordM[31:0]}; // lw - 3'b011: ReadDataM = ReadDataWordMuxM; // ld - 3'b100: ReadDataM = {56'b0, ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {48'b0, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {32'b0, WordM[31:0]}; // lwu - default: ReadDataM = 
ReadDataWordMuxM; // Shouldn't happen - endcase - end else begin :swrmux // 32-bit - // byte mux - always_comb - case(LsuPAdrM[1:0]) - 2'b00: ByteM = ReadDataWordMuxM[7:0]; - 2'b01: ByteM = ReadDataWordMuxM[15:8]; - 2'b10: ByteM = ReadDataWordMuxM[23:16]; - 2'b11: ByteM = ReadDataWordMuxM[31:24]; - endcase + logic [31:0] WordM; - // halfword mux - always_comb - case(LsuPAdrM[1]) - 1'b0: HalfwordM = ReadDataWordMuxM[15:0]; - 1'b1: HalfwordM = ReadDataWordMuxM[31:16]; + always_comb + case(LsuPAdrM[2]) + 1'b0: WordM = ReadDataWordMuxM[31:0]; + 1'b1: WordM = ReadDataWordMuxM[63:32]; endcase - // sign extension - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{24{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh - 3'b010: ReadDataM = ReadDataWordMuxM; // lw - 3'b100: ReadDataM = {24'b0, ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {16'b0, HalfwordM[15:0]}; // lhu - default: ReadDataM = ReadDataWordMuxM; - endcase - end - endgenerate + // sign extension + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{56{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{48{HalfwordM[15]}}, HalfwordM[15:0]}; // lh + 3'b010: ReadDataM = {{32{WordM[31]}}, WordM[31:0]}; // lw + 3'b011: ReadDataM = ReadDataWordMuxM; // ld + 3'b100: ReadDataM = {56'b0, ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {48'b0, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {32'b0, WordM[31:0]}; // lwu + default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen + endcase + end else begin:swrmux // 32-bit + // byte mux + always_comb + case(LsuPAdrM[1:0]) + 2'b00: ByteM = ReadDataWordMuxM[7:0]; + 2'b01: ByteM = ReadDataWordMuxM[15:8]; + 2'b10: ByteM = ReadDataWordMuxM[23:16]; + 2'b11: ByteM = ReadDataWordMuxM[31:24]; + endcase + + // halfword mux + always_comb + case(LsuPAdrM[1]) + 1'b0: HalfwordM = ReadDataWordMuxM[15:0]; + 1'b1: HalfwordM = ReadDataWordMuxM[31:16]; + endcase + + // sign extension + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{24{ByteM[7]}}, 
ByteM}; // lb + 3'b001: ReadDataM = {{16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh + 3'b010: ReadDataM = ReadDataWordMuxM; // lw + 3'b100: ReadDataM = {24'b0, ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {16'b0, HalfwordM[15:0]}; // lhu + default: ReadDataM = ReadDataWordMuxM; + endcase + end endmodule diff --git a/pipelined/src/mmu/hptw.sv b/pipelined/src/mmu/hptw.sv index 95d202679..157a80bcb 100644 --- a/pipelined/src/mmu/hptw.sv +++ b/pipelined/src/mmu/hptw.sv @@ -52,7 +52,7 @@ module hptw L1_ADR, L1_RD, L2_ADR, L2_RD, L3_ADR, L3_RD, - LEAF, IDLE} statetype; // *** placed outside generate statement to remove synthesis errors + LEAF, IDLE} statetype; logic DTLBWalk; // register TLBs translation miss requests logic [`PPN_BITS-1:0] BasePageTablePPN; diff --git a/pipelined/src/mmu/mmu.sv b/pipelined/src/mmu/mmu.sv index 43251b7a4..e69028118 100644 --- a/pipelined/src/mmu/mmu.sv +++ b/pipelined/src/mmu/mmu.sv @@ -90,27 +90,25 @@ module mmu #(parameter TLB_ENTRIES = 8, // number of TLB Entries // only instantiate TLB if Virtual Memory is supported - generate - if (`MEM_VIRTMEM) begin:tlb - logic ReadAccess, WriteAccess; - assign ReadAccess = ExecuteAccessF | ReadAccessM; // execute also acts as a TLB read. 
Execute and Read are never active for the same MMU, so safe to mix pipestages - assign WriteAccess = WriteAccessM; - tlb #(.TLB_ENTRIES(TLB_ENTRIES), .ITLB(IMMU)) - tlb(.clk, .reset, - .SATP_MODE(SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]), - .SATP_ASID(SATP_REGW[`ASID_BASE+`ASID_BITS-1:`ASID_BASE]), - .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, - .PrivilegeModeW, .ReadAccess, .WriteAccess, - .DisableTranslation, .PTE, .PageTypeWriteVal, - .TLBWrite, .TLBFlush, .TLBPAdr, .TLBMiss, .TLBHit, - .Translate, .TLBPageFault); - end else begin:tlb// just pass address through as physical - assign Translate = 0; - assign TLBMiss = 0; - assign TLBHit = 1; // *** is this necessary - assign TLBPageFault = 0; - end - endgenerate + if (`MEM_VIRTMEM) begin:tlb + logic ReadAccess, WriteAccess; + assign ReadAccess = ExecuteAccessF | ReadAccessM; // execute also acts as a TLB read. Execute and Read are never active for the same MMU, so safe to mix pipestages + assign WriteAccess = WriteAccessM; + tlb #(.TLB_ENTRIES(TLB_ENTRIES), .ITLB(IMMU)) + tlb(.clk, .reset, + .SATP_MODE(SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]), + .SATP_ASID(SATP_REGW[`ASID_BASE+`ASID_BITS-1:`ASID_BASE]), + .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, + .PrivilegeModeW, .ReadAccess, .WriteAccess, + .DisableTranslation, .PTE, .PageTypeWriteVal, + .TLBWrite, .TLBFlush, .TLBPAdr, .TLBMiss, .TLBHit, + .Translate, .TLBPageFault); + end else begin:tlb// just pass address through as physical + assign Translate = 0; + assign TLBMiss = 0; + assign TLBHit = 1; // *** is this necessary + assign TLBPageFault = 0; + end // If translation is occuring, select translated physical address from TLB mux2 #(`PA_BITS) addressmux(PAdr, TLBPAdr, Translate, PhysicalAddress); diff --git a/pipelined/src/mmu/pmpadrdec.sv b/pipelined/src/mmu/pmpadrdec.sv index 868688863..4913f088c 100644 --- a/pipelined/src/mmu/pmpadrdec.sv +++ b/pipelined/src/mmu/pmpadrdec.sv @@ -68,7 +68,7 @@ module pmpadrdec ( assign NAMask[1:0] = 
{2'b11}; assign NAMask[`PA_BITS-1:2] = (PMPAdr[`PA_BITS-3:0] + {{(`PA_BITS-3){1'b0}}, (AdrMode == NAPOT)}) ^ PMPAdr[`PA_BITS-3:0]; - // generates a mask where the bottom k bits are 1, corresponding to a size of 2^k bytes for this memory region. + // form a mask where the bottom k bits are 1, corresponding to a size of 2^k bytes for this memory region. // This assumes we're using at least an NA4 region, but works for any size NAPOT region. assign NABase = {(PMPAdr[`PA_BITS-3:0] & ~NAMask[`PA_BITS-1:2]), 2'b00}; // base physical address of the pmp. diff --git a/pipelined/src/mmu/pmpchecker.sv b/pipelined/src/mmu/pmpchecker.sv index 824217f09..593b2c19a 100644 --- a/pipelined/src/mmu/pmpchecker.sv +++ b/pipelined/src/mmu/pmpchecker.sv @@ -47,37 +47,34 @@ module pmpchecker ( output logic PMPStoreAccessFaultM ); - generate - if (`PMP_ENTRIES > 0) begin: pmpchecker - // Bit i is high when the address falls in PMP region i - logic EnforcePMP; - logic [`PMP_ENTRIES-1:0] Match; // physical address matches one of the pmp ranges - logic [`PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address. - logic [`PMP_ENTRIES-1:0] Active; // PMP register i is non-null - logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set - logic [`PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] - - pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0]( - .PhysicalAddress, - .PMPCfg(PMPCFG_ARRAY_REGW), - .PMPAdr(PMPADDR_ARRAY_REGW), - .PAgePMPAdrIn({PAgePMPAdr[`PMP_ENTRIES-2:0], 1'b1}), - .PAgePMPAdrOut(PAgePMPAdr), - .FirstMatch, .Match, .Active, .L, .X, .W, .R); + if (`PMP_ENTRIES > 0) begin: pmpchecker + // Bit i is high when the address falls in PMP region i + logic EnforcePMP; + logic [`PMP_ENTRIES-1:0] Match; // physical address matches one of the pmp ranges + logic [`PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address. 
+ logic [`PMP_ENTRIES-1:0] Active; // PMP register i is non-null + logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set + logic [`PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] - priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // combine the match signal from all the adress decoders to find the first one that matches. + pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0]( + .PhysicalAddress, + .PMPCfg(PMPCFG_ARRAY_REGW), + .PMPAdr(PMPADDR_ARRAY_REGW), + .PAgePMPAdrIn({PAgePMPAdr[`PMP_ENTRIES-2:0], 1'b1}), + .PAgePMPAdrOut(PAgePMPAdr), + .FirstMatch, .Match, .Active, .L, .X, .W, .R); - // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region - assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L : |Active; + priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // combine the match signal from all the adress decoders to find the first one that matches. - assign PMPInstrAccessFaultF = EnforcePMP & ExecuteAccessF & ~|X; - assign PMPStoreAccessFaultM = EnforcePMP & WriteAccessM & ~|W; - assign PMPLoadAccessFaultM = EnforcePMP & ReadAccessM & ~|R; - end else begin: pmpchecker // no checker - assign PMPInstrAccessFaultF = 0; - assign PMPLoadAccessFaultM = 0; - assign PMPStoreAccessFaultM = 0; - end - endgenerate - //assign PMPSquashBusAccess = PMPInstrAccessFaultF | PMPLoadAccessFaultM | PMPStoreAccessFaultM; + // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region + assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? 
|L : |Active; + + assign PMPInstrAccessFaultF = EnforcePMP & ExecuteAccessF & ~|X; + assign PMPStoreAccessFaultM = EnforcePMP & WriteAccessM & ~|W; + assign PMPLoadAccessFaultM = EnforcePMP & ReadAccessM & ~|R; + end else begin: pmpchecker // no checker + assign PMPInstrAccessFaultF = 0; + assign PMPLoadAccessFaultM = 0; + assign PMPStoreAccessFaultM = 0; + end endmodule diff --git a/pipelined/src/mmu/priorityonehot.sv b/pipelined/src/mmu/priorityonehot.sv index 849f178ee..e6e8ec225 100644 --- a/pipelined/src/mmu/priorityonehot.sv +++ b/pipelined/src/mmu/priorityonehot.sv @@ -34,25 +34,9 @@ module priorityonehot #(parameter ENTRIES = 8) ( input logic [ENTRIES-1:0] a, output logic [ENTRIES-1:0] y ); - - /* verilator lint_off UNOPTFLAT */ - logic [ENTRIES-1:0] nolower; - // generate thermometer code mask + // create thermometer code mask prioritythermometer #(ENTRIES) maskgen(.a({a[ENTRIES-2:0], 1'b1}), .y(nolower)); - // genvar i; - // generate - // assign nolower[0] = 1'b1; - // for (i=1; i 2'd0); // least signifcant section - assign Match1 = (Query1 == Key1) | (PageType > 2'd1); - assign Match2 = (Query2 == Key2) | (PageType > 2'd2); - assign Match3 = (Query3 == Key3) | SV39Mode; // this should always match in sv39 because they aren't used - - assign Match = Match0 & Match1 & Match2 & Match3 & MatchASID & Valid; - end - endgenerate + // Calculate the actual match value based on the input vpn and the page type. + // For example, a gigapage in SV39 only cares about VPN[2], so VPN[0] and VPN[1] + // should automatically match. 
+ assign Match0 = (Query0 == Key0) | (PageType > 2'd0); // least signifcant section + assign Match1 = (Query1 == Key1) | (PageType > 2'd1); + assign Match2 = (Query2 == Key2) | (PageType > 2'd2); + assign Match3 = (Query3 == Key3) | SV39Mode; // this should always match in sv39 because they aren't used + + assign Match = Match0 & Match1 & Match2 & Match3 & MatchASID & Valid; + end // On a write, update the type of the page referred to by this line. flopenr #(2) pagetypeflop(clk, reset, WriteEnable, PageTypeWriteVal, PageType); diff --git a/pipelined/src/mmu/tlbcontrol.sv b/pipelined/src/mmu/tlbcontrol.sv index f3844717f..cb6675cfd 100644 --- a/pipelined/src/mmu/tlbcontrol.sv +++ b/pipelined/src/mmu/tlbcontrol.sv @@ -60,65 +60,59 @@ module tlbcontrol #(parameter ITLB = 0) ( logic UpperBitsUnequalPageFault; logic DAPageFault; logic TLBAccess; + logic ImproperPrivilege; // Grab the sv mode from SATP and determine whether translation should occur assign EffectivePrivilegeMode = (ITLB == 1) ? PrivilegeModeW : (STATUS_MPRV ? STATUS_MPP : PrivilegeModeW); // DTLB uses MPP mode when MPRV is 1 assign Translate = (SATP_MODE != `NO_TRANSLATE) & (EffectivePrivilegeMode != `M_MODE) & ~DisableTranslation; - generate - if (`XLEN==64) begin:rv64 - assign SV39Mode = (SATP_MODE == `SV39); - // generate page fault if upper bits aren't all the same - logic UpperEqual39, UpperEqual48; - assign UpperEqual39 = &(VAdr[63:38]) | ~|(VAdr[63:38]); - assign UpperEqual48 = &(VAdr[63:47]) | ~|(VAdr[63:47]); - assign UpperBitsUnequalPageFault = SV39Mode ? 
~UpperEqual39 : ~UpperEqual48; - end else begin - assign SV39Mode = 0; - assign UpperBitsUnequalPageFault = 0; - end - endgenerate + if (`XLEN==64) begin:rv64 + assign SV39Mode = (SATP_MODE == `SV39); + // page fault if upper bits aren't all the same + logic UpperEqual39, UpperEqual48; + assign UpperEqual39 = &(VAdr[63:38]) | ~|(VAdr[63:38]); + assign UpperEqual48 = &(VAdr[63:47]) | ~|(VAdr[63:47]); + assign UpperBitsUnequalPageFault = SV39Mode ? ~UpperEqual39 : ~UpperEqual48; + end else begin + assign SV39Mode = 0; + assign UpperBitsUnequalPageFault = 0; + end // Determine whether TLB is being used assign TLBAccess = ReadAccess | WriteAccess; // Check whether upper bits of virtual addresss are all equal - // unswizzle useful PTE bits assign {PTE_D, PTE_A} = PTEAccessBits[7:6]; assign {PTE_U, PTE_X, PTE_W, PTE_R, PTE_V} = PTEAccessBits[4:0]; // Check whether the access is allowed, page faulting if not. - generate - if (ITLB == 1) begin:itlb // Instruction TLB fault checking - logic ImproperPrivilege; + if (ITLB == 1) begin:itlb // Instruction TLB fault checking + // User mode may only execute user mode pages, and supervisor mode may + // only execute non-user mode pages. + assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | + ((EffectivePrivilegeMode == `S_MODE) & PTE_U); + // fault for software handling if access bit is off + assign DAPageFault = ~PTE_A; + assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | DAPageFault | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); + end else begin:dtlb // Data TLB fault checking + logic InvalidRead, InvalidWrite; - // User mode may only execute user mode pages, and supervisor mode may - // only execute non-user mode pages. 
- assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | - ((EffectivePrivilegeMode == `S_MODE) & PTE_U); - // fault for software handling if access bit is off - assign DAPageFault = ~PTE_A; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | DAPageFault | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); - end else begin:dtlb // Data TLB fault checking - logic ImproperPrivilege, InvalidRead, InvalidWrite; - - // User mode may only load/store from user mode pages, and supervisor mode - // may only access user mode pages when STATUS_SUM is low. - assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | - ((EffectivePrivilegeMode == `S_MODE) & PTE_U & ~STATUS_SUM); - // Check for read error. Reads are invalid when the page is not readable - // (and executable pages are not readable) or when the page is neither - // readable nor executable (and executable pages are readable). - assign InvalidRead = ReadAccess & ~PTE_R & (~STATUS_MXR | ~PTE_X); - // Check for write error. Writes are invalid when the page's write bit is - // low. - assign InvalidWrite = WriteAccess & ~PTE_W; - // Fault for software handling if access bit is off or writing a page with dirty bit off - assign DAPageFault = ~PTE_A | WriteAccess & ~PTE_D; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | DAPageFault | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); - end - endgenerate + // User mode may only load/store from user mode pages, and supervisor mode + // may only access user mode pages when STATUS_SUM is low. + assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | + ((EffectivePrivilegeMode == `S_MODE) & PTE_U & ~STATUS_SUM); + // Check for read error. Reads are invalid when the page is not readable + // (and executable pages are not readable) or when the page is neither + // readable nor executable (and executable pages are readable). 
+ assign InvalidRead = ReadAccess & ~PTE_R & (~STATUS_MXR | ~PTE_X); + // Check for write error. Writes are invalid when the page's write bit is + // low. + assign InvalidWrite = WriteAccess & ~PTE_W; + // Fault for software handling if access bit is off or writing a page with dirty bit off + assign DAPageFault = ~PTE_A | WriteAccess & ~PTE_D; + assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | DAPageFault | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); + end assign TLBHit = CAMHit & TLBAccess; assign TLBMiss = (~CAMHit | TLBFlush) & Translate & TLBAccess; diff --git a/pipelined/src/mmu/tlbmixer.sv b/pipelined/src/mmu/tlbmixer.sv index 3e3a15601..f44da6a80 100644 --- a/pipelined/src/mmu/tlbmixer.sv +++ b/pipelined/src/mmu/tlbmixer.sv @@ -43,18 +43,16 @@ module tlbmixer ( logic [`PPN_BITS-1:0] PPNMixed; // produce PageNumberMask with 1s where virtual page number bits should be untranslaetd for superpages - generate - if (`XLEN == 32) - // kilopage: 22 bits of PPN, 0 bits of VPN - // megapage: 12 bits of PPN, 10 bits of VPN - mux2 #(22) pnm(22'h000000, 22'h0003FF, HitPageType[0], PageNumberMask); - else - // kilopage: 44 bits of PPN, 0 bits of VPN - // megapage: 35 bits of PPN, 9 bits of VPN - // gigapage: 26 bits of PPN, 18 bits of VPN - // terapage: 17 bits of PPN, 27 bits of VPN - mux4 #(44) pnm(44'h00000000000, 44'h000000001FF, 44'h0000003FFFF, 44'h00007FFFFFF, HitPageType, PageNumberMask); - endgenerate + if (`XLEN == 32) + // kilopage: 22 bits of PPN, 0 bits of VPN + // megapage: 12 bits of PPN, 10 bits of VPN + mux2 #(22) pnm(22'h000000, 22'h0003FF, HitPageType[0], PageNumberMask); + else + // kilopage: 44 bits of PPN, 0 bits of VPN + // megapage: 35 bits of PPN, 9 bits of VPN + // gigapage: 26 bits of PPN, 18 bits of VPN + // terapage: 17 bits of PPN, 27 bits of VPN + mux4 #(44) pnm(44'h00000000000, 44'h000000001FF, 44'h0000003FFFF, 44'h00007FFFFFF, HitPageType, PageNumberMask); // merge low segments of VPN 
with high segments of PPN decided by the pagetype. assign ZeroExtendedVPN = {{EXTRA_BITS{1'b0}}, VPN}; // forces the VPN to be the same width as PPN. diff --git a/pipelined/src/muldiv/div.sv b/pipelined/src/muldiv/div.sv deleted file mode 100755 index d7f311a3f..000000000 --- a/pipelined/src/muldiv/div.sv +++ /dev/null @@ -1,1376 +0,0 @@ -/////////////////////////////////////////// -// divide4x64.sv -// -// Written: James.Stine@okstate.edu 1 February 2021 -// Modified: -// -// Purpose: Integer Divide instructions -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - -// *** I added these verilator controls to clean up the -// lint output. The linter warnings should be fixed, but now the output is at -// least readable. 
-/* verilator lint_off COMBDLY */ -/* verilator lint_off IMPLICIT */ - -module intdiv #(parameter WIDTH=64) - (Qf, remf, done, divBusy, div0, N, D, clk, reset, start, S); - - input logic [WIDTH-1:0] N, D; - input logic clk; - input logic reset; - input logic start; - input logic S; - - output logic [WIDTH-1:0] Qf; - output logic [WIDTH-1:0] remf; - output logic div0; - output logic done; - output logic divBusy; - - logic enable; - logic state0; - logic V; - logic [$clog2(WIDTH):0] Num; - logic [$clog2(WIDTH)-1:0] P, NumIter, RemShift; - logic [WIDTH-1:0] op1, op2, op1shift, Rem5; - logic [WIDTH:0] Qd, Rd, Qd2, Rd2; - logic [WIDTH-1:0] Q, rem0; - logic [3:0] quotient; - logic otfzero; - logic shiftResult; - logic enablev, state0v, donev, oftzerov, divBusyv, ulp; - - logic [WIDTH-1:0] twoD; - logic [WIDTH-1:0] twoN; - logic SignD; - logic SignN; - logic [WIDTH-1:0] QT, remT; - logic D_NegOne; - logic Max_N; - - logic otfzerov; - logic tcQ; - logic tcR; - - // Check if negative (two's complement) - // If so, convert to positive - adder #(WIDTH) cpa1 ((D ^ {WIDTH{D[WIDTH-1]&S}}), {{WIDTH-1{1'b0}}, D[WIDTH-1]&S}, twoD); - adder #(WIDTH) cpa2 ((N ^ {WIDTH{N[WIDTH-1]&S}}), {{WIDTH-1{1'b0}}, N[WIDTH-1]&S}, twoN); - assign SignD = D[WIDTH-1]; - assign SignN = N[WIDTH-1]; - // Max N and D = -1 (Overflow) - assign Max_N = (~|N[WIDTH-2:0]) & N[WIDTH-1]; - assign D_NegOne = &D; - - // Divider goes the distance to 37 cycles - // (thanks to the evil divisor for D = 0x1) - - // Shift D, if needed (for integer) - // needed to allow qst to be in range for integer - // division [1,2) and allow integer divide to work. - // - // The V or valid bit can be used to determine if D - // is 0 and thus a divide by 0 exception. This div0 - // exception is given to FSM to tell the operation to - // quit gracefully. 
- lzd_hier #(WIDTH) p1 (.ZP(P), .ZV(V), .B(twoD)); - shift_left #(WIDTH) p2 (twoD, P, op2); - assign op1 = twoN; - assign div0 = ~V; - - // #iter: N = m+v+s = m+2+s (mod k = 0) - // v = 2 since \rho < 1 (add 4 to make sure its a ceil) - // k = 2 (r = 2^k) - adder #($clog2(WIDTH)+1) cpa3 ({1'b0, P}, - {{$clog2(WIDTH)+1-3{1'b0}}, shiftResult, ~shiftResult, 1'b0}, - Num); - - // Determine whether need to add just Q/Rem - assign shiftResult = P[0]; - // div by 2 (ceil) - assign NumIter = Num[$clog2(WIDTH):1]; - assign RemShift = P; - - // FSM to control integer divider - // assume inputs are postive edge and - // datapath (divider) is negative edge - fsm64 #($clog2(WIDTH)) fsm1 (enablev, state0v, donev, otfzerov, divBusyv, - start, div0, NumIter, ~clk, reset); - - flopr #(1) rega (~clk, reset, donev, done); - flopr #(1) regc (~clk, reset, otfzerov, otfzero); - flopr #(1) regd (~clk, reset, enablev, enable); - flopr #(1) rege (~clk, reset, state0v, state0); - flopr #(1) regf (~clk, reset, divBusyv, divBusy); - - // To obtain a correct remainder the last bit of the - // quotient has to be aligned with a radix-r boundary. - // Since the quotient is in the range 1/2 < q < 2 (one - // integer bit and m fractional bits), this is achieved by - // shifting N right by v+s so that (m+v+s) mod k = 0. And, - // the quotient has to be aligned to the integer position. 
- divide4 #(WIDTH) p3 (Qd, Rd, quotient, op1, op2, clk, reset, state0, - enable, otfzero, shiftResult); - - // Storage registers to hold contents stable - flopenr #(WIDTH+1) reg3 (clk, reset, enable, Rd, Rd2); - flopenr #(WIDTH+1) reg4 (clk, reset, enable, Qd, Qd2); - - // Probably not needed - just assigns results - assign Q = Qd2[WIDTH-1:0]; - assign Rem5 = Rd2[WIDTH:1]; - - // Adjust remainder by m (no need to adjust by - shift_right #(WIDTH) p4 (Rem5, RemShift, rem0); - - // Adjust Q/Rem for Signed - assign tcQ = (SignN ^ SignD) & S; - assign tcR = SignN & S; - - // When Dividend (N) and/or Divisor (D) are negative (first bit is '1'): - // - When N and D are negative: Remainder is negative (undergoes a two's complement). - // - When N is negative: Quotient and Remainder are both negative (undergo a two's complement). - // - When D is negative: Quotient is negative (undergoes a two's complement). - adder #(WIDTH) cpa4 ((rem0 ^ {WIDTH{tcR}}), {{WIDTH-1{1'b0}}, tcR}, remT); - adder #(WIDTH) cpa5 ((Q ^ {WIDTH{tcQ}}), {{WIDTH-1{1'b0}}, tcQ}, QT); - - // RISC-V has exceptions for divide by 0 and overflow (see Table 6.1 of spec) - exception_int #(WIDTH) exc (QT, remT, N, S, div0, Max_N, D_NegOne, Qf, remf); - -endmodule // int32div - -// Division by Recurrence (r=4) -module divide4 #(parameter WIDTH=64) - (Q, rem0, quotient, op1, op2, clk, reset, state0, - enable, otfzero, shiftResult); - - input logic [WIDTH-1:0] op1, op2; - input logic clk, state0; - input logic reset; - input logic enable; - input logic otfzero; - input logic shiftResult; - - output logic [WIDTH:0] rem0; - output logic [WIDTH:0] Q; - output logic [3:0] quotient; - - logic [WIDTH+3:0] Sum, Carry; - logic [WIDTH:0] Qstar; - logic [WIDTH:0] QMstar; - logic [7:0] qtotal; - logic [WIDTH+3:0] SumN, CarryN, SumN2, CarryN2; - logic [WIDTH+3:0] divi1, divi2, divi1c, divi2c, dive1; - logic [WIDTH+3:0] mdivi_temp, mdivi; - logic zero; - logic [1:0] qsel; - logic [1:0] Qin, QMin; - logic CshiftQ, CshiftQM; - 
logic [WIDTH+3:0] rem1, rem2, rem3; - logic [WIDTH+3:0] SumR, CarryR; - logic [WIDTH:0] Qt; - - logic ulp; - - // Create one's complement values of Divisor (for q*D) - assign divi1 = {3'h0, op2, 1'b0}; - assign divi2 = {2'h0, op2, 2'b0}; - assign divi1c = ~divi1; - assign divi2c = ~divi2; - // Shift x1 if not mod k - mux2 #(WIDTH+4) mx1 ({3'b000, op1, 1'b0}, {4'h0, op1}, shiftResult, dive1); - - // I I I . F F F F F ... (Robertson Criteria - \rho * qmax * D) - mux2 #(WIDTH+4) mx2 ({CarryN2[WIDTH+1:0], 2'h0}, {WIDTH+4{1'b0}}, state0, CarryN); - mux2 #(WIDTH+4) mx3 ({SumN2[WIDTH+1:0], 2'h0}, dive1, state0, SumN); - // Simplify QST - adder #(8) cpa1 (SumN[WIDTH+3:WIDTH-4], CarryN[WIDTH+3:WIDTH-4], qtotal); - // q = {+2, +1, -1, -2} else q = 0 - qst4 pd1 (qtotal[7:1], divi1[WIDTH-1:WIDTH-3], quotient); - assign ulp = quotient[2]|quotient[3]; - assign zero = ~(quotient[3]|quotient[2]|quotient[1]|quotient[0]); - // Map to binary encoding - assign qsel[1] = quotient[3]|quotient[2]; - assign qsel[0] = quotient[3]|quotient[1]; - mux4 #(WIDTH+4) mx4 (divi2, divi1, divi1c, divi2c, qsel, mdivi_temp); - mux2 #(WIDTH+4) mx5 (mdivi_temp, {WIDTH+4{1'b0}}, zero, mdivi); - csa #(WIDTH+4) csa1 (mdivi, SumN, {CarryN[WIDTH+3:1], ulp}, Sum, Carry); - // regs : save CSA - flopenr #(WIDTH+4) reg1 (clk, reset, enable, Sum, SumN2); - flopenr #(WIDTH+4) reg2 (clk, reset, enable, Carry, CarryN2); - // OTF - ls_control otf1 (quotient, Qin, QMin, CshiftQ, CshiftQM); - otf #(WIDTH+1) otf2 (Qin, QMin, CshiftQ, CshiftQM, clk, - otfzero, enable, Qstar, QMstar); - - // Correction and generation of Remainder - adder #(WIDTH+4) cpa2 (SumN2[WIDTH+3:0], CarryN2[WIDTH+3:0], rem1); - // Add back +D as correction - csa #(WIDTH+4) csa2 (CarryN2[WIDTH+3:0], SumN2[WIDTH+3:0], divi1, SumR, CarryR); - adder #(WIDTH+4) cpa3 (SumR, CarryR, rem2); - // Choose remainder (Rem or Rem+D) - mux2 #(WIDTH+4) mx6 (rem1, rem2, rem1[WIDTH+3], rem3); - // Choose correct Q or QM - mux2 #(WIDTH+1) mx7 (Qstar, QMstar, 
rem1[WIDTH+3], Qt); - // Final results - assign rem0 = rem3[WIDTH:0]; - assign Q = Qt; - -endmodule // divide4x64 - -// Load/Control for OTFC -module ls_control (quot, Qin, QMin, CshiftQ, CshiftQM); - - input logic [3:0] quot; - - output logic [1:0] Qin; - output logic [1:0] QMin; - output logic CshiftQ; - output logic CshiftQM; - - // Load/Store Control for OTF - assign Qin[1] = (quot[1]) | (quot[3]) | (quot[0]); - assign Qin[0] = (quot[1]) | (quot[2]); - assign QMin[1] = (quot[1]) | (!quot[3]&!quot[2]&!quot[1]&!quot[0]); - assign QMin[0] = (quot[3]) | (quot[0]) | - (!quot[3]&!quot[2]&!quot[1]&!quot[0]); - assign CshiftQ = (quot[1]) | (quot[0]); - assign CshiftQM = (quot[3]) | (quot[2]); - -endmodule - -// On-the-fly Conversion (OTFC) -module otf #(parameter WIDTH=8) - (Qin, QMin, CshiftQ, CshiftQM, clk, reset, enable, R2Q, R1Q); - - input logic [1:0] Qin, QMin; - input logic CshiftQ, CshiftQM; - input logic clk; - input logic reset; - input logic enable; - - output logic [WIDTH-1:0] R2Q; - output logic [WIDTH-1:0] R1Q; - - logic [WIDTH-1:0] Qstar, QMstar; - logic [WIDTH-1:0] M1Q, M2Q; - - // QM - mux2 #(WIDTH) m1 (QMstar, Qstar, CshiftQM, M1Q); - flopenr #(WIDTH) r1 (clk, reset, enable, {M1Q[WIDTH-3:0], QMin}, R1Q); - // Q - mux2 #(WIDTH) m2 (Qstar, QMstar, CshiftQ, M2Q); - flopenr #(WIDTH) r2 (clk, reset, enable, {M2Q[WIDTH-3:0], Qin}, R2Q); - - assign Qstar = R2Q; - assign QMstar = R1Q; - -endmodule // otf8 -/* -module adder #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, - output logic [WIDTH-1:0] y); - - assign y = a + b; - -endmodule // adder -*/ - -module fa (input logic a, b, c, output logic sum, carry); - - assign sum = a^b^c; - assign carry = a&b|a&c|b&c; - -endmodule // fa - -module csa #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, c, - output logic [WIDTH-1:0] sum, carry); - - logic [WIDTH:0] carry_temp; - genvar i; - generate - for (i=0;i B. LT and GT are both '0' if A = B. 
- -module magcompare2b (LT, GT, A, B); - - input logic [1:0] A; - input logic [1:0] B; - - output logic LT; - output logic GT; - - // Determine if A < B using a minimized sum-of-products expression - assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; - // Determine if A > B using a minimized sum-of-products expression - assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; - -endmodule // magcompare2b - -// J. E. Stine and M. J. Schulte, "A combined two's complement and -// floating-point comparator," 2005 IEEE International Symposium on -// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. -// doi: 10.1109/ISCAS.2005.1464531 - -module magcompare8 (LT, EQ, A, B); - - input logic [7:0] A; - input logic [7:0] B; - - logic [3:0] s; - logic [3:0] t; - logic [1:0] u; - logic [1:0] v; - logic GT; - //wire LT; - - output logic EQ; - output logic LT; - - magcompare2b mag1 (s[0], t[0], A[1:0], B[1:0]); - magcompare2b mag2 (s[1], t[1], A[3:2], B[3:2]); - magcompare2b mag3 (s[2], t[2], A[5:4], B[5:4]); - magcompare2b mag4 (s[3], t[3], A[7:6], B[7:6]); - - magcompare2b mag5 (u[0], v[0], t[1:0], s[1:0]); - magcompare2b mag6 (u[1], v[1], t[3:2], s[3:2]); - - magcompare2b mag7 (LT, GT, v[1:0], u[1:0]); - - assign EQ = ~(GT | LT); - -endmodule // magcompare8 - -// RISC-V Exception Logic for Divide by 0 and Overflow (Signed Integer Divide) -module exception_int #(parameter WIDTH=8) - (Q, rem, op1, S, div0, Max_N, D_NegOne, Qf, remf); - - input logic [WIDTH-1:0] Q; - input logic [WIDTH-1:0] rem; - input logic [WIDTH-1:0] op1; - input logic S; - input logic div0; - input logic Max_N; - input logic D_NegOne; - - output logic [WIDTH-1:0] Qf; - output logic [WIDTH-1:0] remf; - - always_comb - case ({div0, S, Max_N, D_NegOne}) - 4'b0000 : Qf = Q; - 4'b0001 : Qf = Q; - 4'b0010 : Qf = Q; - 4'b0011 : Qf = Q; - 4'b0100 : Qf = Q; - 4'b0101 : Qf = Q; - 4'b0110 : Qf = Q; - 4'b0111 : Qf = {1'b1, {WIDTH-1{1'h0}}}; - 4'b1000 : Qf = {WIDTH{1'b1}}; - 4'b1001 : Qf = {WIDTH{1'b1}}; 
- 4'b1010 : Qf = {WIDTH{1'b1}}; - 4'b1011 : Qf = {WIDTH{1'b1}}; - 4'b1100 : Qf = {WIDTH{1'b1}}; - 4'b1101 : Qf = {WIDTH{1'b1}}; - 4'b1110 : Qf = {WIDTH{1'b1}}; - 4'b1111 : Qf = {WIDTH{1'b1}}; - default: Qf = Q; - endcase - - always_comb - case ({div0, S, Max_N, D_NegOne}) - 4'b0000 : remf = rem; - 4'b0001 : remf = rem; - 4'b0010 : remf = rem; - 4'b0011 : remf = rem; - 4'b0100 : remf = rem; - 4'b0101 : remf = rem; - 4'b0110 : remf = rem; - 4'b0111 : remf = {WIDTH{1'h0}}; - 4'b1000 : remf = op1; - 4'b1001 : remf = op1; - 4'b1010 : remf = op1; - 4'b1011 : remf = op1; - 4'b1100 : remf = op1; - 4'b1101 : remf = op1; - 4'b1110 : remf = op1; - 4'b1111 : remf = op1; - default: remf = rem; - endcase - -endmodule // exception_int - -/* verilator lint_on COMBDLY */ -/* verilator lint_on IMPLICIT */ diff --git a/pipelined/src/muldiv/intdivrestoring.sv b/pipelined/src/muldiv/intdivrestoring.sv index 04eba4af3..c64c01084 100644 --- a/pipelined/src/muldiv/intdivrestoring.sv +++ b/pipelined/src/muldiv/intdivrestoring.sv @@ -60,15 +60,13 @@ module intdivrestoring ( assign DivBusyE = (state == BUSY) | DivStartE; // Handle sign extension for W-type instructions - generate - if (`XLEN == 64) begin:rv64 // RV64 has W-type instructions - mux2 #(`XLEN) xinmux(ForwardedSrcAE, {ForwardedSrcAE[31:0], 32'b0}, W64E, XinE); - mux2 #(`XLEN) dinmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31]&DivSignedE}}, ForwardedSrcBE[31:0]}, W64E, DinE); - end else begin // RV32 has no W-type instructions - assign XinE = ForwardedSrcAE; - assign DinE = ForwardedSrcBE; + if (`XLEN == 64) begin:rv64 // RV64 has W-type instructions + mux2 #(`XLEN) xinmux(ForwardedSrcAE, {ForwardedSrcAE[31:0], 32'b0}, W64E, XinE); + mux2 #(`XLEN) dinmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31]&DivSignedE}}, ForwardedSrcBE[31:0]}, W64E, DinE); + end else begin // RV32 has no W-type instructions + assign XinE = ForwardedSrcAE; + assign DinE = ForwardedSrcBE; end - endgenerate // Extract sign bits and check fo division by zero assign 
SignDE = DivSignedE & DinE[`XLEN-1]; @@ -97,11 +95,9 @@ module intdivrestoring ( flopen #(3) Div0eMReg(clk, DivStartE, {Div0E, NegQE, SignXE}, {Div0M, NegQM, NegWM}); // one copy of divstep for each bit produced per cycle - generate - genvar i; - for (i=0; i<`DIV_BITSPERCYCLE; i = i+1) - intdivrestoringstep divstep(WM[i], XQM[i], DAbsBM, WM[i+1], XQM[i+1]); - endgenerate + genvar i; + for (i=0; i<`DIV_BITSPERCYCLE; i = i+1) + intdivrestoringstep divstep(WM[i], XQM[i], DAbsBM, WM[i+1], XQM[i+1]); // On final setp of signed operations, negate outputs as needed to get correct sign neg #(`XLEN) qneg(XQM[0], XQnM); diff --git a/pipelined/src/muldiv/mul.sv b/pipelined/src/muldiv/mul.sv index e1174a9cc..565a5d640 100644 --- a/pipelined/src/muldiv/mul.sv +++ b/pipelined/src/muldiv/mul.sv @@ -49,8 +49,8 @@ module mul ( // Signed * Unsigned = P' + ( PA - PB)*2^(XLEN-1) - PP*2^(2XLEN-2) // Unsigned * Unsigned = P' + ( PA + PB)*2^(XLEN-1) + PP*2^(2XLEN-2) - logic [`XLEN*2-1:0] PP0E, PP1E, PP2E, PP3E, PP4E; - logic [`XLEN*2-1:0] PP0M, PP1M, PP2M, PP3M, PP4M; + logic [`XLEN*2-1:0] PP1E, PP2E, PP3E, PP4E; + logic [`XLEN*2-1:0] PP1M, PP2M, PP3M, PP4M; logic [`XLEN-2:0] PA, PB; logic PP; logic MULH, MULHSU; @@ -62,7 +62,7 @@ module mul ( assign Aprime = {1'b0, ForwardedSrcAE[`XLEN-2:0]}; assign Bprime = {1'b0, ForwardedSrcBE[`XLEN-2:0]}; - redundantmul #(`XLEN) bigmul(.a(Aprime), .b(Bprime), .out0(PP0E), .out1(PP1E)); + assign PP1E = Aprime * Bprime; assign PA = {(`XLEN-1){ForwardedSrcAE[`XLEN-1]}} & ForwardedSrcBE[`XLEN-2:0]; assign PB = {(`XLEN-1){ForwardedSrcBE[`XLEN-1]}} & ForwardedSrcAE[`XLEN-2:0]; assign PP = ForwardedSrcAE[`XLEN-1] & ForwardedSrcBE[`XLEN-1]; @@ -83,12 +83,11 @@ module mul ( // Memory Stage: Sum partial proudcts ////////////////////////////// - flopenrc #(`XLEN*2) PP0Reg(clk, reset, FlushM, ~StallM, PP0E, PP0M); flopenrc #(`XLEN*2) PP1Reg(clk, reset, FlushM, ~StallM, PP1E, PP1M); flopenrc #(`XLEN*2) PP2Reg(clk, reset, FlushM, ~StallM, PP2E, PP2M); flopenrc 
#(`XLEN*2) PP3Reg(clk, reset, FlushM, ~StallM, PP3E, PP3M); flopenrc #(`XLEN*2) PP4Reg(clk, reset, FlushM, ~StallM, PP4E, PP4M); - assign ProdM = PP0M + PP1M + PP2M + PP3M + PP4M; //ForwardedSrcAE * ForwardedSrcBE; + assign ProdM = PP1M + PP2M + PP3M + PP4M; //ForwardedSrcAE * ForwardedSrcBE; endmodule diff --git a/pipelined/src/muldiv/muldiv.sv b/pipelined/src/muldiv/muldiv.sv index a1e76616c..e51bec2f1 100644 --- a/pipelined/src/muldiv/muldiv.sv +++ b/pipelined/src/muldiv/muldiv.sv @@ -74,13 +74,11 @@ module muldiv ( // Handle sign extension for W-type instructions flopenrc #(1) W64MReg(clk, reset, FlushM, ~StallM, W64E, W64M); - generate - if (`XLEN == 64) begin:resmux // RV64 has W-type instructions - assign MulDivResultM = W64M ? {{32{PrelimResultM[31]}}, PrelimResultM[31:0]} : PrelimResultM; - end else begin:resmux // RV32 has no W-type instructions - assign MulDivResultM = PrelimResultM; - end - endgenerate + if (`XLEN == 64) begin:resmux // RV64 has W-type instructions + assign MulDivResultM = W64M ? 
{{32{PrelimResultM[31]}}, PrelimResultM[31:0]} : PrelimResultM; + end else begin:resmux // RV32 has no W-type instructions + assign MulDivResultM = PrelimResultM; + end // Writeback stage pipeline register flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW); diff --git a/pipelined/src/privileged/csrc.sv b/pipelined/src/privileged/csrc.sv index bd529e6c2..4e75486d3 100644 --- a/pipelined/src/privileged/csrc.sv +++ b/pipelined/src/privileged/csrc.sv @@ -56,111 +56,109 @@ module csrc #(parameter output logic IllegalCSRCAccessM ); - generate - if (`ZICOUNTERS_SUPPORTED) begin:counters - (* mark_debug = "true" *) logic [63:0] CYCLE_REGW, INSTRET_REGW; - logic [63:0] CYCLEPlusM, INSTRETPlusM; - logic [`XLEN-1:0] NextCYCLEM, NextINSTRETM; - logic WriteCYCLEM, WriteINSTRETM; - logic [4:0] CounterNumM; - logic [`XLEN-1:0] HPMCOUNTER_REGW[`COUNTERS-1:0]; - logic [`XLEN-1:0] HPMCOUNTERH_REGW[`COUNTERS-1:0]; - logic InstrValidNotFlushedM; - logic LoadStallE, LoadStallM; - logic [`COUNTERS-1:0] WriteHPMCOUNTERM; - logic [`COUNTERS-1:0] CounterEvent; - logic [63:0] HPMCOUNTERPlusM[`COUNTERS-1:0]; - logic [`XLEN-1:0] NextHPMCOUNTERM[`COUNTERS-1:0]; - genvar i; + if (`ZICOUNTERS_SUPPORTED) begin:counters + (* mark_debug = "true" *) logic [63:0] CYCLE_REGW, INSTRET_REGW; + logic [63:0] CYCLEPlusM, INSTRETPlusM; + logic [`XLEN-1:0] NextCYCLEM, NextINSTRETM; + logic WriteCYCLEM, WriteINSTRETM; + logic [4:0] CounterNumM; + logic [`XLEN-1:0] HPMCOUNTER_REGW[`COUNTERS-1:0]; + logic [`XLEN-1:0] HPMCOUNTERH_REGW[`COUNTERS-1:0]; + logic InstrValidNotFlushedM; + logic LoadStallE, LoadStallM; + logic [`COUNTERS-1:0] WriteHPMCOUNTERM; + logic [`COUNTERS-1:0] CounterEvent; + logic [63:0] HPMCOUNTERPlusM[`COUNTERS-1:0]; + logic [`XLEN-1:0] NextHPMCOUNTERM[`COUNTERS-1:0]; + genvar i; - // Interface signals - flopenrc #(1) LoadStallEReg(.clk, .reset, .clear(1'b0), .en(~StallE), .d(LoadStallD), .q(LoadStallE)); // don't flush the load stall during a load 
stall. - flopenrc #(1) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d(LoadStallE), .q(LoadStallM)); - assign InstrValidNotFlushedM = InstrValidM & ~StallW & ~FlushW; - - // Determine when to increment each counter - assign CounterEvent[0] = 1'b1; // MCYCLE always increments - assign CounterEvent[1] = 1'b0; // Counter 0 doesn't exist - assign CounterEvent[2] = InstrValidNotFlushedM; - if(`QEMU) begin // No other performance counters in QEMU - assign CounterEvent[`COUNTERS-1:3] = 0; - end else begin // User-defined counters - assign CounterEvent[3] = LoadStallM; // don't want to suppress on flush as this only happens if flushed. - assign CounterEvent[4] = BPPredDirWrongM & InstrValidNotFlushedM; - assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; - assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; - assign CounterEvent[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & InstrValidNotFlushedM; - assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; - assign CounterEvent[9] = InstrClassM[3] & InstrValidNotFlushedM; - assign CounterEvent[10] = BPPredClassNonCFIWrongM & InstrValidNotFlushedM; - assign CounterEvent[11] = DCacheAccess; - assign CounterEvent[12] = DCacheMiss; - assign CounterEvent[`COUNTERS-1:13] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions - end - - // Counter update and write logic - for (i = 0; i < `COUNTERS; i = i+1) begin - assign WriteHPMCOUNTERM[i] = CSRMWriteM & (CSRAdrM == MHPMCOUNTERBASE + i); - assign NextHPMCOUNTERM[i][`XLEN-1:0] = WriteHPMCOUNTERM[i] ? 
CSRWriteValM : HPMCOUNTERPlusM[i][`XLEN-1:0]; - always_ff @(posedge clk) //, posedge reset) // ModelSim doesn't like syntax of passing array element to flop - if (reset) HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 0; - else HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERM[i]; - - if (`XLEN==32) begin // write high and low separately - logic [`COUNTERS-1:0] WriteHPMCOUNTERHM; - logic [`XLEN-1:0] NextHPMCOUNTERHM[`COUNTERS-1:0]; - assign HPMCOUNTERPlusM[i] = {HPMCOUNTERH_REGW[i], HPMCOUNTER_REGW[i]} + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; - assign WriteHPMCOUNTERHM[i] = CSRMWriteM & (CSRAdrM == MHPMCOUNTERHBASE + i); - assign NextHPMCOUNTERHM[i] = WriteHPMCOUNTERHM[i] ? CSRWriteValM : HPMCOUNTERPlusM[i][63:32]; - always_ff @(posedge clk) //, posedge reset) // ModelSim doesn't like syntax of passing array element to flop - if (reset) HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 0; - else HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERHM[i]; - end else begin // XLEN=64; write entire register - assign HPMCOUNTERPlusM[i] = HPMCOUNTER_REGW[i] + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; - end - end - - // Read Counters, or cause excepiton if insufficient privilege in light of COUNTEREN flags - assign CounterNumM = CSRAdrM[4:0]; // which counter to read? 
- always_comb - if (PrivilegeModeW == `M_MODE | - MCOUNTEREN_REGW[CounterNumM] & (!`S_SUPPORTED | PrivilegeModeW == `S_MODE | SCOUNTEREN_REGW[CounterNumM])) begin - IllegalCSRCAccessM = 0; - if (`XLEN==64) begin // 64-bit counter reads - // Veri lator doesn't realize this only occurs for XLEN=64 - /* verilator lint_off WIDTH */ - if (CSRAdrM == TIME) CSRCReadValM = MTIME_CLINT; // TIME register is a shadow of the memory-mapped MTIME from the CLINT - /* verilator lint_on WIDTH */ - else if (CSRAdrM >= MHPMCOUNTERBASE & CSRAdrM < MHPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; - else if (CSRAdrM >= HPMCOUNTERBASE & CSRAdrM < HPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; - else begin - CSRCReadValM = 0; - IllegalCSRCAccessM = 1; // requested CSR doesn't exist - end - end else begin // 32-bit counter reads - // Veri lator doesn't realize this only occurs for XLEN=32 - /* verilator lint_off WIDTH */ - if (CSRAdrM == TIME) CSRCReadValM = MTIME_CLINT[31:0];// TIME register is a shadow of the memory-mapped MTIME from the CLINT - else if (CSRAdrM == TIMEH) CSRCReadValM = MTIME_CLINT[63:32]; - /* verilator lint_on WIDTH */ - else if (CSRAdrM >= MHPMCOUNTERBASE & CSRAdrM < MHPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; - else if (CSRAdrM >= HPMCOUNTERBASE & CSRAdrM < HPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; - else if (CSRAdrM >= MHPMCOUNTERHBASE & CSRAdrM < MHPMCOUNTERHBASE+`COUNTERS) CSRCReadValM = HPMCOUNTERH_REGW[CounterNumM]; - else if (CSRAdrM >= HPMCOUNTERHBASE & CSRAdrM < HPMCOUNTERHBASE+`COUNTERS) CSRCReadValM = HPMCOUNTERH_REGW[CounterNumM]; - else begin - CSRCReadValM = 0; - IllegalCSRCAccessM = 1; // requested CSR doesn't exist - end - end - end else begin - CSRCReadValM = 0; - IllegalCSRCAccessM = 1; // no privileges for this csr - end - end else begin - assign CSRCReadValM = 0; - assign IllegalCSRCAccessM = 1; // counters aren't enabled + // Interface signals + 
flopenrc #(1) LoadStallEReg(.clk, .reset, .clear(1'b0), .en(~StallE), .d(LoadStallD), .q(LoadStallE)); // don't flush the load stall during a load stall. + flopenrc #(1) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d(LoadStallE), .q(LoadStallM)); + assign InstrValidNotFlushedM = InstrValidM & ~StallW & ~FlushW; + + // Determine when to increment each counter + assign CounterEvent[0] = 1'b1; // MCYCLE always increments + assign CounterEvent[1] = 1'b0; // Counter 1 (TIME) doesn't exist + assign CounterEvent[2] = InstrValidNotFlushedM; + if(`QEMU) begin: cevent // No other performance counters in QEMU + assign CounterEvent[`COUNTERS-1:3] = 0; + end else begin: cevent // User-defined counters + assign CounterEvent[3] = LoadStallM; // don't want to suppress on flush as this only happens if flushed. + assign CounterEvent[4] = BPPredDirWrongM & InstrValidNotFlushedM; + assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; + assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; + assign CounterEvent[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & InstrValidNotFlushedM; + assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; + assign CounterEvent[9] = InstrClassM[3] & InstrValidNotFlushedM; + assign CounterEvent[10] = BPPredClassNonCFIWrongM & InstrValidNotFlushedM; + assign CounterEvent[11] = DCacheAccess; + assign CounterEvent[12] = DCacheMiss; + assign CounterEvent[`COUNTERS-1:13] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions end - endgenerate + + // Counter update and write logic + for (i = 0; i < `COUNTERS; i = i+1) begin:cntr + assign WriteHPMCOUNTERM[i] = CSRMWriteM & (CSRAdrM == MHPMCOUNTERBASE + i); + assign NextHPMCOUNTERM[i][`XLEN-1:0] = WriteHPMCOUNTERM[i] ? 
CSRWriteValM : HPMCOUNTERPlusM[i][`XLEN-1:0]; + always_ff @(posedge clk) //, posedge reset) // ModelSim doesn't like syntax of passing array element to flop + if (reset) HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 0; + else HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERM[i]; + + if (`XLEN==32) begin // write high and low separately + logic [`COUNTERS-1:0] WriteHPMCOUNTERHM; + logic [`XLEN-1:0] NextHPMCOUNTERHM[`COUNTERS-1:0]; + assign HPMCOUNTERPlusM[i] = {HPMCOUNTERH_REGW[i], HPMCOUNTER_REGW[i]} + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; + assign WriteHPMCOUNTERHM[i] = CSRMWriteM & (CSRAdrM == MHPMCOUNTERHBASE + i); + assign NextHPMCOUNTERHM[i] = WriteHPMCOUNTERHM[i] ? CSRWriteValM : HPMCOUNTERPlusM[i][63:32]; + always_ff @(posedge clk) //, posedge reset) // ModelSim doesn't like syntax of passing array element to flop + if (reset) HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 0; + else HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERHM[i]; + end else begin // XLEN=64; write entire register + assign HPMCOUNTERPlusM[i] = HPMCOUNTER_REGW[i] + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; + end + end + + // Read Counters, or cause exception if insufficient privilege in light of COUNTEREN flags + assign CounterNumM = CSRAdrM[4:0]; // which counter to read? 
+ always_comb + if (PrivilegeModeW == `M_MODE | + MCOUNTEREN_REGW[CounterNumM] & (!`S_SUPPORTED | PrivilegeModeW == `S_MODE | SCOUNTEREN_REGW[CounterNumM])) begin + IllegalCSRCAccessM = 0; + if (`XLEN==64) begin // 64-bit counter reads + // Veri lator doesn't realize this only occurs for XLEN=64 + /* verilator lint_off WIDTH */ + if (CSRAdrM == TIME) CSRCReadValM = MTIME_CLINT; // TIME register is a shadow of the memory-mapped MTIME from the CLINT + /* verilator lint_on WIDTH */ + else if (CSRAdrM >= MHPMCOUNTERBASE & CSRAdrM < MHPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; + else if (CSRAdrM >= HPMCOUNTERBASE & CSRAdrM < HPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; + else begin + CSRCReadValM = 0; + IllegalCSRCAccessM = 1; // requested CSR doesn't exist + end + end else begin // 32-bit counter reads + // Veri lator doesn't realize this only occurs for XLEN=32 + /* verilator lint_off WIDTH */ + if (CSRAdrM == TIME) CSRCReadValM = MTIME_CLINT[31:0];// TIME register is a shadow of the memory-mapped MTIME from the CLINT + else if (CSRAdrM == TIMEH) CSRCReadValM = MTIME_CLINT[63:32]; + /* verilator lint_on WIDTH */ + else if (CSRAdrM >= MHPMCOUNTERBASE & CSRAdrM < MHPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; + else if (CSRAdrM >= HPMCOUNTERBASE & CSRAdrM < HPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; + else if (CSRAdrM >= MHPMCOUNTERHBASE & CSRAdrM < MHPMCOUNTERHBASE+`COUNTERS) CSRCReadValM = HPMCOUNTERH_REGW[CounterNumM]; + else if (CSRAdrM >= HPMCOUNTERHBASE & CSRAdrM < HPMCOUNTERHBASE+`COUNTERS) CSRCReadValM = HPMCOUNTERH_REGW[CounterNumM]; + else begin + CSRCReadValM = 0; + IllegalCSRCAccessM = 1; // requested CSR doesn't exist + end + end + end else begin + CSRCReadValM = 0; + IllegalCSRCAccessM = 1; // no privileges for this csr + end + end else begin + assign CSRCReadValM = 0; + assign IllegalCSRCAccessM = 1; // counters aren't enabled + end endmodule // To 
Do: diff --git a/pipelined/src/privileged/csri.sv b/pipelined/src/privileged/csri.sv index 9e4d7850a..81ddf88a0 100644 --- a/pipelined/src/privileged/csri.sv +++ b/pipelined/src/privileged/csri.sv @@ -66,58 +66,54 @@ module csri #(parameter assign WriteSIPM = CSRSWriteM & (CSRAdrM == SIP) & ~StallW; assign WriteSIEM = CSRSWriteM & (CSRAdrM == SIE) & ~StallW; - // Interrupt Pending and Enable Registers - // MEIP, MTIP, MSIP are read-only - // SEIP, STIP, SSIP is writable in MIP if S mode exists - // SSIP is writable in SIP if S mode exists - generate - if (`S_SUPPORTED) begin:mask - assign MIP_WRITE_MASK = 12'h222; // SEIP, STIP, SSIP are writable in MIP (20210108-draft 3.1.9) - assign SIP_WRITE_MASK = 12'h002; // SSIP is writable in SIP (privileged 20210108-draft 4.1.3) - end else begin:mask - assign MIP_WRITE_MASK = 12'h000; - assign SIP_WRITE_MASK = 12'h000; - end - always @(posedge clk) //, posedge reset) begin // *** I strongly feel that IntInM should go directly to IP_REGW -- Ben 9/7/21 - if (reset) IP_REGW_writeable <= 10'b0; - else if (WriteMIPM) IP_REGW_writeable <= (CSRWriteValM[9:0] & MIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable - else if (WriteSIPM) IP_REGW_writeable <= (CSRWriteValM[9:0] & SIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable +// Interrupt Pending and Enable Registers +// MEIP, MTIP, MSIP are read-only +// SEIP, STIP, SSIP is writable in MIP if S mode exists +// SSIP is writable in SIP if S mode exists + if (`S_SUPPORTED) begin:mask + assign MIP_WRITE_MASK = 12'h222; // SEIP, STIP, SSIP are writable in MIP (20210108-draft 3.1.9) + assign SIP_WRITE_MASK = 12'h002; // SSIP is writable in SIP (privileged 20210108-draft 4.1.3) + end else begin:mask + assign MIP_WRITE_MASK = 12'h000; + assign SIP_WRITE_MASK = 12'h000; + end + always @(posedge clk) //, posedge reset) begin // *** I strongly feel that IntInM should go directly to IP_REGW -- Ben 9/7/21 + if (reset) IP_REGW_writeable <= 10'b0; + else if (WriteMIPM) IP_REGW_writeable 
<= (CSRWriteValM[9:0] & MIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable + else if (WriteSIPM) IP_REGW_writeable <= (CSRWriteValM[9:0] & SIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable // else if (WriteUIPM) IP_REGW = (CSRWriteValM & 12'hBBB) | (NextIPM & 12'h080); // MTIP unclearable - else IP_REGW_writeable <= IP_REGW_writeable | IntInM[9:0]; // *** check this turns off interrupts properly even when MIDELEG changes - always @(posedge clk) //, posedge reset) begin - if (reset) IE_REGW <= 12'b0; - else if (WriteMIEM) IE_REGW <= (CSRWriteValM[11:0] & 12'hAAA); // MIE controls M and S fields - else if (WriteSIEM) IE_REGW <= (CSRWriteValM[11:0] & 12'h222) | (IE_REGW & 12'h888); // only S fields + else IP_REGW_writeable <= IP_REGW_writeable | IntInM[9:0]; // *** check this turns off interrupts properly even when MIDELEG changes + always @(posedge clk) //, posedge reset) begin + if (reset) IE_REGW <= 12'b0; + else if (WriteMIEM) IE_REGW <= (CSRWriteValM[11:0] & 12'hAAA); // MIE controls M and S fields + else if (WriteSIEM) IE_REGW <= (CSRWriteValM[11:0] & 12'h222) | (IE_REGW & 12'h888); // only S fields // else if (WriteUIEM) IE_REGW = (CSRWriteValM & 12'h111) | (IE_REGW & 12'hAAA); // only U field - endgenerate // restricted views of registers - generate - always_comb begin:regs - // Add MEIP read-only signal - IP_REGW = {IntInM[11],1'b0,IP_REGW_writeable}; + always_comb begin:regs + // Add MEIP read-only signal + IP_REGW = {IntInM[11],1'b0,IP_REGW_writeable}; - // Machine Mode - MIP_REGW = IP_REGW; - MIE_REGW = IE_REGW; + // Machine Mode + MIP_REGW = IP_REGW; + MIE_REGW = IE_REGW; - // Supervisor mode - if (`S_SUPPORTED) begin - SIP_REGW = IP_REGW & MIDELEG_REGW[11:0] & 'h222; // only delegated interrupts visible - SIE_REGW = IE_REGW & MIDELEG_REGW[11:0] & 'h222; - end else begin - SIP_REGW = 12'b0; - SIE_REGW = 12'b0; - end - - // User Modes iterrupts depricated - /*if (`U_SUPPORTED & `N_SUPPORTED) begin - UIP_REGW = IP_REGW & MIDELEG_REGW & 
SIDELEG_REGW & 'h111; // only delegated interrupts visible - UIE_REGW = IE_REGW & MIDELEG_REGW & SIDELEG_REGW & 'h111; // only delegated interrupts visible - end else begin - UIP_REGW = 12'b0; - UIE_REGW = 12'b0; - end */ + // Supervisor mode + if (`S_SUPPORTED) begin + SIP_REGW = IP_REGW & MIDELEG_REGW[11:0] & 'h222; // only delegated interrupts visible + SIE_REGW = IE_REGW & MIDELEG_REGW[11:0] & 'h222; + end else begin + SIP_REGW = 12'b0; + SIE_REGW = 12'b0; end - endgenerate + + // User Mode interrupts deprecated + /*if (`U_SUPPORTED & `N_SUPPORTED) begin + UIP_REGW = IP_REGW & MIDELEG_REGW & SIDELEG_REGW & 'h111; // only delegated interrupts visible + UIE_REGW = IE_REGW & MIDELEG_REGW & SIDELEG_REGW & 'h111; // only delegated interrupts visible + end else begin + UIP_REGW = 12'b0; + UIE_REGW = 12'b0; + end */ + end endmodule diff --git a/pipelined/src/privileged/csrm.sv b/pipelined/src/privileged/csrm.sv index 17a917d20..3807df8f6 100644 --- a/pipelined/src/privileged/csrm.sv +++ b/pipelined/src/privileged/csrm.sv @@ -92,32 +92,30 @@ module csrm #(parameter // There are PMP_ENTRIES = 0, 16, or 64 PMPADDR registers, each of which has its own flop genvar i; - generate - if (`PMP_ENTRIES > 0) begin:pmp - logic [`PMP_ENTRIES-1:0] WritePMPCFGM; - logic [`PMP_ENTRIES-1:0] WritePMPADDRM ; - logic [`PMP_ENTRIES-1:0] ADDRLocked, CFGLocked; - for(i=0; i<`PMP_ENTRIES; i++) begin - // when the lock bit is set, don't allow writes to the PMPCFG or PMPADDR - // also, when the lock bit of the next entry is set and the next entry is TOR, don't allow writes to this entry PMPADDR - assign CFGLocked[i] = PMPCFG_ARRAY_REGW[i][7]; - if (i == `PMP_ENTRIES-1) - assign ADDRLocked[i] = PMPCFG_ARRAY_REGW[i][7]; - else - assign ADDRLocked[i] = PMPCFG_ARRAY_REGW[i][7] | (PMPCFG_ARRAY_REGW[i+1][7] & PMPCFG_ARRAY_REGW[i+1][4:3] == 2'b01); - - assign WritePMPADDRM[i] = (CSRMWriteM & (CSRAdrM == (PMPADDR0+i))) & ~StallW & ~ADDRLocked[i]; - flopenr #(`XLEN) PMPADDRreg(clk, reset, 
WritePMPADDRM[i], CSRWriteValM, PMPADDR_ARRAY_REGW[i]); - if (`XLEN==64) begin - assign WritePMPCFGM[i] = (CSRMWriteM & (CSRAdrM == (PMPCFG0+2*(i/8)))) & ~StallW & ~CFGLocked[i]; - flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%8)*8+7:(i%8)*8], PMPCFG_ARRAY_REGW[i]); - end else begin - assign WritePMPCFGM[i] = (CSRMWriteM & (CSRAdrM == (PMPCFG0+i/4))) & ~StallW & ~CFGLocked[i]; - flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%4)*8+7:(i%4)*8], PMPCFG_ARRAY_REGW[i]); - end + if (`PMP_ENTRIES > 0) begin:pmp + logic [`PMP_ENTRIES-1:0] WritePMPCFGM; + logic [`PMP_ENTRIES-1:0] WritePMPADDRM ; + logic [`PMP_ENTRIES-1:0] ADDRLocked, CFGLocked; + for(i=0; i<`PMP_ENTRIES; i++) begin + // when the lock bit is set, don't allow writes to the PMPCFG or PMPADDR + // also, when the lock bit of the next entry is set and the next entry is TOR, don't allow writes to this entry PMPADDR + assign CFGLocked[i] = PMPCFG_ARRAY_REGW[i][7]; + if (i == `PMP_ENTRIES-1) + assign ADDRLocked[i] = PMPCFG_ARRAY_REGW[i][7]; + else + assign ADDRLocked[i] = PMPCFG_ARRAY_REGW[i][7] | (PMPCFG_ARRAY_REGW[i+1][7] & PMPCFG_ARRAY_REGW[i+1][4:3] == 2'b01); + + assign WritePMPADDRM[i] = (CSRMWriteM & (CSRAdrM == (PMPADDR0+i))) & ~StallW & ~ADDRLocked[i]; + flopenr #(`XLEN) PMPADDRreg(clk, reset, WritePMPADDRM[i], CSRWriteValM, PMPADDR_ARRAY_REGW[i]); + if (`XLEN==64) begin + assign WritePMPCFGM[i] = (CSRMWriteM & (CSRAdrM == (PMPCFG0+2*(i/8)))) & ~StallW & ~CFGLocked[i]; + flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%8)*8+7:(i%8)*8], PMPCFG_ARRAY_REGW[i]); + end else begin + assign WritePMPCFGM[i] = (CSRMWriteM & (CSRAdrM == (PMPCFG0+i/4))) & ~StallW & ~CFGLocked[i]; + flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%4)*8+7:(i%4)*8], PMPCFG_ARRAY_REGW[i]); end end - endgenerate + end localparam MISA_26 = (`MISA) & 32'h03ffffff; @@ -143,28 +141,24 @@ module csrm #(parameter // CSRs flopenr #(`XLEN) MTVECreg(clk, reset, 
WriteMTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, MTVEC_REGW); //busybear: changed reset value to 0 - generate - if (`S_SUPPORTED | (`U_SUPPORTED & `N_SUPPORTED)) begin:deleg // DELEG registers should exist - flopenr #(`XLEN) MEDELEGreg(clk, reset, WriteMEDELEGM, CSRWriteValM & MEDELEG_MASK /*12'h7FF*/, MEDELEG_REGW); - flopenr #(`XLEN) MIDELEGreg(clk, reset, WriteMIDELEGM, CSRWriteValM & MIDELEG_MASK /*12'h222*/, MIDELEG_REGW); - end else begin - assign MEDELEG_REGW = 0; - assign MIDELEG_REGW = 0; - end - endgenerate + if (`S_SUPPORTED | (`U_SUPPORTED & `N_SUPPORTED)) begin:deleg // DELEG registers should exist + flopenr #(`XLEN) MEDELEGreg(clk, reset, WriteMEDELEGM, CSRWriteValM & MEDELEG_MASK /*12'h7FF*/, MEDELEG_REGW); + flopenr #(`XLEN) MIDELEGreg(clk, reset, WriteMIDELEGM, CSRWriteValM & MIDELEG_MASK /*12'h222*/, MIDELEG_REGW); + end else begin + assign MEDELEG_REGW = 0; + assign MIDELEG_REGW = 0; + end flopenr #(`XLEN) MSCRATCHreg(clk, reset, WriteMSCRATCHM, CSRWriteValM, MSCRATCH_REGW); flopenr #(`XLEN) MEPCreg(clk, reset, WriteMEPCM, NextEPCM, MEPC_REGW); flopenr #(`XLEN) MCAUSEreg(clk, reset, WriteMCAUSEM, NextCauseM, MCAUSE_REGW); if(`QEMU) assign MTVAL_REGW = `XLEN'b0; else flopenr #(`XLEN) MTVALreg(clk, reset, WriteMTVALM, NextMtvalM, MTVAL_REGW); - generate // *** needs comment about bit 1 - if (`BUSYBEAR == 1) begin:counters + if (`BUSYBEAR == 1) begin:counters // counter 1 (TIME) enable tied to 0 to match simulator*** flopenr #(32) MCOUNTERENreg(clk, reset, WriteMCOUNTERENM, {CSRWriteValM[31:2],1'b0,CSRWriteValM[0]}, MCOUNTEREN_REGW); end else begin:counters flopenr #(32) MCOUNTERENreg(clk, reset, WriteMCOUNTERENM, CSRWriteValM[31:0], MCOUNTEREN_REGW); end - endgenerate flopenr #(32) MCOUNTINHIBITreg(clk, reset, WriteMCOUNTINHIBITM, CSRWriteValM[31:0], MCOUNTINHIBIT_REGW); diff --git a/pipelined/src/privileged/csrn.sv b/pipelined/src/privileged/csrn.sv index 17f4f4563..c20b7cf5a 100644 --- a/pipelined/src/privileged/csrn.sv +++ 
b/pipelined/src/privileged/csrn.sv @@ -49,52 +49,50 @@ module csrn #(parameter ); // User mode CSRs below only needed when user mode traps are supported - generate - if (`N_SUPPORTED) begin:nmode - logic WriteUTVECM; - logic WriteUSCRATCHM, WriteUEPCM; - logic WriteUCAUSEM, WriteUTVALM; - logic [`XLEN-1:0] UEDELEG_REGW, UIDELEG_REGW; - logic [`XLEN-1:0] USCRATCH_REGW, UCAUSE_REGW, UTVAL_REGW; - - // Write enables - assign WriteUSTATUSM = CSRNWriteM & (CSRAdrM == USTATUS) & ~StallW; - assign WriteUTVECM = CSRNWriteM & (CSRAdrM == UTVEC) & ~StallW; - assign WriteUEPCM = UTrapM | (CSRNWriteM & (CSRAdrM == UEPC)) & ~StallW; - assign WriteUCAUSEM = UTrapM | (CSRNWriteM & (CSRAdrM == UCAUSE)) & ~StallW; - assign WriteUTVALM = UTrapM | (CSRNWriteM & (CSRAdrM == UTVAL)) & ~StallW; + if (`N_SUPPORTED) begin:nmode // deprecated; consider removing*** + logic WriteUTVECM; + logic WriteUSCRATCHM, WriteUEPCM; + logic WriteUCAUSEM, WriteUTVALM; + logic [`XLEN-1:0] UEDELEG_REGW, UIDELEG_REGW; + logic [`XLEN-1:0] USCRATCH_REGW, UCAUSE_REGW, UTVAL_REGW; + + // Write enables + assign WriteUSTATUSM = CSRNWriteM & (CSRAdrM == USTATUS) & ~StallW; + assign WriteUTVECM = CSRNWriteM & (CSRAdrM == UTVEC) & ~StallW; + assign WriteUEPCM = UTrapM | (CSRNWriteM & (CSRAdrM == UEPC)) & ~StallW; + assign WriteUCAUSEM = UTrapM | (CSRNWriteM & (CSRAdrM == UCAUSE)) & ~StallW; + assign WriteUTVALM = UTrapM | (CSRNWriteM & (CSRAdrM == UTVAL)) & ~StallW; - // CSRs - flopenl #(`XLEN) UTVECreg(clk, reset, WriteUTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, `RESET_VECTOR, UTVEC_REGW); - flopenr #(`XLEN) USCRATCHreg(clk, reset, WriteUSCRATCHM, CSRWriteValM, USCRATCH_REGW); - flopenr #(`XLEN) UEPCreg(clk, reset, WriteUEPCM, NextEPCM, UEPC_REGW); - flopenr #(`XLEN) UCAUSEreg(clk, reset, WriteUCAUSEM, NextCauseM, UCAUSE_REGW); - flopenr #(`XLEN) UTVALreg(clk, reset, WriteUTVALM, NextMtvalM, UTVAL_REGW); + // CSRs + flopenl #(`XLEN) UTVECreg(clk, reset, WriteUTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, 
CSRWriteValM[0]}, `RESET_VECTOR, UTVEC_REGW); + flopenr #(`XLEN) USCRATCHreg(clk, reset, WriteUSCRATCHM, CSRWriteValM, USCRATCH_REGW); + flopenr #(`XLEN) UEPCreg(clk, reset, WriteUEPCM, NextEPCM, UEPC_REGW); + flopenr #(`XLEN) UCAUSEreg(clk, reset, WriteUCAUSEM, NextCauseM, UCAUSE_REGW); + flopenr #(`XLEN) UTVALreg(clk, reset, WriteUTVALM, NextMtvalM, UTVAL_REGW); - // CSR Reads - always_comb begin - IllegalCSRNAccessM = 0; - case (CSRAdrM) - USTATUS: CSRNReadValM = USTATUS_REGW; - UTVEC: CSRNReadValM = UTVEC_REGW; - UIP: CSRNReadValM = {{(`XLEN-12){1'b0}}, UIP_REGW}; - UIE: CSRNReadValM = {{(`XLEN-12){1'b0}}, UIE_REGW}; - USCRATCH: CSRNReadValM = USCRATCH_REGW; - UEPC: CSRNReadValM = UEPC_REGW; - UCAUSE: CSRNReadValM = UCAUSE_REGW; - UTVAL: CSRNReadValM = UTVAL_REGW; - default: begin - CSRNReadValM = 0; - IllegalCSRNAccessM = 1; - end - endcase - end - end else begin // if not supported - assign WriteUSTATUSM = 0; - assign CSRNReadValM = 0; - assign UEPC_REGW = 0; - assign UTVEC_REGW = 0; - assign IllegalCSRNAccessM = 1; + // CSR Reads + always_comb begin + IllegalCSRNAccessM = 0; + case (CSRAdrM) + USTATUS: CSRNReadValM = USTATUS_REGW; + UTVEC: CSRNReadValM = UTVEC_REGW; + UIP: CSRNReadValM = {{(`XLEN-12){1'b0}}, UIP_REGW}; + UIE: CSRNReadValM = {{(`XLEN-12){1'b0}}, UIE_REGW}; + USCRATCH: CSRNReadValM = USCRATCH_REGW; + UEPC: CSRNReadValM = UEPC_REGW; + UCAUSE: CSRNReadValM = UCAUSE_REGW; + UTVAL: CSRNReadValM = UTVAL_REGW; + default: begin + CSRNReadValM = 0; + IllegalCSRNAccessM = 1; + end + endcase end - endgenerate -endmodule + end else begin // if not supported + assign WriteUSTATUSM = 0; + assign CSRNReadValM = 0; + assign UEPC_REGW = 0; + assign UTVEC_REGW = 0; + assign IllegalCSRNAccessM = 1; + end + endmodule diff --git a/pipelined/src/privileged/csrs.sv b/pipelined/src/privileged/csrs.sv index daaafe639..413ee3c90 100644 --- a/pipelined/src/privileged/csrs.sv +++ b/pipelined/src/privileged/csrs.sv @@ -69,89 +69,87 @@ module csrs #(parameter //logic 
[`XLEN-1:0] SEDELEG_MASK = ~(zero | 3'b111 << 9); // sedeleg[11:9] hardwired to zero per Privileged Spec 3.1.8 // Supervisor mode CSRs sometimes supported - generate - if (`S_SUPPORTED) begin:csrs - logic WriteSTVECM; - logic WriteSSCRATCHM, WriteSEPCM; - logic WriteSCAUSEM, WriteSTVALM, WriteSATPM, WriteSCOUNTERENM; - logic [`XLEN-1:0] SSCRATCH_REGW, STVAL_REGW; - (* mark_debug = "true" *) logic [`XLEN-1:0] SCAUSE_REGW; - - assign WriteSSTATUSM = CSRSWriteM & (CSRAdrM == SSTATUS) & ~StallW; - assign WriteSTVECM = CSRSWriteM & (CSRAdrM == STVEC) & ~StallW; - assign WriteSSCRATCHM = CSRSWriteM & (CSRAdrM == SSCRATCH) & ~StallW; - assign WriteSEPCM = STrapM | (CSRSWriteM & (CSRAdrM == SEPC)) & ~StallW; - assign WriteSCAUSEM = STrapM | (CSRSWriteM & (CSRAdrM == SCAUSE)) & ~StallW; - assign WriteSTVALM = STrapM | (CSRSWriteM & (CSRAdrM == STVAL)) & ~StallW; - assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == `M_MODE | ~STATUS_TVM) & ~StallW; - assign WriteSCOUNTERENM = CSRSWriteM & (CSRAdrM == SCOUNTEREN) & ~StallW; + if (`S_SUPPORTED) begin:csrs + logic WriteSTVECM; + logic WriteSSCRATCHM, WriteSEPCM; + logic WriteSCAUSEM, WriteSTVALM, WriteSATPM, WriteSCOUNTERENM; + logic [`XLEN-1:0] SSCRATCH_REGW, STVAL_REGW; + (* mark_debug = "true" *) logic [`XLEN-1:0] SCAUSE_REGW; + + assign WriteSSTATUSM = CSRSWriteM & (CSRAdrM == SSTATUS) & ~StallW; + assign WriteSTVECM = CSRSWriteM & (CSRAdrM == STVEC) & ~StallW; + assign WriteSSCRATCHM = CSRSWriteM & (CSRAdrM == SSCRATCH) & ~StallW; + assign WriteSEPCM = STrapM | (CSRSWriteM & (CSRAdrM == SEPC)) & ~StallW; + assign WriteSCAUSEM = STrapM | (CSRSWriteM & (CSRAdrM == SCAUSE)) & ~StallW; + assign WriteSTVALM = STrapM | (CSRSWriteM & (CSRAdrM == STVAL)) & ~StallW; + assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == `M_MODE | ~STATUS_TVM) & ~StallW; + assign WriteSCOUNTERENM = CSRSWriteM & (CSRAdrM == SCOUNTEREN) & ~StallW; - // CSRs - flopenr #(`XLEN) STVECreg(clk, reset, WriteSTVECM, 
{CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, STVEC_REGW); //busybear: change reset to 0 - flopenr #(`XLEN) SSCRATCHreg(clk, reset, WriteSSCRATCHM, CSRWriteValM, SSCRATCH_REGW); - flopenr #(`XLEN) SEPCreg(clk, reset, WriteSEPCM, NextEPCM, SEPC_REGW); - flopenr #(`XLEN) SCAUSEreg(clk, reset, WriteSCAUSEM, NextCauseM, SCAUSE_REGW); - flopenr #(`XLEN) STVALreg(clk, reset, WriteSTVALM, NextMtvalM, STVAL_REGW); - if (`MEM_VIRTMEM) - flopenr #(`XLEN) SATPreg(clk, reset, WriteSATPM, CSRWriteValM, SATP_REGW); - else - assign SATP_REGW = 0; // hardwire to zero if virtual memory not supported - if (`BUSYBEAR == 1) begin:scounteren - flopenr #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, {CSRWriteValM[31:2],1'b0,CSRWriteValM[0]}, SCOUNTEREN_REGW); - end else if (`BUILDROOT == 1) begin:scounteren - flopenr #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], SCOUNTEREN_REGW); - end else begin:scounteren - flopens #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], SCOUNTEREN_REGW); - end - if (`N_SUPPORTED) begin:nregs - logic WriteSEDELEGM, WriteSIDELEGM; - assign WriteSEDELEGM = CSRSWriteM & (CSRAdrM == SEDELEG); - assign WriteSIDELEGM = CSRSWriteM & (CSRAdrM == SIDELEG); - flopenr #(`XLEN) SEDELEGreg(clk, reset, WriteSEDELEGM, CSRWriteValM & SEDELEG_MASK, SEDELEG_REGW); - flopenr #(`XLEN) SIDELEGreg(clk, reset, WriteSIDELEGM, CSRWriteValM, SIDELEG_REGW); - end else begin - assign SEDELEG_REGW = 0; - assign SIDELEG_REGW = 0; - end - - // CSR Reads - always_comb begin:csrr - IllegalCSRSAccessM = !(`N_SUPPORTED) & (CSRAdrM == SEDELEG | CSRAdrM == SIDELEG); // trap on DELEG register access when no N-mode - case (CSRAdrM) - SSTATUS: CSRSReadValM = SSTATUS_REGW; - STVEC: CSRSReadValM = STVEC_REGW; -// SIDELEG: CSRSReadValM = {{(`XLEN-12){1'b0}}, SIDELEG_REGW}; -// SEDELEG: CSRSReadValM = {{(`XLEN-12){1'b0}}, SEDELEG_REGW}; - SIDELEG: CSRSReadValM = SIDELEG_REGW; - SEDELEG: CSRSReadValM = SEDELEG_REGW; - SIP: CSRSReadValM = 
{{(`XLEN-12){1'b0}}, SIP_REGW}; - SIE: CSRSReadValM = {{(`XLEN-12){1'b0}}, SIE_REGW}; - SSCRATCH: CSRSReadValM = SSCRATCH_REGW; - SEPC: CSRSReadValM = SEPC_REGW; - SCAUSE: CSRSReadValM = SCAUSE_REGW; - STVAL: CSRSReadValM = STVAL_REGW; - SATP: if (`MEM_VIRTMEM & (PrivilegeModeW == `M_MODE | ~STATUS_TVM)) CSRSReadValM = SATP_REGW; - else begin - CSRSReadValM = 0; - if (PrivilegeModeW == `S_MODE & STATUS_TVM) IllegalCSRSAccessM = 1; - end - SCOUNTEREN:CSRSReadValM = {{(`XLEN-32){1'b0}}, SCOUNTEREN_REGW}; - default: begin - CSRSReadValM = 0; - IllegalCSRSAccessM = 1; - end - endcase - end + // CSRs + flopenr #(`XLEN) STVECreg(clk, reset, WriteSTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, STVEC_REGW); //busybear: change reset to 0 + flopenr #(`XLEN) SSCRATCHreg(clk, reset, WriteSSCRATCHM, CSRWriteValM, SSCRATCH_REGW); + flopenr #(`XLEN) SEPCreg(clk, reset, WriteSEPCM, NextEPCM, SEPC_REGW); + flopenr #(`XLEN) SCAUSEreg(clk, reset, WriteSCAUSEM, NextCauseM, SCAUSE_REGW); + flopenr #(`XLEN) STVALreg(clk, reset, WriteSTVALM, NextMtvalM, STVAL_REGW); + if (`MEM_VIRTMEM) + flopenr #(`XLEN) SATPreg(clk, reset, WriteSATPM, CSRWriteValM, SATP_REGW); + else + assign SATP_REGW = 0; // hardwire to zero if virtual memory not supported + if (`BUSYBEAR == 1) begin:scounteren + flopenr #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, {CSRWriteValM[31:2],1'b0,CSRWriteValM[0]}, SCOUNTEREN_REGW); + end else if (`BUILDROOT == 1) begin:scounteren + flopenr #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], SCOUNTEREN_REGW); + end else begin:scounteren + flopens #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], SCOUNTEREN_REGW); + end + if (`N_SUPPORTED) begin:nregs + logic WriteSEDELEGM, WriteSIDELEGM; + assign WriteSEDELEGM = CSRSWriteM & (CSRAdrM == SEDELEG); + assign WriteSIDELEGM = CSRSWriteM & (CSRAdrM == SIDELEG); + flopenr #(`XLEN) SEDELEGreg(clk, reset, WriteSEDELEGM, CSRWriteValM & SEDELEG_MASK, SEDELEG_REGW); + flopenr #(`XLEN) 
SIDELEGreg(clk, reset, WriteSIDELEGM, CSRWriteValM, SIDELEG_REGW); end else begin - assign WriteSSTATUSM = 0; - assign CSRSReadValM = 0; - assign SEPC_REGW = 0; - assign STVEC_REGW = 0; assign SEDELEG_REGW = 0; assign SIDELEG_REGW = 0; - assign SCOUNTEREN_REGW = 0; - assign SATP_REGW = 0; - assign IllegalCSRSAccessM = 1; end - endgenerate + + // CSR Reads + always_comb begin:csrr + IllegalCSRSAccessM = !(`N_SUPPORTED) & (CSRAdrM == SEDELEG | CSRAdrM == SIDELEG); // trap on DELEG register access when no N-mode + case (CSRAdrM) + SSTATUS: CSRSReadValM = SSTATUS_REGW; + STVEC: CSRSReadValM = STVEC_REGW; +// SIDELEG: CSRSReadValM = {{(`XLEN-12){1'b0}}, SIDELEG_REGW}; +// SEDELEG: CSRSReadValM = {{(`XLEN-12){1'b0}}, SEDELEG_REGW}; + SIDELEG: CSRSReadValM = SIDELEG_REGW; + SEDELEG: CSRSReadValM = SEDELEG_REGW; + SIP: CSRSReadValM = {{(`XLEN-12){1'b0}}, SIP_REGW}; + SIE: CSRSReadValM = {{(`XLEN-12){1'b0}}, SIE_REGW}; + SSCRATCH: CSRSReadValM = SSCRATCH_REGW; + SEPC: CSRSReadValM = SEPC_REGW; + SCAUSE: CSRSReadValM = SCAUSE_REGW; + STVAL: CSRSReadValM = STVAL_REGW; + SATP: if (`MEM_VIRTMEM & (PrivilegeModeW == `M_MODE | ~STATUS_TVM)) CSRSReadValM = SATP_REGW; + else begin + CSRSReadValM = 0; + if (PrivilegeModeW == `S_MODE & STATUS_TVM) IllegalCSRSAccessM = 1; + end + SCOUNTEREN:CSRSReadValM = {{(`XLEN-32){1'b0}}, SCOUNTEREN_REGW}; + default: begin + CSRSReadValM = 0; + IllegalCSRSAccessM = 1; + end + endcase + end + end else begin + assign WriteSSTATUSM = 0; + assign CSRSReadValM = 0; + assign SEPC_REGW = 0; + assign STVEC_REGW = 0; + assign SEDELEG_REGW = 0; + assign SIDELEG_REGW = 0; + assign SCOUNTEREN_REGW = 0; + assign SATP_REGW = 0; + assign IllegalCSRSAccessM = 1; + end endmodule diff --git a/pipelined/src/privileged/csrsr.sv b/pipelined/src/privileged/csrsr.sv index 3b14fd59c..85d164683 100644 --- a/pipelined/src/privileged/csrsr.sv +++ b/pipelined/src/privileged/csrsr.sv @@ -50,54 +50,50 @@ module csrsr ( // See Privileged Spec Section 3.1.6 // Lower privilege 
status registers are a subset of the full status register // *** consider adding MBE, SBE, UBE fields later from 20210108 draft spec - generate - if (`XLEN==64) begin: csrsr64 // RV64 - assign MSTATUS_REGW = {STATUS_SD, 27'b0, STATUS_SXL, STATUS_UXL, 9'b0, - STATUS_TSR, STATUS_TW, STATUS_TVM, STATUS_MXR, STATUS_SUM, STATUS_MPRV, - STATUS_XS, STATUS_FS, STATUS_MPP, 2'b0, - STATUS_SPP, STATUS_MPIE, 1'b0, STATUS_SPIE, STATUS_UPIE, - STATUS_MIE, 1'b0, STATUS_SIE, STATUS_UIE}; - assign SSTATUS_REGW = {STATUS_SD, /*27'b0, */ 29'b0, /*STATUS_SXL, */ STATUS_UXL, /*9'b0, */ 12'b0, - /*STATUS_TSR, STATUS_TW, STATUS_TVM, */STATUS_MXR, STATUS_SUM, /* STATUS_MPRV, */ 1'b0, - STATUS_XS, STATUS_FS, /*STATUS_MPP, 2'b0*/ 4'b0, - STATUS_SPP, /*STATUS_MPIE, 1'b0*/ 2'b0, STATUS_SPIE, STATUS_UPIE, - /*STATUS_MIE, 1'b0*/ 2'b0, STATUS_SIE, STATUS_UIE}; - assign USTATUS_REGW = {/*STATUS_SD, */ 59'b0, /*STATUS_SXL, STATUS_UXL, 9'b0, */ - /*STATUS_TSR, STATUS_TW, STATUS_TVM, STATUS_MXR, STATUS_SUM, STATUS_MPRV, , 1'b0,*/ - /* STATUS_XS, STATUS_FS, /*STATUS_MPP, 8'b0, */ - /*STATUS_SPP, STATUS_MPIE, 1'b0 2'b0, STATUS_SPIE,*/ STATUS_UPIE, - /*STATUS_MIE, 1'b0*/ 3'b0, /*STATUS_SIE, */STATUS_UIE}; - end else begin: csrsr32 // RV32 - assign MSTATUS_REGW = {STATUS_SD, 8'b0, - STATUS_TSR, STATUS_TW, STATUS_TVM, STATUS_MXR, STATUS_SUM, STATUS_MPRV, - STATUS_XS, STATUS_FS, STATUS_MPP, 2'b0, - STATUS_SPP, STATUS_MPIE, 1'b0, STATUS_SPIE, STATUS_UPIE, STATUS_MIE, 1'b0, STATUS_SIE, STATUS_UIE}; - assign SSTATUS_REGW = {STATUS_SD, 11'b0, - /*STATUS_TSR, STATUS_TW, STATUS_TVM, */STATUS_MXR, STATUS_SUM, /* STATUS_MPRV, */ 1'b0, - STATUS_XS, STATUS_FS, /*STATUS_MPP, 2'b0*/ 4'b0, - STATUS_SPP, /*STATUS_MPIE, 1'b0*/ 2'b0, STATUS_SPIE, STATUS_UPIE, - /*STATUS_MIE, 1'b0*/ 2'b0, STATUS_SIE, STATUS_UIE}; - assign USTATUS_REGW = {/*STATUS_SD, */ 27'b0, /*STATUS_SXL, STATUS_UXL, 9'b0, */ - /*STATUS_TSR, STATUS_TW, STATUS_TVM, STATUS_MXR, STATUS_SUM, STATUS_MPRV, , 1'b0,*/ - /*STATUS_XS, STATUS_FS, STATUS_MPP, 8'b0, 
*/ - /*STATUS_SPP, STATUS_MPIE, 1'b0 2'b0, STATUS_SPIE,*/ STATUS_UPIE, - /*STATUS_MIE, 1'b0*/ 3'b0, /*STATUS_SIE, */STATUS_UIE}; - end - endgenerate + if (`XLEN==64) begin: csrsr64 // RV64 + assign MSTATUS_REGW = {STATUS_SD, 27'b0, STATUS_SXL, STATUS_UXL, 9'b0, + STATUS_TSR, STATUS_TW, STATUS_TVM, STATUS_MXR, STATUS_SUM, STATUS_MPRV, + STATUS_XS, STATUS_FS, STATUS_MPP, 2'b0, + STATUS_SPP, STATUS_MPIE, 1'b0, STATUS_SPIE, STATUS_UPIE, + STATUS_MIE, 1'b0, STATUS_SIE, STATUS_UIE}; + assign SSTATUS_REGW = {STATUS_SD, /*27'b0, */ 29'b0, /*STATUS_SXL, */ STATUS_UXL, /*9'b0, */ 12'b0, + /*STATUS_TSR, STATUS_TW, STATUS_TVM, */STATUS_MXR, STATUS_SUM, /* STATUS_MPRV, */ 1'b0, + STATUS_XS, STATUS_FS, /*STATUS_MPP, 2'b0*/ 4'b0, + STATUS_SPP, /*STATUS_MPIE, 1'b0*/ 2'b0, STATUS_SPIE, STATUS_UPIE, + /*STATUS_MIE, 1'b0*/ 2'b0, STATUS_SIE, STATUS_UIE}; + assign USTATUS_REGW = {/*STATUS_SD, */ 59'b0, /*STATUS_SXL, STATUS_UXL, 9'b0, */ + /*STATUS_TSR, STATUS_TW, STATUS_TVM, STATUS_MXR, STATUS_SUM, STATUS_MPRV, , 1'b0,*/ + /* STATUS_XS, STATUS_FS, /*STATUS_MPP, 8'b0, */ + /*STATUS_SPP, STATUS_MPIE, 1'b0 2'b0, STATUS_SPIE,*/ STATUS_UPIE, + /*STATUS_MIE, 1'b0*/ 3'b0, /*STATUS_SIE, */STATUS_UIE}; + end else begin: csrsr32 // RV32 + assign MSTATUS_REGW = {STATUS_SD, 8'b0, + STATUS_TSR, STATUS_TW, STATUS_TVM, STATUS_MXR, STATUS_SUM, STATUS_MPRV, + STATUS_XS, STATUS_FS, STATUS_MPP, 2'b0, + STATUS_SPP, STATUS_MPIE, 1'b0, STATUS_SPIE, STATUS_UPIE, STATUS_MIE, 1'b0, STATUS_SIE, STATUS_UIE}; + assign SSTATUS_REGW = {STATUS_SD, 11'b0, + /*STATUS_TSR, STATUS_TW, STATUS_TVM, */STATUS_MXR, STATUS_SUM, /* STATUS_MPRV, */ 1'b0, + STATUS_XS, STATUS_FS, /*STATUS_MPP, 2'b0*/ 4'b0, + STATUS_SPP, /*STATUS_MPIE, 1'b0*/ 2'b0, STATUS_SPIE, STATUS_UPIE, + /*STATUS_MIE, 1'b0*/ 2'b0, STATUS_SIE, STATUS_UIE}; + assign USTATUS_REGW = {/*STATUS_SD, */ 27'b0, /*STATUS_SXL, STATUS_UXL, 9'b0, */ + /*STATUS_TSR, STATUS_TW, STATUS_TVM, STATUS_MXR, STATUS_SUM, STATUS_MPRV, , 1'b0,*/ + /*STATUS_XS, STATUS_FS, STATUS_MPP, 
8'b0, */ + /*STATUS_SPP, STATUS_MPIE, 1'b0 2'b0, STATUS_SPIE,*/ STATUS_UPIE, + /*STATUS_MIE, 1'b0*/ 3'b0, /*STATUS_SIE, */STATUS_UIE}; + end // harwired STATUS bits - generate - assign STATUS_TSR = `S_SUPPORTED & STATUS_TSR_INT; // override reigster with 0 if supervisor mode not supported - assign STATUS_TW = (`S_SUPPORTED | `U_SUPPORTED) & STATUS_TW_INT; // override reigster with 0 if only machine mode supported - assign STATUS_TVM = `S_SUPPORTED & STATUS_TVM_INT; // override reigster with 0 if supervisor mode not supported - assign STATUS_MXR = `S_SUPPORTED & STATUS_MXR_INT; // override reigster with 0 if supervisor mode not supported - // SXL and UXL bits only matter for RV64. Set to 10 for RV64 if mode is supported, or 0 if not - assign STATUS_SXL = `S_SUPPORTED & ~`QEMU ? 2'b10 : 2'b00; // 10 if supervisor mode supported - assign STATUS_UXL = `U_SUPPORTED & ~`QEMU ? 2'b10 : 2'b00; // 10 if user mode supported - assign STATUS_SUM = `S_SUPPORTED & `MEM_VIRTMEM & STATUS_SUM_INT; // override reigster with 0 if supervisor mode not supported - assign STATUS_MPRV = `U_SUPPORTED & STATUS_MPRV_INT; // override with 0 if user mode not supported - assign STATUS_FS = (`S_SUPPORTED & (`F_SUPPORTED | `D_SUPPORTED)) ? STATUS_FS_INT : 2'b00; // off if no FP - endgenerate + assign STATUS_TSR = `S_SUPPORTED & STATUS_TSR_INT; // override reigster with 0 if supervisor mode not supported + assign STATUS_TW = (`S_SUPPORTED | `U_SUPPORTED) & STATUS_TW_INT; // override reigster with 0 if only machine mode supported + assign STATUS_TVM = `S_SUPPORTED & STATUS_TVM_INT; // override reigster with 0 if supervisor mode not supported + assign STATUS_MXR = `S_SUPPORTED & STATUS_MXR_INT; // override reigster with 0 if supervisor mode not supported + // SXL and UXL bits only matter for RV64. Set to 10 for RV64 if mode is supported, or 0 if not + assign STATUS_SXL = `S_SUPPORTED & ~`QEMU ? 2'b10 : 2'b00; // 10 if supervisor mode supported + assign STATUS_UXL = `U_SUPPORTED & ~`QEMU ? 
2'b10 : 2'b00; // 10 if user mode supported + assign STATUS_SUM = `S_SUPPORTED & `MEM_VIRTMEM & STATUS_SUM_INT; // override reigster with 0 if supervisor mode not supported + assign STATUS_MPRV = `U_SUPPORTED & STATUS_MPRV_INT; // override with 0 if user mode not supported + assign STATUS_FS = (`S_SUPPORTED & (`F_SUPPORTED | `D_SUPPORTED)) ? STATUS_FS_INT : 2'b00; // off if no FP assign STATUS_SD = (STATUS_FS == 2'b11) | (STATUS_XS == 2'b11); // dirty state logic assign STATUS_XS = 2'b00; // No additional user-mode state to be dirty diff --git a/pipelined/src/privileged/csru.sv b/pipelined/src/privileged/csru.sv index d92a50cb9..eb5e53995 100644 --- a/pipelined/src/privileged/csru.sv +++ b/pipelined/src/privileged/csru.sv @@ -44,42 +44,40 @@ module csru #(parameter ); // Floating Point CSRs in User Mode only needed if Floating Point is supported - generate - if (`F_SUPPORTED | `D_SUPPORTED) begin:csru - logic [4:0] FFLAGS_REGW; - logic [2:0] NextFRMM; - logic [4:0] NextFFLAGSM; - - // Write enables - //assign WriteFCSRM = CSRUWriteM & (CSRAdrM == FCSR) & ~StallW; - assign WriteFRMM = (CSRUWriteM & (CSRAdrM == FRM | CSRAdrM == FCSR)) & ~StallW; - assign WriteFFLAGSM = (CSRUWriteM & (CSRAdrM == FFLAGS | CSRAdrM == FCSR)) & ~StallW; - - // Write Values - assign NextFRMM = (CSRAdrM == FCSR) ? CSRWriteValM[7:5] : CSRWriteValM[2:0]; - assign NextFFLAGSM = WriteFFLAGSM ? CSRWriteValM[4:0] : FFLAGS_REGW | SetFflagsM; + if (`F_SUPPORTED | `D_SUPPORTED) begin:csru + logic [4:0] FFLAGS_REGW; + logic [2:0] NextFRMM; + logic [4:0] NextFFLAGSM; + + // Write enables + //assign WriteFCSRM = CSRUWriteM & (CSRAdrM == FCSR) & ~StallW; + assign WriteFRMM = (CSRUWriteM & (CSRAdrM == FRM | CSRAdrM == FCSR)) & ~StallW; + assign WriteFFLAGSM = (CSRUWriteM & (CSRAdrM == FFLAGS | CSRAdrM == FCSR)) & ~StallW; + + // Write Values + assign NextFRMM = (CSRAdrM == FCSR) ? CSRWriteValM[7:5] : CSRWriteValM[2:0]; + assign NextFFLAGSM = WriteFFLAGSM ? 
CSRWriteValM[4:0] : FFLAGS_REGW | SetFflagsM; - // CSRs - flopenr #(3) FRMreg(clk, reset, WriteFRMM, NextFRMM, FRM_REGW); - flopr #(5) FFLAGSreg(clk, reset, NextFFLAGSM, FFLAGS_REGW); + // CSRs + flopenr #(3) FRMreg(clk, reset, WriteFRMM, NextFRMM, FRM_REGW); + flopr #(5) FFLAGSreg(clk, reset, NextFFLAGSM, FFLAGS_REGW); - // CSR Reads - always_comb begin - IllegalCSRUAccessM = 0; - case (CSRAdrM) - FFLAGS: CSRUReadValM = {{(`XLEN-5){1'b0}}, FFLAGS_REGW}; - FRM: CSRUReadValM = {{(`XLEN-3){1'b0}}, FRM_REGW}; - FCSR: CSRUReadValM = {{(`XLEN-8){1'b0}}, FRM_REGW, FFLAGS_REGW}; - default: begin - CSRUReadValM = 0; - IllegalCSRUAccessM = 1; - end - endcase - end - end else begin // if not supported - assign FRM_REGW = 0; - assign CSRUReadValM = 0; - assign IllegalCSRUAccessM = 1; + // CSR Reads + always_comb begin + IllegalCSRUAccessM = 0; + case (CSRAdrM) + FFLAGS: CSRUReadValM = {{(`XLEN-5){1'b0}}, FFLAGS_REGW}; + FRM: CSRUReadValM = {{(`XLEN-3){1'b0}}, FRM_REGW}; + FCSR: CSRUReadValM = {{(`XLEN-8){1'b0}}, FRM_REGW, FFLAGS_REGW}; + default: begin + CSRUReadValM = 0; + IllegalCSRUAccessM = 1; + end + endcase end - endgenerate + end else begin // if not supported + assign FRM_REGW = 0; + assign CSRUReadValM = 0; + assign IllegalCSRUAccessM = 1; + end endmodule diff --git a/pipelined/src/privileged/trap.sv b/pipelined/src/privileged/trap.sv index f48a8ad09..3fac49ff8 100644 --- a/pipelined/src/privileged/trap.sv +++ b/pipelined/src/privileged/trap.sv @@ -103,18 +103,16 @@ module trap ( // > implemented without a hardware adder circuit. 
// For example, we could require m/stvec be aligned on 7 bits to let us replace the adder directly below with // [untested] PrivilegedVectoredTrapVector = {PrivilegedTrapVector[`XLEN-1:7], CauseM[3:0], 4'b0000} - generate - if(`VECTORED_INTERRUPTS_SUPPORTED) begin:vec - always_comb - if (PrivilegedTrapVector[1:0] == 2'b01 & CauseM[`XLEN-1] == 1) - PrivilegedVectoredTrapVector = {PrivilegedTrapVector[`XLEN-1:2] + {CauseM[`XLEN-5:0], 2'b00}, 2'b00}; - else - PrivilegedVectoredTrapVector = {PrivilegedTrapVector[`XLEN-1:2], 2'b00}; - end - else begin - assign PrivilegedVectoredTrapVector = {PrivilegedTrapVector[`XLEN-1:2], 2'b00}; - end - endgenerate + if(`VECTORED_INTERRUPTS_SUPPORTED) begin:vec + always_comb + if (PrivilegedTrapVector[1:0] == 2'b01 & CauseM[`XLEN-1] == 1) + PrivilegedVectoredTrapVector = {PrivilegedTrapVector[`XLEN-1:2] + {CauseM[`XLEN-5:0], 2'b00}, 2'b00}; + else + PrivilegedVectoredTrapVector = {PrivilegedTrapVector[`XLEN-1:2], 2'b00}; + end + else begin + assign PrivilegedVectoredTrapVector = {PrivilegedTrapVector[`XLEN-1:2], 2'b00}; + end always_comb if (mretM) PrivilegedNextPCM = MEPC_REGW; diff --git a/pipelined/src/uncore/clint.sv b/pipelined/src/uncore/clint.sv index 3f0d61e44..bcc8a01c7 100644 --- a/pipelined/src/uncore/clint.sv +++ b/pipelined/src/uncore/clint.sv @@ -55,12 +55,8 @@ module clint ( assign HREADYCLINT = 1'b1; // *** needs to depend on DONE during accesses // word aligned reads - generate - if (`XLEN==64) - assign #2 entry = {HADDR[15:3], 3'b000}; - else - assign #2 entry = {HADDR[15:2], 2'b00}; - endgenerate + if (`XLEN==64) assign #2 entry = {HADDR[15:3], 3'b000}; + else assign #2 entry = {HADDR[15:2], 2'b00}; // DH 2/20/21: Eventually allow MTIME to run off a separate clock // This will require synchronizing MTIME to the system clock @@ -69,74 +65,72 @@ module clint ( // Use req and ack signals synchronized across the clock domains. 
// register access - generate - if (`XLEN==64) begin:clint // 64-bit - always @(posedge HCLK) begin - case(entry) - 16'h0000: HREADCLINT <= {63'b0, MSIP}; - 16'h4000: HREADCLINT <= MTIMECMP; - 16'hBFF8: HREADCLINT <= MTIME; - default: HREADCLINT <= 0; - endcase - end - always_ff @(posedge HCLK or negedge HRESETn) - if (~HRESETn) begin - MSIP <= 0; - MTIMECMP <= 0; - // MTIMECMP is not reset - end else if (memwrite) begin - if (entryd == 16'h0000) MSIP <= HWDATA[0]; - if (entryd == 16'h4000) MTIMECMP <= HWDATA; - end - - // eventually replace MTIME logic below with timereg - // timereg tr(HCLK, HRESETn, TIMECLK, memwrite & (entryd==16'hBFF8), 1'b0, HWDATA, MTIME, done); - - always_ff @(posedge HCLK or negedge HRESETn) - if (~HRESETn) begin - MTIME <= 0; - // MTIMECMP is not reset - end else if (memwrite & entryd == 16'hBFF8) begin - // MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed - MTIME <= HWDATA; - end else MTIME <= MTIME + 1; - end else begin:clint // 32-bit - always @(posedge HCLK) begin - case(entry) - 16'h0000: HREADCLINT <= {31'b0, MSIP}; - 16'h4000: HREADCLINT <= MTIMECMP[31:0]; - 16'h4004: HREADCLINT <= MTIMECMP[63:32]; - 16'hBFF8: HREADCLINT <= MTIME[31:0]; - 16'hBFFC: HREADCLINT <= MTIME[63:32]; - default: HREADCLINT <= 0; - endcase - end - always_ff @(posedge HCLK or negedge HRESETn) - if (~HRESETn) begin - MSIP <= 0; - MTIMECMP <= 0; - // MTIMECMP is not reset ***? - end else if (memwrite) begin - if (entryd == 16'h0000) MSIP <= HWDATA[0]; - if (entryd == 16'h4000) MTIMECMP[31:0] <= HWDATA; - if (entryd == 16'h4004) MTIMECMP[63:32] <= HWDATA; - // MTIME Counter. Eventually change this to run off separate clock. 
Synchronization then needed - end + if (`XLEN==64) begin:clint // 64-bit + always @(posedge HCLK) begin + case(entry) + 16'h0000: HREADCLINT <= {63'b0, MSIP}; + 16'h4000: HREADCLINT <= MTIMECMP; + 16'hBFF8: HREADCLINT <= MTIME; + default: HREADCLINT <= 0; + endcase + end + always_ff @(posedge HCLK or negedge HRESETn) + if (~HRESETn) begin + MSIP <= 0; + MTIMECMP <= 0; + // MTIMECMP is not reset + end else if (memwrite) begin + if (entryd == 16'h0000) MSIP <= HWDATA[0]; + if (entryd == 16'h4000) MTIMECMP <= HWDATA; + end // eventually replace MTIME logic below with timereg - // timereg tr(HCLK, HRESETn, TIMECLK, memwrite & (entryd==16'hBFF8), memwrite & (entryd == 16'hBFFC), HWDATA, MTIME, done); - always_ff @(posedge HCLK or negedge HRESETn) - if (~HRESETn) begin - MTIME <= 0; - // MTIMECMP is not reset - end else if (memwrite & (entryd == 16'hBFF8)) begin - MTIME[31:0] <= HWDATA; - end else if (memwrite & (entryd == 16'hBFFC)) begin - // MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed - MTIME[63:32]<= HWDATA; - end else MTIME <= MTIME + 1; +// timereg tr(HCLK, HRESETn, TIMECLK, memwrite & (entryd==16'hBFF8), 1'b0, HWDATA, MTIME, done); + + always_ff @(posedge HCLK or negedge HRESETn) + if (~HRESETn) begin + MTIME <= 0; + // MTIMECMP is not reset + end else if (memwrite & entryd == 16'hBFF8) begin + // MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed + MTIME <= HWDATA; + end else MTIME <= MTIME + 1; + end else begin:clint // 32-bit + always @(posedge HCLK) begin + case(entry) + 16'h0000: HREADCLINT <= {31'b0, MSIP}; + 16'h4000: HREADCLINT <= MTIMECMP[31:0]; + 16'h4004: HREADCLINT <= MTIMECMP[63:32]; + 16'hBFF8: HREADCLINT <= MTIME[31:0]; + 16'hBFFC: HREADCLINT <= MTIME[63:32]; + default: HREADCLINT <= 0; + endcase end - endgenerate + always_ff @(posedge HCLK or negedge HRESETn) + if (~HRESETn) begin + MSIP <= 0; + MTIMECMP <= 0; + // MTIMECMP is not reset ***? 
+ end else if (memwrite) begin + if (entryd == 16'h0000) MSIP <= HWDATA[0]; + if (entryd == 16'h4000) MTIMECMP[31:0] <= HWDATA; + if (entryd == 16'h4004) MTIMECMP[63:32] <= HWDATA; + // MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed + end + +// eventually replace MTIME logic below with timereg +// timereg tr(HCLK, HRESETn, TIMECLK, memwrite & (entryd==16'hBFF8), memwrite & (entryd == 16'hBFFC), HWDATA, MTIME, done); + always_ff @(posedge HCLK or negedge HRESETn) + if (~HRESETn) begin + MTIME <= 0; + // MTIMECMP is not reset + end else if (memwrite & (entryd == 16'hBFF8)) begin + MTIME[31:0] <= HWDATA; + end else if (memwrite & (entryd == 16'hBFFC)) begin + // MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed + MTIME[63:32]<= HWDATA; + end else MTIME <= MTIME + 1; + end // Software interrupt when MSIP is set assign SwIntM = MSIP; @@ -234,13 +228,9 @@ module graytobinary #(parameter N = `XLEN) ( // B[N-1] = G[N-1]; B[i] = G[i] ^ B[i+1] for 0 <= i < N-1 // requires rippling through N-1 XOR gates - generate - begin - genvar i; - assign b[N-1] = g[N-1]; - for (i=N-2; i >= 0; i--) begin:g2b - assign b[i] = g[i] ^ b[i+1]; - end + genvar i; + assign b[N-1] = g[N-1]; + for (i=N-2; i >= 0; i--) begin:g2b + assign b[i] = g[i] ^ b[i+1]; end - endgenerate endmodule diff --git a/pipelined/src/uncore/gpio.sv b/pipelined/src/uncore/gpio.sv index c5a62eaae..d3473f192 100644 --- a/pipelined/src/uncore/gpio.sv +++ b/pipelined/src/uncore/gpio.sv @@ -61,23 +61,13 @@ module gpio ( // account for subword read/write circuitry // -- Note GPIO registers are 32 bits no matter what; access them with LW SW. 
// (At least that's what I think when FE310 spec says "only naturally aligned 32-bit accesses are supported") - generate - if (`XLEN == 64) begin:gpio - always_comb - if (entryd[2]) begin - Din = HWDATA[63:32]; - HREADGPIO = {Dout,32'b0}; - end else begin - Din = HWDATA[31:0]; - HREADGPIO = {32'b0,Dout}; - end - end else begin:gpio // 32-bit - always_comb begin - Din = HWDATA[31:0]; - HREADGPIO = Dout; - end - end - endgenerate + if (`XLEN == 64) begin + assign Din = entryd[2] ? HWDATA[63:32] : HWDATA[31:0]; + assign HREADGPIO = entryd[2] ? {Dout,32'b0} : {32'b0,Dout}; + end else begin // 32-bit + assign Din = HWDATA[31:0]; + assign HREADGPIO = Dout; + end // register access always_ff @(posedge HCLK, negedge HRESETn) begin @@ -150,12 +140,9 @@ module gpio ( end // chip i/o - generate - if (`GPIO_LOOPBACK_TEST) // connect OUT to IN for loopback testing - assign input0d = GPIOPinsOut & input_en & output_en; - else - assign input0d = GPIOPinsIn & input_en; - endgenerate + // connect OUT to IN for loopback testing + if (`GPIO_LOOPBACK_TEST) assign input0d = GPIOPinsOut & input_en & output_en; + else assign input0d = GPIOPinsIn & input_en; flop #(32) sync1(HCLK,input0d,input1d); flop #(32) sync2(HCLK,input1d,input2d); flop #(32) sync3(HCLK,input2d,input3d); diff --git a/pipelined/src/uncore/plic.sv b/pipelined/src/uncore/plic.sv index 145c2b527..80dcf7fa6 100644 --- a/pipelined/src/uncore/plic.sv +++ b/pipelined/src/uncore/plic.sv @@ -77,23 +77,13 @@ module plic ( // account for subword read/write circuitry // -- Note PLIC registers are 32 bits no matter what; access them with LW SW. - generate - if (`XLEN == 64) begin:plic - always_comb - if (entryd[2]) begin - Din = HWDATA[63:32]; - HREADPLIC = {Dout,32'b0}; - end else begin - Din = HWDATA[31:0]; - HREADPLIC = {32'b0,Dout}; - end - end else begin:plic // 32-bit - always_comb begin - Din = HWDATA[31:0]; - HREADPLIC = Dout; - end - end - endgenerate + if (`XLEN == 64) begin + assign Din = entryd[2] ? 
HWDATA[63:32] : HWDATA[31:0]; + assign HREADPLIC = entryd[2] ? {Dout,32'b0} : {32'b0,Dout}; + end else begin // 32-bit + assign Din = HWDATA[31:0]; + assign HREADPLIC = Dout; + end // ================== // Register Interface @@ -165,14 +155,11 @@ module plic ( // pending array - indexed by priority_lvl x source_ID genvar i, j; - generate - for (j=1; j<=7; j++) begin: pending - for (i=1; i<=N; i=i+1) begin: pendingbit - // *** make sure that this synthesizes into N decoders, not 7*N 3-bit equality comparators (right?) - assign pendingArray[j][i] = (intPriority[i]==j) & intEn[i] & intPending[i]; - end + for (j=1; j<=7; j++) begin: pending + for (i=1; i<=N; i=i+1) begin: pendingbit + assign pendingArray[j][i] = (intPriority[i]==j) & intEn[i] & intPending[i]; end - endgenerate + end // pending array, except grouped by priority assign pendingPGrouped[7:1] = {|pendingArray[7], |pendingArray[6], @@ -200,8 +187,7 @@ module plic ( | ({N{pendingMaxP[2]}} & pendingArray[2]) | ({N{pendingMaxP[1]}} & pendingArray[1]); // find the lowest ID amongst active interrupts at the highest priority - int k; - // *** verify that this synthesizes to a reasonable priority encoder and that k doesn't actually exist in hardware + int k; // *** rewrite as priority encoder always_comb begin intClaim = 6'b0; for(k=N; k>0; k=k-1) begin diff --git a/pipelined/src/uncore/ram.sv b/pipelined/src/uncore/ram.sv index 413914758..645b5271e 100644 --- a/pipelined/src/uncore/ram.sv +++ b/pipelined/src/uncore/ram.sv @@ -49,56 +49,53 @@ module ram #(parameter BASE=0, RANGE = 65535) ( logic memwrite; logic [3:0] busycount; - generate - if(`FPGA) begin:ram - initial begin - //$readmemh(PRELOAD, RAM); - // FPGA only - RAM[0] = 64'h94e1819300002197; - RAM[1] = 64'h4281420141014081; - RAM[2] = 64'h4481440143814301; - RAM[3] = 64'h4681460145814501; - RAM[4] = 64'h4881480147814701; - RAM[5] = 64'h4a814a0149814901; - RAM[6] = 64'h4c814c014b814b01; - RAM[7] = 64'h4e814e014d814d01; - RAM[8] = 64'h0110011b4f814f01; - 
RAM[9] = 64'h059b45011161016e; - RAM[10] = 64'h0004063705fe0010; - RAM[11] = 64'h05a000ef8006061b; - RAM[12] = 64'h0ff003930000100f; - RAM[13] = 64'h4e952e3110012e37; - RAM[14] = 64'hc602829b0053f2b7; - RAM[15] = 64'h2023fe02dfe312fd; - RAM[16] = 64'h829b0053f2b7007e; - RAM[17] = 64'hfe02dfe312fdc602; - RAM[18] = 64'h4de31efd000e2023; - RAM[19] = 64'h059bf1402573fdd0; - RAM[20] = 64'h0000061705e20870; - RAM[21] = 64'h0010029b01260613; - RAM[22] = 64'h11010002806702fe; - RAM[23] = 64'h84b2842ae426e822; - RAM[24] = 64'h892ee04aec064505; - RAM[25] = 64'h06e000ef07e000ef; - RAM[26] = 64'h979334fd02905563; - RAM[27] = 64'h07930177d4930204; - RAM[28] = 64'h4089093394be2004; - RAM[29] = 64'h04138522008905b3; - RAM[30] = 64'h19e3014000ef2004; - RAM[31] = 64'h64a2644260e2fe94; - RAM[32] = 64'h6749808261056902; - RAM[33] = 64'hdfed8b8510472783; - RAM[34] = 64'h2423479110a73823; - RAM[35] = 64'h10472783674910f7; - RAM[36] = 64'h20058693ffed8b89; - RAM[37] = 64'h05a1118737836749; - RAM[38] = 64'hfed59be3fef5bc23; - RAM[39] = 64'h1047278367498082; - RAM[40] = 64'h67c98082dfed8b85; - RAM[41] = 64'h0000808210a7a023; - end // initial begin - end // if (FPGA) - endgenerate + if(`FPGA) begin:ram + initial begin + //$readmemh(PRELOAD, RAM); + RAM[0] = 64'h94e1819300002197; + RAM[1] = 64'h4281420141014081; + RAM[2] = 64'h4481440143814301; + RAM[3] = 64'h4681460145814501; + RAM[4] = 64'h4881480147814701; + RAM[5] = 64'h4a814a0149814901; + RAM[6] = 64'h4c814c014b814b01; + RAM[7] = 64'h4e814e014d814d01; + RAM[8] = 64'h0110011b4f814f01; + RAM[9] = 64'h059b45011161016e; + RAM[10] = 64'h0004063705fe0010; + RAM[11] = 64'h05a000ef8006061b; + RAM[12] = 64'h0ff003930000100f; + RAM[13] = 64'h4e952e3110012e37; + RAM[14] = 64'hc602829b0053f2b7; + RAM[15] = 64'h2023fe02dfe312fd; + RAM[16] = 64'h829b0053f2b7007e; + RAM[17] = 64'hfe02dfe312fdc602; + RAM[18] = 64'h4de31efd000e2023; + RAM[19] = 64'h059bf1402573fdd0; + RAM[20] = 64'h0000061705e20870; + RAM[21] = 64'h0010029b01260613; + RAM[22] = 
64'h11010002806702fe; + RAM[23] = 64'h84b2842ae426e822; + RAM[24] = 64'h892ee04aec064505; + RAM[25] = 64'h06e000ef07e000ef; + RAM[26] = 64'h979334fd02905563; + RAM[27] = 64'h07930177d4930204; + RAM[28] = 64'h4089093394be2004; + RAM[29] = 64'h04138522008905b3; + RAM[30] = 64'h19e3014000ef2004; + RAM[31] = 64'h64a2644260e2fe94; + RAM[32] = 64'h6749808261056902; + RAM[33] = 64'hdfed8b8510472783; + RAM[34] = 64'h2423479110a73823; + RAM[35] = 64'h10472783674910f7; + RAM[36] = 64'h20058693ffed8b89; + RAM[37] = 64'h05a1118737836749; + RAM[38] = 64'hfed59be3fef5bc23; + RAM[39] = 64'h1047278367498082; + RAM[40] = 64'h67c98082dfed8b85; + RAM[41] = 64'h0000808210a7a023; + end // initial begin + end // if (FPGA) assign initTrans = HREADY & HSELRam & (HTRANS != 2'b00); @@ -144,26 +141,23 @@ module ram #(parameter BASE=0, RANGE = 65535) ( -----/\----- EXCLUDED -----/\----- */ /* verilator lint_off WIDTH */ - generate - if (`XLEN == 64) begin:ramrd - always_ff @(posedge HCLK) begin - HWADDR <= #1 A; - HREADRam0 <= #1 RAM[A[31:3]]; - if (memwrite & risingHREADYRam) RAM[HWADDR[31:3]] <= #1 HWDATA; - end - end else begin - always_ff @(posedge HCLK) begin:ramrd - HWADDR <= #1 A; - HREADRam0 <= #1 RAM[A[31:2]]; - if (memwrite & risingHREADYRam) RAM[HWADDR[31:2]] <= #1 HWDATA; - end + if (`XLEN == 64) begin:ramrw + always_ff @(posedge HCLK) begin + HWADDR <= #1 A; + HREADRam0 <= #1 RAM[A[31:3]]; + if (memwrite & risingHREADYRam) RAM[HWADDR[31:3]] <= #1 HWDATA; end - endgenerate + end else begin + always_ff @(posedge HCLK) begin:ramrw + HWADDR <= #1 A; + HREADRam0 <= #1 RAM[A[31:2]]; + if (memwrite & risingHREADYRam) RAM[HWADDR[31:2]] <= #1 HWDATA; + end + end /* verilator lint_on WIDTH */ //assign HREADRam = HREADYRam ? HREADRam0 : `XLEN'bz; // *** Ross Thompson: removed tristate as fpga synthesis removes. 
assign HREADRam = HREADRam0; - endmodule diff --git a/pipelined/src/uncore/sdc/SDC.sv b/pipelined/src/uncore/sdc/SDC.sv index 8ba413821..d2543a6a9 100644 --- a/pipelined/src/uncore/sdc/SDC.sv +++ b/pipelined/src/uncore/sdc/SDC.sv @@ -156,17 +156,15 @@ module SDC flopenl #(3) CommandReg(HCLK, ~HRESETn, (HADDRDelay == 'h8 & RegWrite) | (CommandCompleted), CommandCompleted ? '0 : HWDATA[2:0], '0, Command); - generate - if (`XLEN == 64) begin - flopenr #(64-9) AddressReg(HCLK, ~HRESETn, (HADDRDelay == 'h10 & RegWrite), - HWDATA[`XLEN-1:9], Address); - end else begin - flopenr #(32-9) AddressLowReg(HCLK, ~HRESETn, (HADDRDelay == 'h10 & RegWrite), - HWDATA[`XLEN-1:9], Address[31:9]); - flopenr #(32) AddressHighReg(HCLK, ~HRESETn, (HADDRDelay == 'h14 & RegWrite), - HWDATA, Address[63:32]); - end - endgenerate + if (`XLEN == 64) begin + flopenr #(64-9) AddressReg(HCLK, ~HRESETn, (HADDRDelay == 'h10 & RegWrite), + HWDATA[`XLEN-1:9], Address); + end else begin + flopenr #(32-9) AddressLowReg(HCLK, ~HRESETn, (HADDRDelay == 'h10 & RegWrite), + HWDATA[`XLEN-1:9], Address[31:9]); + flopenr #(32) AddressHighReg(HCLK, ~HRESETn, (HADDRDelay == 'h14 & RegWrite), + HWDATA, Address[63:32]); + end flopen #(`XLEN) DataReg(HCLK, (HADDRDelay == 'h18 & RegWrite), HWDATA, SDCWriteData); @@ -175,7 +173,6 @@ module SDC assign Status = {ErrorCode, InvalidCommand, SDCBusy, SDCInitialized}; - generate if(`XLEN == 64) begin always_comb case(HADDRDelay[4:0]) @@ -200,16 +197,14 @@ module SDC default: HREADSDC = {24'b0, CLKDiv}; endcase end - endgenerate - + for(index = 0; index < 4096/`XLEN; index++) begin assign ReadData512ByteWords[index] = ReadData512Byte[(index+1)*`XLEN-1:index*`XLEN]; end assign SDCReadDataPreNibbleSwap = ReadData512ByteWords[WordCount]; - generate - if(`XLEN == 64) begin + if(`XLEN == 64) begin assign SDCReadData = {SDCReadDataPreNibbleSwap[59:56], SDCReadDataPreNibbleSwap[63:60], SDCReadDataPreNibbleSwap[51:48], SDCReadDataPreNibbleSwap[55:52], 
SDCReadDataPreNibbleSwap[43:40], SDCReadDataPreNibbleSwap[47:44], @@ -224,7 +219,6 @@ module SDC SDCReadDataPreNibbleSwap[11:8], SDCReadDataPreNibbleSwap[15:12], SDCReadDataPreNibbleSwap[3:0], SDCReadDataPreNibbleSwap[7:4]}; end - endgenerate flopenr #($clog2(4096/`XLEN)) WordCountReg (.clk(HCLK), diff --git a/pipelined/src/uncore/sdc/clkdivider.sv b/pipelined/src/uncore/sdc/clkdivider.sv index d4218aa4f..221099f1c 100644 --- a/pipelined/src/uncore/sdc/clkdivider.sv +++ b/pipelined/src/uncore/sdc/clkdivider.sv @@ -88,17 +88,6 @@ module clkdivider #(parameter integer g_COUNT_WIDTH) assign w_fd_D = ~ r_fd_Q; - - generate - if(`FPGA) begin - BUFGMUX - clkMux(.I1(r_fd_Q), - .I0(i_CLK), - .S(i_EN), - .O(o_CLK)); - - end else begin - assign o_CLK = i_EN ? r_fd_Q : i_CLK; - end - endgenerate + if(`FPGA) BUFGMUX clkMux(.I1(r_fd_Q), .I0(i_CLK), .S(i_EN), .O(o_CLK)); + else assign o_CLK = i_EN ? r_fd_Q : i_CLK; endmodule diff --git a/pipelined/src/uncore/subwordwrite.sv b/pipelined/src/uncore/subwordwrite.sv index 0ca6e5f9c..346387adc 100644 --- a/pipelined/src/uncore/subwordwrite.sv +++ b/pipelined/src/uncore/subwordwrite.sv @@ -35,75 +35,72 @@ module subwordwrite ( logic [`XLEN-1:0] WriteDataSubwordDuplicated; - generate - if (`XLEN == 64) begin:sww - logic [7:0] ByteMaskM; - // Compute write mask - always_comb - case(HSIZED[1:0]) - 2'b00: begin ByteMaskM = 8'b00000000; ByteMaskM[HADDRD[2:0]] = 1; end // sb - 2'b01: case (HADDRD[2:1]) - 2'b00: ByteMaskM = 8'b00000011; - 2'b01: ByteMaskM = 8'b00001100; - 2'b10: ByteMaskM = 8'b00110000; - 2'b11: ByteMaskM = 8'b11000000; - endcase - 2'b10: if (HADDRD[2]) ByteMaskM = 8'b11110000; - else ByteMaskM = 8'b00001111; - 2'b11: ByteMaskM = 8'b11111111; - endcase + if (`XLEN == 64) begin:sww + logic [7:0] ByteMaskM; + // Compute write mask + always_comb + case(HSIZED[1:0]) + 2'b00: begin ByteMaskM = 8'b00000000; ByteMaskM[HADDRD[2:0]] = 1; end // sb + 2'b01: case (HADDRD[2:1]) + 2'b00: ByteMaskM = 8'b00000011; + 2'b01: ByteMaskM = 
8'b00001100; + 2'b10: ByteMaskM = 8'b00110000; + 2'b11: ByteMaskM = 8'b11000000; + endcase + 2'b10: if (HADDRD[2]) ByteMaskM = 8'b11110000; + else ByteMaskM = 8'b00001111; + 2'b11: ByteMaskM = 8'b11111111; + endcase - // Handle subword writes - always_comb - case(HSIZED[1:0]) - 2'b00: WriteDataSubwordDuplicated = {8{HWDATAIN[7:0]}}; // sb - 2'b01: WriteDataSubwordDuplicated = {4{HWDATAIN[15:0]}}; // sh - 2'b10: WriteDataSubwordDuplicated = {2{HWDATAIN[31:0]}}; // sw - 2'b11: WriteDataSubwordDuplicated = HWDATAIN; // sw - endcase + // Handle subword writes + always_comb + case(HSIZED[1:0]) + 2'b00: WriteDataSubwordDuplicated = {8{HWDATAIN[7:0]}}; // sb + 2'b01: WriteDataSubwordDuplicated = {4{HWDATAIN[15:0]}}; // sh + 2'b10: WriteDataSubwordDuplicated = {2{HWDATAIN[31:0]}}; // sw + 2'b11: WriteDataSubwordDuplicated = HWDATAIN; // sw + endcase - always_comb begin - HWDATA=HRDATA; - if (ByteMaskM[0]) HWDATA[7:0] = WriteDataSubwordDuplicated[7:0]; - if (ByteMaskM[1]) HWDATA[15:8] = WriteDataSubwordDuplicated[15:8]; - if (ByteMaskM[2]) HWDATA[23:16] = WriteDataSubwordDuplicated[23:16]; - if (ByteMaskM[3]) HWDATA[31:24] = WriteDataSubwordDuplicated[31:24]; - if (ByteMaskM[4]) HWDATA[39:32] = WriteDataSubwordDuplicated[39:32]; - if (ByteMaskM[5]) HWDATA[47:40] = WriteDataSubwordDuplicated[47:40]; - if (ByteMaskM[6]) HWDATA[55:48] = WriteDataSubwordDuplicated[55:48]; - if (ByteMaskM[7]) HWDATA[63:56] = WriteDataSubwordDuplicated[63:56]; - end + always_comb begin + HWDATA=HRDATA; + if (ByteMaskM[0]) HWDATA[7:0] = WriteDataSubwordDuplicated[7:0]; + if (ByteMaskM[1]) HWDATA[15:8] = WriteDataSubwordDuplicated[15:8]; + if (ByteMaskM[2]) HWDATA[23:16] = WriteDataSubwordDuplicated[23:16]; + if (ByteMaskM[3]) HWDATA[31:24] = WriteDataSubwordDuplicated[31:24]; + if (ByteMaskM[4]) HWDATA[39:32] = WriteDataSubwordDuplicated[39:32]; + if (ByteMaskM[5]) HWDATA[47:40] = WriteDataSubwordDuplicated[47:40]; + if (ByteMaskM[6]) HWDATA[55:48] = WriteDataSubwordDuplicated[55:48]; + if 
(ByteMaskM[7]) HWDATA[63:56] = WriteDataSubwordDuplicated[63:56]; + end - end else begin:sww // 32-bit - logic [3:0] ByteMaskM; - // Compute write mask - always_comb - case(HSIZED[1:0]) - 2'b00: begin ByteMaskM = 4'b0000; ByteMaskM[HADDRD[1:0]] = 1; end // sb - 2'b01: if (HADDRD[1]) ByteMaskM = 4'b1100; - else ByteMaskM = 4'b0011; - 2'b10: ByteMaskM = 4'b1111; - default: ByteMaskM = 4'b111; // shouldn't happen - endcase + end else begin:sww // 32-bit + logic [3:0] ByteMaskM; + // Compute write mask + always_comb + case(HSIZED[1:0]) + 2'b00: begin ByteMaskM = 4'b0000; ByteMaskM[HADDRD[1:0]] = 1; end // sb + 2'b01: if (HADDRD[1]) ByteMaskM = 4'b1100; + else ByteMaskM = 4'b0011; + 2'b10: ByteMaskM = 4'b1111; + default: ByteMaskM = 4'b111; // shouldn't happen + endcase - // Handle subword writes - always_comb - case(HSIZED[1:0]) - 2'b00: WriteDataSubwordDuplicated = {4{HWDATAIN[7:0]}}; // sb - 2'b01: WriteDataSubwordDuplicated = {2{HWDATAIN[15:0]}}; // sh - 2'b10: WriteDataSubwordDuplicated = HWDATAIN; // sw - default: WriteDataSubwordDuplicated = HWDATAIN; // shouldn't happen - endcase + // Handle subword writes + always_comb + case(HSIZED[1:0]) + 2'b00: WriteDataSubwordDuplicated = {4{HWDATAIN[7:0]}}; // sb + 2'b01: WriteDataSubwordDuplicated = {2{HWDATAIN[15:0]}}; // sh + 2'b10: WriteDataSubwordDuplicated = HWDATAIN; // sw + default: WriteDataSubwordDuplicated = HWDATAIN; // shouldn't happen + endcase - always_comb begin - HWDATA=HRDATA; - if (ByteMaskM[0]) HWDATA[7:0] = WriteDataSubwordDuplicated[7:0]; - if (ByteMaskM[1]) HWDATA[15:8] = WriteDataSubwordDuplicated[15:8]; - if (ByteMaskM[2]) HWDATA[23:16] = WriteDataSubwordDuplicated[23:16]; - if (ByteMaskM[3]) HWDATA[31:24] = WriteDataSubwordDuplicated[31:24]; - end - - end - endgenerate + always_comb begin + HWDATA=HRDATA; + if (ByteMaskM[0]) HWDATA[7:0] = WriteDataSubwordDuplicated[7:0]; + if (ByteMaskM[1]) HWDATA[15:8] = WriteDataSubwordDuplicated[15:8]; + if (ByteMaskM[2]) HWDATA[23:16] = 
WriteDataSubwordDuplicated[23:16]; + if (ByteMaskM[3]) HWDATA[31:24] = WriteDataSubwordDuplicated[31:24]; + end + end endmodule diff --git a/pipelined/src/uncore/uart.sv b/pipelined/src/uncore/uart.sv index f32ba883b..4bbbd91d0 100644 --- a/pipelined/src/uncore/uart.sv +++ b/pipelined/src/uncore/uart.sv @@ -54,33 +54,31 @@ module uart ( assign HRESPUART = 0; // OK assign HREADYUART = 1; // should idle high during address phase and respond high when done; will need to be modified if UART ever needs more than 1 cycle to do something - generate - if (`XLEN == 64) begin:uart - always_comb begin - HREADUART = {Dout, Dout, Dout, Dout, Dout, Dout, Dout, Dout}; - case (A) - 3'b000: Din = HWDATA[7:0]; - 3'b001: Din = HWDATA[15:8]; - 3'b010: Din = HWDATA[23:16]; - 3'b011: Din = HWDATA[31:24]; - 3'b100: Din = HWDATA[39:32]; - 3'b101: Din = HWDATA[47:40]; - 3'b110: Din = HWDATA[55:48]; - 3'b111: Din = HWDATA[63:56]; - endcase - end - end else begin:uart // 32-bit - always_comb begin - HREADUART = {Dout, Dout, Dout, Dout}; - case (A[1:0]) - 2'b00: Din = HWDATA[7:0]; - 2'b01: Din = HWDATA[15:8]; - 2'b10: Din = HWDATA[23:16]; - 2'b11: Din = HWDATA[31:24]; - endcase - end + if (`XLEN == 64) begin:uart + always_comb begin + HREADUART = {Dout, Dout, Dout, Dout, Dout, Dout, Dout, Dout}; + case (A) + 3'b000: Din = HWDATA[7:0]; + 3'b001: Din = HWDATA[15:8]; + 3'b010: Din = HWDATA[23:16]; + 3'b011: Din = HWDATA[31:24]; + 3'b100: Din = HWDATA[39:32]; + 3'b101: Din = HWDATA[47:40]; + 3'b110: Din = HWDATA[55:48]; + 3'b111: Din = HWDATA[63:56]; + endcase + end + end else begin:uart // 32-bit + always_comb begin + HREADUART = {Dout, Dout, Dout, Dout}; + case (A[1:0]) + 2'b00: Din = HWDATA[7:0]; + 2'b01: Din = HWDATA[15:8]; + 2'b10: Din = HWDATA[23:16]; + 2'b11: Din = HWDATA[31:24]; + endcase end - endgenerate + end logic BAUDOUTb; // loop tx clock BAUDOUTb back to rx clock RCLK // *** make sure reads don't occur on UART unless fully selected because they could change state. 
This applies to all peripherals diff --git a/pipelined/src/uncore/uartPC16550D.sv b/pipelined/src/uncore/uartPC16550D.sv index 144d28b36..56ec24f61 100644 --- a/pipelined/src/uncore/uartPC16550D.sv +++ b/pipelined/src/uncore/uartPC16550D.sv @@ -257,13 +257,10 @@ module uartPC16550D( else if (fifoenabled & ~rxfifoempty & rxbaudpulse & ~rxfifotimeout) rxtimeoutcnt <= #1 rxtimeoutcnt+1; // *** not right end - generate - if(`QEMU) - assign rxcentered = rxbaudpulse & (rxoversampledcnt[1:0] == 2'b10); // implies rxstate = UART_ACTIVE - else - assign rxcentered = rxbaudpulse & (rxoversampledcnt == 4'b1000); // implies rxstate = UART_ACTIVE - endgenerate - + // ***explain why + if(`QEMU) assign rxcentered = rxbaudpulse & (rxoversampledcnt[1:0] == 2'b10); // implies rxstate = UART_ACTIVE + else assign rxcentered = rxbaudpulse & (rxoversampledcnt == 4'b1000); // implies rxstate = UART_ACTIVE + assign rxbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1; // start bit + data bits + (parity bit) + stop bit /////////////////////////////////////////// @@ -325,22 +322,20 @@ module uartPC16550D( // detect any errors in rx fifo // although rxfullbit looks like a combinational loop, in one bit rxfifotail == i and breaks the loop // tail is normally higher than head, but might wrap around. unwrapped variable adds 16 to eliminate wrapping - generate - assign rxfifotailunwrapped = rxfifotail < rxfifohead ? {1'b1, rxfifotail} : {1'b0, rxfifotail}; - genvar i; - for (i=0; i<32; i++) begin:rxfull - if (i == 0) assign rxfullbitunwrapped[i] = (rxfifohead==0) & (rxfifotail != 0); - else assign rxfullbitunwrapped[i] = ({1'b0,rxfifohead}==i | rxfullbitunwrapped[i-1]) & (rxfifotailunwrapped != i); - end - for (i=0; i<16; i++) begin:rx - assign RXerrbit[i] = |rxfifo[i][10:8]; // are any of the error conditions set? 
- assign rxfullbit[i] = rxfullbitunwrapped[i] | rxfullbitunwrapped[i+16]; - /* if (i > 0) - assign rxfullbit[i] = ((rxfifohead==i) | rxfullbit[i-1]) & (rxfifotail != i); - else - assign rxfullbit[0] = ((rxfifohead==i) | rxfullbit[15]) & (rxfifotail != i);*/ - end - endgenerate + assign rxfifotailunwrapped = rxfifotail < rxfifohead ? {1'b1, rxfifotail} : {1'b0, rxfifotail}; + genvar i; + for (i=0; i<32; i++) begin:rxfull + if (i == 0) assign rxfullbitunwrapped[i] = (rxfifohead==0) & (rxfifotail != 0); + else assign rxfullbitunwrapped[i] = ({1'b0,rxfifohead}==i | rxfullbitunwrapped[i-1]) & (rxfifotailunwrapped != i); + end + for (i=0; i<16; i++) begin:rx + assign RXerrbit[i] = |rxfifo[i][10:8]; // are any of the error conditions set? + assign rxfullbit[i] = rxfullbitunwrapped[i] | rxfullbitunwrapped[i+16]; + /* if (i > 0) + assign rxfullbit[i] = ((rxfifohead==i) | rxfullbit[i-1]) & (rxfifotail != i); + else + assign rxfullbit[0] = ((rxfifohead==i) | rxfullbit[15]) & (rxfifotail != i);*/ + end assign rxfifohaserr = |(RXerrbit & rxfullbit); // receive buffer register and ready bit @@ -383,13 +378,9 @@ module uartPC16550D( end assign txbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1 + {3'b000, LCR[2]} - 4'd1; // start bit + data bits + (parity bit) + stop bit(s) - generate - if (`QEMU) - assign txnextbit = txbaudpulse & (txoversampledcnt[1:0] == 2'b00); // implies txstate = UART_ACTIVE - else - assign txnextbit = txbaudpulse & (txoversampledcnt == 4'b0000); // implies txstate = UART_ACTIVE - endgenerate - + // *** explain; is this necessary? 
+ if (`QEMU) assign txnextbit = txbaudpulse & (txoversampledcnt[1:0] == 2'b00); // implies txstate = UART_ACTIVE + else assign txnextbit = txbaudpulse & (txoversampledcnt == 4'b0000); // implies txstate = UART_ACTIVE /////////////////////////////////////////// // transmit holding register, shift register, FIFO diff --git a/pipelined/src/wally/wallypipelinedhart.sv b/pipelined/src/wally/wallypipelinedhart.sv index d4ffc3cf4..59ba0aee8 100644 --- a/pipelined/src/wally/wallypipelinedhart.sv +++ b/pipelined/src/wally/wallypipelinedhart.sv @@ -304,88 +304,84 @@ module wallypipelinedhart ( .FlushF, .FlushD, .FlushE, .FlushM, .FlushW ); // global stall and flush control - generate - if (`ZICSR_SUPPORTED) begin:priv - privileged priv( - .clk, .reset, - .FlushD, .FlushE, .FlushM, .FlushW, - .StallD, .StallE, .StallM, .StallW, - .CSRReadM, .CSRWriteM, .SrcAM, .PCM, - .InstrM, .CSRReadValW, .PrivilegedNextPCM, - .RetM, .TrapM, - .ITLBFlushF, .DTLBFlushM, - .InstrValidM, .CommittedM, - .FRegWriteM, .LoadStallD, - .BPPredDirWrongM, .BTBPredPCWrongM, - .RASPredPCWrongM, .BPPredClassNonCFIWrongM, - .InstrClassM, .DCacheMiss, .DCacheAccess, .PrivilegedM, - .ITLBInstrPageFaultF, .DTLBLoadPageFaultM, .DTLBStorePageFaultM, - .InstrMisalignedFaultM, .IllegalIEUInstrFaultD, .IllegalFPUInstrD, - .LoadMisalignedFaultM, .StoreMisalignedFaultM, - .TimerIntM, .ExtIntM, .SwIntM, - .MTIME_CLINT, - .InstrMisalignedAdrM, .IEUAdrM, - .SetFflagsM, - // Trap signals from pmp/pma in mmu - // *** do these need to be split up into one for dmem and one for ifu? - // instead, could we only care about the instr and F pins that come from ifu and only care about the load/store and m pins that come from dmem? 
- .InstrAccessFaultF, .LoadAccessFaultM, .StoreAccessFaultM, - .ExceptionM, .PendingInterruptM, .IllegalFPUInstrE, - .PrivilegeModeW, .SATP_REGW, - .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, - .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, - .FRM_REGW,.BreakpointFaultM, .EcallFaultM - ); - end else begin - assign CSRReadValW = 0; - assign PrivilegedNextPCM = 0; - assign RetM = 0; - assign TrapM = 0; - assign ITLBFlushF = 0; - assign DTLBFlushM = 0; - end - if (`M_SUPPORTED) begin:mdu - muldiv mdu( - .clk, .reset, - .ForwardedSrcAE, .ForwardedSrcBE, - .Funct3E, .Funct3M, .MulDivE, .W64E, - .MulDivResultW, .DivBusyE, - .StallM, .StallW, .FlushM, .FlushW - ); - end else begin // no M instructions supported - assign MulDivResultW = 0; - assign DivBusyE = 0; - end + if (`ZICSR_SUPPORTED) begin:priv + privileged priv( + .clk, .reset, + .FlushD, .FlushE, .FlushM, .FlushW, + .StallD, .StallE, .StallM, .StallW, + .CSRReadM, .CSRWriteM, .SrcAM, .PCM, + .InstrM, .CSRReadValW, .PrivilegedNextPCM, + .RetM, .TrapM, + .ITLBFlushF, .DTLBFlushM, + .InstrValidM, .CommittedM, + .FRegWriteM, .LoadStallD, + .BPPredDirWrongM, .BTBPredPCWrongM, + .RASPredPCWrongM, .BPPredClassNonCFIWrongM, + .InstrClassM, .DCacheMiss, .DCacheAccess, .PrivilegedM, + .ITLBInstrPageFaultF, .DTLBLoadPageFaultM, .DTLBStorePageFaultM, + .InstrMisalignedFaultM, .IllegalIEUInstrFaultD, .IllegalFPUInstrD, + .LoadMisalignedFaultM, .StoreMisalignedFaultM, + .TimerIntM, .ExtIntM, .SwIntM, + .MTIME_CLINT, + .InstrMisalignedAdrM, .IEUAdrM, + .SetFflagsM, + // Trap signals from pmp/pma in mmu + // *** do these need to be split up into one for dmem and one for ifu? + // instead, could we only care about the instr and F pins that come from ifu and only care about the load/store and m pins that come from dmem? 
+ .InstrAccessFaultF, .LoadAccessFaultM, .StoreAccessFaultM, + .ExceptionM, .PendingInterruptM, .IllegalFPUInstrE, + .PrivilegeModeW, .SATP_REGW, + .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, + .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, + .FRM_REGW,.BreakpointFaultM, .EcallFaultM + ); + end else begin + assign CSRReadValW = 0; + assign PrivilegedNextPCM = 0; + assign RetM = 0; + assign TrapM = 0; + assign ITLBFlushF = 0; + assign DTLBFlushM = 0; + end + if (`M_SUPPORTED) begin:mdu + muldiv mdu( + .clk, .reset, + .ForwardedSrcAE, .ForwardedSrcBE, + .Funct3E, .Funct3M, .MulDivE, .W64E, + .MulDivResultW, .DivBusyE, + .StallM, .StallW, .FlushM, .FlushW + ); + end else begin // no M instructions supported + assign MulDivResultW = 0; + assign DivBusyE = 0; + end - if (`F_SUPPORTED) begin:fpu - fpu fpu( - .clk, .reset, - .FRM_REGW, // Rounding mode from CSR - .InstrD, // instruction from IFU - .ReadDataW,// Read data from memory - .ForwardedSrcAE, // Integer input being processed (from IEU) - .StallE, .StallM, .StallW, // stall signals from HZU - .FlushE, .FlushM, .FlushW, // flush signals from HZU - .RdM, .RdW, // which FP register to write to (from IEU) - .FRegWriteM, // FP register write enable - .FStallD, // Stall the decode stage - .FWriteIntE, // integer register write enable - .FWriteDataE, // Data to be written to memory - .FIntResM, // data to be written to integer register - .FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) - .IllegalFPUInstrD, // Is the instruction an illegal fpu instruction - .SetFflagsM // FPU flags (to privileged unit) - ); // floating point unit - end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low - assign FStallD = 0; - assign FWriteIntE = 0; - assign FWriteDataE = 0; - assign FIntResM = 0; - assign FDivBusyE = 0; - assign IllegalFPUInstrD = 1; - assign SetFflagsM = 0; - end - - endgenerate - // Priveleged block operates in M and W stages, handling CSRs and exceptions + if (`F_SUPPORTED) begin:fpu + fpu 
fpu( + .clk, .reset, + .FRM_REGW, // Rounding mode from CSR + .InstrD, // instruction from IFU + .ReadDataW,// Read data from memory + .ForwardedSrcAE, // Integer input being processed (from IEU) + .StallE, .StallM, .StallW, // stall signals from HZU + .FlushE, .FlushM, .FlushW, // flush signals from HZU + .RdM, .RdW, // which FP register to write to (from IEU) + .FRegWriteM, // FP register write enable + .FStallD, // Stall the decode stage + .FWriteIntE, // integer register write enable + .FWriteDataE, // Data to be written to memory + .FIntResM, // data to be written to integer register + .FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) + .IllegalFPUInstrD, // Is the instruction an illegal fpu instruction + .SetFflagsM // FPU flags (to privileged unit) + ); // floating point unit + end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low + assign FStallD = 0; + assign FWriteIntE = 0; + assign FWriteDataE = 0; + assign FIntResM = 0; + assign FDivBusyE = 0; + assign IllegalFPUInstrD = 1; + assign SetFflagsM = 0; + end endmodule diff --git a/pipelined/testbench/common/logging.sv b/pipelined/testbench/common/logging.sv deleted file mode 100644 index f37f63402..000000000 --- a/pipelined/testbench/common/logging.sv +++ /dev/null @@ -1,10 +0,0 @@ -module logging( - input logic clk, reset, - input logic [31:0] HADDR, - input logic [1:0] HTRANS); - - always @(posedge clk) - if (HTRANS != 2'b00 & HADDR == 0) - $display("%t Warning: access to memory address 0\n", $realtime); -endmodule - diff --git a/pipelined/testbench/common/sdModel.sv b/pipelined/testbench/common/sdModel.sv index b01fd6ec7..1e36c23e2 100644 --- a/pipelined/testbench/common/sdModel.sv +++ b/pipelined/testbench/common/sdModel.sv @@ -121,11 +121,9 @@ module sdModel integer sdModel_file_desc; genvar i; - generate - for(i=0; i<4; i=i+1) begin:CRC_16_gen - sd_crc_16 CRC_16_i (crcDat_in[i],crcDat_en, sdClk, crcDat_rst, crcDat_out[i]); - end - endgenerate + for(i=0; i<4; i=i+1) 
begin:CRC_16_gen + sd_crc_16 CRC_16_i (crcDat_in[i],crcDat_en, sdClk, crcDat_rst, crcDat_out[i]); + end sd_crc_7 crc_7 ( diff --git a/pipelined/testbench/testbench-fpga.sv b/pipelined/testbench/testbench-fpga.sv index 15840b27b..cd479750b 100644 --- a/pipelined/testbench/testbench-fpga.sv +++ b/pipelined/testbench/testbench-fpga.sv @@ -763,17 +763,12 @@ string tests32f[] = '{ .done(DCacheFlushDone)); - generate - // initialize the branch predictor - if (`BPRED_ENABLED == 1) begin : bpred - - initial begin - $readmemb(`TWO_BIT_PRELOAD, dut.wallypipelinedsoc.hart.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem); - $readmemb(`BTB_PRELOAD, dut.wallypipelinedsoc.hart.ifu.bpred.bpred.TargetPredictor.memory.mem); - end + // initialize the branch predictor + if (`BPRED_ENABLED == 1) + initial begin + $readmemb(`TWO_BIT_PRELOAD, dut.wallypipelinedsoc.hart.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem); + $readmemb(`BTB_PRELOAD, dut.wallypipelinedsoc.hart.ifu.bpred.bpred.TargetPredictor.memory.mem); end - endgenerate - endmodule module riscvassertions(); @@ -783,14 +778,14 @@ module riscvassertions(); assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double without supporting float"); assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32"); assert (`DCACHE_WAYSIZEINBYTES <= 4096 | `MEM_DCACHE == 0 | `MEM_VIRTMEM == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`DCACHE_BLOCKLENINBITS >= 128 | `MEM_DCACHE == 0) else $error("DCACHE_BLOCKLENINBITS must be at least 128 when caches are enabled"); - assert (`DCACHE_BLOCKLENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_BLOCKLENINBITS must be smaller than way size"); + assert (`DCACHE_LINELENINBITS >= 128 | `MEM_DCACHE == 0) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); + assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else 
$error("DCACHE_LINELENINBITS must be smaller than way size"); assert (`ICACHE_WAYSIZEINBYTES <= 4096 | `MEM_ICACHE == 0 | `MEM_VIRTMEM == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`ICACHE_BLOCKLENINBITS >= 32 | `MEM_ICACHE == 0) else $error("ICACHE_BLOCKLENINBITS must be at least 32 when caches are enabled"); - assert (`ICACHE_BLOCKLENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_BLOCKLENINBITS must be smaller than way size"); - assert (2**$clog2(`DCACHE_BLOCKLENINBITS) == `DCACHE_BLOCKLENINBITS) else $error("DCACHE_BLOCKLENINBITS must be a power of 2"); + assert (`ICACHE_LINELENINBITS >= 32 | `MEM_ICACHE == 0) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); + assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size"); + assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS) else $error("DCACHE_LINELENINBITS must be a power of 2"); assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ICACHE_BLOCKLENINBITS) == `ICACHE_BLOCKLENINBITS) else $error("ICACHE_BLOCKLENINBITS must be a power of 2"); + assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS) else $error("ICACHE_LINELENINBITS must be a power of 2"); assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); assert (`ICACHE_NUMWAYS == 1 | `MEM_ICACHE == 0) else $warning("Multiple Instruction Cache ways not yet implemented"); assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES) else $error("ITLB_ENTRIES must be a power of 2"); @@ -811,12 +806,12 @@ module DCacheFlushFSM localparam integer numlines = testbench.dut.wallypipelinedsoc.hart.lsu.dcache.NUMLINES; localparam integer numways = 
testbench.dut.wallypipelinedsoc.hart.lsu.dcache.NUMWAYS; - localparam integer blockbytelen = testbench.dut.wallypipelinedsoc.hart.lsu.dcache.BLOCKBYTELEN; - localparam integer numwords = testbench.dut.wallypipelinedsoc.hart.lsu.dcache.BLOCKLEN/`XLEN; + localparam integer linebytelen = testbench.dut.wallypipelinedsoc.hart.lsu.dcache.LINEBYTELEN; + localparam integer numwords = testbench.dut.wallypipelinedsoc.hart.lsu.dcache.LINELEN/`XLEN; localparam integer lognumlines = $clog2(numlines); - localparam integer logblockbytelen = $clog2(blockbytelen); + localparam integer loglinebytelen = $clog2(linebytelen); localparam integer lognumways = $clog2(numways); - localparam integer tagstart = lognumlines + logblockbytelen; + localparam integer tagstart = lognumlines + loglinebytelen; @@ -830,29 +825,26 @@ module DCacheFlushFSM logic [`XLEN-1:0] ShadowRAM[`RAM_BASE>>(1+`XLEN/32):(`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32)]; - generate - for(index = 0; index < numlines; index++) begin - for(way = 0; way < numways; way++) begin - for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin - copyShadow #(.tagstart(tagstart), - .logblockbytelen(logblockbytelen)) - copyShadow(.clk, - .start, - .tag(testbench.dut.wallypipelinedsoc.hart.lsu.dcache.MemWay[way].CacheTagMem.StoredData[index]), - .valid(testbench.dut.wallypipelinedsoc.hart.lsu.dcache.MemWay[way].ValidBits[index]), - .dirty(testbench.dut.wallypipelinedsoc.hart.lsu.dcache.MemWay[way].DirtyBits[index]), - .data(testbench.dut.wallypipelinedsoc.hart.lsu.dcache.MemWay[way].word[cacheWord].CacheDataMem.StoredData[index]), - .index(index), - .cacheWord(cacheWord), - .CacheData(CacheData[way][index][cacheWord]), - .CacheAdr(CacheAdr[way][index][cacheWord]), - .CacheTag(CacheTag[way][index][cacheWord]), - .CacheValid(CacheValid[way][index][cacheWord]), - .CacheDirty(CacheDirty[way][index][cacheWord])); - end + for(index = 0; index < numlines; index++) begin + for(way = 0; way < numways; way++) begin + for(cacheWord = 0; cacheWord < 
numwords; cacheWord++) begin + copyShadow #(.tagstart(tagstart), .loglinebytelen(loglinebytelen)) + copyShadow(.clk, + .start, + .tag(testbench.dut.wallypipelinedsoc.hart.lsu.dcache.MemWay[way].CacheTagMem.StoredData[index]), + .valid(testbench.dut.wallypipelinedsoc.hart.lsu.dcache.MemWay[way].ValidBits[index]), + .dirty(testbench.dut.wallypipelinedsoc.hart.lsu.dcache.MemWay[way].DirtyBits[index]), + .data(testbench.dut.wallypipelinedsoc.hart.lsu.dcache.MemWay[way].word[cacheWord].CacheDataMem.StoredData[index]), + .index(index), + .cacheWord(cacheWord), + .CacheData(CacheData[way][index][cacheWord]), + .CacheAdr(CacheAdr[way][index][cacheWord]), + .CacheTag(CacheTag[way][index][cacheWord]), + .CacheValid(CacheValid[way][index][cacheWord]), + .CacheDirty(CacheDirty[way][index][cacheWord])); end end - endgenerate + end integer i, j, k; @@ -879,7 +871,7 @@ module DCacheFlushFSM endmodule module copyShadow - #(parameter tagstart, logblockbytelen) + #(parameter tagstart, loglinebytelen) (input logic clk, input logic start, input logic [`PA_BITS-1:tagstart] tag, @@ -900,7 +892,7 @@ module copyShadow CacheValid = valid; CacheDirty = dirty; CacheData = data; - CacheAdr = (tag << tagstart) + (index << logblockbytelen) + (cacheWord << $clog2(`XLEN/8)); + CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(`XLEN/8)); end end diff --git a/pipelined/testbench/testbench-linux.sv b/pipelined/testbench/testbench-linux.sv index 3eae5e8f1..74925056c 100644 --- a/pipelined/testbench/testbench-linux.sv +++ b/pipelined/testbench/testbench-linux.sv @@ -22,7 +22,7 @@ // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-// When letting Wally go for it, let wally generate own interrupts +// When letting Wally go for it, let wally make own interrupts /////////////////////////////////////////// `include "wally-config.vh" @@ -36,7 +36,7 @@ // 4: print memory accesses whenever they happen // 5: print everything -module testbench(); +module testbench; /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////// CONFIG //////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// @@ -280,13 +280,6 @@ module testbench(); `INIT_CHECKPOINT_SIMPLE_ARRAY(RF, [`XLEN-1:0],31,1); `INIT_CHECKPOINT_SIMPLE_ARRAY(HPMCOUNTER, [`XLEN-1:0],`COUNTERS-1,3); - generate - genvar i; -/* -----\/----- EXCLUDED -----\/----- - `INIT_CHECKPOINT_GENBLK_ARRAY(PMP_BASE, PMPCFG, [7:0],`PMP_ENTRIES-1,0); - `INIT_CHECKPOINT_GENBLK_ARRAY(PMP_BASE, PMPADDR, [`XLEN-1:0],`PMP_ENTRIES-1,0); - -----/\----- EXCLUDED -----/\----- */ - endgenerate `INIT_CHECKPOINT_VAL(PC, [`XLEN-1:0]); `INIT_CHECKPOINT_VAL(MEDELEG, [`XLEN-1:0]); `INIT_CHECKPOINT_VAL(MIDELEG, [`XLEN-1:0]); diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 09e060595..c744eba9f 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -71,7 +71,6 @@ logic [3:0] dummy; // check assertions for a legal configuration riscvassertions riscvassertions(); - logging logging(clk, reset, dut.uncore.HADDR, dut.uncore.HTRANS); // pick tests based on modes supported initial begin @@ -88,7 +87,7 @@ logic [3:0] dummy; "arch64d": if (`D_SUPPORTED) tests = arch64d; "imperas64i": tests = imperas64i; "imperas64p": tests = imperas64p; - "imperas64mmu": if (`MEM_VIRTMEM) tests = imperas64mmu; +// "imperas64mmu": if (`MEM_VIRTMEM) tests = imperas64mmu; "imperas64f": if (`F_SUPPORTED) tests = imperas64f; "imperas64d": if (`D_SUPPORTED) tests = imperas64d; "imperas64m": if (`M_SUPPORTED) tests = imperas64m; @@ 
-111,7 +110,7 @@ logic [3:0] dummy; "arch32f": if (`F_SUPPORTED) tests = arch32f; "imperas32i": tests = imperas32i; "imperas32p": tests = imperas32p; - "imperas32mmu": if (`MEM_VIRTMEM) tests = imperas32mmu; +// "imperas32mmu": if (`MEM_VIRTMEM) tests = imperas32mmu; "imperas32f": if (`F_SUPPORTED) tests = imperas32f; "imperas32m": if (`M_SUPPORTED) tests = imperas32m; "imperas32a": if (`A_SUPPORTED) tests = imperas32a; @@ -297,13 +296,8 @@ logic [3:0] dummy; // or sw gp, -56(t0) // or on a jump to self infinite loop (6f) for RISC-V Arch tests logic ecf; // remove this once we don't rely on old Imperas tests with Ecalls - generate - if (`ZICSR_SUPPORTED) begin - assign ecf = dut.hart.priv.priv.EcallFaultM; - end else begin - assign ecf = 0; - end - endgenerate + if (`ZICSR_SUPPORTED) assign ecf = dut.hart.priv.priv.EcallFaultM; + else assign ecf = 0; assign DCacheFlushStart = ecf & (dut.hart.ieu.dp.regf.rf[3] == 1 | (dut.hart.ieu.dp.regf.we3 & @@ -311,24 +305,17 @@ logic [3:0] dummy; dut.hart.ieu.dp.regf.wd3 == 1)) | (dut.hart.ifu.InstrM == 32'h6f | dut.hart.ifu.InstrM == 32'hfc32a423 | dut.hart.ifu.InstrM == 32'hfc32a823) & dut.hart.ieu.c.InstrValidM; - // **** Fix when the check in the shadow ram is fixed. 
DCacheFlushFSM DCacheFlushFSM(.clk(clk), .reset(reset), .start(DCacheFlushStart), .done(DCacheFlushDone)); - - generate - // initialize the branch predictor - if (`BPRED_ENABLED == 1) begin : bpred - - initial begin - $readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem); - $readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.bpred.TargetPredictor.memory.mem); - end - end - endgenerate - + // initialize the branch predictor + if (`BPRED_ENABLED == 1) + initial begin + $readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem); + $readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.bpred.TargetPredictor.memory.mem); + end endmodule module riscvassertions; @@ -339,14 +326,14 @@ module riscvassertions; assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)"); assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32"); assert (`DCACHE_WAYSIZEINBYTES <= 4096 | `MEM_DCACHE == 0 | `MEM_VIRTMEM == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`DCACHE_BLOCKLENINBITS >= 128 | `MEM_DCACHE == 0) else $error("DCACHE_BLOCKLENINBITS must be at least 128 when caches are enabled"); - assert (`DCACHE_BLOCKLENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_BLOCKLENINBITS must be smaller than way size"); + assert (`DCACHE_LINELENINBITS >= 128 | `MEM_DCACHE == 0) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); + assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size"); assert (`ICACHE_WAYSIZEINBYTES <= 4096 | `MEM_ICACHE == 0 | `MEM_VIRTMEM == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`ICACHE_BLOCKLENINBITS >= 32 | `MEM_ICACHE == 0) else $error("ICACHE_BLOCKLENINBITS must be at 
least 32 when caches are enabled"); - assert (`ICACHE_BLOCKLENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_BLOCKLENINBITS must be smaller than way size"); - assert (2**$clog2(`DCACHE_BLOCKLENINBITS) == `DCACHE_BLOCKLENINBITS | `MEM_DCACHE==0) else $error("DCACHE_BLOCKLENINBITS must be a power of 2"); + assert (`ICACHE_LINELENINBITS >= 32 | `MEM_ICACHE == 0) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); + assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size"); + assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | `MEM_DCACHE==0) else $error("DCACHE_LINELENINBITS must be a power of 2"); assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | `MEM_DCACHE==0) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ICACHE_BLOCKLENINBITS) == `ICACHE_BLOCKLENINBITS | `MEM_ICACHE==0) else $error("ICACHE_BLOCKLENINBITS must be a power of 2"); + assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | `MEM_ICACHE==0) else $error("ICACHE_LINELENINBITS must be a power of 2"); assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | `MEM_ICACHE==0) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `MEM_VIRTMEM==0) else $error("ITLB_ENTRIES must be a power of 2"); assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `MEM_VIRTMEM==0) else $error("DTLB_ENTRIES must be a power of 2"); @@ -371,16 +358,15 @@ module DCacheFlushFSM logic [`XLEN-1:0] ShadowRAM[`RAM_BASE>>(1+`XLEN/32):(`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32)]; - generate if(`MEM_DCACHE) begin localparam integer numlines = testbench.dut.hart.lsu.dcache.dcache.NUMLINES; localparam integer numways = testbench.dut.hart.lsu.dcache.dcache.NUMWAYS; - localparam integer blockbytelen = testbench.dut.hart.lsu.dcache.dcache.BLOCKBYTELEN; - localparam integer numwords = 
testbench.dut.hart.lsu.dcache.dcache.BLOCKLEN/`XLEN; + localparam integer linebytelen = testbench.dut.hart.lsu.dcache.dcache.LINEBYTELEN; + localparam integer numwords = testbench.dut.hart.lsu.dcache.dcache.LINELEN/`XLEN; localparam integer lognumlines = $clog2(numlines); - localparam integer logblockbytelen = $clog2(blockbytelen); + localparam integer loglinebytelen = $clog2(linebytelen); localparam integer lognumways = $clog2(numways); - localparam integer tagstart = lognumlines + logblockbytelen; + localparam integer tagstart = lognumlines + loglinebytelen; @@ -394,7 +380,7 @@ module DCacheFlushFSM for(way = 0; way < numways; way++) begin for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin copyShadow #(.tagstart(tagstart), - .logblockbytelen(logblockbytelen)) + .loglinebytelen(loglinebytelen)) copyShadow(.clk, .start, .tag(testbench.dut.hart.lsu.dcache.dcache.MemWay[way].CacheTagMem.StoredData[index]), @@ -431,19 +417,11 @@ module DCacheFlushFSM end - endgenerate - - - - - flop #(1) doneReg(.clk(clk), - .d(start), - .q(done)); - + flop #(1) doneReg(.clk, .d(start), .q(done)); endmodule module copyShadow - #(parameter tagstart, logblockbytelen) + #(parameter tagstart, loglinebytelen) (input logic clk, input logic start, input logic [`PA_BITS-1:tagstart] tag, @@ -464,7 +442,7 @@ module copyShadow CacheValid = valid; CacheDirty = dirty; CacheData = data; - CacheAdr = (tag << tagstart) + (index << logblockbytelen) + (cacheWord << $clog2(`XLEN/8)); + CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(`XLEN/8)); end end diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index c4484c8c6..672aedbc4 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -36,6 +36,7 @@ string tvpaths[] = '{ "../../tests/imperas-riscv-tests/work/" }; + // *** make sure these are somewhere string imperas64a[] = '{ `MYIMPERASTEST, "rv64a/WALLY-AMO", "2110", @@ -48,7 +49,7 @@ string tvpaths[] = '{ 
"rv32a/WALLY-LRSC", "2110" }; - string imperas32mmu[] = '{ +/* string imperas32mmu[] = '{ `MYIMPERASTEST, "rv32mmu/WALLY-MMU-SV32", "3000", "rv32mmu/WALLY-PMP", "3000" @@ -61,7 +62,7 @@ string tvpaths[] = '{ "rv64mmu/WALLY-MMU-SV39", "3000", "rv64mmu/WALLY-PMP", "3000" //"rv64mmu/WALLY-PMA", "3000" - }; + }; */ // *** restore CSR tests from Imperas old