mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-03 18:25:27 +00:00
Revert to 98b824
This commit is contained in:
parent
74979cdc82
commit
93bb8036be
251
pipelined/src/cache/cache.sv
vendored
251
pipelined/src/cache/cache.sv
vendored
@ -8,96 +8,108 @@
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
// MIT LICENSE
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
// software and associated documentation files (the "Software"), to deal in the Software
|
||||
// without restriction, including without limitation the rights to use, copy, modify, merge,
|
||||
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
||||
// to whom the Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
// The above copyright notice and this permission notice shall be included in all copies or
|
||||
// substantial portions of the Software.
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
// OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, DCACHE) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic Stall, // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY
|
||||
input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations)
|
||||
// cpu side
|
||||
input logic [1:0] CacheRW, // [1] Read, [0] Write
|
||||
input logic [1:0] CacheAtomic, // Atomic operation
|
||||
input logic FlushCache, // Flush all dirty lines back to memory
|
||||
input logic InvalidateCache, // Clear all valid bits
|
||||
input logic [11:0] NextAdr, // Virtual address, but we only use the lower 12 bits.
|
||||
input logic [`PA_BITS-1:0] PAdr, // Physical address
|
||||
input logic [(WORDLEN-1)/8:0] ByteMask, // Which bytes to write (D$ only)
|
||||
input logic [WORDLEN-1:0] CacheWriteData, // Data to write to cache (D$ only)
|
||||
output logic CacheCommitted, // Cache has started bus operation that shouldn't be interrupted
|
||||
output logic CacheStall, // Cache stalls pipeline during multicycle operation
|
||||
output logic [WORDLEN-1:0] ReadDataWord, // Word read from cache (goes to CPU and bus)
|
||||
// to performance counters to cpu
|
||||
output logic CacheMiss, // Cache miss
|
||||
output logic CacheAccess, // Cache access
|
||||
// lsu control
|
||||
input logic SelHPTW, // Use PAdr from Hardware Page Table Walker rather than NextAdr
|
||||
// Bus fsm interface
|
||||
input logic CacheBusAck, // Bus operation completed
|
||||
input logic SelBusBeat, // Word in cache line comes from BeatCount
|
||||
input logic [LOGBWPL-1:0] BeatCount, // Beat in burst
|
||||
input logic [LINELEN-1:0] FetchBuffer, // Buffer long enough to hold entire cache line arriving from bus
|
||||
output logic [1:0] CacheBusRW, // [1] Read or [0] write bus
|
||||
output logic [`PA_BITS-1:0] CacheBusAdr // Address for bus access
|
||||
);
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
// cpu side
|
||||
input logic FlushStage,
|
||||
input logic Stall,
|
||||
input logic [1:0] CacheRW,
|
||||
input logic [1:0] CacheAtomic,
|
||||
input logic FlushCache,
|
||||
input logic InvalidateCache,
|
||||
input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits.
|
||||
input logic [`PA_BITS-1:0] PAdr, // physical address
|
||||
input logic [(WORDLEN-1)/8:0] ByteMask,
|
||||
input logic [WORDLEN-1:0] CacheWriteData,
|
||||
output logic CacheCommitted,
|
||||
output logic CacheStall,
|
||||
// to performance counters to cpu
|
||||
output logic CacheMiss,
|
||||
output logic CacheAccess,
|
||||
// lsu control
|
||||
input logic SelHPTW,
|
||||
// Bus fsm interface
|
||||
output logic [1:0] CacheBusRW,
|
||||
input logic CacheBusAck,
|
||||
input logic SelBusBeat,
|
||||
input logic [LOGBWPL-1:0] BeatCount,
|
||||
input logic [LINELEN-1:0] FetchBuffer,
|
||||
output logic [`PA_BITS-1:0] CacheBusAdr,
|
||||
output logic [WORDLEN-1:0] ReadDataWord);
|
||||
|
||||
// Cache parameters
|
||||
localparam LINEBYTELEN = LINELEN/8; // Line length in bytes
|
||||
localparam OFFSETLEN = $clog2(LINEBYTELEN); // Number of bits in offset field
|
||||
localparam SETLEN = $clog2(NUMLINES); // Number of set bits
|
||||
localparam SETTOP = SETLEN+OFFSETLEN; // Number of set plus offset bits
|
||||
localparam TAGLEN = `PA_BITS - SETTOP; // Number of tag bits
|
||||
localparam WORDSPERLINE = LINELEN/WORDLEN; // Number of words in cache line
|
||||
localparam FLUSHADRTHRESHOLD = NUMLINES - 1; // Used to determine when flush is complete
|
||||
localparam LOGLLENBYTES = $clog2(WORDLEN/8); // Number of bits to address a word
|
||||
localparam CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN; // *** see if this is the same as WORDSPERLINE
|
||||
localparam LOGCWPL = $clog2(CACHEWORDSPERLINE); // ***
|
||||
localparam LINEBYTELEN = LINELEN/8;
|
||||
localparam OFFSETLEN = $clog2(LINEBYTELEN);
|
||||
localparam SETLEN = $clog2(NUMLINES);
|
||||
localparam SETTOP = SETLEN+OFFSETLEN;
|
||||
localparam TAGLEN = `PA_BITS - SETTOP;
|
||||
localparam WORDSPERLINE = LINELEN/WORDLEN;
|
||||
localparam FlushAdrThreshold = NUMLINES - 1;
|
||||
|
||||
logic SelAdr;
|
||||
logic [1:0] AdrSelMuxSel;
|
||||
logic [SETLEN-1:0] CAdr;
|
||||
logic [LINELEN-1:0] LineWriteData;
|
||||
logic ClearValid, ClearDirty, SetDirty, SetValid;
|
||||
logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0];
|
||||
logic [NUMWAYS-1:0] HitWay, ValidWay;
|
||||
logic CacheHit;
|
||||
logic [NUMWAYS-1:0] VictimWay, DirtyWay;
|
||||
logic LineDirty;
|
||||
logic [TAGLEN-1:0] TagWay [NUMWAYS-1:0];
|
||||
logic [TAGLEN-1:0] Tag;
|
||||
logic [SETLEN-1:0] FlushAdr, NextFlushAdr, FlushAdrP1;
|
||||
logic FlushAdrCntEn, FlushCntRst;
|
||||
logic FlushAdrFlag, FlushWayFlag;
|
||||
logic [NUMWAYS-1:0] FlushWay, NextFlushWay;
|
||||
logic FlushWayCntEn;
|
||||
logic SelWriteback;
|
||||
logic LRUWriteEn;
|
||||
logic SelFlush;
|
||||
logic ResetOrFlushCntRst;
|
||||
logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache;
|
||||
logic SelFetchBuffer;
|
||||
logic CacheEn;
|
||||
logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded;
|
||||
logic [LINELEN/8-1:0] LineByteMask, DemuxedByteMask, FetchBufferByteSel;
|
||||
logic SelAdr;
|
||||
logic [SETLEN-1:0] CAdr;
|
||||
logic [LINELEN-1:0] LineWriteData;
|
||||
logic ClearValid;
|
||||
logic ClearDirty;
|
||||
logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0];
|
||||
logic [NUMWAYS-1:0] HitWay, ValidWay;
|
||||
logic CacheHit;
|
||||
logic SetDirty;
|
||||
logic SetValid;
|
||||
logic [NUMWAYS-1:0] VictimWay;
|
||||
logic [NUMWAYS-1:0] DirtyWay;
|
||||
logic LineDirty;
|
||||
logic [TAGLEN-1:0] TagWay [NUMWAYS-1:0];
|
||||
logic [TAGLEN-1:0] Tag;
|
||||
logic [SETLEN-1:0] FlushAdr;
|
||||
logic [SETLEN-1:0] NextFlushAdr;
|
||||
logic [SETLEN-1:0] FlushAdrP1;
|
||||
logic FlushAdrCntEn;
|
||||
logic FlushCntRst;
|
||||
logic FlushAdrFlag;
|
||||
logic FlushWayFlag;
|
||||
logic [NUMWAYS-1:0] FlushWay;
|
||||
logic [NUMWAYS-1:0] NextFlushWay;
|
||||
logic FlushWayCntEn;
|
||||
logic SelWriteback;
|
||||
logic LRUWriteEn;
|
||||
logic SelFlush;
|
||||
logic ResetOrFlushCntRst;
|
||||
logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache;
|
||||
logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr;
|
||||
logic SelFetchBuffer;
|
||||
logic CacheEn;
|
||||
|
||||
genvar index;
|
||||
|
||||
localparam LOGLLENBYTES = $clog2(WORDLEN/8);
|
||||
localparam CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN;
|
||||
localparam LOGCWPL = $clog2(CACHEWORDSPERLINE);
|
||||
logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded;
|
||||
logic [LINELEN/8-1:0] LineByteMask, DemuxedByteMask, FetchBufferByteSel;
|
||||
genvar index;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Read Path
|
||||
@ -107,100 +119,91 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
||||
// and FlushAdr when handling D$ flushes
|
||||
// The icache must update to the newest PCNextF on flush as it is probably a trap. Trap
|
||||
// sets PCNextF to XTVEC and the icache must start reading the instruction.
|
||||
assign AdrSelMuxSel = {SelFlush, ((SelAdr | SelHPTW) & ~((DCACHE == 0) & FlushStage))};
|
||||
mux3 #(SETLEN) AdrSelMux(.d0(NextAdr[SETTOP-1:OFFSETLEN]), .d1(PAdr[SETTOP-1:OFFSETLEN]), .d2(FlushAdr),
|
||||
.s(AdrSelMuxSel), .y(CAdr));
|
||||
mux3 #(SETLEN) AdrSelMux(
|
||||
.d0(NextAdr[SETTOP-1:OFFSETLEN]), .d1(PAdr[SETTOP-1:OFFSETLEN]), .d2(FlushAdr),
|
||||
.s({SelFlush, ((SelAdr | SelHPTW) & ~((DCACHE == 0) & FlushStage))}), .y(CAdr));
|
||||
|
||||
// Array of cache ways, along with victim, hit, dirty, and read merging logic
|
||||
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE) CacheWays[NUMWAYS-1:0](
|
||||
.clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask,
|
||||
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE)
|
||||
CacheWays[NUMWAYS-1:0](.clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask,
|
||||
.SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay,
|
||||
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache);
|
||||
|
||||
// Select victim way for associative caches
|
||||
if(NUMWAYS > 1) begin:vict
|
||||
cacheLRU #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cacheLRU(
|
||||
.clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage),
|
||||
.SetValid, .PAdr(PAdr[SETTOP-1:OFFSETLEN]), .InvalidateCache, .FlushCache);
|
||||
end else
|
||||
assign VictimWay = 1'b1; // one hot.
|
||||
|
||||
assign CacheHit = |HitWay;
|
||||
assign LineDirty = |DirtyWay;
|
||||
|
||||
end else assign VictimWay = 1'b1; // one hot.
|
||||
assign CacheHit = | HitWay;
|
||||
assign LineDirty = | DirtyWay;
|
||||
// ReadDataLineWay is a 2d array of cache line len by number of ways.
|
||||
// Need to OR together each way in a bitwise manner.
|
||||
// Final part of the AO Mux. First is the AND in the cacheway.
|
||||
or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLineCache));
|
||||
or_rows #(NUMWAYS, TAGLEN) TagAOMux(.a(TagWay), .y(Tag));
|
||||
|
||||
// Data cache needs to choose word offset from PAdr or BeatCount to writeback dirty lines
|
||||
// like to fix this.
|
||||
if(DCACHE)
|
||||
mux2 #(LOGBWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]),
|
||||
.d1(BeatCount), .s(SelBusBeat),
|
||||
.y(WordOffsetAddr));
|
||||
else
|
||||
assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)];
|
||||
else assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)];
|
||||
|
||||
// Bypass cache array to save a cycle when finishing a load miss
|
||||
mux2 #(LINELEN) EarlyReturnMux(ReadDataLineCache, FetchBuffer, SelFetchBuffer, ReadDataLine);
|
||||
|
||||
// Select word from cache line
|
||||
subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL) subcachelineread(
|
||||
.PAdr(WordOffsetAddr), .ReadDataLine, .ReadDataWord);
|
||||
.PAdr(WordOffsetAddr),
|
||||
.ReadDataLine, .ReadDataWord);
|
||||
|
||||
// Bus address for fetch, writeback, or flush writeback
|
||||
mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
|
||||
.d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
|
||||
.d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}),
|
||||
.s({SelFlush, SelWriteback}), .y(CacheBusAdr));
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Write Path
|
||||
// Write Path: Write data and address. Muxes between writes from bus and writes from CPU.
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Adjust byte mask from word to cache line
|
||||
onehotdecoder #(LOGCWPL) adrdec(.bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded));
|
||||
onehotdecoder #(LOGCWPL) adrdec(
|
||||
.bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded));
|
||||
for(index = 0; index < 2**LOGCWPL; index++) begin
|
||||
assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0;
|
||||
end
|
||||
|
||||
assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1.
|
||||
logic [LINELEN/8-1:0] LineByteMask2;
|
||||
assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0;
|
||||
|
||||
// Merge write data into fetched cache line for store miss
|
||||
for(index = 0; index < LINELEN/8; index++) begin
|
||||
mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]),
|
||||
.d1(FetchBuffer[8*index+7:8*index]), .s(FetchBufferByteSel[index]), .y(LineWriteData[8*index+7:8*index]));
|
||||
end
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Flush logic
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Flush address (line number)
|
||||
mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
|
||||
.d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
|
||||
.d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}),
|
||||
.s({SelFlush, SelWriteback}), .y(CacheBusAdr));
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Flush address and way generation during flush
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
assign ResetOrFlushCntRst = reset | FlushCntRst;
|
||||
flopenr #(SETLEN) FlushAdrReg(clk, ResetOrFlushCntRst, FlushAdrCntEn, FlushAdrP1, NextFlushAdr);
|
||||
mux2 #(SETLEN) FlushAdrMux(NextFlushAdr, FlushAdrP1, FlushAdrCntEn, FlushAdr);
|
||||
flopenr #(SETLEN) FlushAdrReg(.clk, .reset(ResetOrFlushCntRst), .en(FlushAdrCntEn),
|
||||
.d(FlushAdrP1), .q(NextFlushAdr));
|
||||
assign FlushAdr = FlushAdrCntEn ? FlushAdrP1 : NextFlushAdr;
|
||||
assign FlushAdrP1 = NextFlushAdr + 1'b1;
|
||||
assign FlushAdrFlag = (NextFlushAdr == FLUSHADRTHRESHOLD[SETLEN-1:0]);
|
||||
|
||||
// Flush way
|
||||
flopenl #(NUMWAYS) FlushWayReg(clk, ResetOrFlushCntRst, FlushWayCntEn, {{NUMWAYS-1{1'b0}}, 1'b1}, NextFlushWay, FlushWay);
|
||||
if(NUMWAYS > 1) assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]};
|
||||
else assign NextFlushWay = FlushWay[NUMWAYS-1];
|
||||
assign FlushAdrFlag = (NextFlushAdr == FlushAdrThreshold[SETLEN-1:0]);
|
||||
flopenl #(NUMWAYS) FlushWayReg(.clk, .load(ResetOrFlushCntRst), .en(FlushWayCntEn),
|
||||
.val({{NUMWAYS-1{1'b0}}, 1'b1}), .d(NextFlushWay), .q(FlushWay));
|
||||
assign FlushWayFlag = FlushWay[NUMWAYS-1];
|
||||
if(NUMWAYS > 1) assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]};
|
||||
else assign NextFlushWay = FlushWay[NUMWAYS-1];
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Cache FSM
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
cachefsm cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck,
|
||||
.FlushStage, .CacheRW, .CacheAtomic, .Stall,
|
||||
.CacheHit, .LineDirty, .CacheStall, .CacheCommitted,
|
||||
.CacheMiss, .CacheAccess, .SelAdr,
|
||||
.ClearValid, .ClearDirty, .SetDirty, .SetValid, .SelWriteback, .SelFlush,
|
||||
.ClearValid, .ClearDirty, .SetDirty,
|
||||
.SetValid, .SelWriteback, .SelFlush,
|
||||
.FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst,
|
||||
.FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer,
|
||||
.InvalidateCache, .CacheEn, .LRUWriteEn);
|
||||
|
||||
.InvalidateCache,
|
||||
.CacheEn,
|
||||
.LRUWriteEn);
|
||||
endmodule
|
||||
|
@ -42,6 +42,7 @@ module fdivsqrt(
|
||||
input logic XNaNE, YNaNE,
|
||||
input logic FDivStartE, IDivStartE,
|
||||
input logic StallM,
|
||||
input logic StallE,
|
||||
input logic FlushE,
|
||||
input logic SqrtE, SqrtM,
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
@ -74,17 +75,17 @@ module fdivsqrt(
|
||||
.clk, .IFDivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE),
|
||||
.Sqrt(SqrtE), .Ym(YmE), .XZeroE, .X, .DPreproc, .ForwardedSrcAM,
|
||||
.nE, .nM, .mM, .CalcOTFCSwapE, .OTFCSwapE, .ALTBM, .AZeroM, .BZeroM, .AZeroE, .BZeroE, .As,
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .MDUE, .W64E);
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E);
|
||||
fdivsqrtfsm fdivsqrtfsm(
|
||||
.clk, .reset, .FmtE, .XsE, .SqrtE, .nE,
|
||||
.FDivBusyE, .FDivStartE, .IDivStartE, .IFDivStartE, .FDivDoneE, .StallM, .FlushE, /*.DivDone, */
|
||||
.FDivBusyE, .FDivStartE, .IDivStartE, .IFDivStartE, .FDivDoneE, .StallE, .StallM, .FlushE, /*.DivDone, */
|
||||
.XZeroE, .YZeroE, .AZeroE, .BZeroE,
|
||||
.XNaNE, .YNaNE, .MDUE,
|
||||
.XInfE, .YInfE, .WZeroM, .SpecialCaseM);
|
||||
fdivsqrtiter fdivsqrtiter(
|
||||
.clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .MDUE, .SqrtE, // .SqrtM,
|
||||
.X,.DPreproc, .FirstWS(WS), .FirstWC(WC),
|
||||
.IFDivStartE, .CalcOTFCSwapE, .OTFCSwapE,
|
||||
.IFDivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .CalcOTFCSwapE, .OTFCSwapE,
|
||||
.FDivBusyE);
|
||||
fdivsqrtpostproc fdivsqrtpostproc(
|
||||
.WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun,
|
||||
|
@ -41,6 +41,7 @@ module fdivsqrtfsm(
|
||||
input logic FDivStartE, IDivStartE,
|
||||
input logic XsE,
|
||||
input logic SqrtE,
|
||||
input logic StallE,
|
||||
input logic StallM,
|
||||
input logic FlushE,
|
||||
input logic WZeroM,
|
||||
@ -116,9 +117,9 @@ module fdivsqrtfsm(
|
||||
if (SpecialCaseE) state <= #1 DONE;
|
||||
else state <= #1 BUSY;
|
||||
end else if (state == BUSY) begin
|
||||
if (step == 1 | WZeroM) state <= #1 DONE; // terminate early when residual is zero
|
||||
if (step == 1) state <= #1 DONE;
|
||||
step <= step - 1;
|
||||
end else if ((state == DONE)) begin
|
||||
end else if ((state == DONE) | (WZeroM & (state == BUSY))) begin
|
||||
if (StallM) state <= #1 DONE;
|
||||
else state <= #1 IDLE;
|
||||
end
|
||||
|
@ -34,6 +34,8 @@ module fdivsqrtiter(
|
||||
input logic clk,
|
||||
input logic IFDivStartE,
|
||||
input logic FDivBusyE,
|
||||
input logic [`NE-1:0] Xe, Ye,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic SqrtE, MDUE,
|
||||
// input logic SqrtM,
|
||||
input logic CalcOTFCSwapE, OTFCSwapE,
|
||||
@ -62,6 +64,7 @@ module fdivsqrtiter(
|
||||
logic [`DIVb+3:0] WSN, WCN; // Q4.b
|
||||
logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.b
|
||||
logic [`DIVb+1:0] NextC;
|
||||
logic [`DIVb+1:0] CMux;
|
||||
logic [`DIVb:0] UMux, UMMux;
|
||||
logic [`DIVb:0] initU, initUM;
|
||||
/* verilator lint_on UNOPTFLAT */
|
||||
@ -91,8 +94,8 @@ module fdivsqrtiter(
|
||||
logic [1:0] initCUpper;
|
||||
assign initCUpper = (SqrtE & ~(MDUE)) ? 2'b11 : (`RADIX == 4) ? 2'b00 : 2'b10;
|
||||
assign initC = {initCUpper, {`DIVb{1'b0}}};
|
||||
mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, IFDivStartE, NextC);
|
||||
flopen #(`DIVb+2) creg(clk, IFDivStartE|FDivBusyE, NextC, C[0]);
|
||||
mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, IFDivStartE, CMux);
|
||||
flopen #(`DIVb+2) creg(clk, IFDivStartE|FDivBusyE, CMux, C[0]);
|
||||
|
||||
// Divisior register
|
||||
flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D);
|
||||
|
@ -123,7 +123,7 @@ module fdivsqrtpostproc(
|
||||
IntRemM = NormRemM;
|
||||
end
|
||||
|
||||
always_comb // could merge into postprocessor shifter
|
||||
always_comb
|
||||
if (RemOpM) begin
|
||||
NormShiftM = (mM + (`DIVBLEN+1)'(`DIVa));
|
||||
PreResultM = IntRemM;
|
||||
|
@ -39,7 +39,7 @@ module fdivsqrtpreproc (
|
||||
input logic Sqrt,
|
||||
input logic XZeroE,
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
input logic [2:0] Funct3E,
|
||||
input logic [2:0] Funct3E, Funct3M,
|
||||
input logic MDUE, W64E,
|
||||
output logic [`DIVBLEN:0] nE, nM, mM,
|
||||
output logic CalcOTFCSwapE, OTFCSwapE, ALTBM, As, AZeroM, BZeroM, AZeroE, BZeroE,
|
||||
|
@ -84,7 +84,7 @@ module fma(
|
||||
// // Addition/LZA
|
||||
// ///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ZmSticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss);
|
||||
fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss);
|
||||
|
||||
fmalza #(3*`NF+6) lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt);
|
||||
endmodule
|
||||
|
@ -33,8 +33,8 @@
|
||||
module fmaadd(
|
||||
input logic [3*`NF+5:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1)
|
||||
input logic [2*`NF+1:0] Pm, // the product's mantissa
|
||||
input logic Ps,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
|
||||
input logic InvA, // invert the aligned addend
|
||||
input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
|
||||
input logic InvA, // invert the aligned addend
|
||||
input logic KillProd, // should the product be set to 0
|
||||
input logic ZmSticky,
|
||||
input logic [`NE-1:0] Ze,
|
||||
|
@ -91,6 +91,7 @@ module fpu (
|
||||
logic XsE, YsE, ZsE; // input's sign - execute stage
|
||||
logic XsM, YsM; // input's sign - memory stage
|
||||
logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage
|
||||
logic [`NE-1:0] ZeM; // input's exponent - memory stage
|
||||
logic [`NF:0] XmE, YmE, ZmE; // input's fraction - execute stage
|
||||
logic [`NF:0] XmM, YmM, ZmM; // input's fraction - memory stage
|
||||
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
|
||||
@ -264,7 +265,7 @@ module fpu (
|
||||
fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
|
||||
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E,
|
||||
.StallM, .FlushE, .DivSM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
|
||||
.StallE, .StallM, .FlushE, .DivSM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
|
||||
.QmM, .FPIntDivResultM /*, .DivDone(DivDoneM) */);
|
||||
|
||||
//
|
||||
@ -342,6 +343,7 @@ module fpu (
|
||||
|
||||
flopenrc #(`NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM);
|
||||
flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM);
|
||||
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
|
||||
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
|
||||
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
|
||||
flopenr #(15) EMFpReg5 (clk, reset, ~StallUnpackedM,
|
||||
@ -370,7 +372,7 @@ module fpu (
|
||||
|
||||
assign FpLoadStoreM = FResSelM[1];
|
||||
|
||||
postprocess postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM),
|
||||
postprocess postprocess(.Xs(XsM), .Ys(YsM), .Ze(ZeM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM),
|
||||
.FmaZmS(ZmStickyM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
|
||||
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), /*.DivDone(DivDoneM), */
|
||||
.ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
|
||||
|
@ -32,7 +32,8 @@
|
||||
|
||||
module divshiftcalc(
|
||||
input logic [`DIVb:0] DivQm,
|
||||
input logic Sqrt, // *** not used right now. Maybe merge with shift from postprocess
|
||||
input logic [`FMTBITS-1:0] Fmt,
|
||||
input logic Sqrt,
|
||||
input logic [`NE+1:0] DivQe,
|
||||
output logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt,
|
||||
output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
|
||||
|
@ -31,6 +31,7 @@
|
||||
|
||||
module fmashiftcalc(
|
||||
input logic [3*`NF+5:0] FmaSm, // the positive sum
|
||||
input logic [`NE-1:0] Ze, // exponent of Z
|
||||
input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // normalization shift count
|
||||
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
|
||||
input logic [`NE+1:0] FmaSe,
|
||||
|
@ -33,6 +33,7 @@
|
||||
module postprocess (
|
||||
// general signals
|
||||
input logic Xs, Ys, // input signs
|
||||
input logic [`NE-1:0] Ze, // input exponents
|
||||
input logic [`NF:0] Xm, Ym, Zm, // input mantissas
|
||||
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
|
||||
@ -145,9 +146,9 @@ module postprocess (
|
||||
|
||||
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,
|
||||
.XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
|
||||
fmashiftcalc fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
|
||||
fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
|
||||
.FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
|
||||
divshiftcalc divshiftcalc(.Sqrt, .DivQe, .DivQm, .DivResDenorm, .DivDenormShiftPos, .DivShiftAmt, .DivShiftIn);
|
||||
divshiftcalc divshiftcalc(.Fmt, .Sqrt, .DivQe, .DivQm, .DivResDenorm, .DivDenormShiftPos, .DivShiftAmt, .DivShiftIn);
|
||||
|
||||
always_comb
|
||||
case(PostProcSel)
|
||||
|
@ -199,7 +199,7 @@ module csr #(parameter
|
||||
// CSRs
|
||||
///////////////////////////////////////////
|
||||
|
||||
csri csri(.clk, .reset, .InstrValidNotFlushedM,
|
||||
csri csri(.clk, .reset, .InstrValidNotFlushedM, .StallW,
|
||||
.CSRMWriteM, .CSRSWriteM, .CSRWriteValM, .CSRAdrM,
|
||||
.MExtInt, .SExtInt, .MTimerInt, .MSwInt,
|
||||
.MIP_REGW, .MIE_REGW, .MIP_REGW_writeable);
|
||||
@ -219,7 +219,7 @@ module csr #(parameter
|
||||
.CSRAdrM, .PrivilegeModeW, .CSRWriteValM,
|
||||
.MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW,
|
||||
.MTIME_CLINT, .CSRCReadValM, .IllegalCSRCAccessM);
|
||||
csrm csrm(.clk, .reset, .InstrValidNotFlushedM,
|
||||
csrm csrm(.clk, .reset, .InstrValidNotFlushedM, .StallW,
|
||||
.CSRMWriteM, .MTrapM, .CSRAdrM,
|
||||
.NextEPCM, .NextCauseM, .NextMtvalM, .MSTATUS_REGW, .MSTATUSH_REGW,
|
||||
.CSRWriteValM, .CSRMReadValM, .MTVEC_REGW,
|
||||
@ -227,7 +227,7 @@ module csr #(parameter
|
||||
.MEDELEG_REGW, .MIDELEG_REGW,.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW,
|
||||
.MIP_REGW, .MIE_REGW, .WriteMSTATUSM, .WriteMSTATUSHM,
|
||||
.IllegalCSRMAccessM, .IllegalCSRMWriteReadonlyM);
|
||||
csrs csrs(.clk, .reset, .InstrValidNotFlushedM,
|
||||
csrs csrs(.clk, .reset, .InstrValidNotFlushedM, .StallW,
|
||||
.CSRSWriteM, .STrapM, .CSRAdrM,
|
||||
.NextEPCM, .NextCauseM, .NextMtvalM, .SSTATUS_REGW,
|
||||
.STATUS_TVM, .CSRWriteValM, .PrivilegeModeW,
|
||||
@ -235,7 +235,7 @@ module csr #(parameter
|
||||
.SCOUNTEREN_REGW,
|
||||
.SATP_REGW, .MIP_REGW, .MIE_REGW, .MIDELEG_REGW,
|
||||
.WriteSSTATUSM, .IllegalCSRSAccessM);
|
||||
csru csru(.clk, .reset, .InstrValidNotFlushedM,
|
||||
csru csru(.clk, .reset, .InstrValidNotFlushedM, .StallW,
|
||||
.CSRUWriteM, .CSRAdrM, .CSRWriteValM, .STATUS_FS, .CSRUReadValM,
|
||||
.SetFflagsM, .FRM_REGW, .WriteFRMM, .WriteFFLAGSM,
|
||||
.IllegalCSRUAccessM);
|
||||
|
@ -38,7 +38,7 @@ module csri #(parameter
|
||||
SIP = 12'h144
|
||||
) (
|
||||
input logic clk, reset,
|
||||
input logic InstrValidNotFlushedM,
|
||||
input logic InstrValidNotFlushedM, StallW,
|
||||
input logic CSRMWriteM, CSRSWriteM,
|
||||
input logic [`XLEN-1:0] CSRWriteValM,
|
||||
input logic [11:0] CSRAdrM,
|
||||
|
@ -72,7 +72,7 @@ module csrm #(parameter
|
||||
MIDELEG_MASK = 12'h222 // we choose to not make machine interrupts delegable
|
||||
) (
|
||||
input logic clk, reset,
|
||||
input logic InstrValidNotFlushedM,
|
||||
input logic InstrValidNotFlushedM, StallW,
|
||||
input logic CSRMWriteM, MTrapM,
|
||||
input logic [11:0] CSRAdrM,
|
||||
input logic [`XLEN-1:0] NextEPCM, NextCauseM, NextMtvalM, MSTATUS_REGW, MSTATUSH_REGW,
|
||||
|
@ -50,7 +50,7 @@ module csrs #(parameter
|
||||
|
||||
) (
|
||||
input logic clk, reset,
|
||||
input logic InstrValidNotFlushedM,
|
||||
input logic InstrValidNotFlushedM, StallW,
|
||||
input logic CSRSWriteM, STrapM,
|
||||
input logic [11:0] CSRAdrM,
|
||||
input logic [`XLEN-1:0] NextEPCM, NextCauseM, NextMtvalM, SSTATUS_REGW,
|
||||
|
@ -32,8 +32,7 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module csrsr (
|
||||
input logic clk, reset,
|
||||
input logic StallW,
|
||||
input logic clk, reset, StallW,
|
||||
input logic WriteMSTATUSM, WriteMSTATUSHM, WriteSSTATUSM,
|
||||
input logic TrapM, FRegWriteM,
|
||||
input logic [1:0] NextPrivilegeModeM, PrivilegeModeW,
|
||||
|
@ -37,7 +37,7 @@ module csru #(parameter
|
||||
FRM = 12'h002,
|
||||
FCSR = 12'h003) (
|
||||
input logic clk, reset,
|
||||
input logic InstrValidNotFlushedM,
|
||||
input logic InstrValidNotFlushedM, StallW,
|
||||
input logic CSRUWriteM,
|
||||
input logic [11:0] CSRAdrM,
|
||||
input logic [`XLEN-1:0] CSRWriteValM,
|
||||
|
@ -63,12 +63,12 @@ module trap (
|
||||
///////////////////////////////////////////
|
||||
assign MIntGlobalEnM = (PrivilegeModeW != `M_MODE) | STATUS_MIE; // if M ints enabled or lower priv 3.1.9
|
||||
assign SIntGlobalEnM = (PrivilegeModeW == `U_MODE) | ((PrivilegeModeW == `S_MODE) & STATUS_SIE); // if in lower priv mode, or if S ints enabled and not in higher priv mode 3.1.9
|
||||
assign Committed = CommittedM | CommittedF;
|
||||
assign EnabledIntsM = {12{~Committed & InstrValidM}} & ({12{MIntGlobalEnM}} & ~MIDELEG_REGW | {12{SIntGlobalEnM}} & MIDELEG_REGW);
|
||||
assign PendingIntsM = MIP_REGW & MIE_REGW;
|
||||
assign IntPendingM = |PendingIntsM;
|
||||
assign ValidIntsM = PendingIntsM & EnabledIntsM;
|
||||
assign InterruptM = (|ValidIntsM) ; // suppress interrupt if the memory system has partially processed a request.
|
||||
assign Committed = CommittedM | CommittedF;
|
||||
assign EnabledIntsM = ({12{MIntGlobalEnM}} & PendingIntsM & ~MIDELEG_REGW | {12{SIntGlobalEnM}} & PendingIntsM & MIDELEG_REGW);
|
||||
assign ValidIntsM = {12{~Committed}} & EnabledIntsM;
|
||||
assign InterruptM = (|ValidIntsM) & InstrValidM; // suppress interrupt if the memory system has partially processed a request.
|
||||
assign DelegateM = `S_SUPPORTED & (InterruptM ? MIDELEG_REGW[CauseM[3:0]] : MEDELEG_REGW[CauseM]) &
|
||||
(PrivilegeModeW == `U_MODE | PrivilegeModeW == `S_MODE);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user