diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 696a9c921..cf02972d7 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -8,96 +8,108 @@ // // A component of the Wally configurable RISC-V project. // -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University // -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: // -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. // -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. //////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, DCACHE) ( - input logic clk, - input logic reset, - input logic Stall, // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY - input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations) - // cpu side - input logic [1:0] CacheRW, // [1] Read, [0] Write - input logic [1:0] CacheAtomic, // Atomic operation - input logic FlushCache, // Flush all dirty lines back to memory - input logic InvalidateCache, // Clear all valid bits - input logic [11:0] NextAdr, // Virtual address, but we only use the lower 12 bits. - input logic [`PA_BITS-1:0] PAdr, // Physical address - input logic [(WORDLEN-1)/8:0] ByteMask, // Which bytes to write (D$ only) - input logic [WORDLEN-1:0] CacheWriteData, // Data to write to cache (D$ only) - output logic CacheCommitted, // Cache has started bus operation that shouldn't be interrupted - output logic CacheStall, // Cache stalls pipeline during multicycle operation - output logic [WORDLEN-1:0] ReadDataWord, // Word read from cache (goes to CPU and bus) - // to performance counters to cpu - output logic CacheMiss, // Cache miss - output logic CacheAccess, // Cache access - // lsu control - input logic SelHPTW, // Use PAdr from Hardware Page Table Walker rather than NextAdr - // Bus fsm interface - input logic CacheBusAck, // Bus operation completed - input logic SelBusBeat, // Word in cache line comes from BeatCount - input logic [LOGBWPL-1:0] BeatCount, // Beat in burst - input logic [LINELEN-1:0] FetchBuffer, // Buffer long enough to hold entire cache line arriving from bus - output logic [1:0] CacheBusRW, // [1] Read or [0] write bus - output logic [`PA_BITS-1:0] CacheBusAdr // Address for bus access -); + input logic clk, + input logic reset, + // cpu side + input logic FlushStage, + input logic Stall, + input logic [1:0] CacheRW, + input logic [1:0] CacheAtomic, + input logic FlushCache, + input logic InvalidateCache, + input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits. + input logic [`PA_BITS-1:0] PAdr, // physical address + input logic [(WORDLEN-1)/8:0] ByteMask, + input logic [WORDLEN-1:0] CacheWriteData, + output logic CacheCommitted, + output logic CacheStall, + // to performance counters to cpu + output logic CacheMiss, + output logic CacheAccess, + // lsu control + input logic SelHPTW, + // Bus fsm interface + output logic [1:0] CacheBusRW, + input logic CacheBusAck, + input logic SelBusBeat, + input logic [LOGBWPL-1:0] BeatCount, + input logic [LINELEN-1:0] FetchBuffer, + output logic [`PA_BITS-1:0] CacheBusAdr, + output logic [WORDLEN-1:0] ReadDataWord); // Cache parameters - localparam LINEBYTELEN = LINELEN/8; // Line length in bytes - localparam OFFSETLEN = $clog2(LINEBYTELEN); // Number of bits in offset field - localparam SETLEN = $clog2(NUMLINES); // Number of set bits - localparam SETTOP = SETLEN+OFFSETLEN; // Number of set plus offset bits - localparam TAGLEN = `PA_BITS - SETTOP; // Number of tag bits - localparam WORDSPERLINE = LINELEN/WORDLEN; // Number of words in cache line - localparam FLUSHADRTHRESHOLD = NUMLINES - 1; // Used to determine when flush is complete - localparam LOGLLENBYTES = $clog2(WORDLEN/8); // Number of bits to address a word - localparam CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN; // *** see if this is the same as WORDSPERLINE - localparam LOGCWPL = $clog2(CACHEWORDSPERLINE); // *** + localparam LINEBYTELEN = LINELEN/8; + localparam OFFSETLEN = $clog2(LINEBYTELEN); + localparam SETLEN = $clog2(NUMLINES); + localparam SETTOP = SETLEN+OFFSETLEN; + localparam TAGLEN = `PA_BITS - SETTOP; + localparam WORDSPERLINE = LINELEN/WORDLEN; + localparam FlushAdrThreshold = NUMLINES - 1; - logic SelAdr; - logic [1:0] AdrSelMuxSel; - logic [SETLEN-1:0] CAdr; - logic [LINELEN-1:0] LineWriteData; - logic ClearValid, ClearDirty, SetDirty, SetValid; - logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; - logic [NUMWAYS-1:0] HitWay, ValidWay; - logic CacheHit; - logic [NUMWAYS-1:0] VictimWay, DirtyWay; - logic LineDirty; - logic [TAGLEN-1:0] TagWay [NUMWAYS-1:0]; - logic [TAGLEN-1:0] Tag; - logic [SETLEN-1:0] FlushAdr, NextFlushAdr, FlushAdrP1; - logic FlushAdrCntEn, FlushCntRst; - logic FlushAdrFlag, FlushWayFlag; - logic [NUMWAYS-1:0] FlushWay, NextFlushWay; - logic FlushWayCntEn; - logic SelWriteback; - logic LRUWriteEn; - logic SelFlush; - logic ResetOrFlushCntRst; - logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache; - logic SelFetchBuffer; - logic CacheEn; - logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded; - logic [LINELEN/8-1:0] LineByteMask, DemuxedByteMask, FetchBufferByteSel; + logic SelAdr; + logic [SETLEN-1:0] CAdr; + logic [LINELEN-1:0] LineWriteData; + logic ClearValid; + logic ClearDirty; + logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; + logic [NUMWAYS-1:0] HitWay, ValidWay; + logic CacheHit; + logic SetDirty; + logic SetValid; + logic [NUMWAYS-1:0] VictimWay; + logic [NUMWAYS-1:0] DirtyWay; + logic LineDirty; + logic [TAGLEN-1:0] TagWay [NUMWAYS-1:0]; + logic [TAGLEN-1:0] Tag; + logic [SETLEN-1:0] FlushAdr; + logic [SETLEN-1:0] NextFlushAdr; + logic [SETLEN-1:0] FlushAdrP1; + logic FlushAdrCntEn; + logic FlushCntRst; + logic FlushAdrFlag; + logic FlushWayFlag; + logic [NUMWAYS-1:0] FlushWay; + logic [NUMWAYS-1:0] NextFlushWay; + logic FlushWayCntEn; + logic SelWriteback; + logic LRUWriteEn; + logic SelFlush; + logic ResetOrFlushCntRst; + logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache; logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; + logic SelFetchBuffer; + logic CacheEn; - genvar index; + + localparam LOGLLENBYTES = $clog2(WORDLEN/8); + localparam CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN; + localparam LOGCWPL = $clog2(CACHEWORDSPERLINE); + logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded; + logic [LINELEN/8-1:0] LineByteMask, DemuxedByteMask, FetchBufferByteSel; + genvar index; ///////////////////////////////////////////////////////////////////////////////////////////// // Read Path @@ -107,100 +119,91 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE // and FlushAdr when handling D$ flushes // The icache must update to the newest PCNextF on flush as it is probably a trap. Trap // sets PCNextF to XTVEC and the icache must start reading the instruction. - assign AdrSelMuxSel = {SelFlush, ((SelAdr | SelHPTW) & ~((DCACHE == 0) & FlushStage))}; - mux3 #(SETLEN) AdrSelMux(.d0(NextAdr[SETTOP-1:OFFSETLEN]), .d1(PAdr[SETTOP-1:OFFSETLEN]), .d2(FlushAdr), - .s(AdrSelMuxSel), .y(CAdr)); + mux3 #(SETLEN) AdrSelMux( + .d0(NextAdr[SETTOP-1:OFFSETLEN]), .d1(PAdr[SETTOP-1:OFFSETLEN]), .d2(FlushAdr), + .s({SelFlush, ((SelAdr | SelHPTW) & ~((DCACHE == 0) & FlushStage))}), .y(CAdr)); // Array of cache ways, along with victim, hit, dirty, and read merging logic - cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE) CacheWays[NUMWAYS-1:0]( - .clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask, + cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE) + CacheWays[NUMWAYS-1:0](.clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask, .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache); - - // Select victim way for associative caches if(NUMWAYS > 1) begin:vict cacheLRU #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cacheLRU( .clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage), .SetValid, .PAdr(PAdr[SETTOP-1:OFFSETLEN]), .InvalidateCache, .FlushCache); - end else - assign VictimWay = 1'b1; // one hot. - - assign CacheHit = |HitWay; - assign LineDirty = |DirtyWay; - + end else assign VictimWay = 1'b1; // one hot. + assign CacheHit = | HitWay; + assign LineDirty = | DirtyWay; // ReadDataLineWay is a 2d array of cache line len by number of ways. // Need to OR together each way in a bitwise manner. // Final part of the AO Mux. First is the AND in the cacheway. or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLineCache)); or_rows #(NUMWAYS, TAGLEN) TagAOMux(.a(TagWay), .y(Tag)); - // Data cache needs to choose word offset from PAdr or BeatCount to writeback dirty lines + // like to fix this. if(DCACHE) mux2 #(LOGBWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]), .d1(BeatCount), .s(SelBusBeat), .y(WordOffsetAddr)); - else - assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]; + else assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]; - // Bypass cache array to save a cycle when finishing a load miss mux2 #(LINELEN) EarlyReturnMux(ReadDataLineCache, FetchBuffer, SelFetchBuffer, ReadDataLine); - // Select word from cache line subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL) subcachelineread( - .PAdr(WordOffsetAddr), .ReadDataLine, .ReadDataWord); + .PAdr(WordOffsetAddr), + .ReadDataLine, .ReadDataWord); - // Bus address for fetch, writeback, or flush writeback - mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), - .d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), - .d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}), - .s({SelFlush, SelWriteback}), .y(CacheBusAdr)); - ///////////////////////////////////////////////////////////////////////////////////////////// - // Write Path + // Write Path: Write data and address. Muxes between writes from bus and writes from CPU. ///////////////////////////////////////////////////////////////////////////////////////////// - - // Adjust byte mask from word to cache line - onehotdecoder #(LOGCWPL) adrdec(.bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded)); + onehotdecoder #(LOGCWPL) adrdec( + .bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded)); for(index = 0; index < 2**LOGCWPL; index++) begin assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0; end + assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1. + logic [LINELEN/8-1:0] LineByteMask2; assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0; - // Merge write data into fetched cache line for store miss for(index = 0; index < LINELEN/8; index++) begin mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]), .d1(FetchBuffer[8*index+7:8*index]), .s(FetchBufferByteSel[index]), .y(LineWriteData[8*index+7:8*index])); end - - ///////////////////////////////////////////////////////////////////////////////////////////// - // Flush logic - ///////////////////////////////////////////////////////////////////////////////////////////// - // Flush address (line number) + mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), + .d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), + .d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}), + .s({SelFlush, SelWriteback}), .y(CacheBusAdr)); + + ///////////////////////////////////////////////////////////////////////////////////////////// + // Flush address and way generation during flush + ///////////////////////////////////////////////////////////////////////////////////////////// assign ResetOrFlushCntRst = reset | FlushCntRst; - flopenr #(SETLEN) FlushAdrReg(clk, ResetOrFlushCntRst, FlushAdrCntEn, FlushAdrP1, NextFlushAdr); - mux2 #(SETLEN) FlushAdrMux(NextFlushAdr, FlushAdrP1, FlushAdrCntEn, FlushAdr); + flopenr #(SETLEN) FlushAdrReg(.clk, .reset(ResetOrFlushCntRst), .en(FlushAdrCntEn), + .d(FlushAdrP1), .q(NextFlushAdr)); + assign FlushAdr = FlushAdrCntEn ? FlushAdrP1 : NextFlushAdr; assign FlushAdrP1 = NextFlushAdr + 1'b1; - assign FlushAdrFlag = (NextFlushAdr == FLUSHADRTHRESHOLD[SETLEN-1:0]); - - // Flush way - flopenl #(NUMWAYS) FlushWayReg(clk, ResetOrFlushCntRst, FlushWayCntEn, {{NUMWAYS-1{1'b0}}, 1'b1}, NextFlushWay, FlushWay); - if(NUMWAYS > 1) assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]}; - else assign NextFlushWay = FlushWay[NUMWAYS-1]; + assign FlushAdrFlag = (NextFlushAdr == FlushAdrThreshold[SETLEN-1:0]); + flopenl #(NUMWAYS) FlushWayReg(.clk, .load(ResetOrFlushCntRst), .en(FlushWayCntEn), + .val({{NUMWAYS-1{1'b0}}, 1'b1}), .d(NextFlushWay), .q(FlushWay)); assign FlushWayFlag = FlushWay[NUMWAYS-1]; + if(NUMWAYS > 1) assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]}; + else assign NextFlushWay = FlushWay[NUMWAYS-1]; ///////////////////////////////////////////////////////////////////////////////////////////// // Cache FSM ///////////////////////////////////////////////////////////////////////////////////////////// - cachefsm cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck, .FlushStage, .CacheRW, .CacheAtomic, .Stall, .CacheHit, .LineDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdr, - .ClearValid, .ClearDirty, .SetDirty, .SetValid, .SelWriteback, .SelFlush, + .ClearValid, .ClearDirty, .SetDirty, + .SetValid, .SelWriteback, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer, - .InvalidateCache, .CacheEn, .LRUWriteEn); - + .InvalidateCache, + .CacheEn, + .LRUWriteEn); endmodule diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 80cd4e3ab..b4c4964d2 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -42,6 +42,7 @@ module fdivsqrt( input logic XNaNE, YNaNE, input logic FDivStartE, IDivStartE, input logic StallM, + input logic StallE, input logic FlushE, input logic SqrtE, SqrtM, input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B @@ -74,17 +75,17 @@ module fdivsqrt( .clk, .IFDivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .Sqrt(SqrtE), .Ym(YmE), .XZeroE, .X, .DPreproc, .ForwardedSrcAM, .nE, .nM, .mM, .CalcOTFCSwapE, .OTFCSwapE, .ALTBM, .AZeroM, .BZeroM, .AZeroE, .BZeroE, .As, - .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .MDUE, .W64E); + .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E); fdivsqrtfsm fdivsqrtfsm( .clk, .reset, .FmtE, .XsE, .SqrtE, .nE, - .FDivBusyE, .FDivStartE, .IDivStartE, .IFDivStartE, .FDivDoneE, .StallM, .FlushE, /*.DivDone, */ + .FDivBusyE, .FDivStartE, .IDivStartE, .IFDivStartE, .FDivDoneE, .StallE, .StallM, .FlushE, /*.DivDone, */ .XZeroE, .YZeroE, .AZeroE, .BZeroE, .XNaNE, .YNaNE, .MDUE, .XInfE, .YInfE, .WZeroM, .SpecialCaseM); fdivsqrtiter fdivsqrtiter( .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .MDUE, .SqrtE, // .SqrtM, .X,.DPreproc, .FirstWS(WS), .FirstWC(WC), - .IFDivStartE, .CalcOTFCSwapE, .OTFCSwapE, + .IFDivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .CalcOTFCSwapE, .OTFCSwapE, .FDivBusyE); fdivsqrtpostproc fdivsqrtpostproc( .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv index 430f79a2a..851dc27a5 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -41,6 +41,7 @@ module fdivsqrtfsm( input logic FDivStartE, IDivStartE, input logic XsE, input logic SqrtE, + input logic StallE, input logic StallM, input logic FlushE, input logic WZeroM, @@ -116,9 +117,9 @@ module fdivsqrtfsm( if (SpecialCaseE) state <= #1 DONE; else state <= #1 BUSY; end else if (state == BUSY) begin - if (step == 1 | WZeroM) state <= #1 DONE; // terminate early when residual is zero + if (step == 1) state <= #1 DONE; step <= step - 1; - end else if ((state == DONE)) begin + end else if ((state == DONE) | (WZeroM & (state == BUSY))) begin if (StallM) state <= #1 DONE; else state <= #1 IDLE; end diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv index b91728eac..75145e55a 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -34,6 +34,8 @@ module fdivsqrtiter( input logic clk, input logic IFDivStartE, input logic FDivBusyE, + input logic [`NE-1:0] Xe, Ye, + input logic XZeroE, YZeroE, input logic SqrtE, MDUE, // input logic SqrtM, input logic CalcOTFCSwapE, OTFCSwapE, @@ -62,6 +64,7 @@ module fdivsqrtiter( logic [`DIVb+3:0] WSN, WCN; // Q4.b logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.b logic [`DIVb+1:0] NextC; + logic [`DIVb+1:0] CMux; logic [`DIVb:0] UMux, UMMux; logic [`DIVb:0] initU, initUM; /* verilator lint_on UNOPTFLAT */ @@ -91,8 +94,8 @@ module fdivsqrtiter( logic [1:0] initCUpper; assign initCUpper = (SqrtE & ~(MDUE)) ? 2'b11 : (`RADIX == 4) ? 2'b00 : 2'b10; assign initC = {initCUpper, {`DIVb{1'b0}}}; - mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, IFDivStartE, NextC); - flopen #(`DIVb+2) creg(clk, IFDivStartE|FDivBusyE, NextC, C[0]); + mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, IFDivStartE, CMux); + flopen #(`DIVb+2) creg(clk, IFDivStartE|FDivBusyE, CMux, C[0]); // Divisior register flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D); diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 24441edab..5f9142982 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -123,7 +123,7 @@ module fdivsqrtpostproc( IntRemM = NormRemM; end - always_comb // could merge into postprocessor shifter + always_comb if (RemOpM) begin NormShiftM = (mM + (`DIVBLEN+1)'(`DIVa)); PreResultM = IntRemM; diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 902b02760..0bd3fae07 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -39,7 +39,7 @@ module fdivsqrtpreproc ( input logic Sqrt, input logic XZeroE, input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B - input logic [2:0] Funct3E, + input logic [2:0] Funct3E, Funct3M, input logic MDUE, W64E, output logic [`DIVBLEN:0] nE, nM, mM, output logic CalcOTFCSwapE, OTFCSwapE, ALTBM, As, AZeroM, BZeroM, AZeroE, BZeroE, diff --git a/pipelined/src/fpu/fma/fma.sv b/pipelined/src/fpu/fma/fma.sv index 0b8977cd0..ce42aaa19 100644 --- a/pipelined/src/fpu/fma/fma.sv +++ b/pipelined/src/fpu/fma/fma.sv @@ -84,7 +84,7 @@ module fma( // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ZmSticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss); + fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss); fmalza #(3*`NF+6) lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); endmodule diff --git a/pipelined/src/fpu/fma/fmaadd.sv b/pipelined/src/fpu/fma/fmaadd.sv index cf0423d3e..16cf6b05e 100644 --- a/pipelined/src/fpu/fma/fmaadd.sv +++ b/pipelined/src/fpu/fma/fmaadd.sv @@ -33,8 +33,8 @@ module fmaadd( input logic [3*`NF+5:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) input logic [2*`NF+1:0] Pm, // the product's mantissa - input logic Ps,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations) - input logic InvA, // invert the aligned addend + input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations) + input logic InvA, // invert the aligned addend input logic KillProd, // should the product be set to 0 input logic ZmSticky, input logic [`NE-1:0] Ze, diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 10c9bd771..afa645cde 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -91,6 +91,7 @@ module fpu ( logic XsE, YsE, ZsE; // input's sign - execute stage logic XsM, YsM; // input's sign - memory stage logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage + logic [`NE-1:0] ZeM; // input's exponent - memory stage logic [`NF:0] XmE, YmE, ZmE; // input's fraction - execute stage logic [`NF:0] XmM, YmM, ZmM; // input's fraction - memory stage logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage @@ -264,7 +265,7 @@ module fpu ( fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E, - .StallM, .FlushE, .DivSM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM, + .StallE, .StallM, .FlushE, .DivSM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM, .QmM, .FPIntDivResultM /*, .DivDone(DivDoneM) */); // @@ -342,6 +343,7 @@ module fpu ( flopenrc #(`NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM); flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM); + flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM}); flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM); flopenr #(15) EMFpReg5 (clk, reset, ~StallUnpackedM, @@ -370,7 +372,7 @@ module fpu ( assign FpLoadStoreM = FResSelM[1]; - postprocess postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), + postprocess postprocess(.Xs(XsM), .Ys(YsM), .Ze(ZeM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), .FmaZmS(ZmStickyM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), /*.DivDone(DivDoneM), */ .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), diff --git a/pipelined/src/fpu/postproc/divshiftcalc.sv b/pipelined/src/fpu/postproc/divshiftcalc.sv index 392f8db44..58fd9b9b6 100644 --- a/pipelined/src/fpu/postproc/divshiftcalc.sv +++ b/pipelined/src/fpu/postproc/divshiftcalc.sv @@ -32,7 +32,8 @@ module divshiftcalc( input logic [`DIVb:0] DivQm, - input logic Sqrt, // *** not used right now. Maybe merge with shift from postprocess + input logic [`FMTBITS-1:0] Fmt, + input logic Sqrt, input logic [`NE+1:0] DivQe, output logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt, output logic [`NORMSHIFTSZ-1:0] DivShiftIn, diff --git a/pipelined/src/fpu/postproc/fmashiftcalc.sv b/pipelined/src/fpu/postproc/fmashiftcalc.sv index 1e8012784..ce9ff79b1 100644 --- a/pipelined/src/fpu/postproc/fmashiftcalc.sv +++ b/pipelined/src/fpu/postproc/fmashiftcalc.sv @@ -31,6 +31,7 @@ module fmashiftcalc( input logic [3*`NF+5:0] FmaSm, // the positive sum + input logic [`NE-1:0] Ze, // exponent of Z input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // normalization shift count input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic [`NE+1:0] FmaSe, diff --git a/pipelined/src/fpu/postproc/postprocess.sv b/pipelined/src/fpu/postproc/postprocess.sv index 368f3ef77..0880d33e2 100644 --- a/pipelined/src/fpu/postproc/postprocess.sv +++ b/pipelined/src/fpu/postproc/postprocess.sv @@ -33,6 +33,7 @@ module postprocess ( // general signals input logic Xs, Ys, // input signs + input logic [`NE-1:0] Ze, // input exponents input logic [`NF:0] Xm, Ym, Zm, // input mantissas input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single @@ -145,9 +146,9 @@ module postprocess ( cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); - fmashiftcalc fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe, + fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.Sqrt, .DivQe, .DivQm, .DivResDenorm, .DivDenormShiftPos, .DivShiftAmt, .DivShiftIn); + divshiftcalc divshiftcalc(.Fmt, .Sqrt, .DivQe, .DivQm, .DivResDenorm, .DivDenormShiftPos, .DivShiftAmt, .DivShiftIn); always_comb case(PostProcSel) diff --git a/pipelined/src/privileged/csr.sv b/pipelined/src/privileged/csr.sv index 802b35762..dbb852e84 100644 --- a/pipelined/src/privileged/csr.sv +++ b/pipelined/src/privileged/csr.sv @@ -199,7 +199,7 @@ module csr #(parameter // CSRs /////////////////////////////////////////// - csri csri(.clk, .reset, .InstrValidNotFlushedM, + csri csri(.clk, .reset, .InstrValidNotFlushedM, .StallW, .CSRMWriteM, .CSRSWriteM, .CSRWriteValM, .CSRAdrM, .MExtInt, .SExtInt, .MTimerInt, .MSwInt, .MIP_REGW, .MIE_REGW, .MIP_REGW_writeable); @@ -219,7 +219,7 @@ module csr #(parameter .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, .MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW, .MTIME_CLINT, .CSRCReadValM, .IllegalCSRCAccessM); - csrm csrm(.clk, .reset, .InstrValidNotFlushedM, + csrm csrm(.clk, .reset, .InstrValidNotFlushedM, .StallW, .CSRMWriteM, .MTrapM, .CSRAdrM, .NextEPCM, .NextCauseM, .NextMtvalM, .MSTATUS_REGW, .MSTATUSH_REGW, .CSRWriteValM, .CSRMReadValM, .MTVEC_REGW, @@ -227,7 +227,7 @@ module csr #(parameter .MEDELEG_REGW, .MIDELEG_REGW,.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, .MIP_REGW, .MIE_REGW, .WriteMSTATUSM, .WriteMSTATUSHM, .IllegalCSRMAccessM, .IllegalCSRMWriteReadonlyM); - csrs csrs(.clk, .reset, .InstrValidNotFlushedM, + csrs csrs(.clk, .reset, .InstrValidNotFlushedM, .StallW, .CSRSWriteM, .STrapM, .CSRAdrM, .NextEPCM, .NextCauseM, .NextMtvalM, .SSTATUS_REGW, .STATUS_TVM, .CSRWriteValM, .PrivilegeModeW, @@ -235,7 +235,7 @@ module csr #(parameter .SCOUNTEREN_REGW, .SATP_REGW, .MIP_REGW, .MIE_REGW, .MIDELEG_REGW, .WriteSSTATUSM, .IllegalCSRSAccessM); - csru csru(.clk, .reset, .InstrValidNotFlushedM, + csru csru(.clk, .reset, .InstrValidNotFlushedM, .StallW, .CSRUWriteM, .CSRAdrM, .CSRWriteValM, .STATUS_FS, .CSRUReadValM, .SetFflagsM, .FRM_REGW, .WriteFRMM, .WriteFFLAGSM, .IllegalCSRUAccessM); diff --git a/pipelined/src/privileged/csri.sv b/pipelined/src/privileged/csri.sv index a145802f0..aa4de62af 100644 --- a/pipelined/src/privileged/csri.sv +++ b/pipelined/src/privileged/csri.sv @@ -38,7 +38,7 @@ module csri #(parameter SIP = 12'h144 ) ( input logic clk, reset, - input logic InstrValidNotFlushedM, + input logic InstrValidNotFlushedM, StallW, input logic CSRMWriteM, CSRSWriteM, input logic [`XLEN-1:0] CSRWriteValM, input logic [11:0] CSRAdrM, diff --git a/pipelined/src/privileged/csrm.sv b/pipelined/src/privileged/csrm.sv index 71368d065..3a8e73ee6 100644 --- a/pipelined/src/privileged/csrm.sv +++ b/pipelined/src/privileged/csrm.sv @@ -72,7 +72,7 @@ module csrm #(parameter MIDELEG_MASK = 12'h222 // we choose to not make machine interrupts delegable ) ( input logic clk, reset, - input logic InstrValidNotFlushedM, + input logic InstrValidNotFlushedM, StallW, input logic CSRMWriteM, MTrapM, input logic [11:0] CSRAdrM, input logic [`XLEN-1:0] NextEPCM, NextCauseM, NextMtvalM, MSTATUS_REGW, MSTATUSH_REGW, diff --git a/pipelined/src/privileged/csrs.sv b/pipelined/src/privileged/csrs.sv index b43067387..7d3aeeb94 100644 --- a/pipelined/src/privileged/csrs.sv +++ b/pipelined/src/privileged/csrs.sv @@ -50,7 +50,7 @@ module csrs #(parameter ) ( input logic clk, reset, - input logic InstrValidNotFlushedM, + input logic InstrValidNotFlushedM, StallW, input logic CSRSWriteM, STrapM, input logic [11:0] CSRAdrM, input logic [`XLEN-1:0] NextEPCM, NextCauseM, NextMtvalM, SSTATUS_REGW, diff --git a/pipelined/src/privileged/csrsr.sv b/pipelined/src/privileged/csrsr.sv index f6fa38183..c4f841959 100644 --- a/pipelined/src/privileged/csrsr.sv +++ b/pipelined/src/privileged/csrsr.sv @@ -32,8 +32,7 @@ `include "wally-config.vh" module csrsr ( - input logic clk, reset, - input logic StallW, + input logic clk, reset, StallW, input logic WriteMSTATUSM, WriteMSTATUSHM, WriteSSTATUSM, input logic TrapM, FRegWriteM, input logic [1:0] NextPrivilegeModeM, PrivilegeModeW, diff --git a/pipelined/src/privileged/csru.sv b/pipelined/src/privileged/csru.sv index c1eea42c3..7d1c5cbe5 100644 --- a/pipelined/src/privileged/csru.sv +++ b/pipelined/src/privileged/csru.sv @@ -37,7 +37,7 @@ module csru #(parameter FRM = 12'h002, FCSR = 12'h003) ( input logic clk, reset, - input logic InstrValidNotFlushedM, + input logic InstrValidNotFlushedM, StallW, input logic CSRUWriteM, input logic [11:0] CSRAdrM, input logic [`XLEN-1:0] CSRWriteValM, diff --git a/pipelined/src/privileged/trap.sv b/pipelined/src/privileged/trap.sv index 25936932b..ec3cc8634 100644 --- a/pipelined/src/privileged/trap.sv +++ b/pipelined/src/privileged/trap.sv @@ -63,12 +63,12 @@ module trap ( /////////////////////////////////////////// assign MIntGlobalEnM = (PrivilegeModeW != `M_MODE) | STATUS_MIE; // if M ints enabled or lower priv 3.1.9 assign SIntGlobalEnM = (PrivilegeModeW == `U_MODE) | ((PrivilegeModeW == `S_MODE) & STATUS_SIE); // if in lower priv mode, or if S ints enabled and not in higher priv mode 3.1.9 - assign Committed = CommittedM | CommittedF; - assign EnabledIntsM = {12{~Committed & InstrValidM}} & ({12{MIntGlobalEnM}} & ~MIDELEG_REGW | {12{SIntGlobalEnM}} & MIDELEG_REGW); assign PendingIntsM = MIP_REGW & MIE_REGW; assign IntPendingM = |PendingIntsM; - assign ValidIntsM = PendingIntsM & EnabledIntsM; - assign InterruptM = (|ValidIntsM) ; // suppress interrupt if the memory system has partially processed a request. + assign Committed = CommittedM | CommittedF; + assign EnabledIntsM = ({12{MIntGlobalEnM}} & PendingIntsM & ~MIDELEG_REGW | {12{SIntGlobalEnM}} & PendingIntsM & MIDELEG_REGW); + assign ValidIntsM = {12{~Committed}} & EnabledIntsM; + assign InterruptM = (|ValidIntsM) & InstrValidM; // suppress interrupt if the memory system has partially processed a request. assign DelegateM = `S_SUPPORTED & (InterruptM ? MIDELEG_REGW[CauseM[3:0]] : MEDELEG_REGW[CauseM]) & (PrivilegeModeW == `U_MODE | PrivilegeModeW == `S_MODE);