From e7a44d89754bb28f4768d10ea744b614d961582c Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 21 Dec 2022 16:12:55 -0600 Subject: [PATCH 1/4] Changed GatedStallF to GatedStallD, which is now derived from StallD instead of StallF. --- pipelined/src/ifu/ifu.sv | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 4df8d413..1597fb98 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -114,7 +114,7 @@ module ifu ( logic ICacheFetchLine; logic BusStall; logic ICacheStallF, IFUCacheBusStallF; - logic GatedStallF; + logic GatedStallD; (* mark_debug = "true" *) logic [31:0] PostSpillInstrRawF; // branch predictor signal logic [`XLEN-1:0] PCNext1F, PCNext0F; @@ -200,7 +200,7 @@ module ifu ( // The IROM uses untranslated addresses, so it is not compatible with virtual memory. if (`IROM_SUPPORTED) begin : irom assign IFURWF = 2'b10; - irom irom(.clk, .reset, .ce(~GatedStallF | reset), .Adr(PCNextFSpill[`XLEN-1:0]), .ReadData(IROMInstrF)); + irom irom(.clk, .reset, .ce(~GatedStallD | reset), .Adr(PCNextFSpill[`XLEN-1:0]), .ReadData(IROMInstrF)); end else begin assign IFURWF = 2'b10; @@ -222,7 +222,7 @@ module ifu ( cache #(.LINELEN(`ICACHE_LINELENINBITS), .NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS), .NUMWAYS(`ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .DCACHE(0)) - icache(.clk, .reset, .FlushStage(TrapM), .Stall(GatedStallF), + icache(.clk, .reset, .FlushStage(TrapM), .Stall(GatedStallD), .FetchBuffer, .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), .CacheBusRW, @@ -244,7 +244,7 @@ module ifu ( .BeatCount(), .Cacheable(CacheableF), .SelBusBeat(), .WriteDataM('0), .CacheBusAck(ICacheBusAck), .HWDATA(), .CacheableOrFlushCacheM(1'b0), .CacheReadDataWordM('0), .FetchBuffer, .PAdr(PCPF), - .BusRW, .Stall(GatedStallF), + .BusRW, .Stall(GatedStallD), .BusStall, .BusCommitted(BusCommittedF)); mux3 #(32) UnCachedDataMux(.d0(ICacheInstrF), 
.d1(FetchBuffer[32-1:0]), .d2(IROMInstrF), @@ -261,7 +261,7 @@ module ifu ( ahbinterface #(0) ahbinterface(.HCLK(clk), .Flush(TrapM), .HRESETn(~reset), .HREADY(IFUHREADY), .HRDATA(HRDATA), .HTRANS(IFUHTRANS), .HWRITE(IFUHWRITE), .HWDATA(), .HWSTRB(), .BusRW, .ByteMask(), .WriteData('0), - .Stall(GatedStallF), .BusStall, .BusCommitted(BusCommittedF), .FetchBuffer(FetchBuffer)); + .Stall(GatedStallD), .BusStall, .BusCommitted(BusCommittedF), .FetchBuffer(FetchBuffer)); assign CacheCommittedF = '0; if(`IROM_SUPPORTED) mux2 #(32) UnCachedDataMux2(FetchBuffer, IROMInstrF, SelIROM, InstrRawF); @@ -278,7 +278,7 @@ module ifu ( assign IFUCacheBusStallF = ICacheStallF | BusStall; assign IFUStallF = IFUCacheBusStallF | SelNextSpillF; - assign GatedStallF = StallF & ~SelNextSpillF; + assign GatedStallD = StallD & ~SelNextSpillF; flopenl #(32) AlignedInstrRawDFlop(clk, reset | FlushD, ~StallD, PostSpillInstrRawF, nop, InstrRawD); From 84f8d9953feaed96d8491dc90f581484ce6212df Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 21 Dec 2022 16:49:53 -0600 Subject: [PATCH 2/4] Updated cache fsm names to match book. --- pipelined/src/cache/cachefsm.sv | 78 ++++++++++++++++----------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index f44d2a31..22f943da 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -81,13 +81,13 @@ module cachefsm typedef enum logic [3:0] {STATE_READY, // hit states // miss states - STATE_MISS_FETCH_WDV, - STATE_MISS_EVICT_DIRTY, - STATE_MISS_WRITE_CACHE_LINE, - STATE_MISS_READ_DELAY, // required for back to back reads. structural hazard on writting SRAM + STATE_FETCH, + STATE_WRITEBACK, + STATE_WRITE_LINE, + STATE_READ_HOLD, // required for back to back reads. 
structural hazard on writting SRAM // flush cache STATE_FLUSH, - STATE_FLUSH_WRITE_BACK} statetype; + STATE_FLUSH_WRITEBACK} statetype; (* mark_debug = "true" *) statetype CurrState, NextState; @@ -119,23 +119,23 @@ module cachefsm else if(FlushCache) NextState = STATE_FLUSH; // Delayed LRU update. Cannot check if victim line is dirty on this cycle. // To optimize do the fetch first, then eviction if necessary. - else if(AnyMiss & ~LineDirty) NextState = STATE_MISS_FETCH_WDV; - else if(AnyMiss & LineDirty) NextState = STATE_MISS_EVICT_DIRTY; + else if(AnyMiss & ~LineDirty) NextState = STATE_FETCH; + else if(AnyMiss & LineDirty) NextState = STATE_WRITEBACK; else NextState = STATE_READY; - STATE_MISS_FETCH_WDV: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; - else NextState = STATE_MISS_FETCH_WDV; - STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_MISS_READ_DELAY; - STATE_MISS_READ_DELAY: if(Stall) NextState = STATE_MISS_READ_DELAY; + STATE_FETCH: if(CacheBusAck) NextState = STATE_WRITE_LINE; + else NextState = STATE_FETCH; + STATE_WRITE_LINE: NextState = STATE_READ_HOLD; + STATE_READ_HOLD: if(Stall) NextState = STATE_READ_HOLD; else NextState = STATE_READY; - STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_FETCH_WDV; - else NextState = STATE_MISS_EVICT_DIRTY; + STATE_WRITEBACK: if(CacheBusAck) NextState = STATE_FETCH; + else NextState = STATE_WRITEBACK; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. 
- STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITE_BACK; - else if (FlushFlag) NextState = STATE_MISS_READ_DELAY; + STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK; + else if (FlushFlag) NextState = STATE_READ_HOLD; else NextState = STATE_FLUSH; - STATE_FLUSH_WRITE_BACK: if(CacheBusAck & ~FlushFlag) NextState = STATE_FLUSH; - else if(CacheBusAck) NextState = STATE_MISS_READ_DELAY; - else NextState = STATE_FLUSH_WRITE_BACK; + STATE_FLUSH_WRITEBACK: if(CacheBusAck & ~FlushFlag) NextState = STATE_FLUSH; + else if(CacheBusAck) NextState = STATE_READ_HOLD; + else NextState = STATE_FLUSH_WRITEBACK; default: NextState = STATE_READY; endcase end @@ -143,48 +143,48 @@ module cachefsm // com back to CPU assign CacheCommitted = CurrState != STATE_READY; assign CacheStall = (CurrState == STATE_READY & (FlushCache | AnyMiss)) | - (CurrState == STATE_MISS_FETCH_WDV) | - (CurrState == STATE_MISS_EVICT_DIRTY) | - (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(StoreAMO)) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. + (CurrState == STATE_FETCH) | + (CurrState == STATE_WRITEBACK) | + (CurrState == STATE_WRITE_LINE & ~(StoreAMO)) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. 
(CurrState == STATE_FLUSH) | - (CurrState == STATE_FLUSH_WRITE_BACK); + (CurrState == STATE_FLUSH_WRITEBACK); // write enables internal to cache - assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE; + assign SetValid = CurrState == STATE_WRITE_LINE; assign SetDirty = (CurrState == STATE_READY & AnyUpdateHit) | - (CurrState == STATE_MISS_WRITE_CACHE_LINE & (StoreAMO)); + (CurrState == STATE_WRITE_LINE & (StoreAMO)); assign ClearValid = '0; - assign ClearDirty = (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(StoreAMO)) | + assign ClearDirty = (CurrState == STATE_WRITE_LINE & ~(StoreAMO)) | (CurrState == STATE_FLUSH & LineDirty); // This is wrong in a multicore snoop cache protocal. Dirty must be cleared concurrently and atomically with writeback. For single core cannot clear after writeback on bus ack and change flushadr. Clears the wrong set. assign LRUWriteEn = (CurrState == STATE_READY & AnyHit) | - (CurrState == STATE_MISS_WRITE_CACHE_LINE); + (CurrState == STATE_WRITE_LINE); // Flush and eviction controls - assign SelWriteback = (CurrState == STATE_MISS_EVICT_DIRTY & ~CacheBusAck) | + assign SelWriteback = (CurrState == STATE_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_READY & AnyMiss & LineDirty); assign SelFlush = (CurrState == STATE_READY & FlushCache) | (CurrState == STATE_FLUSH) | - (CurrState == STATE_FLUSH_WRITE_BACK); - assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITE_BACK & FlushWayFlag & CacheBusAck) | + (CurrState == STATE_FLUSH_WRITEBACK); + assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITEBACK & FlushWayFlag & CacheBusAck) | (CurrState == STATE_FLUSH & FlushWayFlag & ~LineDirty); assign FlushWayCntEn = (CurrState == STATE_FLUSH & ~LineDirty) | - (CurrState == STATE_FLUSH_WRITE_BACK & CacheBusAck); + (CurrState == STATE_FLUSH_WRITEBACK & CacheBusAck); assign FlushCntRst = (CurrState == STATE_FLUSH & FlushFlag & ~LineDirty) | - (CurrState == STATE_FLUSH_WRITE_BACK & FlushFlag & CacheBusAck); + (CurrState == STATE_FLUSH_WRITEBACK 
& FlushFlag & CacheBusAck); // Bus interface controls assign CacheBusRW[1] = (CurrState == STATE_READY & AnyMiss & ~LineDirty) | - (CurrState == STATE_MISS_FETCH_WDV & ~CacheBusAck) | - (CurrState == STATE_MISS_EVICT_DIRTY & CacheBusAck); + (CurrState == STATE_FETCH & ~CacheBusAck) | + (CurrState == STATE_WRITEBACK & CacheBusAck); assign CacheBusRW[0] = (CurrState == STATE_READY & AnyMiss & LineDirty) | - (CurrState == STATE_MISS_EVICT_DIRTY & ~CacheBusAck) | - (CurrState == STATE_FLUSH_WRITE_BACK & ~CacheBusAck); + (CurrState == STATE_WRITEBACK & ~CacheBusAck) | + (CurrState == STATE_FLUSH_WRITEBACK & ~CacheBusAck); // **** can this be simplified? assign SelAdr = (CurrState == STATE_READY & (StoreAMO | AnyMiss)) | // changes if store delay hazard removed - (CurrState == STATE_MISS_FETCH_WDV) | - (CurrState == STATE_MISS_EVICT_DIRTY) | - (CurrState == STATE_MISS_WRITE_CACHE_LINE) | + (CurrState == STATE_FETCH) | + (CurrState == STATE_WRITEBACK) | + (CurrState == STATE_WRITE_LINE) | resetDelay; - assign SelFetchBuffer = CurrState == STATE_MISS_WRITE_CACHE_LINE | CurrState == STATE_MISS_READ_DELAY; + assign SelFetchBuffer = CurrState == STATE_WRITE_LINE | CurrState == STATE_READ_HOLD; assign CacheEn = (CurrState == STATE_READY & ~Stall | CacheStall) | (CurrState != STATE_READY) | reset; endmodule // cachefsm From c3fdc0ab23adffec65b874f26d73c1bc49edd431 Mon Sep 17 00:00:00 2001 From: cturek Date: Thu, 22 Dec 2022 00:43:27 +0000 Subject: [PATCH 3/4] Renamed signals to E and M stages, forwarded preprocessed n to fsm --- pipelined/src/fpu/fdivsqrt/fdivsqrt.sv | 14 +++++----- pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv | 4 +-- pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv | 6 ++--- .../src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 6 ++--- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 26 +++++++++---------- .../src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv | 4 +-- pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv | 4 +-- pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv | 4 +-- 8 files changed, 34 
insertions(+), 34 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index cdfe5047..a780aaca 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -66,29 +66,29 @@ module fdivsqrt( logic Firstun; logic WZeroM, AZeroM, BZeroM, AZeroE, BZeroE; logic SpecialCaseM; - logic [`DIVBLEN:0] n, m; - logic OTFCSwap, ALTBM, As; + logic [`DIVBLEN:0] nE, nM, mM; + logic OTFCSwapE, ALTBM, As; logic DivStartE; fdivsqrtpreproc fdivsqrtpreproc( .clk, .IFDivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .Sqrt(SqrtE), .Ym(YmE), .XZeroE, .X, .DPreproc, - .n, .m, .OTFCSwap, .ALTBM, .AZeroM, .BZeroM, .AZeroE, .BZeroE, .As, + .nE, .nM, .mM, .OTFCSwapE, .ALTBM, .AZeroM, .BZeroM, .AZeroE, .BZeroE, .As, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E); fdivsqrtfsm fdivsqrtfsm( - .clk, .reset, .FmtE, .XsE, .SqrtE, + .clk, .reset, .FmtE, .XsE, .SqrtE, .nE, .FDivBusyE, .FDivStartE, .IDivStartE, .IFDivStartE, .FDivDoneE, .StallE, .StallM, .FlushE, /*.DivDone, */ .XZeroE, .YZeroE, .AZeroE, .BZeroE, - .XNaNE, .YNaNE, .MDUE, .n, + .XNaNE, .YNaNE, .MDUE, .XInfE, .YInfE, .WZeroM, .SpecialCaseM); fdivsqrtiter fdivsqrtiter( .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .MDUE, .SqrtE, // .SqrtM, .X,.DPreproc, .FirstWS(WS), .FirstWC(WC), - .IFDivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap, + .IFDivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwapE, .FDivBusyE); fdivsqrtpostproc fdivsqrtpostproc( .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .SpecialCaseM, .RemOpM(Funct3M[1]), .ForwardedSrcAE, - .n, .ALTBM, .m, .BZeroM, .As, + .nM, .ALTBM, .mM, .BZeroM, .As, .QmM, .WZeroM, .DivSM, .FPIntDivResultM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv index 954d6f4c..0138c05a 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ 
b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -46,7 +46,7 @@ module fdivsqrtfsm( input logic FlushE, input logic WZeroM, input logic MDUE, - input logic [`DIVBLEN:0] n, + input logic [`DIVBLEN:0] nE, output logic IFDivStartE, output logic FDivBusyE, FDivDoneE, output logic SpecialCaseM @@ -104,7 +104,7 @@ module fdivsqrtfsm( always_comb begin if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - cycles = MDUE ? n : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); + cycles = MDUE ? nE : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); end /* verilator lint_on WIDTH */ diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv index ffafb366..0d835811 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -38,7 +38,7 @@ module fdivsqrtiter( input logic XZeroE, YZeroE, input logic SqrtE, MDUE, // input logic SqrtM, - input logic OTFCSwap, + input logic OTFCSwapE, input logic [`DIVb+3:0] X, input logic [`DIVb-1:0] DPreproc, output logic [`DIVb-1:0] D, @@ -114,13 +114,13 @@ module fdivsqrtiter( generate for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : iterations if (`RADIX == 2) begin: stage - fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtE, .OTFCSwap, .MDUE, + fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtE, .OTFCSwapE, .MDUE, .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); end else begin: stage logic j1; assign j1 = (i == 0 & ~C[0][`DIVb-1]); - fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1, .OTFCSwap, .MDUE, + fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1, .OTFCSwapE, .MDUE, .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), 
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); end diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 8190b317..71d86ab7 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -40,7 +40,7 @@ module fdivsqrtpostproc( input logic SpecialCaseM, input logic [`XLEN-1:0] ForwardedSrcAE, input logic RemOpM, ALTBM, BZeroM, As, - input logic [`DIVBLEN:0] n, m, + input logic [`DIVBLEN:0] nM, mM, output logic [`DIVb:0] QmM, output logic WZeroM, output logic DivSM, @@ -127,10 +127,10 @@ module fdivsqrtpostproc( always_comb if (RemOpM) begin - NormShiftM = (m + (`DIVBLEN+1)'(`DIVa)); + NormShiftM = (mM + (`DIVBLEN+1)'(`DIVa)); PreResultM = IntRemM; end else begin - NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (n << `LOGR)); + NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM << `LOGR)); PreResultM = {3'b000, IntQuotM}; end diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index f4624806..5fc9947d 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -41,8 +41,8 @@ module fdivsqrtpreproc ( input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic [2:0] Funct3E, Funct3M, input logic MDUE, W64E, - output logic [`DIVBLEN:0] n, m, - output logic OTFCSwap, ALTBM, As, AZeroM, BZeroM, AZeroE, BZeroE, + output logic [`DIVBLEN:0] nE, nM, mM, + output logic OTFCSwapE, ALTBM, As, AZeroM, BZeroM, AZeroE, BZeroE, output logic [`NE+1:0] QeM, output logic [`DIVb+3:0] X, output logic [`DIVb-1:0] DPreproc @@ -55,9 +55,9 @@ module fdivsqrtpreproc ( // Intdiv signals logic [`DIVb-1:0] IFNormLenX, IFNormLenD; logic [`XLEN-1:0] PosA, PosB; - logic Bs, CalcOTFCSwap, ALTBE; + logic Bs, CalcOTFCSwapE, ALTBE; logic [`XLEN-1:0] 
A64, B64; - logic [`DIVBLEN:0] Calcn, Calcm; + logic [`DIVBLEN:0] mE; logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX; logic [`DIVBLEN:0] pPlusr, pPrCeil, p, ell; logic [`LOGRK-1:0] pPrTrunc; @@ -72,7 +72,7 @@ module fdivsqrtpreproc ( assign A64 = W64E ? {{(`XLEN-32){As}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE; assign B64 = W64E ? {{(`XLEN-32){Bs}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE; - assign CalcOTFCSwap = (As ^ Bs) & MDUE; + assign CalcOTFCSwapE = (As ^ Bs) & MDUE; assign PosA = As ? -A64 : A64; assign PosB = Bs ? -B64 : B64; @@ -82,19 +82,19 @@ module fdivsqrtpreproc ( assign IFNormLenX = MDUE ? {PosA, {(`DIVb-`XLEN){1'b0}}} : {Xm, {(`DIVb-`NF-1){1'b0}}}; assign IFNormLenD = MDUE ? {PosB, {(`DIVb-`XLEN){1'b0}}} : {Ym, {(`DIVb-`NF-1){1'b0}}}; lzc #(`DIVb) lzcX (IFNormLenX, ell); - lzc #(`DIVb) lzcY (IFNormLenD, Calcm); + lzc #(`DIVb) lzcY (IFNormLenD, mE); assign XPreproc = IFNormLenX << (ell + {{`DIVBLEN{1'b0}}, 1'b1}); // had issue with (`DIVBLEN+1)'(~MDUE) so using this instead - assign DPreproc = IFNormLenD << (Calcm + {{`DIVBLEN{1'b0}}, 1'b1}); // replaced ~MDUE with 1 bc we always want that extra left shift + assign DPreproc = IFNormLenD << (mE + {{`DIVBLEN{1'b0}}, 1'b1}); // replaced ~MDUE with 1 bc we always want that extra left shift - assign ZeroDiff = Calcm - ell; + assign ZeroDiff = mE - ell; assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B assign p = ALTBE ? 
'0 : ZeroDiff; assign pPlusr = (`DIVBLEN)'(`LOGR) + p; assign pPrTrunc = pPlusr[`LOGRK-1:0]; assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN{1'b0}}, |(pPrTrunc)}; - assign Calcn = (pPrCeil << `LOGK) - 1; + assign nE = (pPrCeil << `LOGK) - 1; assign IntBits = (`DIVBLEN)'(`RK) + p; assign RightShiftX = (`DIVBLEN)'(`RK) - {{(`DIVBLEN-`RK){1'b0}}, IntBits[`RK-1:0]}; @@ -119,14 +119,14 @@ module fdivsqrtpreproc ( // DIVRESLEN/(r*`DIVCOPIES) flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); - flopen #(1) swapreg(clk, IFDivStartE, CalcOTFCSwap, OTFCSwap); + flopen #(1) swapreg(clk, IFDivStartE, CalcOTFCSwapE, OTFCSwapE); // Retain value for each iteration of divider in Execute stage flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); flopen #(1) azeroreg(clk, IFDivStartE, AZeroE, AZeroM); flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); - flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, Calcn, n); - flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, Calcm, m); + flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); + flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); //flopen #(`XLEN) srcareg(clk, IFDivStartE, ForwardedSrcAE, ForwardedSrcAM); //HERE - expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZeroE, .ell, .m(Calcm), .Qe(QeE)); + expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZeroE, .ell, .m(mE), .Qe(QeE)); endmodule diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv index a54b20ab..aed02576 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv @@ -34,7 +34,7 @@ module fdivsqrtqsel4cmp ( input logic [2:0] Dmsbs, input logic [4:0] Smsbs, input logic [7:0] WSmsbs, WCmsbs, - input logic SqrtE, j1, OTFCSwap, MDUE, + input logic SqrtE, j1, OTFCSwapE, MDUE, output logic [3:0] udigit ); logic [6:0] Wmsbs; @@ -93,5 +93,5 @@ module fdivsqrtqsel4cmp ( else udigitsel = 4'b0001; // choose -2 assign udigitswap = {udigitsel[0], udigitsel[1], udigitsel[2], udigitsel[3]}; - assign 
udigit = OTFCSwap ? udigitswap : udigitsel; + assign udigit = OTFCSwapE ? udigitswap : udigitsel; endmodule diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv index 6866160f..a1ca355e 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv @@ -38,7 +38,7 @@ module fdivsqrtstage2 ( input logic [`DIVb+3:0] WS, WC, input logic [`DIVb+1:0] C, input logic SqrtE, - input logic OTFCSwap, MDUE, + input logic OTFCSwapE, MDUE, output logic un, output logic [`DIVb+1:0] CNext, output logic [`DIVb:0] UNext, UMNext, @@ -60,7 +60,7 @@ module fdivsqrtstage2 ( // 0000 = 0 // 0010 = -1 // 0001 = -2 - fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], OTFCSwap, up, uz, un); + fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], OTFCSwapE, up, uz, un); // Sqrt F generation fdivsqrtfgen2 fgen2(.up, .uz, .C(CNext), .U, .UM, .F); diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv index 105e71f8..ece594e0 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -36,7 +36,7 @@ module fdivsqrtstage4 ( input logic [`DIVb:0] U, UM, input logic [`DIVb+3:0] WS, WC, input logic [`DIVb+1:0] C, - input logic SqrtE, j1, OTFCSwap, MDUE, + input logic SqrtE, j1, OTFCSwapE, MDUE, output logic [`DIVb+1:0] CNext, output logic un, output logic [`DIVb:0] UNext, UMNext, @@ -65,7 +65,7 @@ module fdivsqrtstage4 ( assign WCmsbs = WC[`DIVb+3:`DIVb-4]; assign WSmsbs = WS[`DIVb+3:`DIVb-4]; - fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit, .OTFCSwap, .MDUE); + fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit, .OTFCSwapE, .MDUE); assign un = 1'b0; // unused for radix 4 // F generation logic From c8c73f47d28380d2e142fdb32ad6b1b7af29418b Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 21 Dec 2022 22:13:05 -0600 Subject: 
[PATCH 4/4] CacheEn enables reading or writing the cache memory arrays. It is disabled only when we are stalled in the ready state (outside of reset) with neither a cache miss nor a cache flush request. This is a cache hit, but we are stalled. --- pipelined/src/cache/cachefsm.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 22f943da..a8ba7856 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -185,6 +185,6 @@ module cachefsm resetDelay; assign SelFetchBuffer = CurrState == STATE_WRITE_LINE | CurrState == STATE_READ_HOLD; - assign CacheEn = (CurrState == STATE_READY & ~Stall | CacheStall) | (CurrState != STATE_READY) | reset; + assign CacheEn = (CurrState == STATE_READY & (~Stall | FlushCache | AnyMiss)) | (CurrState != STATE_READY) | reset; endmodule // cachefsm