Merge branch 'main' into hazards

This commit is contained in:
Ross Thompson 2022-12-15 08:44:59 -06:00
commit 0358a8d255
28 changed files with 178 additions and 158 deletions

View File

@ -72,6 +72,7 @@
// Integer Divider Configuration
// DIV_BITSPERCYCLE must be 1, 2, or 4
`define DIV_BITSPERCYCLE 4
`define IDIV_ON_FPU 0
// Legal number of PMP entries are 0, 16, or 64
`define PMP_ENTRIES 16

View File

@ -74,6 +74,7 @@
// Integer Divider Configuration
// DIV_BITSPERCYCLE must be 1, 2, or 4
`define DIV_BITSPERCYCLE 4
`define IDIV_ON_FPU 0
// Legal number of PMP entries are 0, 16, or 64
`define PMP_ENTRIES 64

View File

@ -73,6 +73,7 @@
// Integer Divider Configuration
// DIV_BITSPERCYCLE must be 1, 2, or 4
`define DIV_BITSPERCYCLE 1
`define IDIV_ON_FPU 0
// Legal number of PMP entries are 0, 16, or 64
`define PMP_ENTRIES 0

View File

@ -72,6 +72,7 @@
// Integer Divider Configuration
// DIV_BITSPERCYCLE must be 1, 2, or 4
`define DIV_BITSPERCYCLE 4
`define IDIV_ON_FPU 0
// Legal number of PMP entries are 0, 16, or 64
`define PMP_ENTRIES 64

View File

@ -73,6 +73,7 @@
// Integer Divider Configuration
// DIV_BITSPERCYCLE must be 1, 2, or 4
`define DIV_BITSPERCYCLE 4
`define IDIV_ON_FPU 0
// Legal number of PMP entries are 0, 16, or 64
`define PMP_ENTRIES 64

View File

@ -72,6 +72,7 @@
// Integer Divider Configuration
// DIV_BITSPERCYCLE must be 1, 2, or 4
`define DIV_BITSPERCYCLE 4
`define IDIV_ON_FPU 0
// Legal number of PMP entries are 0, 16, or 64
`define PMP_ENTRIES 0

View File

@ -76,6 +76,7 @@
// Integer Divider Configuration
// DIV_BITSPERCYCLE must be 1, 2, or 4
`define DIV_BITSPERCYCLE 4
`define IDIV_ON_FPU 0
// Address space
`define RESET_VECTOR 64'h0000000000001000

View File

@ -74,6 +74,7 @@
// Integer Divider Configuration
// DIV_BITSPERCYCLE must be 1, 2, or 4
`define DIV_BITSPERCYCLE 4
`define IDIV_ON_FPU 0
// Legal number of PMP entries are 0, 16, or 64
`define PMP_ENTRIES 64

View File

@ -74,6 +74,7 @@
// Integer Divider Configuration
// DIV_BITSPERCYCLE must be 1, 2, or 4
`define DIV_BITSPERCYCLE 4
`define IDIV_ON_FPU 0
// Legal number of PMP entries are 0, 16, or 64
`define PMP_ENTRIES 64

View File

@ -74,6 +74,7 @@
// Integer Divider Configuration
// DIV_BITSPERCYCLE must be 1, 2, or 4
`define DIV_BITSPERCYCLE 4
`define IDIV_ON_FPU 0
// Legal number of PMP entries are 0, 16, or 64
`define PMP_ENTRIES 0

View File

@ -94,14 +94,14 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
logic [NUMWAYS-1:0] NextFlushWay;
logic FlushWayCntEn;
logic FlushWayCntRst;
logic SelEvict;
logic SelWriteback;
logic LRUWriteEn;
logic SelFlush;
logic ResetOrFlushAdr, ResetOrFlushWay;
logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache;
logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr;
logic SelFetchBuffer;
logic ce;
logic CacheEn;
localparam LOGLLENBYTES = $clog2(WORDLEN/8);
localparam CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN;
@ -124,12 +124,12 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
// Array of cache ways, along with victim, hit, dirty, and read merging logic
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE)
CacheWays[NUMWAYS-1:0](.clk, .reset, .ce, .CAdr, .PAdr, .LineWriteData, .LineByteMask,
.SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelEvict, .VictimWay,
CacheWays[NUMWAYS-1:0](.clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask,
.SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay,
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache);
if(NUMWAYS > 1) begin:vict
cacheLRU #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cacheLRU(
.clk, .reset, .ce, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage),
.clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage),
.SetValid, .PAdr(PAdr[SETTOP-1:OFFSETLEN]), .InvalidateCache, .FlushCache);
end else assign VictimWay = 1'b1; // one hot.
assign CacheHit = | HitWay;
@ -163,7 +163,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
end
assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1.
assign LineByteMask = ~SetValid & ~SetDirty ? '0 : ~SetValid & SetDirty ? DemuxedByteMask : '1; // if store hit only enable the word and subword bytes, else write all bytes.
logic [LINELEN/8-1:0] LineByteMask2;
assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0;
for(index = 0; index < LINELEN/8; index++) begin
mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]),
@ -173,7 +174,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
.d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
.d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}),
.s({SelFlush, SelEvict}), .y(CacheBusAdr));
.s({SelFlush, SelWriteback}), .y(CacheBusAdr));
/////////////////////////////////////////////////////////////////////////////////////////////
// Flush address and way generation during flush
@ -198,10 +199,10 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
.CacheHit, .LineDirty, .CacheStall, .CacheCommitted,
.CacheMiss, .CacheAccess, .SelAdr,
.ClearValid, .ClearDirty, .SetDirty,
.SetValid, .SelEvict, .SelFlush,
.SetValid, .SelWriteback, .SelFlush,
.FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst,
.FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer,
.InvalidateCache,
.ce,
.CacheEn,
.LRUWriteEn);
endmodule

View File

@ -32,7 +32,7 @@
module cacheLRU
#(parameter NUMWAYS = 4, SETLEN = 9, OFFSETLEN = 5, NUMLINES = 128)(
input logic clk, reset, ce, FlushStage,
input logic clk, reset, CacheEn, FlushStage,
input logic [NUMWAYS-1:0] HitWay,
input logic [NUMWAYS-1:0] ValidWay,
output logic [NUMWAYS-1:0] VictimWay,
@ -120,7 +120,7 @@ module cacheLRU
// LRU storage must be reset for modelsim to run. However the reset value does not actually matter in practice.
always_ff @(posedge clk) begin
if (reset) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0;
if(ce) begin
if(CacheEn) begin
if((InvalidateCache | FlushCache) & ~FlushStage) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0;
else if (LRUWriteEn & ~FlushStage) begin
LRUMemory[CAdr] <= NextLRU; ///***** RT: This is not right. Logically should be PAdr, but it breaks linux.

View File

@ -64,7 +64,7 @@ module cachefsm
output logic ClearDirty,
output logic SetDirty,
output logic SetValid,
output logic SelEvict,
output logic SelWriteback,
output logic LRUWriteEn,
output logic SelFlush,
output logic FlushAdrCntEn,
@ -72,7 +72,7 @@ module cachefsm
output logic FlushAdrCntRst,
output logic FlushWayCntRst,
output logic SelFetchBuffer,
output logic ce);
output logic CacheEn);
logic resetDelay;
logic AMO, StoreAMO;
@ -170,7 +170,7 @@ module cachefsm
assign LRUWriteEn = (CurrState == STATE_READY & AnyHit) |
(CurrState == STATE_MISS_WRITE_CACHE_LINE);
// Flush and eviction controls
assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY & ~CacheBusAck) |
assign SelWriteback = (CurrState == STATE_MISS_EVICT_DIRTY & ~CacheBusAck) |
(CurrState == STATE_READY & AnyMiss & LineDirty);
assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) |
(CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK);
@ -201,6 +201,6 @@ module cachefsm
resetDelay;
assign SelFetchBuffer = CurrState == STATE_MISS_WRITE_CACHE_LINE | CurrState == STATE_MISS_READ_DELAY;
assign ce = (CurrState == STATE_READY & ~Stall | CacheStall) | (CurrState != STATE_READY) | reset;
assign CacheEn = (CurrState == STATE_READY & ~Stall | CacheStall) | (CurrState != STATE_READY) | reset;
endmodule // cachefsm

View File

@ -33,7 +33,7 @@
module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
parameter OFFSETLEN = 5, parameter INDEXLEN = 9, parameter DIRTY_BITS = 1) (
input logic clk,
input logic ce,
input logic CacheEn,
input logic reset,
input logic [$clog2(NUMLINES)-1:0] CAdr,
input logic [`PA_BITS-1:0] PAdr,
@ -42,7 +42,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
input logic ClearValid,
input logic SetDirty,
input logic ClearDirty,
input logic SelEvict,
input logic SelWriteback,
input logic SelFlush,
input logic VictimWay,
input logic FlushWay,
@ -76,8 +76,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
logic ClearValidWay;
logic SetDirtyWay;
logic ClearDirtyWay;
logic SelectedWay;
logic SelWriteback;
logic SelNonHit;
logic SelData;
logic FlushWayEn, VictimWayEn;
@ -85,28 +84,28 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
// FlushWay and VictimWay are part of a one hot way selection. Must clear them if FlushWay not selected
// or VictimWay not selected.
assign FlushWayEn = FlushWay & SelFlush;
assign VictimWayEn = VictimWay & SelEvict;
assign VictimWayEn = VictimWay & SelWriteback;
assign SelWriteback = FlushWayEn | SetValid | SelEvict;
assign SelNonHit = FlushWayEn | SetValid | SelWriteback;
mux2 #(1) seltagmux(VictimWay, FlushWay, SelFlush, SelTag);
//assign SelTag = VictimWay | FlushWay;
assign SelData = HitWay | FlushWayEn | VictimWayEn;
//assign SelData = HitWay | FlushWayEn | VictimWayEn;
mux2 #(1) selectedwaymux(HitWay, SelTag, SelWriteback , SelectedWay);
mux2 #(1) selectedwaymux(HitWay, SelTag, SelNonHit , SelData);
/////////////////////////////////////////////////////////////////////////////////////////////
// Write Enable demux
/////////////////////////////////////////////////////////////////////////////////////////////
// RT: Can we merge these two muxes? This is also shared in cacheLRU.
//mux3 #(1) selectwaymux(HitWay, VictimWay, FlushWay, {SelFlush, SetValid}, SelectedWay);
//mux3 #(1) selecteddatamux(HitWay, VictimWay, FlushWay, {SelFlush, SelEvict}, SelData);
//mux3 #(1) selectwaymux(HitWay, VictimWay, FlushWay, {SelFlush, SetValid}, SelData);
//mux3 #(1) selecteddatamux(HitWay, VictimWay, FlushWay, {SelFlush, SelNonHit}, SelData);
assign SetValidWay = SetValid & SelectedWay;
assign ClearValidWay = ClearValid & SelectedWay;
assign SetDirtyWay = SetDirty & SelectedWay;
assign ClearDirtyWay = ClearDirty & SelectedWay;
assign SetValidWay = SetValid & SelData;
assign ClearValidWay = ClearValid & SelData;
assign SetDirtyWay = SetDirty & SelData;
assign ClearDirtyWay = ClearDirty & SelData;
// If writing the whole line set all write enables to 1, else only set the correct word.
assign SelectedWriteWordEn = (SetValidWay | SetDirtyWay) & ~FlushStage;
@ -117,7 +116,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
// Tag Array
/////////////////////////////////////////////////////////////////////////////////////////////
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce,
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce(CacheEn),
.addr(CAdr), .dout(ReadTag), .bwe('1),
.din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN));
@ -140,7 +139,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
localparam integer LOGNUMSRAM = $clog2(NUMSRAM);
for(words = 0; words < NUMSRAM; words++) begin: word
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce, .addr(CAdr),
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CAdr),
.dout(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]),
.din(LineWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]),
.we(SelectedWriteWordEn), .bwe(FinalByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words]));
@ -155,7 +154,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
always_ff @(posedge clk) begin // Valid bit array,
if (reset) ValidBits <= #1 '0;
if(ce) begin
if(CacheEn) begin
ValidWay <= #1 ValidBits[CAdr];
if(InvalidateCache & ~FlushStage) ValidBits <= #1 '0;
else if (SetValidEN | (ClearValidWay & ~FlushStage)) ValidBits[CAdr] <= #1 SetValidWay;
@ -171,7 +170,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
always_ff @(posedge clk) begin
// reset is optional. Consider merging with TAG array in the future.
//if (reset) DirtyBits <= #1 {NUMLINES{1'b0}};
if(ce) begin
if(CacheEn) begin
Dirty <= #1 DirtyBits[CAdr];
if((SetDirtyWay | ClearDirtyWay) & ~FlushStage) DirtyBits[CAdr] <= #1 SetDirtyWay;
end

View File

@ -52,15 +52,16 @@ module fdivsqrt(
output logic FDivBusyE, IFDivStartE, FDivDoneE,
// output logic DivDone,
output logic [`NE+1:0] QeM,
output logic [`DIVb:0] QmM
output logic [`DIVb:0] QmM,
output logic [`XLEN-1:0] FPIntDivResultM
// output logic [`XLEN-1:0] RemM,
);
logic [`DIVb+3:0] WS, WC;
logic [`DIVb+3:0] WS, WC;
logic [`DIVb+3:0] X;
logic [`DIVN-2:0] D; // U0.N-1
logic [`DIVN-2:0] Dpreproc;
logic [`DIVb:0] FirstU, FirstUM;
logic [`DIVb-1:0] D;
logic [`DIVb-1:0] DPreproc;
logic [`DIVb:0] FirstU, FirstUM;
logic [`DIVb+1:0] FirstC;
logic Firstun;
logic WZero;
@ -71,7 +72,7 @@ module fdivsqrt(
fdivsqrtpreproc fdivsqrtpreproc(
.clk, .IFDivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE),
.Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc,
.Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .DPreproc,
.n, .m, .OTFCSwap, .ALTBM, .BZero, .As,
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E);
fdivsqrtfsm fdivsqrtfsm(
@ -81,12 +82,12 @@ module fdivsqrt(
.XInfE, .YInfE, .WZero, .SpecialCaseM);
fdivsqrtiter fdivsqrtiter(
.clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, // .SqrtM,
.X,.Dpreproc, .FirstWS(WS), .FirstWC(WC),
.X,.DPreproc, .FirstWS(WS), .FirstWC(WC),
.IFDivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap,
.FDivBusyE);
fdivsqrtpostproc fdivsqrtpostproc(
.WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun,
.SqrtM, .SpecialCaseM, .RemOpM(Funct3M[1]), .ForwardedSrcAE,
.n, .ALTBM, .m, .BZero, .As,
.QmM, .WZero, .DivSM);
.QmM, .WZero, .DivSM, .FPIntDivResultM);
endmodule

View File

@ -40,42 +40,34 @@ module fdivsqrtiter(
// input logic SqrtM,
input logic OTFCSwap,
input logic [`DIVb+3:0] X,
input logic [`DIVN-2:0] Dpreproc,
output logic [`DIVN-2:0] D, // U0.N-1
output logic [`DIVb:0] FirstU, FirstUM,
input logic [`DIVb-1:0] DPreproc,
output logic [`DIVb-1:0] D,
output logic [`DIVb:0] FirstU, FirstUM,
output logic [`DIVb+1:0] FirstC,
output logic Firstun,
output logic [`DIVb+3:0] FirstWS, FirstWC
output logic [`DIVb+3:0] FirstWS, FirstWC
);
//QLEN = 1.(number of bits created for division)
// N is NF+1 or XLEN
// WC/WS is dependent on D so 4.N-1 ie N+3 bits or N+2:0 + one more bit in fraction for possible sqrt right shift
// D is 1.N-1, but the msb is always 1 so 0.N-1 or N-1 bits or N-2:0
// Dsel should match WC/WS so 4.N-1 ie N+3 bits or N+2:0
// U/UM should be 1.b so b+1 bits or b:0
// C needs to be the lenght of the final fraction 0.b so b or b-1:0
/* verilator lint_off UNOPTFLAT */
logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b
logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b
logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b
logic [`DIVb:0] UM[`DIVCOPIES:0];// 1.b
logic [`DIVb:0] UNext[`DIVCOPIES-1:0];// U1.b
logic [`DIVb:0] UMNext[`DIVCOPIES-1:0];// U1.b
logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b
logic [`DIVb+1:0] initC; // Q2.b
/* verilator lint_off UNOPTFLAT */
logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b
logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b
logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b
logic [`DIVb:0] UM[`DIVCOPIES:0]; // U1.b
logic [`DIVb:0] UNext[`DIVCOPIES-1:0]; // U1.b
logic [`DIVb:0] UMNext[`DIVCOPIES-1:0]; // U1.b
logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b
logic [`DIVb+1:0] initC; // Q2.b
logic [`DIVCOPIES-1:0] un;
/* verilator lint_on UNOPTFLAT */
logic [`DIVb+3:0] WSN, WCN; // Q4.N-1
logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.N-1
logic [`DIVb+1:0] NextC;
logic [`DIVb+1:0] CMux;
logic [`DIVb:0] UMux, UMMux;
logic [`DIVb:0] initU, initUM;
logic [`DIVb+3:0] WSN, WCN; // Q4.b
logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.b
logic [`DIVb+1:0] NextC;
logic [`DIVb+1:0] CMux;
logic [`DIVb:0] UMux, UMMux;
logic [`DIVb:0] initU, initUM;
/* verilator lint_on UNOPTFLAT */
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
@ -85,15 +77,15 @@ module fdivsqrtiter(
// Residual WS/SC registers/initializaiton mux
mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, IFDivStartE, WSN);
mux2 #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, IFDivStartE, WCN);
flopen #(`DIVb+4) wsflop(clk, FDivBusyE, WSN, WS[0]);
flopen #(`DIVb+4) wcflop(clk, FDivBusyE, WCN, WC[0]);
flopen #(`DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]);
flopen #(`DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]);
// UOTFC Result U and UM registers/initialization mux
// Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division
assign initU = SqrtE ? {1'b1, {(`DIVb){1'b0}}} : 0;
assign initUM = SqrtE ? 0 : {1'b1, {(`DIVb){1'b0}}};
mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux);
mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux);
mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux);
mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux);
flopen #(`DIVb+1) UReg(clk, IFDivStartE|FDivBusyE, UMux, U[0]);
flopen #(`DIVb+1) UMReg(clk, IFDivStartE|FDivBusyE, UMMux, UM[0]);
@ -103,18 +95,18 @@ module fdivsqrtiter(
assign initCUpper = SqrtE ? 2'b11 : (`RADIX == 4) ? 2'b00 : 2'b10;
assign initC = {initCUpper, {`DIVb{1'b0}}};
mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, IFDivStartE, CMux);
flopen #(`DIVb+2) cflop(clk, IFDivStartE|FDivBusyE, CMux, C[0]);
flopen #(`DIVb+2) creg(clk, IFDivStartE|FDivBusyE, CMux, C[0]);
// Divisior register
flopen #(`DIVN-1) dflop(clk, IFDivStartE, Dpreproc, D);
flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D);
// Divisor Selections
// - choose the negitive version of what's being selected
// - D is only the fraction
assign DBar = {3'b111, 1'b0, ~D, {`DIVb-`DIVN+1{1'b1}}};
// - D is a 0.b mantissa
assign DBar = {3'b111, 1'b0, ~D};
if(`RADIX == 4) begin : d2
assign DBar2 = {2'b11, 1'b0, ~D, {`DIVb+2-`DIVN{1'b1}}};
assign D2 = {2'b0, 1'b1, D, {`DIVb+2-`DIVN{1'b0}}};
assign DBar2 = {2'b11, 1'b0, ~D, 1'b1};
assign D2 = {2'b0, 1'b1, D, 1'b0};
end
// k=DIVCOPIES of the recurrence logic

View File

@ -32,8 +32,8 @@
module fdivsqrtpostproc(
input logic [`DIVb+3:0] WS, WC,
input logic [`DIVN-2:0] D, // U0.N-1
input logic [`DIVb:0] FirstU, FirstUM,
input logic [`DIVb-1:0] D,
input logic [`DIVb:0] FirstU, FirstUM,
input logic [`DIVb+1:0] FirstC,
input logic Firstun,
input logic SqrtM,
@ -41,19 +41,20 @@ module fdivsqrtpostproc(
input logic [`XLEN-1:0] ForwardedSrcAE,
input logic RemOpM, ALTBM, BZero, As,
input logic [`DIVBLEN:0] n, m,
output logic [`DIVb:0] QmM,
output logic [`DIVb:0] QmM,
output logic WZero,
output logic DivSM
output logic DivSM,
output logic [`XLEN-1:0] FPIntDivResultM
);
logic [`DIVb+3:0] W, Sum, RemD;
logic [`DIVb+3:0] W, Sum, RemDM;
logic [`DIVb:0] PreQmM;
logic NegStickyM, PostIncM;
logic weq0;
logic [`DIVBLEN:0] NormShiftM;
logic [`DIVb:0] IntQuotM, NormQuotM;
logic [`DIVb+3:0] IntRemM, NormRemM;
logic [`DIVb+3:0] PreResultM, ResultM;
logic [`DIVb+3:0] PreResultM, PreFPIntDivResultM;
// check for early termination on an exact result. If the result is not exact, the sticky should be set
aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0);
@ -78,14 +79,14 @@ module fdivsqrtpostproc(
assign Sum = WC + WS;
assign W = $signed(Sum) >>> `LOGR;
assign NegStickyM = W[`DIVb+3];
assign RemD = {4'b0000, D, {(`DIVb-`DIVN+1){1'b0}}};
assign RemDM = {4'b0000, D};
// Integer division: sign handling for div and rem
always_comb
if (~As)
if (NegStickyM) begin
NormQuotM = FirstUM;
NormRemM = W + RemD;
NormRemM = W + RemDM;
PostIncM = 0;
end else begin
NormQuotM = FirstU;
@ -99,7 +100,7 @@ module fdivsqrtpostproc(
PostIncM = 0;
end else begin
NormQuotM = FirstU;
NormRemM = W - RemD;
NormRemM = W - RemDM;
PostIncM = 1;
end
@ -136,8 +137,9 @@ module fdivsqrtpostproc(
// division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted
assign ResultM = ($signed(PreResultM) >>> NormShiftM) + {{(`DIVb+3){1'b0}}, (PostIncM & ~RemOpM)};
assign PreFPIntDivResultM = ($signed(PreResultM) >>> NormShiftM) + {{(`DIVb+3){1'b0}}, (PostIncM & ~RemOpM)};
assign FPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0];
assign PreQmM = NegStickyM ? FirstUM : FirstU; // Select U or U-1 depending on negative sticky bit
assign QmM = SqrtM ? (PreQmM << 1) : PreQmM;
endmodule

View File

@ -45,22 +45,21 @@ module fdivsqrtpreproc (
output logic OTFCSwap, ALTBM, BZero, As,
output logic [`NE+1:0] QeM,
output logic [`DIVb+3:0] X,
output logic [`DIVN-2:0] Dpreproc
output logic [`DIVb-1:0] DPreproc
);
// logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
logic [`NF-1:0] PreprocA, PreprocX;
logic [`NF-1:0] PreprocB, PreprocY;
logic [`NF+1:0] SqrtX;
logic [`DIVb-1:0] XPreproc;
logic [`DIVb:0] SqrtX;
logic [`DIVb+3:0] DivX;
logic [`NE+1:0] Qe;
logic [`NE+1:0] QeE;
// Intdiv signals
logic [`DIVb-1:0] ZeroBufX, ZeroBufY;
logic [`DIVb-1:0] IFNormLenX, IFNormLenD;
logic [`XLEN-1:0] PosA, PosB;
logic Bs, OTFCSwapTemp, ALTBE;
logic Bs, CalcOTFCSwap, ALTBE;
logic [`XLEN-1:0] A64, B64;
logic [`DIVBLEN:0] Calcn, Calcm;
logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX;
logic [`DIVBLEN:0] pPlusr, pPrCeil, p, L;
logic [`DIVBLEN:0] pPlusr, pPrCeil, p, ell;
logic [`LOGRK-1:0] pPrTrunc;
logic [`DIVb+3:0] PreShiftX;
@ -72,39 +71,38 @@ module fdivsqrtpreproc (
assign A64 = W64E ? {{(`XLEN-32){As}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE;
assign B64 = W64E ? {{(`XLEN-32){Bs}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE;
assign OTFCSwapTemp = (As ^ Bs) & MDUE;
assign CalcOTFCSwap = (As ^ Bs) & MDUE;
assign PosA = As ? -A64 : A64;
assign PosB = Bs ? -B64 : B64;
assign BZero = |ForwardedSrcBE;
assign ZeroBufX = MDUE ? {PosA, {`DIVb-`XLEN{1'b0}}} : {Xm, {`DIVb-`NF-1{1'b0}}};
assign ZeroBufY = MDUE ? {PosB, {`DIVb-`XLEN{1'b0}}} : {Ym, {`DIVb-`NF-1{1'b0}}};
lzc #(`DIVb) lzcX (ZeroBufX, L);
lzc #(`DIVb) lzcY (ZeroBufY, Calcm);
assign IFNormLenX = MDUE ? {PosA, {(`DIVb-`XLEN){1'b0}}} : {Xm, {(`DIVb-`NF-1){1'b0}}};
assign IFNormLenD = MDUE ? {PosB, {(`DIVb-`XLEN){1'b0}}} : {Ym, {(`DIVb-`NF-1){1'b0}}};
lzc #(`DIVb) lzcX (IFNormLenX, ell);
lzc #(`DIVb) lzcY (IFNormLenD, Calcm);
assign PreprocX = Xm[`NF-1:0]<<L;
assign PreprocY = Ym[`NF-1:0]<<Calcm;
assign XPreproc = IFNormLenX << (ell + {{`DIVBLEN{1'b0}}, ~MDUE}); // had issue with (`DIVBLEN+1)'(~MDUE) so using this instead
assign DPreproc = IFNormLenD << (Calcm + {{`DIVBLEN{1'b0}}, ~MDUE});
assign ZeroDiff = Calcm - L;
assign ZeroDiff = Calcm - ell;
assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B
assign p = ALTBE ? '0 : ZeroDiff;
assign pPlusr = (`DIVBLEN)'(`LOGR) + p;
assign pPrTrunc = pPlusr[`LOGRK-1:0];
assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN-1{1'b0}}, |(pPrTrunc)};
assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN{1'b0}}, |(pPrTrunc)};
assign Calcn = (pPrCeil << `LOGK) - 1;
assign IntBits = (`DIVBLEN)'(`RK) + p;
assign RightShiftX = (`DIVBLEN)'(`RK) - {{(`DIVBLEN-`RK){1'b0}}, IntBits[`RK-1:0]};
assign SqrtX = Xe[0]^L[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0};
assign DivX = {3'b000, ~XZero, PreprocX, {`DIVb-`NF{1'b0}}};
assign SqrtX = (Xe[0]^ell[0]) ? {1'b0, ~XZero, XPreproc[`DIVb-1:1]} : {~XZero, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF
assign DivX = {3'b000, ~XZero, XPreproc};
// *** explain why X is shifted between radices (initial assignment of WS=RX)
if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX;
else assign PreShiftX = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX;
if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX} : DivX;
else assign PreShiftX = Sqrt ? {2'b11, SqrtX, 1'b0} : DivX;
assign X = MDUE ? DivX >> RightShiftX : PreShiftX;
assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}};
// radix 2 radix 4
// 1 copies DIVLEN+2 DIVLEN+2/2
@ -116,12 +114,12 @@ module fdivsqrtpreproc (
// r = 1 or 2
// DIVRESLEN/(r*`DIVCOPIES)
flopen #(`NE+2) expreg(clk, IFDivStartE, Qe, QeM);
flopen #(1) swapreg(clk, IFDivStartE, OTFCSwapTemp, OTFCSwap);
flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
flopen #(1) swapreg(clk, IFDivStartE, CalcOTFCSwap, OTFCSwap);
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, Calcn, n);
flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, Calcm, m);
expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .L, .m(Calcm), .Qe);
expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .ell, .m(Calcm), .Qe(QeE));
endmodule
@ -130,7 +128,7 @@ module expcalc(
input logic [`NE-1:0] Xe, Ye,
input logic Sqrt,
input logic XZero,
input logic [`DIVBLEN:0] L, m,
input logic [`DIVBLEN:0] ell, m,
output logic [`NE+1:0] Qe
);
logic [`NE-2:0] Bias;
@ -162,10 +160,10 @@ module expcalc(
2'h2: Bias = (`NE-1)'(`H_BIAS);
endcase
end
assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - (`NE+2)'(`BIAS);
assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS);
assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias};
// correct exponent for denormalized input's normalization shifts
assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}};
assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}};
assign Qe = Sqrt ? SExp : DExp;
endmodule

View File

@ -32,7 +32,7 @@
/* verilator lint_off UNOPTFLAT */
module fdivsqrtstage2 (
input logic [`DIVN-2:0] D,
input logic [`DIVb-1:0] D,
input logic [`DIVb+3:0] DBar,
input logic [`DIVb:0] U, UM,
input logic [`DIVb+3:0] WS, WC,
@ -69,7 +69,7 @@ module fdivsqrtstage2 (
always_comb
if (up) Dsel = DBar;
else if (uz) Dsel = '0; // qz
else Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; // un
else Dsel = {3'b000, 1'b1, D}; // un
// Partial Product Generation
// WSA, WCA = WS + WC - qD

View File

@ -31,7 +31,7 @@
`include "wally-config.vh"
module fdivsqrtstage4 (
input logic [`DIVN-2:0] D,
input logic [`DIVb-1:0] D,
input logic [`DIVb+3:0] DBar, D2, DBar2,
input logic [`DIVb:0] U, UM,
input logic [`DIVb+3:0] WS, WC,
@ -61,7 +61,7 @@ module fdivsqrtstage4 (
// 0010 = -1
// 0001 = -2
assign Smsbs = U[`DIVb:`DIVb-4];
assign Dmsbs = D[`DIVN-2:`DIVN-4];
assign Dmsbs = D[`DIVb-1:`DIVb-3];
assign WCmsbs = WC[`DIVb+3:`DIVb-4];
assign WSmsbs = WS[`DIVb+3:`DIVb-4];
@ -77,7 +77,7 @@ module fdivsqrtstage4 (
4'b1000: Dsel = DBar2;
4'b0100: Dsel = DBar;
4'b0000: Dsel = '0;
4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}};
4'b0010: Dsel = {3'b0, 1'b1, D};
4'b0001: Dsel = D2;
default: Dsel = 'x;
endcase

View File

@ -55,7 +55,8 @@ module fpu (
output logic FCvtIntW, // select FCvtIntRes (to IEU)
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
output logic IllegalFPUInstrM, // Is the instruction an illegal fpu instruction (to privileged unit)
output logic [4:0] SetFflagsM // FPU flags (to privileged unit)
output logic [4:0] SetFflagsM, // FPU flags (to privileged unit)
output logic [`XLEN-1:0] FPIntDivResultW
);
// FPU specifics:
@ -152,6 +153,7 @@ module fpu (
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed
logic StallUnpackedM;
logic [`XLEN-1:0] FPIntDivResultM;
// DECODE STAGE
@ -267,7 +269,7 @@ module fpu (
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E,
.StallE, .StallM, .TrapM, .DivSM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
.QmM /*, .DivDone(DivDoneM) */);
.QmM, .FPIntDivResultM /*, .DivDone(DivDoneM) */);
//
// compare
@ -387,7 +389,8 @@ module fpu (
// M/W pipe registers
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FPIntDivResultM, FPIntDivResultW);
// BEGIN WRITEBACK STAGE

View File

@ -65,7 +65,7 @@ module controller(
output logic FWriteIntM,
// Writeback stage control signals
input logic StallW, FlushW,
output logic RegWriteW, // for datapath and Hazard Unit
output logic RegWriteW, DivW, // for datapath and Hazard Unit
output logic [2:0] ResultSrcW,
// Stall during CSRs
output logic CSRWriteFencePendingDEM,
@ -109,6 +109,7 @@ module controller(
logic IllegalERegAdrD;
logic [1:0] AtomicE;
logic FencePendingD, FencePendingE, FencePendingM;
logic DivE, DivM;
// Extract fields
@ -222,16 +223,17 @@ module controller(
assign MemReadE = MemRWE[1];
assign SCE = (ResultSrcE == 3'b100);
assign RegWriteE = IEURegWriteE | FWriteIntE; // IRF register writes could come from IEU or FPU controllers
assign DivE = MDUE & Funct3E[2]; // Division operation
// Memory stage pipeline control register
flopenrc #(19) controlregM(clk, reset, FlushM, ~StallM,
{RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE},
{RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FencePendingM, InstrValidM});
flopenrc #(20) controlregM(clk, reset, FlushM, ~StallM,
{RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE, DivE},
{RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FencePendingM, InstrValidM, DivM});
// Writeback stage pipeline control register
flopenrc #(4) controlregW(clk, reset, FlushW, ~StallW,
{RegWriteM, ResultSrcM},
{RegWriteW, ResultSrcW});
flopenrc #(5) controlregW(clk, reset, FlushW, ~StallW,
{RegWriteM, ResultSrcM, DivM},
{RegWriteW, ResultSrcW, DivW});
// Stall pipeline at Fetch if a CSR Write or Fence is pending in the subsequent stages
assign CSRWriteFencePendingDEM = CSRWriteD | CSRWriteE | CSRWriteM | FencePendingD | FencePendingE | FencePendingM;

View File

@ -57,14 +57,15 @@ module datapath (
output logic [`XLEN-1:0] WriteDataM,
// Writeback stage signals
input logic StallW, FlushW,
(* mark_debug = "true" *) input logic RegWriteW,
(* mark_debug = "true" *) input logic RegWriteW, DivW,
input logic SquashSCW,
input logic [2:0] ResultSrcW,
input logic [`XLEN-1:0] FCvtIntResW,
input logic [`XLEN-1:0] ReadDataW,
// input logic [`XLEN-1:0] PCLinkW,
input logic [`XLEN-1:0] CSRReadValW, MDUResultW,
// Hazard Unit signals
input logic [`XLEN-1:0] FPIntDivResultW,
// Hazard Unit signals
output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E,
output logic [4:0] RdE, RdM, RdW
);
@ -85,7 +86,7 @@ module datapath (
// Writeback stage signals
logic [`XLEN-1:0] SCResultW;
logic [`XLEN-1:0] ResultW;
logic [`XLEN-1:0] IFResultW, IFCvtResultW;
logic [`XLEN-1:0] IFResultW, IFCvtResultW, MulDivResultW;
// Decode stage
assign Rs1D = InstrD[19:15];
@ -125,10 +126,16 @@ module datapath (
if (`F_SUPPORTED) begin:fpmux
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
if (`IDIV_ON_FPU) begin
mux2 #(`XLEN) divresultmuxW(MDUResultW, FPIntDivResultW, DivW, MulDivResultW);
end else begin
assign MulDivResultW = MDUResultW;
end
end else begin:fpmux
assign IFResultM = IEUResultM; assign IFCvtResultW = IFResultW;
assign MulDivResultW = MDUResultW;
end
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW);
// handle Store Conditional result if atomic extension supported
if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW};

View File

@ -58,6 +58,7 @@ module ieu (
output logic InvalidateICacheM, FlushDCacheM,
// Writeback stage
input logic [`XLEN-1:0] FPIntDivResultW,
input logic [`XLEN-1:0] CSRReadValW, MDUResultW,
input logic [`XLEN-1:0] FCvtIntResW,
output logic [4:0] RdW,
@ -83,6 +84,7 @@ module ieu (
logic SCE;
logic [4:0] RdE;
logic FWriteIntM;
logic DivW;
// forwarding signals
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E;
@ -99,15 +101,15 @@ module ieu (
.Funct3E, .MDUE, .W64E, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM,
.CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M,
.RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .FWriteIntM,
.StallW, .FlushW, .RegWriteW, .ResultSrcW, .CSRWriteFencePendingDEM, .StoreStallD);
.StallW, .FlushW, .RegWriteW, .DivW, .ResultSrcW, .CSRWriteFencePendingDEM, .StoreStallD);
datapath dp(
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,
.ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .BranchSignedE,
.PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE,
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, .FCvtIntW,
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
.CSRReadValW, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
.StallW, .FlushW, .RegWriteW, .DivW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
.CSRReadValW, .MDUResultW, .FPIntDivResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
forward fw(
.Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW,

View File

@ -59,10 +59,17 @@ module muldiv (
// Divide
// Start a divide when a new division instruction is received and the divider isn't already busy or finishing
assign DivE = MDUE & Funct3E[2];
assign DivSignedE = ~Funct3E[0];
intdivrestoring div(.clk, .reset, .StallM, .TrapM, .DivSignedE, .W64E, .DivE,
.ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM);
// When F extensions are supported, use the FPU divider instead
if (`IDIV_ON_FPU) begin
assign QuotM = 0;
assign RemM = 0;
assign DivBusyE = 0;
end else begin
assign DivE = MDUE & Funct3E[2];
assign DivSignedE = ~Funct3E[0];
intdivrestoring div(.clk, .reset, .StallM, .TrapM, .DivSignedE, .W64E, .DivE,
.ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM);
end
// Result multiplexer
always_comb

View File

@ -99,6 +99,7 @@ module wallypipelinedcore (
logic FpLoadStoreM;
logic [1:0] FResSelW;
logic [4:0] SetFflagsM;
logic [`XLEN-1:0] FPIntDivResultW;
// memory management unit signals
logic ITLBWriteF;
@ -228,7 +229,7 @@ module wallypipelinedcore (
.RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM,
// Writeback stage
.CSRReadValW, .MDUResultW,
.CSRReadValW, .MDUResultW, .FPIntDivResultW,
.RdW, .ReadDataW(ReadDataW[`XLEN-1:0]),
.InstrValidM,
.FCvtIntResW,
@ -405,7 +406,8 @@ module wallypipelinedcore (
.FCvtIntW, // fpu result selection
.FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
.IllegalFPUInstrM, // Is the instruction an illegal fpu instruction
.SetFflagsM // FPU flags (to privileged unit)
.SetFflagsM, // FPU flags (to privileged unit)
.FPIntDivResultW
); // floating point unit
end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
assign FStallD = 0;

View File

@ -1413,6 +1413,7 @@ string imperas32f[] = '{
string arch32f[] = '{
`RISCVARCHTEST,
"rv32i_m/F/src/fdiv_b20-01.S",
"rv32i_m/F/src/fadd_b10-01.S",
"rv32i_m/F/src/fadd_b1-01.S",
"rv32i_m/F/src/fadd_b11-01.S",

View File

@ -8,8 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test
current_dir = $(shell pwd)
#XLEN ?= 64
all: root fsd_fld_tempfix arch32 wally32 wally32e arch64 wally64
#all: root fsd_fld_tempfix wally32
all: root arch32 wally32 wally32e arch64 wally64
root:
mkdir -p $(work_dir)
@ -20,14 +19,8 @@ root:
sed 's,{0},$(current_dir),g;s,{1},64gc,g' config.ini > config64.ini
sed 's,{0},$(current_dir),g;s,{1},32e,g' config.ini > config32e.ini
fsd_fld_tempfix:
# this is a temporary fix, there's a typo on the rv64i_m/D/src/d_fsd-align-01.S and rv64i_m/D/src/d_fld-align-01.S tests
# https://github.com/riscv-non-isa/riscv-arch-test/issues/266
find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fld-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {}
find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fsd-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {}
arch32:
riscof --verbose debug run --work-dir=$(work_dir) --config=config32.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser
riscof run --work-dir=$(work_dir) --config=config32.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser
rsync -a $(work_dir)/rv32i_m/ $(arch_workdir)/rv32i_m/ || echo "error suppressed"
arch64: