mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Merge branch 'main' into hazards
This commit is contained in:
commit
0358a8d255
@ -72,6 +72,7 @@
|
||||
// Integer Divider Configuration
|
||||
// DIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
`define DIV_BITSPERCYCLE 4
|
||||
`define IDIV_ON_FPU 0
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
`define PMP_ENTRIES 16
|
||||
|
@ -74,6 +74,7 @@
|
||||
// Integer Divider Configuration
|
||||
// DIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
`define DIV_BITSPERCYCLE 4
|
||||
`define IDIV_ON_FPU 0
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
`define PMP_ENTRIES 64
|
||||
|
@ -73,6 +73,7 @@
|
||||
// Integer Divider Configuration
|
||||
// DIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
`define DIV_BITSPERCYCLE 1
|
||||
`define IDIV_ON_FPU 0
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
`define PMP_ENTRIES 0
|
||||
|
@ -72,6 +72,7 @@
|
||||
// Integer Divider Configuration
|
||||
// DIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
`define DIV_BITSPERCYCLE 4
|
||||
`define IDIV_ON_FPU 0
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
`define PMP_ENTRIES 64
|
||||
|
@ -73,6 +73,7 @@
|
||||
// Integer Divider Configuration
|
||||
// DIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
`define DIV_BITSPERCYCLE 4
|
||||
`define IDIV_ON_FPU 0
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
`define PMP_ENTRIES 64
|
||||
|
@ -72,6 +72,7 @@
|
||||
// Integer Divider Configuration
|
||||
// DIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
`define DIV_BITSPERCYCLE 4
|
||||
`define IDIV_ON_FPU 0
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
`define PMP_ENTRIES 0
|
||||
|
@ -76,6 +76,7 @@
|
||||
// Integer Divider Configuration
|
||||
// DIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
`define DIV_BITSPERCYCLE 4
|
||||
`define IDIV_ON_FPU 0
|
||||
|
||||
// Address space
|
||||
`define RESET_VECTOR 64'h0000000000001000
|
||||
|
@ -74,6 +74,7 @@
|
||||
// Integer Divider Configuration
|
||||
// DIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
`define DIV_BITSPERCYCLE 4
|
||||
`define IDIV_ON_FPU 0
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
`define PMP_ENTRIES 64
|
||||
|
@ -74,6 +74,7 @@
|
||||
// Integer Divider Configuration
|
||||
// DIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
`define DIV_BITSPERCYCLE 4
|
||||
`define IDIV_ON_FPU 0
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
`define PMP_ENTRIES 64
|
||||
|
@ -74,6 +74,7 @@
|
||||
// Integer Divider Configuration
|
||||
// DIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
`define DIV_BITSPERCYCLE 4
|
||||
`define IDIV_ON_FPU 0
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
`define PMP_ENTRIES 0
|
||||
|
19
pipelined/src/cache/cache.sv
vendored
19
pipelined/src/cache/cache.sv
vendored
@ -94,14 +94,14 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
||||
logic [NUMWAYS-1:0] NextFlushWay;
|
||||
logic FlushWayCntEn;
|
||||
logic FlushWayCntRst;
|
||||
logic SelEvict;
|
||||
logic SelWriteback;
|
||||
logic LRUWriteEn;
|
||||
logic SelFlush;
|
||||
logic ResetOrFlushAdr, ResetOrFlushWay;
|
||||
logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache;
|
||||
logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr;
|
||||
logic SelFetchBuffer;
|
||||
logic ce;
|
||||
logic CacheEn;
|
||||
|
||||
localparam LOGLLENBYTES = $clog2(WORDLEN/8);
|
||||
localparam CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN;
|
||||
@ -124,12 +124,12 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
||||
|
||||
// Array of cache ways, along with victim, hit, dirty, and read merging logic
|
||||
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE)
|
||||
CacheWays[NUMWAYS-1:0](.clk, .reset, .ce, .CAdr, .PAdr, .LineWriteData, .LineByteMask,
|
||||
.SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelEvict, .VictimWay,
|
||||
CacheWays[NUMWAYS-1:0](.clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask,
|
||||
.SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay,
|
||||
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache);
|
||||
if(NUMWAYS > 1) begin:vict
|
||||
cacheLRU #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cacheLRU(
|
||||
.clk, .reset, .ce, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage),
|
||||
.clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage),
|
||||
.SetValid, .PAdr(PAdr[SETTOP-1:OFFSETLEN]), .InvalidateCache, .FlushCache);
|
||||
end else assign VictimWay = 1'b1; // one hot.
|
||||
assign CacheHit = | HitWay;
|
||||
@ -163,7 +163,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
||||
end
|
||||
|
||||
assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1.
|
||||
assign LineByteMask = ~SetValid & ~SetDirty ? '0 : ~SetValid & SetDirty ? DemuxedByteMask : '1; // if store hit only enable the word and subword bytes, else write all bytes.
|
||||
logic [LINELEN/8-1:0] LineByteMask2;
|
||||
assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0;
|
||||
|
||||
for(index = 0; index < LINELEN/8; index++) begin
|
||||
mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]),
|
||||
@ -173,7 +174,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
||||
mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
|
||||
.d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
|
||||
.d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}),
|
||||
.s({SelFlush, SelEvict}), .y(CacheBusAdr));
|
||||
.s({SelFlush, SelWriteback}), .y(CacheBusAdr));
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Flush address and way generation during flush
|
||||
@ -198,10 +199,10 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
||||
.CacheHit, .LineDirty, .CacheStall, .CacheCommitted,
|
||||
.CacheMiss, .CacheAccess, .SelAdr,
|
||||
.ClearValid, .ClearDirty, .SetDirty,
|
||||
.SetValid, .SelEvict, .SelFlush,
|
||||
.SetValid, .SelWriteback, .SelFlush,
|
||||
.FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst,
|
||||
.FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer,
|
||||
.InvalidateCache,
|
||||
.ce,
|
||||
.CacheEn,
|
||||
.LRUWriteEn);
|
||||
endmodule
|
||||
|
4
pipelined/src/cache/cacheLRU.sv
vendored
4
pipelined/src/cache/cacheLRU.sv
vendored
@ -32,7 +32,7 @@
|
||||
|
||||
module cacheLRU
|
||||
#(parameter NUMWAYS = 4, SETLEN = 9, OFFSETLEN = 5, NUMLINES = 128)(
|
||||
input logic clk, reset, ce, FlushStage,
|
||||
input logic clk, reset, CacheEn, FlushStage,
|
||||
input logic [NUMWAYS-1:0] HitWay,
|
||||
input logic [NUMWAYS-1:0] ValidWay,
|
||||
output logic [NUMWAYS-1:0] VictimWay,
|
||||
@ -120,7 +120,7 @@ module cacheLRU
|
||||
// LRU storage must be reset for modelsim to run. However the reset value does not actually matter in practice.
|
||||
always_ff @(posedge clk) begin
|
||||
if (reset) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0;
|
||||
if(ce) begin
|
||||
if(CacheEn) begin
|
||||
if((InvalidateCache | FlushCache) & ~FlushStage) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0;
|
||||
else if (LRUWriteEn & ~FlushStage) begin
|
||||
LRUMemory[CAdr] <= NextLRU; ///***** RT: This is not right. Logically should be PAdr, but it breaks linux.
|
||||
|
8
pipelined/src/cache/cachefsm.sv
vendored
8
pipelined/src/cache/cachefsm.sv
vendored
@ -64,7 +64,7 @@ module cachefsm
|
||||
output logic ClearDirty,
|
||||
output logic SetDirty,
|
||||
output logic SetValid,
|
||||
output logic SelEvict,
|
||||
output logic SelWriteback,
|
||||
output logic LRUWriteEn,
|
||||
output logic SelFlush,
|
||||
output logic FlushAdrCntEn,
|
||||
@ -72,7 +72,7 @@ module cachefsm
|
||||
output logic FlushAdrCntRst,
|
||||
output logic FlushWayCntRst,
|
||||
output logic SelFetchBuffer,
|
||||
output logic ce);
|
||||
output logic CacheEn);
|
||||
|
||||
logic resetDelay;
|
||||
logic AMO, StoreAMO;
|
||||
@ -170,7 +170,7 @@ module cachefsm
|
||||
assign LRUWriteEn = (CurrState == STATE_READY & AnyHit) |
|
||||
(CurrState == STATE_MISS_WRITE_CACHE_LINE);
|
||||
// Flush and eviction controls
|
||||
assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY & ~CacheBusAck) |
|
||||
assign SelWriteback = (CurrState == STATE_MISS_EVICT_DIRTY & ~CacheBusAck) |
|
||||
(CurrState == STATE_READY & AnyMiss & LineDirty);
|
||||
assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) |
|
||||
(CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK);
|
||||
@ -201,6 +201,6 @@ module cachefsm
|
||||
resetDelay;
|
||||
|
||||
assign SelFetchBuffer = CurrState == STATE_MISS_WRITE_CACHE_LINE | CurrState == STATE_MISS_READ_DELAY;
|
||||
assign ce = (CurrState == STATE_READY & ~Stall | CacheStall) | (CurrState != STATE_READY) | reset;
|
||||
assign CacheEn = (CurrState == STATE_READY & ~Stall | CacheStall) | (CurrState != STATE_READY) | reset;
|
||||
|
||||
endmodule // cachefsm
|
||||
|
35
pipelined/src/cache/cacheway.sv
vendored
35
pipelined/src/cache/cacheway.sv
vendored
@ -33,7 +33,7 @@
|
||||
module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
|
||||
parameter OFFSETLEN = 5, parameter INDEXLEN = 9, parameter DIRTY_BITS = 1) (
|
||||
input logic clk,
|
||||
input logic ce,
|
||||
input logic CacheEn,
|
||||
input logic reset,
|
||||
input logic [$clog2(NUMLINES)-1:0] CAdr,
|
||||
input logic [`PA_BITS-1:0] PAdr,
|
||||
@ -42,7 +42,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
|
||||
input logic ClearValid,
|
||||
input logic SetDirty,
|
||||
input logic ClearDirty,
|
||||
input logic SelEvict,
|
||||
input logic SelWriteback,
|
||||
input logic SelFlush,
|
||||
input logic VictimWay,
|
||||
input logic FlushWay,
|
||||
@ -76,8 +76,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
|
||||
logic ClearValidWay;
|
||||
logic SetDirtyWay;
|
||||
logic ClearDirtyWay;
|
||||
logic SelectedWay;
|
||||
logic SelWriteback;
|
||||
logic SelNonHit;
|
||||
logic SelData;
|
||||
logic FlushWayEn, VictimWayEn;
|
||||
|
||||
@ -85,28 +84,28 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
|
||||
// FlushWay and VictimWay are part of a one hot way selection. Must clear them if FlushWay not selected
|
||||
// or VictimWay not selected.
|
||||
assign FlushWayEn = FlushWay & SelFlush;
|
||||
assign VictimWayEn = VictimWay & SelEvict;
|
||||
assign VictimWayEn = VictimWay & SelWriteback;
|
||||
|
||||
assign SelWriteback = FlushWayEn | SetValid | SelEvict;
|
||||
assign SelNonHit = FlushWayEn | SetValid | SelWriteback;
|
||||
|
||||
mux2 #(1) seltagmux(VictimWay, FlushWay, SelFlush, SelTag);
|
||||
//assign SelTag = VictimWay | FlushWay;
|
||||
assign SelData = HitWay | FlushWayEn | VictimWayEn;
|
||||
//assign SelData = HitWay | FlushWayEn | VictimWayEn;
|
||||
|
||||
mux2 #(1) selectedwaymux(HitWay, SelTag, SelWriteback , SelectedWay);
|
||||
mux2 #(1) selectedwaymux(HitWay, SelTag, SelNonHit , SelData);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Write Enable demux
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// RT: Can we merge these two muxes? This is also shared in cacheLRU.
|
||||
//mux3 #(1) selectwaymux(HitWay, VictimWay, FlushWay, {SelFlush, SetValid}, SelectedWay);
|
||||
//mux3 #(1) selecteddatamux(HitWay, VictimWay, FlushWay, {SelFlush, SelEvict}, SelData);
|
||||
//mux3 #(1) selectwaymux(HitWay, VictimWay, FlushWay, {SelFlush, SetValid}, SelData);
|
||||
//mux3 #(1) selecteddatamux(HitWay, VictimWay, FlushWay, {SelFlush, SelNonHit}, SelData);
|
||||
|
||||
assign SetValidWay = SetValid & SelectedWay;
|
||||
assign ClearValidWay = ClearValid & SelectedWay;
|
||||
assign SetDirtyWay = SetDirty & SelectedWay;
|
||||
assign ClearDirtyWay = ClearDirty & SelectedWay;
|
||||
assign SetValidWay = SetValid & SelData;
|
||||
assign ClearValidWay = ClearValid & SelData;
|
||||
assign SetDirtyWay = SetDirty & SelData;
|
||||
assign ClearDirtyWay = ClearDirty & SelData;
|
||||
|
||||
// If writing the whole line set all write enables to 1, else only set the correct word.
|
||||
assign SelectedWriteWordEn = (SetValidWay | SetDirtyWay) & ~FlushStage;
|
||||
@ -117,7 +116,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
|
||||
// Tag Array
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce,
|
||||
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce(CacheEn),
|
||||
.addr(CAdr), .dout(ReadTag), .bwe('1),
|
||||
.din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN));
|
||||
|
||||
@ -140,7 +139,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
|
||||
localparam integer LOGNUMSRAM = $clog2(NUMSRAM);
|
||||
|
||||
for(words = 0; words < NUMSRAM; words++) begin: word
|
||||
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce, .addr(CAdr),
|
||||
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CAdr),
|
||||
.dout(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]),
|
||||
.din(LineWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]),
|
||||
.we(SelectedWriteWordEn), .bwe(FinalByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words]));
|
||||
@ -155,7 +154,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
|
||||
|
||||
always_ff @(posedge clk) begin // Valid bit array,
|
||||
if (reset) ValidBits <= #1 '0;
|
||||
if(ce) begin
|
||||
if(CacheEn) begin
|
||||
ValidWay <= #1 ValidBits[CAdr];
|
||||
if(InvalidateCache & ~FlushStage) ValidBits <= #1 '0;
|
||||
else if (SetValidEN | (ClearValidWay & ~FlushStage)) ValidBits[CAdr] <= #1 SetValidWay;
|
||||
@ -171,7 +170,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
|
||||
always_ff @(posedge clk) begin
|
||||
// reset is optional. Consider merging with TAG array in the future.
|
||||
//if (reset) DirtyBits <= #1 {NUMLINES{1'b0}};
|
||||
if(ce) begin
|
||||
if(CacheEn) begin
|
||||
Dirty <= #1 DirtyBits[CAdr];
|
||||
if((SetDirtyWay | ClearDirtyWay) & ~FlushStage) DirtyBits[CAdr] <= #1 SetDirtyWay;
|
||||
end
|
||||
|
@ -52,15 +52,16 @@ module fdivsqrt(
|
||||
output logic FDivBusyE, IFDivStartE, FDivDoneE,
|
||||
// output logic DivDone,
|
||||
output logic [`NE+1:0] QeM,
|
||||
output logic [`DIVb:0] QmM
|
||||
output logic [`DIVb:0] QmM,
|
||||
output logic [`XLEN-1:0] FPIntDivResultM
|
||||
// output logic [`XLEN-1:0] RemM,
|
||||
);
|
||||
|
||||
logic [`DIVb+3:0] WS, WC;
|
||||
logic [`DIVb+3:0] WS, WC;
|
||||
logic [`DIVb+3:0] X;
|
||||
logic [`DIVN-2:0] D; // U0.N-1
|
||||
logic [`DIVN-2:0] Dpreproc;
|
||||
logic [`DIVb:0] FirstU, FirstUM;
|
||||
logic [`DIVb-1:0] D;
|
||||
logic [`DIVb-1:0] DPreproc;
|
||||
logic [`DIVb:0] FirstU, FirstUM;
|
||||
logic [`DIVb+1:0] FirstC;
|
||||
logic Firstun;
|
||||
logic WZero;
|
||||
@ -71,7 +72,7 @@ module fdivsqrt(
|
||||
|
||||
fdivsqrtpreproc fdivsqrtpreproc(
|
||||
.clk, .IFDivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE),
|
||||
.Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc,
|
||||
.Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .DPreproc,
|
||||
.n, .m, .OTFCSwap, .ALTBM, .BZero, .As,
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E);
|
||||
fdivsqrtfsm fdivsqrtfsm(
|
||||
@ -81,12 +82,12 @@ module fdivsqrt(
|
||||
.XInfE, .YInfE, .WZero, .SpecialCaseM);
|
||||
fdivsqrtiter fdivsqrtiter(
|
||||
.clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, // .SqrtM,
|
||||
.X,.Dpreproc, .FirstWS(WS), .FirstWC(WC),
|
||||
.X,.DPreproc, .FirstWS(WS), .FirstWC(WC),
|
||||
.IFDivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap,
|
||||
.FDivBusyE);
|
||||
fdivsqrtpostproc fdivsqrtpostproc(
|
||||
.WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun,
|
||||
.SqrtM, .SpecialCaseM, .RemOpM(Funct3M[1]), .ForwardedSrcAE,
|
||||
.n, .ALTBM, .m, .BZero, .As,
|
||||
.QmM, .WZero, .DivSM);
|
||||
.QmM, .WZero, .DivSM, .FPIntDivResultM);
|
||||
endmodule
|
@ -40,42 +40,34 @@ module fdivsqrtiter(
|
||||
// input logic SqrtM,
|
||||
input logic OTFCSwap,
|
||||
input logic [`DIVb+3:0] X,
|
||||
input logic [`DIVN-2:0] Dpreproc,
|
||||
output logic [`DIVN-2:0] D, // U0.N-1
|
||||
output logic [`DIVb:0] FirstU, FirstUM,
|
||||
input logic [`DIVb-1:0] DPreproc,
|
||||
output logic [`DIVb-1:0] D,
|
||||
output logic [`DIVb:0] FirstU, FirstUM,
|
||||
output logic [`DIVb+1:0] FirstC,
|
||||
output logic Firstun,
|
||||
output logic [`DIVb+3:0] FirstWS, FirstWC
|
||||
output logic [`DIVb+3:0] FirstWS, FirstWC
|
||||
);
|
||||
|
||||
//QLEN = 1.(number of bits created for division)
|
||||
// N is NF+1 or XLEN
|
||||
// WC/WS is dependent on D so 4.N-1 ie N+3 bits or N+2:0 + one more bit in fraction for possible sqrt right shift
|
||||
// D is 1.N-1, but the msb is always 1 so 0.N-1 or N-1 bits or N-2:0
|
||||
// Dsel should match WC/WS so 4.N-1 ie N+3 bits or N+2:0
|
||||
// U/UM should be 1.b so b+1 bits or b:0
|
||||
// C needs to be the length of the final fraction 0.b so b or b-1:0
|
||||
/* verilator lint_off UNOPTFLAT */
|
||||
logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b
|
||||
logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b
|
||||
logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b
|
||||
logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b
|
||||
logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b
|
||||
logic [`DIVb:0] UM[`DIVCOPIES:0];// 1.b
|
||||
logic [`DIVb:0] UNext[`DIVCOPIES-1:0];// U1.b
|
||||
logic [`DIVb:0] UMNext[`DIVCOPIES-1:0];// U1.b
|
||||
logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b
|
||||
logic [`DIVb+1:0] initC; // Q2.b
|
||||
/* verilator lint_off UNOPTFLAT */
|
||||
logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b
|
||||
logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b
|
||||
logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b
|
||||
logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b
|
||||
logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b
|
||||
logic [`DIVb:0] UM[`DIVCOPIES:0]; // U1.b
|
||||
logic [`DIVb:0] UNext[`DIVCOPIES-1:0]; // U1.b
|
||||
logic [`DIVb:0] UMNext[`DIVCOPIES-1:0]; // U1.b
|
||||
logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b
|
||||
logic [`DIVb+1:0] initC; // Q2.b
|
||||
logic [`DIVCOPIES-1:0] un;
|
||||
|
||||
/* verilator lint_on UNOPTFLAT */
|
||||
logic [`DIVb+3:0] WSN, WCN; // Q4.N-1
|
||||
logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.N-1
|
||||
logic [`DIVb+1:0] NextC;
|
||||
logic [`DIVb+1:0] CMux;
|
||||
logic [`DIVb:0] UMux, UMMux;
|
||||
logic [`DIVb:0] initU, initUM;
|
||||
|
||||
logic [`DIVb+3:0] WSN, WCN; // Q4.b
|
||||
logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.b
|
||||
logic [`DIVb+1:0] NextC;
|
||||
logic [`DIVb+1:0] CMux;
|
||||
logic [`DIVb:0] UMux, UMMux;
|
||||
logic [`DIVb:0] initU, initUM;
|
||||
/* verilator lint_on UNOPTFLAT */
|
||||
|
||||
// Top Muxes and Registers
|
||||
// When start is asserted, the inputs are loaded into the divider.
|
||||
@ -85,15 +77,15 @@ module fdivsqrtiter(
|
||||
// Residual WS/WC registers/initialization mux
|
||||
mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, IFDivStartE, WSN);
|
||||
mux2 #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, IFDivStartE, WCN);
|
||||
flopen #(`DIVb+4) wsflop(clk, FDivBusyE, WSN, WS[0]);
|
||||
flopen #(`DIVb+4) wcflop(clk, FDivBusyE, WCN, WC[0]);
|
||||
flopen #(`DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]);
|
||||
flopen #(`DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]);
|
||||
|
||||
// UOTFC Result U and UM registers/initialization mux
|
||||
// Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division
|
||||
assign initU = SqrtE ? {1'b1, {(`DIVb){1'b0}}} : 0;
|
||||
assign initUM = SqrtE ? 0 : {1'b1, {(`DIVb){1'b0}}};
|
||||
mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux);
|
||||
mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux);
|
||||
mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux);
|
||||
mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux);
|
||||
flopen #(`DIVb+1) UReg(clk, IFDivStartE|FDivBusyE, UMux, U[0]);
|
||||
flopen #(`DIVb+1) UMReg(clk, IFDivStartE|FDivBusyE, UMMux, UM[0]);
|
||||
|
||||
@ -103,18 +95,18 @@ module fdivsqrtiter(
|
||||
assign initCUpper = SqrtE ? 2'b11 : (`RADIX == 4) ? 2'b00 : 2'b10;
|
||||
assign initC = {initCUpper, {`DIVb{1'b0}}};
|
||||
mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, IFDivStartE, CMux);
|
||||
flopen #(`DIVb+2) cflop(clk, IFDivStartE|FDivBusyE, CMux, C[0]);
|
||||
flopen #(`DIVb+2) creg(clk, IFDivStartE|FDivBusyE, CMux, C[0]);
|
||||
|
||||
// Divisor register
|
||||
flopen #(`DIVN-1) dflop(clk, IFDivStartE, Dpreproc, D);
|
||||
flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D);
|
||||
|
||||
// Divisor Selections
|
||||
// - choose the negative version of what's being selected
|
||||
// - D is only the fraction
|
||||
assign DBar = {3'b111, 1'b0, ~D, {`DIVb-`DIVN+1{1'b1}}};
|
||||
// - D is a 0.b mantissa
|
||||
assign DBar = {3'b111, 1'b0, ~D};
|
||||
if(`RADIX == 4) begin : d2
|
||||
assign DBar2 = {2'b11, 1'b0, ~D, {`DIVb+2-`DIVN{1'b1}}};
|
||||
assign D2 = {2'b0, 1'b1, D, {`DIVb+2-`DIVN{1'b0}}};
|
||||
assign DBar2 = {2'b11, 1'b0, ~D, 1'b1};
|
||||
assign D2 = {2'b0, 1'b1, D, 1'b0};
|
||||
end
|
||||
|
||||
// k=DIVCOPIES of the recurrence logic
|
||||
|
@ -32,8 +32,8 @@
|
||||
|
||||
module fdivsqrtpostproc(
|
||||
input logic [`DIVb+3:0] WS, WC,
|
||||
input logic [`DIVN-2:0] D, // U0.N-1
|
||||
input logic [`DIVb:0] FirstU, FirstUM,
|
||||
input logic [`DIVb-1:0] D,
|
||||
input logic [`DIVb:0] FirstU, FirstUM,
|
||||
input logic [`DIVb+1:0] FirstC,
|
||||
input logic Firstun,
|
||||
input logic SqrtM,
|
||||
@ -41,19 +41,20 @@ module fdivsqrtpostproc(
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE,
|
||||
input logic RemOpM, ALTBM, BZero, As,
|
||||
input logic [`DIVBLEN:0] n, m,
|
||||
output logic [`DIVb:0] QmM,
|
||||
output logic [`DIVb:0] QmM,
|
||||
output logic WZero,
|
||||
output logic DivSM
|
||||
output logic DivSM,
|
||||
output logic [`XLEN-1:0] FPIntDivResultM
|
||||
);
|
||||
|
||||
logic [`DIVb+3:0] W, Sum, RemD;
|
||||
logic [`DIVb+3:0] W, Sum, RemDM;
|
||||
logic [`DIVb:0] PreQmM;
|
||||
logic NegStickyM, PostIncM;
|
||||
logic weq0;
|
||||
logic [`DIVBLEN:0] NormShiftM;
|
||||
logic [`DIVb:0] IntQuotM, NormQuotM;
|
||||
logic [`DIVb+3:0] IntRemM, NormRemM;
|
||||
logic [`DIVb+3:0] PreResultM, ResultM;
|
||||
logic [`DIVb+3:0] PreResultM, PreFPIntDivResultM;
|
||||
|
||||
// check for early termination on an exact result. If the result is not exact, the sticky should be set
|
||||
aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0);
|
||||
@ -78,14 +79,14 @@ module fdivsqrtpostproc(
|
||||
assign Sum = WC + WS;
|
||||
assign W = $signed(Sum) >>> `LOGR;
|
||||
assign NegStickyM = W[`DIVb+3];
|
||||
assign RemD = {4'b0000, D, {(`DIVb-`DIVN+1){1'b0}}};
|
||||
assign RemDM = {4'b0000, D};
|
||||
|
||||
// Integer division: sign handling for div and rem
|
||||
always_comb
|
||||
if (~As)
|
||||
if (NegStickyM) begin
|
||||
NormQuotM = FirstUM;
|
||||
NormRemM = W + RemD;
|
||||
NormRemM = W + RemDM;
|
||||
PostIncM = 0;
|
||||
end else begin
|
||||
NormQuotM = FirstU;
|
||||
@ -99,7 +100,7 @@ module fdivsqrtpostproc(
|
||||
PostIncM = 0;
|
||||
end else begin
|
||||
NormQuotM = FirstU;
|
||||
NormRemM = W - RemD;
|
||||
NormRemM = W - RemDM;
|
||||
PostIncM = 1;
|
||||
end
|
||||
|
||||
@ -136,8 +137,9 @@ module fdivsqrtpostproc(
|
||||
|
||||
// division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted
|
||||
|
||||
assign ResultM = ($signed(PreResultM) >>> NormShiftM) + {{(`DIVb+3){1'b0}}, (PostIncM & ~RemOpM)};
|
||||
|
||||
assign PreFPIntDivResultM = ($signed(PreResultM) >>> NormShiftM) + {{(`DIVb+3){1'b0}}, (PostIncM & ~RemOpM)};
|
||||
assign FPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0];
|
||||
|
||||
assign PreQmM = NegStickyM ? FirstUM : FirstU; // Select U or U-1 depending on negative sticky bit
|
||||
assign QmM = SqrtM ? (PreQmM << 1) : PreQmM;
|
||||
endmodule
|
@ -45,22 +45,21 @@ module fdivsqrtpreproc (
|
||||
output logic OTFCSwap, ALTBM, BZero, As,
|
||||
output logic [`NE+1:0] QeM,
|
||||
output logic [`DIVb+3:0] X,
|
||||
output logic [`DIVN-2:0] Dpreproc
|
||||
output logic [`DIVb-1:0] DPreproc
|
||||
);
|
||||
// logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
|
||||
logic [`NF-1:0] PreprocA, PreprocX;
|
||||
logic [`NF-1:0] PreprocB, PreprocY;
|
||||
logic [`NF+1:0] SqrtX;
|
||||
|
||||
logic [`DIVb-1:0] XPreproc;
|
||||
logic [`DIVb:0] SqrtX;
|
||||
logic [`DIVb+3:0] DivX;
|
||||
logic [`NE+1:0] Qe;
|
||||
logic [`NE+1:0] QeE;
|
||||
// Intdiv signals
|
||||
logic [`DIVb-1:0] ZeroBufX, ZeroBufY;
|
||||
logic [`DIVb-1:0] IFNormLenX, IFNormLenD;
|
||||
logic [`XLEN-1:0] PosA, PosB;
|
||||
logic Bs, OTFCSwapTemp, ALTBE;
|
||||
logic Bs, CalcOTFCSwap, ALTBE;
|
||||
logic [`XLEN-1:0] A64, B64;
|
||||
logic [`DIVBLEN:0] Calcn, Calcm;
|
||||
logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX;
|
||||
logic [`DIVBLEN:0] pPlusr, pPrCeil, p, L;
|
||||
logic [`DIVBLEN:0] pPlusr, pPrCeil, p, ell;
|
||||
logic [`LOGRK-1:0] pPrTrunc;
|
||||
logic [`DIVb+3:0] PreShiftX;
|
||||
|
||||
@ -72,39 +71,38 @@ module fdivsqrtpreproc (
|
||||
assign A64 = W64E ? {{(`XLEN-32){As}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE;
|
||||
assign B64 = W64E ? {{(`XLEN-32){Bs}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE;
|
||||
|
||||
assign OTFCSwapTemp = (As ^ Bs) & MDUE;
|
||||
assign CalcOTFCSwap = (As ^ Bs) & MDUE;
|
||||
|
||||
assign PosA = As ? -A64 : A64;
|
||||
assign PosB = Bs ? -B64 : B64;
|
||||
assign BZero = |ForwardedSrcBE;
|
||||
|
||||
assign ZeroBufX = MDUE ? {PosA, {`DIVb-`XLEN{1'b0}}} : {Xm, {`DIVb-`NF-1{1'b0}}};
|
||||
assign ZeroBufY = MDUE ? {PosB, {`DIVb-`XLEN{1'b0}}} : {Ym, {`DIVb-`NF-1{1'b0}}};
|
||||
lzc #(`DIVb) lzcX (ZeroBufX, L);
|
||||
lzc #(`DIVb) lzcY (ZeroBufY, Calcm);
|
||||
assign IFNormLenX = MDUE ? {PosA, {(`DIVb-`XLEN){1'b0}}} : {Xm, {(`DIVb-`NF-1){1'b0}}};
|
||||
assign IFNormLenD = MDUE ? {PosB, {(`DIVb-`XLEN){1'b0}}} : {Ym, {(`DIVb-`NF-1){1'b0}}};
|
||||
lzc #(`DIVb) lzcX (IFNormLenX, ell);
|
||||
lzc #(`DIVb) lzcY (IFNormLenD, Calcm);
|
||||
|
||||
assign PreprocX = Xm[`NF-1:0]<<L;
|
||||
assign PreprocY = Ym[`NF-1:0]<<Calcm;
|
||||
assign XPreproc = IFNormLenX << (ell + {{`DIVBLEN{1'b0}}, ~MDUE}); // had issue with (`DIVBLEN+1)'(~MDUE) so using this instead
|
||||
assign DPreproc = IFNormLenD << (Calcm + {{`DIVBLEN{1'b0}}, ~MDUE});
|
||||
|
||||
assign ZeroDiff = Calcm - L;
|
||||
assign ZeroDiff = Calcm - ell;
|
||||
assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B
|
||||
assign p = ALTBE ? '0 : ZeroDiff;
|
||||
|
||||
assign pPlusr = (`DIVBLEN)'(`LOGR) + p;
|
||||
assign pPrTrunc = pPlusr[`LOGRK-1:0];
|
||||
assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN-1{1'b0}}, |(pPrTrunc)};
|
||||
assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN{1'b0}}, |(pPrTrunc)};
|
||||
assign Calcn = (pPrCeil << `LOGK) - 1;
|
||||
assign IntBits = (`DIVBLEN)'(`RK) + p;
|
||||
assign RightShiftX = (`DIVBLEN)'(`RK) - {{(`DIVBLEN-`RK){1'b0}}, IntBits[`RK-1:0]};
|
||||
|
||||
assign SqrtX = Xe[0]^L[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0};
|
||||
assign DivX = {3'b000, ~XZero, PreprocX, {`DIVb-`NF{1'b0}}};
|
||||
assign SqrtX = (Xe[0]^ell[0]) ? {1'b0, ~XZero, XPreproc[`DIVb-1:1]} : {~XZero, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF
|
||||
assign DivX = {3'b000, ~XZero, XPreproc};
|
||||
|
||||
// *** explain why X is shifted between radices (initial assignment of WS=RX)
|
||||
if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX;
|
||||
else assign PreShiftX = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX;
|
||||
if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX} : DivX;
|
||||
else assign PreShiftX = Sqrt ? {2'b11, SqrtX, 1'b0} : DivX;
|
||||
assign X = MDUE ? DivX >> RightShiftX : PreShiftX;
|
||||
assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}};
|
||||
|
||||
// radix 2 radix 4
|
||||
// 1 copies DIVLEN+2 DIVLEN+2/2
|
||||
@ -116,12 +114,12 @@ module fdivsqrtpreproc (
|
||||
// r = 1 or 2
|
||||
// DIVRESLEN/(r*`DIVCOPIES)
|
||||
|
||||
flopen #(`NE+2) expreg(clk, IFDivStartE, Qe, QeM);
|
||||
flopen #(1) swapreg(clk, IFDivStartE, OTFCSwapTemp, OTFCSwap);
|
||||
flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
|
||||
flopen #(1) swapreg(clk, IFDivStartE, CalcOTFCSwap, OTFCSwap);
|
||||
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
|
||||
flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, Calcn, n);
|
||||
flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, Calcm, m);
|
||||
expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .L, .m(Calcm), .Qe);
|
||||
expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .ell, .m(Calcm), .Qe(QeE));
|
||||
|
||||
endmodule
|
||||
|
||||
@ -130,7 +128,7 @@ module expcalc(
|
||||
input logic [`NE-1:0] Xe, Ye,
|
||||
input logic Sqrt,
|
||||
input logic XZero,
|
||||
input logic [`DIVBLEN:0] L, m,
|
||||
input logic [`DIVBLEN:0] ell, m,
|
||||
output logic [`NE+1:0] Qe
|
||||
);
|
||||
logic [`NE-2:0] Bias;
|
||||
@ -162,10 +160,10 @@ module expcalc(
|
||||
2'h2: Bias = (`NE-1)'(`H_BIAS);
|
||||
endcase
|
||||
end
|
||||
assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - (`NE+2)'(`BIAS);
|
||||
assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS);
|
||||
assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias};
|
||||
// correct exponent for denormalized input's normalization shifts
|
||||
assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}};
|
||||
assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}};
|
||||
|
||||
assign Qe = Sqrt ? SExp : DExp;
|
||||
endmodule
|
@ -32,7 +32,7 @@
|
||||
|
||||
/* verilator lint_off UNOPTFLAT */
|
||||
module fdivsqrtstage2 (
|
||||
input logic [`DIVN-2:0] D,
|
||||
input logic [`DIVb-1:0] D,
|
||||
input logic [`DIVb+3:0] DBar,
|
||||
input logic [`DIVb:0] U, UM,
|
||||
input logic [`DIVb+3:0] WS, WC,
|
||||
@ -69,7 +69,7 @@ module fdivsqrtstage2 (
|
||||
always_comb
|
||||
if (up) Dsel = DBar;
|
||||
else if (uz) Dsel = '0; // qz
|
||||
else Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; // un
|
||||
else Dsel = {3'b000, 1'b1, D}; // un
|
||||
|
||||
// Partial Product Generation
|
||||
// WSA, WCA = WS + WC - qD
|
||||
|
@ -31,7 +31,7 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrtstage4 (
|
||||
input logic [`DIVN-2:0] D,
|
||||
input logic [`DIVb-1:0] D,
|
||||
input logic [`DIVb+3:0] DBar, D2, DBar2,
|
||||
input logic [`DIVb:0] U, UM,
|
||||
input logic [`DIVb+3:0] WS, WC,
|
||||
@ -61,7 +61,7 @@ module fdivsqrtstage4 (
|
||||
// 0010 = -1
|
||||
// 0001 = -2
|
||||
assign Smsbs = U[`DIVb:`DIVb-4];
|
||||
assign Dmsbs = D[`DIVN-2:`DIVN-4];
|
||||
assign Dmsbs = D[`DIVb-1:`DIVb-3];
|
||||
assign WCmsbs = WC[`DIVb+3:`DIVb-4];
|
||||
assign WSmsbs = WS[`DIVb+3:`DIVb-4];
|
||||
|
||||
@ -77,7 +77,7 @@ module fdivsqrtstage4 (
|
||||
4'b1000: Dsel = DBar2;
|
||||
4'b0100: Dsel = DBar;
|
||||
4'b0000: Dsel = '0;
|
||||
4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}};
|
||||
4'b0010: Dsel = {3'b0, 1'b1, D};
|
||||
4'b0001: Dsel = D2;
|
||||
default: Dsel = 'x;
|
||||
endcase
|
||||
|
@ -55,7 +55,8 @@ module fpu (
|
||||
output logic FCvtIntW, // select FCvtIntRes (to IEU)
|
||||
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
|
||||
output logic IllegalFPUInstrM, // Is the instruction an illegal fpu instruction (to privileged unit)
|
||||
output logic [4:0] SetFflagsM // FPU flags (to privileged unit)
|
||||
output logic [4:0] SetFflagsM, // FPU flags (to privileged unit)
|
||||
output logic [`XLEN-1:0] FPIntDivResultW
|
||||
);
|
||||
|
||||
// FPU specifics:
|
||||
@ -152,6 +153,7 @@ module fpu (
|
||||
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
|
||||
logic [`FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed
|
||||
logic StallUnpackedM;
|
||||
logic [`XLEN-1:0] FPIntDivResultM;
|
||||
|
||||
// DECODE STAGE
|
||||
|
||||
@ -267,7 +269,7 @@ module fpu (
|
||||
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E,
|
||||
.StallE, .StallM, .TrapM, .DivSM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
|
||||
.QmM /*, .DivDone(DivDoneM) */);
|
||||
.QmM, .FPIntDivResultM /*, .DivDone(DivDoneM) */);
|
||||
|
||||
//
|
||||
// compare
|
||||
@ -387,7 +389,8 @@ module fpu (
|
||||
|
||||
// M/W pipe registers
|
||||
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
|
||||
flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
|
||||
flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
|
||||
flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FPIntDivResultM, FPIntDivResultW);
|
||||
|
||||
// BEGIN WRITEBACK STAGE
|
||||
|
||||
|
@ -65,7 +65,7 @@ module controller(
|
||||
output logic FWriteIntM,
|
||||
// Writeback stage control signals
|
||||
input logic StallW, FlushW,
|
||||
output logic RegWriteW, // for datapath and Hazard Unit
|
||||
output logic RegWriteW, DivW, // for datapath and Hazard Unit
|
||||
output logic [2:0] ResultSrcW,
|
||||
// Stall during CSRs
|
||||
output logic CSRWriteFencePendingDEM,
|
||||
@ -109,6 +109,7 @@ module controller(
|
||||
logic IllegalERegAdrD;
|
||||
logic [1:0] AtomicE;
|
||||
logic FencePendingD, FencePendingE, FencePendingM;
|
||||
logic DivE, DivM;
|
||||
|
||||
|
||||
// Extract fields
|
||||
@ -222,16 +223,17 @@ module controller(
|
||||
assign MemReadE = MemRWE[1];
|
||||
assign SCE = (ResultSrcE == 3'b100);
|
||||
assign RegWriteE = IEURegWriteE | FWriteIntE; // IRF register writes could come from IEU or FPU controllers
|
||||
assign DivE = MDUE & Funct3E[2]; // Division operation
|
||||
|
||||
// Memory stage pipeline control register
|
||||
flopenrc #(19) controlregM(clk, reset, FlushM, ~StallM,
|
||||
{RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE},
|
||||
{RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FencePendingM, InstrValidM});
|
||||
flopenrc #(20) controlregM(clk, reset, FlushM, ~StallM,
|
||||
{RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE, DivE},
|
||||
{RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FencePendingM, InstrValidM, DivM});
|
||||
|
||||
// Writeback stage pipeline control register
|
||||
flopenrc #(4) controlregW(clk, reset, FlushW, ~StallW,
|
||||
{RegWriteM, ResultSrcM},
|
||||
{RegWriteW, ResultSrcW});
|
||||
flopenrc #(5) controlregW(clk, reset, FlushW, ~StallW,
|
||||
{RegWriteM, ResultSrcM, DivM},
|
||||
{RegWriteW, ResultSrcW, DivW});
|
||||
|
||||
// Stall pipeline at Fetch if a CSR Write or Fence is pending in the subsequent stages
|
||||
assign CSRWriteFencePendingDEM = CSRWriteD | CSRWriteE | CSRWriteM | FencePendingD | FencePendingE | FencePendingM;
|
||||
|
@ -57,14 +57,15 @@ module datapath (
|
||||
output logic [`XLEN-1:0] WriteDataM,
|
||||
// Writeback stage signals
|
||||
input logic StallW, FlushW,
|
||||
(* mark_debug = "true" *) input logic RegWriteW,
|
||||
(* mark_debug = "true" *) input logic RegWriteW, DivW,
|
||||
input logic SquashSCW,
|
||||
input logic [2:0] ResultSrcW,
|
||||
input logic [`XLEN-1:0] FCvtIntResW,
|
||||
input logic [`XLEN-1:0] ReadDataW,
|
||||
// input logic [`XLEN-1:0] PCLinkW,
|
||||
input logic [`XLEN-1:0] CSRReadValW, MDUResultW,
|
||||
// Hazard Unit signals
|
||||
input logic [`XLEN-1:0] FPIntDivResultW,
|
||||
// Hazard Unit signals
|
||||
output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E,
|
||||
output logic [4:0] RdE, RdM, RdW
|
||||
);
|
||||
@ -85,7 +86,7 @@ module datapath (
|
||||
// Writeback stage signals
|
||||
logic [`XLEN-1:0] SCResultW;
|
||||
logic [`XLEN-1:0] ResultW;
|
||||
logic [`XLEN-1:0] IFResultW, IFCvtResultW;
|
||||
logic [`XLEN-1:0] IFResultW, IFCvtResultW, MulDivResultW;
|
||||
|
||||
// Decode stage
|
||||
assign Rs1D = InstrD[19:15];
|
||||
@ -125,10 +126,16 @@ module datapath (
|
||||
if (`F_SUPPORTED) begin:fpmux
|
||||
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
|
||||
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
|
||||
if (`IDIV_ON_FPU) begin
|
||||
mux2 #(`XLEN) divresultmuxW(MDUResultW, FPIntDivResultW, DivW, MulDivResultW);
|
||||
end else begin
|
||||
assign MulDivResultW = MDUResultW;
|
||||
end
|
||||
end else begin:fpmux
|
||||
assign IFResultM = IEUResultM; assign IFCvtResultW = IFResultW;
|
||||
assign MulDivResultW = MDUResultW;
|
||||
end
|
||||
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
|
||||
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW);
|
||||
|
||||
// handle Store Conditional result if atomic extension supported
|
||||
if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW};
|
||||
|
@ -58,6 +58,7 @@ module ieu (
|
||||
output logic InvalidateICacheM, FlushDCacheM,
|
||||
|
||||
// Writeback stage
|
||||
input logic [`XLEN-1:0] FPIntDivResultW,
|
||||
input logic [`XLEN-1:0] CSRReadValW, MDUResultW,
|
||||
input logic [`XLEN-1:0] FCvtIntResW,
|
||||
output logic [4:0] RdW,
|
||||
@ -83,6 +84,7 @@ module ieu (
|
||||
logic SCE;
|
||||
logic [4:0] RdE;
|
||||
logic FWriteIntM;
|
||||
logic DivW;
|
||||
|
||||
// forwarding signals
|
||||
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E;
|
||||
@ -99,15 +101,15 @@ module ieu (
|
||||
.Funct3E, .MDUE, .W64E, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM,
|
||||
.CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M,
|
||||
.RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .FWriteIntM,
|
||||
.StallW, .FlushW, .RegWriteW, .ResultSrcW, .CSRWriteFencePendingDEM, .StoreStallD);
|
||||
.StallW, .FlushW, .RegWriteW, .DivW, .ResultSrcW, .CSRWriteFencePendingDEM, .StoreStallD);
|
||||
|
||||
datapath dp(
|
||||
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,
|
||||
.ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .BranchSignedE,
|
||||
.PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE,
|
||||
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, .FCvtIntW,
|
||||
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
|
||||
.CSRReadValW, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
|
||||
.StallW, .FlushW, .RegWriteW, .DivW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
|
||||
.CSRReadValW, .MDUResultW, .FPIntDivResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
|
||||
|
||||
forward fw(
|
||||
.Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW,
|
||||
|
@ -59,10 +59,17 @@ module muldiv (
|
||||
|
||||
// Divide
|
||||
// Start a divide when a new division instruction is received and the divider isn't already busy or finishing
|
||||
assign DivE = MDUE & Funct3E[2];
|
||||
assign DivSignedE = ~Funct3E[0];
|
||||
intdivrestoring div(.clk, .reset, .StallM, .TrapM, .DivSignedE, .W64E, .DivE,
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM);
|
||||
// When IDIV_ON_FPU is set, integer division is done by the FPU divider instead; tie off the local divider outputs
|
||||
if (`IDIV_ON_FPU) begin
|
||||
assign QuotM = 0;
|
||||
assign RemM = 0;
|
||||
assign DivBusyE = 0;
|
||||
end else begin
|
||||
assign DivE = MDUE & Funct3E[2];
|
||||
assign DivSignedE = ~Funct3E[0];
|
||||
intdivrestoring div(.clk, .reset, .StallM, .TrapM, .DivSignedE, .W64E, .DivE,
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM);
|
||||
end
|
||||
|
||||
// Result multiplexer
|
||||
always_comb
|
||||
|
@ -99,6 +99,7 @@ module wallypipelinedcore (
|
||||
logic FpLoadStoreM;
|
||||
logic [1:0] FResSelW;
|
||||
logic [4:0] SetFflagsM;
|
||||
logic [`XLEN-1:0] FPIntDivResultW;
|
||||
|
||||
// memory management unit signals
|
||||
logic ITLBWriteF;
|
||||
@ -228,7 +229,7 @@ module wallypipelinedcore (
|
||||
.RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM,
|
||||
|
||||
// Writeback stage
|
||||
.CSRReadValW, .MDUResultW,
|
||||
.CSRReadValW, .MDUResultW, .FPIntDivResultW,
|
||||
.RdW, .ReadDataW(ReadDataW[`XLEN-1:0]),
|
||||
.InstrValidM,
|
||||
.FCvtIntResW,
|
||||
@ -405,7 +406,8 @@ module wallypipelinedcore (
|
||||
.FCvtIntW, // fpu result selection
|
||||
.FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
|
||||
.IllegalFPUInstrM, // Is the instruction an illegal fpu instruction
|
||||
.SetFflagsM // FPU flags (to privileged unit)
|
||||
.SetFflagsM, // FPU flags (to privileged unit)
|
||||
.FPIntDivResultW
|
||||
); // floating point unit
|
||||
end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
|
||||
assign FStallD = 0;
|
||||
|
@ -1413,6 +1413,7 @@ string imperas32f[] = '{
|
||||
|
||||
string arch32f[] = '{
|
||||
`RISCVARCHTEST,
|
||||
"rv32i_m/F/src/fdiv_b20-01.S",
|
||||
"rv32i_m/F/src/fadd_b10-01.S",
|
||||
"rv32i_m/F/src/fadd_b1-01.S",
|
||||
"rv32i_m/F/src/fadd_b11-01.S",
|
||||
|
@ -8,8 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test
|
||||
current_dir = $(shell pwd)
|
||||
#XLEN ?= 64
|
||||
|
||||
all: root fsd_fld_tempfix arch32 wally32 wally32e arch64 wally64
|
||||
#all: root fsd_fld_tempfix wally32
|
||||
all: root arch32 wally32 wally32e arch64 wally64
|
||||
|
||||
root:
|
||||
mkdir -p $(work_dir)
|
||||
@ -20,14 +19,8 @@ root:
|
||||
sed 's,{0},$(current_dir),g;s,{1},64gc,g' config.ini > config64.ini
|
||||
sed 's,{0},$(current_dir),g;s,{1},32e,g' config.ini > config32e.ini
|
||||
|
||||
fsd_fld_tempfix:
|
||||
# this is a temporary fix, there's a typo on the rv64i_m/D/src/d_fsd-align-01.S and rv64i_m/D/src/d_fld-align-01.S tests
|
||||
# https://github.com/riscv-non-isa/riscv-arch-test/issues/266
|
||||
find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fld-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {}
|
||||
find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fsd-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {}
|
||||
|
||||
arch32:
|
||||
riscof --verbose debug run --work-dir=$(work_dir) --config=config32.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser
|
||||
riscof run --work-dir=$(work_dir) --config=config32.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser
|
||||
rsync -a $(work_dir)/rv32i_m/ $(arch_workdir)/rv32i_m/ || echo "error suppressed"
|
||||
|
||||
arch64:
|
||||
|
Loading…
Reference in New Issue
Block a user