Merge branch 'main' into cache

Conflicts:
	wally-pipelined/src/cache/dmapped.sv
	wally-pipelined/src/cache/line.sv
	wally-pipelined/src/ifu/icache.sv
This commit is contained in:
Jarred Allen 2021-04-14 18:24:32 -04:00
commit c1e2e58ebe
42 changed files with 136624 additions and 307799 deletions

View File

@ -75,14 +75,14 @@ module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par
// Assign the read and write addresses in cache memory
always_comb begin
assign ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN];
assign ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
assign ReadSet = ReadPAdr[SETEND:SETBEGIN];
assign ReadTag = OldReadPAdr[TAGEND:TAGBEGIN];
ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN];
ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
ReadSet = ReadPAdr[SETEND:SETBEGIN];
ReadTag = OldReadPAdr[TAGEND:TAGBEGIN];
assign WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN];
assign WriteSet = WritePAdr[SETEND:SETBEGIN];
assign WriteTag = WritePAdr[TAGEND:TAGBEGIN];
WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN];
WriteSet = WritePAdr[SETEND:SETBEGIN];
WriteTag = WritePAdr[TAGEND:TAGBEGIN];
end
// Depth is number of bits in one "word" of the memory, width is number of such words

View File

@ -61,7 +61,7 @@ module pagetablewalker (
output logic MMUTranslationComplete,
// Faults
output logic InstrPageFaultM, LoadPageFaultM, StorePageFaultM
output logic InstrPageFaultF, LoadPageFaultM, StorePageFaultM
);
// Internal signals
@ -85,7 +85,7 @@ module pagetablewalker (
// Signals for direct, fake translations. Not part of the final Wally version.
logic [`XLEN-1:0] DirectInstrPTE, DirectMemPTE;
logic [9:0] DirectPTEFlags = {2'b0, 8'b00001111};
localparam DirectPTEFlags = {2'b0, 8'b00001111};
logic [`VPN_BITS-1:0] PCPageNumber, MemAdrPageNumber;
@ -133,17 +133,22 @@ module pagetablewalker (
assign PageTypeF = PageType;
assign PageTypeM = PageType;
localparam IDLE = 3'h0;
localparam LEVEL1 = 3'h1;
localparam LEVEL0 = 3'h2;
localparam LEAF = 3'h3;
localparam FAULT = 3'h4;
logic [2:0] WalkerState, NextWalkerState;
generate
if (`XLEN == 32) begin
logic [9:0] VPN1, VPN0;
assign SvMode = SATP_REGW[31];
typedef enum {IDLE, LEVEL1, LEVEL0, LEAF, FAULT} walker_statetype;
walker_statetype WalkerState, NextWalkerState;
// *** Do we need a synchronizer here for walker to talk to ahblite?
flopenl #(.TYPE(walker_statetype)) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
// State transition logic
always_comb begin
@ -154,7 +159,8 @@ module pagetablewalker (
// else if (~ValidPTE || (LeafPTE && BadMegapage))
// NextWalkerState = FAULT;
// *** Leave megapage implementation for later
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF;
// *** need to check if megapage valid/aligned
else if (ValidPTE && LeafPTE) NextWalkerState = LEAF;
else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0;
else NextWalkerState = FAULT;
LEVEL0: if (~MMUReady) NextWalkerState = LEVEL0;
@ -165,6 +171,8 @@ module pagetablewalker (
else NextWalkerState = IDLE;
FAULT: if (MMUTranslate) NextWalkerState = LEVEL1;
else NextWalkerState = IDLE;
// Default case should never happen, but is included for linter.
default: NextWalkerState = IDLE;
endcase
end
@ -179,38 +187,41 @@ module pagetablewalker (
// Assign combinational outputs
always_comb begin
// default values
assign TranslationPAdr = '0;
assign PageTableEntry = '0;
assign PageType ='0;
assign MMUTranslationComplete = '0;
assign DTLBWriteM = '0;
assign ITLBWriteF = '0;
assign InstrPageFaultM = '0;
assign LoadPageFaultM = '0;
assign StorePageFaultM = '0;
TranslationPAdr = '0;
PageTableEntry = '0;
PageType ='0;
MMUTranslationComplete = '0;
DTLBWriteM = '0;
ITLBWriteF = '0;
InstrPageFaultF = '0;
LoadPageFaultM = '0;
StorePageFaultM = '0;
case (NextWalkerState)
LEVEL1: begin
assign TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00};
TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00};
end
LEVEL0: begin
assign TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
end
LEAF: begin
// Keep physical address alive to prevent HADDR dropping to 0
assign TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
assign PageTableEntry = CurrentPTE;
assign PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00;
assign MMUTranslationComplete = '1;
assign DTLBWriteM = DTLBMissM;
assign ITLBWriteF = ~DTLBMissM; // Prefer data over instructions
TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
PageTableEntry = CurrentPTE;
PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00;
MMUTranslationComplete = '1;
DTLBWriteM = DTLBMissM;
ITLBWriteF = ~DTLBMissM; // Prefer data over instructions
end
FAULT: begin
assign TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
assign MMUTranslationComplete = '1;
assign InstrPageFaultM = ~DTLBMissM;
assign LoadPageFaultM = DTLBMissM && ~MemStore;
assign StorePageFaultM = DTLBMissM && MemStore;
TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
MMUTranslationComplete = '1;
InstrPageFaultF = ~DTLBMissM;
LoadPageFaultM = DTLBMissM && ~MemStore;
StorePageFaultM = DTLBMissM && MemStore;
end
default: begin
// nothing
end
endcase
end
@ -226,30 +237,30 @@ module pagetablewalker (
assign MMUPAdr = TranslationPAdr[31:0];
end else begin
localparam LEVEL2 = 3'h5;
assign SvMode = SATP_REGW[63];
logic [8:0] VPN2, VPN1, VPN0;
logic GigapageMisaligned, BadGigapage;
typedef enum {IDLE, LEVEL2, LEVEL1, LEVEL0, LEAF, FAULT} walker_statetype;
walker_statetype WalkerState, NextWalkerState;
// *** Do we need a synchronizer here for walker to talk to ahblite?
flopenl #(.TYPE(walker_statetype)) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
always_comb begin
case (WalkerState)
IDLE: if (MMUTranslate) NextWalkerState = LEVEL2;
else NextWalkerState = IDLE;
LEVEL2: if (~MMUReady) NextWalkerState = LEVEL2;
else if (ValidPTE && LeafPTE) NextWalkerState = LEAF;
else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL1;
else NextWalkerState = FAULT;
LEVEL1: if (~MMUReady) NextWalkerState = LEVEL1;
// else if (~ValidPTE || (LeafPTE && BadMegapage))
// NextWalkerState = FAULT;
// *** Leave megapage implementation for later
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF;
else if (ValidPTE && LeafPTE) NextWalkerState = LEAF;
else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0;
else NextWalkerState = FAULT;
LEVEL0: if (~MMUReady) NextWalkerState = LEVEL0;
@ -260,6 +271,8 @@ module pagetablewalker (
else NextWalkerState = IDLE;
FAULT: if (MMUTranslate) NextWalkerState = LEVEL2;
else NextWalkerState = IDLE;
// Default case should never happen, but is included for linter.
default: NextWalkerState = IDLE;
endcase
end
@ -279,42 +292,45 @@ module pagetablewalker (
// *** Should translate this flop block into our flop module notation
always_comb begin
// default values
assign TranslationPAdr = '0;
assign PageTableEntry = '0;
assign PageType = '0;
assign MMUTranslationComplete = '0;
assign DTLBWriteM = '0;
assign ITLBWriteF = '0;
assign InstrPageFaultM = '0;
assign LoadPageFaultM = '0;
assign StorePageFaultM = '0;
TranslationPAdr = '0;
PageTableEntry = '0;
PageType = '0;
MMUTranslationComplete = '0;
DTLBWriteM = '0;
ITLBWriteF = '0;
InstrPageFaultF = '0;
LoadPageFaultM = '0;
StorePageFaultM = '0;
case (NextWalkerState)
LEVEL2: begin
assign TranslationPAdr = {BasePageTablePPN, VPN2, 3'b000};
TranslationPAdr = {BasePageTablePPN, VPN2, 3'b000};
end
LEVEL1: begin
assign TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
end
LEVEL0: begin
assign TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
end
LEAF: begin
// Keep physical address alive to prevent HADDR dropping to 0
assign TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
assign PageTableEntry = CurrentPTE;
assign PageType = (WalkerState == LEVEL2) ? 2'b11 :
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
PageTableEntry = CurrentPTE;
PageType = (WalkerState == LEVEL2) ? 2'b11 :
((WalkerState == LEVEL1) ? 2'b01 : 2'b00);
assign MMUTranslationComplete = '1;
assign DTLBWriteM = DTLBMissM;
assign ITLBWriteF = ~DTLBMissM; // Prefer data over instructions
MMUTranslationComplete = '1;
DTLBWriteM = DTLBMissM;
ITLBWriteF = ~DTLBMissM; // Prefer data over instructions
end
FAULT: begin
assign TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
assign MMUTranslationComplete = '1;
assign InstrPageFaultM = ~DTLBMissM;
assign LoadPageFaultM = DTLBMissM && ~MemStore;
assign StorePageFaultM = DTLBMissM && MemStore;
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
MMUTranslationComplete = '1;
InstrPageFaultF = ~DTLBMissM;
LoadPageFaultM = DTLBMissM && ~MemStore;
StorePageFaultM = DTLBMissM && MemStore;
end
default: begin
// nothing
end
endcase
end
@ -331,4 +347,4 @@ module pagetablewalker (
end
endgenerate
endmodule
endmodule

View File

@ -48,7 +48,7 @@ module add(r, s, t, sum,
// Compound adder
// Consists of 3:2 CSA followed by long compound CPA
assign prodshifted = killprod ? 0 : {56'b0, r2, 2'b0} + {56'b0, s2, 2'b0};
assign prodshifted = killprod ? 0 : {56'b0, r2+s2, 2'b0};
assign sum0 = {1'b0,prodshifted} + t2 + 158'b0;
assign sum1 = {1'b0,prodshifted} + t2 + 158'b1; // +1 from invert of z above

View File

@ -56,7 +56,7 @@ module align(zman, ae, aligncnt, xzero, yzero, zzero, zdenorm, proddenorm, t, bs
// addend on right shifts. Handle special cases of shifting
// by too much.
always @(aligncnt or zman or zdenorm)
always @(aligncnt or xzero or yzero or zman or zdenorm or zzero)
begin
// Default to clearing sticky bits
@ -67,26 +67,23 @@ module align(zman, ae, aligncnt, xzero, yzero, zzero, zdenorm, proddenorm, t, bs
killprod = xzero | yzero;
// d = aligncnt
// p = 53
if ($signed(aligncnt) <= $signed(-103)) begin //d<=-2p+1
if ($signed(aligncnt) <= $signed(-105)) begin //d<=-2p+1
//product anchored case with saturated shift
sumshift = 163; // 3p+4
sumshiftzero = 0;
shift = {~zdenorm,zman,163'b0} >> sumshift;
shift = {1'b1,zman,163'b0} >> sumshift;
t = zzero ? 0 : {shift[215:52]};
bs = |(shift[51:0]);
//zexpsel = 0;
end else if($signed(aligncnt) <= $signed(1)) begin // -2p+1<d<=2
// set d<=2 to d<=0
end else if($signed(aligncnt) <= $signed(2)) begin // -2p+1<d<=2
// product anchored or cancellation
// warning: set to 55 rather than 56. was there a typo in the book?
sumshift = 57-aligncnt; // p + 3 - d
sumshift = 57-aligncnt; // p + 2 - d
sumshiftzero = 0;
shift = {~zdenorm,zman,163'b0} >> sumshift;
t = zzero ? 0 : {shift[215:52]};
bs = |(shift[51:0]);
//zexpsel = 0;
end else if ($signed(aligncnt)<=$signed(55)) begin // 2 < d <= p+2
// another typo in book? above was 55 changed to 52
// addend anchored case
// used to be 56 \/ something doesn't seem right; too many typos
sumshift = 57-aligncnt;

View File

@ -0,0 +1,55 @@
module booth(xExt, choose, add1, e, pp);
/////////////////////////////////////////////////////////////////////////////
// Booth encoder for a single partial product.
//
// `choose` is a 3-bit selector that picks which multiple of the multiplicand
// `xExt` is driven onto `pp`:
//
//   choose   multiple   pp               add1   e
//   000         0       0                 0     0
//   001/010    +1       {0, xExt}         0     0
//   011        +2       {xExt, 0}         0     0
//   100        -2       {~xExt, 0}        2     1
//   101/110    -1       {1, ~xExt}        1     1
//   111        -0       all ones          1     1
//
// Negative multiples are produced as a bitwise inversion in `pp`; `add1`
// carries the amount that must still be added to `pp` to complete the
// negation (so pp + add1 equals the selected multiple modulo 2^55).
// `e` is set for every negative selection.
//
// Inputs:
//   xExt   - multiplicand
//   choose - encoding selector
// Outputs:
//   add1   - correction to add for negative multiples
//   e      - 1 when a negative multiple (including -0) is selected
//   pp     - the resultant encoding (partial product)
/////////////////////////////////////////////////////////////////////////////

  input  [53:0] xExt;    // multiplicand xExt
  input  [2:0]  choose;  // bits needed to choose which encoding
  output [1:0]  add1;    // amount to add to pp for negative multiples
  output        e;       // negative multiple selected
  output [54:0] pp;      // the resultant encoding

  logic [54:0] pp;
  logic        e;
  logic [1:0]  add1;
  logic [53:0] negx;     // bitwise inverse of the multiplicand

  assign negx = ~xExt;

  // All three outputs are decoded from the same selector, so they share one
  // combinational block; always_comb derives the sensitivity list itself.
  always_comb
    case (choose)
      3'b000 : begin pp = 55'b0;        e = 1'b0; add1 = 2'b00; end // 0
      3'b001 : begin pp = {1'b0, xExt}; e = 1'b0; add1 = 2'b00; end // 1
      3'b010 : begin pp = {1'b0, xExt}; e = 1'b0; add1 = 2'b00; end // 1
      3'b011 : begin pp = {xExt, 1'b0}; e = 1'b0; add1 = 2'b00; end // 2
      3'b100 : begin pp = {negx, 1'b0}; e = 1'b1; add1 = 2'b10; end // -2
      3'b101 : begin pp = {1'b1, negx}; e = 1'b1; add1 = 2'b01; end // -1
      3'b110 : begin pp = {1'b1, negx}; e = 1'b1; add1 = 2'b01; end // -1
      // Replication keeps the all-ones constant exactly 55 bits wide
      // (the former 15-digit hex literal was 60 bits and was truncated).
      3'b111 : begin pp = {55{1'b1}};   e = 1'b1; add1 = 2'b01; end // -0
      // Only reachable when choose contains X/Z: propagate the unknown
      // instead of holding a stale value.
      default: begin pp = 'x;           e = 1'bx; add1 = 2'bxx; end
    endcase

endmodule

View File

@ -0,0 +1,90 @@
module add3comp2 #(parameter BITS = 4) (
  input  [BITS-1:0] a,
  input  [BITS-1:0] b,
  input  [BITS-1:0] c,
  output [BITS-1:0] carry,
  output [BITS-1:0] sum
);
/////////////////////////////////////////////////////////////////////////////
// BITS-wide 3:2 compressor.
//
// Places one sng3comp2 cell on every bit position; the cells are fully
// independent (no carry is passed between neighbouring bits). Bit k of
// `sum` and `carry` is whatever sng3comp2 produces from a[k], b[k], c[k].
//
// TODO: look into different implementations of the compressors?
/////////////////////////////////////////////////////////////////////////////

  for (genvar k = 0; k < BITS; k = k + 1) begin
    sng3comp2 add0(
      .a     (a[k]),
      .b     (b[k]),
      .c     (c[k]),
      .carry (carry[k]),
      .sum   (sum[k])
    );
  end

endmodule
module add4comp2(a, b, c, d, carry, sum);
/////////////////////////////////////////////////////////////////////////////
// BITS-wide 4:2 compressor built from a chain of sng4comp2 cells.
//
// Each cell's `cout` feeds the `cin` of the next more significant cell
// (cell 0 gets cin = 0). The outputs are meant to satisfy
//
//     a + b + c + d == sum + 2*carry
//
// i.e. carry[k] has the weight of bit k+1.
//
// At the most significant cell both its carry (carryTmp) and its cout have
// the same weight, 2^BITS. Their one-bit sum therefore lands in
// carry[BITS-1] (XOR) and the carry out of that sum in carry[BITS] (AND).
// These two assignments were previously swapped, which gave a wrong result
// whenever exactly one, or both, of carryTmp/cout[BITS-1] was set (e.g.
// BITS=2, a=b=c=d=2 produced 4 instead of 8). Users that only consume
// carry[BITS-2:0] are unaffected by the fix.
//
// Parameters:
//   BITS - operand width; must be at least 2 because cell 0 and cell BITS-1
//          are instantiated separately from the loop.
// Inputs:
//   a, b, c, d - the four addends
// Outputs:
//   carry - carry vector, to be weighted by 2 (BITS+1 bits wide)
//   sum   - sum vector
/////////////////////////////////////////////////////////////////////////////

  parameter BITS = 4;
  input  [BITS-1:0] a;
  input  [BITS-1:0] b;
  input  [BITS-1:0] c;
  input  [BITS-1:0] d;
  output [BITS:0]   carry;
  output [BITS-1:0] sum;

  logic [BITS-1:0] cout;      // inter-cell carries (cell k -> cell k+1)
  logic            carryTmp;  // carry output of the most significant cell

  genvar i;

  // Least significant cell: nothing to carry in.
  sng4comp2 add0(a[0], b[0], c[0], d[0], 1'b0, cout[0], carry[0], sum[0]);

  // Middle cells: cin comes from the previous cell's cout.
  generate
    for (i = 1; i < BITS-1; i = i+1) begin
      sng4comp2 add1(a[i], b[i], c[i], d[i], cout[i-1], cout[i], carry[i], sum[i]);
    end
  endgenerate

  // Most significant cell: its carry is kept aside so it can be merged with
  // its cout below.
  sng4comp2 add2(a[BITS-1], b[BITS-1], c[BITS-1], d[BITS-1], cout[BITS-2], cout[BITS-1], carryTmp, sum[BITS-1]);

  // Half-add the two equal-weight top signals.
  assign carry[BITS-1] = carryTmp ^ cout[BITS-1];
  assign carry[BITS]   = carryTmp & cout[BITS-1];

endmodule
module sng3comp2(
  input  a,
  input  b,
  input  c,
  output carry,
  output sum
);
/////////////////////////////////////////////////////////////////////////////
// Single-bit 3:2 compressor (full adder): a + b + c == sum + 2*carry.
//
// The carry is formed with a mux rather than a majority gate: when a and b
// differ the carry follows c, otherwise it equals a (which then equals b).
//
// TODO: look into different implementations of the compressors?
/////////////////////////////////////////////////////////////////////////////

  logic abDiffer;   // 1 when exactly one of a, b is set

  assign abDiffer = a ^ b;
  assign carry    = abDiffer ? c : a;
  assign sum      = abDiffer ^ c;

endmodule
module sng4comp2(
  input  a,
  input  b,
  input  c,
  input  d,
  input  cin,
  output cout,
  output carry,
  output sum
);
/////////////////////////////////////////////////////////////////////////////
// Single-bit 4:2 compressor made of two cascaded sng3comp2 cells.
//
// The first cell compresses a, b, c; its carry leaves as `cout`.
// The second cell compresses the first cell's sum with d and cin, and
// drives the `sum` and `carry` outputs.
//
// TODO: look into pass gate 4:2 counters?
/////////////////////////////////////////////////////////////////////////////

  logic firstSum;   // sum of a, b, c from the first stage

  sng3comp2 add1(
    .a     (a),
    .b     (b),
    .c     (c),
    .carry (cout),
    .sum   (firstSum)
  );

  sng3comp2 add2(
    .a     (firstSum),
    .b     (d),
    .c     (cin),
    .carry (carry),
    .sum   (sum)
  );

endmodule

View File

@ -17,7 +17,7 @@
/////////////////////////////////////////////////////////////////////////////
module expgen(xexp, yexp, zexp,
killprod, sumzero, resultdenorm, normcnt, infinity,
invalid, overflow, underflow, inf, xzero, yzero,expplus1,
FmaFlagsM, inf, xzero, yzero,expplus1,
nan, de0, xnan, ynan, znan, xdenorm, ydenorm, zdenorm, proddenorm, specialsel, zexpsel,
aligncnt, wexp,
prodof, sumof, sumuf, denorm0, ae);
@ -31,9 +31,7 @@ module expgen(xexp, yexp, zexp,
input resultdenorm; // postnormalize rounded result
input [8:0] normcnt; // normalization shift count
input infinity; // generate infinity on overflow
input invalid; // Result invalid
input overflow; // Result overflowed
input underflow; // Result underflowed
input [4:0] FmaFlagsM; // Result invalid
input inf; // Some input is infinity
input nan; // Some input is NaN
input [12:0] de0; // X is NaN NaN
@ -121,10 +119,10 @@ module expgen(xexp, yexp, zexp,
// produces either infinity or the largest finite number, depending on the
// rounding mode. NaNs are propagated or generated.
assign specialres = invalid | nan ? nanres : // KEP added nan
overflow ? infinityres :
assign specialres = FmaFlagsM[4] | nan ? nanres : // invalid
FmaFlagsM[2] ? infinityres : //overflow
inf ? 11'b11111111111 :
underflow ? 11'b0 : 11'bx;
FmaFlagsM[1] ? 11'b0 : 11'bx; //underflow
assign infinityres = infinity ? 11'b11111111111 : 11'b11111111110;

View File

@ -10,12 +10,13 @@
/////////////////////////////////////////////////////////////////////////////
module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
psign, zsign, xzero, yzero, zzero, vbits, killprod,
inf, nan, invalid, overflow, underflow, inexact);
inf, nan, FmaFlagsM,sticky);
/////////////////////////////////////////////////////////////////////////////
input xnan; // X is NaN
input ynan; // Y is NaN
input znan; // Z is NaN
input sticky; // X is Inf
input xinf; // X is Inf
input yinf; // Y is Inf
input zinf; // Z is Inf
@ -31,10 +32,7 @@ module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
input [1:0] vbits; // R and S bits of result
output inf; // Some source is Inf
output nan; // Some source is NaN
output invalid; // Result is invalid
output overflow; // Result overflowed
output underflow; // Result underflowed
output inexact; // Result is not an exact number
output [4:0] FmaFlagsM;
// Internal nodes
@ -55,33 +53,36 @@ module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
assign prodinf = prodof && ~xnan && ~ynan;
//KEP added if the product is infinity then sum is infinity
assign suminf = prodinf | sumof && ~xnan && ~ynan && ~znan;
assign suminf = sumof && ~xnan && ~ynan && ~znan;
// Set invalid flag for following cases:
// 1) Inf - Inf
// 2) 0 * Inf
// 3) Output = NaN (this is not part of the IEEE spec, only 486 proj)
assign invalid = (xinf || yinf || prodinf) && zinf && (psign ^ zsign) ||
assign FmaFlagsM[4] = (xinf || yinf || prodinf) && zinf && (psign ^ zsign) ||
xzero && yinf || yzero && xinf;// KEP remove case 3) above
assign FmaFlagsM[3] = 0; // divide by zero flag
// Set the overflow flag for the following cases:
// 1) Rounded multiply result would be out of bounds
// 2) Rounded add result would be out of bounds
assign overflow = suminf && ~inf;
assign FmaFlagsM[2] = suminf && ~inf;
// Set the underflow flag for the following cases:
// 1) Any input is denormalized
// 2) Output would be denormalized or smaller
assign underflow = (sumuf && ~inf && ~prodinf && ~nan) || (killprod & zzero & ~(yzero | xzero));
assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinf && ~nan) || (killprod & zzero & ~(yzero | xzero));
// Set the inexact flag for the following cases:
// 1) Multiplication inexact
// 2) Addition inexact
// One of these cases occurred if the R or S bit is set
assign inexact = (vbits[0] || vbits[1] || suminf) && ~(inf || nan);
assign FmaFlagsM[0] = (vbits[0] || vbits[1] ||sticky || suminf) && ~(inf || nan);
endmodule

View File

@ -15,13 +15,13 @@
// normalize Normalization shifter
// round Rounding of result
// exception Handles exceptional cases
// bypass Handles bypass of result to X or Z inputs
// bypass Handles bypass of result to ReadData1E or ReadData3E inputs
// sign One bit sign handling block
// special Catch special cases (inputs = 0 / infinity / etc.)
//
// The FMAC computes W=X*Y+Z, rounded with the mode specified by
// The FMAC computes FmaResultM=ReadData1E*ReadData2E+ReadData3E, rounded with the mode specified by
// RN, RZ, RM, or RP. The result is optionally bypassed back to
// the X or Z inputs for use on the next cycle. In addition, four signals
// the ReadData1E or ReadData3E inputs for use on the next cycle. In addition, four signals
// are produced: trap, overflow, underflow, and inexact. Trap indicates
// an infinity, NaN, or denormalized number to be handled in software;
// the other three signals are IEEE flags.
@ -29,29 +29,17 @@
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module fmac(x, y, z, rn, rz, rp, rm,
earlyres, earlyressel, bypsel, bypplus1, byppostnorm,
w, wbypass, invalid, overflow, underflow, inexact);
module fma(ReadData1E, ReadData2E, ReadData3E, FrmE,
FmaResultM, FmaFlagsM, aligncnt);
/////////////////////////////////////////////////////////////////////////////
input [63:0] x; // input X from reg file
input [63:0] y; // input Y
input [63:0] z; // input Z from reg file
input rn; // Round to Nearest
input rz; // Round toward zero
input rm; // Round toward minus infinity
input rp; // Round toward plus infinity
input [63:0] earlyres; // Early result from other FP logic
input earlyressel; // Select early result, not W
input [1:0] bypsel; // Select W bypass to X, or z
input bypplus1; // Add one in bypass
input byppostnorm; // postnormalize in bypass
output [63:0] w; // output W=X*Y+Z
output [63:0] wbypass; // prerounded output W=X*Y+Z for bypass
output invalid; // Result is invalid
output overflow; // Result overflowed
output underflow; // Result underflowed
output inexact; // Result is not an exact number
input [63:0] ReadData1E; // input 1
input [63:0] ReadData2E; // input 2
input [63:0] ReadData3E; // input 3
input [2:0] FrmE; // Rounding mode
output [63:0] FmaResultM; // output FmaResultM=ReadData1E*ReadData2E+ReadData3E
output [4:0] FmaFlagsM; // status flags
output [12:0] aligncnt; // status flags
// Internal nodes
@ -60,12 +48,12 @@ module fmac(x, y, z, rn, rz, rp, rm,
logic [163:0] t; // output of alignment shifter
logic [163:0] sum; // output of carry prop adder
logic [53:0] v; // normalized sum, R, S bits
logic [12:0] aligncnt; // shift count for alignment
// logic [12:0] aligncnt; // shift count for alignment
logic [8:0] normcnt; // shift count for normalizer
logic [12:0] ae; // multiplier expoent
logic bs; // sticky bit of addend
logic ps; // sticky bit of product
logic killprod; // Z >> product
logic killprod; // ReadData3E >> product
logic negsum; // negate sum
logic invz; // invert addend
logic selsum1; // select +1 mode of sum
@ -73,7 +61,7 @@ module fmac(x, y, z, rn, rz, rp, rm,
logic negsum1; // sum +1 < 0
logic sumzero; // sum = 0
logic infinity; // generate infinity on overflow
logic prodof; // X*Y out of range
logic prodof; // ReadData1E*ReadData2E out of range
logic sumof; // result out of range
logic xzero;
logic yzero;
@ -101,6 +89,9 @@ module fmac(x, y, z, rn, rz, rp, rm,
logic [8:0] sumshift;
logic sumshiftzero;
logic [12:0] de0;
logic isAdd;
assign isAdd = 1;
@ -117,16 +108,16 @@ module fmac(x, y, z, rn, rz, rp, rm,
// Instantiate fraction datapath
multiply multiply(.xman(x[51:0]), .yman(y[51:0]), .*);
align align(.zman(z[51:0]),.*);
multiply multiply(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]), .*);
align align(.zman(ReadData3E[51:0]),.*);
add add(.*);
lza lza(.*);
normalize normalize(.zexp(z[62:52]),.*);
round round(.xman(x[51:0]), .yman(y[51:0]),.zman(z[51:0]), .wman(w[51:0]),.wsign(w[63]),.*);
normalize normalize(.xexp(ReadData1E[62:52]),.yexp(ReadData2E[62:52]),.zexp(ReadData3E[62:52]),.*);
round round(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]),.zman(ReadData3E[51:0]), .wman(FmaResultM[51:0]),.wsign(FmaResultM[63]),.*);
// Instantiate exponent datapath
expgen expgen(.xexp(x[62:52]),.yexp(y[62:52]),.zexp(z[62:52]),.wexp(w[62:52]),.*);
expgen expgen(.xexp(ReadData1E[62:52]),.yexp(ReadData2E[62:52]),.zexp(ReadData3E[62:52]),.wexp(FmaResultM[62:52]),.*);
// Instantiate special case detection across datapath & exponent path
special special(.*);
@ -134,8 +125,8 @@ module fmac(x, y, z, rn, rz, rp, rm,
// Instantiate control logic
sign sign(.xsign(x[63]),.ysign(y[63]),.zsign(z[63]),.wsign(w[63]),.*);
flag flag(.zsign(z[63]),.vbits(v[1:0]),.*);
sign sign(.xsign(ReadData1E[63]),.ysign(ReadData2E[63]),.zsign(ReadData3E[63]),.wsign(FmaResultM[63]),.*);
flag flag(.zsign(ReadData3E[63]),.vbits(v[1:0]),.*);
endmodule

View File

@ -30,7 +30,7 @@ module lza(sum, normcnt, sumzero);
always @ ( sum)
begin
i = 0;
while (~sum[108-i] && i < 108) i = i+1; // search for leading one
while (~sum[163-i] && i <= 163) i = i+1; // search for leading one
normcnt = i; // compute shift count
end

View File

@ -10,8 +10,124 @@ module multiply(xman, yman, xdenorm, ydenorm, xzero, yzero, r, s);
input yzero; // Z is denorm
output [105:0] r; // partial product 1
output [105:0] s; // partial product 2
wire [54:0] yExt; //y with appended 0 and assumed 1
wire [53:0] xExt; //y with assumed 1
wire [26:0][1:0] add1;
wire [26:0][54:0] pp;
wire [26:0] e;
logic [17:0][105:0] lv1add;
logic [11:0][105:0] lv2add;
logic [7:0][105:0] lv3add;
logic [3:0][105:0] lv4add;
logic [21:0][106:0] carryTmp;
wire [26:0][105:0] acc;
// wire [105:0] acc
genvar i;
assign r = 106'b0;
assign s = {53'b0,~(xdenorm|xzero),xman} * {53'b0,~(ydenorm|yzero),yman};
assign xExt = {2'b0,~(xdenorm|xzero),xman};
assign yExt = {2'b0,~(ydenorm|yzero),yman, 1'b0};
generate
for(i=0; i<27; i=i+1) begin
booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i]));
end
endgenerate
assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]};
assign acc[1] = {50'b01,~e[1],pp[1],add1[0]};
assign acc[2] = {48'b01,~e[2],pp[2],add1[1], 2'b0};
assign acc[3] = {46'b01,~e[3],pp[3],add1[2], 4'b0};
assign acc[4] = {44'b01,~e[4],pp[4],add1[3], 6'b0};
assign acc[5] = {42'b01,~e[5],pp[5],add1[4], 8'b0};
assign acc[6] = {40'b01,~e[6],pp[6],add1[5], 10'b0};
assign acc[7] = {38'b01,~e[7],pp[7],add1[6], 12'b0};
assign acc[8] = {36'b01,~e[8],pp[8],add1[7], 14'b0};
assign acc[9] = {34'b01,~e[9],pp[9],add1[8], 16'b0};
assign acc[10] = {32'b01,~e[10],pp[10],add1[9], 18'b0};
assign acc[11] = {30'b01,~e[11],pp[11],add1[10], 20'b0};
assign acc[12] = {28'b01,~e[12],pp[12],add1[11], 22'b0};
assign acc[13] = {26'b01,~e[13],pp[13],add1[12], 24'b0};
assign acc[14] = {24'b01,~e[14],pp[14],add1[13], 26'b0};
assign acc[15] = {22'b01,~e[15],pp[15],add1[14], 28'b0};
assign acc[16] = {20'b01,~e[16],pp[16],add1[15], 30'b0};
assign acc[17] = {18'b01,~e[17],pp[17],add1[16], 32'b0};
assign acc[18] = {16'b01,~e[18],pp[18],add1[17], 34'b0};
assign acc[19] = {14'b01,~e[19],pp[19],add1[18], 36'b0};
assign acc[20] = {12'b01,~e[20],pp[20],add1[19], 38'b0};
assign acc[21] = {10'b01,~e[21],pp[21],add1[20], 40'b0};
assign acc[22] = {8'b01,~e[22],pp[22],add1[21], 42'b0};
assign acc[23] = {6'b01,~e[23],pp[23],add1[22], 44'b0};
assign acc[24] = {4'b01,~e[24],pp[24],add1[23], 46'b0};
assign acc[25] = {~e[25],pp[25],add1[24], 48'b0};
assign acc[26] = {pp[26],add1[25], 50'b0};
//*** resize adders
generate
for(i=0; i<9; i=i+1) begin
add3comp2 #(.BITS(106)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]),
.carry(carryTmp[i][105:0]), .sum(lv1add[i*2+1]));
assign lv1add[i*2] = {carryTmp[i][104:0], 1'b0};
end
endgenerate
generate
for(i=0; i<6; i=i+1) begin
add3comp2 #(.BITS(106)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]),
.carry(carryTmp[i+9][105:0]), .sum(lv2add[i*2+1]));
assign lv2add[i*2] = {carryTmp[i+9][104:0], 1'b0};
end
endgenerate
generate
for(i=0; i<4; i=i+1) begin
add3comp2 #(.BITS(106)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]),
.carry(carryTmp[i+15][105:0]), .sum(lv3add[i*2+1]));
assign lv3add[i*2] = {carryTmp[i+15][104:0], 1'b0};
end
endgenerate
generate
for(i=0; i<2; i=i+1) begin
add4comp2 #(.BITS(106)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
.carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
assign lv4add[i*2] = {carryTmp[i+19][104:0], 1'b0};
end
endgenerate
add4comp2 #(.BITS(106)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
.carry(carryTmp[21]), .sum(s));
assign r = {carryTmp[21][104:0], 1'b0};
// assign r = 0;
// assign s = acc[0] +
// acc[1] +
// acc[2] +
// acc[3] +
// acc[4] +
// acc[5] +
// acc[6] +
// acc[7] +
// acc[8] +
// acc[9] +
// acc[10] +
// acc[11] +
// acc[12] +
// acc[13] +
// acc[14] +
// acc[15] +
// acc[16] +
// acc[17] +
// acc[18] +
// acc[19] +
// acc[20] +
// acc[21] +
// acc[22] +
// acc[23] +
// acc[24] +
// acc[25] +
// acc[26];
// assign s = {53'b0,~(xdenorm|xzero),xman} * {53'b0,~(ydenorm|yzero),yman};
// assign r = 0;
endmodule

View File

@ -14,9 +14,11 @@
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module normalize(sum, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero, sumzero, xzero, yzero, bs, ps, denorm0, xdenorm, ydenorm, zdenorm, sticky, de0, resultdenorm, v);
module normalize(sum, xexp, yexp, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero, sumzero, xzero, zzero, yzero, bs, ps, denorm0, xdenorm, ydenorm, zdenorm, sticky, de0, resultdenorm, v);
/////////////////////////////////////////////////////////////////////////////
input [163:0] sum; // sum
input [62:52] xexp; // sum
input [62:52] yexp; // sum
input [62:52] zexp; // sum
input [8:0] normcnt; // normalization shift count
input [12:0] ae; // normalization shift count
@ -33,6 +35,7 @@ module normalize(sum, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero,
input zdenorm; // Input Z is denormalized
input xzero;
input yzero;
input zzero;
output sticky; //sticky bit
output [12:0] de0;
output resultdenorm; // Input Z is denormalized
@ -47,6 +50,7 @@ module normalize(sum, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero,
logic [9:0] sumshifttmp;
logic [163:0] sumshiftedtmp; // shifted sum
logic sticky;
logic isShiftLeft1;
logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
// When the sum is zero, normalization does not apply and only the
@ -60,21 +64,23 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
// The sticky bit calculation is actually built into the shifter and
// does not require a true subtraction shown in the model.
assign isShiftLeft1 = (aligncnt == 1 ||aligncnt == 0 || $signed(aligncnt) == $signed(-1))&& zexp == 11'h2;//((xexp == 11'h3ff && yexp == 11'h1) || (yexp == 11'h3ff && xexp == 11'h1)) && zexp == 11'h2;
assign tmp = ($signed(ae-normcnt+2) >= $signed(-1022));
always @(sum or sumshift or ae or aligncnt or normcnt or bs or zexp or zdenorm)
always @(sum or sumshift or ae or aligncnt or normcnt or bs or isShiftLeft1 or zexp or zdenorm)
begin
// d = aligncnt
// l = normcnt
// p = 53
// ea + eb = ae
// set d<=2 to d<=0
if ($signed(aligncnt)<=$signed(1)) begin //d<=2
if ($signed(aligncnt)<=$signed(2)) begin //d<=2
// product anchored or cancellation
if ($signed(ae-normcnt+2) >= $signed(-1022)) begin //ea+eb-l+2 >= emin
//normal result
de0 = xzero|yzero ? zexp : ae-normcnt+2+xdenorm+ydenorm;
resultdenorm = |sum & ~|de0;
sumshifted = resultdenorm ? sum << sumshift : sum << (55+normcnt); // p+2+l
de0 = xzero|yzero ? zexp : ae-normcnt+xdenorm+ydenorm+57;
resultdenorm = |sum & ~|de0 | de0[12];
// if z is zero then there was a 56 bit shift of the product
sumshifted = resultdenorm ? sum << sumshift-zzero+isShiftLeft1 : sum << normcnt; // p+2+l
v = sumshifted[162:109];
sticky = (|sumshifted[108:0]) | bs;
//de0 = ae-normcnt+2-1023;
@ -90,8 +96,8 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
sumshifttmp = {1'b0,sumshift} - 2;
sumshifted = sumshifttmp[9] ? sum : sum << sumshifttmp;
tmp1 = (sumshifted[163] & ~sumshifttmp[9]);
tmp2 = (sumshifttmp[9] || sumshifted[162]);
tmp3 = sumshifted[161];
tmp2 = ((sumshifttmp[9] & sumshift[0]) || sumshifted[162]);
tmp3 = (sumshifted[161] || (sumshifttmp[9] & sumshift[1]));
tmp4 = sumshifted[160];
tmp5 = sumshifted[159];
// for some reason use exp = zexp + {0,1,2}
@ -112,25 +118,31 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
v = sumshifted[160:107];
sticky = (|sumshifted[106:0]) | bs;
//de0 = zexp-1;
de0 = zexp;
end else if(sumshifted[160]) begin
v = sumshifted[159:106];
de0 = zexp+zdenorm;
end else if(sumshifted[160]& ~zdenorm) begin
de0 = zexp-1;
v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
sticky = (|sumshifted[105:0]) | bs;
//de0 = zexp-1;
de0 = zexp-1;
end else if(sumshifted[159]) begin
v = sumshifted[158:105];
end else if(sumshifted[159]& ~zdenorm) begin
//v = sumshifted[158:105];
de0 = zexp-2;
v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
sticky = (|sumshifted[104:0]) | bs;
//de0 = zexp-1;
de0 = zexp-2;
end else begin
end else if(zdenorm) begin
v = sumshifted[160:107];
sticky = (|sumshifted[106:0]) | bs;
//de0 = zexp-1;
de0 = zexp;
end else begin
de0 = 0;
sumshifted = sum << sumshift-1; // p+2+l
v = sumshifted[162:109];
sticky = (|sumshifted[108:0]) | bs;
end
resultdenorm = ~(|de0);
resultdenorm = (~|de0 | de0[12]);
end
end

View File

@ -13,22 +13,17 @@
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module round(v, sticky, rz, rn, rp, rm, wsign,
invalid, overflow, underflow, inf, nan, xnan, ynan, znan,
module round(v, sticky, FrmE, wsign,
FmaFlagsM, inf, nan, xnan, ynan, znan,
xman, yman, zman,
wman, infinity, specialsel,expplus1);
/////////////////////////////////////////////////////////////////////////////
input [53:0] v; // normalized sum, R, S bits
input sticky; //sticky bit
input rz; // Round toward zero
input rn; // Round toward nearest
input rp; // Round toward plus infinity
input rm; // Round toward minus infinity
input [2:0] FrmE;
input wsign; // Sign of result
input invalid; // Trap on infinity, NaN, denorm
input overflow; // Result overflowed
input underflow; // Result underflowed
input [4:0] FmaFlagsM;
input inf; // Some input is infinity
input nan; // Some input is NaN
input xnan; // X is NaN
@ -45,7 +40,7 @@ module round(v, sticky, rz, rn, rp, rm, wsign,
// Internal nodes
wire plus1; // Round by adding one
logic plus1; // Round by adding one
wire [52:0] v1; // Result + 1 (for rounding)
wire [51:0] specialres; // Result of exceptional case
wire [51:0] infinityres; // Infinity or largest real number
@ -62,9 +57,19 @@ module round(v, sticky, rz, rn, rp, rm, wsign,
// 0xx - do nothing
// 100 - tie - plus1 if v[2] = 1
// 101/110/111 - plus1
assign plus1 = (rn & v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2]))) |
(rp & ~wsign) |
(rm & wsign);
always @ (FrmE, v, wsign, sticky) begin
case (FrmE)
3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2])));//round to nearest even
3'b001: plus1 = 0;//round to zero
3'b010: plus1 = wsign;//round down
3'b011: plus1 = ~wsign;//round up
3'b100: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&~wsign)));//round to nearest max magnitude
default: plus1 = 1'bx;
endcase
end
// assign plus1 = (rn & v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2]))) |
// (rp & ~wsign) |
// (rm & wsign);
//assign plus1 = rn && ((v[1] && v[0]) || (v[2] && (v[1]))) ||
// rp && ~wsign && (v[1] || v[0]) ||
// rm && wsign && (v[1] || v[0]);
@ -84,17 +89,17 @@ module round(v, sticky, rz, rn, rp, rm, wsign,
// inputs to the wide muxes can be combined at the expense of more
// complicated non-critical control in the circuit implementation.
assign specialsel = overflow || underflow || invalid ||
assign specialsel = FmaFlagsM[2] || FmaFlagsM[1] || FmaFlagsM[4] || //overflow underflow invalid
nan || inf;
assign specialres = invalid | nan ? nanres : //KEP added nan
overflow ? infinityres :
assign specialres = FmaFlagsM[4] | nan ? nanres : //invalid
FmaFlagsM[2] ? infinityres : //overflow
inf ? 52'b0 :
underflow ? 52'b0 : 52'bx; // default to undefined
FmaFlagsM[1] ? 52'b0 : 52'bx; // underflow
// Overflow is handled differently for different rounding modes
// Round is to either infinity or to maximum finite number
assign infinity = rn || (rp && ~wsign) || (rm && wsign);
assign infinity = |FrmE;//rn || (rp && ~wsign) || (rm && wsign);//***look into this
assign infinityres = infinity ? 52'b0 : {52{1'b1}};
// Invalid operations produce a quiet NaN. The result should

View File

@ -10,23 +10,24 @@
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, rm, overflow,
sumzero, nan, invalid, xinf, yinf, zinf, inf, wsign, invz, negsum, selsum1, psign);
module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, FrmE, FmaFlagsM, zzero,
sumzero, nan, xinf, yinf, zinf, inf, wsign, invz, negsum, selsum1, psign, isAdd);
////////////////////////////////////////////////////////////////////////////I
input xsign; // Sign of X
input ysign; // Sign of Y
input zsign; // Sign of Z
input zzero;
input isAdd;
input negsum0; // Sum in +O mode is negative
input negsum1; // Sum in +1 mode is negative
input bs; // sticky bit from addend
input ps; // sticky bit from product
input killprod; // Product forced to zero
input rm; // Round toward minus infinity
input overflow; // Round toward minus infinity
input [2:0] FrmE; // Round toward minus infinity
input [4:0] FmaFlagsM; // Round toward minus infinity
input sumzero; // Sum = O
input nan; // Some input is NaN
input invalid; // Result invalid
input xinf; // X = Inf
input yinf; // Y = Inf
input zinf; // Y = Inf
@ -96,10 +97,24 @@ logic tmp;
// shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero
// sum/difference shall be -0. However, x+x = x-(-X) retains the same sign as x even when x is zero."
assign zerosign = (~invz && killprod) ? zsign : rm;
//assign zerosign = (~invz && killprod) ? zsign : rm;//***look into
// assign zerosign = (~invz && killprod) ? zsign : 0;
// zero sign
// if product underflows then use psign
// otherwise
// addition
// if cancelation then 0 unless round to -inf
// otherwise psign
// subtraction
// if cancelation then 0 unless round to -inf
// otherwise psign
assign zerosign = FmaFlagsM[1] ? psign :
(isAdd ? (psign^zsign ? FrmE == 3'b010 : psign) :
(psign^zsign ? psign : FrmE == 3'b010));
assign infsign = zinf ? zsign : psign; //KEP 210112 keep the correct sign when result is infinity
//assign infsign = xinf ? (yinf ? psign : xsign) : yinf ? ysign : zsign;//original
assign tmp = invalid ? 0 : (inf ? infsign :(sumzero ? zerosign : psign ^ negsum));
assign wsign = invalid ? 0 : (inf ? infsign :(sumzero ? zerosign : sumneg));
assign tmp = FmaFlagsM[4] ? 0 : (inf ? infsign :(sumzero ? zerosign : psign ^ negsum));
assign wsign = FmaFlagsM[4] ? 0 : (inf ? infsign :(sumzero ? zerosign : sumneg));
endmodule

View File

@ -10,49 +10,49 @@
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module special(x, y, z, ae, xzero, yzero, zzero,
module special(ReadData1E, ReadData2E, ReadData3E, ae, xzero, yzero, zzero,
xnan, ynan, znan, xdenorm, ydenorm, zdenorm, proddenorm, xinf, yinf, zinf);
/////////////////////////////////////////////////////////////////////////////
input [63:0] x; // Input x
input [63:0] y; // Input Y
input [63:0] z; // Input z
input [63:0] ReadData1E; // Input ReadData1E
input [63:0] ReadData2E; // Input ReadData2E
input [63:0] ReadData3E; // Input ReadData3E
input [12:0] ae; // exponent of product
output xzero; // Input x = 0
output yzero; // Input y = 0
output zzero; // Input z = 0
output xnan; // x is NaN
output ynan; // y is NaN
output znan; // z is NaN
output xdenorm; // x is denormalized
output ydenorm; // y is denormalized
output zdenorm; // z is denormalized
output xzero; // Input ReadData1E = 0
output yzero; // Input ReadData2E = 0
output zzero; // Input ReadData3E = 0
output xnan; // ReadData1E is NaN
output ynan; // ReadData2E is NaN
output znan; // ReadData3E is NaN
output xdenorm; // ReadData1E is denormalized
output ydenorm; // ReadData2E is denormalized
output zdenorm; // ReadData3E is denormalized
output proddenorm; // product is denormalized
output xinf; // x is infinity
output yinf; // y is infinity
output zinf; // z is infinity
output xinf; // ReadData1E is infinity
output yinf; // ReadData2E is infinity
output zinf; // ReadData3E is infinity
// In the actual circuit design, the gates looking at bits
// 51:0 and at bits 62:52 should be shared among the various detectors.
// Check if input is NaN
assign xnan = &x[62:52] && |x[51:0];
assign ynan = &y[62:52] && |y[51:0];
assign znan = &z[62:52] && |z[51:0];
assign xnan = &ReadData1E[62:52] && |ReadData1E[51:0];
assign ynan = &ReadData2E[62:52] && |ReadData2E[51:0];
assign znan = &ReadData3E[62:52] && |ReadData3E[51:0];
// Check if input is denormalized
assign xdenorm = ~(|x[62:52]) && |x[51:0];
assign ydenorm = ~(|y[62:52]) && |y[51:0];
assign zdenorm = ~(|z[62:52]) && |z[51:0];
assign xdenorm = ~(|ReadData1E[62:52]) && |ReadData1E[51:0];
assign ydenorm = ~(|ReadData2E[62:52]) && |ReadData2E[51:0];
assign zdenorm = ~(|ReadData3E[62:52]) && |ReadData3E[51:0];
assign proddenorm = &ae & ~xzero & ~yzero; //KEP is the product denormalized
// Check if input is infinity
assign xinf = &x[62:52] && ~(|x[51:0]);
assign yinf = &y[62:52] && ~(|y[51:0]);
assign zinf = &z[62:52] && ~(|z[51:0]);
assign xinf = &ReadData1E[62:52] && ~(|ReadData1E[51:0]);
assign yinf = &ReadData2E[62:52] && ~(|ReadData2E[51:0]);
assign zinf = &ReadData3E[62:52] && ~(|ReadData3E[51:0]);
// Check if inputs are all zero
// Also forces denormalized inputs to zero.
@ -60,11 +60,11 @@ module special(x, y, z, ae, xzero, yzero, zzero,
// to just check if the exponent is zero.
// KATHERINE - commented following (21/01/11)
// assign xzero = ~(|x[62:0]) || xdenorm;
// assign yzero = ~(|y[62:0]) || ydenorm;
// assign zzero = ~(|z[62:0]) || zdenorm;
// assign xzero = ~(|ReadData1E[62:0]) || xdenorm;
// assign yzero = ~(|ReadData2E[62:0]) || ydenorm;
// assign zzero = ~(|ReadData3E[62:0]) || zdenorm;
// KATHERINE - removed denorm to prevent outputing zero when computing with a denormalized number
assign xzero = ~(|x[62:0]);
assign yzero = ~(|y[62:0]);
assign zzero = ~(|z[62:0]);
assign xzero = ~(|ReadData1E[62:0]);
assign yzero = ~(|ReadData2E[62:0]);
assign zzero = ~(|ReadData3E[62:0]);
endmodule

View File

@ -1,16 +1 @@
0010000000000000 bf4fdffffff7fffe 800ffffffffffffe 800003fbfffffefe 801003fbfffffefe Wrong zdenorm 308227
0010000000000000 be6fffffbffffff7 8000000000000000 800000001fffffc0 800000000fffffe0 Wrong 313753
001ffffffffffffe 3fddfbffffffffff 000ffffffffffffe 000efdfffffffffd 001efdfffffffffd Wrong zdenorm 551371
3befe000ffffffff 800ffffffffffffe 0000000000000000 0000000000000000 8000000000000000 Wrong ydenorm unflw 665575
000007fffffffffe 3f6ffffffe01fffe 000ffffffffffffe 00000007ffffff7e 00100007ffffff7e Wrong xdenorm zdenorm 768727
3fdffffffffffffe 000ffffffffffffe 8000000000000001 7feffffffffffff6 0007fffffffffffe Wrong ydenorm zdenorm 1049939
7fe0000000000001 4000000000000000 ffefffffffffffff 7ff0000000000000 7cb8000000000000 Wrong w=+inf 2602745
000fff000000000f 3ff00800001fffff 8010000000000000 7f7bfe007ff8381e 000006ff801ffe0e Wrong xdenorm 3117277
8000000000000001 40211275ffe5ee3c 0000000000000001 fcfe24ebffcbdc78 8000000000000008 Wrong xdenorm zdenorm 3148591
801fffffffffffff bfdffffffffffffe 0000000000021fff 0000000000021ffe 0010000000021ffe Wrong zdenorm 3537867
801ffffffffffffe 0010000000000001 0000000000000000 0000000000000000 8000000000000000 Wrong unflw 3564269
bca0000000000001 000fffffc000001e 8000000000000000 8000000000000001 8000000000000000 Wrong ydenorm 3717769
bcafffffffffffff 800ffffffffffffe 8000000000000000 0000000000000002 0000000000000001 Wrong ydenorm 3807413
7fec5fed92358a74 400000001bffffff ffefc0003ffffffe 7ff0000000000000 7fe8ffdb47bad466 Wrong w=+inf 3889689
bfdfffffffffffff 3fdf1f3616aa73e1 3fd0000000000001 3fd07064f4aac611 3f7c193d2ab1843f Wrong 4099063
3fd07dfffffffffe 8010000000000001 0000000000000001 ffe07dfffffffffb 80041f7fffffffff Wrong zdenorm 4716133
c3f000200003fffe 0000000000000001 001ffffffffffffe 80cffc400007fffd 80cffc400007fffc Wrong FmaResultM= -64 ydenorm 1119653

Binary file not shown.

View File

@ -20,19 +20,19 @@ void main() {
// b68ffff8000000ff_3f9080000007ffff_b6307ffbe0080080_00001
char ch;
int i,j,n;
char x[17];
char y[17];
char z[17];
char ReadData1E[17];
char ReadData2E[17];
char ReadData3E[17];
char ans[81];
char flags[3];
int rn,rz,rm,rp;
long stop = 4099063;
int FrmE;
long stop = 1119653;
int debug = 1;
//my_string = (char *) malloc (nbytes + 1);
//bytes_read = getline (&my_string, &nbytes, stdin);
for(n=0; n < 613; n++) {//613 for 10000
for(n=0; n < 305; n++) {//613 for 10000
if(getline(&ln,&nbytes,fp) < 0 || feof(fp)) break;
if(k == stop && debug == 1) break;
k++;
@ -41,71 +41,59 @@ void main() {
if(!feof(fp)) {
strncpy(x, ln, 16); x[16]=0;
strncpy(y, &ln[17], 16); y[16]=0;
strncpy(z, &ln[34], 16); z[16]=0;
// fprintf(stdout,"[%s]\n[%s]\n", ln,z);
strncpy(ReadData1E, ln, 16); ReadData1E[16]=0;
strncpy(ReadData2E, &ln[17], 16); ReadData2E[16]=0;
strncpy(ReadData3E, &ln[34], 16); ReadData3E[16]=0;
// fprintf(stdout,"[%s]\n[%s]\n", ln,ReadData3E);
strncpy(ans, &ln[51], 16); ans[16]=0;
strncpy(flags,&ln[68],2); flags[2]=0;
// fprintf(stdout,"[%s]\n[%s]\n", ln,z);
fprintf(fq," x = 64'h%s;\n",x);
fprintf(fq," y = 64'h%s;\n",y);
fprintf(fq," z = 64'h%s;\n",z);
// fprintf(stdout,"[%s]\n[%s]\n", ln,ReadData3E);
fprintf(fq," ReadData1E = 64'h%s;\n",ReadData1E);
fprintf(fq," ReadData2E = 64'h%s;\n",ReadData2E);
fprintf(fq," ReadData3E = 64'h%s;\n",ReadData3E);
fprintf(fq," ans = 64'h%s;\n", ans);
// fprintf(fq," flags = 5'h%s;\n", flags);
{
//rn=1; rz=0; rm=0; rp=0;
fprintf(fq," rn = %d;\n",1);
fprintf(fq," rz = %d;\n", 0);
fprintf(fq," rm = %d;\n", 0);
fprintf(fq," rp = %d;\n", 0);
}
{
fprintf(fq," earlyres = 64'b0;\n");
fprintf(fq," earlyressel = 0;\n");
}
{
fprintf(fq," bypsel= 2'b0;\n"); //, bysel);
fprintf(fq," bypplus1 = 0;\n"); //, byp1);
fprintf(fq," byppostnorm = 0;\n"); //, bypnorm);
fprintf(fq," FrmE = 3'b000;\n");
}
fprintf(fq,"#10\n");
// IEEE 754-2008 section 6.3 states "When either an input or result is NaN, this standard does not interpret the sign of a NaN."
//fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h \",x,y,w, ans);\n");
//fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h \",ReadData1E,ReadData2E,FmaResultM, ans);\n");
fprintf(fq," // IEEE 754-2008 section 6.3 states: \"When ether an input or result is NaN, this\n");
fprintf(fq," // standard does not interpret the sign of a NaN.\"\n");
fprintf(fq," wnan = &w[62:52] && |w[51:0]; \n");
fprintf(fq," xnan = &x[62:52] && |x[51:0]; \n");
fprintf(fq," ynan = &y[62:52] && |y[51:0]; \n");
fprintf(fq," znan = &z[62:52] && |z[51:0]; \n");
fprintf(fq," wnan = &FmaResultM[62:52] && |FmaResultM[51:0]; \n");
fprintf(fq," xnan = &ReadData1E[62:52] && |ReadData1E[51:0]; \n");
fprintf(fq," ynan = &ReadData2E[62:52] && |ReadData2E[51:0]; \n");
fprintf(fq," znan = &ReadData3E[62:52] && |ReadData3E[51:0]; \n");
fprintf(fq," ansnan = &ans[62:52] && |ans[51:0]; \n");
fprintf(fq," xnorm = ~(|x[62:52]) && |x[51:0] ? {x[50:0], 1'b0} : x; \n");
fprintf(fq," ynorm = ~(|y[62:52]) && |y[51:0] ? {y[50:0], 1'b0} : y;\n");
fprintf(fq," s = ({54'b1,xnorm} + (bypsel && bypplus1)) * {54'b1,ynorm}; \n");
// fprintf(fq," if(!(~(|x[62:52]) && |x[51:0] || ~(|y[62:52]) && |y[51:0])) begin\n");
fprintf(fq," xnorm = ~(|ReadData1E[62:52]) && |ReadData1E[51:0] ? {ReadData1E[50:0], 1'b0} : ReadData1E; \n");
fprintf(fq," ynorm = ~(|ReadData2E[62:52]) && |ReadData2E[51:0] ? {ReadData2E[50:0], 1'b0} : ReadData2E;\n");
// fprintf(fq," s = ({54'b1,xnorm} + (bypsel && bypplus1)) * {54'b1,ynorm}; \n");
// fprintf(fq," if(!(~(|ReadData1E[62:52]) && |ReadData1E[51:0] || ~(|ReadData2E[62:52]) && |ReadData2E[51:0])) begin\n");
// not looking at negative zero results right now
//fprintf(fq," if( (nan && (w[62:0] != ans[62:0])) || (!nan && (w != ans)) && !(w == 64'h8000000000000000 && ans == 64'b0)) begin\n");
// fprintf(fq," if( (nan && (w[62:0] != ans[62:0])) || (!nan && (w != ans)) ) begin\n");
fprintf(fq," if((!wnan && (w != ans)) || (wnan && ansnan && ~(((xnan && (w[62:0] == {x[62:52],1'b1,x[50:0]})) || (ynan && (w[62:0] == {y[62:52],1'b1,y[50:0]})) || (znan && (w[62:0] == {z[62:52],1'b1,z[50:0]})) || (w[62:0] == ans[62:0])) ))) begin\n");
fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h %%h Wrong \",x,y, z, w, ans);\n");
//fprintf(fq," if( (nan && (FmaResultM[62:0] != ans[62:0])) || (!nan && (FmaResultM != ans)) && !(FmaResultM == 64'h8000000000000000 && ans == 64'b0)) begin\n");
// fprintf(fq," if( (nan && (FmaResultM[62:0] != ans[62:0])) || (!nan && (FmaResultM != ans)) ) begin\n");
fprintf(fq," if((!wnan && (FmaResultM != ans)) || (wnan && ansnan && ~(((xnan && (FmaResultM[62:0] == {ReadData1E[62:52],1'b1,ReadData1E[50:0]})) || (ynan && (FmaResultM[62:0] == {ReadData2E[62:52],1'b1,ReadData2E[50:0]})) || (znan && (FmaResultM[62:0] == {ReadData3E[62:52],1'b1,ReadData3E[50:0]})) || (FmaResultM[62:0] == ans[62:0])) ))) begin\n");
fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h %%h Wrong \",ReadData1E,ReadData2E, ReadData3E, FmaResultM, ans);\n");
//fprintf(fq," $fwrite(fp, \"%%h \",s);\n");
fprintf(fq," if(w == 64'h8000000000000000) $fwrite(fp, \"w=-zero \");\n");
fprintf(fq," if(~(|x[62:52]) && |x[51:0]) $fwrite(fp, \"xdenorm \");\n");
fprintf(fq," if(~(|y[62:52]) && |y[51:0]) $fwrite(fp, \"ydenorm \");\n");
fprintf(fq," if(~(|z[62:52]) && |z[51:0]) $fwrite(fp, \"zdenorm \");\n");
fprintf(fq," if(invalid != 0) $fwrite(fp, \"invld \");\n");
fprintf(fq," if(overflow != 0) $fwrite(fp, \"ovrflw \");\n");
fprintf(fq," if(underflow != 0) $fwrite(fp, \"unflw \");\n");
fprintf(fq," if(w == 64'hFFF0000000000000) $fwrite(fp, \"w=-inf \");\n");
fprintf(fq," if(w == 64'h7FF0000000000000) $fwrite(fp, \"w=+inf \");\n");
fprintf(fq," if(w > 64'h7FF0000000000000 && w < 64'h7FF8000000000000 ) $fwrite(fp, \"w=sigNaN \");\n");
fprintf(fq," if(w > 64'hFFF8000000000000 && w < 64'hFFF8000000000000 ) $fwrite(fp, \"w=sigNaN \");\n");
fprintf(fq," if(w >= 64'h7FF8000000000000 && w <= 64'h7FFfffffffffffff ) $fwrite(fp, \"w=qutNaN \");\n");
fprintf(fq," if(w >= 64'hFFF8000000000000 && w <= 64'hFFFfffffffffffff ) $fwrite(fp, \"w=qutNaN \");\n");
fprintf(fq," $fwrite(fp, \"FmaResultM=%%d \",$signed(aligncnt));\n");
fprintf(fq," if(FmaResultM == 64'h8000000000000000) $fwrite(fp, \"FmaResultM=-zero \");\n");
fprintf(fq," if(~(|ReadData1E[62:52]) && |ReadData1E[51:0]) $fwrite(fp, \"xdenorm \");\n");
fprintf(fq," if(~(|ReadData2E[62:52]) && |ReadData2E[51:0]) $fwrite(fp, \"ydenorm \");\n");
fprintf(fq," if(~(|ReadData3E[62:52]) && |ReadData3E[51:0]) $fwrite(fp, \"zdenorm \");\n");
fprintf(fq," if(FmaFlagsM[4] != 0) $fwrite(fp, \"invld \");\n");
fprintf(fq," if(FmaFlagsM[2] != 0) $fwrite(fp, \"ovrflw \");\n");
fprintf(fq," if(FmaFlagsM[1] != 0) $fwrite(fp, \"unflw \");\n");
fprintf(fq," if(FmaResultM == 64'hFFF0000000000000) $fwrite(fp, \"FmaResultM=-inf \");\n");
fprintf(fq," if(FmaResultM == 64'h7FF0000000000000) $fwrite(fp, \"FmaResultM=+inf \");\n");
fprintf(fq," if(FmaResultM > 64'h7FF0000000000000 && FmaResultM < 64'h7FF8000000000000 ) $fwrite(fp, \"FmaResultM=sigNaN \");\n");
fprintf(fq," if(FmaResultM > 64'hFFF8000000000000 && FmaResultM < 64'hFFF8000000000000 ) $fwrite(fp, \"FmaResultM=sigNaN \");\n");
fprintf(fq," if(FmaResultM >= 64'h7FF8000000000000 && FmaResultM <= 64'h7FFfffffffffffff ) $fwrite(fp, \"FmaResultM=qutNaN \");\n");
fprintf(fq," if(FmaResultM >= 64'hFFF8000000000000 && FmaResultM <= 64'hFFFfffffffffffff ) $fwrite(fp, \"FmaResultM=qutNaN \");\n");
fprintf(fq," if(ans == 64'hFFF0000000000000) $fwrite(fp, \"ans=-inf \");\n");
fprintf(fq," if(ans == 64'h7FF0000000000000) $fwrite(fp, \"ans=+inf \");\n");

File diff suppressed because it is too large Load Diff

View File

@ -2,38 +2,27 @@
module tb;
reg [63:0] x;
reg [63:0] y;
reg [63:0] z;
reg [63:0] ans;
reg rn;
reg rz;
reg rm;
reg rp;
reg [63:0] earlyres;
reg earlyressel;
reg [1:0] bypsel;
reg bypplus1;
reg byppostnorm;
wire [63:0] w;
wire [63:0] wbypass;
wire invalid;
wire overflow;
wire underflow;
wire inexact;
reg [63:0] ReadData1E;
reg [63:0] ReadData2E;
reg [63:0] ReadData3E;
reg [63:0] ans;
reg [2:0] FrmE;
wire [63:0] FmaResultM;
wire [4:0] FmaFlagsM;
integer fp;
reg wnan;
reg xnan;
reg ynan;
reg znan;
wire [12:0] aligncnt;
reg ansnan;
reg [105:0] s; // partial product 2
reg [51:0] xnorm;
reg [51:0] ynorm;
localparam period = 20;
fmac UUT(.*);
fma UUT(.*);
initial

View File

@ -1 +1 @@
testfloat_gen f64_mulAdd -n 6133248 -rnear_even -seed 113355 -level 1 >> testFloat
testfloat_gen f64_mulAdd -n 6133248 -rminMag -seed 113355 -level 1 >> testFloat

View File

@ -50,7 +50,7 @@ module FA_array (S, C, A, B, Ci) ;
genvar i;
generate
for (i = 0; i < n; i = i + 1) begin : index
fa FA1(.S(S[i]), .C(C[i]), .A(A[i]), .B(B[i]), .Ci(Ci[i]));
fa FA1(.sum(S[i]), .carry(C[i]), .a(A[i]), .b(B[i]), .c(Ci[i]));
end
endgenerate

View File

@ -22,6 +22,7 @@ module fpu (
//signals, modules, and combinational logic closely defined.
//used for OSU DP-size hardware to wally XLEN interfacing
integer XLENDIFF;
assign XLENDIFF = `XLEN - 64;
integer XLENDIFFN;
@ -465,13 +466,18 @@ module fpu (
always_comb begin
//zero extension
if(`XLEN > 64) begin
FPUResultW <= {FPUResultDirW,{XLENDIFF{1'b0}}};
end
// Teo 04/13/2021
// Commented out XLENDIFF{1'b0} due to error:
// Repetition multiplier must be constant.
//if(`XLEN > 64) begin
// FPUResultW <= {FPUResultDirW,{XLENDIFF{1'b0}}};
//end
//truncate
else begin
//else begin
FPUResultW <= FPUResultDirW[63:64-`XLEN];
end
//end
end

View File

@ -24,9 +24,6 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
`include "wally-constants.vh"
module cam_line #(parameter KEY_BITS = 20,
parameter HIGH_SEGMENT_BITS = 10) (
input clk, reset,
@ -74,6 +71,6 @@ module cam_line #(parameter KEY_BITS = 20,
// should automatically match.
page_number_mixer #(KEY_BITS, HIGH_SEGMENT_BITS) mixer(VirtualPageNumber, Key, PageType, VirtualPageNumberQuery);
assign Match = ({1'b1, VirtualPageNumberQuery} == Key);
assign Match = ({1'b1, VirtualPageNumberQuery} == {Valid, Key});
endmodule
endmodule

View File

@ -4,8 +4,8 @@
// Written: jtorrey@hmc.edu 16 February 2021
// Modified:
//
// Purpose: Example translation lookaside buffer
// Cache of virtural-to-physical address translations
// Purpose: Translation lookaside buffer
// Cache of virtual-to-physical address translations
//
// A component of the Wally configurable RISC-V project.
//
@ -24,9 +24,6 @@
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
`include "wally-constants.vh"
/**
* sv32 specs
* ----------
@ -52,6 +49,9 @@
* least recently)
*/
`include "wally-config.vh"
`include "wally-constants.vh"
// The TLB will have 2**ENTRY_BITS total entries
module tlb #(parameter ENTRY_BITS = 3) (
input clk, reset,
@ -127,7 +127,8 @@ module tlb #(parameter ENTRY_BITS = 3) (
assign PageOffset = VirtualAddress[11:0];
// Currently use random replacement algorithm
tlb_rand rdm(.*);
// tlb_rand rdm(.*);
tlb_lru lru(.*);
tlb_ram #(ENTRY_BITS) ram(.*);
tlb_cam #(ENTRY_BITS, `VPN_BITS, `VPN_SEGMENT_BITS) cam(.*);

View File

@ -64,6 +64,8 @@ module tlb_cam #(parameter ENTRY_BITS = 3,
endgenerate
// In case there are multiple matches in the CAM, select only one
// *** it might be guaranteed that the CAM will never have multiple matches.
// If so, this is just an encoder
priority_encoder #(ENTRY_BITS) match_priority(Matches, VPNIndex);
assign CAMHit = |Matches & ~TLBFlush;

View File

@ -0,0 +1,69 @@
///////////////////////////////////////////
// tlb_lru.sv
//
// Written: tfleming@hmc.edu & jtorrey@hmc.edu 16 February 2021
// Modified:
//
// Purpose: Implementation of bit pseudo least-recently-used algorithm for
// cache evictions. Outputs the index of the next entry to be written.
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// tlb_lru: replacement-index generator for the TLB.
//
// Holds one "recently used" (RU) bit per TLB entry (2**ENTRY_BITS entries).
// WriteIndex is produced by priority-encoding the entries whose RU bit is
// still clear. On every hit or write the touched entry's RU bit is raised;
// when that would leave every bit raised, the record is collapsed so that
// only the entry touched this cycle stays marked.
//
// Ports:
//   clk, reset  - clock and reset forwarded to the state register
//   TLBWrite    - a TLB entry is being written; the touched line is WriteIndex
//   TLBFlush    - forwarded to the state register (see note at lru_state)
//   VPNIndex    - index of the entry that matched in the CAM
//   CAMHit      - the CAM lookup hit; the touched line is VPNIndex
//   WriteIndex  - index of the next entry to be written
module tlb_lru #(parameter ENTRY_BITS = 3) (
  input clk, reset,
  input TLBWrite,
  input TLBFlush,
  input [ENTRY_BITS-1:0] VPNIndex,
  input CAMHit,
  output [ENTRY_BITS-1:0] WriteIndex
);

  // Total number of tracked entries, i.e. the width of the RU record
  localparam NENTRIES = 2**ENTRY_BITS;

  // RUBits is the registered record; RUBitsAccessed is that record with this
  // cycle's access merged in; RUBitsNext is the value actually registered.
  logic [NENTRIES-1:0] RUBits, RUBitsNext, RUBitsAccessed;

  // One-hot views of the line being read, the line being written, and
  // whichever of the two counts as "accessed" this cycle
  logic [NENTRIES-1:0] ReadLineOneHot, WriteLineOneHot, AccessLineOneHot;

  // High when merging this cycle's access would set every RU bit
  logic AllUsed;

  // Pick the replacement candidate among entries whose RU bit is 0.
  // NOTE(review): which end of the vector wins is decided by
  // priority_encoder, which is defined outside this file - confirm there.
  priority_encoder #(ENTRY_BITS) first_nru(~RUBits, WriteIndex);

  // Expand both indices to one-hot form
  // *** should output writelineonehot so we don't have to decode WriteIndex outside
  decoder #(ENTRY_BITS) write_decoder(WriteIndex, WriteLineOneHot);
  decoder #(ENTRY_BITS) read_decoder(VPNIndex, ReadLineOneHot);

  // Next-state computation for the RU record
  always_comb begin
    // A write touches the line being replaced; otherwise the hit line
    AccessLineOneHot = TLBWrite ? WriteLineOneHot : ReadLineOneHot;

    // Mark the touched line as recently used
    RUBitsAccessed = RUBits | AccessLineOneHot;

    // If every line would now be marked, start over with only the touched
    // line marked (not all zeroes) so it is not the next candidate
    AllUsed = &RUBitsAccessed;
    RUBitsNext = AllUsed ? AccessLineOneHot : RUBitsAccessed;
  end

  // The record only advances on a TLB hit or write.
  // NOTE(review): TLBFlush sits in the argument position that flopenrc
  // presumably treats as a synchronous clear - confirm against flopenrc.
  flopenrc #(NENTRIES) lru_state(clk, reset, TLBFlush, (CAMHit || TLBWrite),
                                 RUBitsNext, RUBits);

endmodule

View File

@ -57,4 +57,4 @@ module tlb_ram #(parameter ENTRY_BITS = 3) (
ram[i] = `XLEN'b0;
end
endmodule
endmodule

View File

@ -29,7 +29,7 @@ module tlb_rand #(parameter ENTRY_BITS = 3) (
);
logic [31:0] data;
assign data = $urandom;
assign data = 32'b0;
assign WriteIndex = data[ENTRY_BITS-1:0];
endmodule

View File

@ -1479,21 +1479,15 @@ module shifter_l64 (Z, A, Shift);
logic [63:0] stage3;
logic [63:0] stage4;
logic [63:0] stage5;
logic [31:0] thirtytwozeros = 32'h0;
logic [15:0] sixteenzeros = 16'h0;
logic [ 7:0] eightzeros = 8'h0;
logic [ 3:0] fourzeros = 4'h0;
logic [ 1:0] twozeros = 2'b00;
logic onezero = 1'b0;
output logic [63:0] Z;
mux2 #(64) mx01(A, {A[31:0], thirtytwozeros}, Shift[5], stage1);
mux2 #(64) mx02(stage1, {stage1[47:0], sixteenzeros}, Shift[4], stage2);
mux2 #(64) mx03(stage2, {stage2[55:0], eightzeros}, Shift[3], stage3);
mux2 #(64) mx04(stage3, {stage3[59:0], fourzeros}, Shift[2], stage4);
mux2 #(64) mx05(stage4, {stage4[61:0], twozeros}, Shift[1], stage5);
mux2 #(64) mx06(stage5, {stage5[62:0], onezero}, Shift[0], Z);
mux2 #(64) mx01(A, {A[31:0], 32'h0}, Shift[5], stage1);
mux2 #(64) mx02(stage1, {stage1[47:0], 16'h0}, Shift[4], stage2);
mux2 #(64) mx03(stage2, {stage2[55:0], 8'h0}, Shift[3], stage3);
mux2 #(64) mx04(stage3, {stage3[59:0], 4'h0}, Shift[2], stage4);
mux2 #(64) mx05(stage4, {stage4[61:0], 2'h0}, Shift[1], stage5);
mux2 #(64) mx06(stage5, {stage5[62:0], 1'h0}, Shift[0], Z);
endmodule // shifter_l64
@ -1507,21 +1501,15 @@ module shifter_r64 (Z, A, Shift);
logic [63:0] stage3;
logic [63:0] stage4;
logic [63:0] stage5;
logic [31:0] thirtytwozeros = 32'h0;
logic [15:0] sixteenzeros = 16'h0;
logic [ 7:0] eightzeros = 8'h0;
logic [ 3:0] fourzeros = 4'h0;
logic [ 1:0] twozeros = 2'b00;
logic onezero = 1'b0;
output logic [63:0] Z;
mux2 #(64) mx01(A, {thirtytwozeros, A[63:32]}, Shift[5], stage1);
mux2 #(64) mx02(stage1, {sixteenzeros, stage1[63:16]}, Shift[4], stage2);
mux2 #(64) mx03(stage2, {eightzeros, stage2[63:8]}, Shift[3], stage3);
mux2 #(64) mx04(stage3, {fourzeros, stage3[63:4]}, Shift[2], stage4);
mux2 #(64) mx05(stage4, {twozeros, stage4[63:2]}, Shift[1], stage5);
mux2 #(64) mx06(stage5, {onezero, stage5[63:1]}, Shift[0], Z);
mux2 #(64) mx01(A, {32'h0, A[63:32]}, Shift[5], stage1);
mux2 #(64) mx02(stage1, {16'h0, stage1[63:16]}, Shift[4], stage2);
mux2 #(64) mx03(stage2, {8'h0, stage2[63:8]}, Shift[3], stage3);
mux2 #(64) mx04(stage3, {4'h0, stage3[63:4]}, Shift[2], stage4);
mux2 #(64) mx05(stage4, {2'h0, stage4[63:2]}, Shift[1], stage5);
mux2 #(64) mx06(stage5, {1'h0, stage5[63:1]}, Shift[0], Z);
endmodule // shifter_r64
@ -1534,19 +1522,14 @@ module shifter_l32 (Z, A, Shift);
logic [31:0] stage2;
logic [31:0] stage3;
logic [31:0] stage4;
logic [15:0] sixteenzeros = 16'h0;
logic [ 7:0] eightzeros = 8'h0;
logic [ 3:0] fourzeros = 4'h0;
logic [ 1:0] twozeros = 2'b00;
logic onezero = 1'b0;
output logic [31:0] Z;
mux2 #(32) mx01(A, {A[15:0], sixteenzeros}, Shift[4], stage1);
mux2 #(32) mx02(stage1, {stage1[23:0], eightzeros}, Shift[3], stage2);
mux2 #(32) mx03(stage2, {stage2[27:0], fourzeros}, Shift[2], stage3);
mux2 #(32) mx04(stage3, {stage3[29:0], twozeros}, Shift[1], stage4);
mux2 #(32) mx05(stage4, {stage4[30:0], onezero}, Shift[0], Z);
mux2 #(32) mx01(A, {A[15:0], 16'h0}, Shift[4], stage1);
mux2 #(32) mx02(stage1, {stage1[23:0], 8'h0}, Shift[3], stage2);
mux2 #(32) mx03(stage2, {stage2[27:0], 4'h0}, Shift[2], stage3);
mux2 #(32) mx04(stage3, {stage3[29:0], 2'h0}, Shift[1], stage4);
mux2 #(32) mx05(stage4, {stage4[30:0], 1'h0}, Shift[0], Z);
endmodule // shifter_l32
@ -1559,19 +1542,14 @@ module shifter_r32 (Z, A, Shift);
logic [31:0] stage2;
logic [31:0] stage3;
logic [31:0] stage4;
logic [15:0] sixteenzeros = 16'h0;
logic [ 7:0] eightzeros = 8'h0;
logic [ 3:0] fourzeros = 4'h0;
logic [ 1:0] twozeros = 2'b00;
logic onezero = 1'b0;
output logic [31:0] Z;
mux2 #(32) mx01(A, {sixteenzeros, A[31:16]}, Shift[4], stage1);
mux2 #(32) mx02(stage1, {eightzeros, stage1[31:8]}, Shift[3], stage2);
mux2 #(32) mx03(stage2, {fourzeros, stage2[31:4]}, Shift[2], stage3);
mux2 #(32) mx04(stage3, {twozeros, stage3[31:2]}, Shift[1], stage4);
mux2 #(32) mx05(stage4, {onezero, stage4[31:1]}, Shift[0], Z);
mux2 #(32) mx01(A, {16'h0, A[31:16]}, Shift[4], stage1);
mux2 #(32) mx02(stage1, {8'h0, stage1[31:8]}, Shift[3], stage2);
mux2 #(32) mx03(stage2, {4'h0, stage2[31:4]}, Shift[2], stage3);
mux2 #(32) mx04(stage3, {2'h0, stage3[31:2]}, Shift[1], stage4);
mux2 #(32) mx05(stage4, {1'h0, stage4[31:1]}, Shift[0], Z);
endmodule // shifter_r32

View File

@ -80,7 +80,7 @@ module csrc (
for (j=0; j<= `COUNTERS; j = j+1) begin
// Write enables
if (j !==1) begin
if (j != 1) begin
assign WriteHPMCOUNTERM[j] = CSRMWriteM && (CSRAdrM == MHPMCOUNTER[j]);
// Count Signals
assign HPMCOUNTERPlusM[j] = HPMCOUNTER_REGW[j] + {63'b0, MCOUNTEN[j] & ~MCOUNTINHIBIT_REGW[j]};

View File

@ -49,13 +49,13 @@ module csri #(parameter
// assumes no N-mode user interrupts
always_comb begin
IntInM = 0; // *** does this really work
IntInM[11] = ExtIntM & ~MIDELEG_REGW[9]; // MEIP
IntInM[9] = ExtIntM & MIDELEG_REGW[9]; // SEIP
IntInM[7] = TimerIntM & ~MIDELEG_REGW[5]; // MTIP
IntInM[5] = TimerIntM & MIDELEG_REGW[5]; // STIP
IntInM[3] = SwIntM & ~MIDELEG_REGW[1]; // MSIP
IntInM[1] = SwIntM & MIDELEG_REGW[1]; // SSIP
IntInM = 0; // *** does this overwriting technique really synthesize
IP_REGW[11] = ExtIntM & ~MIDELEG_REGW[9]; // MEIP
IntInM[9] = ExtIntM & MIDELEG_REGW[9]; // SEIP
IntInM[7] = TimerIntM & ~MIDELEG_REGW[5]; // MTIP
IntInM[5] = TimerIntM & MIDELEG_REGW[5]; // STIP
IntInM[3] = SwIntM & ~MIDELEG_REGW[1]; // MSIP
IntInM[1] = SwIntM & MIDELEG_REGW[1]; // SSIP
end
// Interrupt Write Enables
@ -77,14 +77,14 @@ module csri #(parameter
assign SIP_WRITE_MASK = 12'h000;
end
always @(posedge clk, posedge reset) begin
if (reset) IP_REGW <= 12'b0;
else if (WriteMIPM) IP_REGW <= (CSRWriteValM & MIP_WRITE_MASK) | IntInM; // MTIP unclearable
else if (WriteSIPM) IP_REGW <= (CSRWriteValM & SIP_WRITE_MASK) | IntInM; // MTIP unclearable
if (reset) IP_REGW[9:0] <= 10'b0;
else if (WriteMIPM) IP_REGW[9:0] <= (CSRWriteValM[9:0] & MIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable
else if (WriteSIPM) IP_REGW[9:0] <= (CSRWriteValM[9:0] & SIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable
// else if (WriteUIPM) IP_REGW = (CSRWriteValM & 12'hBBB) | (NextIPM & 12'h080); // MTIP unclearable
else IP_REGW <= IP_REGW | IntInM; // *** check this turns off interrupts properly even when MIDELEG changes
else IP_REGW[9:0] <= IP_REGW[9:0] | IntInM[9:0]; // *** check this turns off interrupts properly even when MIDELEG changes
end
always @(posedge clk, posedge reset) begin
if (reset) IE_REGW <= 12'b0;
if (reset) IE_REGW <= 12'b0;
else if (WriteMIEM) IE_REGW <= (CSRWriteValM & 12'hAAA); // MIE controls M and S fields
else if (WriteSIEM) IE_REGW <= (CSRWriteValM & 12'h222) | (IE_REGW & 12'h888); // only S fields
// else if (WriteUIEM) IE_REGW = (CSRWriteValM & 12'h111) | (IE_REGW & 12'hAAA); // only U field

View File

@ -40,7 +40,7 @@ module privileged (
input logic InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM,
input logic [3:0] InstrClassM,
input logic PrivilegedM,
input logic InstrPageFaultM, LoadPageFaultM, StorePageFaultM,
input logic InstrPageFaultF, LoadPageFaultM, StorePageFaultM,
input logic InstrMisalignedFaultM, InstrAccessFaultF, IllegalIEUInstrFaultD,
input logic LoadMisalignedFaultM, LoadAccessFaultM,
input logic StoreMisalignedFaultM, StoreAccessFaultM,
@ -62,8 +62,9 @@ module privileged (
logic uretM, sretM, mretM, ecallM, ebreakM, wfiM, sfencevmaM;
logic IllegalCSRAccessM;
logic IllegalIEUInstrFaultE, IllegalIEUInstrFaultM;
logic InstrAccessFaultD, InstrAccessFaultE, InstrAccessFaultM;
logic IllegalIEUInstrFaultE, IllegalIEUInstrFaultM;
logic InstrPageFaultD, InstrPageFaultE, InstrPageFaultM;
logic InstrAccessFaultD, InstrAccessFaultE, InstrAccessFaultM;
logic IllegalInstrFaultM;
logic BreakpointFaultM, EcallFaultM;
@ -129,13 +130,15 @@ module privileged (
// assign StorePageFaultM = 0;
// pipeline fault signals
flopenrc #(1) faultregD(clk, reset, FlushD, ~StallD, InstrAccessFaultF, InstrAccessFaultD);
flopenrc #(2) faultregE(clk, reset, FlushE, ~StallE,
{IllegalIEUInstrFaultD, InstrAccessFaultD}, // ** vs IllegalInstrFaultInD
{IllegalIEUInstrFaultE, InstrAccessFaultE});
flopenrc #(2) faultregM(clk, reset, FlushM, ~StallM,
{IllegalIEUInstrFaultE, InstrAccessFaultE},
{IllegalIEUInstrFaultM, InstrAccessFaultM});
flopenrc #(2) faultregD(clk, reset, FlushD, ~StallD,
{InstrPageFaultF, InstrAccessFaultF},
{InstrPageFaultD, InstrAccessFaultD});
flopenrc #(3) faultregE(clk, reset, FlushE, ~StallE,
{IllegalIEUInstrFaultD, InstrPageFaultD, InstrAccessFaultD}, // ** vs IllegalInstrFaultInD
{IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE});
flopenrc #(3) faultregM(clk, reset, FlushM, ~StallM,
{IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE},
{IllegalIEUInstrFaultM, InstrPageFaultM, InstrAccessFaultM});
trap trap(.*);

View File

@ -49,7 +49,7 @@ module trap (
logic InterruptM;
// Determine pending enabled interrupts
assign MIntGlobalEnM = (PrivilegeModeW != `M_MODE) || STATUS_MIE; // if M ints enabled or lower priv 3.1.9
assign MIntGlobalEnM = {12{(PrivilegeModeW != `M_MODE) || STATUS_MIE}}; // if M ints enabled or lower priv 3.1.9
// Replicate the supervisor global enable across all 12 interrupt bit positions.
// A scalar here is zero-extended before the "& 12'h222" mask in PendingIntsM, and
// bit 0 of 12'h222 is clear, so supervisor interrupts could never become pending.
// NOTE(review): presumes SIntGlobalEnM is declared 12 bits wide like MIntGlobalEnM - confirm.
assign SIntGlobalEnM = {12{(PrivilegeModeW == `U_MODE) || STATUS_SIE}}; // if S ints enabled or lower priv 3.1.9
assign PendingIntsM = (MIP_REGW & MIE_REGW) & ((MIntGlobalEnM & 12'h888) | (SIntGlobalEnM & 12'h222));
assign InterruptM = |PendingIntsM; // interrupt if any sources are pending

View File

@ -41,13 +41,15 @@ module uart (
// UART interface signals
logic [2:0] A;
logic MEMRb, MEMWb;
logic MEMRb, MEMWb, memread, memwrite;
logic [7:0] Din, Dout;
// rename processor interface signals to match PC16550D and provide one-byte interface
flopr #(1) memreadreg(HCLK, ~HRESETn, ~(HSELUART & ~HWRITE), MEMRb);
flopr #(1) memwritereg(HCLK, ~HRESETn, ~(HSELUART & HWRITE), MEMWb);
flopr #(1) memreadreg(HCLK, ~HRESETn, (HSELUART & ~HWRITE), memread);
flopr #(1) memwritereg(HCLK, ~HRESETn, (HSELUART & HWRITE), memwrite);
flopr #(3) haddrreg(HCLK, ~HRESETn, HADDR[2:0], A);
assign MEMRb = ~memread;
assign MEMWb = ~memwrite;
assign HRESPUART = 0; // OK
assign HREADYUART = 1; // should idle high during address phase and respond high when done; will need to be modified if UART ever needs more than 1 cycle to do something

View File

@ -24,6 +24,7 @@
///////////////////////////////////////////
`include "wally-config.vh"
`include "wally-constants.vh"
/* verilator lint_on UNUSED */
module wallypipelinedhart (
@ -76,7 +77,7 @@ module wallypipelinedhart (
logic InstrMisalignedFaultM;
logic DataMisalignedM;
logic IllegalBaseInstrFaultD, IllegalIEUInstrFaultD;
logic InstrPageFaultM, LoadPageFaultM, StorePageFaultM;
logic InstrPageFaultF, LoadPageFaultM, StorePageFaultM;
logic LoadMisalignedFaultM, LoadAccessFaultM;
logic StoreMisalignedFaultM, StoreAccessFaultM;
logic [`XLEN-1:0] InstrMisalignedAdrM;

View File

@ -279,14 +279,15 @@ module testbench_busybear();
end
end
string sepc_lit = "SEPC";
`define CHECK_CSR2(CSR, PATH) \
string CSR; \
logic [63:0] expected``CSR``; \
//CSR checking \
always @(``PATH``.``CSR``_REGW) begin \
if ($time > 1) begin \
if (sepc_lit.icompare(`"CSR`")) begin #1; end \
if ("SEPC" == `"CSR`") begin #1; end \
if ("SCAUSE" == `"CSR`") begin #2; end \
if ("SSTATUS" == `"CSR`") begin #3; end \
scan_file_csr = $fscanf(data_file_csr, "%s\n", CSR); \
scan_file_csr = $fscanf(data_file_csr, "%x\n", expected``CSR``); \
if(CSR.icompare(`"CSR`")) begin \
@ -463,8 +464,10 @@ module testbench_busybear();
32'bXXXXXXXXXXXXXXXX111XXXXXXXXXXX01, // C.BNEZ
32'bXXXXXXXXXXXXXXXX101XXXXXXXXXXX01: // C.J
speculative = 1;
32'bXXXXXXXXXXXXXXXX1001000000000010: // C.EBREAK:
32'bXXXXXXXXXXXXXXXX1001000000000010, // C.EBREAK:
32'bXXXXXXXXXXXXXXXXX000XXXXX1110011: // Something that's not CSRR*
speculative = 0; // tbh don't really know what should happen here
32'b000110000000XXXXXXXXXXXXX1110011, // CSR* SATP, *
32'bXXXXXXXXXXXXXXXX1000XXXXX0000010, // C.JR
32'bXXXXXXXXXXXXXXXX1001XXXXX0000010: // C.JALR //this is RV64 only so no C.JAL
speculative = 1;

View File

@ -352,7 +352,7 @@ module testbench();
};
string tests64periph[] = '{
"rv64i-periph/WALLY-PLIC", "2000"
"rv64i-periph/WALLY-PLIC", "2080"
};
string tests32periph[] = '{
@ -402,7 +402,7 @@ module testbench();
if (TESTSPERIPH) begin
tests = tests32periph;
end else begin
tests = {tests32i,tests32periph};
tests = {tests32i};//,tests32periph}; *** broken at the moment
if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic};
else tests = {tests, tests32iNOc};
if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m};

View File

@ -1,413 +0,0 @@
///////////////////////////////////////////
// testbench-imperas.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: Wally Testbench and helper modules
// Applies test programs from the Imperas suite
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// Top-level simulation testbench.
// For each entry in the selected test list it loads the test's .elf.memfile into
// dut.imem.RAM and dut.uncore.dtim.RAM, pulses reset, lets the program run until an
// ecall is seen with register 3 equal to 1, and then compares the words in
// dut.uncore.dtim.RAM against the test's .signature.output reference file.
// Parameters:
//   DEBUG   - when 1, instantiates function_radix to track the current function/label
//   TESTSBP - when nonzero (RV64 only), runs the testsBP64 list instead of the regular lists
module testbench();
parameter DEBUG = 0;
parameter TESTSBP = 0;
logic clk;
logic reset;
// test indexes into tests[] (advanced by 2 per program); errors counts mismatches in the
// current program; totalerrors counts programs that had at least one mismatch
int test, i, errors, totalerrors;
// sig32 holds the reference signature as read from file (32-bit words);
// signature holds the same data repacked into XLEN-wide words
logic [31:0] sig32[0:10000];
logic [`XLEN-1:0] signature[0:10000];
// word index into dut.uncore.dtim.RAM where the signature comparison starts
logic [`XLEN-1:0] testadr;
string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
logic [31:0] InstrW;
logic [`XLEN-1:0] meminit;
// Test lists are flat arrays of pairs: a test path followed by a hex string.
// The hex string is parsed with atohex() and added to `TIMBASE to locate the signature.
string tests64i[] = {
"peripherals/WALLY-PLIC", "2000"
//"peripherals/WALLY-UART", "2000"
};
string tests64ic[] = {
};
string tests64iNOc[] = {
};
string tests64m[] = {
};
string tests64a[] = {
};
string tests32a[] = {
};
string tests32m[] = {
};
string tests32ic[] = {
};
string tests32iNOc[] = {
};
string tests32i[] = {
};
string testsBP64[] = {
};
string tests64p[] = {
};
// the list of tests actually run, assembled in the initial block below
string tests[];
string ProgramAddrMapFile, ProgramLabelMapFile;
// signals connected to the dut by name through the .* port connection below
logic [`AHBW-1:0] HRDATAEXT;
logic HREADYEXT, HRESPEXT;
logic [31:0] HADDR;
logic [`AHBW-1:0] HWDATA;
logic HWRITE;
logic [2:0] HSIZE;
logic [2:0] HBURST;
logic [3:0] HPROT;
logic [1:0] HTRANS;
logic HMASTLOCK;
logic HCLK, HRESETn;
logic [`XLEN-1:0] PCW;
// register the M-stage PC and instruction whenever StallW is low, giving W-stage copies
flopenr #(`XLEN) PCWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.PCM, PCW);
flopenr #(32) InstrWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.InstrM, InstrW);
// pick tests based on modes supported
initial begin
if (`XLEN == 64) begin // RV64
if (TESTSBP) begin
tests = testsBP64;
end else begin
tests = {tests64i};
if (`C_SUPPORTED) tests = {tests, tests64ic};
else tests = {tests, tests64iNOc};
if (`M_SUPPORTED) tests = {tests, tests64m};
// if (`F_SUPPORTED) tests = {tests64f, tests};
// if (`D_SUPPORTED) tests = {tests64d, tests};
if (`A_SUPPORTED) tests = {tests, tests64a};
end
// tests = {tests64a, tests};
// tests64p is appended for RV64 regardless of TESTSBP
tests = {tests, tests64p};
end else begin // RV32
// *** add the 32 bit bp tests
tests = {tests32i};
if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic};
else tests = {tests, tests32iNOc};
if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m};
// if (`F_SUPPORTED) tests = {tests32f, tests};
if (`A_SUPPORTED) tests = {tests, tests32a};
end
// tests = tests64p;
end
string signame, memfilename;
logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
logic UARTSin, UARTSout;
// instantiate device to be tested
// external inputs are tied to constants: GPIO inputs 0, UART input 1,
// external bus ready 1, external bus response 0, external read data 0
assign GPIOPinsIn = 0;
assign UARTSin = 1;
assign HREADYEXT = 1;
assign HRESPEXT = 0;
assign HRDATAEXT = 0;
wallypipelinedsoc dut(.*);
// Track names of instructions
instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE,
dut.hart.ifu.ic.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
dut.hart.ifu.InstrM, InstrW, InstrFName, InstrDName,
InstrEName, InstrMName, InstrWName);
// initialize tests
initial
begin
test = 0;
totalerrors = 0;
testadr = 0;
// fill memory with defined values to reduce Xs in simulation
// NOTE: both assignments inside the loop below are commented out, so the loop currently
// does nothing and meminit is set but not otherwise used in this module
if (`XLEN == 32) meminit = 32'hFEDC0123;
else meminit = 64'hFEDCBA9876543210;
for (i=0; i<=65535; i = i+1) begin
//dut.imem.RAM[i] = meminit;
// dut.uncore.RAM[i] = meminit;
end
// read test vectors into memory
// the same memfile is loaded into both the instruction memory and the data memory
memfilename = {"../../imperas-riscv-tests/work/", tests[test], ".elf.memfile"};
$readmemh(memfilename, dut.imem.RAM);
$readmemh(memfilename, dut.uncore.dtim.RAM);
ProgramAddrMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.lab"};
$display("Read memfile %s", memfilename);
// hold reset for 42 time units before starting the first program
reset = 1; # 42; reset = 0;
end
// generate clock to sequence tests
// period is 10 time units: high for 5, low for 5
always
begin
clk = 1; # 5; clk = 0; # 5;
end
// check results
always @(negedge clk)
begin
// end-of-program condition: an ecall fault in the M stage while register 3 already
// holds 1, or while a write of the value 1 to register 3 is in progress
if (dut.hart.priv.EcallFaultM &&
(dut.hart.ieu.dp.regf.rf[3] == 1 || (dut.hart.ieu.dp.regf.we3 && dut.hart.ieu.dp.regf.a3 == 3 && dut.hart.ieu.dp.regf.wd3 == 1))) begin
$display("Code ended with ecall with gp = 1");
#60; // give time for instructions in pipeline to finish
// clear signature to prevent contamination from previous tests
for(i=0; i<10000; i=i+1) begin
sig32[i] = 'bx;
end
// read signature, reformat in 64 bits if necessary
signame = {"../../imperas-riscv-tests/work/", tests[test], ".signature.output"};
$readmemh(signame, sig32);
i = 0;
while (i < 10000) begin
if (`XLEN == 32) begin
signature[i] = sig32[i];
i = i+1;
end else begin
// pack two consecutive 32-bit words into one wide word, lower-indexed word in the low half
signature[i/2] = {sig32[i+1], sig32[i]};
i = i + 2;
end
end
// Check errors
i = 0;
errors = 0;
// convert the byte address (`TIMBASE + the test's hex offset string) to a word index:
// divide by 4 when XLEN is 32, by 8 otherwise
if (`XLEN == 32)
testadr = (`TIMBASE+tests[test+1].atohex())/4;
else
testadr = (`TIMBASE+tests[test+1].atohex())/8;
/* verilator lint_off INFINITELOOP */
// walk the reference signature until the first entry that is still all X
while (signature[i] !== 'bx) begin
//$display("signature[%h] = %h", i, signature[i]);
if (signature[i] !== dut.uncore.dtim.RAM[testadr+i]) begin
if (signature[i+4] !== 'bx || signature[i] !== 32'hFFFFFFFF) begin
// report errors unless they are garbage at the end of the sim
// kind of hacky test for garbage right now
errors = errors+1;
$display(" Error on test %s result %d: adr = %h sim = %h, signature = %h",
tests[test], i, (testadr+i)*`XLEN/8, dut.uncore.dtim.RAM[testadr+i], signature[i]);
end
end
i = i + 1;
end
/* verilator lint_on INFINITELOOP */
if (errors == 0) $display("%s succeeded. Brilliant!!!", tests[test]);
else begin
$display("%s failed with %d errors. :(", tests[test], errors);
totalerrors = totalerrors+1;
end
// advance past the (name, hex string) pair to the next test
test = test + 2;
if (test == tests.size()) begin
if (totalerrors == 0) $display("SUCCESS! All tests ran without failures.");
else $display("FAIL: %d test programs had errors", totalerrors);
$stop;
end
else begin
// load the next program and pulse reset (17 time units here, versus 42 for the first program)
memfilename = {"../../imperas-riscv-tests/work/", tests[test], ".elf.memfile"};
$readmemh(memfilename, dut.imem.RAM);
$readmemh(memfilename, dut.uncore.dtim.RAM);
$display("Read memfile %s", memfilename);
ProgramAddrMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.lab"};
reset = 1; # 17; reset = 0;
end
end
end // always @ (negedge clk)
// track the current function or global label
// only instantiated when the DEBUG parameter is 1
if (DEBUG == 1) begin : functionRadix
function_radix function_radix(.reset(reset),
.ProgramAddrMapFile(ProgramAddrMapFile),
.ProgramLabelMapFile(ProgramLabelMapFile));
end
// initialize the branch predictor
// preload files are named by the `TWO_BIT_PRELOAD and `BTB_PRELOAD macros
initial begin
$readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.Predictor.DirPredictor.PHT.memory);
$readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.TargetPredictor.memory.memory);
end
endmodule
/* verilator lint_on STMTDLY */
/* verilator lint_on WIDTH */
module instrTrackerTB(
  input  logic        clk, reset, FlushE,
  input  logic [31:0] InstrF, InstrD,
  input  logic [31:0] InstrE, InstrM,
  input  logic [31:0] InstrW,
  output string       InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);

  // Produces a printable mnemonic for the instruction word of each pipeline stage
  // (F, D, E, M, W) by running every word through its own instrNameDecTB decoder.
  // InstrW arrives as an input; this module does not register InstrM itself.
  // clk, reset and FlushE are part of the port list but are not used in the body.
  instrNameDecTB fdec(.instr(InstrF), .name(InstrFName));
  instrNameDecTB ddec(.instr(InstrD), .name(InstrDName));
  instrNameDecTB edec(.instr(InstrE), .name(InstrEName));
  instrNameDecTB mdec(.instr(InstrM), .name(InstrMName));
  instrNameDecTB wdec(.instr(InstrW), .name(InstrWName));
endmodule
// decode the instruction name, to help the test bench
// Testbench helper: maps a 32-bit instruction word to a printable mnemonic.
//   instr - instruction word to decode
//   name  - mnemonic string; "ILLEGAL" when no pattern below matches
// Decoding looks at the opcode (instr[6:0]) and funct3 (instr[14:12]) first and then,
// where needed, at funct7 (instr[31:25]) or the 12-bit immediate field (instr[31:20]).
// Operands are not decoded.
module instrNameDecTB(
  input  logic [31:0] instr,
  output string       name);

  logic [6:0]  op;
  logic [2:0]  funct3;
  logic [6:0]  funct7;
  logic [11:0] imm;

  assign op     = instr[6:0];
  assign funct3 = instr[14:12];
  assign funct7 = instr[31:25];
  assign imm    = instr[31:20];

  // it would be nice to add the operands to the name
  // create another variable called decoded
  always_comb
    casez({op, funct3})
      10'b0000000_000: name = "BAD";
      // loads
      10'b0000011_000: name = "LB";
      10'b0000011_001: name = "LH";
      10'b0000011_010: name = "LW";
      10'b0000011_011: name = "LD";
      10'b0000011_100: name = "LBU";
      10'b0000011_101: name = "LHU";
      10'b0000011_110: name = "LWU";
      // register-immediate ALU operations
      10'b0010011_000: if (instr[31:15] == 0 && instr[11:7] ==0) name = "NOP/FLUSH";
                       else                                      name = "ADDI";
      // shifts compare funct7[6:1] only, so instr[25] is free to act as shamt[5]
      10'b0010011_001: if (funct7[6:1] == 6'b000000) name = "SLLI";
                       else                          name = "ILLEGAL";
      10'b0010011_010: name = "SLTI";
      10'b0010011_011: name = "SLTIU";
      10'b0010011_100: name = "XORI";
      10'b0010011_101: if      (funct7[6:1] == 6'b000000) name = "SRLI";
                       else if (funct7[6:1] == 6'b010000) name = "SRAI";
                       else                               name = "ILLEGAL";
      10'b0010011_110: name = "ORI";
      10'b0010011_111: name = "ANDI";
      10'b0010111_???: name = "AUIPC";
      // stores
      10'b0100011_000: name = "SB";
      10'b0100011_001: name = "SH";
      10'b0100011_010: name = "SW";
      10'b0100011_011: name = "SD";
      // 32-bit (W) register-immediate operations
      10'b0011011_000: name = "ADDIW";
      10'b0011011_001: name = "SLLIW";
      10'b0011011_101: if      (funct7 == 7'b0000000) name = "SRLIW";
                       else if (funct7 == 7'b0100000) name = "SRAIW";
                       else                           name = "ILLEGAL";
      // 32-bit (W) register-register operations, including the M-extension W forms
      10'b0111011_000: if      (funct7 == 7'b0000000) name = "ADDW";
                       else if (funct7 == 7'b0100000) name = "SUBW";
                       else if (funct7 == 7'b0000001) name = "MULW";
                       else                           name = "ILLEGAL";
      10'b0111011_001: if      (funct7 == 7'b0000000) name = "SLLW";
                       else                           name = "ILLEGAL";
      // DIVW uses funct3 = 100 (it was previously decoded under funct3 = 001)
      10'b0111011_100: if      (funct7 == 7'b0000001) name = "DIVW";
                       else                           name = "ILLEGAL";
      10'b0111011_101: if      (funct7 == 7'b0000000) name = "SRLW";
                       else if (funct7 == 7'b0100000) name = "SRAW";
                       else if (funct7 == 7'b0000001) name = "DIVUW";
                       else                           name = "ILLEGAL";
      10'b0111011_110: if      (funct7 == 7'b0000001) name = "REMW";
                       else                           name = "ILLEGAL";
      10'b0111011_111: if      (funct7 == 7'b0000001) name = "REMUW";
                       else                           name = "ILLEGAL";
      // register-register operations, including the M extension
      10'b0110011_000: if      (funct7 == 7'b0000000) name = "ADD";
                       else if (funct7 == 7'b0000001) name = "MUL";
                       else if (funct7 == 7'b0100000) name = "SUB";
                       else                           name = "ILLEGAL";
      10'b0110011_001: if      (funct7 == 7'b0000000) name = "SLL";
                       else if (funct7 == 7'b0000001) name = "MULH";
                       else                           name = "ILLEGAL";
      10'b0110011_010: if      (funct7 == 7'b0000000) name = "SLT";
                       else if (funct7 == 7'b0000001) name = "MULHSU";
                       else                           name = "ILLEGAL";
      10'b0110011_011: if      (funct7 == 7'b0000000) name = "SLTU";
                       else if (funct7 == 7'b0000001) name = "MULHU";
                       else                           name = "ILLEGAL";
      10'b0110011_100: if      (funct7 == 7'b0000000) name = "XOR";
                       else if (funct7 == 7'b0000001) name = "DIV";
                       else                           name = "ILLEGAL";
      10'b0110011_101: if      (funct7 == 7'b0000000) name = "SRL";
                       else if (funct7 == 7'b0000001) name = "DIVU";
                       else if (funct7 == 7'b0100000) name = "SRA";
                       else                           name = "ILLEGAL";
      10'b0110011_110: if      (funct7 == 7'b0000000) name = "OR";
                       else if (funct7 == 7'b0000001) name = "REM";
                       else                           name = "ILLEGAL";
      10'b0110011_111: if      (funct7 == 7'b0000000) name = "AND";
                       else if (funct7 == 7'b0000001) name = "REMU";
                       else                           name = "ILLEGAL";
      10'b0110111_???: name = "LUI";
      // branches and jumps
      10'b1100011_000: name = "BEQ";
      10'b1100011_001: name = "BNE";
      10'b1100011_100: name = "BLT";
      10'b1100011_101: name = "BGE";
      10'b1100011_110: name = "BLTU";
      10'b1100011_111: name = "BGEU";
      10'b1100111_000: name = "JALR";
      10'b1101111_???: name = "JAL";
      // SYSTEM opcode with funct3 = 000: told apart by the immediate field or funct7
      10'b1110011_000: if      (imm == 0)             name = "ECALL";
                       else if (imm == 1)             name = "EBREAK";
                       else if (imm == 2)             name = "URET";
                       else if (imm == 258)           name = "SRET";
                       else if (imm == 770)           name = "MRET";
                       else if (imm == 12'h105)       name = "WFI";
                       else if (funct7 == 7'b0001001) name = "SFENCE.VMA";
                       else                           name = "ILLEGAL";
      10'b1110011_001: name = "CSRRW";
      10'b1110011_010: name = "CSRRS";
      10'b1110011_011: name = "CSRRC";
      10'b1110011_101: name = "CSRRWI";
      10'b1110011_110: name = "CSRRSI";
      10'b1110011_111: name = "CSRRCI";
      // atomics: funct7[6:2] selects the operation; funct7[1:0] is not examined
      10'b0101111_010: if      (funct7[6:2] == 5'b00010) name = "LR.W";
                       else if (funct7[6:2] == 5'b00011) name = "SC.W";
                       else if (funct7[6:2] == 5'b00001) name = "AMOSWAP.W";
                       else if (funct7[6:2] == 5'b00000) name = "AMOADD.W";
                       else if (funct7[6:2] == 5'b00100) name = "AMOXOR.W";
                       else if (funct7[6:2] == 5'b01100) name = "AMOAND.W";
                       else if (funct7[6:2] == 5'b01000) name = "AMOOR.W";
                       else if (funct7[6:2] == 5'b10000) name = "AMOMIN.W";
                       else if (funct7[6:2] == 5'b10100) name = "AMOMAX.W";
                       else if (funct7[6:2] == 5'b11000) name = "AMOMINU.W";
                       else if (funct7[6:2] == 5'b11100) name = "AMOMAXU.W";
                       else                              name = "ILLEGAL";
      10'b0101111_011: if      (funct7[6:2] == 5'b00010) name = "LR.D";
                       else if (funct7[6:2] == 5'b00011) name = "SC.D";
                       else if (funct7[6:2] == 5'b00001) name = "AMOSWAP.D";
                       else if (funct7[6:2] == 5'b00000) name = "AMOADD.D";
                       else if (funct7[6:2] == 5'b00100) name = "AMOXOR.D";
                       else if (funct7[6:2] == 5'b01100) name = "AMOAND.D";
                       else if (funct7[6:2] == 5'b01000) name = "AMOOR.D";
                       else if (funct7[6:2] == 5'b10000) name = "AMOMIN.D";
                       else if (funct7[6:2] == 5'b10100) name = "AMOMAX.D";
                       else if (funct7[6:2] == 5'b11000) name = "AMOMINU.D";
                       else if (funct7[6:2] == 5'b11100) name = "AMOMAXU.D";
                       else                              name = "ILLEGAL";
      10'b0001111_???: name = "FENCE";
      default:         name = "ILLEGAL";
    endcase
endmodule