mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Merge branch 'main' into cache
Conflicts: wally-pipelined/src/cache/dmapped.sv wally-pipelined/src/cache/line.sv wally-pipelined/src/ifu/icache.sv
This commit is contained in:
commit
c1e2e58ebe
14
wally-pipelined/src/cache/dmapped.sv
vendored
14
wally-pipelined/src/cache/dmapped.sv
vendored
@ -75,14 +75,14 @@ module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par
|
||||
|
||||
// Assign the read and write addresses in cache memory
|
||||
always_comb begin
|
||||
assign ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN];
|
||||
assign ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
|
||||
assign ReadSet = ReadPAdr[SETEND:SETBEGIN];
|
||||
assign ReadTag = OldReadPAdr[TAGEND:TAGBEGIN];
|
||||
ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN];
|
||||
ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
|
||||
ReadSet = ReadPAdr[SETEND:SETBEGIN];
|
||||
ReadTag = OldReadPAdr[TAGEND:TAGBEGIN];
|
||||
|
||||
assign WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN];
|
||||
assign WriteSet = WritePAdr[SETEND:SETBEGIN];
|
||||
assign WriteTag = WritePAdr[TAGEND:TAGBEGIN];
|
||||
WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN];
|
||||
WriteSet = WritePAdr[SETEND:SETBEGIN];
|
||||
WriteTag = WritePAdr[TAGEND:TAGBEGIN];
|
||||
end
|
||||
|
||||
// Depth is number of bits in one "word" of the memory, width is number of such words
|
||||
|
@ -61,7 +61,7 @@ module pagetablewalker (
|
||||
output logic MMUTranslationComplete,
|
||||
|
||||
// Faults
|
||||
output logic InstrPageFaultM, LoadPageFaultM, StorePageFaultM
|
||||
output logic InstrPageFaultF, LoadPageFaultM, StorePageFaultM
|
||||
);
|
||||
|
||||
// Internal signals
|
||||
@ -85,7 +85,7 @@ module pagetablewalker (
|
||||
|
||||
// Signals for direct, fake translations. Not part of the final Wally version.
|
||||
logic [`XLEN-1:0] DirectInstrPTE, DirectMemPTE;
|
||||
logic [9:0] DirectPTEFlags = {2'b0, 8'b00001111};
|
||||
localparam DirectPTEFlags = {2'b0, 8'b00001111};
|
||||
|
||||
logic [`VPN_BITS-1:0] PCPageNumber, MemAdrPageNumber;
|
||||
|
||||
@ -133,17 +133,22 @@ module pagetablewalker (
|
||||
assign PageTypeF = PageType;
|
||||
assign PageTypeM = PageType;
|
||||
|
||||
localparam IDLE = 3'h0;
|
||||
localparam LEVEL1 = 3'h1;
|
||||
localparam LEVEL0 = 3'h2;
|
||||
localparam LEAF = 3'h3;
|
||||
localparam FAULT = 3'h4;
|
||||
|
||||
logic [2:0] WalkerState, NextWalkerState;
|
||||
|
||||
generate
|
||||
if (`XLEN == 32) begin
|
||||
logic [9:0] VPN1, VPN0;
|
||||
|
||||
assign SvMode = SATP_REGW[31];
|
||||
|
||||
typedef enum {IDLE, LEVEL1, LEVEL0, LEAF, FAULT} walker_statetype;
|
||||
walker_statetype WalkerState, NextWalkerState;
|
||||
|
||||
// *** Do we need a synchronizer here for walker to talk to ahblite?
|
||||
flopenl #(.TYPE(walker_statetype)) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
|
||||
flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
|
||||
|
||||
// State transition logic
|
||||
always_comb begin
|
||||
@ -154,7 +159,8 @@ module pagetablewalker (
|
||||
// else if (~ValidPTE || (LeafPTE && BadMegapage))
|
||||
// NextWalkerState = FAULT;
|
||||
// *** Leave megapage implementation for later
|
||||
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF;
|
||||
// *** need to check if megapage valid/aligned
|
||||
else if (ValidPTE && LeafPTE) NextWalkerState = LEAF;
|
||||
else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0;
|
||||
else NextWalkerState = FAULT;
|
||||
LEVEL0: if (~MMUReady) NextWalkerState = LEVEL0;
|
||||
@ -165,6 +171,8 @@ module pagetablewalker (
|
||||
else NextWalkerState = IDLE;
|
||||
FAULT: if (MMUTranslate) NextWalkerState = LEVEL1;
|
||||
else NextWalkerState = IDLE;
|
||||
// Default case should never happen, but is included for linter.
|
||||
default: NextWalkerState = IDLE;
|
||||
endcase
|
||||
end
|
||||
|
||||
@ -179,38 +187,41 @@ module pagetablewalker (
|
||||
// Assign combinational outputs
|
||||
always_comb begin
|
||||
// default values
|
||||
assign TranslationPAdr = '0;
|
||||
assign PageTableEntry = '0;
|
||||
assign PageType ='0;
|
||||
assign MMUTranslationComplete = '0;
|
||||
assign DTLBWriteM = '0;
|
||||
assign ITLBWriteF = '0;
|
||||
assign InstrPageFaultM = '0;
|
||||
assign LoadPageFaultM = '0;
|
||||
assign StorePageFaultM = '0;
|
||||
TranslationPAdr = '0;
|
||||
PageTableEntry = '0;
|
||||
PageType ='0;
|
||||
MMUTranslationComplete = '0;
|
||||
DTLBWriteM = '0;
|
||||
ITLBWriteF = '0;
|
||||
InstrPageFaultF = '0;
|
||||
LoadPageFaultM = '0;
|
||||
StorePageFaultM = '0;
|
||||
|
||||
case (NextWalkerState)
|
||||
LEVEL1: begin
|
||||
assign TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00};
|
||||
TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00};
|
||||
end
|
||||
LEVEL0: begin
|
||||
assign TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
|
||||
TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
|
||||
end
|
||||
LEAF: begin
|
||||
// Keep physical address alive to prevent HADDR dropping to 0
|
||||
assign TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
|
||||
assign PageTableEntry = CurrentPTE;
|
||||
assign PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00;
|
||||
assign MMUTranslationComplete = '1;
|
||||
assign DTLBWriteM = DTLBMissM;
|
||||
assign ITLBWriteF = ~DTLBMissM; // Prefer data over instructions
|
||||
TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
|
||||
PageTableEntry = CurrentPTE;
|
||||
PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00;
|
||||
MMUTranslationComplete = '1;
|
||||
DTLBWriteM = DTLBMissM;
|
||||
ITLBWriteF = ~DTLBMissM; // Prefer data over instructions
|
||||
end
|
||||
FAULT: begin
|
||||
assign TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
|
||||
assign MMUTranslationComplete = '1;
|
||||
assign InstrPageFaultM = ~DTLBMissM;
|
||||
assign LoadPageFaultM = DTLBMissM && ~MemStore;
|
||||
assign StorePageFaultM = DTLBMissM && MemStore;
|
||||
TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
|
||||
MMUTranslationComplete = '1;
|
||||
InstrPageFaultF = ~DTLBMissM;
|
||||
LoadPageFaultM = DTLBMissM && ~MemStore;
|
||||
StorePageFaultM = DTLBMissM && MemStore;
|
||||
end
|
||||
default: begin
|
||||
// nothing
|
||||
end
|
||||
endcase
|
||||
end
|
||||
@ -226,30 +237,30 @@ module pagetablewalker (
|
||||
assign MMUPAdr = TranslationPAdr[31:0];
|
||||
|
||||
end else begin
|
||||
localparam LEVEL2 = 3'h5;
|
||||
|
||||
assign SvMode = SATP_REGW[63];
|
||||
|
||||
logic [8:0] VPN2, VPN1, VPN0;
|
||||
|
||||
logic GigapageMisaligned, BadGigapage;
|
||||
|
||||
typedef enum {IDLE, LEVEL2, LEVEL1, LEVEL0, LEAF, FAULT} walker_statetype;
|
||||
walker_statetype WalkerState, NextWalkerState;
|
||||
|
||||
// *** Do we need a synchronizer here for walker to talk to ahblite?
|
||||
flopenl #(.TYPE(walker_statetype)) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
|
||||
flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
|
||||
|
||||
always_comb begin
|
||||
case (WalkerState)
|
||||
IDLE: if (MMUTranslate) NextWalkerState = LEVEL2;
|
||||
else NextWalkerState = IDLE;
|
||||
LEVEL2: if (~MMUReady) NextWalkerState = LEVEL2;
|
||||
else if (ValidPTE && LeafPTE) NextWalkerState = LEAF;
|
||||
else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL1;
|
||||
else NextWalkerState = FAULT;
|
||||
LEVEL1: if (~MMUReady) NextWalkerState = LEVEL1;
|
||||
// else if (~ValidPTE || (LeafPTE && BadMegapage))
|
||||
// NextWalkerState = FAULT;
|
||||
// *** Leave megapage implementation for later
|
||||
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF;
|
||||
else if (ValidPTE && LeafPTE) NextWalkerState = LEAF;
|
||||
else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0;
|
||||
else NextWalkerState = FAULT;
|
||||
LEVEL0: if (~MMUReady) NextWalkerState = LEVEL0;
|
||||
@ -260,6 +271,8 @@ module pagetablewalker (
|
||||
else NextWalkerState = IDLE;
|
||||
FAULT: if (MMUTranslate) NextWalkerState = LEVEL2;
|
||||
else NextWalkerState = IDLE;
|
||||
// Default case should never happen, but is included for linter.
|
||||
default: NextWalkerState = IDLE;
|
||||
endcase
|
||||
end
|
||||
|
||||
@ -279,42 +292,45 @@ module pagetablewalker (
|
||||
// *** Should translate this flop block into our flop module notation
|
||||
always_comb begin
|
||||
// default values
|
||||
assign TranslationPAdr = '0;
|
||||
assign PageTableEntry = '0;
|
||||
assign PageType = '0;
|
||||
assign MMUTranslationComplete = '0;
|
||||
assign DTLBWriteM = '0;
|
||||
assign ITLBWriteF = '0;
|
||||
assign InstrPageFaultM = '0;
|
||||
assign LoadPageFaultM = '0;
|
||||
assign StorePageFaultM = '0;
|
||||
TranslationPAdr = '0;
|
||||
PageTableEntry = '0;
|
||||
PageType = '0;
|
||||
MMUTranslationComplete = '0;
|
||||
DTLBWriteM = '0;
|
||||
ITLBWriteF = '0;
|
||||
InstrPageFaultF = '0;
|
||||
LoadPageFaultM = '0;
|
||||
StorePageFaultM = '0;
|
||||
|
||||
case (NextWalkerState)
|
||||
LEVEL2: begin
|
||||
assign TranslationPAdr = {BasePageTablePPN, VPN2, 3'b000};
|
||||
TranslationPAdr = {BasePageTablePPN, VPN2, 3'b000};
|
||||
end
|
||||
LEVEL1: begin
|
||||
assign TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
|
||||
TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
|
||||
end
|
||||
LEVEL0: begin
|
||||
assign TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
|
||||
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
|
||||
end
|
||||
LEAF: begin
|
||||
// Keep physical address alive to prevent HADDR dropping to 0
|
||||
assign TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
|
||||
assign PageTableEntry = CurrentPTE;
|
||||
assign PageType = (WalkerState == LEVEL2) ? 2'b11 :
|
||||
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
|
||||
PageTableEntry = CurrentPTE;
|
||||
PageType = (WalkerState == LEVEL2) ? 2'b11 :
|
||||
((WalkerState == LEVEL1) ? 2'b01 : 2'b00);
|
||||
assign MMUTranslationComplete = '1;
|
||||
assign DTLBWriteM = DTLBMissM;
|
||||
assign ITLBWriteF = ~DTLBMissM; // Prefer data over instructions
|
||||
MMUTranslationComplete = '1;
|
||||
DTLBWriteM = DTLBMissM;
|
||||
ITLBWriteF = ~DTLBMissM; // Prefer data over instructions
|
||||
end
|
||||
FAULT: begin
|
||||
assign TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
|
||||
assign MMUTranslationComplete = '1;
|
||||
assign InstrPageFaultM = ~DTLBMissM;
|
||||
assign LoadPageFaultM = DTLBMissM && ~MemStore;
|
||||
assign StorePageFaultM = DTLBMissM && MemStore;
|
||||
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
|
||||
MMUTranslationComplete = '1;
|
||||
InstrPageFaultF = ~DTLBMissM;
|
||||
LoadPageFaultM = DTLBMissM && ~MemStore;
|
||||
StorePageFaultM = DTLBMissM && MemStore;
|
||||
end
|
||||
default: begin
|
||||
// nothing
|
||||
end
|
||||
endcase
|
||||
end
|
||||
@ -331,4 +347,4 @@ module pagetablewalker (
|
||||
end
|
||||
endgenerate
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
@ -48,7 +48,7 @@ module add(r, s, t, sum,
|
||||
|
||||
// Compound adder
|
||||
// Consists of 3:2 CSA followed by long compound CPA
|
||||
assign prodshifted = killprod ? 0 : {56'b0, r2, 2'b0} + {56'b0, s2, 2'b0};
|
||||
assign prodshifted = killprod ? 0 : {56'b0, r2+s2, 2'b0};
|
||||
assign sum0 = {1'b0,prodshifted} + t2 + 158'b0;
|
||||
assign sum1 = {1'b0,prodshifted} + t2 + 158'b1; // +1 from invert of z above
|
||||
|
||||
|
@ -56,7 +56,7 @@ module align(zman, ae, aligncnt, xzero, yzero, zzero, zdenorm, proddenorm, t, bs
|
||||
// addend on right shifts. Handle special cases of shifting
|
||||
// by too much.
|
||||
|
||||
always @(aligncnt or zman or zdenorm)
|
||||
always @(aligncnt or xzero or yzero or zman or zdenorm or zzero)
|
||||
begin
|
||||
|
||||
// Default to clearing sticky bits
|
||||
@ -67,26 +67,23 @@ module align(zman, ae, aligncnt, xzero, yzero, zzero, zdenorm, proddenorm, t, bs
|
||||
killprod = xzero | yzero;
|
||||
// d = aligncnt
|
||||
// p = 53
|
||||
if ($signed(aligncnt) <= $signed(-103)) begin //d<=-2p+1
|
||||
if ($signed(aligncnt) <= $signed(-105)) begin //d<=-2p+1
|
||||
//product anchored case with saturated shift
|
||||
sumshift = 163; // 3p+4
|
||||
sumshiftzero = 0;
|
||||
shift = {~zdenorm,zman,163'b0} >> sumshift;
|
||||
shift = {1'b1,zman,163'b0} >> sumshift;
|
||||
t = zzero ? 0 : {shift[215:52]};
|
||||
bs = |(shift[51:0]);
|
||||
//zexpsel = 0;
|
||||
end else if($signed(aligncnt) <= $signed(1)) begin // -2p+1<d<=2
|
||||
// set d<=2 to d<=0
|
||||
end else if($signed(aligncnt) <= $signed(2)) begin // -2p+1<d<=2
|
||||
// product anchored or cancellation
|
||||
// warning: set to 55 rather than 56. was there a typo in the book?
|
||||
sumshift = 57-aligncnt; // p + 3 - d
|
||||
sumshift = 57-aligncnt; // p + 2 - d
|
||||
sumshiftzero = 0;
|
||||
shift = {~zdenorm,zman,163'b0} >> sumshift;
|
||||
t = zzero ? 0 : {shift[215:52]};
|
||||
bs = |(shift[51:0]);
|
||||
//zexpsel = 0;
|
||||
end else if ($signed(aligncnt)<=$signed(55)) begin // 2 < d <= p+2
|
||||
// another typo in book? above was 55 changed to 52
|
||||
// addend anchored case
|
||||
// used to be 56 \/ something doesn't seem right; too many typos
|
||||
sumshift = 57-aligncnt;
|
||||
|
55
wally-pipelined/src/fpu/FMA/booth.sv
Normal file
55
wally-pipelined/src/fpu/FMA/booth.sv
Normal file
@ -0,0 +1,55 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
// booth: recodes one 3-bit window of the multiplier into a partial product.
//
//   xExt   - multiplicand
//   choose - 3-bit window selecting the multiple of xExt (0, 1, 2, -2, -1, -0)
//   pp     - selected multiple; negative multiples are emitted as the bitwise
//            inversion of the positive value, so they still need a correction
//   add1   - correction to add at the LSB of pp to finish the negation
//            (2'b10 for -2, 2'b01 for -1 and -0, 2'b00 otherwise)
//   e      - 1 whenever a negative multiple (including -0) is selected,
//            i.e. whenever choose[2] is set
/////////////////////////////////////////////////////////////////////////////
module booth(
  input  logic [53:0] xExt,    // multiplicand xExt
  input  logic [2:0]  choose,  // bits needed to choose which encoding
  output logic [1:0]  add1,    // do you add 1 (or 2)
  output logic        e,       // negative-multiple indicator
  output logic [54:0] pp       // the resultant encoding
);

  logic [53:0] negx;

  assign negx = ~xExt;

  // One combinational decode drives all three outputs, so every output is
  // assigned on every path and the selections cannot drift apart.
  always_comb
    case (choose)
      3'b000: begin                       // 0
        pp   = '0;
        e    = 1'b0;
        add1 = 2'b00;
      end
      3'b001,
      3'b010: begin                       // 1
        pp   = {1'b0, xExt};
        e    = 1'b0;
        add1 = 2'b00;
      end
      3'b011: begin                       // 2
        pp   = {xExt, 1'b0};
        e    = 1'b0;
        add1 = 2'b00;
      end
      3'b100: begin                       // -2
        // ~(2x) = {negx, 1'b1}; emitting {negx, 1'b0} and adding 2 yields -2x
        pp   = {negx, 1'b0};
        e    = 1'b1;
        add1 = 2'b10;
      end
      3'b101,
      3'b110: begin                       // -1
        pp   = {1'b1, negx};
        e    = 1'b1;
        add1 = 2'b01;
      end
      3'b111: begin                       // -0
        // all ones plus one wraps to zero; '1 sizes itself to pp's 55 bits
        pp   = '1;
        e    = 1'b1;
        add1 = 2'b01;
      end
    endcase

endmodule
|
90
wally-pipelined/src/fpu/FMA/compressors.sv
Normal file
90
wally-pipelined/src/fpu/FMA/compressors.sv
Normal file
@ -0,0 +1,90 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
// add3comp2: BITS-wide 3:2 compressor.
// Bit i of a, b and c feeds its own sng3comp2 cell, which produces carry[i]
// and sum[i]; there is no connection between neighbouring bit positions.
// TODO: look into different implementations of the compressors?
/////////////////////////////////////////////////////////////////////////////
module add3comp2 #(parameter BITS = 4) (
  input  [BITS-1:0] a,
  input  [BITS-1:0] b,
  input  [BITS-1:0] c,
  output [BITS-1:0] carry,
  output [BITS-1:0] sum
);

  genvar i;

  generate
    for (i = 0; i < BITS; i = i + 1) begin
      // one independent single-bit compressor per bit position
      sng3comp2 add0(
        .a     (a[i]),
        .b     (b[i]),
        .c     (c[i]),
        .carry (carry[i]),
        .sum   (sum[i])
      );
    end
  endgenerate

endmodule
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
// add4comp2: BITS-wide 4:2 compressor built from a chain of sng4comp2 cells.
//
// Each cell satisfies  a + b + c + d + cin = sum + 2*(carry + cout),
// and cout of bit i feeds cin of bit i+1. carry[i] therefore carries twice
// the weight of sum[i]; a user adds {carry, 1'b0} to sum.
//
// The last cell's carry and cout have the same weight (2^BITS). They are
// merged with a half adder: the XOR stays at that weight in carry[BITS-1]
// and the AND moves up one position into carry[BITS].
//
// Requires BITS >= 2: bit 0 and bit BITS-1 are instantiated separately and
// the top cell reads cout[BITS-2].
/////////////////////////////////////////////////////////////////////////////
module add4comp2 #(parameter BITS = 4) (
  input  [BITS-1:0] a,
  input  [BITS-1:0] b,
  input  [BITS-1:0] c,
  input  [BITS-1:0] d,
  output [BITS:0]   carry,
  output [BITS-1:0] sum
);

  logic [BITS-1:0] cout;      // inter-cell carry chain
  logic            carryTmp;  // carry output of the most significant cell
  genvar i;

  // Least significant cell: no incoming chain carry.
  sng4comp2 add0(a[0], b[0], c[0], d[0], 1'b0, cout[0], carry[0], sum[0]);

  // Middle cells: cin comes from the previous cell's cout.
  generate
    for (i = 1; i < BITS-1; i = i + 1) begin
      sng4comp2 add1(a[i], b[i], c[i], d[i], cout[i-1], cout[i], carry[i], sum[i]);
    end
  endgenerate

  // Most significant cell: its carry is kept aside so it can be merged with
  // the final cout below.
  sng4comp2 add2(a[BITS-1], b[BITS-1], c[BITS-1], d[BITS-1], cout[BITS-2], cout[BITS-1], carryTmp, sum[BITS-1]);

  // Half adder over the two equally weighted top bits: XOR is the sum at the
  // same weight, AND is the carry into the next weight.
  assign carry[BITS-1] = carryTmp ^ cout[BITS-1];
  assign carry[BITS]   = carryTmp & cout[BITS-1];

endmodule
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
// sng3comp2: single-bit 3:2 compressor.
// sum is the XOR of the three inputs. carry selects c when a and b differ
// and a when they are equal.
// TODO: look into different implementations of the compressors?
/////////////////////////////////////////////////////////////////////////////
module sng3comp2(
  input  a,
  input  b,
  input  c,
  output carry,
  output sum
);

  logic abDiffer;  // 1 when a and b are not equal

  assign abDiffer = a ^ b;

  // mux form kept deliberately: when a == b the carry is that shared value,
  // otherwise it follows c
  assign carry = abDiffer ? c : a;
  assign sum   = abDiffer ^ c;

endmodule
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
// sng4comp2: single-bit 4:2 compressor made of two cascaded sng3comp2 cells.
// The first cell compresses a, b, c into cout and an intermediate sum; the
// second compresses that intermediate sum with d and cin into carry and sum.
// TODO: look into pass gate 4:2 counters?
/////////////////////////////////////////////////////////////////////////////
module sng4comp2(
  input  a,
  input  b,
  input  c,
  input  d,
  input  cin,
  output cout,
  output carry,
  output sum
);

  logic firstSum;  // sum output of the first stage

  // Stage 1: a + b + c
  sng3comp2 add1(
    .a     (a),
    .b     (b),
    .c     (c),
    .carry (cout),
    .sum   (firstSum)
  );

  // Stage 2: stage-1 sum + d + cin
  sng3comp2 add2(
    .a     (firstSum),
    .b     (d),
    .c     (cin),
    .carry (carry),
    .sum   (sum)
  );

endmodule
|
@ -17,7 +17,7 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module expgen(xexp, yexp, zexp,
|
||||
killprod, sumzero, resultdenorm, normcnt, infinity,
|
||||
invalid, overflow, underflow, inf, xzero, yzero,expplus1,
|
||||
FmaFlagsM, inf, xzero, yzero,expplus1,
|
||||
nan, de0, xnan, ynan, znan, xdenorm, ydenorm, zdenorm, proddenorm, specialsel, zexpsel,
|
||||
aligncnt, wexp,
|
||||
prodof, sumof, sumuf, denorm0, ae);
|
||||
@ -31,9 +31,7 @@ module expgen(xexp, yexp, zexp,
|
||||
input resultdenorm; // postnormalize rounded result
|
||||
input [8:0] normcnt; // normalization shift count
|
||||
input infinity; // generate infinity on overflow
|
||||
input invalid; // Result invalid
|
||||
input overflow; // Result overflowed
|
||||
input underflow; // Result underflowed
|
||||
input [4:0] FmaFlagsM; // status flags: [4] invalid, [3] divide by zero, [2] overflow, [1] underflow, [0] inexact
|
||||
input inf; // Some input is infinity
|
||||
input nan; // Some input is NaN
|
||||
input [12:0] de0; // X is NaN NaN
|
||||
@ -121,10 +119,10 @@ module expgen(xexp, yexp, zexp,
|
||||
// produces either infinity or the largest finite number, depending on the
|
||||
// rounding mode. NaNs are propagated or generated.
|
||||
|
||||
assign specialres = invalid | nan ? nanres : // KEP added nan
|
||||
overflow ? infinityres :
|
||||
assign specialres = FmaFlagsM[4] | nan ? nanres : // invalid
|
||||
FmaFlagsM[2] ? infinityres : //overflow
|
||||
inf ? 11'b11111111111 :
|
||||
underflow ? 11'b0 : 11'bx;
|
||||
FmaFlagsM[1] ? 11'b0 : 11'bx; //underflow
|
||||
|
||||
assign infinityres = infinity ? 11'b11111111111 : 11'b11111111110;
|
||||
|
||||
|
@ -10,12 +10,13 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
|
||||
psign, zsign, xzero, yzero, zzero, vbits, killprod,
|
||||
inf, nan, invalid, overflow, underflow, inexact);
|
||||
inf, nan, FmaFlagsM,sticky);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input xnan; // X is NaN
|
||||
input ynan; // Y is NaN
|
||||
input znan; // Z is NaN
|
||||
input sticky; // sticky bit
|
||||
input xinf; // X is Inf
|
||||
input yinf; // Y is Inf
|
||||
input zinf; // Z is Inf
|
||||
@ -31,10 +32,7 @@ module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
|
||||
input [1:0] vbits; // R and S bits of result
|
||||
output inf; // Some source is Inf
|
||||
output nan; // Some source is NaN
|
||||
output invalid; // Result is invalid
|
||||
output overflow; // Result overflowed
|
||||
output underflow; // Result underflowed
|
||||
output inexact; // Result is not an exact number
|
||||
output [4:0] FmaFlagsM;
|
||||
|
||||
// Internal nodes
|
||||
|
||||
@ -55,33 +53,36 @@ module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
|
||||
|
||||
assign prodinf = prodof && ~xnan && ~ynan;
|
||||
//KEP added if the product is infinity then sum is infinity
|
||||
assign suminf = prodinf | sumof && ~xnan && ~ynan && ~znan;
|
||||
assign suminf = sumof && ~xnan && ~ynan && ~znan;
|
||||
|
||||
// Set invalid flag for following cases:
|
||||
// 1) Inf - Inf
|
||||
// 2) 0 * Inf
|
||||
// 3) Output = NaN (this is not part of the IEEE spec, only 486 proj)
|
||||
|
||||
assign invalid = (xinf || yinf || prodinf) && zinf && (psign ^ zsign) ||
|
||||
assign FmaFlagsM[4] = (xinf || yinf || prodinf) && zinf && (psign ^ zsign) ||
|
||||
xzero && yinf || yzero && xinf;// KEP remove case 3) above
|
||||
|
||||
assign FmaFlagsM[3] = 0; // divide by zero flag
|
||||
|
||||
|
||||
// Set the overflow flag for the following cases:
|
||||
// 1) Rounded multiply result would be out of bounds
|
||||
// 2) Rounded add result would be out of bounds
|
||||
|
||||
assign overflow = suminf && ~inf;
|
||||
assign FmaFlagsM[2] = suminf && ~inf;
|
||||
|
||||
// Set the underflow flag for the following cases:
|
||||
// 1) Any input is denormalized
|
||||
// 2) Output would be denormalized or smaller
|
||||
|
||||
assign underflow = (sumuf && ~inf && ~prodinf && ~nan) || (killprod & zzero & ~(yzero | xzero));
|
||||
assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinf && ~nan) || (killprod & zzero & ~(yzero | xzero));
|
||||
|
||||
// Set the inexact flag for the following cases:
|
||||
// 1) Multiplication inexact
|
||||
// 2) Addition inexact
|
||||
// One of these cases occurred if the R or S bit is set
|
||||
|
||||
assign inexact = (vbits[0] || vbits[1] || suminf) && ~(inf || nan);
|
||||
assign FmaFlagsM[0] = (vbits[0] || vbits[1] ||sticky || suminf) && ~(inf || nan);
|
||||
|
||||
endmodule
|
||||
|
@ -15,13 +15,13 @@
|
||||
// normalize Normalization shifter
|
||||
// round Rounding of result
|
||||
// exception Handles exceptional cases
|
||||
// bypass Handles bypass of result to X or Z inputs
|
||||
// bypass Handles bypass of result to ReadData1E or ReadData3E inputs
|
||||
// sign One bit sign handling block
|
||||
// special Catch special cases (inputs = 0 / infinity / etc.)
|
||||
//
|
||||
// The FMAC computes W=X*Y+Z, rounded with the mode specified by
|
||||
// The FMAC computes FmaResultM=ReadData1E*ReadData2E+ReadData3E, rounded with the mode specified by
|
||||
// RN, RZ, RM, or RP. The result is optionally bypassed back to
|
||||
// the X or Z inputs for use on the next cycle. In addition, four signals
|
||||
// the ReadData1E or ReadData3E inputs for use on the next cycle. In addition, four signals
|
||||
// are produced: trap, overflow, underflow, and inexact. Trap indicates
|
||||
// an infinity, NaN, or denormalized number to be handled in software;
|
||||
// the other three signals are IEEE flags.
|
||||
@ -29,29 +29,17 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module fmac(x, y, z, rn, rz, rp, rm,
|
||||
earlyres, earlyressel, bypsel, bypplus1, byppostnorm,
|
||||
w, wbypass, invalid, overflow, underflow, inexact);
|
||||
module fma(ReadData1E, ReadData2E, ReadData3E, FrmE,
|
||||
FmaResultM, FmaFlagsM, aligncnt);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [63:0] x; // input X from reg file
|
||||
input [63:0] y; // input Y
|
||||
input [63:0] z; // input Z from reg file
|
||||
input rn; // Round to Nearest
|
||||
input rz; // Round toward zero
|
||||
input rm; // Round toward minus infinity
|
||||
input rp; // Round toward plus infinity
|
||||
input [63:0] earlyres; // Early result from other FP logic
|
||||
input earlyressel; // Select early result, not W
|
||||
input [1:0] bypsel; // Select W bypass to X, or z
|
||||
input bypplus1; // Add one in bypass
|
||||
input byppostnorm; // postnormalize in bypass
|
||||
output [63:0] w; // output W=X*Y+Z
|
||||
output [63:0] wbypass; // prerounded output W=X*Y+Z for bypass
|
||||
output invalid; // Result is invalid
|
||||
output overflow; // Result overflowed
|
||||
output underflow; // Result underflowed
|
||||
output inexact; // Result is not an exact number
|
||||
input [63:0] ReadData1E; // input 1
|
||||
input [63:0] ReadData2E; // input 2
|
||||
input [63:0] ReadData3E; // input 3
|
||||
input [2:0] FrmE; // Rounding mode
|
||||
output [63:0] FmaResultM; // output FmaResultM=ReadData1E*ReadData2E+ReadData3E
|
||||
output [4:0] FmaFlagsM; // status flags
|
||||
output [12:0] aligncnt; // shift count for alignment
|
||||
|
||||
// Internal nodes
|
||||
|
||||
@ -60,12 +48,12 @@ module fmac(x, y, z, rn, rz, rp, rm,
|
||||
logic [163:0] t; // output of alignment shifter
|
||||
logic [163:0] sum; // output of carry prop adder
|
||||
logic [53:0] v; // normalized sum, R, S bits
|
||||
logic [12:0] aligncnt; // shift count for alignment
|
||||
// logic [12:0] aligncnt; // shift count for alignment
|
||||
logic [8:0] normcnt; // shift count for normalizer
|
||||
logic [12:0] ae; // multiplier exponent
|
||||
logic bs; // sticky bit of addend
|
||||
logic ps; // sticky bit of product
|
||||
logic killprod; // Z >> product
|
||||
logic killprod; // ReadData3E >> product
|
||||
logic negsum; // negate sum
|
||||
logic invz; // invert addend
|
||||
logic selsum1; // select +1 mode of sum
|
||||
@ -73,7 +61,7 @@ module fmac(x, y, z, rn, rz, rp, rm,
|
||||
logic negsum1; // sum +1 < 0
|
||||
logic sumzero; // sum = 0
|
||||
logic infinity; // generate infinity on overflow
|
||||
logic prodof; // X*Y out of range
|
||||
logic prodof; // ReadData1E*ReadData2E out of range
|
||||
logic sumof; // result out of range
|
||||
logic xzero;
|
||||
logic yzero;
|
||||
@ -101,6 +89,9 @@ module fmac(x, y, z, rn, rz, rp, rm,
|
||||
logic [8:0] sumshift;
|
||||
logic sumshiftzero;
|
||||
logic [12:0] de0;
|
||||
logic isAdd;
|
||||
|
||||
assign isAdd = 1;
|
||||
|
||||
|
||||
|
||||
@ -117,16 +108,16 @@ module fmac(x, y, z, rn, rz, rp, rm,
|
||||
|
||||
// Instantiate fraction datapath
|
||||
|
||||
multiply multiply(.xman(x[51:0]), .yman(y[51:0]), .*);
|
||||
align align(.zman(z[51:0]),.*);
|
||||
multiply multiply(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]), .*);
|
||||
align align(.zman(ReadData3E[51:0]),.*);
|
||||
add add(.*);
|
||||
lza lza(.*);
|
||||
normalize normalize(.zexp(z[62:52]),.*);
|
||||
round round(.xman(x[51:0]), .yman(y[51:0]),.zman(z[51:0]), .wman(w[51:0]),.wsign(w[63]),.*);
|
||||
normalize normalize(.xexp(ReadData1E[62:52]),.yexp(ReadData2E[62:52]),.zexp(ReadData3E[62:52]),.*);
|
||||
round round(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]),.zman(ReadData3E[51:0]), .wman(FmaResultM[51:0]),.wsign(FmaResultM[63]),.*);
|
||||
|
||||
// Instantiate exponent datapath
|
||||
|
||||
expgen expgen(.xexp(x[62:52]),.yexp(y[62:52]),.zexp(z[62:52]),.wexp(w[62:52]),.*);
|
||||
expgen expgen(.xexp(ReadData1E[62:52]),.yexp(ReadData2E[62:52]),.zexp(ReadData3E[62:52]),.wexp(FmaResultM[62:52]),.*);
|
||||
// Instantiate special case detection across datapath & exponent path
|
||||
|
||||
special special(.*);
|
||||
@ -134,8 +125,8 @@ module fmac(x, y, z, rn, rz, rp, rm,
|
||||
|
||||
// Instantiate control logic
|
||||
|
||||
sign sign(.xsign(x[63]),.ysign(y[63]),.zsign(z[63]),.wsign(w[63]),.*);
|
||||
flag flag(.zsign(z[63]),.vbits(v[1:0]),.*);
|
||||
sign sign(.xsign(ReadData1E[63]),.ysign(ReadData2E[63]),.zsign(ReadData3E[63]),.wsign(FmaResultM[63]),.*);
|
||||
flag flag(.zsign(ReadData3E[63]),.vbits(v[1:0]),.*);
|
||||
|
||||
endmodule
|
||||
|
@ -30,7 +30,7 @@ module lza(sum, normcnt, sumzero);
|
||||
always @ ( sum)
|
||||
begin
|
||||
i = 0;
|
||||
while (~sum[108-i] && i < 108) i = i+1; // search for leading one
|
||||
while (~sum[163-i] && i <= 163) i = i+1; // search for leading one
|
||||
normcnt = i; // compute shift count
|
||||
end
|
||||
|
||||
|
@ -10,8 +10,124 @@ module multiply(xman, yman, xdenorm, ydenorm, xzero, yzero, r, s);
|
||||
input yzero; // Z is denorm
|
||||
output [105:0] r; // partial product 1
|
||||
output [105:0] s; // partial product 2
|
||||
|
||||
wire [54:0] yExt; //y with appended 0 and assumed 1
|
||||
wire [53:0] xExt; //x with assumed 1
|
||||
wire [26:0][1:0] add1;
|
||||
wire [26:0][54:0] pp;
|
||||
wire [26:0] e;
|
||||
logic [17:0][105:0] lv1add;
|
||||
logic [11:0][105:0] lv2add;
|
||||
logic [7:0][105:0] lv3add;
|
||||
logic [3:0][105:0] lv4add;
|
||||
logic [21:0][106:0] carryTmp;
|
||||
wire [26:0][105:0] acc;
|
||||
// wire [105:0] acc
|
||||
genvar i;
|
||||
|
||||
assign r = 106'b0;
|
||||
assign s = {53'b0,~(xdenorm|xzero),xman} * {53'b0,~(ydenorm|yzero),yman};
|
||||
assign xExt = {2'b0,~(xdenorm|xzero),xman};
|
||||
assign yExt = {2'b0,~(ydenorm|yzero),yman, 1'b0};
|
||||
|
||||
generate
|
||||
for(i=0; i<27; i=i+1) begin
|
||||
booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i]));
|
||||
end
|
||||
endgenerate
|
||||
|
||||
assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]};
|
||||
assign acc[1] = {50'b01,~e[1],pp[1],add1[0]};
|
||||
assign acc[2] = {48'b01,~e[2],pp[2],add1[1], 2'b0};
|
||||
assign acc[3] = {46'b01,~e[3],pp[3],add1[2], 4'b0};
|
||||
assign acc[4] = {44'b01,~e[4],pp[4],add1[3], 6'b0};
|
||||
assign acc[5] = {42'b01,~e[5],pp[5],add1[4], 8'b0};
|
||||
assign acc[6] = {40'b01,~e[6],pp[6],add1[5], 10'b0};
|
||||
assign acc[7] = {38'b01,~e[7],pp[7],add1[6], 12'b0};
|
||||
assign acc[8] = {36'b01,~e[8],pp[8],add1[7], 14'b0};
|
||||
assign acc[9] = {34'b01,~e[9],pp[9],add1[8], 16'b0};
|
||||
assign acc[10] = {32'b01,~e[10],pp[10],add1[9], 18'b0};
|
||||
assign acc[11] = {30'b01,~e[11],pp[11],add1[10], 20'b0};
|
||||
assign acc[12] = {28'b01,~e[12],pp[12],add1[11], 22'b0};
|
||||
assign acc[13] = {26'b01,~e[13],pp[13],add1[12], 24'b0};
|
||||
assign acc[14] = {24'b01,~e[14],pp[14],add1[13], 26'b0};
|
||||
assign acc[15] = {22'b01,~e[15],pp[15],add1[14], 28'b0};
|
||||
assign acc[16] = {20'b01,~e[16],pp[16],add1[15], 30'b0};
|
||||
assign acc[17] = {18'b01,~e[17],pp[17],add1[16], 32'b0};
|
||||
assign acc[18] = {16'b01,~e[18],pp[18],add1[17], 34'b0};
|
||||
assign acc[19] = {14'b01,~e[19],pp[19],add1[18], 36'b0};
|
||||
assign acc[20] = {12'b01,~e[20],pp[20],add1[19], 38'b0};
|
||||
assign acc[21] = {10'b01,~e[21],pp[21],add1[20], 40'b0};
|
||||
assign acc[22] = {8'b01,~e[22],pp[22],add1[21], 42'b0};
|
||||
assign acc[23] = {6'b01,~e[23],pp[23],add1[22], 44'b0};
|
||||
assign acc[24] = {4'b01,~e[24],pp[24],add1[23], 46'b0};
|
||||
assign acc[25] = {~e[25],pp[25],add1[24], 48'b0};
|
||||
assign acc[26] = {pp[26],add1[25], 50'b0};
|
||||
|
||||
//*** resize adders
|
||||
generate
|
||||
for(i=0; i<9; i=i+1) begin
|
||||
add3comp2 #(.BITS(106)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]),
|
||||
.carry(carryTmp[i][105:0]), .sum(lv1add[i*2+1]));
|
||||
assign lv1add[i*2] = {carryTmp[i][104:0], 1'b0};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
generate
|
||||
for(i=0; i<6; i=i+1) begin
|
||||
add3comp2 #(.BITS(106)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]),
|
||||
.carry(carryTmp[i+9][105:0]), .sum(lv2add[i*2+1]));
|
||||
assign lv2add[i*2] = {carryTmp[i+9][104:0], 1'b0};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
generate
|
||||
for(i=0; i<4; i=i+1) begin
|
||||
add3comp2 #(.BITS(106)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]),
|
||||
.carry(carryTmp[i+15][105:0]), .sum(lv3add[i*2+1]));
|
||||
assign lv3add[i*2] = {carryTmp[i+15][104:0], 1'b0};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
||||
generate
|
||||
for(i=0; i<2; i=i+1) begin
|
||||
add4comp2 #(.BITS(106)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
|
||||
.carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
|
||||
assign lv4add[i*2] = {carryTmp[i+19][104:0], 1'b0};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
add4comp2 #(.BITS(106)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
|
||||
.carry(carryTmp[21]), .sum(s));
|
||||
assign r = {carryTmp[21][104:0], 1'b0};
|
||||
// assign r = 0;
|
||||
// assign s = acc[0] +
|
||||
// acc[1] +
|
||||
// acc[2] +
|
||||
// acc[3] +
|
||||
// acc[4] +
|
||||
// acc[5] +
|
||||
// acc[6] +
|
||||
// acc[7] +
|
||||
// acc[8] +
|
||||
// acc[9] +
|
||||
// acc[10] +
|
||||
// acc[11] +
|
||||
// acc[12] +
|
||||
// acc[13] +
|
||||
// acc[14] +
|
||||
// acc[15] +
|
||||
// acc[16] +
|
||||
// acc[17] +
|
||||
// acc[18] +
|
||||
// acc[19] +
|
||||
// acc[20] +
|
||||
// acc[21] +
|
||||
// acc[22] +
|
||||
// acc[23] +
|
||||
// acc[24] +
|
||||
// acc[25] +
|
||||
// acc[26];
|
||||
|
||||
// assign s = {53'b0,~(xdenorm|xzero),xman} * {53'b0,~(ydenorm|yzero),yman};
|
||||
// assign r = 0;
|
||||
endmodule
|
||||
|
@ -14,9 +14,11 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module normalize(sum, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero, sumzero, xzero, yzero, bs, ps, denorm0, xdenorm, ydenorm, zdenorm, sticky, de0, resultdenorm, v);
|
||||
module normalize(sum, xexp, yexp, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero, sumzero, xzero, zzero, yzero, bs, ps, denorm0, xdenorm, ydenorm, zdenorm, sticky, de0, resultdenorm, v);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
input [163:0] sum; // sum
|
||||
input [62:52] xexp; // sum
|
||||
input [62:52] yexp; // sum
|
||||
input [62:52] zexp; // sum
|
||||
input [8:0] normcnt; // normalization shift count
|
||||
input [12:0] ae; // normalization shift count
|
||||
@ -33,6 +35,7 @@ module normalize(sum, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero,
|
||||
input zdenorm; // Input Z is denormalized
|
||||
input xzero;
|
||||
input yzero;
|
||||
input zzero;
|
||||
output sticky; //sticky bit
|
||||
output [12:0] de0;
|
||||
output resultdenorm; // Input Z is denormalized
|
||||
@ -47,6 +50,7 @@ module normalize(sum, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero,
|
||||
logic [9:0] sumshifttmp;
|
||||
logic [163:0] sumshiftedtmp; // shifted sum
|
||||
logic sticky;
|
||||
logic isShiftLeft1;
|
||||
logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
|
||||
// When the sum is zero, normalization does not apply and only the
|
||||
@ -60,21 +64,23 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
// The sticky bit calculation is actually built into the shifter and
|
||||
// does not require a true subtraction shown in the model.
|
||||
|
||||
assign isShiftLeft1 = (aligncnt == 1 ||aligncnt == 0 || $signed(aligncnt) == $signed(-1))&& zexp == 11'h2;//((xexp == 11'h3ff && yexp == 11'h1) || (yexp == 11'h3ff && xexp == 11'h1)) && zexp == 11'h2;
|
||||
assign tmp = ($signed(ae-normcnt+2) >= $signed(-1022));
|
||||
always @(sum or sumshift or ae or aligncnt or normcnt or bs or zexp or zdenorm)
|
||||
always @(sum or sumshift or ae or aligncnt or normcnt or bs or isShiftLeft1 or zexp or zdenorm)
|
||||
begin
|
||||
// d = aligncnt
|
||||
// l = normcnt
|
||||
// p = 53
|
||||
// ea + eb = ae
|
||||
// set d<=2 to d<=0
|
||||
if ($signed(aligncnt)<=$signed(1)) begin //d<=2
|
||||
if ($signed(aligncnt)<=$signed(2)) begin //d<=2
|
||||
// product anchored or cancellation
|
||||
if ($signed(ae-normcnt+2) >= $signed(-1022)) begin //ea+eb-l+2 >= emin
|
||||
//normal result
|
||||
de0 = xzero|yzero ? zexp : ae-normcnt+2+xdenorm+ydenorm;
|
||||
resultdenorm = |sum & ~|de0;
|
||||
sumshifted = resultdenorm ? sum << sumshift : sum << (55+normcnt); // p+2+l
|
||||
de0 = xzero|yzero ? zexp : ae-normcnt+xdenorm+ydenorm+57;
|
||||
resultdenorm = |sum & ~|de0 | de0[12];
|
||||
// if z is zero then there was a 56 bit shift of the product
|
||||
sumshifted = resultdenorm ? sum << sumshift-zzero+isShiftLeft1 : sum << normcnt; // p+2+l
|
||||
v = sumshifted[162:109];
|
||||
sticky = (|sumshifted[108:0]) | bs;
|
||||
//de0 = ae-normcnt+2-1023;
|
||||
@ -90,8 +96,8 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
sumshifttmp = {1'b0,sumshift} - 2;
|
||||
sumshifted = sumshifttmp[9] ? sum : sum << sumshifttmp;
|
||||
tmp1 = (sumshifted[163] & ~sumshifttmp[9]);
|
||||
tmp2 = (sumshifttmp[9] || sumshifted[162]);
|
||||
tmp3 = sumshifted[161];
|
||||
tmp2 = ((sumshifttmp[9] & sumshift[0]) || sumshifted[162]);
|
||||
tmp3 = (sumshifted[161] || (sumshifttmp[9] & sumshift[1]));
|
||||
tmp4 = sumshifted[160];
|
||||
tmp5 = sumshifted[159];
|
||||
// for some reason use exp = zexp + {0,1,2}
|
||||
@ -112,25 +118,31 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
v = sumshifted[160:107];
|
||||
sticky = (|sumshifted[106:0]) | bs;
|
||||
//de0 = zexp-1;
|
||||
de0 = zexp;
|
||||
end else if(sumshifted[160]) begin
|
||||
v = sumshifted[159:106];
|
||||
de0 = zexp+zdenorm;
|
||||
end else if(sumshifted[160]& ~zdenorm) begin
|
||||
de0 = zexp-1;
|
||||
v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
|
||||
sticky = (|sumshifted[105:0]) | bs;
|
||||
//de0 = zexp-1;
|
||||
de0 = zexp-1;
|
||||
end else if(sumshifted[159]) begin
|
||||
v = sumshifted[158:105];
|
||||
end else if(sumshifted[159]& ~zdenorm) begin
|
||||
//v = sumshifted[158:105];
|
||||
de0 = zexp-2;
|
||||
v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
|
||||
sticky = (|sumshifted[104:0]) | bs;
|
||||
//de0 = zexp-1;
|
||||
de0 = zexp-2;
|
||||
end else begin
|
||||
end else if(zdenorm) begin
|
||||
v = sumshifted[160:107];
|
||||
sticky = (|sumshifted[106:0]) | bs;
|
||||
//de0 = zexp-1;
|
||||
de0 = zexp;
|
||||
end else begin
|
||||
de0 = 0;
|
||||
sumshifted = sum << sumshift-1; // p+2+l
|
||||
v = sumshifted[162:109];
|
||||
sticky = (|sumshifted[108:0]) | bs;
|
||||
end
|
||||
|
||||
resultdenorm = ~(|de0);
|
||||
resultdenorm = (~|de0 | de0[12]);
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -13,22 +13,17 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module round(v, sticky, rz, rn, rp, rm, wsign,
|
||||
invalid, overflow, underflow, inf, nan, xnan, ynan, znan,
|
||||
module round(v, sticky, FrmE, wsign,
|
||||
FmaFlagsM, inf, nan, xnan, ynan, znan,
|
||||
xman, yman, zman,
|
||||
wman, infinity, specialsel,expplus1);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [53:0] v; // normalized sum, R, S bits
|
||||
input sticky; //sticky bit
|
||||
input rz; // Round toward zero
|
||||
input rn; // Round toward nearest
|
||||
input rp; // Round toward plus infinity
|
||||
input rm; // Round toward minus infinity
|
||||
input [2:0] FrmE;
|
||||
input wsign; // Sign of result
|
||||
input invalid; // Trap on infinity, NaN, denorm
|
||||
input overflow; // Result overflowed
|
||||
input underflow; // Result underflowed
|
||||
input [4:0] FmaFlagsM;
|
||||
input inf; // Some input is infinity
|
||||
input nan; // Some input is NaN
|
||||
input xnan; // X is NaN
|
||||
@ -45,7 +40,7 @@ module round(v, sticky, rz, rn, rp, rm, wsign,
|
||||
|
||||
// Internal nodes
|
||||
|
||||
wire plus1; // Round by adding one
|
||||
logic plus1; // Round by adding one
|
||||
wire [52:0] v1; // Result + 1 (for rounding)
|
||||
wire [51:0] specialres; // Result of exceptional case
|
||||
wire [51:0] infinityres; // Infinity or largest real number
|
||||
@ -62,9 +57,19 @@ module round(v, sticky, rz, rn, rp, rm, wsign,
|
||||
// 0xx - do nothing
|
||||
// 100 - tie - plus1 if v[2] = 1
|
||||
// 101/110/111 - plus1
|
||||
assign plus1 = (rn & v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2]))) |
|
||||
(rp & ~wsign) |
|
||||
(rm & wsign);
|
||||
always @ (FrmE, v, wsign, sticky) begin
|
||||
case (FrmE)
|
||||
3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2])));//round to nearest even
|
||||
3'b001: plus1 = 0;//round to zero
|
||||
3'b010: plus1 = wsign;//round down
|
||||
3'b011: plus1 = ~wsign;//round up
|
||||
3'b100: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&~wsign)));//round to nearest max magnitude
|
||||
default: plus1 = 1'bx;
|
||||
endcase
|
||||
end
|
||||
// assign plus1 = (rn & v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2]))) |
|
||||
// (rp & ~wsign) |
|
||||
// (rm & wsign);
|
||||
//assign plus1 = rn && ((v[1] && v[0]) || (v[2] && (v[1]))) ||
|
||||
// rp && ~wsign && (v[1] || v[0]) ||
|
||||
// rm && wsign && (v[1] || v[0]);
|
||||
@ -84,17 +89,17 @@ module round(v, sticky, rz, rn, rp, rm, wsign,
|
||||
// inputs to the wide muxes can be combined at the expense of more
|
||||
// complicated non-critical control in the circuit implementation.
|
||||
|
||||
assign specialsel = overflow || underflow || invalid ||
|
||||
assign specialsel = FmaFlagsM[2] || FmaFlagsM[1] || FmaFlagsM[4] || //overflow underflow invalid
|
||||
nan || inf;
|
||||
assign specialres = invalid | nan ? nanres : //KEP added nan
|
||||
overflow ? infinityres :
|
||||
assign specialres = FmaFlagsM[4] | nan ? nanres : //invalid
|
||||
FmaFlagsM[2] ? infinityres : //overflow
|
||||
inf ? 52'b0 :
|
||||
underflow ? 52'b0 : 52'bx; // default to undefined
|
||||
FmaFlagsM[1] ? 52'b0 : 52'bx; // underflow
|
||||
|
||||
// Overflow is handled differently for different rounding modes
|
||||
// Round is to either infinity or to maximum finite number
|
||||
|
||||
assign infinity = rn || (rp && ~wsign) || (rm && wsign);
|
||||
assign infinity = |FrmE;//rn || (rp && ~wsign) || (rm && wsign);//***look into this
|
||||
assign infinityres = infinity ? 52'b0 : {52{1'b1}};
|
||||
|
||||
// Invalid operations produce a quiet NaN. The result should
|
||||
|
@ -10,23 +10,24 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, rm, overflow,
|
||||
sumzero, nan, invalid, xinf, yinf, zinf, inf, wsign, invz, negsum, selsum1, psign);
|
||||
module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, FrmE, FmaFlagsM, zzero,
|
||||
sumzero, nan, xinf, yinf, zinf, inf, wsign, invz, negsum, selsum1, psign, isAdd);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input xsign; // Sign of X
|
||||
input ysign; // Sign of Y
|
||||
input zsign; // Sign of Z
|
||||
input zzero;
|
||||
input isAdd;
|
||||
input negsum0; // Sum in +O mode is negative
|
||||
input negsum1; // Sum in +1 mode is negative
|
||||
input bs; // sticky bit from addend
|
||||
input ps; // sticky bit from product
|
||||
input killprod; // Product forced to zero
|
||||
input rm; // Round toward minus infinity
|
||||
input overflow; // Round toward minus infinity
|
||||
input [2:0] FrmE; // Round toward minus infinity
|
||||
input [4:0] FmaFlagsM; // Round toward minus infinity
|
||||
input sumzero; // Sum = O
|
||||
input nan; // Some input is NaN
|
||||
input invalid; // Result invalid
|
||||
input xinf; // X = Inf
|
||||
input yinf; // Y = Inf
|
||||
input zinf; // Y = Inf
|
||||
@ -96,10 +97,24 @@ logic tmp;
|
||||
// shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero
|
||||
// sum/difference shall be -0. However, x+x = x-(-x) retains the same sign as x even when x is zero."
|
||||
|
||||
assign zerosign = (~invz && killprod) ? zsign : rm;
|
||||
//assign zerosign = (~invz && killprod) ? zsign : rm;//***look into
|
||||
// assign zerosign = (~invz && killprod) ? zsign : 0;
|
||||
// zero sign
|
||||
// if product underflows then use psign
|
||||
// otherwise
|
||||
// addition
|
||||
// if cancelation then 0 unless round to -inf
|
||||
// otherwise psign
|
||||
// subtraction
|
||||
// if cancelation then 0 unless round to -inf
|
||||
// otherwise psign
|
||||
|
||||
assign zerosign = FmaFlagsM[1] ? psign :
|
||||
(isAdd ? (psign^zsign ? FrmE == 3'b010 : psign) :
|
||||
(psign^zsign ? psign : FrmE == 3'b010));
|
||||
assign infsign = zinf ? zsign : psign; //KEP 210112 keep the correct sign when result is infinity
|
||||
//assign infsign = xinf ? (yinf ? psign : xsign) : yinf ? ysign : zsign;//original
|
||||
assign tmp = invalid ? 0 : (inf ? infsign :(sumzero ? zerosign : psign ^ negsum));
|
||||
assign wsign = invalid ? 0 : (inf ? infsign :(sumzero ? zerosign : sumneg));
|
||||
assign tmp = FmaFlagsM[4] ? 0 : (inf ? infsign :(sumzero ? zerosign : psign ^ negsum));
|
||||
assign wsign = FmaFlagsM[4] ? 0 : (inf ? infsign :(sumzero ? zerosign : sumneg));
|
||||
|
||||
endmodule
|
||||
|
@ -10,49 +10,49 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module special(x, y, z, ae, xzero, yzero, zzero,
|
||||
module special(ReadData1E, ReadData2E, ReadData3E, ae, xzero, yzero, zzero,
|
||||
xnan, ynan, znan, xdenorm, ydenorm, zdenorm, proddenorm, xinf, yinf, zinf);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [63:0] x; // Input x
|
||||
input [63:0] y; // Input Y
|
||||
input [63:0] z; // Input z
|
||||
input [63:0] ReadData1E; // Input ReadData1E
|
||||
input [63:0] ReadData2E; // Input ReadData2E
|
||||
input [63:0] ReadData3E; // Input ReadData3E
|
||||
input [12:0] ae; // exponent of product
|
||||
output xzero; // Input x = 0
|
||||
output yzero; // Input y = 0
|
||||
output zzero; // Input z = 0
|
||||
output xnan; // x is NaN
|
||||
output ynan; // y is NaN
|
||||
output znan; // z is NaN
|
||||
output xdenorm; // x is denormalized
|
||||
output ydenorm; // y is denormalized
|
||||
output zdenorm; // z is denormalized
|
||||
output xzero; // Input ReadData1E = 0
|
||||
output yzero; // Input ReadData2E = 0
|
||||
output zzero; // Input ReadData3E = 0
|
||||
output xnan; // ReadData1E is NaN
|
||||
output ynan; // ReadData2E is NaN
|
||||
output znan; // ReadData3E is NaN
|
||||
output xdenorm; // ReadData1E is denormalized
|
||||
output ydenorm; // ReadData2E is denormalized
|
||||
output zdenorm; // ReadData3E is denormalized
|
||||
output proddenorm; // product is denormalized
|
||||
output xinf; // x is infinity
|
||||
output yinf; // y is infinity
|
||||
output zinf; // z is infinity
|
||||
output xinf; // ReadData1E is infinity
|
||||
output yinf; // ReadData2E is infinity
|
||||
output zinf; // ReadData3E is infinity
|
||||
|
||||
// In the actual circuit design, the gates looking at bits
|
||||
// 51:0 and at bits 62:52 should be shared among the various detectors.
|
||||
|
||||
// Check if input is NaN
|
||||
|
||||
assign xnan = &x[62:52] && |x[51:0];
|
||||
assign ynan = &y[62:52] && |y[51:0];
|
||||
assign znan = &z[62:52] && |z[51:0];
|
||||
assign xnan = &ReadData1E[62:52] && |ReadData1E[51:0];
|
||||
assign ynan = &ReadData2E[62:52] && |ReadData2E[51:0];
|
||||
assign znan = &ReadData3E[62:52] && |ReadData3E[51:0];
|
||||
|
||||
// Check if input is denormalized
|
||||
|
||||
assign xdenorm = ~(|x[62:52]) && |x[51:0];
|
||||
assign ydenorm = ~(|y[62:52]) && |y[51:0];
|
||||
assign zdenorm = ~(|z[62:52]) && |z[51:0];
|
||||
assign xdenorm = ~(|ReadData1E[62:52]) && |ReadData1E[51:0];
|
||||
assign ydenorm = ~(|ReadData2E[62:52]) && |ReadData2E[51:0];
|
||||
assign zdenorm = ~(|ReadData3E[62:52]) && |ReadData3E[51:0];
|
||||
assign proddenorm = &ae & ~xzero & ~yzero; //KEP is the product denormalized
|
||||
|
||||
// Check if input is infinity
|
||||
|
||||
assign xinf = &x[62:52] && ~(|x[51:0]);
|
||||
assign yinf = &y[62:52] && ~(|y[51:0]);
|
||||
assign zinf = &z[62:52] && ~(|z[51:0]);
|
||||
assign xinf = &ReadData1E[62:52] && ~(|ReadData1E[51:0]);
|
||||
assign yinf = &ReadData2E[62:52] && ~(|ReadData2E[51:0]);
|
||||
assign zinf = &ReadData3E[62:52] && ~(|ReadData3E[51:0]);
|
||||
|
||||
// Check if inputs are all zero
|
||||
// Also forces denormalized inputs to zero.
|
||||
@ -60,11 +60,11 @@ module special(x, y, z, ae, xzero, yzero, zzero,
|
||||
// to just check if the exponent is zero.
|
||||
|
||||
// KATHERINE - commented following (21/01/11)
|
||||
// assign xzero = ~(|x[62:0]) || xdenorm;
|
||||
// assign yzero = ~(|y[62:0]) || ydenorm;
|
||||
// assign zzero = ~(|z[62:0]) || zdenorm;
|
||||
// assign xzero = ~(|ReadData1E[62:0]) || xdenorm;
|
||||
// assign yzero = ~(|ReadData2E[62:0]) || ydenorm;
|
||||
// assign zzero = ~(|ReadData3E[62:0]) || zdenorm;
|
||||
// KATHERINE - removed denorm to prevent outputting zero when computing with a denormalized number
|
||||
assign xzero = ~(|x[62:0]);
|
||||
assign yzero = ~(|y[62:0]);
|
||||
assign zzero = ~(|z[62:0]);
|
||||
assign xzero = ~(|ReadData1E[62:0]);
|
||||
assign yzero = ~(|ReadData2E[62:0]);
|
||||
assign zzero = ~(|ReadData3E[62:0]);
|
||||
endmodule
|
||||
|
@ -1,16 +1 @@
|
||||
0010000000000000 bf4fdffffff7fffe 800ffffffffffffe 800003fbfffffefe 801003fbfffffefe Wrong zdenorm 308227
|
||||
0010000000000000 be6fffffbffffff7 8000000000000000 800000001fffffc0 800000000fffffe0 Wrong 313753
|
||||
001ffffffffffffe 3fddfbffffffffff 000ffffffffffffe 000efdfffffffffd 001efdfffffffffd Wrong zdenorm 551371
|
||||
3befe000ffffffff 800ffffffffffffe 0000000000000000 0000000000000000 8000000000000000 Wrong ydenorm unflw 665575
|
||||
000007fffffffffe 3f6ffffffe01fffe 000ffffffffffffe 00000007ffffff7e 00100007ffffff7e Wrong xdenorm zdenorm 768727
|
||||
3fdffffffffffffe 000ffffffffffffe 8000000000000001 7feffffffffffff6 0007fffffffffffe Wrong ydenorm zdenorm 1049939
|
||||
7fe0000000000001 4000000000000000 ffefffffffffffff 7ff0000000000000 7cb8000000000000 Wrong w=+inf 2602745
|
||||
000fff000000000f 3ff00800001fffff 8010000000000000 7f7bfe007ff8381e 000006ff801ffe0e Wrong xdenorm 3117277
|
||||
8000000000000001 40211275ffe5ee3c 0000000000000001 fcfe24ebffcbdc78 8000000000000008 Wrong xdenorm zdenorm 3148591
|
||||
801fffffffffffff bfdffffffffffffe 0000000000021fff 0000000000021ffe 0010000000021ffe Wrong zdenorm 3537867
|
||||
801ffffffffffffe 0010000000000001 0000000000000000 0000000000000000 8000000000000000 Wrong unflw 3564269
|
||||
bca0000000000001 000fffffc000001e 8000000000000000 8000000000000001 8000000000000000 Wrong ydenorm 3717769
|
||||
bcafffffffffffff 800ffffffffffffe 8000000000000000 0000000000000002 0000000000000001 Wrong ydenorm 3807413
|
||||
7fec5fed92358a74 400000001bffffff ffefc0003ffffffe 7ff0000000000000 7fe8ffdb47bad466 Wrong w=+inf 3889689
|
||||
bfdfffffffffffff 3fdf1f3616aa73e1 3fd0000000000001 3fd07064f4aac611 3f7c193d2ab1843f Wrong 4099063
|
||||
3fd07dfffffffffe 8010000000000001 0000000000000001 ffe07dfffffffffb 80041f7fffffffff Wrong zdenorm 4716133
|
||||
c3f000200003fffe 0000000000000001 001ffffffffffffe 80cffc400007fffd 80cffc400007fffc Wrong FmaResultM= -64 ydenorm 1119653
|
||||
|
Binary file not shown.
@ -20,19 +20,19 @@ void main() {
|
||||
// b68ffff8000000ff_3f9080000007ffff_b6307ffbe0080080_00001
|
||||
char ch;
|
||||
int i,j,n;
|
||||
char x[17];
|
||||
char y[17];
|
||||
char z[17];
|
||||
char ReadData1E[17];
|
||||
char ReadData2E[17];
|
||||
char ReadData3E[17];
|
||||
char ans[81];
|
||||
char flags[3];
|
||||
int rn,rz,rm,rp;
|
||||
long stop = 4099063;
|
||||
int FrmE;
|
||||
long stop = 1119653;
|
||||
int debug = 1;
|
||||
//my_string = (char *) malloc (nbytes + 1);
|
||||
//bytes_read = getline (&my_string, &nbytes, stdin);
|
||||
|
||||
|
||||
for(n=0; n < 613; n++) {//613 for 10000
|
||||
for(n=0; n < 305; n++) {//613 for 10000
|
||||
if(getline(&ln,&nbytes,fp) < 0 || feof(fp)) break;
|
||||
if(k == stop && debug == 1) break;
|
||||
k++;
|
||||
@ -41,71 +41,59 @@ void main() {
|
||||
|
||||
if(!feof(fp)) {
|
||||
|
||||
strncpy(x, ln, 16); x[16]=0;
|
||||
strncpy(y, &ln[17], 16); y[16]=0;
|
||||
strncpy(z, &ln[34], 16); z[16]=0;
|
||||
// fprintf(stdout,"[%s]\n[%s]\n", ln,z);
|
||||
strncpy(ReadData1E, ln, 16); ReadData1E[16]=0;
|
||||
strncpy(ReadData2E, &ln[17], 16); ReadData2E[16]=0;
|
||||
strncpy(ReadData3E, &ln[34], 16); ReadData3E[16]=0;
|
||||
// fprintf(stdout,"[%s]\n[%s]\n", ln,ReadData3E);
|
||||
strncpy(ans, &ln[51], 16); ans[16]=0;
|
||||
strncpy(flags,&ln[68],2); flags[2]=0;
|
||||
|
||||
// fprintf(stdout,"[%s]\n[%s]\n", ln,z);
|
||||
fprintf(fq," x = 64'h%s;\n",x);
|
||||
fprintf(fq," y = 64'h%s;\n",y);
|
||||
fprintf(fq," z = 64'h%s;\n",z);
|
||||
// fprintf(stdout,"[%s]\n[%s]\n", ln,ReadData3E);
|
||||
fprintf(fq," ReadData1E = 64'h%s;\n",ReadData1E);
|
||||
fprintf(fq," ReadData2E = 64'h%s;\n",ReadData2E);
|
||||
fprintf(fq," ReadData3E = 64'h%s;\n",ReadData3E);
|
||||
fprintf(fq," ans = 64'h%s;\n", ans);
|
||||
// fprintf(fq," flags = 5'h%s;\n", flags);
|
||||
|
||||
|
||||
{
|
||||
//rn=1; rz=0; rm=0; rp=0;
|
||||
fprintf(fq," rn = %d;\n",1);
|
||||
fprintf(fq," rz = %d;\n", 0);
|
||||
fprintf(fq," rm = %d;\n", 0);
|
||||
fprintf(fq," rp = %d;\n", 0);
|
||||
}
|
||||
{
|
||||
fprintf(fq," earlyres = 64'b0;\n");
|
||||
fprintf(fq," earlyressel = 0;\n");
|
||||
}
|
||||
{
|
||||
|
||||
fprintf(fq," bypsel= 2'b0;\n"); //, bysel);
|
||||
fprintf(fq," bypplus1 = 0;\n"); //, byp1);
|
||||
fprintf(fq," byppostnorm = 0;\n"); //, bypnorm);
|
||||
fprintf(fq," FrmE = 3'b000;\n");
|
||||
}
|
||||
fprintf(fq,"#10\n");
|
||||
// IEEE 754-2008 section 6.3 states "When either an input or result is NaN, this standard does not interpret the sign of a NaN."
|
||||
//fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h \",x,y,w, ans);\n");
|
||||
//fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h \",ReadData1E,ReadData2E,FmaResultM, ans);\n");
|
||||
fprintf(fq," // IEEE 754-2008 section 6.3 states: \"When ether an input or result is NaN, this\n");
|
||||
fprintf(fq," // standard does not interpret the sign of a NaN.\"\n");
|
||||
fprintf(fq," wnan = &w[62:52] && |w[51:0]; \n");
|
||||
fprintf(fq," xnan = &x[62:52] && |x[51:0]; \n");
|
||||
fprintf(fq," ynan = &y[62:52] && |y[51:0]; \n");
|
||||
fprintf(fq," znan = &z[62:52] && |z[51:0]; \n");
|
||||
fprintf(fq," wnan = &FmaResultM[62:52] && |FmaResultM[51:0]; \n");
|
||||
fprintf(fq," xnan = &ReadData1E[62:52] && |ReadData1E[51:0]; \n");
|
||||
fprintf(fq," ynan = &ReadData2E[62:52] && |ReadData2E[51:0]; \n");
|
||||
fprintf(fq," znan = &ReadData3E[62:52] && |ReadData3E[51:0]; \n");
|
||||
fprintf(fq," ansnan = &ans[62:52] && |ans[51:0]; \n");
|
||||
fprintf(fq," xnorm = ~(|x[62:52]) && |x[51:0] ? {x[50:0], 1'b0} : x; \n");
|
||||
fprintf(fq," ynorm = ~(|y[62:52]) && |y[51:0] ? {y[50:0], 1'b0} : y;\n");
|
||||
fprintf(fq," s = ({54'b1,xnorm} + (bypsel && bypplus1)) * {54'b1,ynorm}; \n");
|
||||
// fprintf(fq," if(!(~(|x[62:52]) && |x[51:0] || ~(|y[62:52]) && |y[51:0])) begin\n");
|
||||
fprintf(fq," xnorm = ~(|ReadData1E[62:52]) && |ReadData1E[51:0] ? {ReadData1E[50:0], 1'b0} : ReadData1E; \n");
|
||||
fprintf(fq," ynorm = ~(|ReadData2E[62:52]) && |ReadData2E[51:0] ? {ReadData2E[50:0], 1'b0} : ReadData2E;\n");
|
||||
// fprintf(fq," s = ({54'b1,xnorm} + (bypsel && bypplus1)) * {54'b1,ynorm}; \n");
|
||||
// fprintf(fq," if(!(~(|ReadData1E[62:52]) && |ReadData1E[51:0] || ~(|ReadData2E[62:52]) && |ReadData2E[51:0])) begin\n");
|
||||
// not looking at negative zero results right now
|
||||
//fprintf(fq," if( (nan && (w[62:0] != ans[62:0])) || (!nan && (w != ans)) && !(w == 64'h8000000000000000 && ans == 64'b0)) begin\n");
|
||||
// fprintf(fq," if( (nan && (w[62:0] != ans[62:0])) || (!nan && (w != ans)) ) begin\n");
|
||||
fprintf(fq," if((!wnan && (w != ans)) || (wnan && ansnan && ~(((xnan && (w[62:0] == {x[62:52],1'b1,x[50:0]})) || (ynan && (w[62:0] == {y[62:52],1'b1,y[50:0]})) || (znan && (w[62:0] == {z[62:52],1'b1,z[50:0]})) || (w[62:0] == ans[62:0])) ))) begin\n");
|
||||
fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h %%h Wrong \",x,y, z, w, ans);\n");
|
||||
//fprintf(fq," if( (nan && (FmaResultM[62:0] != ans[62:0])) || (!nan && (FmaResultM != ans)) && !(FmaResultM == 64'h8000000000000000 && ans == 64'b0)) begin\n");
|
||||
// fprintf(fq," if( (nan && (FmaResultM[62:0] != ans[62:0])) || (!nan && (FmaResultM != ans)) ) begin\n");
|
||||
fprintf(fq," if((!wnan && (FmaResultM != ans)) || (wnan && ansnan && ~(((xnan && (FmaResultM[62:0] == {ReadData1E[62:52],1'b1,ReadData1E[50:0]})) || (ynan && (FmaResultM[62:0] == {ReadData2E[62:52],1'b1,ReadData2E[50:0]})) || (znan && (FmaResultM[62:0] == {ReadData3E[62:52],1'b1,ReadData3E[50:0]})) || (FmaResultM[62:0] == ans[62:0])) ))) begin\n");
|
||||
fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h %%h Wrong \",ReadData1E,ReadData2E, ReadData3E, FmaResultM, ans);\n");
|
||||
//fprintf(fq," $fwrite(fp, \"%%h \",s);\n");
|
||||
fprintf(fq," if(w == 64'h8000000000000000) $fwrite(fp, \"w=-zero \");\n");
|
||||
fprintf(fq," if(~(|x[62:52]) && |x[51:0]) $fwrite(fp, \"xdenorm \");\n");
|
||||
fprintf(fq," if(~(|y[62:52]) && |y[51:0]) $fwrite(fp, \"ydenorm \");\n");
|
||||
fprintf(fq," if(~(|z[62:52]) && |z[51:0]) $fwrite(fp, \"zdenorm \");\n");
|
||||
fprintf(fq," if(invalid != 0) $fwrite(fp, \"invld \");\n");
|
||||
fprintf(fq," if(overflow != 0) $fwrite(fp, \"ovrflw \");\n");
|
||||
fprintf(fq," if(underflow != 0) $fwrite(fp, \"unflw \");\n");
|
||||
fprintf(fq," if(w == 64'hFFF0000000000000) $fwrite(fp, \"w=-inf \");\n");
|
||||
fprintf(fq," if(w == 64'h7FF0000000000000) $fwrite(fp, \"w=+inf \");\n");
|
||||
fprintf(fq," if(w > 64'h7FF0000000000000 && w < 64'h7FF8000000000000 ) $fwrite(fp, \"w=sigNaN \");\n");
|
||||
fprintf(fq," if(w > 64'hFFF8000000000000 && w < 64'hFFF8000000000000 ) $fwrite(fp, \"w=sigNaN \");\n");
|
||||
fprintf(fq," if(w >= 64'h7FF8000000000000 && w <= 64'h7FFfffffffffffff ) $fwrite(fp, \"w=qutNaN \");\n");
|
||||
fprintf(fq," if(w >= 64'hFFF8000000000000 && w <= 64'hFFFfffffffffffff ) $fwrite(fp, \"w=qutNaN \");\n");
|
||||
fprintf(fq," $fwrite(fp, \"FmaResultM=%%d \",$signed(aligncnt));\n");
|
||||
fprintf(fq," if(FmaResultM == 64'h8000000000000000) $fwrite(fp, \"FmaResultM=-zero \");\n");
|
||||
fprintf(fq," if(~(|ReadData1E[62:52]) && |ReadData1E[51:0]) $fwrite(fp, \"xdenorm \");\n");
|
||||
fprintf(fq," if(~(|ReadData2E[62:52]) && |ReadData2E[51:0]) $fwrite(fp, \"ydenorm \");\n");
|
||||
fprintf(fq," if(~(|ReadData3E[62:52]) && |ReadData3E[51:0]) $fwrite(fp, \"zdenorm \");\n");
|
||||
fprintf(fq," if(FmaFlagsM[4] != 0) $fwrite(fp, \"invld \");\n");
|
||||
fprintf(fq," if(FmaFlagsM[2] != 0) $fwrite(fp, \"ovrflw \");\n");
|
||||
fprintf(fq," if(FmaFlagsM[1] != 0) $fwrite(fp, \"unflw \");\n");
|
||||
fprintf(fq," if(FmaResultM == 64'hFFF0000000000000) $fwrite(fp, \"FmaResultM=-inf \");\n");
|
||||
fprintf(fq," if(FmaResultM == 64'h7FF0000000000000) $fwrite(fp, \"FmaResultM=+inf \");\n");
|
||||
fprintf(fq," if(FmaResultM > 64'h7FF0000000000000 && FmaResultM < 64'h7FF8000000000000 ) $fwrite(fp, \"FmaResultM=sigNaN \");\n");
|
||||
fprintf(fq," if(FmaResultM > 64'hFFF8000000000000 && FmaResultM < 64'hFFF8000000000000 ) $fwrite(fp, \"FmaResultM=sigNaN \");\n");
|
||||
fprintf(fq," if(FmaResultM >= 64'h7FF8000000000000 && FmaResultM <= 64'h7FFfffffffffffff ) $fwrite(fp, \"FmaResultM=qutNaN \");\n");
|
||||
fprintf(fq," if(FmaResultM >= 64'hFFF8000000000000 && FmaResultM <= 64'hFFFfffffffffffff ) $fwrite(fp, \"FmaResultM=qutNaN \");\n");
|
||||
|
||||
fprintf(fq," if(ans == 64'hFFF0000000000000) $fwrite(fp, \"ans=-inf \");\n");
|
||||
fprintf(fq," if(ans == 64'h7FF0000000000000) $fwrite(fp, \"ans=+inf \");\n");
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2,38 +2,27 @@
|
||||
module tb;
|
||||
|
||||
|
||||
reg [63:0] x;
|
||||
reg [63:0] y;
|
||||
reg [63:0] z;
|
||||
reg [63:0] ans;
|
||||
reg rn;
|
||||
reg rz;
|
||||
reg rm;
|
||||
reg rp;
|
||||
reg [63:0] earlyres;
|
||||
reg earlyressel;
|
||||
reg [1:0] bypsel;
|
||||
reg bypplus1;
|
||||
reg byppostnorm;
|
||||
wire [63:0] w;
|
||||
wire [63:0] wbypass;
|
||||
wire invalid;
|
||||
wire overflow;
|
||||
wire underflow;
|
||||
wire inexact;
|
||||
reg [63:0] ReadData1E;
|
||||
reg [63:0] ReadData2E;
|
||||
reg [63:0] ReadData3E;
|
||||
reg [63:0] ans;
|
||||
reg [2:0] FrmE;
|
||||
wire [63:0] FmaResultM;
|
||||
wire [4:0] FmaFlagsM;
|
||||
|
||||
integer fp;
|
||||
reg wnan;
|
||||
reg xnan;
|
||||
reg ynan;
|
||||
reg znan;
|
||||
wire [12:0] aligncnt;
|
||||
reg ansnan;
|
||||
reg [105:0] s; // partial product 2
|
||||
reg [51:0] xnorm;
|
||||
reg [51:0] ynorm;
|
||||
|
||||
localparam period = 20;
|
||||
fmac UUT(.*);
|
||||
fma UUT(.*);
|
||||
|
||||
|
||||
initial
|
||||
|
@ -1 +1 @@
|
||||
testfloat_gen f64_mulAdd -n 6133248 -rnear_even -seed 113355 -level 1 >> testFloat
|
||||
testfloat_gen f64_mulAdd -n 6133248 -rminMag -seed 113355 -level 1 >> testFloat
|
||||
|
@ -50,7 +50,7 @@ module FA_array (S, C, A, B, Ci) ;
|
||||
genvar i;
|
||||
generate
|
||||
for (i = 0; i < n; i = i + 1) begin : index
|
||||
fa FA1(.S(S[i]), .C(C[i]), .A(A[i]), .B(B[i]), .Ci(Ci[i]));
|
||||
fa FA1(.sum(S[i]), .carry(C[i]), .a(A[i]), .b(B[i]), .c(Ci[i]));
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
@ -22,6 +22,7 @@ module fpu (
|
||||
//signals, modules, and combinational logic closely defined.
|
||||
|
||||
//used for OSU DP-size hardware to wally XLEN interfacing
|
||||
|
||||
integer XLENDIFF;
|
||||
assign XLENDIFF = `XLEN - 64;
|
||||
integer XLENDIFFN;
|
||||
@ -465,13 +466,18 @@ module fpu (
|
||||
always_comb begin
|
||||
|
||||
//zero extension
|
||||
if(`XLEN > 64) begin
|
||||
FPUResultW <= {FPUResultDirW,{XLENDIFF{1'b0}}};
|
||||
end
|
||||
|
||||
// Teo 04/13/2021
|
||||
// Commented out XLENDIFF{1'b0} due to error:
|
||||
// Repetition multiplier must be constant.
|
||||
|
||||
//if(`XLEN > 64) begin
|
||||
// FPUResultW <= {FPUResultDirW,{XLENDIFF{1'b0}}};
|
||||
//end
|
||||
//truncate
|
||||
else begin
|
||||
//else begin
|
||||
FPUResultW <= FPUResultDirW[63:64-`XLEN];
|
||||
end
|
||||
//end
|
||||
|
||||
end
|
||||
|
||||
|
@ -24,9 +24,6 @@
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
`include "wally-constants.vh"
|
||||
|
||||
module cam_line #(parameter KEY_BITS = 20,
|
||||
parameter HIGH_SEGMENT_BITS = 10) (
|
||||
input clk, reset,
|
||||
@ -74,6 +71,6 @@ module cam_line #(parameter KEY_BITS = 20,
|
||||
// should automatically match.
|
||||
page_number_mixer #(KEY_BITS, HIGH_SEGMENT_BITS) mixer(VirtualPageNumber, Key, PageType, VirtualPageNumberQuery);
|
||||
|
||||
assign Match = ({1'b1, VirtualPageNumberQuery} == Key);
|
||||
assign Match = ({1'b1, VirtualPageNumberQuery} == {Valid, Key});
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
@ -4,8 +4,8 @@
|
||||
// Written: jtorrey@hmc.edu 16 February 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Example translation lookaside buffer
|
||||
// Cache of virtual-to-physical address translations
|
||||
// Purpose: Translation lookaside buffer
|
||||
// Cache of virtual-to-physical address translations
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
@ -24,9 +24,6 @@
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
`include "wally-constants.vh"
|
||||
|
||||
/**
|
||||
* sv32 specs
|
||||
* ----------
|
||||
@ -52,6 +49,9 @@
|
||||
* least recently)
|
||||
*/
|
||||
|
||||
`include "wally-config.vh"
|
||||
`include "wally-constants.vh"
|
||||
|
||||
// The TLB will have 2**ENTRY_BITS total entries
|
||||
module tlb #(parameter ENTRY_BITS = 3) (
|
||||
input clk, reset,
|
||||
@ -127,7 +127,8 @@ module tlb #(parameter ENTRY_BITS = 3) (
|
||||
assign PageOffset = VirtualAddress[11:0];
|
||||
|
||||
// Currently use random replacement algorithm
|
||||
tlb_rand rdm(.*);
|
||||
// tlb_rand rdm(.*);
|
||||
tlb_lru lru(.*);
|
||||
|
||||
tlb_ram #(ENTRY_BITS) ram(.*);
|
||||
tlb_cam #(ENTRY_BITS, `VPN_BITS, `VPN_SEGMENT_BITS) cam(.*);
|
||||
|
@ -64,6 +64,8 @@ module tlb_cam #(parameter ENTRY_BITS = 3,
|
||||
endgenerate
|
||||
|
||||
// In case there are multiple matches in the CAM, select only one
|
||||
// *** it might be guaranteed that the CAM will never have multiple matches.
|
||||
// If so, this is just an encoder
|
||||
priority_encoder #(ENTRY_BITS) match_priority(Matches, VPNIndex);
|
||||
|
||||
assign CAMHit = |Matches & ~TLBFlush;
|
||||
|
69
wally-pipelined/src/mmu/tlb_lru.sv
Normal file
69
wally-pipelined/src/mmu/tlb_lru.sv
Normal file
@ -0,0 +1,69 @@
|
||||
///////////////////////////////////////////
|
||||
// tlb_lru.sv
|
||||
//
|
||||
// Written: tfleming@hmc.edu & jtorrey@hmc.edu 16 February 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Implementation of the bit-based pseudo-least-recently-used algorithm for
|
||||
// cache evictions. Outputs the index of the next entry to be written.
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
||||
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
// tlb_lru: replacement-policy state for the TLB. Keeps one "recently used" (RU)
// bit per entry and drives WriteIndex, the index of the entry to overwrite next.
//   ENTRY_BITS : width of an entry index; 2**ENTRY_BITS RU bits are tracked
//   TLBWrite   : when high, the line at WriteIndex is the one marked as accessed
//   TLBFlush   : wired to the third port of the flopenrc state register
//                (NOTE(review): presumably a synchronous clear -- confirm against flopenrc)
//   VPNIndex   : index of the line marked as accessed when TLBWrite is low
//   CAMHit     : ORed with TLBWrite to enable an update of the RU bits
//   WriteIndex : output of priority_encoder applied to the inverted RU bits
module tlb_lru #(parameter ENTRY_BITS = 3) (
|
||||
input clk, reset,
|
||||
input TLBWrite,
|
||||
input TLBFlush,
|
||||
input [ENTRY_BITS-1:0] VPNIndex,
|
||||
input CAMHit,
|
||||
output [ENTRY_BITS-1:0] WriteIndex
|
||||
);
|
||||
|
||||
// Number of entries tracked (one RU bit each)
localparam NENTRIES = 2**ENTRY_BITS;
|
||||
|
||||
// Keep a "recently-used" record for each TLB entry. On access, set to 1
// RUBits is the registered state, RUBitsAccessed is that state with the current
// access applied, and RUBitsNext is the value loaded into the register.
|
||||
logic [NENTRIES-1:0] RUBits, RUBitsNext, RUBitsAccessed;
|
||||
|
||||
// One-hot encodings of which line is being accessed
|
||||
logic [NENTRIES-1:0] ReadLineOneHot, WriteLineOneHot, AccessLineOneHot;
|
||||
|
||||
// High if the next access causes all RU bits to be 1
|
||||
logic AllUsed;
|
||||
|
||||
// Convert indices to one-hot encodings
|
||||
decoder #(ENTRY_BITS) read_decoder(VPNIndex, ReadLineOneHot);
|
||||
// *** should output writelineonehot so we don't have to decode WriteIndex outside
|
||||
decoder #(ENTRY_BITS) write_decoder(WriteIndex, WriteLineOneHot);
|
||||
|
||||
// Find the first line not recently used
// The encoder input is ~RUBits, so candidate lines are those whose RU bit is 0.
// NOTE(review): which end priority_encoder treats as "first" is defined in that
// module, which is not shown here -- confirm.
|
||||
priority_encoder #(ENTRY_BITS) first_nru(~RUBits, WriteIndex);
|
||||
|
||||
// Access either the hit line or written line
// A write takes precedence: when TLBWrite is high the written line is marked,
// regardless of VPNIndex.
|
||||
assign AccessLineOneHot = (TLBWrite) ? WriteLineOneHot : ReadLineOneHot;
|
||||
|
||||
// Raise the bit of the recently accessed line
|
||||
assign RUBitsAccessed = AccessLineOneHot | RUBits;
|
||||
|
||||
// If this access would leave every RU bit set, start a new round instead:
// keep only the bit of the line being accessed and clear all the others.
// Otherwise store the RU bits with the accessed line's bit raised.
|
||||
assign AllUsed = &(RUBitsAccessed);
|
||||
assign RUBitsNext = (AllUsed) ? AccessLineOneHot : RUBitsAccessed;
|
||||
|
||||
// Update LRU state on any TLB hit or write
// The enable is (CAMHit || TLBWrite); TLBFlush goes to the port between reset
// and the enable (NOTE(review): presumably clears the RU bits -- confirm).
|
||||
flopenrc #(NENTRIES) lru_state(clk, reset, TLBFlush, (CAMHit || TLBWrite),
|
||||
RUBitsNext, RUBits);
|
||||
|
||||
endmodule
|
@ -57,4 +57,4 @@ module tlb_ram #(parameter ENTRY_BITS = 3) (
|
||||
ram[i] = `XLEN'b0;
|
||||
end
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
@ -29,7 +29,7 @@ module tlb_rand #(parameter ENTRY_BITS = 3) (
|
||||
);
|
||||
|
||||
logic [31:0] data;
|
||||
assign data = $urandom;
|
||||
assign data = 32'b0;
|
||||
assign WriteIndex = data[ENTRY_BITS-1:0];
|
||||
|
||||
endmodule
|
||||
|
@ -1479,21 +1479,15 @@ module shifter_l64 (Z, A, Shift);
|
||||
logic [63:0] stage3;
|
||||
logic [63:0] stage4;
|
||||
logic [63:0] stage5;
|
||||
logic [31:0] thirtytwozeros = 32'h0;
|
||||
logic [15:0] sixteenzeros = 16'h0;
|
||||
logic [ 7:0] eightzeros = 8'h0;
|
||||
logic [ 3:0] fourzeros = 4'h0;
|
||||
logic [ 1:0] twozeros = 2'b00;
|
||||
logic onezero = 1'b0;
|
||||
|
||||
output logic [63:0] Z;
|
||||
|
||||
mux2 #(64) mx01(A, {A[31:0], thirtytwozeros}, Shift[5], stage1);
|
||||
mux2 #(64) mx02(stage1, {stage1[47:0], sixteenzeros}, Shift[4], stage2);
|
||||
mux2 #(64) mx03(stage2, {stage2[55:0], eightzeros}, Shift[3], stage3);
|
||||
mux2 #(64) mx04(stage3, {stage3[59:0], fourzeros}, Shift[2], stage4);
|
||||
mux2 #(64) mx05(stage4, {stage4[61:0], twozeros}, Shift[1], stage5);
|
||||
mux2 #(64) mx06(stage5, {stage5[62:0], onezero}, Shift[0], Z);
|
||||
mux2 #(64) mx01(A, {A[31:0], 32'h0}, Shift[5], stage1);
|
||||
mux2 #(64) mx02(stage1, {stage1[47:0], 16'h0}, Shift[4], stage2);
|
||||
mux2 #(64) mx03(stage2, {stage2[55:0], 8'h0}, Shift[3], stage3);
|
||||
mux2 #(64) mx04(stage3, {stage3[59:0], 4'h0}, Shift[2], stage4);
|
||||
mux2 #(64) mx05(stage4, {stage4[61:0], 2'h0}, Shift[1], stage5);
|
||||
mux2 #(64) mx06(stage5, {stage5[62:0], 1'h0}, Shift[0], Z);
|
||||
|
||||
endmodule // shifter_l64
|
||||
|
||||
@ -1507,21 +1501,15 @@ module shifter_r64 (Z, A, Shift);
|
||||
logic [63:0] stage3;
|
||||
logic [63:0] stage4;
|
||||
logic [63:0] stage5;
|
||||
logic [31:0] thirtytwozeros = 32'h0;
|
||||
logic [15:0] sixteenzeros = 16'h0;
|
||||
logic [ 7:0] eightzeros = 8'h0;
|
||||
logic [ 3:0] fourzeros = 4'h0;
|
||||
logic [ 1:0] twozeros = 2'b00;
|
||||
logic onezero = 1'b0;
|
||||
|
||||
output logic [63:0] Z;
|
||||
|
||||
mux2 #(64) mx01(A, {thirtytwozeros, A[63:32]}, Shift[5], stage1);
|
||||
mux2 #(64) mx02(stage1, {sixteenzeros, stage1[63:16]}, Shift[4], stage2);
|
||||
mux2 #(64) mx03(stage2, {eightzeros, stage2[63:8]}, Shift[3], stage3);
|
||||
mux2 #(64) mx04(stage3, {fourzeros, stage3[63:4]}, Shift[2], stage4);
|
||||
mux2 #(64) mx05(stage4, {twozeros, stage4[63:2]}, Shift[1], stage5);
|
||||
mux2 #(64) mx06(stage5, {onezero, stage5[63:1]}, Shift[0], Z);
|
||||
mux2 #(64) mx01(A, {32'h0, A[63:32]}, Shift[5], stage1);
|
||||
mux2 #(64) mx02(stage1, {16'h0, stage1[63:16]}, Shift[4], stage2);
|
||||
mux2 #(64) mx03(stage2, {8'h0, stage2[63:8]}, Shift[3], stage3);
|
||||
mux2 #(64) mx04(stage3, {4'h0, stage3[63:4]}, Shift[2], stage4);
|
||||
mux2 #(64) mx05(stage4, {2'h0, stage4[63:2]}, Shift[1], stage5);
|
||||
mux2 #(64) mx06(stage5, {1'h0, stage5[63:1]}, Shift[0], Z);
|
||||
|
||||
endmodule // shifter_r64
|
||||
|
||||
@ -1534,19 +1522,14 @@ module shifter_l32 (Z, A, Shift);
|
||||
logic [31:0] stage2;
|
||||
logic [31:0] stage3;
|
||||
logic [31:0] stage4;
|
||||
logic [15:0] sixteenzeros = 16'h0;
|
||||
logic [ 7:0] eightzeros = 8'h0;
|
||||
logic [ 3:0] fourzeros = 4'h0;
|
||||
logic [ 1:0] twozeros = 2'b00;
|
||||
logic onezero = 1'b0;
|
||||
|
||||
output logic [31:0] Z;
|
||||
|
||||
mux2 #(32) mx01(A, {A[15:0], sixteenzeros}, Shift[4], stage1);
|
||||
mux2 #(32) mx02(stage1, {stage1[23:0], eightzeros}, Shift[3], stage2);
|
||||
mux2 #(32) mx03(stage2, {stage2[27:0], fourzeros}, Shift[2], stage3);
|
||||
mux2 #(32) mx04(stage3, {stage3[29:0], twozeros}, Shift[1], stage4);
|
||||
mux2 #(32) mx05(stage4, {stage4[30:0], onezero}, Shift[0], Z);
|
||||
mux2 #(32) mx01(A, {A[15:0], 16'h0}, Shift[4], stage1);
|
||||
mux2 #(32) mx02(stage1, {stage1[23:0], 8'h0}, Shift[3], stage2);
|
||||
mux2 #(32) mx03(stage2, {stage2[27:0], 4'h0}, Shift[2], stage3);
|
||||
mux2 #(32) mx04(stage3, {stage3[29:0], 2'h0}, Shift[1], stage4);
|
||||
mux2 #(32) mx05(stage4, {stage4[30:0], 1'h0}, Shift[0], Z);
|
||||
|
||||
endmodule // shifter_l32
|
||||
|
||||
@ -1559,19 +1542,14 @@ module shifter_r32 (Z, A, Shift);
|
||||
logic [31:0] stage2;
|
||||
logic [31:0] stage3;
|
||||
logic [31:0] stage4;
|
||||
logic [15:0] sixteenzeros = 16'h0;
|
||||
logic [ 7:0] eightzeros = 8'h0;
|
||||
logic [ 3:0] fourzeros = 4'h0;
|
||||
logic [ 1:0] twozeros = 2'b00;
|
||||
logic onezero = 1'b0;
|
||||
|
||||
output logic [31:0] Z;
|
||||
|
||||
mux2 #(32) mx01(A, {sixteenzeros, A[31:16]}, Shift[4], stage1);
|
||||
mux2 #(32) mx02(stage1, {eightzeros, stage1[31:8]}, Shift[3], stage2);
|
||||
mux2 #(32) mx03(stage2, {fourzeros, stage2[31:4]}, Shift[2], stage3);
|
||||
mux2 #(32) mx04(stage3, {twozeros, stage3[31:2]}, Shift[1], stage4);
|
||||
mux2 #(32) mx05(stage4, {onezero, stage4[31:1]}, Shift[0], Z);
|
||||
mux2 #(32) mx01(A, {16'h0, A[31:16]}, Shift[4], stage1);
|
||||
mux2 #(32) mx02(stage1, {8'h0, stage1[31:8]}, Shift[3], stage2);
|
||||
mux2 #(32) mx03(stage2, {4'h0, stage2[31:4]}, Shift[2], stage3);
|
||||
mux2 #(32) mx04(stage3, {2'h0, stage3[31:2]}, Shift[1], stage4);
|
||||
mux2 #(32) mx05(stage4, {1'h0, stage4[31:1]}, Shift[0], Z);
|
||||
|
||||
endmodule // shifter_r32
|
||||
|
||||
|
@ -80,7 +80,7 @@ module csrc (
|
||||
|
||||
for (j=0; j<= `COUNTERS; j = j+1) begin
|
||||
// Write enables
|
||||
if (j !==1) begin
|
||||
if (j != 1) begin
|
||||
assign WriteHPMCOUNTERM[j] = CSRMWriteM && (CSRAdrM == MHPMCOUNTER[j]);
|
||||
// Count Signals
|
||||
assign HPMCOUNTERPlusM[j] = HPMCOUNTER_REGW[j] + {63'b0, MCOUNTEN[j] & ~MCOUNTINHIBIT_REGW[j]};
|
||||
|
@ -49,13 +49,13 @@ module csri #(parameter
|
||||
// assumes no N-mode user interrupts
|
||||
|
||||
always_comb begin
|
||||
IntInM = 0; // *** does this really work
|
||||
IntInM[11] = ExtIntM & ~MIDELEG_REGW[9]; // MEIP
|
||||
IntInM[9] = ExtIntM & MIDELEG_REGW[9]; // SEIP
|
||||
IntInM[7] = TimerIntM & ~MIDELEG_REGW[5]; // MTIP
|
||||
IntInM[5] = TimerIntM & MIDELEG_REGW[5]; // STIP
|
||||
IntInM[3] = SwIntM & ~MIDELEG_REGW[1]; // MSIP
|
||||
IntInM[1] = SwIntM & MIDELEG_REGW[1]; // SSIP
|
||||
IntInM = 0; // *** does this overwriting technique really synthesize
|
||||
IP_REGW[11] = ExtIntM & ~MIDELEG_REGW[9]; // MEIP
|
||||
IntInM[9] = ExtIntM & MIDELEG_REGW[9]; // SEIP
|
||||
IntInM[7] = TimerIntM & ~MIDELEG_REGW[5]; // MTIP
|
||||
IntInM[5] = TimerIntM & MIDELEG_REGW[5]; // STIP
|
||||
IntInM[3] = SwIntM & ~MIDELEG_REGW[1]; // MSIP
|
||||
IntInM[1] = SwIntM & MIDELEG_REGW[1]; // SSIP
|
||||
end
|
||||
|
||||
// Interrupt Write Enables
|
||||
@ -77,14 +77,14 @@ module csri #(parameter
|
||||
assign SIP_WRITE_MASK = 12'h000;
|
||||
end
|
||||
always @(posedge clk, posedge reset) begin
|
||||
if (reset) IP_REGW <= 12'b0;
|
||||
else if (WriteMIPM) IP_REGW <= (CSRWriteValM & MIP_WRITE_MASK) | IntInM; // MTIP unclearable
|
||||
else if (WriteSIPM) IP_REGW <= (CSRWriteValM & SIP_WRITE_MASK) | IntInM; // MTIP unclearable
|
||||
if (reset) IP_REGW[9:0] <= 10'b0;
|
||||
else if (WriteMIPM) IP_REGW[9:0] <= (CSRWriteValM[9:0] & MIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable
|
||||
else if (WriteSIPM) IP_REGW[9:0] <= (CSRWriteValM[9:0] & SIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable
|
||||
// else if (WriteUIPM) IP_REGW = (CSRWriteValM & 12'hBBB) | (NextIPM & 12'h080); // MTIP unclearable
|
||||
else IP_REGW <= IP_REGW | IntInM; // *** check this turns off interrupts properly even when MIDELEG changes
|
||||
else IP_REGW[9:0] <= IP_REGW[9:0] | IntInM[9:0]; // *** check this turns off interrupts properly even when MIDELEG changes
|
||||
end
|
||||
always @(posedge clk, posedge reset) begin
|
||||
if (reset) IE_REGW <= 12'b0;
|
||||
if (reset) IE_REGW <= 12'b0;
|
||||
else if (WriteMIEM) IE_REGW <= (CSRWriteValM & 12'hAAA); // MIE controls M and S fields
|
||||
else if (WriteSIEM) IE_REGW <= (CSRWriteValM & 12'h222) | (IE_REGW & 12'h888); // only S fields
|
||||
// else if (WriteUIEM) IE_REGW = (CSRWriteValM & 12'h111) | (IE_REGW & 12'hAAA); // only U field
|
||||
|
@ -40,7 +40,7 @@ module privileged (
|
||||
input logic InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM,
|
||||
input logic [3:0] InstrClassM,
|
||||
input logic PrivilegedM,
|
||||
input logic InstrPageFaultM, LoadPageFaultM, StorePageFaultM,
|
||||
input logic InstrPageFaultF, LoadPageFaultM, StorePageFaultM,
|
||||
input logic InstrMisalignedFaultM, InstrAccessFaultF, IllegalIEUInstrFaultD,
|
||||
input logic LoadMisalignedFaultM, LoadAccessFaultM,
|
||||
input logic StoreMisalignedFaultM, StoreAccessFaultM,
|
||||
@ -62,8 +62,9 @@ module privileged (
|
||||
|
||||
logic uretM, sretM, mretM, ecallM, ebreakM, wfiM, sfencevmaM;
|
||||
logic IllegalCSRAccessM;
|
||||
logic IllegalIEUInstrFaultE, IllegalIEUInstrFaultM;
|
||||
logic InstrAccessFaultD, InstrAccessFaultE, InstrAccessFaultM;
|
||||
logic IllegalIEUInstrFaultE, IllegalIEUInstrFaultM;
|
||||
logic InstrPageFaultD, InstrPageFaultE, InstrPageFaultM;
|
||||
logic InstrAccessFaultD, InstrAccessFaultE, InstrAccessFaultM;
|
||||
logic IllegalInstrFaultM;
|
||||
|
||||
logic BreakpointFaultM, EcallFaultM;
|
||||
@ -129,13 +130,15 @@ module privileged (
|
||||
// assign StorePageFaultM = 0;
|
||||
|
||||
// pipeline fault signals
|
||||
flopenrc #(1) faultregD(clk, reset, FlushD, ~StallD, InstrAccessFaultF, InstrAccessFaultD);
|
||||
flopenrc #(2) faultregE(clk, reset, FlushE, ~StallE,
|
||||
{IllegalIEUInstrFaultD, InstrAccessFaultD}, // ** vs IllegalInstrFaultInD
|
||||
{IllegalIEUInstrFaultE, InstrAccessFaultE});
|
||||
flopenrc #(2) faultregM(clk, reset, FlushM, ~StallM,
|
||||
{IllegalIEUInstrFaultE, InstrAccessFaultE},
|
||||
{IllegalIEUInstrFaultM, InstrAccessFaultM});
|
||||
flopenrc #(2) faultregD(clk, reset, FlushD, ~StallD,
|
||||
{InstrPageFaultF, InstrAccessFaultF},
|
||||
{InstrPageFaultD, InstrAccessFaultD});
|
||||
flopenrc #(3) faultregE(clk, reset, FlushE, ~StallE,
|
||||
{IllegalIEUInstrFaultD, InstrPageFaultD, InstrAccessFaultD}, // ** vs IllegalInstrFaultInD
|
||||
{IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE});
|
||||
flopenrc #(3) faultregM(clk, reset, FlushM, ~StallM,
|
||||
{IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE},
|
||||
{IllegalIEUInstrFaultM, InstrPageFaultM, InstrAccessFaultM});
|
||||
|
||||
trap trap(.*);
|
||||
|
||||
|
@ -49,7 +49,7 @@ module trap (
|
||||
logic InterruptM;
|
||||
|
||||
// Determine pending enabled interrupts
|
||||
assign MIntGlobalEnM = (PrivilegeModeW != `M_MODE) || STATUS_MIE; // if M ints enabled or lower priv 3.1.9
|
||||
assign MIntGlobalEnM = {12{(PrivilegeModeW != `M_MODE) || STATUS_MIE}}; // if M ints enabled or lower priv 3.1.9
|
||||
assign SIntGlobalEnM = (PrivilegeModeW == `U_MODE) || STATUS_SIE; // if S ints enabled or lower priv 3.1.9
|
||||
assign PendingIntsM = (MIP_REGW & MIE_REGW) & ((MIntGlobalEnM & 12'h888) | (SIntGlobalEnM & 12'h222));
|
||||
assign InterruptM = |PendingIntsM; // interrupt if any sources are pending
|
||||
|
@ -41,13 +41,15 @@ module uart (
|
||||
|
||||
// UART interface signals
|
||||
logic [2:0] A;
|
||||
logic MEMRb, MEMWb;
|
||||
logic MEMRb, MEMWb, memread, memwrite;
|
||||
logic [7:0] Din, Dout;
|
||||
|
||||
// rename processor interface signals to match PC16550D and provide one-byte interface
|
||||
flopr #(1) memreadreg(HCLK, ~HRESETn, ~(HSELUART & ~HWRITE), MEMRb);
|
||||
flopr #(1) memwritereg(HCLK, ~HRESETn, ~(HSELUART & HWRITE), MEMWb);
|
||||
flopr #(1) memreadreg(HCLK, ~HRESETn, (HSELUART & ~HWRITE), memread);
|
||||
flopr #(1) memwritereg(HCLK, ~HRESETn, (HSELUART & HWRITE), memwrite);
|
||||
flopr #(3) haddrreg(HCLK, ~HRESETn, HADDR[2:0], A);
|
||||
assign MEMRb = ~memread;
|
||||
assign MEMWb = ~memwrite;
|
||||
|
||||
assign HRESPUART = 0; // OK
|
||||
assign HREADYUART = 1; // should idle high during address phase and respond high when done; will need to be modified if UART ever needs more than 1 cycle to do something
|
||||
|
@ -24,6 +24,7 @@
|
||||
///////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
`include "wally-constants.vh"
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
module wallypipelinedhart (
|
||||
@ -76,7 +77,7 @@ module wallypipelinedhart (
|
||||
logic InstrMisalignedFaultM;
|
||||
logic DataMisalignedM;
|
||||
logic IllegalBaseInstrFaultD, IllegalIEUInstrFaultD;
|
||||
logic InstrPageFaultM, LoadPageFaultM, StorePageFaultM;
|
||||
logic InstrPageFaultF, LoadPageFaultM, StorePageFaultM;
|
||||
logic LoadMisalignedFaultM, LoadAccessFaultM;
|
||||
logic StoreMisalignedFaultM, StoreAccessFaultM;
|
||||
logic [`XLEN-1:0] InstrMisalignedAdrM;
|
||||
|
@ -279,14 +279,15 @@ module testbench_busybear();
|
||||
end
|
||||
end
|
||||
|
||||
string sepc_lit = "SEPC";
|
||||
`define CHECK_CSR2(CSR, PATH) \
|
||||
string CSR; \
|
||||
logic [63:0] expected``CSR``; \
|
||||
//CSR checking \
|
||||
always @(``PATH``.``CSR``_REGW) begin \
|
||||
if ($time > 1) begin \
|
||||
if (sepc_lit.icompare(`"CSR`")) begin #1; end \
|
||||
if ("SEPC" == `"CSR`") begin #1; end \
|
||||
if ("SCAUSE" == `"CSR`") begin #2; end \
|
||||
if ("SSTATUS" == `"CSR`") begin #3; end \
|
||||
scan_file_csr = $fscanf(data_file_csr, "%s\n", CSR); \
|
||||
scan_file_csr = $fscanf(data_file_csr, "%x\n", expected``CSR``); \
|
||||
if(CSR.icompare(`"CSR`")) begin \
|
||||
@ -463,8 +464,10 @@ module testbench_busybear();
|
||||
32'bXXXXXXXXXXXXXXXX111XXXXXXXXXXX01, // C.BNEZ
|
||||
32'bXXXXXXXXXXXXXXXX101XXXXXXXXXXX01: // C.J
|
||||
speculative = 1;
|
||||
32'bXXXXXXXXXXXXXXXX1001000000000010: // C.EBREAK:
|
||||
32'bXXXXXXXXXXXXXXXX1001000000000010, // C.EBREAK:
|
||||
32'bXXXXXXXXXXXXXXXXX000XXXXX1110011: // Something that's not CSRR*
|
||||
speculative = 0; // tbh don't really know what should happen here
|
||||
32'b000110000000XXXXXXXXXXXXX1110011, // CSR* SATP, *
|
||||
32'bXXXXXXXXXXXXXXXX1000XXXXX0000010, // C.JR
|
||||
32'bXXXXXXXXXXXXXXXX1001XXXXX0000010: // C.JALR //this is RV64 only so no C.JAL
|
||||
speculative = 1;
|
||||
|
@ -352,7 +352,7 @@ module testbench();
|
||||
};
|
||||
|
||||
string tests64periph[] = '{
|
||||
"rv64i-periph/WALLY-PLIC", "2000"
|
||||
"rv64i-periph/WALLY-PLIC", "2080"
|
||||
};
|
||||
|
||||
string tests32periph[] = '{
|
||||
@ -402,7 +402,7 @@ module testbench();
|
||||
if (TESTSPERIPH) begin
|
||||
tests = tests32periph;
|
||||
end else begin
|
||||
tests = {tests32i,tests32periph};
|
||||
tests = {tests32i};//,tests32periph}; *** broken at the moment
|
||||
if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic};
|
||||
else tests = {tests, tests32iNOc};
|
||||
if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m};
|
||||
|
@ -1,413 +0,0 @@
|
||||
///////////////////////////////////////////
|
||||
// testbench-imperas.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 9 January 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Wally Testbench and helper modules
|
||||
// Applies test programs from the Imperas suite
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
||||
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module testbench();
|
||||
parameter DEBUG = 0;
|
||||
parameter TESTSBP = 0;
|
||||
|
||||
logic clk;
|
||||
logic reset;
|
||||
|
||||
int test, i, errors, totalerrors;
|
||||
logic [31:0] sig32[0:10000];
|
||||
logic [`XLEN-1:0] signature[0:10000];
|
||||
logic [`XLEN-1:0] testadr;
|
||||
string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
|
||||
logic [31:0] InstrW;
|
||||
logic [`XLEN-1:0] meminit;
|
||||
|
||||
string tests64i[] = {
|
||||
"peripherals/WALLY-PLIC", "2000"
|
||||
//"peripherals/WALLY-UART", "2000"
|
||||
};
|
||||
string tests64ic[] = {
|
||||
};
|
||||
string tests64iNOc[] = {
|
||||
};
|
||||
string tests64m[] = {
|
||||
};
|
||||
string tests64a[] = {
|
||||
};
|
||||
string tests32a[] = {
|
||||
};
|
||||
string tests32m[] = {
|
||||
};
|
||||
string tests32ic[] = {
|
||||
};
|
||||
string tests32iNOc[] = {
|
||||
};
|
||||
string tests32i[] = {
|
||||
};
|
||||
string testsBP64[] = {
|
||||
};
|
||||
string tests64p[] = {
|
||||
};
|
||||
|
||||
string tests[];
|
||||
string ProgramAddrMapFile, ProgramLabelMapFile;
|
||||
logic [`AHBW-1:0] HRDATAEXT;
|
||||
logic HREADYEXT, HRESPEXT;
|
||||
logic [31:0] HADDR;
|
||||
logic [`AHBW-1:0] HWDATA;
|
||||
logic HWRITE;
|
||||
logic [2:0] HSIZE;
|
||||
logic [2:0] HBURST;
|
||||
logic [3:0] HPROT;
|
||||
logic [1:0] HTRANS;
|
||||
logic HMASTLOCK;
|
||||
logic HCLK, HRESETn;
|
||||
logic [`XLEN-1:0] PCW;
|
||||
|
||||
flopenr #(`XLEN) PCWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.PCM, PCW);
|
||||
flopenr #(32) InstrWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.InstrM, InstrW);
|
||||
// pick tests based on modes supported
|
||||
initial begin
|
||||
if (`XLEN == 64) begin // RV64
|
||||
if (TESTSBP) begin
|
||||
tests = testsBP64;
|
||||
end else begin
|
||||
tests = {tests64i};
|
||||
if (`C_SUPPORTED) tests = {tests, tests64ic};
|
||||
else tests = {tests, tests64iNOc};
|
||||
if (`M_SUPPORTED) tests = {tests, tests64m};
|
||||
// if (`F_SUPPORTED) tests = {tests64f, tests};
|
||||
// if (`D_SUPPORTED) tests = {tests64d, tests};
|
||||
if (`A_SUPPORTED) tests = {tests, tests64a};
|
||||
end
|
||||
// tests = {tests64a, tests};
|
||||
tests = {tests, tests64p};
|
||||
end else begin // RV32
|
||||
// *** add the 32 bit bp tests
|
||||
tests = {tests32i};
|
||||
if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic};
|
||||
else tests = {tests, tests32iNOc};
|
||||
if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m};
|
||||
// if (`F_SUPPORTED) tests = {tests32f, tests};
|
||||
if (`A_SUPPORTED) tests = {tests, tests32a};
|
||||
end
|
||||
|
||||
// tests = tests64p;
|
||||
end
|
||||
|
||||
|
||||
string signame, memfilename;
|
||||
|
||||
logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
|
||||
logic UARTSin, UARTSout;
|
||||
|
||||
// instantiate device to be tested
|
||||
assign GPIOPinsIn = 0;
|
||||
assign UARTSin = 1;
|
||||
assign HREADYEXT = 1;
|
||||
assign HRESPEXT = 0;
|
||||
assign HRDATAEXT = 0;
|
||||
|
||||
wallypipelinedsoc dut(.*);
|
||||
|
||||
// Track names of instructions
|
||||
instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE,
|
||||
dut.hart.ifu.ic.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
|
||||
dut.hart.ifu.InstrM, InstrW, InstrFName, InstrDName,
|
||||
InstrEName, InstrMName, InstrWName);
|
||||
|
||||
// initialize tests
|
||||
initial
|
||||
begin
|
||||
test = 0;
|
||||
totalerrors = 0;
|
||||
testadr = 0;
|
||||
// fill memory with defined values to reduce Xs in simulation
|
||||
if (`XLEN == 32) meminit = 32'hFEDC0123;
|
||||
else meminit = 64'hFEDCBA9876543210;
|
||||
for (i=0; i<=65535; i = i+1) begin
|
||||
//dut.imem.RAM[i] = meminit;
|
||||
// dut.uncore.RAM[i] = meminit;
|
||||
end
|
||||
// read test vectors into memory
|
||||
memfilename = {"../../imperas-riscv-tests/work/", tests[test], ".elf.memfile"};
|
||||
$readmemh(memfilename, dut.imem.RAM);
|
||||
$readmemh(memfilename, dut.uncore.dtim.RAM);
|
||||
ProgramAddrMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.addr"};
|
||||
ProgramLabelMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.lab"};
|
||||
$display("Read memfile %s", memfilename);
|
||||
reset = 1; # 42; reset = 0;
|
||||
end
|
||||
|
||||
// generate clock to sequence tests
|
||||
always
|
||||
begin
|
||||
clk = 1; # 5; clk = 0; # 5;
|
||||
end
|
||||
|
||||
// check results
|
||||
always @(negedge clk)
|
||||
begin
|
||||
if (dut.hart.priv.EcallFaultM &&
|
||||
(dut.hart.ieu.dp.regf.rf[3] == 1 || (dut.hart.ieu.dp.regf.we3 && dut.hart.ieu.dp.regf.a3 == 3 && dut.hart.ieu.dp.regf.wd3 == 1))) begin
|
||||
$display("Code ended with ecall with gp = 1");
|
||||
#60; // give time for instructions in pipeline to finish
|
||||
// clear signature to prevent contamination from previous tests
|
||||
for(i=0; i<10000; i=i+1) begin
|
||||
sig32[i] = 'bx;
|
||||
end
|
||||
|
||||
// read signature, reformat in 64 bits if necessary
|
||||
signame = {"../../imperas-riscv-tests/work/", tests[test], ".signature.output"};
|
||||
$readmemh(signame, sig32);
|
||||
i = 0;
|
||||
while (i < 10000) begin
|
||||
if (`XLEN == 32) begin
|
||||
signature[i] = sig32[i];
|
||||
i = i+1;
|
||||
end else begin
|
||||
signature[i/2] = {sig32[i+1], sig32[i]};
|
||||
i = i + 2;
|
||||
end
|
||||
end
|
||||
|
||||
// Check errors
|
||||
i = 0;
|
||||
errors = 0;
|
||||
if (`XLEN == 32)
|
||||
testadr = (`TIMBASE+tests[test+1].atohex())/4;
|
||||
else
|
||||
testadr = (`TIMBASE+tests[test+1].atohex())/8;
|
||||
/* verilator lint_off INFINITELOOP */
|
||||
while (signature[i] !== 'bx) begin
|
||||
//$display("signature[%h] = %h", i, signature[i]);
|
||||
if (signature[i] !== dut.uncore.dtim.RAM[testadr+i]) begin
|
||||
if (signature[i+4] !== 'bx || signature[i] !== 32'hFFFFFFFF) begin
|
||||
// report errors unless they are garbage at the end of the sim
|
||||
// kind of hacky test for garbage right now
|
||||
errors = errors+1;
|
||||
$display(" Error on test %s result %d: adr = %h sim = %h, signature = %h",
|
||||
tests[test], i, (testadr+i)*`XLEN/8, dut.uncore.dtim.RAM[testadr+i], signature[i]);
|
||||
end
|
||||
end
|
||||
i = i + 1;
|
||||
end
|
||||
/* verilator lint_on INFINITELOOP */
|
||||
if (errors == 0) $display("%s succeeded. Brilliant!!!", tests[test]);
|
||||
else begin
|
||||
$display("%s failed with %d errors. :(", tests[test], errors);
|
||||
totalerrors = totalerrors+1;
|
||||
end
|
||||
test = test + 2;
|
||||
if (test == tests.size()) begin
|
||||
if (totalerrors == 0) $display("SUCCESS! All tests ran without failures.");
|
||||
else $display("FAIL: %d test programs had errors", totalerrors);
|
||||
$stop;
|
||||
end
|
||||
else begin
|
||||
memfilename = {"../../imperas-riscv-tests/work/", tests[test], ".elf.memfile"};
|
||||
$readmemh(memfilename, dut.imem.RAM);
|
||||
$readmemh(memfilename, dut.uncore.dtim.RAM);
|
||||
$display("Read memfile %s", memfilename);
|
||||
ProgramAddrMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.addr"};
|
||||
ProgramLabelMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.lab"};
|
||||
reset = 1; # 17; reset = 0;
|
||||
end
|
||||
end
|
||||
end // always @ (negedge clk)
|
||||
|
||||
// track the current function or global label
|
||||
if (DEBUG == 1) begin : functionRadix
|
||||
function_radix function_radix(.reset(reset),
|
||||
.ProgramAddrMapFile(ProgramAddrMapFile),
|
||||
.ProgramLabelMapFile(ProgramLabelMapFile));
|
||||
end
|
||||
|
||||
// initialize the branch predictor
|
||||
initial begin
|
||||
$readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.Predictor.DirPredictor.PHT.memory);
|
||||
$readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.TargetPredictor.memory.memory);
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
/* verilator lint_on STMTDLY */
|
||||
/* verilator lint_on WIDTH */
|
||||
|
||||
module instrTrackerTB(
|
||||
input logic clk, reset, FlushE,
|
||||
input logic [31:0] InstrF, InstrD,
|
||||
input logic [31:0] InstrE, InstrM,
|
||||
input logic [31:0] InstrW,
|
||||
// output logic [31:0] InstrW,
|
||||
output string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
|
||||
|
||||
// stage Instr to Writeback for visualization
|
||||
// flopr #(32) InstrWReg(clk, reset, InstrM, InstrW);
|
||||
|
||||
instrNameDecTB fdec(InstrF, InstrFName);
|
||||
instrNameDecTB ddec(InstrD, InstrDName);
|
||||
instrNameDecTB edec(InstrE, InstrEName);
|
||||
instrNameDecTB mdec(InstrM, InstrMName);
|
||||
instrNameDecTB wdec(InstrW, InstrWName);
|
||||
endmodule
|
||||
|
||||
// Decode the instruction name, to help the test bench.
//
// Purely combinational: maps a 32-bit RISC-V instruction word to a short
// mnemonic string, selected by {opcode, funct3} and, where needed, by
// funct7 or the 12-bit immediate field.
//   instr - instruction word to decode
//   name  - mnemonic string. Special values:
//             "BAD"       opcode 0000000 with funct3 000 (e.g. an all-zero word)
//             "NOP/FLUSH" ADDI with rd, rs1 and immediate all zero
//             "ILLEGAL"   no pattern below matches
module instrNameDecTB(
  input  logic [31:0] instr,
  output string       name);

  logic [6:0]  op;
  logic [2:0]  funct3;
  logic [6:0]  funct7;
  logic [11:0] imm;

  assign op     = instr[6:0];
  assign funct3 = instr[14:12];
  assign funct7 = instr[31:25];
  assign imm    = instr[31:20];

  // it would be nice to add the operands to the name
  // create another variable called decoded

  always_comb
    casez({op, funct3})
      10'b0000000_000: name = "BAD";

      // loads
      10'b0000011_000: name = "LB";
      10'b0000011_001: name = "LH";
      10'b0000011_010: name = "LW";
      10'b0000011_011: name = "LD";
      10'b0000011_100: name = "LBU";
      10'b0000011_101: name = "LHU";
      10'b0000011_110: name = "LWU";

      // register-immediate ALU operations
      // ADDI with imm, rs1 and rd all zero is reported as a NOP / flushed slot
      10'b0010011_000: if (instr[31:15] == 0 && instr[11:7] == 0) name = "NOP/FLUSH";
                       else                                       name = "ADDI";
      // shifts compare only funct7[6:1]; instr[25] is the sixth shamt bit
      10'b0010011_001: if (funct7[6:1] == 6'b000000) name = "SLLI";
                       else                          name = "ILLEGAL";
      10'b0010011_010: name = "SLTI";
      10'b0010011_011: name = "SLTIU";
      10'b0010011_100: name = "XORI";
      10'b0010011_101: if      (funct7[6:1] == 6'b000000) name = "SRLI";
                       else if (funct7[6:1] == 6'b010000) name = "SRAI";
                       else                               name = "ILLEGAL";
      10'b0010011_110: name = "ORI";
      10'b0010011_111: name = "ANDI";

      10'b0010111_???: name = "AUIPC";

      // stores
      10'b0100011_000: name = "SB";
      10'b0100011_001: name = "SH";
      10'b0100011_010: name = "SW";
      10'b0100011_011: name = "SD";

      // 32-bit register-immediate operations
      10'b0011011_000: name = "ADDIW";
      10'b0011011_001: name = "SLLIW";
      10'b0011011_101: if      (funct7 == 7'b0000000) name = "SRLIW";
                       else if (funct7 == 7'b0100000) name = "SRAIW";
                       else                           name = "ILLEGAL";

      // 32-bit register-register operations, including multiply/divide
      10'b0111011_000: if      (funct7 == 7'b0000000) name = "ADDW";
                       else if (funct7 == 7'b0100000) name = "SUBW";
                       else if (funct7 == 7'b0000001) name = "MULW";
                       else                           name = "ILLEGAL";
      10'b0111011_001: if (funct7 == 7'b0000000) name = "SLLW";
                       else                      name = "ILLEGAL";
      // DIVW is encoded with funct3 = 100, not 001
      10'b0111011_100: if (funct7 == 7'b0000001) name = "DIVW";
                       else                      name = "ILLEGAL";
      10'b0111011_101: if      (funct7 == 7'b0000000) name = "SRLW";
                       else if (funct7 == 7'b0100000) name = "SRAW";
                       else if (funct7 == 7'b0000001) name = "DIVUW";
                       else                           name = "ILLEGAL";
      10'b0111011_110: if (funct7 == 7'b0000001) name = "REMW";
                       else                      name = "ILLEGAL";
      10'b0111011_111: if (funct7 == 7'b0000001) name = "REMUW";
                       else                      name = "ILLEGAL";

      // register-register operations, including multiply/divide
      10'b0110011_000: if      (funct7 == 7'b0000000) name = "ADD";
                       else if (funct7 == 7'b0000001) name = "MUL";
                       else if (funct7 == 7'b0100000) name = "SUB";
                       else                           name = "ILLEGAL";
      10'b0110011_001: if      (funct7 == 7'b0000000) name = "SLL";
                       else if (funct7 == 7'b0000001) name = "MULH";
                       else                           name = "ILLEGAL";
      10'b0110011_010: if      (funct7 == 7'b0000000) name = "SLT";
                       else if (funct7 == 7'b0000001) name = "MULHSU";
                       else                           name = "ILLEGAL";
      10'b0110011_011: if      (funct7 == 7'b0000000) name = "SLTU";
                       else if (funct7 == 7'b0000001) name = "MULHU";
                       else                           name = "ILLEGAL";
      10'b0110011_100: if      (funct7 == 7'b0000000) name = "XOR";
                       else if (funct7 == 7'b0000001) name = "DIV";
                       else                           name = "ILLEGAL";
      10'b0110011_101: if      (funct7 == 7'b0000000) name = "SRL";
                       else if (funct7 == 7'b0000001) name = "DIVU";
                       else if (funct7 == 7'b0100000) name = "SRA";
                       else                           name = "ILLEGAL";
      10'b0110011_110: if      (funct7 == 7'b0000000) name = "OR";
                       else if (funct7 == 7'b0000001) name = "REM";
                       else                           name = "ILLEGAL";
      10'b0110011_111: if      (funct7 == 7'b0000000) name = "AND";
                       else if (funct7 == 7'b0000001) name = "REMU";
                       else                           name = "ILLEGAL";

      10'b0110111_???: name = "LUI";

      // branches and jumps
      10'b1100011_000: name = "BEQ";
      10'b1100011_001: name = "BNE";
      10'b1100011_100: name = "BLT";
      10'b1100011_101: name = "BGE";
      10'b1100011_110: name = "BLTU";
      10'b1100011_111: name = "BGEU";
      10'b1100111_000: name = "JALR";
      10'b1101111_???: name = "JAL";

      // system instructions: funct3 = 000 is selected by the 12-bit immediate
      // (258 = 12'h102, 770 = 12'h302)
      10'b1110011_000: if      (imm == 0)   name = "ECALL";
                       else if (imm == 1)   name = "EBREAK";
                       else if (imm == 2)   name = "URET";
                       else if (imm == 258) name = "SRET";
                       else if (imm == 770) name = "MRET";
                       else                 name = "ILLEGAL";
      10'b1110011_001: name = "CSRRW";
      10'b1110011_010: name = "CSRRS";
      10'b1110011_011: name = "CSRRC";
      10'b1110011_101: name = "CSRRWI";
      10'b1110011_110: name = "CSRRSI";
      10'b1110011_111: name = "CSRRCI";

      // atomics: selected by funct7[6:2]; the low two funct7 bits are ignored
      10'b0101111_010: if      (funct7[6:2] == 5'b00010) name = "LR.W";
                       else if (funct7[6:2] == 5'b00011) name = "SC.W";
                       else if (funct7[6:2] == 5'b00001) name = "AMOSWAP.W";
                       else if (funct7[6:2] == 5'b00000) name = "AMOADD.W";
                       else if (funct7[6:2] == 5'b00100) name = "AMOXOR.W";
                       else if (funct7[6:2] == 5'b01100) name = "AMOAND.W";
                       else if (funct7[6:2] == 5'b01000) name = "AMOOR.W";
                       else if (funct7[6:2] == 5'b10000) name = "AMOMIN.W";
                       else if (funct7[6:2] == 5'b10100) name = "AMOMAX.W";
                       else if (funct7[6:2] == 5'b11000) name = "AMOMINU.W";
                       else if (funct7[6:2] == 5'b11100) name = "AMOMAXU.W";
                       else                              name = "ILLEGAL";
      10'b0101111_011: if      (funct7[6:2] == 5'b00010) name = "LR.D";
                       else if (funct7[6:2] == 5'b00011) name = "SC.D";
                       else if (funct7[6:2] == 5'b00001) name = "AMOSWAP.D";
                       else if (funct7[6:2] == 5'b00000) name = "AMOADD.D";
                       else if (funct7[6:2] == 5'b00100) name = "AMOXOR.D";
                       else if (funct7[6:2] == 5'b01100) name = "AMOAND.D";
                       else if (funct7[6:2] == 5'b01000) name = "AMOOR.D";
                       else if (funct7[6:2] == 5'b10000) name = "AMOMIN.D";
                       else if (funct7[6:2] == 5'b10100) name = "AMOMAX.D";
                       else if (funct7[6:2] == 5'b11000) name = "AMOMINU.D";
                       else if (funct7[6:2] == 5'b11100) name = "AMOMAXU.D";
                       else                              name = "ILLEGAL";

      10'b0001111_???: name = "FENCE";

      default:         name = "ILLEGAL";
    endcase
endmodule
|
Loading…
Reference in New Issue
Block a user