forked from Github_Repos/cvw
Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main
Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main
This commit is contained in:
commit
ef55b30e99
@ -26,6 +26,7 @@
|
||||
|
||||
// include shared configuration
|
||||
`include "wally-shared.vh"
|
||||
// `include "../../../config/shared/wally-shared.vh"
|
||||
|
||||
`define BUILDROOT 0
|
||||
`define BUSYBEAR 0
|
||||
|
215
wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
Normal file
215
wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
Normal file
@ -0,0 +1,215 @@
|
||||
|
||||
`include "../../../config/rv64icfd/wally-config.vh"
|
||||
// TestFloat-driven testbench for the two FMA stages (fma1 and fma2).
//
// Vectors are loaded from "testFloatNoSpace" with $readmemh. Each entry packs
// {X, Y, Z, expected result, 8-bit flags}. Every clock cycle one vector is
// unpacked into sign / exponent / fraction / classification signals, applied
// to the DUT, and the DUT result and flags are compared with the expected
// values on the falling edge. Simulation stops on the first mismatch or when
// the first unloaded (all-x) vector is reached.
module testbench3();

  // ---------------------------------------------------------------------
  // Bookkeeping and vector storage
  // ---------------------------------------------------------------------
  logic [31:0]        errors=0;                 // mismatching vectors seen so far
  logic [31:0]        vectornum=0;              // index of the vector being applied
  logic [`FLEN*4+7:0] testvectors[6133248:0];   // packed {X, Y, Z, ans, flags}
  integer             fp;                       // only referenced by the commented-out $fopen below
  logic               clk;

  // Current vector
  logic [`FLEN-1:0]   X, Y, Z;                  // operands
  logic [`FLEN-1:0]   ans;                      // expected result
  logic [7:0]         flags;                    // expected flags (low 5 bits are compared)

  // Operation controls (held constant for the whole run)
  logic [2:0]         FrmE;
  logic               FmtE;
  logic [2:0]         FOpCtrlE;

  // Signals between / out of the DUT stages
  logic [`FLEN-1:0]   FMAResM;
  logic [4:0]         FMAFlgM;
  logic [2*`NF+1:0]   ProdManE;
  logic [3*`NF+5:0]   AlignedAddendE;
  logic [`NE+1:0]     ProdExpE;
  logic               AddendStickyE;
  logic               KillProdE;

  // Unpacked operand fields and classification.
  // NOTE: fma1 is connected with .* below, so these names must stay as they
  // are even when nothing in this file appears to read them.
  logic               XSgnE, YSgnE, ZSgnE;
  logic [`NE-1:0]     XExpE, YExpE, ZExpE;
  logic [`NF-1:0]     XFracE, YFracE, ZFracE;
  logic               XAssumed1E, YAssumed1E, ZAssumed1E;
  logic               XNormE;
  logic               XNaNE, YNaNE, ZNaNE;
  logic               XSNaNE, YSNaNE, ZSNaNE;
  logic               XDenormE, YDenormE, ZDenormE;
  logic               XZeroE, YZeroE, ZZeroE;
  logic [`NE-1:0]     BiasE;
  logic               XInfE, YInfE, ZInfE;
  logic               XExpMaxE;
  //***rename to make significand = 1.frac m = significand
  logic               XFracZero, YFracZero, ZFracZero; // input fraction zero
  logic               XExpZero, YExpZero, ZExpZero;    // input exponent zero
  logic [`FLEN-1:0]   Addend;                          // value to add (Z or zero)
  logic               YExpMaxE, ZExpMaxE;              // input exponent all 1s

  logic               wnan;    // DUT result is a NaN
  logic               ansnan;  // expected result is a NaN

  // ---------------------------------------------------------------------
  // Fixed operation selection
  // ---------------------------------------------------------------------
  assign FOpCtrlE = 3'b0;

  // Rounding mode encoding:
  //   nearest even    - 000
  //   towards zero    - 001
  //   down            - 010
  //   up              - 011
  //   nearest max mag - 100
  assign FrmE = 3'b000;

  // FmtE = 1 selects the 64-bit field positions, FmtE = 0 the 32-bit ones
  // (operand held in bits [31:0]).
  assign FmtE = 1'b0;

  // ---------------------------------------------------------------------
  // Operand unpacking
  // ---------------------------------------------------------------------
  assign Addend = FOpCtrlE[2] ? (`FLEN)'(0) : Z; // Z is only used in the FMA, and is set to zero for a multiply operation

  assign XSgnE = FmtE ? X[`FLEN-1] : X[31];
  assign YSgnE = FmtE ? Y[`FLEN-1] : Y[31];
  assign ZSgnE = FmtE ? Addend[`FLEN-1] : Addend[31];

  assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]};
  assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]};
  assign ZExpE = FmtE ? Addend[62:52] : {3'b0, Addend[30:23]};

  // Single-precision fractions are left-aligned and padded with zeros.
  assign XFracE = FmtE ? X[`NF-1:0] : {X[22:0], 29'b0};
  assign YFracE = FmtE ? Y[`NF-1:0] : {Y[22:0], 29'b0};
  assign ZFracE = FmtE ? Addend[`NF-1:0] : {Addend[22:0], 29'b0};

  // Implicit leading one is present whenever the exponent is non-zero.
  // NOTE(review): the Z versions of Assumed1/ExpMax read Z rather than Addend;
  // harmless while FOpCtrlE[2] is tied to 0 - confirm before enabling multiply.
  assign XAssumed1E = FmtE ? |X[62:52] : |X[30:23];
  assign YAssumed1E = FmtE ? |Y[62:52] : |Y[30:23];
  assign ZAssumed1E = FmtE ? |Z[62:52] : |Z[30:23];

  assign XExpZero = ~XAssumed1E;
  assign YExpZero = ~YAssumed1E;
  assign ZExpZero = ~ZAssumed1E;

  assign XFracZero = ~|XFracE;
  assign YFracZero = ~|YFracE;
  assign ZFracZero = ~|ZFracE;

  assign XExpMaxE = FmtE ? &X[62:52] : &X[30:23];
  assign YExpMaxE = FmtE ? &Y[62:52] : &Y[30:23];
  assign ZExpMaxE = FmtE ? &Z[62:52] : &Z[30:23];

  assign XNormE = ~(XExpMaxE|XExpZero);

  assign XNaNE = XExpMaxE & ~XFracZero;
  assign YNaNE = YExpMaxE & ~YFracZero;
  assign ZNaNE = ZExpMaxE & ~ZFracZero;

  // Signalling NaN: NaN with the top fraction bit clear.
  assign XSNaNE = XNaNE&~XFracE[`NF-1];
  assign YSNaNE = YNaNE&~YFracE[`NF-1];
  assign ZSNaNE = ZNaNE&~ZFracE[`NF-1];

  assign XDenormE = XExpZero & ~XFracZero;
  assign YDenormE = YExpZero & ~YFracZero;
  assign ZDenormE = ZExpZero & ~ZFracZero;

  assign XInfE = XExpMaxE & XFracZero;
  assign YInfE = YExpMaxE & YFracZero;
  assign ZInfE = ZExpMaxE & ZFracZero;

  assign XZeroE = XExpZero & XFracZero;
  assign YZeroE = YExpZero & YFracZero;
  assign ZZeroE = ZExpZero & ZFracZero;

  assign BiasE = FmtE ? {1'b0, {`NE-1{1'b1}}} : 13'h7f;

  // NaN detection on the DUT result and on the expected answer.
  assign wnan   = FmtE ? &FMAResM[`FLEN-2:`NF] && |FMAResM[`NF-1:0] : &FMAResM[30:23] && |FMAResM[22:0];
  assign ansnan = FmtE ? &ans[`FLEN-2:`NF] && |ans[`NF-1:0] : &ans[30:23] && |ans[22:0];

  // ---------------------------------------------------------------------
  // Devices under test
  // ---------------------------------------------------------------------
  fma1 UUT1(.XManE({XAssumed1E,XFracE}), .YManE({YAssumed1E,YFracE}), .ZManE({ZAssumed1E,ZFracE}), .*);
  fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .ZSgnM(ZSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM({XAssumed1E,XFracE}), .YManM({YAssumed1E,YFracE}), .ZManM({ZAssumed1E,ZFracE}), .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE),
            // .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM,
            .FOpCtrlM(FOpCtrlE[2:0]), .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), .AlignedAddendM(AlignedAddendE), .ProdManM(ProdManE),
            .FmtM(FmtE), .FrmM(FrmE), .FMAFlgM, .FMAResM);

  // ---------------------------------------------------------------------
  // Clock: 10 time-unit period, starts high
  // ---------------------------------------------------------------------
  always
    begin
      clk = 1; #5; clk = 0; #5;
    end

  // At start of test, load vectors
  initial
    begin
      $readmemh("testFloatNoSpace", testvectors);
    end

  // ---------------------------------------------------------------------
  // Apply test vectors just after the rising edge of clk
  // ---------------------------------------------------------------------
  always @(posedge clk)
    begin
      #1;
      if (FmtE==1'b1) {X, Y, Z, ans, flags} = testvectors[vectornum];
      else begin
        // 32-bit vectors occupy the low 136 bits of the entry; each value is
        // placed in the low word with the upper 32 bits set to all ones.
        X     = {{32{1'b1}}, testvectors[vectornum][135:104]};
        Y     = {{32{1'b1}}, testvectors[vectornum][103:72]};
        Z     = {{32{1'b1}}, testvectors[vectornum][71:40]};
        ans   = {{32{1'b1}}, testvectors[vectornum][39:8]};
        flags = testvectors[vectornum][7:0];
      end
    end

  // ---------------------------------------------------------------------
  // Check results on the falling edge of clk
  // ---------------------------------------------------------------------
  // A vector fails when the flags differ, when a non-NaN result differs from
  // the expected value, or when both are NaNs but the result payload matches
  // neither a quieted input NaN nor the expected NaN.
  // NOTE(review): a NaN result with a non-NaN expected value is only caught
  // through the flag comparison - confirm that this is intended.
  always @(negedge clk) begin

    // fp = $fopen("/home/kparry/riscv-wally/wally-pipelined/src/fpu/FMA/tbgen/results.dat","w");

    // 64-bit format
    if((FmtE==1'b1) & (FMAFlgM != flags[4:0] || (!wnan && (FMAResM != ans)) || (wnan && ansnan && ~((XNaNE && (FMAResM[`FLEN-2:0] == {XExpE,1'b1,X[`NF-2:0]})) || (YNaNE && (FMAResM[`FLEN-2:0] == {YExpE,1'b1,Y[`NF-2:0]})) || (ZNaNE && (FMAResM[`FLEN-2:0] == {ZExpE,1'b1,Z[`NF-2:0]})) || (FMAResM[`FLEN-2:0] == ans[`FLEN-2:0]))))) begin
      $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
      if(FMAResM == 64'h8000000000000000) $display( "FMAResM=-zero ");
      if(XDenormE) $display( "xdenorm ");
      if(YDenormE) $display( "ydenorm ");
      if(ZDenormE) $display( "zdenorm ");
      if(FMAFlgM[4] != 0) $display( "invld ");
      if(FMAFlgM[2] != 0) $display( "ovrflw ");
      if(FMAFlgM[1] != 0) $display( "unflw ");
      // The sign is bit `FLEN-1; bit `FLEN does not exist and would read as x.
      if(FMAResM[`FLEN-1] && FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} && FMAResM[`NF-1:0] == 0) $display( "FMAResM=-inf ");
      if(~FMAResM[`FLEN-1] && FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} && FMAResM[`NF-1:0] == 0) $display( "FMAResM=+inf ");
      if(FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} && FMAResM[`NF-1:0] != 0 && ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN ");
      if(FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} && FMAResM[`NF-1:0] != 0 && FMAResM[`NF-1]) $display( "FMAResM=qutNaN ");
      if(ans[`FLEN-1] && ans[`FLEN-2:`NF] == {`NE{1'b1}} && ans[`NF-1:0] == 0) $display( "ans=-inf ");
      if(~ans[`FLEN-1] && ans[`FLEN-2:`NF] == {`NE{1'b1}} && ans[`NF-1:0] == 0) $display( "ans=+inf ");
      if(ans[`FLEN-2:`NF] == {`NE{1'b1}} && ans[`NF-1:0] != 0 && ~ans[`NF-1]) $display( "ans=sigNaN ");
      if(ans[`FLEN-2:`NF] == {`NE{1'b1}} && ans[`NF-1:0] != 0 && ans[`NF-1]) $display( "ans=qutNaN ");
      errors = errors + 1;

      $stop;
    end

    // 32-bit format (value held in bits [31:0])
    if((FmtE==1'b0)&(FMAFlgM != flags[4:0] || (!wnan && (FMAResM != ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[30:0] == {X[30:23],1'b1,X[21:0]})) || (YNaNE && (FMAResM[30:0] == {Y[30:23],1'b1,Y[21:0]})) || (ZNaNE && (FMAResM[30:0] == {Z[30:23],1'b1,Z[21:0]})) || (FMAResM[30:0] == ans[30:0]))) ))) begin
      $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
      // Classify on the low word only: that is where this bench keeps the
      // 32-bit value, so 64-bit constants with the value in the upper half
      // could never match.
      if(FMAResM[31:0] == 32'h80000000) $display( "FMAResM=-zero ");
      if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
      if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
      if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
      if(FMAFlgM[4] != 0) $display( "invld ");
      if(FMAFlgM[2] != 0) $display( "ovrflw ");
      if(FMAFlgM[1] != 0) $display( "unflw ");
      if(FMAResM[31:0] == 32'hFF800000) $display( "FMAResM=-inf ");
      if(FMAResM[31:0] == 32'h7F800000) $display( "FMAResM=+inf ");
      if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
      if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
      if(ans[31:0] == 32'hFF800000) $display( "ans=-inf ");
      if(ans[31:0] == 32'h7F800000) $display( "ans=+inf ");
      if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
      if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
      errors = errors + 1;
      //if (errors == 10)
      $stop;
    end

    vectornum = vectornum + 1;

    // An entry that $readmemh never wrote is all x. Compare against an
    // all-x pattern of the full entry width: a narrower sized literal is
    // zero-extended and therefore never equals an unloaded entry.
    if (testvectors[vectornum] === {(`FLEN*4+8){1'bx}}) begin
      $display("%d tests completed with %d errors", vectornum, errors);
      $stop;
    end
  end

endmodule
|
3
wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
Executable file
3
wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
Executable file
@ -0,0 +1,3 @@
|
||||
#!/bin/sh
# Generate the single-precision fused multiply-add vectors read by the FMA
# testbench: round-to-nearest-even, tininess detected after rounding.
# The raw TestFloat output is kept in testFloat; testFloatNoSpace is the same
# data with the spaces removed so that each line is one hex word.
set -e   # do not build testFloatNoSpace from a failed/partial generation

testfloat_gen f32_mulAdd -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat
tr -d ' ' < testFloat > testFloatNoSpace
|
||||
|
0
wally-pipelined/src/fpu/adder.sv
Executable file → Normal file
0
wally-pipelined/src/fpu/adder.sv
Executable file → Normal file
@ -1,117 +0,0 @@
|
||||
// 15-bit prefix adder built around the kogge_stone prefix tree.
//   a, b : addends        cin  : carry in
//   sum  : a + b + cin    cout : carry out
module bk15 (
  output        cout,
  output [14:0] sum,
  input  [14:0] a, b,
  input         cin
);

  wire [15:0] p, g;   // bit 0 carries cin, bits 15:1 belong to a/b bits 14:0
  wire [15:1] h, c;   // outputs of the prefix tree (h[15] is completed below)

  // pre-computation
  assign p = {a | b, 1'b1};
  assign g = {a & b, cin};

  // prefix tree
  kogge_stone prefix_tree (.h(h), .c(c), .p(p[14:0]), .g(g[14:0]));

  // post-computation
  assign h[15] = g[15] | c[15];
  assign sum   = (p[15:1] ^ h) | (g[15:1] & c);
  assign cout  = p[15] & h[15];

endmodule // bk15
|
||||
|
||||
// Prefix tree used by bk15.
//   p, g : per-bit inputs (bit 0 is the carry-in position)
//   h    : tree outputs H_k_0 for k = 1..14 (h[15] is left undriven here)
//   c    : c[1] = g[0], c[k+1] = p[k] & H_k_0 for k = 1..14
// The outputs are plain nets on purpose: the parent module drives h[15].
module kogge_stone (
  output [15:1] h,
  output [15:1] c,
  input  [14:0] p,
  input  [14:0] g
);

  // Stage 1 nets
  logic H_1_0;
  logic H_2_1, H_3_2, H_4_3, H_5_4, H_6_5, H_7_6, H_8_7,
        H_9_8, H_10_9, H_11_10, H_12_11, H_13_12, H_14_13;
  logic I_2_1, I_3_2, I_4_3, I_5_4, I_6_5, I_7_6, I_8_7,
        I_9_8, I_10_9, I_11_10, I_12_11, I_13_12, I_14_13;
  // Stage 2 nets
  logic H_2_0, H_3_0;
  logic H_4_1, H_5_2, H_6_3, H_7_4, H_8_5, H_9_6,
        H_10_7, H_11_8, H_12_9, H_13_10, H_14_11;
  logic I_4_1, I_5_2, I_6_3, I_7_4, I_8_5, I_9_6,
        I_10_7, I_11_8, I_12_9, I_13_10, I_14_11;
  // Stage 3 nets
  logic H_4_0, H_5_0, H_6_0, H_7_0;
  logic H_8_1, H_9_2, H_10_3, H_11_4, H_12_5, H_13_6, H_14_7;
  logic I_8_1, I_9_2, I_10_3, I_11_4, I_12_5, I_13_6, I_14_7;
  // Stage 4 nets
  logic H_8_0, H_9_0, H_10_0, H_11_0, H_12_0, H_13_0, H_14_0;

  // parallel-prefix, Kogge-Stone

  // Stage 1: Generates G/P pairs that span 1 bits
  rgry g_1_0   (.hout(H_1_0),                   .gin(g[1:0]));
  rblk b_2_1   (.hout(H_2_1),   .iout(I_2_1),   .gin(g[2:1]),   .pin(p[1:0]));
  rblk b_3_2   (.hout(H_3_2),   .iout(I_3_2),   .gin(g[3:2]),   .pin(p[2:1]));
  rblk b_4_3   (.hout(H_4_3),   .iout(I_4_3),   .gin(g[4:3]),   .pin(p[3:2]));
  rblk b_5_4   (.hout(H_5_4),   .iout(I_5_4),   .gin(g[5:4]),   .pin(p[4:3]));
  rblk b_6_5   (.hout(H_6_5),   .iout(I_6_5),   .gin(g[6:5]),   .pin(p[5:4]));
  rblk b_7_6   (.hout(H_7_6),   .iout(I_7_6),   .gin(g[7:6]),   .pin(p[6:5]));
  rblk b_8_7   (.hout(H_8_7),   .iout(I_8_7),   .gin(g[8:7]),   .pin(p[7:6]));
  rblk b_9_8   (.hout(H_9_8),   .iout(I_9_8),   .gin(g[9:8]),   .pin(p[8:7]));
  rblk b_10_9  (.hout(H_10_9),  .iout(I_10_9),  .gin(g[10:9]),  .pin(p[9:8]));
  rblk b_11_10 (.hout(H_11_10), .iout(I_11_10), .gin(g[11:10]), .pin(p[10:9]));
  rblk b_12_11 (.hout(H_12_11), .iout(I_12_11), .gin(g[12:11]), .pin(p[11:10]));
  rblk b_13_12 (.hout(H_13_12), .iout(I_13_12), .gin(g[13:12]), .pin(p[12:11]));
  rblk b_14_13 (.hout(H_14_13), .iout(I_14_13), .gin(g[14:13]), .pin(p[13:12]));

  // Stage 2: Generates G/P pairs that span 2 bits
  grey  g_2_0   (.gout(H_2_0),                   .gin({H_2_1, g[0]}),       .pin(I_2_1));
  grey  g_3_0   (.gout(H_3_0),                   .gin({H_3_2, H_1_0}),      .pin(I_3_2));
  black b_4_1   (.gout(H_4_1),   .pout(I_4_1),   .gin({H_4_3, H_2_1}),      .pin({I_4_3, I_2_1}));
  black b_5_2   (.gout(H_5_2),   .pout(I_5_2),   .gin({H_5_4, H_3_2}),      .pin({I_5_4, I_3_2}));
  black b_6_3   (.gout(H_6_3),   .pout(I_6_3),   .gin({H_6_5, H_4_3}),      .pin({I_6_5, I_4_3}));
  black b_7_4   (.gout(H_7_4),   .pout(I_7_4),   .gin({H_7_6, H_5_4}),      .pin({I_7_6, I_5_4}));
  black b_8_5   (.gout(H_8_5),   .pout(I_8_5),   .gin({H_8_7, H_6_5}),      .pin({I_8_7, I_6_5}));
  black b_9_6   (.gout(H_9_6),   .pout(I_9_6),   .gin({H_9_8, H_7_6}),      .pin({I_9_8, I_7_6}));
  black b_10_7  (.gout(H_10_7),  .pout(I_10_7),  .gin({H_10_9, H_8_7}),     .pin({I_10_9, I_8_7}));
  black b_11_8  (.gout(H_11_8),  .pout(I_11_8),  .gin({H_11_10, H_9_8}),    .pin({I_11_10, I_9_8}));
  black b_12_9  (.gout(H_12_9),  .pout(I_12_9),  .gin({H_12_11, H_10_9}),   .pin({I_12_11, I_10_9}));
  black b_13_10 (.gout(H_13_10), .pout(I_13_10), .gin({H_13_12, H_11_10}),  .pin({I_13_12, I_11_10}));
  black b_14_11 (.gout(H_14_11), .pout(I_14_11), .gin({H_14_13, H_12_11}),  .pin({I_14_13, I_12_11}));

  // Stage 3: Generates G/P pairs that span 4 bits
  grey  g_4_0  (.gout(H_4_0),                  .gin({H_4_1, g[0]}),      .pin(I_4_1));
  grey  g_5_0  (.gout(H_5_0),                  .gin({H_5_2, H_1_0}),     .pin(I_5_2));
  grey  g_6_0  (.gout(H_6_0),                  .gin({H_6_3, H_2_0}),     .pin(I_6_3));
  grey  g_7_0  (.gout(H_7_0),                  .gin({H_7_4, H_3_0}),     .pin(I_7_4));
  black b_8_1  (.gout(H_8_1),  .pout(I_8_1),   .gin({H_8_5, H_4_1}),     .pin({I_8_5, I_4_1}));
  black b_9_2  (.gout(H_9_2),  .pout(I_9_2),   .gin({H_9_6, H_5_2}),     .pin({I_9_6, I_5_2}));
  black b_10_3 (.gout(H_10_3), .pout(I_10_3),  .gin({H_10_7, H_6_3}),    .pin({I_10_7, I_6_3}));
  black b_11_4 (.gout(H_11_4), .pout(I_11_4),  .gin({H_11_8, H_7_4}),    .pin({I_11_8, I_7_4}));
  black b_12_5 (.gout(H_12_5), .pout(I_12_5),  .gin({H_12_9, H_8_5}),    .pin({I_12_9, I_8_5}));
  black b_13_6 (.gout(H_13_6), .pout(I_13_6),  .gin({H_13_10, H_9_6}),   .pin({I_13_10, I_9_6}));
  black b_14_7 (.gout(H_14_7), .pout(I_14_7),  .gin({H_14_11, H_10_7}),  .pin({I_14_11, I_10_7}));

  // Stage 4: Generates G/P pairs that span 8 bits
  grey g_8_0  (.gout(H_8_0),  .gin({H_8_1, g[0]}),    .pin(I_8_1));
  grey g_9_0  (.gout(H_9_0),  .gin({H_9_2, H_1_0}),   .pin(I_9_2));
  grey g_10_0 (.gout(H_10_0), .gin({H_10_3, H_2_0}),  .pin(I_10_3));
  grey g_11_0 (.gout(H_11_0), .gin({H_11_4, H_3_0}),  .pin(I_11_4));
  grey g_12_0 (.gout(H_12_0), .gin({H_12_5, H_4_0}),  .pin(I_12_5));
  grey g_13_0 (.gout(H_13_0), .gin({H_13_6, H_5_0}),  .pin(I_13_6));
  grey g_14_0 (.gout(H_14_0), .gin({H_14_7, H_6_0}),  .pin(I_14_7));

  // Final Stage: Apply c_k+1=p_k&H_k_0
  // All fourteen H_k_0 terms gathered into one vector, index k = 1..14.
  wire [14:1] span_to_0 = {H_14_0, H_13_0, H_12_0, H_11_0, H_10_0, H_9_0, H_8_0,
                           H_7_0,  H_6_0,  H_5_0,  H_4_0,  H_3_0,  H_2_0, H_1_0};

  assign h[14:1] = span_to_0;
  assign c       = {p[14:1] & span_to_0, g[0]};

endmodule // kogge_stone
|
@ -1,43 +0,0 @@
|
||||
|
||||
// Black cell: merges two adjacent (generate, propagate) pairs.
//   gout = gin[1] | (pin[1] & gin[0])
//   pout = pin[1] & pin[0]
module black (
  output       gout,
  output       pout,
  input  [1:0] gin,
  input  [1:0] pin
);

  assign gout = gin[1] | (gin[0] & pin[1]);
  assign pout = &pin;

endmodule // black
|
||||
|
||||
// Grey cell: like the black cell but produces only the generate term.
//   gout = gin[1] | (pin & gin[0])
module grey (
  output       gout,
  input  [1:0] gin,
  input        pin
);

  assign gout = gin[1] | (gin[0] & pin);

endmodule // grey
|
||||
|
||||
// Reduced black cell.
//   hout = gin[1] | gin[0]
//   iout = pin[1] & pin[0]
module rblk (
  output       hout,
  output       iout,
  input  [1:0] gin,
  input  [1:0] pin
);

  assign hout = |gin;
  assign iout = &pin;

endmodule // rblk
|
||||
|
||||
// Reduced grey cell.
//   hout = gin[1] | gin[0]
module rgry (
  output       hout,
  input  [1:0] gin
);

  assign hout = |gin;

endmodule // rgry
|
0
wally-pipelined/src/fpu/cla12.sv
Executable file → Normal file
0
wally-pipelined/src/fpu/cla12.sv
Executable file → Normal file
0
wally-pipelined/src/fpu/cla52.sv
Executable file → Normal file
0
wally-pipelined/src/fpu/cla52.sv
Executable file → Normal file
@ -207,7 +207,7 @@ module cla64 (S, X, Y, Sub);
|
||||
assign Bbar = B ^ {64{Sub}};
|
||||
|
||||
endmodule // cla64
|
||||
|
||||
|
||||
// This module performs 64-bit subtraction. It is used to get the two's complement
|
||||
// of main addition or subtraction in the floating point adder.
|
||||
|
||||
|
@ -5,19 +5,19 @@
|
||||
// and modifies the sign of op1. The converted operands are Float1
|
||||
// and Float2.
|
||||
|
||||
module convert_inputs(Float1, Float2, op1, op2, op_type, P);
|
||||
|
||||
input [63:0] op1; // 1st input operand (A)
|
||||
input [63:0] op2; // 2nd input operand (B)
|
||||
input [3:0] op_type; // Function opcode
|
||||
input P; // Result Precision (0 for double, 1 for single)
|
||||
module convert_inputs(
|
||||
input [63:0] op1, // 1st input operand (A)
|
||||
input [63:0] op2, // 2nd input operand (B)
|
||||
input [3:0] op_type, // Function opcode
|
||||
input P, // Result Precision (0 for double, 1 for single)
|
||||
|
||||
output [63:0] Float1; // Converted 1st input operand
|
||||
output [63:0] Float2; // Converted 2nd input operand
|
||||
|
||||
wire conv_SP; // Convert from SP to DP
|
||||
wire negate; // Operation is negation
|
||||
wire abs_val; // Operation is absolute value
|
||||
output [63:0] Float1, // Converted 1st input operand
|
||||
output [63:0] Float2 // Converted 2nd input operand
|
||||
);
|
||||
|
||||
wire conv_SP; // Convert from SP to DP
|
||||
wire negate; // Operation is negation
|
||||
wire abs_val; // Operation is absolute value
|
||||
wire Zexp1; // One if the exponent of op1 is zero
|
||||
wire Zexp2; // One if the exponent of op2 is zero
|
||||
wire Oexp1; // One if the exponent of op1 is all ones
|
||||
@ -33,14 +33,6 @@ module convert_inputs(Float1, Float2, op1, op2, op_type, P);
|
||||
assign Zexp2 = ~(|op2[30:23]);
|
||||
assign Oexp1 = (&op1[30:23]);
|
||||
assign Oexp2 = (&op2[30:23]);
|
||||
// assign Zexp1 = ~(op1[62] | op1[61] | op1[60] | op1[59] |
|
||||
// op1[58] | op1[57] | op1[56] | op1[55]);
|
||||
// assign Zexp2 = ~(op2[62] | op2[61] | op2[60] | op2[59] |
|
||||
// op2[58] | op2[57] | op2[56] | op2[55]);
|
||||
// assign Oexp1 = (op1[62] & op1[61] & op1[60] & op1[59] &
|
||||
// op1[58] & op1[57] & op1[56] & op1[55]);
|
||||
// assign Oexp2 = (op2[62] & op2[61] & op2[60] & op2[59] &
|
||||
// op2[58] & op2[57] & op2[56] &op2[55]);
|
||||
|
||||
// Conditionally convert op1. Lower 29 bits are zero for single precision.
|
||||
assign Float1[62:29] = conv_SP ? {op1[30], {3{(~op1[30]&~Zexp1)|Oexp1}}, op1[29:0]}
|
||||
@ -57,7 +49,7 @@ module convert_inputs(Float1, Float2, op1, op2, op_type, P);
|
||||
// is negation (op_type = 101) or absolute value (op_type = 100)
|
||||
|
||||
assign negate = op_type[2] & ~op_type[1] & op_type[0];
|
||||
assign abs_val = op_type[2] & ~op_type[1] & ~op_type[0];
|
||||
assign abs_val = op_type[2] & ~op_type[1] & ~op_type[0]; //*** remove abs_val
|
||||
assign Float1[63] = conv_SP ? (op1[31] ^ negate) & ~abs_val : (op1[63] ^ negate) & ~abs_val;
|
||||
assign Float2[63] = conv_SP ? op2[31] : op2[63];
|
||||
|
||||
|
@ -3,21 +3,22 @@
|
||||
// it conditionally converts single precision values to double
|
||||
// precision values and modifies the sign of op1.
|
||||
// The converted operands are Float1 and Float2.
|
||||
module convert_inputs_div (Float1, Float2b, op1, op2, op_type, P);
|
||||
module convert_inputs_div (
|
||||
|
||||
input logic [63:0] op1; // 1st input operand (A)
|
||||
input logic [63:0] op2; // 2nd input operand (B)
|
||||
input logic P; // Result Precision (0 for double, 1 for single)
|
||||
input logic op_type; // Operation
|
||||
input logic [63:0] op1, // 1st input operand (A)
|
||||
input logic [63:0] op2, // 2nd input operand (B)
|
||||
input logic P, // Result Precision (0 for double, 1 for single)
|
||||
input logic op_type, // Operation
|
||||
|
||||
output logic [63:0] Float1; // Converted 1st input operand
|
||||
output logic [63:0] Float2b; // Converted 2nd input operand
|
||||
output logic [63:0] Float1, // Converted 1st input operand
|
||||
output logic [63:0] Float2b // Converted 2nd input operand
|
||||
);
|
||||
|
||||
logic [63:0] Float2;
|
||||
logic Zexp1; // One if the exponent of op1 is zero
|
||||
logic Zexp2; // One if the exponent of op2 is zero
|
||||
logic Oexp1; // One if the exponent of op1 is all ones
|
||||
logic Oexp2; // One if the exponent of op2 is all ones
|
||||
logic Zexp1; // One if the exponent of op1 is zero
|
||||
logic Zexp2; // One if the exponent of op2 is zero
|
||||
logic Oexp1; // One if the exponent of op1 is all ones
|
||||
logic Oexp2; // One if the exponent of op2 is all ones
|
||||
|
||||
// Test if the input exponent is zero, because if it is then the
|
||||
// exponent of the converted number should be zero.
|
||||
|
@ -1,25 +1,21 @@
|
||||
module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out,
|
||||
regr_out, d, n, sel_muxa, sel_muxb, sel_muxr, reset, clk, load_rega, load_regb,
|
||||
load_regc, load_regd, load_regr, load_regs, P, op_type, exp_odd);
|
||||
module divconv (
|
||||
|
||||
input logic [52:0] d, n;
|
||||
input logic [2:0] sel_muxa, sel_muxb;
|
||||
input logic sel_muxr;
|
||||
input logic load_rega, load_regb, load_regc, load_regd;
|
||||
input logic load_regr, load_regs;
|
||||
input logic P;
|
||||
input logic op_type;
|
||||
input logic exp_odd;
|
||||
input logic reset;
|
||||
input logic clk;
|
||||
input logic [52:0] d, n,
|
||||
input logic [2:0] sel_muxa, sel_muxb,
|
||||
input logic sel_muxr,
|
||||
input logic load_rega, load_regb, load_regc, load_regd,
|
||||
input logic load_regr, load_regs,
|
||||
input logic P,
|
||||
input logic op_type,
|
||||
input logic exp_odd,
|
||||
input logic reset,
|
||||
input logic clk,
|
||||
|
||||
output logic [63:0] q1, qp1, qm1;
|
||||
output logic [63:0] q0, qp0, qm0;
|
||||
output logic [63:0] rega_out, regb_out, regc_out, regd_out;
|
||||
output logic [127:0] regr_out;
|
||||
|
||||
supply1 vdd;
|
||||
supply0 vss;
|
||||
output logic [63:0] q1, qp1, qm1,
|
||||
output logic [63:0] q0, qp0, qm0,
|
||||
output logic [63:0] rega_out, regb_out, regc_out, regd_out,
|
||||
output logic [127:0] regr_out
|
||||
);
|
||||
|
||||
logic [63:0] muxa_out, muxb_out;
|
||||
logic [10:0] ia_div, ia_sqrt;
|
||||
@ -36,12 +32,12 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_o
|
||||
logic [63:0] q_const, qp_const, qm_const;
|
||||
logic [63:0] d2, n2;
|
||||
logic [11:0] d3;
|
||||
logic muxr_out;
|
||||
logic cout1, cout2, cout3, cout4, cout5, cout6, cout7;
|
||||
logic muxr_out;
|
||||
logic cout1, cout2, cout3, cout4, cout5, cout6, cout7;
|
||||
|
||||
// Check if exponent is odd for sqrt
|
||||
// If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA
|
||||
assign d2 = (exp_odd&op_type) ? {vss,d,10'h0} : {d,11'h0};
|
||||
assign d2 = (exp_odd&op_type) ? {1'b0,d,10'h0} : {d,11'h0};
|
||||
assign n2 = op_type ? d2 : {n,11'h0};
|
||||
|
||||
// IA div/sqrt
|
||||
@ -62,10 +58,7 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_o
|
||||
mux2 #(64) mx4 (q0, q1, q1[63], mcand_q);
|
||||
mux2 #(64) mx5 (muxb_out, mcand_q, sel_muxr&op_type, mplier);
|
||||
mux2 #(64) mx6 (muxa_out, mcand_q, sel_muxr, mcand);
|
||||
// TDM multiplier (carry/save)
|
||||
multiplier mult1 (mcand, mplier, Sum, Carry);
|
||||
// Q*D - N (reversed but changed in rounder.v to account for sign reversal)
|
||||
csa #(128) csa1 (Sum, Carry, constant, Sum2, Carry2);
|
||||
// Add ulp for subtraction in remainder
|
||||
mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out);
|
||||
|
||||
@ -74,24 +67,17 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_o
|
||||
mux2 #(64) mx9 ({64'h0000_0000_0000_0A00}, {64'h0000_0140_0000_0000}, P, qp_const);
|
||||
mux2 #(64) mxA ({64'hFFFF_FFFF_FFFF_F9FF}, {64'hFFFF_FF3F_FFFF_FFFF}, P, qm_const);
|
||||
|
||||
// CPA (from CSA)/Remainder addition/subtraction
|
||||
// adder #(128) cpa1 (Sum2, Carry2, muxr_out, mul_out, cout1);
|
||||
assign {cout1, mul_out} = Sum2 + Carry2 + muxr_out;
|
||||
// CPA (from CSA)/Remainder addition/subtraction
|
||||
assign {cout1, mul_out} = (mcand*mplier) + constant + muxr_out;
|
||||
|
||||
// Assuming [1,2) - q1
|
||||
// adder #(64) cpa2 (regb_out, q_const, 1'b0, q_out1, cout2);
|
||||
assign {cout2, q_out1} = regb_out + q_const;
|
||||
// adder #(64) cpa3 (regb_out, qp_const, 1'b0, qp_out1, cout3);
|
||||
assign {cout3, qp_out1} = regb_out + qp_const;
|
||||
// adder #(64) cpa4 (regb_out, qm_const, 1'b1, qm_out1, cout4);
|
||||
assign {cout4, qm_out1} = regb_out + qm_const + 1'b1;
|
||||
// Assuming [0.5,1) - q0
|
||||
// adder #(64) cpa5 ({regb_out[62:0], vss}, q_const, 1'b0, q_out0, cout5);
|
||||
assign {cout5, q_out0} = {regb_out[62:0], vss} + q_const;
|
||||
// adder #(64) cpa6 ({regb_out[62:0], vss}, qp_const, 1'b0, qp_out0, cout6);
|
||||
assign {cout6, qp_out0} = {regb_out[62:0], vss} + qp_const;
|
||||
// adder #(64) cpa7 ({regb_out[62:0], vss}, qm_const, 1'b1, qm_out0, cout7);
|
||||
assign {cout7, qm_out0} = {regb_out[62:0], vss} + qm_const + 1'b1;
|
||||
assign {cout5, q_out0} = {regb_out[62:0], 1'b0} + q_const;
|
||||
assign {cout6, qp_out0} = {regb_out[62:0], 1'b0} + qp_const;
|
||||
assign {cout7, qm_out0} = {regb_out[62:0], 1'b0} + qm_const + 1'b1;
|
||||
|
||||
// One's complement instead of two's complement (for hw efficiency)
|
||||
assign three = {~mul_out[126], mul_out[126], ~mul_out[125:63]};
|
||||
@ -114,151 +100,3 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_o
|
||||
|
||||
endmodule // divconv
|
||||
|
||||
// module adder #(parameter WIDTH=8)
|
||||
// (input logic [WIDTH-1:0] a, b,
|
||||
// input logic cin,
|
||||
// output logic [WIDTH-1:0] y,
|
||||
// output logic cout);
|
||||
|
||||
// assign {cout, y} = a + b + cin;
|
||||
|
||||
// endmodule // adder
|
||||
|
||||
// module flopenr #(parameter WIDTH = 8)
|
||||
// (input logic clk, reset, en,
|
||||
// input logic [WIDTH-1:0] d,
|
||||
// output logic [WIDTH-1:0] q);
|
||||
|
||||
// always_ff @(posedge clk, posedge reset)
|
||||
// if (reset) q <= #10 0;
|
||||
// else if (en) q <= #10 d;
|
||||
|
||||
// endmodule // flopenr
|
||||
|
||||
// module flopr #(parameter WIDTH = 8)
|
||||
// (input logic clk, reset,
|
||||
// input logic [WIDTH-1:0] d,
|
||||
// output logic [WIDTH-1:0] q);
|
||||
|
||||
// always_ff @(posedge clk, posedge reset)
|
||||
// if (reset) q <= #10 0;
|
||||
// else q <= #10 d;
|
||||
|
||||
// endmodule // flopr
|
||||
|
||||
// module flopenrc #(parameter WIDTH = 8)
|
||||
// (input logic clk, reset, en, clear,
|
||||
// input logic [WIDTH-1:0] d,
|
||||
// output logic [WIDTH-1:0] q);
|
||||
|
||||
// always_ff @(posedge clk, posedge reset)
|
||||
// if (reset) q <= #10 0;
|
||||
// else if (en)
|
||||
// if (clear) q <= #10 0;
|
||||
// else q <= #10 d;
|
||||
|
||||
// endmodule // flopenrc
|
||||
|
||||
// module floprc #(parameter WIDTH = 8)
|
||||
// (input logic clk, reset, clear,
|
||||
// input logic [WIDTH-1:0] d,
|
||||
// output logic [WIDTH-1:0] q);
|
||||
|
||||
// always_ff @(posedge clk, posedge reset)
|
||||
// if (reset) q <= #10 0;
|
||||
// else
|
||||
// if (clear) q <= #10 0;
|
||||
// else q <= #10 d;
|
||||
|
||||
// endmodule // floprc
|
||||
|
||||
// module mux2 #(parameter WIDTH = 8)
|
||||
// (input logic [WIDTH-1:0] d0, d1,
|
||||
// input logic s,
|
||||
// output logic [WIDTH-1:0] y);
|
||||
|
||||
// assign y = s ? d1 : d0;
|
||||
|
||||
// endmodule // mux2
|
||||
|
||||
// module mux3 #(parameter WIDTH = 8)
|
||||
// (input logic [WIDTH-1:0] d0, d1, d2,
|
||||
// input logic [1:0] s,
|
||||
// output logic [WIDTH-1:0] y);
|
||||
|
||||
// assign y = s[1] ? d2 : (s[0] ? d1 : d0);
|
||||
|
||||
// endmodule // mux3
|
||||
|
||||
// module mux4 #(parameter WIDTH = 8)
|
||||
// (input logic [WIDTH-1:0] d0, d1, d2, d3,
|
||||
// input logic [1:0] s,
|
||||
// output logic [WIDTH-1:0] y);
|
||||
|
||||
// assign y = s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0);
|
||||
|
||||
// endmodule // mux4
|
||||
|
||||
// module mux5 #(parameter WIDTH = 8)
|
||||
// (input logic [WIDTH-1:0] d0, d1, d2, d3, d4,
|
||||
// input logic [2:0] s,
|
||||
// output logic [WIDTH-1:0] y);
|
||||
|
||||
// always_comb
|
||||
// casez (s)
|
||||
// 3'b000 : y = d0;
|
||||
// 3'b001 : y = d1;
|
||||
// 3'b010 : y = d2;
|
||||
// 3'b011 : y = d3;
|
||||
// 3'b1?? : y = d4;
|
||||
// endcase // casez (s)
|
||||
|
||||
// endmodule // mux5
|
||||
|
||||
// module mux6 #(parameter WIDTH = 8)
|
||||
// (input logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5,
|
||||
// input logic [2:0] s,
|
||||
// output logic [WIDTH-1:0] y);
|
||||
|
||||
// always_comb
|
||||
// casez (s)
|
||||
// 3'b000 : y = d0;
|
||||
// 3'b001 : y = d1;
|
||||
// 3'b010 : y = d2;
|
||||
// 3'b011 : y = d3;
|
||||
// 3'b10? : y = d4;
|
||||
// 3'b11? : y = d5;
|
||||
// endcase // casez (s)
|
||||
|
||||
// endmodule // mux6
|
||||
|
||||
// Equality comparator: y is 1 when the WIDTH-bit inputs a and b are equal.
module eqcmp #(parameter WIDTH = 8) (
  input  logic [WIDTH-1:0] a, b,
  output logic             y
);

  // No bit position may differ between a and b.
  assign y = ~|(a ^ b);

endmodule // eqcmp
|
||||
|
||||
// module fa (input logic a, b, c, output logic sum, carry);
|
||||
|
||||
// assign sum = a^b^c;
|
||||
// assign carry = a&b|a&c|b&c;
|
||||
|
||||
// endmodule // fa
|
||||
|
||||
// module csa #(parameter WIDTH=8)
|
||||
// (input logic [WIDTH-1:0] a, b, c,
|
||||
// output logic [WIDTH-1:0] sum, carry);
|
||||
|
||||
// logic [WIDTH:0] carry_temp;
|
||||
// genvar i;
|
||||
// generate
|
||||
// for (i=0;i<WIDTH;i=i+1)
|
||||
// begin : genbit
|
||||
// fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
|
||||
// end
|
||||
// endgenerate
|
||||
// assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};
|
||||
|
||||
// endmodule // csa
|
||||
|
@ -115,6 +115,6 @@ module exception (Ztype, Invalid, Denorm, ANorm, BNorm, Sub, A, B, op_type);
|
||||
|
||||
// Determine if the effective operation is subtraction
|
||||
assign Sub = ~(op_type[3] & ~op_type[0]) & ( (op_type[3] & op_type[0]) | (add_sub & (A[63]^B[63]^op_type[0])) );
|
||||
|
||||
|
||||
endmodule // exception
|
||||
|
||||
|
@ -1,16 +1,13 @@
|
||||
// Exception logic for the floating point adder. Note: We may
|
||||
// actually want to move to where the result is computed.
|
||||
module exception_div (Ztype, Invalid, Denorm, ANorm, BNorm, A, B, op_type);
|
||||
module exception_div (
|
||||
|
||||
input logic [63:0] A; // 1st input operand (op1)
|
||||
input logic [63:0] B; // 2nd input operand (op2)
|
||||
input logic op_type; // Determine operation
|
||||
|
||||
output logic [2:0] Ztype; // Indicates type of result (Z)
|
||||
output logic Invalid; // Invalid operation exception
|
||||
output logic Denorm; // Denormalized input
|
||||
output logic ANorm; // A is not zero or Denorm
|
||||
output logic BNorm; // B is not zero or Denorm
|
||||
input logic [63:0] A, // 1st input operand (op1)
|
||||
input logic [63:0] B, // 2nd input operand (op2)
|
||||
input logic op_type, // Determine operation
|
||||
output logic [2:0] Ztype, // Indicates type of result (Z)
|
||||
output logic Invalid // Invalid operation exception
|
||||
);
|
||||
|
||||
logic AzeroM; // '1' if the mantissa of A is zero
|
||||
logic BzeroM; // '1' if the mantissa of B is zero
|
||||
@ -18,8 +15,6 @@ module exception_div (Ztype, Invalid, Denorm, ANorm, BNorm, A, B, op_type);
|
||||
logic BzeroE; // '1' if the exponent of B is zero
|
||||
logic AonesE; // '1' if the exponent of A is all ones
|
||||
logic BonesE; // '1' if the exponent of B is all ones
|
||||
logic ADenorm; // '1' if A is a denomalized number
|
||||
logic BDenorm; // '1' if B is a denomalized number
|
||||
logic AInf; // '1' if A is infinite
|
||||
logic BInf; // '1' if B is infinite
|
||||
logic AZero; // '1' if A is 0
|
||||
@ -32,11 +27,10 @@ module exception_div (Ztype, Invalid, Denorm, ANorm, BNorm, A, B, op_type);
|
||||
logic ZInf; // '1' if result Z is an infnity
|
||||
logic Zero; // '1' if result is zero
|
||||
|
||||
parameter [51:0] fifty_two_zeros = 52'h0; // Use parameter?
|
||||
|
||||
// Determine if mantissas are all zeros
|
||||
assign AzeroM = (A[51:0] == fifty_two_zeros);
|
||||
assign BzeroM = (B[51:0] == fifty_two_zeros);
|
||||
assign AzeroM = (A[51:0] == 52'h0);
|
||||
assign BzeroM = (B[51:0] == 52'h0);
|
||||
|
||||
// Determine if exponents are all ones or all zeros
|
||||
assign AonesE = A[62]&A[61]&A[60]&A[59]&A[58]&A[57]&A[56]&A[55]&A[54]&A[53]&A[52];
|
||||
@ -45,8 +39,6 @@ module exception_div (Ztype, Invalid, Denorm, ANorm, BNorm, A, B, op_type);
|
||||
assign BzeroE = ~(B[62]|B[61]|B[60]|B[59]|B[58]|B[57]|B[56]|B[55]|B[54]|B[53]|B[52]);
|
||||
|
||||
// Determine special cases. Note: Zero is not really a special case.
|
||||
assign ADenorm = AzeroE & ~AzeroM;
|
||||
assign BDenorm = BzeroE & ~BzeroM;
|
||||
assign AInf = AonesE & AzeroM;
|
||||
assign BInf = BonesE & BzeroM;
|
||||
assign ANaN = AonesE & ~AzeroM;
|
||||
@ -56,17 +48,11 @@ module exception_div (Ztype, Invalid, Denorm, ANorm, BNorm, A, B, op_type);
|
||||
assign AZero = AzeroE & AzeroM;
|
||||
assign BZero = BzeroE & BzeroE;
|
||||
|
||||
// A and B are normalized if their exponents are not zero.
|
||||
assign ANorm = ~AzeroE;
|
||||
assign BNorm = ~BzeroE;
|
||||
|
||||
// An "Invalid Operation" exception occurs if (A or B is a signalling NaN)
|
||||
// or (A and B are both Infinite)
|
||||
assign Invalid = ASNaN | BSNaN | (((AInf & BInf) | (AZero & BZero))&~op_type) |
|
||||
(A[63] & op_type);
|
||||
|
||||
// The Denorm flag is set if A is denormalized or if B is denormalized
|
||||
assign Denorm = ADenorm | BDenorm;
|
||||
|
||||
// The result is a quiet NaN if (an "Invalid Operation" exception occurs)
|
||||
// or (A is a NaN) or (B is a NaN).
|
||||
|
@ -29,14 +29,14 @@
|
||||
module faddcvt(
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic FlushM,
|
||||
input logic StallM,
|
||||
input logic FlushM, // flush the memory stage
|
||||
input logic StallM, // stall the memory stage
|
||||
input logic [63:0] FSrcXE, // 1st input operand (A)
|
||||
input logic [63:0] FSrcYE, // 2nd input operand (B)
|
||||
input logic [3:0] FOpCtrlE, FOpCtrlM, // Function opcode
|
||||
input logic FmtE, FmtM, // Result Precision (0 for double, 1 for single)
|
||||
input logic [2:0] FrmM, // Rounding mode - specify values
|
||||
output logic [63:0] FAddResM, // Result of operation
|
||||
input logic FmtE, FmtM, // Result Precision (0 for double, 1 for single)
|
||||
input logic [2:0] FrmM, // Rounding mode - specify values
|
||||
output logic [63:0] FAddResM, // Result of operation
|
||||
output logic [4:0] FAddFlgM); // IEEE exception flags
|
||||
|
||||
logic [63:0] AddSumE, AddSumM;
|
||||
@ -51,7 +51,6 @@ module faddcvt(
|
||||
logic AddInvalidE, AddInvalidM;
|
||||
logic AddDenormInE, AddDenormInM;
|
||||
logic AddSwapE, AddSwapM;
|
||||
logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2
|
||||
logic AddSignAE, AddSignAM;
|
||||
logic AddConvertE, AddConvertM;
|
||||
logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M;
|
||||
@ -62,8 +61,9 @@ module faddcvt(
|
||||
fpuaddcvt1 fpadd1 (.FSrcXE, .FSrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE,
|
||||
.AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE,
|
||||
.AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE,
|
||||
.AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE);
|
||||
.AddDenormInE, .AddConvertE, .AddSwapE);
|
||||
|
||||
// E/M pipeline registers
|
||||
flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM);
|
||||
flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM);
|
||||
flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM);
|
||||
@ -72,9 +72,9 @@ module faddcvt(
|
||||
flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM);
|
||||
flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM);
|
||||
flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
|
||||
flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM,
|
||||
{AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE},
|
||||
{AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM});
|
||||
flopenrc #(14) EMRegAdd9(clk, reset, FlushM, ~StallM,
|
||||
{AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddSignAE},
|
||||
{AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM});
|
||||
|
||||
|
||||
fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M,
|
||||
@ -83,53 +83,52 @@ module faddcvt(
|
||||
.AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM);
|
||||
endmodule
|
||||
|
||||
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, FSrcXE, FSrcYE, FOpCtrlE, FmtE);
|
||||
module fpuaddcvt1 (
|
||||
input logic [63:0] FSrcXE, // 1st input operand (A)
|
||||
input logic [63:0] FSrcYE, // 2nd input operand (B)
|
||||
input logic [3:0] FOpCtrlE, // Function opcode
|
||||
input logic FmtE, // Result Precision (1 for double, 0 for single)
|
||||
|
||||
input logic [63:0] FSrcXE; // 1st input operand (A)
|
||||
input logic [63:0] FSrcYE; // 2nd input operand (B)
|
||||
input logic [3:0] FOpCtrlE; // Function opcode
|
||||
input logic FmtE; // Result Precision (1 for double, 0 for single)
|
||||
output logic [63:0] AddFloat1E,
|
||||
output logic [63:0] AddFloat2E,
|
||||
output logic [10:0] AddExponentE,
|
||||
output logic [10:0] AddExpPostSumE,
|
||||
output logic [11:0] AddExp1DenormE, AddExp2DenormE,//KEP used to be [10:0]
|
||||
output logic [63:0] AddSumE, AddSumTcE,
|
||||
output logic [3:0] AddSelInvE,
|
||||
output logic AddCorrSignE,
|
||||
output logic AddSignAE,
|
||||
output logic AddOp1NormE, AddOp2NormE,
|
||||
output logic AddOpANormE, AddOpBNormE,
|
||||
output logic AddInvalidE,
|
||||
output logic AddDenormInE,
|
||||
output logic AddConvertE,
|
||||
output logic AddSwapE
|
||||
);
|
||||
|
||||
wire [5:0] ZP_mantissaA;
|
||||
wire [5:0] ZP_mantissaB;
|
||||
wire ZV_mantissaA;
|
||||
wire ZV_mantissaB;
|
||||
|
||||
wire P;
|
||||
assign P = ~FmtE;
|
||||
|
||||
wire [63:0] IntValue;
|
||||
wire [11:0] exp1, exp2;
|
||||
wire [11:0] exp_diff1, exp_diff2;
|
||||
wire [11:0] exp_shift;
|
||||
wire [51:0] mantissaA;
|
||||
wire [56:0] mantissaA1;
|
||||
wire [63:0] mantissaA3;
|
||||
wire [51:0] mantissaB;
|
||||
wire [56:0] mantissaB1, mantissaB2;
|
||||
wire [63:0] mantissaB3;
|
||||
wire exp_gt63;
|
||||
wire Sticky_out;
|
||||
wire sub;
|
||||
wire zeroB;
|
||||
wire [5:0] align_shift;
|
||||
|
||||
output logic [63:0] AddFloat1E;
|
||||
output logic [63:0] AddFloat2E;
|
||||
output logic [10:0] AddExponentE;
|
||||
output logic [10:0] AddExpPostSumE;
|
||||
output logic [11:0] AddExp1DenormE, AddExp2DenormE;//KEP used to be [10:0]
|
||||
output logic [63:0] AddSumE, AddSumTcE;
|
||||
output logic [3:0] AddSelInvE;
|
||||
output logic AddCorrSignE;
|
||||
output logic AddSignAE;
|
||||
output logic AddOp1NormE, AddOp2NormE;
|
||||
output logic AddOpANormE, AddOpBNormE;
|
||||
output logic AddInvalidE;
|
||||
output logic AddDenormInE;
|
||||
// output logic exp_valid;
|
||||
output logic AddConvertE;
|
||||
output logic AddSwapE;
|
||||
output logic AddNormOvflowE;
|
||||
wire [5:0] ZP_mantissaA;
|
||||
wire [5:0] ZP_mantissaB;
|
||||
wire ZV_mantissaA;
|
||||
wire ZV_mantissaB;
|
||||
wire [63:0] IntValue;
|
||||
wire [11:0] exp1, exp2;
|
||||
wire [11:0] exp_diff1, exp_diff2;
|
||||
wire [11:0] exp_shift;
|
||||
wire [51:0] mantissaA;
|
||||
wire [56:0] mantissaA1;
|
||||
wire [63:0] mantissaA3;
|
||||
wire [51:0] mantissaB;
|
||||
wire [56:0] mantissaB1, mantissaB2;
|
||||
wire [63:0] mantissaB3;
|
||||
wire exp_gt63;
|
||||
wire Sticky_out;
|
||||
wire sub;
|
||||
wire zeroB;
|
||||
wire [5:0] align_shift;
|
||||
|
||||
// Convert the input operands to their appropriate forms based on
|
||||
// the original operands, the FOpCtrlE, and their precision P.
|
||||
@ -137,7 +136,7 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
|
||||
// and the sign of the first operand is set appropriately based on
|
||||
// if the operation is absolute value or negation.
|
||||
|
||||
convert_inputs conv1 (AddFloat1E, AddFloat2E, FSrcXE, FSrcYE, FOpCtrlE, P);
|
||||
convert_inputs conv1 (.Float1(AddFloat1E), .Float2(AddFloat2E), .op1(FSrcXE), .op2(FSrcYE), .op_type(FOpCtrlE), .P);
|
||||
|
||||
// Test for exceptions and return the "Invalid Operation" and
|
||||
// "Denormalized" Input Flags. The "AddSelInvE" is used in
|
||||
@ -247,7 +246,7 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
|
||||
|
||||
// Finds normal underflow result to determine whether to round final exponent down
|
||||
//***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be
|
||||
assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0]) ? 1'b1 : (AddSumE[63] ? AddSumTcE[52] : AddSumE[52]);
|
||||
// assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0]) ? 1'b1 : (AddSumE[63] ? AddSumTcE[52] : AddSumE[52]);
|
||||
|
||||
endmodule // fpadd
|
||||
|
||||
@ -281,32 +280,28 @@ endmodule // fpadd
|
||||
//
|
||||
|
||||
|
||||
module fpuaddcvt2 (FAddResM, FAddFlgM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);
|
||||
module fpuaddcvt2 (
|
||||
input [2:0] FrmM, // Rounding mode - specify values
|
||||
input [3:0] FOpCtrlM, // Function opcode
|
||||
input FmtM, // Result Precision (0 for double, 1 for single)
|
||||
input [63:0] AddSumM, AddSumTcM,
|
||||
input [63:0] AddFloat1M,
|
||||
input [63:0] AddFloat2M,
|
||||
input [11:0] AddExp1DenormM, AddExp2DenormM,
|
||||
input [10:0] AddExponentM, AddExpPostSumM,
|
||||
input [3:0] AddSelInvM,
|
||||
input AddOp1NormM, AddOp2NormM,
|
||||
input AddOpANormM, AddOpBNormM,
|
||||
input AddInvalidM,
|
||||
input AddDenormInM,
|
||||
input AddSignAM,
|
||||
input AddCorrSignM,
|
||||
input AddConvertM,
|
||||
input AddSwapM,
|
||||
|
||||
input [2:0] FrmM; // Rounding mode - specify values
|
||||
input [3:0] FOpCtrlM; // Function opcode
|
||||
input FmtM; // Result Precision (0 for double, 1 for single)
|
||||
// input AddOvEnM; // Overflow trap enabled
|
||||
// input AddUnEnM; // Underflow trap enabled
|
||||
input [63:0] AddSumM, AddSumTcM;
|
||||
input [63:0] AddFloat1M;
|
||||
input [63:0] AddFloat2M;
|
||||
input [11:0] AddExp1DenormM, AddExp2DenormM;
|
||||
input [10:0] AddExponentM, AddExpPostSumM; //exp_pre;
|
||||
//input exp_valid;
|
||||
input [3:0] AddSelInvM;
|
||||
input AddOp1NormM, AddOp2NormM;
|
||||
input AddOpANormM, AddOpBNormM;
|
||||
input AddInvalidM;
|
||||
input AddDenormInM;
|
||||
input AddSignAM;
|
||||
input AddCorrSignM;
|
||||
input AddConvertM;
|
||||
input AddSwapM;
|
||||
// input AddNormOvflowM;
|
||||
|
||||
output [63:0] FAddResM; // Result of operation
|
||||
output [4:0] FAddFlgM; // IEEE exception flags
|
||||
output [63:0] FAddResM, // Result of operation
|
||||
output [4:0] FAddFlgM // IEEE exception flags
|
||||
);
|
||||
wire AddDenormM; // AddDenormM on input or output
|
||||
|
||||
wire P;
|
||||
@ -322,7 +317,6 @@ module fpuaddcvt2 (FAddResM, FAddFlgM, AddSumM, AddSumTcM, AddSelInvM, AddExpPos
|
||||
wire Sticky_out;
|
||||
wire sign_corr;
|
||||
wire zeroB;
|
||||
wire [10:0] AddExpPostSumM;
|
||||
wire mantissa_comp;
|
||||
wire mantissa_comp_sum;
|
||||
wire mantissa_comp_sum_tc;
|
||||
|
@ -2,19 +2,21 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fclassify (
|
||||
input logic XSgnE,
|
||||
input logic XNaNE,
|
||||
input logic XSNaNE,
|
||||
input logic XNormE,
|
||||
input logic XDenormE,
|
||||
input logic XZeroE,
|
||||
input logic XInfE,
|
||||
output logic [63:0] ClassResE
|
||||
input logic XSgnE, // sign bit
|
||||
input logic XNaNE, // is NaN
|
||||
input logic XSNaNE, // is signaling NaN
|
||||
input logic XNormE, // is normal
|
||||
input logic XDenormE, // is denormal
|
||||
input logic XZeroE, // is zero
|
||||
input logic XInfE, // is infinity
|
||||
output logic [63:0] ClassResE // classify result
|
||||
);
|
||||
|
||||
logic PInf, PZero, PNorm, PDenorm;
|
||||
logic NInf, NZero, NNorm, NDenorm;
|
||||
|
||||
|
||||
// determine the sub categories
|
||||
assign PInf = ~XSgnE&XInfE;
|
||||
assign NInf = XSgnE&XInfE;
|
||||
assign PNorm = ~XSgnE&XNormE;
|
||||
|
@ -1,20 +1,21 @@
|
||||
|
||||
module fctrl (
|
||||
input logic [6:0] Funct7D,
|
||||
input logic [6:0] OpD,
|
||||
input logic [4:0] Rs2D,
|
||||
input logic [2:0] Funct3D,
|
||||
input logic [2:0] FRM_REGW,
|
||||
output logic IllegalFPUInstrD,
|
||||
output logic FRegWriteD,
|
||||
output logic FDivStartD,
|
||||
output logic [2:0] FResultSelD,
|
||||
output logic [3:0] FOpCtrlD,
|
||||
output logic [1:0] FResSelD,
|
||||
output logic [1:0] FIntResSelD,
|
||||
output logic FmtD,
|
||||
output logic [2:0] FrmD,
|
||||
output logic FWriteIntD);
|
||||
input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision
|
||||
input logic [6:0] OpD, // bits 6:0 of instruction
|
||||
input logic [4:0] Rs2D, // bits 24:20 of instruction
|
||||
input logic [2:0] Funct3D, // bits 14:12 of instruction - may contain rounding mode
|
||||
input logic [2:0] FRM_REGW, // rounding mode from CSR
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
|
||||
output logic FRegWriteD, // FP register write enable
|
||||
output logic FDivStartD, // Start division or squareroot
|
||||
output logic [2:0] FResultSelD, // select result to be written to fp register
|
||||
output logic [3:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit
|
||||
output logic [1:0] FResSelD, // select one of the results done in the memory stage
|
||||
output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
|
||||
output logic FmtD, // precision - single-0 double-1
|
||||
output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
output logic FWriteIntD // is the result written to the integer register
|
||||
);
|
||||
|
||||
`define FCTRLW 15
|
||||
logic [`FCTRLW-1:0] ControlsD;
|
||||
@ -100,16 +101,43 @@ module fctrl (
|
||||
endcase
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
|
||||
// unswizzle control bits
|
||||
assign {FRegWriteD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD;
|
||||
|
||||
// if dynamic rounding, choose FRM_REGW
|
||||
// rounding modes:
|
||||
// 000 - round to nearest, ties to even
|
||||
// 001 - round towards 0 - round to min magnitude
|
||||
// 010 - round down - round towards negative infinity
|
||||
// 011 - round up - round towards positive infinity
|
||||
// 100 - round to nearest, ties to max magnitude - round to nearest, ties away from zero
|
||||
// 111 - dynamic - choose FRM_REGW as rounding mode
|
||||
assign FrmD = &Funct3D ? FRM_REGW : Funct3D;
|
||||
|
||||
// Precision
|
||||
// 0-single
|
||||
// 1-double
|
||||
// 0-single
|
||||
// 1-double
|
||||
assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
|
||||
|
||||
// FResultSel:
|
||||
// 000 - ReadRes - load
|
||||
// 001 - FMARes - FMA and multiply
|
||||
// 010 - FAddRes - add and fp to fp
|
||||
// 011 - FDivRes - divide and squareroot
|
||||
// 100 - FRes - anything that is written to the fp register and is ready in the memory stage
|
||||
// FResSel:
|
||||
// 00 - SrcA - move to fp register
|
||||
// 01 - SgnRes - sign injection
|
||||
// 10 - CmpRes - min/max
|
||||
// 11 - CvtRes - convert to fp
|
||||
|
||||
// FIntResSel:
|
||||
// 00 - CmpRes - less than, equal, or less than or equal
|
||||
// 01 - FSrcX - move to int register
|
||||
// 10 - ClassRes - classify
|
||||
// 11 - CvtRes - convert to signed/unsigned int
|
||||
|
||||
// OpCtrl values:
|
||||
// div/sqrt
|
||||
// fdiv = ???0
|
||||
// fsqrt = ???1
|
||||
@ -120,7 +148,7 @@ module fctrl (
|
||||
// feq = ?010
|
||||
// flt = ?001
|
||||
// fle = ?011
|
||||
// {?, is min or max, is eq or le, is lt or le}
|
||||
// {?, is min or max, is eq or le, is lt or le}
|
||||
|
||||
//fma/mult
|
||||
// fmadd = ?000
|
||||
@ -128,7 +156,7 @@ module fctrl (
|
||||
// fnmsub = ?010 -(a*b)+c
|
||||
// fnmadd = ?011 -(a*b)-c
|
||||
// fmul = ?100
|
||||
// {?, is mul, is negitive, is sub}
|
||||
// {?, is mul, negate product, negate addend}
|
||||
|
||||
// sgn inj
|
||||
// fsgnj = ??00
|
||||
@ -138,37 +166,28 @@ module fctrl (
|
||||
// add/sub/cnvt
|
||||
// fadd = 0000
|
||||
// fsub = 0001
|
||||
// cnvt
|
||||
// fcvt.s.d = 0111
|
||||
// fcvt.d.s = 0111
|
||||
// Fmt controls the output for fp -> fp
|
||||
|
||||
// convert
|
||||
// fcvt.w.s = 0010
|
||||
// fcvt.wu.s = 0110
|
||||
// fcvt.s.w = 0001
|
||||
// fcvt.s.wu = 0101
|
||||
// fcvt.s.d = 0000
|
||||
// fcvt.l.s = 1010
|
||||
// fcvt.lu.s = 1110
|
||||
// fcvt.s.l = 1001
|
||||
// fcvt.s.lu = 1101
|
||||
// fcvt.w.d = 0010
|
||||
// fcvt.w.d = 0010
|
||||
// fcvt.wu.d = 0110
|
||||
// fcvt.d.w = 0001
|
||||
// fcvt.d.wu = 0101
|
||||
// fcvt.d.s = 0000
|
||||
// fcvt.l.d = 1010
|
||||
// fcvt.lu.d = 1110
|
||||
// fcvt.d.l = 1001
|
||||
// fcvt.d.lu = 1101
|
||||
// {long, unsigned, to int, from int} Fmt controls the output for fp -> fp
|
||||
|
||||
// fmv.w.x = ???0
|
||||
// fmv.d.x = ???1
|
||||
|
||||
// flw = ?000
|
||||
// fld = ?001
|
||||
// fsw = ?010
|
||||
// fsd = ?011
|
||||
// fmv.x.w = ?100
|
||||
// fmv.x.d = ?101
|
||||
// {?, is mv, is store, is double or fmv}
|
||||
// {long, unsigned, to int, from int}
|
||||
|
||||
|
||||
endmodule
|
||||
|
@ -1,36 +1,37 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
// `include "../../config/rv64icfd/wally-config.vh"
|
||||
module fcvt (
|
||||
input logic XSgnE,
|
||||
input logic [10:0] XExpE,
|
||||
input logic [52:0] XManE,
|
||||
input logic XZeroE,
|
||||
input logic XNaNE,
|
||||
input logic XInfE,
|
||||
input logic XDenormE,
|
||||
input logic [10:0] BiasE,
|
||||
input logic [`XLEN-1:0] SrcAE, // integer input
|
||||
input logic [3:0] FOpCtrlE, // chooses which instruction is done (full list below)
|
||||
input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic FmtE, // precision 1 = double 0 = single
|
||||
output logic [63:0] CvtResE, // convert final result
|
||||
output logic [4:0] CvtFlgE); // convert flags {invalid, divide by zero, overflow, underflow, inexact}
|
||||
input logic XSgnE, // X's sign
|
||||
input logic [10:0] XExpE, // X's exponent
|
||||
input logic [52:0] XManE, // X's fraction
|
||||
input logic XZeroE, // is X zero
|
||||
input logic XNaNE, // is X NaN
|
||||
input logic XInfE, // is X infinity
|
||||
input logic XDenormE, // is X denormalized
|
||||
input logic [10:0] BiasE, // bias - depends on precision (max exponent/2)
|
||||
input logic [`XLEN-1:0] SrcAE, // integer input
|
||||
input logic [3:0] FOpCtrlE, // chooses which instruction is done (full list below)
|
||||
input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic FmtE, // precision 1 = double 0 = single
|
||||
output logic [63:0] CvtResE, // convert final result
|
||||
output logic [4:0] CvtFlgE); // convert flags {invalid, divide by zero, overflow, underflow, inexact}
|
||||
|
||||
logic ResSgn; // FP result's sign
|
||||
logic [10:0] ResExp,TmpExp; // FP result's exponent
|
||||
logic [51:0] ResFrac; // FP result's fraction
|
||||
logic [5:0] LZResP; // lz output
|
||||
logic [7:0] Bits; // how many bits are in the integer result
|
||||
logic [7:0] SubBits; // subtract these bits from the exponent (FP result)
|
||||
logic [64+51:0] ShiftedManTmp; // Shifted mantissa
|
||||
logic [64+51:0] ShiftVal; // value being shifted (to int - XMan, to FP - |integer input|)
|
||||
logic [64+1:0] ShiftedMan; // shifted mantissa truncated
|
||||
logic ResSgn; // FP result's sign
|
||||
logic [10:0] ResExp,TmpExp; // FP result's exponent
|
||||
logic [51:0] ResFrac; // FP result's fraction
|
||||
logic [5:0] LZResP; // lz output
|
||||
logic [7:0] Bits; // how many bits are in the integer result
|
||||
logic [7:0] SubBits; // subtract these bits from the exponent (FP result)
|
||||
logic [64+51:0] ShiftedManTmp; // Shifted mantissa
|
||||
logic [64+51:0] ShiftVal; // value being shifted (to int - XMan, to FP - |integer input|)
|
||||
logic [64+1:0] ShiftedMan; // shifted mantissa truncated
|
||||
logic [64:0] RoundedTmp; // full size rounded result - in case of overfow
|
||||
logic [63:0] Rounded; // rounded result
|
||||
logic [12:0] ExpVal; // unbiased X exponent
|
||||
logic [12:0] ShiftCnt; // how much is the mantissa shifted
|
||||
logic [64-1:0] IntIn; // trimed integer input
|
||||
logic [64-1:0] PosInt; // absolute value of the integer input
|
||||
logic [64-1:0] IntIn; // trimed integer input
|
||||
logic [64-1:0] PosInt; // absolute value of the integer input
|
||||
logic [63:0] CvtIntRes; // interger result from the fp -> int instructions
|
||||
logic [63:0] CvtFPRes; // floating point result from the int -> fp instructions
|
||||
logic Of, Uf; // did the integer result underflow or overflow
|
||||
@ -61,11 +62,9 @@ module fcvt (
|
||||
// {long, unsigned, to int, from int}
|
||||
|
||||
// calculate signals based off the input and output's size
|
||||
// assign Bias = FmtE ? 12'h3ff : 12'h7f;
|
||||
assign Res64 = ((FOpCtrlE==4'b1010 || FOpCtrlE==4'b1110) | (FmtE&(FOpCtrlE==4'b0001 | FOpCtrlE==4'b0101 | FOpCtrlE==4'b0000 | FOpCtrlE==4'b1001 | FOpCtrlE==4'b1101)));
|
||||
assign In64 = ((FOpCtrlE==4'b1001 || FOpCtrlE==4'b1101) | (FmtE&(FOpCtrlE==4'b0010 | FOpCtrlE==4'b0110 | FOpCtrlE==4'b1010 | FOpCtrlE==4'b1110) | (FOpCtrlE==4'b1101 & ~FmtE)));
|
||||
//assign SubBits = In64 ? 8'd64 : 8'd32;
|
||||
assign SubBits = 8'd64;
|
||||
assign Res64 = (FOpCtrlE[1]&FOpCtrlE[3]) | (FmtE&FOpCtrlE[0]);
|
||||
assign In64 = (FOpCtrlE[0]&FOpCtrlE[3]) | (FmtE&FOpCtrlE[1]);
|
||||
assign SubBits = In64 ? 8'd64 : 8'd32;
|
||||
assign Bits = Res64 ? 8'd64 : 8'd32;
|
||||
|
||||
// calculate the unbiased exponent
|
||||
@ -80,15 +79,6 @@ module fcvt (
|
||||
// determine the integer's sign
|
||||
assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0;
|
||||
|
||||
// generate
|
||||
// if(`XLEN == 64)
|
||||
// lz64 lz(LZResP, LZResV, PosInt);
|
||||
// else if(`XLEN == 32) begin
|
||||
// assign LZResP[5] = 1'b0;
|
||||
// lz32 lz(LZResP[4:0], LZResV, PosInt);
|
||||
// end
|
||||
// endgenerate
|
||||
|
||||
// Leading one detector
|
||||
logic [8:0] i;
|
||||
always_comb begin
|
||||
@ -98,7 +88,7 @@ module fcvt (
|
||||
end
|
||||
|
||||
// if no one was found set to zero otherwise calculate the exponent
|
||||
assign TmpExp = i==`XLEN ? 0 : BiasE + SubBits - LZResP;
|
||||
assign TmpExp = i==`XLEN ? 0 : FmtE ? 1023 + SubBits - LZResP : 127 + SubBits - LZResP;
|
||||
|
||||
|
||||
|
||||
|
@ -1,256 +0,0 @@
|
||||
// //
|
||||
// // File name : fpdiv
|
||||
// // Title : Floating-Point Divider/Square-Root
|
||||
// // project : FPU
|
||||
// // Library : fpdiv
|
||||
// // Author(s) : James E. Stine, Jr.
|
||||
// // Purpose : definition of main unit to floating-point div/sqrt
|
||||
// // notes :
|
||||
// //
|
||||
// // Copyright Oklahoma State University
|
||||
// //
|
||||
// // Basic Operations
|
||||
// //
|
||||
// // Step 1: Load operands, set flags, and convert SP to DP
|
||||
// // Step 2: Check for special inputs ( +/- Infinity, NaN)
|
||||
// // Step 3: Exponent Logic
|
||||
// // Step 4: Divide/Sqrt using Goldschmidt
|
||||
// // Step 5: Normalize the result.//
|
||||
// // Shift left until normalized. Normalized when the value to the
|
||||
// // left of the binrary point is 1.
|
||||
// // Step 6: Round the result.//
|
||||
// // Step 7: Put quotient/remainder onto output.
|
||||
// //
|
||||
|
||||
// // `timescale 1ps/1ps
|
||||
// module fdivsqrt (FDivSqrtDoneE, FDivResultM, FDivSqrtFlgM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
|
||||
// FDivStartE, reset, clk, FDivBusyE, HoldInputs);
|
||||
|
||||
// input [63:0] DivInput1E; // 1st input operand (A)
|
||||
// input [63:0] DivInput2E; // 2nd input operand (B)
|
||||
// input [2:0] FrmE; // Rounding mode - specify values
|
||||
// input DivOpType; // Function opcode
|
||||
// input FmtE; // Result Precision (0 for double, 1 for single) //***will need to swap this
|
||||
// input DivOvEn; // Overflow trap enabled
|
||||
// input DivUnEn; // Underflow trap enabled
|
||||
|
||||
// input FDivStartE;
|
||||
// input reset;
|
||||
// input clk;
|
||||
|
||||
// output [63:0] FDivResultM; // Result of operation
|
||||
// output [4:0] FDivSqrtFlgM; // IEEE exception flags
|
||||
// output FDivSqrtDoneE;
|
||||
// output FDivBusyE, HoldInputs;
|
||||
|
||||
// supply1 vdd;
|
||||
// supply0 vss;
|
||||
|
||||
// wire [63:0] Float1;
|
||||
// wire [63:0] Float2;
|
||||
// wire [63:0] IntValue;
|
||||
|
||||
// wire DivDenormM; // DivDenormM on input or output
|
||||
// wire [12:0] exp1, exp2, expF;
|
||||
// wire [12:0] exp_diff, bias;
|
||||
// wire [13:0] exp_sqrt;
|
||||
// wire [12:0] exp_s;
|
||||
// wire [12:0] exp_c;
|
||||
|
||||
// wire [10:0] exponent, exp_pre;
|
||||
// wire [63:0] Result;
|
||||
// wire [52:0] mantissaA;
|
||||
// wire [52:0] mantissaB;
|
||||
// wire [63:0] sum, sum_tc, sum_corr, sum_norm;
|
||||
|
||||
// wire [5:0] align_shift;
|
||||
// wire [5:0] norm_shift;
|
||||
// wire [2:0] sel_inv;
|
||||
// wire op1_Norm, op2_Norm;
|
||||
// wire opA_Norm, opB_Norm;
|
||||
// wire Invalid;
|
||||
// wire DenormIn, DenormIO;
|
||||
// wire [4:0] FlagsIn;
|
||||
// wire exp_gt63;
|
||||
// wire Sticky_out;
|
||||
// wire signResult, sign_corr;
|
||||
// wire corr_sign;
|
||||
// wire zeroB;
|
||||
// wire convert;
|
||||
// wire swap;
|
||||
// wire sub;
|
||||
|
||||
// wire [63:0] q1, qm1, qp1, q0, qm0, qp0;
|
||||
// wire [63:0] rega_out, regb_out, regc_out, regd_out;
|
||||
// wire [127:0] regr_out;
|
||||
// wire [2:0] sel_muxa, sel_muxb;
|
||||
// wire sel_muxr;
|
||||
// wire load_rega, load_regb, load_regc, load_regd, load_regr, load_regs;
|
||||
|
||||
// wire donev, sel_muxrv, sel_muxsv;
|
||||
// wire [1:0] sel_muxav, sel_muxbv;
|
||||
// wire load_regav, load_regbv, load_regcv;
|
||||
// wire load_regrv, load_regsv;
|
||||
|
||||
// logic exp_cout1, exp_cout2, exp_odd, open;
|
||||
|
||||
|
||||
// // Convert the input operands to their appropriate forms based on
|
||||
// // the orignal operands, the DivOpType , and their precision FmtE.
|
||||
// // Single precision inputs are converted to double precision
|
||||
// // and the sign of the first operand is set appropratiately based on
|
||||
// // if the operation is absolute value or negation.
|
||||
// convert_inputs_div divconv1 (Float1, Float2, DivInput1E, DivInput2E, DivOpType, FmtE);
|
||||
|
||||
// // Test for exceptions and return the "Invalid Operation" and
|
||||
// // "Denormalized" Input FDivSqrtFlgM. The "sel_inv" is used in
|
||||
// // the third pipeline stage to select the result. Also, op1_Norm
|
||||
// // and op2_Norm are one if DivInput1E and DivInput2E are not zero or denormalized.
|
||||
// // sub is one if the effective operation is subtaction.
|
||||
// exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm,
|
||||
// Float1, Float2, DivOpType);
|
||||
|
||||
// // Determine Sign/Mantissa
|
||||
// assign signResult = ((Float1[63]^Float2[63])&~DivOpType) | Float1[63]&DivOpType;
|
||||
// assign mantissaA = {vdd, Float1[51:0]};
|
||||
// assign mantissaB = {vdd, Float2[51:0]};
|
||||
// // Perform Exponent Subtraction - expA - expB + Bias
|
||||
// assign exp1 = {2'b0, Float1[62:52]};
|
||||
// assign exp2 = {2'b0, Float2[62:52]};
|
||||
// // bias : DP = 2^{11-1}-1 = 1023
|
||||
// assign bias = {3'h0, 10'h3FF};
|
||||
// // Divide exponent
|
||||
// csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c); //***adder
|
||||
// exp_add explogic1 (exp_cout1, {open, exp_diff}, //***adder?
|
||||
// {vss, exp_s}, {vss, exp_c}, 1'b1);
|
||||
// // Sqrt exponent (check if exponent is odd)
|
||||
// assign exp_odd = Float1[52] ? vss : vdd;
|
||||
// exp_add explogic2 (exp_cout2, exp_sqrt, //***adder?
|
||||
// {vss, exp1}, {4'h0, 10'h3ff}, exp_odd);
|
||||
// // Choose correct exponent
|
||||
// assign expF = DivOpType ? exp_sqrt[13:1] : exp_diff;
|
||||
|
||||
// // Main Goldschmidt/Division Routine
|
||||
// divconv goldy (q1, qm1, qp1, q0, qm0, qp0,
|
||||
// rega_out, regb_out, regc_out, regd_out,
|
||||
// regr_out, mantissaB, mantissaA,
|
||||
// sel_muxa, sel_muxb, sel_muxr,
|
||||
// reset, clk,
|
||||
// load_rega, load_regb, load_regc, load_regd,
|
||||
// load_regr, load_regs, FmtE, DivOpType, exp_odd);
|
||||
|
||||
// // FSM : control divider
|
||||
// fsm control (FDivSqrtDoneE, load_rega, load_regb, load_regc, load_regd,
|
||||
// load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
|
||||
// clk, reset, FDivStartE, DivOpType, FDivBusyE, HoldInputs);
|
||||
|
||||
// // Round the mantissa to a 52-bit value, with the leading one
|
||||
// // removed. The rounding unit also handles special cases and
|
||||
// // sets the exception flags.
|
||||
// //***add max magnitude and swap negative and positive infinity
|
||||
// rounder_div divround1 (Result, DenormIO, FlagsIn,
|
||||
// FrmE, FmtE, DivOvEn, DivUnEn, expF,
|
||||
// sel_inv, Invalid, DenormIn, signResult,
|
||||
// q1, qm1, qp1, q0, qm0, qp0, regr_out);
|
||||
|
||||
// // Store the final result and the exception flags in registers.
|
||||
// flopenr #(64) rega (clk, reset, FDivSqrtDoneE, Result, FDivResultM);
|
||||
// flopenr #(1) regb (clk, reset, FDivSqrtDoneE, DenormIO, DivDenormM);
|
||||
// flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivSqrtFlgM);
|
||||
|
||||
// endmodule // fpadd
|
||||
|
||||
// //
|
||||
// // Brent-Kung Prefix Adder
|
||||
// // (yes, it is 14 bits as my generator is broken for 13 bits :(
|
||||
// // assume, synthesizer will delete stuff not needed )
|
||||
// //
|
||||
// module exp_add (cout, sum, a, b, cin);
|
||||
|
||||
// input [13:0] a, b;
|
||||
// input cin;
|
||||
|
||||
// output [13:0] sum;
|
||||
// output cout;
|
||||
|
||||
// wire [14:0] p,g;
|
||||
// wire [13:0] c;
|
||||
|
||||
// // pre-computation
|
||||
// assign p={a^b,1'b0};
|
||||
// assign g={a&b, cin};
|
||||
|
||||
// // prefix tree
|
||||
// brent_kung prefix_tree(c, p[13:0], g[13:0]);
|
||||
|
||||
// // post-computation
|
||||
// assign sum=p[14:1]^c;
|
||||
// assign cout=g[14]|(p[14]&c[13]);
|
||||
|
||||
// endmodule // exp_add
|
||||
|
||||
// module brent_kung (c, p, g);
|
||||
|
||||
// input [13:0] p;
|
||||
// input [13:0] g;
|
||||
// output [14:1] c;
|
||||
|
||||
// logic G_1_0, G_3_2,G_5_4,G_7_6,G_9_8,G_11_10,G_13_12,G_3_0,G_7_4,G_11_8;
|
||||
// logic P_3_2,P_5_4,P_7_6,P_9_8,P_11_10,P_13_12,P_7_4,P_11_8;
|
||||
// logic G_7_0,G_11_0,G_5_0,G_9_0,G_13_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0;
|
||||
// // parallel-prefix, Brent-Kung
|
||||
|
||||
// // Stage 1: Generates G/P pairs that span 1 bits
|
||||
// grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
|
||||
// black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
|
||||
// black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
|
||||
// black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
|
||||
// black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
|
||||
// black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
|
||||
// black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
|
||||
|
||||
// // Stage 2: Generates G/P pairs that span 2 bits
|
||||
// grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
|
||||
// black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
|
||||
// black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
|
||||
|
||||
// // Stage 3: Generates G/P pairs that span 4 bits
|
||||
// grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
|
||||
|
||||
// // Stage 4: Generates G/P pairs that span 8 bits
|
||||
|
||||
// // Stage 5: Generates G/P pairs that span 4 bits
|
||||
// grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
|
||||
|
||||
// // Stage 6: Generates G/P pairs that span 2 bits
|
||||
// grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
|
||||
// grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
|
||||
// grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12);
|
||||
|
||||
// // Last grey cell stage
|
||||
// grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
|
||||
// grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
|
||||
// grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
|
||||
// grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
|
||||
// grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
|
||||
// grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
|
||||
|
||||
// // Final Stage: Apply c_k+1=G_k_0
|
||||
// assign c[1]=g[0];
|
||||
// assign c[2]=G_1_0;
|
||||
// assign c[3]=G_2_0;
|
||||
// assign c[4]=G_3_0;
|
||||
// assign c[5]=G_4_0;
|
||||
// assign c[6]=G_5_0;
|
||||
// assign c[7]=G_6_0;
|
||||
// assign c[8]=G_7_0;
|
||||
// assign c[9]=G_8_0;
|
||||
|
||||
// assign c[10]=G_9_0;
|
||||
// assign c[11]=G_10_0;
|
||||
// assign c[12]=G_11_0;
|
||||
// assign c[13]=G_12_0;
|
||||
// assign c[14]=G_13_0;
|
||||
|
||||
// endmodule // brent_kung
|
||||
|
@ -26,41 +26,47 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fhazard(
|
||||
input logic [4:0] Adr1E, Adr2E, Adr3E,
|
||||
input logic FRegWriteM, FRegWriteW,
|
||||
input logic [4:0] RdM, RdW,
|
||||
input logic [2:0] FResultSelM,
|
||||
output logic FStallD,
|
||||
output logic [1:0] FForwardXE, FForwardYE, FForwardZE
|
||||
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
|
||||
input logic FRegWriteM, FRegWriteW, // is the fp register being written to
|
||||
input logic [4:0] RdM, RdW, // the adress being written to
|
||||
input logic [2:0] FResultSelM, // the result being selected
|
||||
output logic FStallD, // stall the decode stage
|
||||
output logic [1:0] FForwardXE, FForwardYE, FForwardZE // select a forwarded value
|
||||
);
|
||||
|
||||
|
||||
always_comb begin
|
||||
// set ReadData as default
|
||||
// set defaults
|
||||
FForwardXE = 2'b00; // choose FRD1E
|
||||
FForwardYE = 2'b00; // choose FRD2E
|
||||
FForwardZE = 2'b00; // choose FRD3E
|
||||
FStallD = 0;
|
||||
|
||||
if ((Adr1E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM
|
||||
if(FResultSelM == 3'b100) FForwardXE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // if the result won't be ready stall
|
||||
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
|
||||
|
||||
// if the needed value is in the memory stage - input 1
|
||||
if ((Adr1E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM (can be taken from the memory stage)
|
||||
if(FResultSelM == 3'b100) FForwardXE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // otherwise stall
|
||||
// if the needed value is in the writeback stage
|
||||
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
|
||||
|
||||
|
||||
if ((Adr2E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM
|
||||
if(FResultSelM == 3'b100) FForwardYE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // if the result won't be ready stall
|
||||
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
|
||||
// if the needed value is in the memory stage - input 2
|
||||
if ((Adr2E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM (can be taken from the memory stage)
|
||||
if(FResultSelM == 3'b100) FForwardYE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // otherwise stall
|
||||
// if the needed value is in the writeback stage
|
||||
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
|
||||
|
||||
|
||||
if ((Adr3E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM
|
||||
if(FResultSelM == 3'b100) FForwardZE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // if the result won't be ready stall
|
||||
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
|
||||
|
||||
// if the needed value is in the memory stage - input 3
|
||||
if ((Adr3E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM (can be taken from the memory stage)
|
||||
if(FResultSelM == 3'b100) FForwardZE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // otherwise stall
|
||||
// if the needed value is in the writeback stage
|
||||
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
|
||||
|
||||
end
|
||||
|
||||
|
@ -26,41 +26,50 @@
|
||||
// `include "../../../config/rv64icfd/wally-config.vh"
|
||||
|
||||
module fma(
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic FlushM,
|
||||
input logic StallM,
|
||||
input logic FmtE, FmtM, // precision 1 = double 0 = single
|
||||
input logic [2:0] FOpCtrlM, FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic XSgnE, YSgnE, ZSgnE,
|
||||
input logic [`NE-1:0] XExpE, YExpE, ZExpE,
|
||||
input logic [`NF:0] XManE, YManE, ZManE,
|
||||
input logic XSgnM, YSgnM, ZSgnM,
|
||||
input logic [`NE-1:0] XExpM, YExpM, ZExpM, // ***needed
|
||||
input logic [`NF:0] XManM, YManM, ZManM,
|
||||
input logic XDenormE, YDenormE, ZDenormE,
|
||||
input logic XZeroE, YZeroE, ZZeroE,
|
||||
input logic XNaNM, YNaNM, ZNaNM,
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM,
|
||||
input logic XZeroM, YZeroM, ZZeroM,
|
||||
input logic XInfM, YInfM, ZInfM,
|
||||
input logic [10:0] BiasE,
|
||||
output logic [`FLEN-1:0] FMAResM,
|
||||
output logic [4:0] FMAFlgM);
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic FlushM, // flush the memory stage
|
||||
input logic StallM, // stall memory stage
|
||||
input logic FmtE, FmtM, // precision 1 = double 0 = single
|
||||
input logic [2:0] FOpCtrlM, FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic XSgnE, YSgnE, ZSgnE, // input signs - execute stage
|
||||
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // input exponents - execute stage
|
||||
input logic [`NF:0] XManE, YManE, ZManE, // input mantissa - execute stage
|
||||
input logic XSgnM, YSgnM, ZSgnM, // input signs - memory stage
|
||||
input logic [`NE-1:0] XExpM, YExpM, ZExpM, // input exponents - memory stage
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissa - memory stage
|
||||
input logic XDenormE, YDenormE, ZDenormE, // is denorm
|
||||
input logic XZeroE, YZeroE, ZZeroE, // is zero - execute stage
|
||||
input logic XNaNM, YNaNM, ZNaNM, // is NaN
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM, // is signaling NaN
|
||||
input logic XZeroM, YZeroM, ZZeroM, // is zero - memory stage
|
||||
input logic XInfM, YInfM, ZInfM, // is infinity
|
||||
input logic [10:0] BiasE, // bias - depends on precison (max exponent/2)
|
||||
output logic [`FLEN-1:0] FMAResM, // FMA result
|
||||
output logic [4:0] FMAFlgM); // FMA flags
|
||||
|
||||
//fma/mult
|
||||
// fmadd = ?000
|
||||
// fmsub = ?001
|
||||
// fnmsub = ?010 -(a*b)+c
|
||||
// fnmadd = ?011 -(a*b)-c
|
||||
// fmul = ?100
|
||||
// {?, is mul, negate product, negate addend}
|
||||
|
||||
// signals transfered between pipeline stages
|
||||
logic [2*`NF+1:0] ProdManE, ProdManM;
|
||||
logic [3*`NF+5:0] AlignedAddendE, AlignedAddendM;
|
||||
logic [`NE+1:0] ProdExpE, ProdExpM;
|
||||
logic AddendStickyE, AddendStickyM;
|
||||
logic KillProdE, KillProdM;
|
||||
logic [`NE+1:0] ProdExpE, ProdExpM;
|
||||
logic AddendStickyE, AddendStickyM;
|
||||
logic KillProdE, KillProdM;
|
||||
|
||||
fma1 fma1 (.XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.BiasE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
|
||||
.FOpCtrlE, .FmtE, .ProdManE, .AlignedAddendE,
|
||||
.ProdExpE, .AddendStickyE, .KillProdE);
|
||||
|
||||
// E/M pipeline registers
|
||||
flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
|
||||
flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
|
||||
flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
|
||||
@ -82,8 +91,8 @@ module fma1(
|
||||
// input logic XSgnE, YSgnE, ZSgnE,
|
||||
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format
|
||||
input logic [`NF:0] XManE, YManE, ZManE, // fractions in U(0.NF) format]
|
||||
input logic XDenormE, YDenormE, ZDenormE,
|
||||
input logic XZeroE, YZeroE, ZZeroE,
|
||||
input logic XDenormE, YDenormE, ZDenormE, // is the input denormal
|
||||
input logic XZeroE, YZeroE, ZZeroE, // is the input zero
|
||||
input logic [`NE-1:0] BiasE,
|
||||
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic FmtE, // precision 1 = double 0 = single
|
||||
@ -94,8 +103,8 @@ module fma1(
|
||||
output logic KillProdE // set the product to zero before addition if the product is too small to matter
|
||||
);
|
||||
|
||||
logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format *** is this enough bits?
|
||||
logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
|
||||
logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
|
||||
logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
|
||||
logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter U(NF+5.3NF+1)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@ -200,32 +209,33 @@ module fma2(
|
||||
output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
|
||||
|
||||
|
||||
logic [`NF-1:0] ResultFrac; // Result fraction
|
||||
logic [`NE-1:0] ResultExp; // Result exponent
|
||||
logic ResultSgn; // Result sign
|
||||
logic PSgn; // product sign
|
||||
|
||||
logic [`NF-1:0] ResultFrac; // Result fraction
|
||||
logic [`NE-1:0] ResultExp; // Result exponent
|
||||
logic ResultSgn; // Result sign
|
||||
logic PSgn; // product sign
|
||||
logic [2*`NF+1:0] ProdMan2; // product being added
|
||||
logic [3*`NF+6:0] AlignedAddend2; // possibly inverted aligned Z
|
||||
logic [3*`NF+5:0] Sum; // positive sum
|
||||
logic [3*`NF+6:0] PreSum; // possibly negitive sum
|
||||
logic [`NE+1:0] SumExp; // exponent of the normalized sum
|
||||
logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results
|
||||
logic [`NE+1:0] SumExpTmpMinus1; // SumExpTmp-1
|
||||
logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow
|
||||
logic [`NF+2:0] NormSum; // normalized sum
|
||||
logic [`NE+1:0] SumExp; // exponent of the normalized sum
|
||||
logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results
|
||||
logic [`NE+1:0] SumExpTmpMinus1; // SumExpTmp-1
|
||||
logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow
|
||||
logic [`NF+2:0] NormSum; // normalized sum
|
||||
logic [3*`NF+5:0] SumShifted; // sum shifted for normalization
|
||||
logic [8:0] NormCnt; // output of the leading zero detector //***change this later
|
||||
logic NormSumSticky; // sticky bit calulated from the normalized sum
|
||||
logic SumZero; // is the sum zero
|
||||
logic NegSum; // is the sum negitive
|
||||
logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z)
|
||||
logic ResultDenorm; // is the result denormalized
|
||||
logic Sticky; // Sticky bit
|
||||
logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding
|
||||
logic UfPlus1, UfCalcPlus1; // do you add one (for determining underflow flag)
|
||||
logic Invalid,Underflow,Overflow,Inexact; // flags
|
||||
logic [8:0] DenormShift; // right shift if the result is denormalized //***change this later
|
||||
logic SubBySmallNum; // was there supposed to be a subtraction by a small number
|
||||
logic [8:0] NormCnt; // output of the leading zero detector //***change this later
|
||||
logic NormSumSticky; // sticky bit calulated from the normalized sum
|
||||
logic SumZero; // is the sum zero
|
||||
logic NegSum; // is the sum negitive
|
||||
logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z)
|
||||
logic ResultDenorm; // is the result denormalized
|
||||
logic Sticky; // Sticky bit
|
||||
logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding
|
||||
logic UfPlus1, UfCalcPlus1; // do you add one (for determining underflow flag)
|
||||
logic Invalid,Underflow,Overflow,Inexact; // flags
|
||||
logic [8:0] DenormShift; // right shift if the result is denormalized //***change this later
|
||||
logic SubBySmallNum; // was there supposed to be a subtraction by a small number
|
||||
logic [`FLEN-1:0] Addend; // value to add (Z or zero)
|
||||
logic ZeroSgn; // the result's sign if the sum is zero
|
||||
logic ResultSgnTmp; // the result's sign assuming the result is not zero
|
||||
@ -306,11 +316,12 @@ module fma2(
|
||||
assign SumZero = ~(|Sum);
|
||||
|
||||
// determine the length of the fraction based on precision
|
||||
//assign FracLen = FmtM ? `NF : 13'd23;
|
||||
assign FracLen = `NF;
|
||||
assign FracLen = FmtM ? `NF : 13'd23;
|
||||
//assign FracLen = `NF;
|
||||
|
||||
// Determine if the result is denormal
|
||||
assign SumExpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - (`NF+4));
|
||||
|
||||
assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
|
||||
|
||||
// Determine the shift needed for denormal results
|
||||
@ -458,16 +469,18 @@ module fma2(
|
||||
// 1) any input is a signaling NaN
|
||||
// 2) Inf - Inf (unless x or y is NaN)
|
||||
// 3) 0 * Inf
|
||||
assign MaxExp = FmtM ? {`NE{1'b1}} : 13'd255;
|
||||
|
||||
assign MaxExp = FmtM ? {`NE{1'b1}} : {8{1'b1}};
|
||||
assign SigNaN = XSNaNM | YSNaNM | ZSNaNM;
|
||||
assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
|
||||
|
||||
// Set Overflow flag if the number is too big to be represented
|
||||
// - Don't set the overflow flag if an overflowed result isn't output
|
||||
assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
assign Overflow = FullResultExp >= {MaxExp} & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
|
||||
// Set Underflow flag if the number is too small to be represented in normal numbers
|
||||
// - Don't set the underflow flag if the result is exact
|
||||
|
||||
assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Guard|Sticky|UfGuard)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
// Set Inexact flag if the result is different from what would be output given infinite precision
|
||||
@ -504,8 +517,8 @@ module fma2(
|
||||
YNaNM ? YNaNResult :
|
||||
ZNaNM ? ZNaNResult :
|
||||
Invalid ? InvalidResult : // has to be before inf
|
||||
XInfM ? FmtM ? {PSgn, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgn, XExpM[7:0], XManM[51:29]} :
|
||||
YInfM ? FmtM ? {PSgn, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgn, YExpM[7:0], YManM[51:29]} :
|
||||
XInfM ? FmtM ? {PSgn, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgn, XExpM[7:0], XManM[51:29]} :
|
||||
YInfM ? FmtM ? {PSgn, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgn, YExpM[7:0], YManM[51:29]} :
|
||||
ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} :
|
||||
Overflow ? OverflowResult :
|
||||
KillProdM ? KillProdResult : // has to be after Underflow
|
||||
|
@ -1,286 +0,0 @@
|
||||
///////////////////////////////////////////
|
||||
//
|
||||
// Written: James.Stine@okstate.edu 1 February 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: FP Add/Sub instructions
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
||||
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
//
|
||||
// Basic and Denormalized Operations
|
||||
//
|
||||
// Step 1: Load operands, set flags, and convert SP to DP
|
||||
// Step 2: Check for special inputs ( +/- Infinity, NaN)
|
||||
// Step 3: Compare exponents. Swap the operands if exp1 < exp2
|
||||
// or of (exp1 = exp2 AND mnt1 < mnt2)
|
||||
// Step 4: Shift the mantissa corresponding to the smaller exponent,
|
||||
// and extend precision by three bits to the right.
|
||||
// Step 5: Add or subtract the mantissas.
|
||||
// Step 6: Normalize the result.
|
||||
// Shift left until normalized. Normalized when the value to the
|
||||
// left of the binary point is 1.
|
||||
// Step 7: Round the result.
|
||||
// Step 8: Put sum onto output.
|
||||
//
|
||||
|
||||
module fpadd (AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn);
|
||||
|
||||
input [63:0] op1; // 1st input operand (A)
|
||||
input [63:0] op2; // 2nd input operand (B)
|
||||
input [2:0] rm; // Rounding mode - specify values
|
||||
input [3:0] op_type; // Function opcode
|
||||
input P; // Result Precision (0 for double, 1 for single)
|
||||
input OvEn; // Overflow trap enabled
|
||||
input UnEn; // Underflow trap enabled
|
||||
|
||||
output [63:0] AS_Result; // Result of operation
|
||||
output [4:0] Flags; // IEEE exception flags
|
||||
output Denorm; // Denorm on input or output
|
||||
|
||||
wire [63:0] Float1;
|
||||
wire [63:0] Float2;
|
||||
wire [63:0] IntValue;
|
||||
wire [11:0] exp1, exp2;
|
||||
wire [11:0] exp_diff1, exp_diff2;
|
||||
wire [10:0] exponent, exp_pre;
|
||||
wire [11:0] exp_shift;
|
||||
wire [63:0] Result;
|
||||
wire [51:0] mantissaA;
|
||||
wire [56:0] mantissaA1;
|
||||
wire [63:0] mantissaA3;
|
||||
wire [51:0] mantissaB;
|
||||
wire [56:0] mantissaB1, mantissaB2;
|
||||
wire [63:0] mantissaB3;
|
||||
wire [63:0] sum, sum_tc, sum_corr, sum_norm, sum_norm_w_bypass;
|
||||
wire [5:0] align_shift;
|
||||
wire [5:0] norm_shift, norm_shift_denorm;
|
||||
wire [3:0] sel_inv;
|
||||
wire op1_Norm, op2_Norm;
|
||||
wire opA_Norm, opB_Norm;
|
||||
wire Invalid;
|
||||
wire DenormIn, DenormIO;
|
||||
wire [4:0] FlagsIn;
|
||||
wire exp_valid;
|
||||
wire exp_gt63;
|
||||
wire Sticky_out;
|
||||
wire signA, sign_corr;
|
||||
wire corr_sign;
|
||||
wire zeroB;
|
||||
wire convert;
|
||||
wire swap;
|
||||
wire sub;
|
||||
wire [10:0] exponent_postsum;
|
||||
wire mantissa_comp;
|
||||
wire mantissa_comp_sum;
|
||||
wire mantissa_comp_sum_tc;
|
||||
wire Float1_sum_comp;
|
||||
wire Float2_sum_comp;
|
||||
wire Float1_sum_tc_comp;
|
||||
wire Float2_sum_tc_comp;
|
||||
wire [5:0] ZP_mantissaA;
|
||||
wire [5:0] ZP_mantissaB;
|
||||
wire ZV_mantissaA;
|
||||
wire ZV_mantissaB;
|
||||
wire normal_underflow;
|
||||
wire normal_overflow;
|
||||
|
||||
// Convert the input operands to their appropriate forms based on
|
||||
// the original operands, the op_type, and their precision P.
|
||||
// Single precision inputs are converted to double precision
|
||||
// and the sign of the first operand is set appropriately based on
|
||||
// if the operation is absolute value or negation.
|
||||
|
||||
convert_inputs conv1 (Float1, Float2, op1, op2, op_type, P);
|
||||
|
||||
// Test for exceptions and return the "Invalid Operation" and
|
||||
// "Denormalized" Input Flags. The "sel_inv" is used in
|
||||
// the third pipeline stage to select the result. Also, op1_Norm
|
||||
// and op2_Norm are one if op1 and op2 are not zero or denormalized.
|
||||
// sub is one if the effective operation is subtraction.
|
||||
|
||||
exception exc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, sub,
|
||||
Float1, Float2, op_type);
|
||||
|
||||
// Perform Exponent Subtraction (used for alignment). For performance
|
||||
// both exponent subtractions are performed in parallel. This was
|
||||
// changed to a behavior level to allow the tools to try to optimize
|
||||
// the two parallel additions. The input values are zero-extended to 12
|
||||
// bits prior to performing the addition.
|
||||
|
||||
assign exp1 = {1'b0, Float1[62:52]};
|
||||
assign exp2 = {1'b0, Float2[62:52]};
|
||||
assign exp_diff1 = exp1 - exp2;
|
||||
assign exp_diff2 = DenormIn ? ({Float2[63], exp2[10:0]} - {Float1[63], exp1[10:0]}): exp2 - exp1;
|
||||
|
||||
// The second operand (B) should be set to zero, if op_type does not
|
||||
// specify addition or subtraction
|
||||
assign zeroB = op_type[2] | op_type[1];
|
||||
|
||||
// Operands are swapped if zeroB is not one and exp1 < exp2.
|
||||
// Swapping causes exp2 to be used for the result exponent.
|
||||
// Only the exponent of the larger operand is used to determine
|
||||
// the final result.
|
||||
assign swap = exp_diff1[11] & ~zeroB;
|
||||
assign exponent = swap ? exp2[10:0] : exp1[10:0];
|
||||
assign exponent_postsum = swap ? exp2[10:0] : exp1[10:0];
|
||||
assign mantissaA = swap ? Float2[51:0] : Float1[51:0];
|
||||
assign mantissaB = swap ? Float1[51:0] : Float2[51:0];
|
||||
assign signA = swap ? Float2[63] : Float1[63];
|
||||
|
||||
// Leading-Zero Detector. Determine the size of the shift needed for
|
||||
// normalization. If sum_corrected is all zeros, the exp_valid is
|
||||
// zero; otherwise, it is one.
|
||||
// modified to 52 bits to detect leading zeroes on denormalized mantissas
|
||||
lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
|
||||
lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
|
||||
|
||||
// Denormalized exponents created by subtracting the leading zeroes from the original exponents
|
||||
assign exp1_denorm = swap ? (exp1 - ZP_mantissaB) : (exp1 - ZP_mantissaA);
|
||||
assign exp2_denorm = swap ? (exp2 - ZP_mantissaA) : (exp2 - ZP_mantissaB);
|
||||
|
||||
// Finds normal underflow result to determine whether to round final exponent down
|
||||
// Comparison between each float and the resulting sum of the primary cla adder/subtractor and cla subtractor
|
||||
assign Float1_sum_comp = (Float1[51:0] > sum[51:0]) ? 1'b0 : 1'b1;
|
||||
assign Float2_sum_comp = (Float2[51:0] > sum[51:0]) ? 1'b0 : 1'b1;
|
||||
assign Float1_sum_tc_comp = (Float1[51:0] > sum_tc[51:0]) ? 1'b0 : 1'b1;
|
||||
assign Float2_sum_tc_comp = (Float2[51:0] > sum_tc[51:0]) ? 1'b0 : 1'b1;
|
||||
|
||||
// Determines the correct Float value to compare based on swap result
|
||||
assign mantissa_comp_sum = swap ? Float2_sum_comp : Float1_sum_comp;
|
||||
assign mantissa_comp_sum_tc = swap ? Float2_sum_tc_comp : Float1_sum_tc_comp;
|
||||
|
||||
// Determines the correct comparison result based on operation and sign of resulting sum
|
||||
assign mantissa_comp = (op_type[0] ^ sum[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;
|
||||
|
||||
// If the signs are different and both operands aren't denormalized
|
||||
// the normal underflow bit is needed and therefore updated.
|
||||
assign normal_underflow = ((Float1[63] ~^ Float2[63]) & (opA_Norm | opB_Norm)) ? mantissa_comp : 1'b0;
|
||||
|
||||
// Determine the alignment shift and limit it to 63. If any bit from
|
||||
// exp_shift[6] to exp_shift[11] is one, then shift is set to all ones.
|
||||
assign exp_shift = swap ? exp_diff2 : exp_diff1;
|
||||
assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9]
|
||||
| exp_shift[8] | exp_shift[7] | exp_shift[6];
|
||||
assign align_shift = exp_shift | {6{exp_gt63}};
|
||||
|
||||
// Unpack the 52-bit mantissas to 57-bit numbers of the form.
|
||||
// 001.M[51]M[50] ... M[1]M[0]00
|
||||
// Unless the number has an exponent of zero, in which case it
|
||||
// is unpacked as
|
||||
// 000.00 ... 00
|
||||
// This effectively flushes denormalized values to zero.
|
||||
// The three bits to the left of the binary point prevent overflow
|
||||
// and loss of sign information. The two bits to the right of the
|
||||
// original mantissa form the "guard" and "round" bits that are used
|
||||
// to round the result.
|
||||
assign opA_Norm = swap ? op2_Norm : op1_Norm;
|
||||
assign opB_Norm = swap ? op1_Norm : op2_Norm;
|
||||
assign mantissaA1 = {2'h0, opA_Norm, mantissaA[51:0]&{52{opA_Norm}}, 2'h0};
|
||||
assign mantissaB1 = {2'h0, opB_Norm, mantissaB[51:0]&{52{opB_Norm}}, 2'h0};
|
||||
|
||||
// Perform mantissa alignment using a 57-bit barrel shifter
|
||||
// If any of the bits shifted out are one, Sticky_out is set.
|
||||
// The size of the barrel shifter could be reduced by two bits
|
||||
// by not adding the leading two zeros until after the shift.
|
||||
barrel_shifter_r57 bs1 (mantissaB2, Sticky_out, mantissaB1, align_shift);
|
||||
|
||||
// Place either the sign-extended 32-bit value or the original 64-bit value
|
||||
// into IntValue (to be used for integer to floating point conversion)
|
||||
assign IntValue [31:0] = op1[31:0];
|
||||
assign IntValue [63:32] = op_type[0] ? {32{op1[31]}} : op1[63:32];
|
||||
|
||||
// If doing an integer to floating point conversion, mantissaA3 is set to
|
||||
// IntValue and the prenormalized exponent is set to 1084. Otherwise,
|
||||
// mantissaA3 is simply extended to 64-bits by setting the 7 LSBs to zero,
|
||||
// and the exponent value is left unchanged.
|
||||
// Under denormalized cases, the exponent before the rounder is set to 1
|
||||
// if the normal shift value is 11.
|
||||
assign convert = ~op_type[2] & op_type[1];
|
||||
assign mantissaA3 = (op_type[3]) ? (op_type[0] ? Float1 : ~Float1) : (DenormIn ? ({12'h0, mantissaA}) : (convert ? IntValue : {mantissaA1, 7'h0}));
|
||||
assign exp_pre = DenormIn ?
|
||||
((norm_shift == 6'b001011) ? 11'b00000000001 : (swap ? exp2_denorm : exp1_denorm))
|
||||
: (convert ? 11'b10000111100 : exponent);
|
||||
|
||||
// Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to
|
||||
// 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six
|
||||
// zeros.
|
||||
assign mantissaB3[63:7] = (op_type[3]) ? (57'h0) : (DenormIn ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
|
||||
assign mantissaB3[6] = (op_type[3]) ? (1'b0) : (DenormIn ? mantissaB[6] : Sticky_out & ~zeroB);
|
||||
assign mantissaB3[5:0] = (op_type[3]) ? (6'h01) : (DenormIn ? mantissaB[5:0] : 6'h0);
|
||||
|
||||
// The sign of the result needs to be corrected if the true
|
||||
// operation is subtraction and the input operands were swapped.
|
||||
assign corr_sign = ~op_type[2]&~op_type[1]&op_type[0]&swap;
|
||||
|
||||
// 64-bit Mantissa Adder/Subtractor
|
||||
cla64 add1 (sum, mantissaA3, mantissaB3, sub); //***adder
|
||||
|
||||
// 64-bit Mantissa Subtractor - to get the two's complement of the
|
||||
// result when the sign from the adder/subtractor is negative.
|
||||
cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3); //***adder
|
||||
|
||||
// Determine the correct sign of the result
|
||||
assign sign_corr = ((corr_sign ^ signA) & ~convert) ^ sum[63];
|
||||
|
||||
// If the sum is negative, use its two's complement instead.
|
||||
// This value has to be 64-bits to correctly handle the
|
||||
// case 10...00
|
||||
assign sum_corr = (DenormIn & (opA_Norm | opB_Norm) & ( ( (Float1[63] ~^ Float2[63]) & op_type[0] ) | ((Float1[63] ^ Float2[63]) & ~op_type[0]) ))
|
||||
? (sum[63] ? sum : sum_tc) : ( (op_type[3]) ? sum : (sum[63] ? sum_tc : sum));
|
||||
|
||||
// Finds normal underflow result to determine whether to round final exponent down
|
||||
assign normal_overflow = (DenormIn & (sum == 16'h0) & (opA_Norm | opB_Norm) & ~op_type[0]) ? 1'b1 : (sum[63] ? sum_tc[52] : sum[52]);
|
||||
|
||||
// Leading-Zero Detector. Determine the size of the shift needed for
|
||||
// normalization. If sum_corrected is all zeros, the exp_valid is
|
||||
// zero; otherwise, it is one.
|
||||
lz64 lzd1 (norm_shift, exp_valid, sum_corr);
|
||||
|
||||
assign norm_shift_denorm = (DenormIn & ( (~opA_Norm & ~opB_Norm) | normal_underflow)) ? (6'h00) : (norm_shift);
|
||||
|
||||
// Barrel shifter used for normalization. It takes as inputs
|
||||
// the corrected sum and the amount by which the sum should
|
||||
// be right shifted. It outputs the normalized sum.
|
||||
barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);
|
||||
|
||||
assign sum_norm_w_bypass = (op_type[3]) ? (op_type[0] ? ~sum_corr : sum_corr) : (sum_norm);
|
||||
|
||||
// Round the mantissa to a 52-bit value, with the leading one
|
||||
// removed. If the result is a single precision number, the actual
|
||||
// mantissa is in the upper 23 bits and the lower 29 bits are zero.
|
||||
// At this point, normalization has already been performed, so we know
|
||||
// exactly where the rounding point is. The rounding unit also
|
||||
// handles special cases and sets the exception flags.
|
||||
|
||||
// Changed DenormIO -> Denorm and FlagsIn -> Flags in order to
|
||||
// help in processor reservation station detection of load/stores. In
|
||||
// other words, the processor would like to know ahead of time that
|
||||
// if the result is an exception then don't load or store.
|
||||
rounder round1 (Result, DenormIO, FlagsIn, rm, P, OvEn, UnEn, exp_valid,
|
||||
sel_inv, Invalid, DenormIn, convert, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass,
|
||||
exponent_postsum, op1_Norm, op2_Norm, Float1[63:52], Float2[63:52],
|
||||
normal_overflow, normal_underflow, swap, op_type, sum);
|
||||
|
||||
// Store the final result and the exception flags in registers.
|
||||
assign AS_Result = Result;
|
||||
assign {Denorm, Flags} = {DenormIO, FlagsIn};
|
||||
|
||||
endmodule // fpadd
|
||||
|
||||
|
@ -24,133 +24,117 @@
|
||||
|
||||
// `timescale 1ps/1ps
|
||||
module fpdiv (
|
||||
input logic [63:0] op1, // 1st input operand (A)
|
||||
input logic [63:0] op2, // 2nd input operand (B)
|
||||
input logic [1:0] rm, // Rounding mode - specify values
|
||||
input logic op_type, // Function opcode
|
||||
input logic P, // Result Precision (0 for double, 1 for single)
|
||||
input logic OvEn, // Overflow trap enabled
|
||||
input logic UnEn, // Underflow trap enabled
|
||||
input logic start,
|
||||
input logic reset,
|
||||
input logic clk,
|
||||
output logic done,
|
||||
output logic FDivBusyE,
|
||||
output logic HoldInputs,
|
||||
output logic [63:0] AS_Result, // Result of operation
|
||||
output logic [4:0] Flags); // IEEE exception flags
|
||||
logic Denorm; // Denorm on input or output
|
||||
// output done;
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic start,
|
||||
input logic [63:0] op1, // 1st input operand (A)
|
||||
input logic [63:0] op2, // 2nd input operand (B)
|
||||
input logic [1:0] rm, // Rounding mode - specify values
|
||||
input logic op_type, // Function opcode
|
||||
input logic P, // Result Precision (0 for double, 1 for single)
|
||||
input logic OvEn, // Overflow trap enabled
|
||||
input logic UnEn, // Underflow trap enabled
|
||||
output logic done,
|
||||
output logic FDivBusyE,
|
||||
output logic [63:0] AS_Result, // Result of operation
|
||||
output logic [4:0] Flags); // IEEE exception flags
|
||||
|
||||
supply1 vdd;
|
||||
supply0 vss;
|
||||
|
||||
wire [63:0] Float1;
|
||||
wire [63:0] Float2;
|
||||
wire [63:0] IntValue;
|
||||
logic [63:0] Float1;
|
||||
logic [63:0] Float2;
|
||||
|
||||
wire [12:0] exp1, exp2, expF;
|
||||
wire [12:0] exp_diff, bias;
|
||||
wire [13:0] exp_sqrt;
|
||||
wire [12:0] exp_s;
|
||||
wire [12:0] exp_c;
|
||||
logic [12:0] exp1, exp2, expF;
|
||||
logic [12:0] exp_diff, bias;
|
||||
logic [13:0] exp_sqrt;
|
||||
logic [12:0] exp_s;
|
||||
logic [12:0] exp_c;
|
||||
|
||||
wire [10:0] exponent, exp_pre;
|
||||
wire [63:0] Result;
|
||||
wire [52:0] mantissaA;
|
||||
wire [52:0] mantissaB;
|
||||
wire [63:0] sum, sum_tc, sum_corr, sum_norm;
|
||||
logic [10:0] exponent;
|
||||
logic [63:0] Result;
|
||||
logic [52:0] mantissaA;
|
||||
logic [52:0] mantissaB;
|
||||
|
||||
wire [5:0] align_shift;
|
||||
wire [5:0] norm_shift;
|
||||
wire [2:0] sel_inv;
|
||||
wire op1_Norm, op2_Norm;
|
||||
wire opA_Norm, opB_Norm;
|
||||
wire Invalid;
|
||||
wire DenormIn, DenormIO;
|
||||
wire [4:0] FlagsIn;
|
||||
wire exp_gt63;
|
||||
wire Sticky_out;
|
||||
wire signResult, sign_corr;
|
||||
wire corr_sign;
|
||||
wire zeroB;
|
||||
wire convert;
|
||||
wire swap;
|
||||
wire sub;
|
||||
logic [2:0] sel_inv;
|
||||
logic Invalid;
|
||||
logic [4:0] FlagsIn;
|
||||
logic signResult;
|
||||
logic convert;
|
||||
logic sub;
|
||||
|
||||
wire [63:0] q1, qm1, qp1, q0, qm0, qp0;
|
||||
wire [63:0] rega_out, regb_out, regc_out, regd_out;
|
||||
wire [127:0] regr_out;
|
||||
wire [2:0] sel_muxa, sel_muxb;
|
||||
wire sel_muxr;
|
||||
wire load_rega, load_regb, load_regc, load_regd, load_regr;
|
||||
logic [63:0] q1, qm1, qp1, q0, qm0, qp0;
|
||||
logic [63:0] rega_out, regb_out, regc_out, regd_out;
|
||||
logic [127:0] regr_out;
|
||||
logic [2:0] sel_muxa, sel_muxb;
|
||||
logic sel_muxr;
|
||||
logic load_rega, load_regb, load_regc, load_regd, load_regr;
|
||||
|
||||
wire donev, sel_muxrv, sel_muxsv;
|
||||
wire [1:0] sel_muxav, sel_muxbv;
|
||||
wire load_regav, load_regbv, load_regcv;
|
||||
wire load_regrv, load_regs;
|
||||
logic exp_cout1, exp_cout2;
|
||||
logic exp_odd, open;
|
||||
// logic reset;
|
||||
logic load_regs;
|
||||
logic exp_cout1, exp_cout2;
|
||||
logic exp_odd, open;
|
||||
|
||||
// div/sqrt
|
||||
// fdiv = 0
|
||||
// fsqrt = 1
|
||||
|
||||
// Convert the input operands to their appropriate forms based on
|
||||
// the original operands, the op_type, and their precision P.
|
||||
// Single precision inputs are converted to double precision
|
||||
// and the sign of the first operand is set appropriately based on
|
||||
// if the operation is absolute value or negation.
|
||||
convert_inputs_div conv1 (Float1, Float2, op1, op2, op_type, P);
|
||||
convert_inputs_div conv1 (.op1, .op2, .op_type, .P,
|
||||
// outputs:
|
||||
.Float1, .Float2b(Float2));
|
||||
|
||||
// Test for exceptions and return the "Invalid Operation" and
|
||||
// "Denormalized" Input Flags. The "sel_inv" is used in
|
||||
// the third pipeline stage to select the result. Also, op1_Norm
|
||||
// and op2_Norm are one if op1 and op2 are not zero or denormalized.
|
||||
// sub is one if the effective operation is subtraction.
|
||||
exception_div exc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm,
|
||||
Float1, Float2, op_type);
|
||||
exception_div exc1 (.A(Float1), .B(Float2), .op_type,
|
||||
// output:
|
||||
.Ztype(sel_inv), .Invalid);
|
||||
|
||||
// Determine Sign/Mantissa
|
||||
assign signResult = (Float1[63]^Float2[63]);
|
||||
assign mantissaA = {vdd, Float1[51:0]};
|
||||
assign mantissaB = {vdd, Float2[51:0]};
|
||||
assign mantissaA = {1'b1, Float1[51:0]};
|
||||
assign mantissaB = {1'b1, Float2[51:0]};
|
||||
// Perform Exponent Subtraction - expA - expB + Bias
|
||||
assign exp1 = {2'b0, Float1[62:52]};
|
||||
assign exp2 = {2'b0, Float2[62:52]};
|
||||
// bias : DP = 2^{11-1}-1 = 1023
|
||||
assign bias = {3'h0, 10'h3FF};
|
||||
// Divide exponent
|
||||
csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c);
|
||||
// adder #(14) explogic1 ({vss, exp_s}, {vss, exp_c}, 1'b1, {open, exp_diff}, exp_cout1);
|
||||
assign {exp_cout1, open, exp_diff} = {vss, exp_s} + {vss, exp_c} + 1'b1;
|
||||
assign {exp_cout1, open, exp_diff} = exp1 - exp2 + bias;
|
||||
|
||||
// Sqrt exponent (check if exponent is odd)
|
||||
assign exp_odd = Float1[52] ? vss : vdd;
|
||||
// adder #(14) explogic2 ({vss, exp1}, {4'h0, 10'h3ff}, exp_odd, exp_sqrt, exp_cout2);
|
||||
assign {exp_cout2, exp_sqrt} = {vss, exp1} + {4'h0, 10'h3ff} + exp_odd;
|
||||
assign exp_odd = Float1[52] ? 1'b0 : 1'b1;
|
||||
assign {exp_cout2, exp_sqrt} = {1'b0, exp1} + {4'h0, 10'h3ff} + exp_odd;
|
||||
// Choose correct exponent
|
||||
assign expF = op_type ? exp_sqrt[13:1] : exp_diff;
|
||||
|
||||
// Main Goldschmidt/Division Routine
|
||||
divconv goldy (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out,
|
||||
regr_out, mantissaB, mantissaA, sel_muxa, sel_muxb, sel_muxr,
|
||||
reset, clk, load_rega, load_regb, load_regc, load_regd,
|
||||
load_regr, load_regs, P, op_type, exp_odd);
|
||||
divconv goldy (.q1, .qm1, .qp1, .q0, .qm0, .qp0, .rega_out, .regb_out, .regc_out, .regd_out,
|
||||
.regr_out, .d(mantissaB), .n(mantissaA), .sel_muxa, .sel_muxb, .sel_muxr,
|
||||
.reset, .clk, .load_rega, .load_regb, .load_regc, .load_regd,
|
||||
.load_regr, .load_regs, .P, .op_type, .exp_odd);
|
||||
|
||||
// FSM : control divider
|
||||
fsm control (done, load_rega, load_regb, load_regc, load_regd,
|
||||
load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
|
||||
clk, reset, start, op_type, FDivBusyE, HoldInputs);
|
||||
fsm control (.clk, .reset, .start, .op_type,
|
||||
// outputs:
|
||||
.done, .load_rega, .load_regb, .load_regc, .load_regd,
|
||||
.load_regr, .load_regs, .sel_muxa, .sel_muxb, .sel_muxr,
|
||||
.divBusy(FDivBusyE));
|
||||
|
||||
// Round the mantissa to a 52-bit value, with the leading one
|
||||
// removed. The rounding unit also handles special cases and
|
||||
// sets the exception flags.
|
||||
rounder_div round1 (Result, DenormIO, FlagsIn,
|
||||
rm, P, OvEn, UnEn, expF,
|
||||
sel_inv, Invalid, DenormIn, signResult,
|
||||
q1, qm1, qp1, q0, qm0, qp0, regr_out);
|
||||
rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF),
|
||||
.sel_inv, .Invalid, .SignR(signResult),
|
||||
.q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out,
|
||||
// outputs:
|
||||
.Result, .Flags(FlagsIn));
|
||||
|
||||
// Store the final result and the exception flags in registers.
|
||||
flopenr #(64) rega (clk, reset, done, Result, AS_Result);
|
||||
flopenr #(1) regb (clk, reset, done, DenormIO, Denorm);
|
||||
flopenr #(64) rega (clk, reset, done, Result, AS_Result);
|
||||
flopenr #(5) regc (clk, reset, done, FlagsIn, Flags);
|
||||
|
||||
endmodule // fpdiv
|
||||
|
@ -25,142 +25,173 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fpu (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic [2:0] FRM_REGW, // Rounding mode from CSR
|
||||
input logic [31:0] InstrD,
|
||||
input logic [`XLEN-1:0] ReadDataW, // Read data from memory
|
||||
input logic [`XLEN-1:0] SrcAE, // Integer input being processed
|
||||
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
|
||||
input logic StallE, StallM, StallW,
|
||||
input logic FlushE, FlushM, FlushW,
|
||||
input logic [4:0] RdE, RdM, RdW,
|
||||
output logic FRegWriteM,
|
||||
output logic FStallD, // Stall the decode stage
|
||||
output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable
|
||||
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
|
||||
output logic [`XLEN-1:0] FIntResM,
|
||||
output logic FDivBusyE, // Is the divison/sqrt unit busy
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
|
||||
output logic [4:0] SetFflagsM); // FPU result
|
||||
// *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS
|
||||
// *** folder at same level of src for tests fpu tests
|
||||
// qa.b
|
||||
// u1.52 - u is unsigned, q is signed
|
||||
generate
|
||||
if (`F_SUPPORTED | `D_SUPPORTED) begin
|
||||
// control logic signal instantiation
|
||||
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
|
||||
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
|
||||
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
|
||||
logic FDivStartD, FDivStartE; // Start division
|
||||
logic FWriteIntD; // Write to integer register
|
||||
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // Input3 forwarding mux control signal
|
||||
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
|
||||
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
|
||||
logic [1:0] FResSelD, FResSelE, FResSelM;
|
||||
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM;
|
||||
logic [4:0] Adr1E, Adr2E, Adr3E;
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic [2:0] FRM_REGW, // Rounding mode from CSR
|
||||
input logic [31:0] InstrD, // instruction from IFU
|
||||
input logic [`XLEN-1:0] ReadDataW,// Read data from memory
|
||||
input logic [`XLEN-1:0] SrcAE, // Integer input being processed (from IEU)
|
||||
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg (from IEU)
|
||||
input logic StallE, StallM, StallW, // stall signals from HZU
|
||||
input logic FlushE, FlushM, FlushW, // flush signals from HZU
|
||||
input logic [4:0] RdE, RdM, RdW, // which FP register to write to (from IEU)
|
||||
output logic FRegWriteM, // FP register write enable
|
||||
output logic FStallD, // Stall the decode stage
|
||||
output logic FWriteIntE, FWriteIntM, FWriteIntW, // integer register write enable
|
||||
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
|
||||
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
|
||||
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
|
||||
output logic [4:0] SetFflagsM // FMA flags (to privileged unit)
|
||||
);
|
||||
|
||||
//*** make everything FLEN at some point
|
||||
//*** add the 128 bit support to the if statement when needed
|
||||
//*** make new tests for fp using testfloat that include flag checking and all rounding modes
|
||||
//*** what is the format for 16-bit - finding conflicting info online can't find anything specified in spec
|
||||
//*** only fma/mul and fp <-> int convert flags have been tested. test the others.
|
||||
|
||||
// FPU specifics:
|
||||
// - uses NaN-boxing format
|
||||
// - if there are any unused bits the most significant bits are filled with 1s
|
||||
// single stored in a double: | 32 1s | single precision value |
|
||||
// - sets the underflow after rounding
|
||||
|
||||
generate if (`F_SUPPORTED | `D_SUPPORTED) begin
|
||||
|
||||
// control signals
|
||||
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
|
||||
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
|
||||
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
|
||||
logic FDivStartD, FDivStartE; // Start division or squareroot
|
||||
logic FWriteIntD; // Write to integer register
|
||||
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
|
||||
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register
|
||||
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which operation to do in each component
|
||||
logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage
|
||||
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; // Select the result written to the integer register
|
||||
logic [4:0] Adr1E, Adr2E, Adr3E; // addresses of each input
|
||||
|
||||
// regfile signals
|
||||
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [`XLEN-1:0] FSrcXMAligned;
|
||||
logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding)
|
||||
logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding)
|
||||
logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
|
||||
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding)
|
||||
logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding)
|
||||
logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
|
||||
|
||||
// unpacking signals
|
||||
logic XSgnE, YSgnE, ZSgnE;
|
||||
logic [10:0] XExpE, YExpE, ZExpE;
|
||||
logic [52:0] XManE, YManE, ZManE;
|
||||
logic XNaNE, YNaNE, ZNaNE;
|
||||
logic XSNaNE, YSNaNE, ZSNaNE;
|
||||
logic XDenormE, YDenormE, ZDenormE;
|
||||
logic XZeroE, YZeroE, ZZeroE;
|
||||
logic [10:0] BiasE;
|
||||
logic XInfE, YInfE, ZInfE;
|
||||
logic XExpMaxE;
|
||||
logic XNormE;
|
||||
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
|
||||
logic XSgnM, YSgnM, ZSgnM; // input's sign - memory stage
|
||||
logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
|
||||
logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage
|
||||
logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage
|
||||
logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage
|
||||
logic [10:0] BiasE; // bias based on precision (single=7f double=3ff - max exponent/2)
|
||||
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
|
||||
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
|
||||
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
|
||||
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
|
||||
logic XDenormE, YDenormE, ZDenormE; // is the input denormalized
|
||||
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
|
||||
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
|
||||
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
|
||||
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
|
||||
logic XExpMaxE; // is the exponent all ones (max value)
|
||||
logic XNormE; // is X normal
|
||||
|
||||
logic XSgnM, YSgnM, ZSgnM;
|
||||
logic [10:0] XExpM, YExpM, ZExpM;
|
||||
logic [52:0] XManM, YManM, ZManM;
|
||||
logic XNaNM, YNaNM, ZNaNM;
|
||||
logic XSNaNM, YSNaNM, ZSNaNM;
|
||||
logic XZeroM, YZeroM, ZZeroM;
|
||||
logic XInfM, YInfM, ZInfM;
|
||||
|
||||
// div/sqrt signals
|
||||
logic [63:0] FDivResultM, FDivResultW;
|
||||
logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW;
|
||||
logic FDivSqrtDoneE;
|
||||
logic [63:0] DivInput1E, DivInput2E;
|
||||
logic HoldInputs; // keep forwarded inputs around during division
|
||||
// result and flag signals
|
||||
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
|
||||
logic [4:0] FDivFlgM, FDivFlgW; // divide/squareroot flags
|
||||
|
||||
logic [63:0] FMAResM, FMAResW; // FMA/multiply result
|
||||
logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply flags
|
||||
|
||||
//fpu signals
|
||||
logic [63:0] FMAResM, FMAResW;
|
||||
logic [4:0] FMAFlgM, FMAFlgW;
|
||||
logic [63:0] ReadResW; // read result (load instruction)
|
||||
|
||||
logic [63:0] FAddResM, FAddResW; // add/FP -> FP convert result
|
||||
logic [4:0] FAddFlgM, FAddFlgW; // add/FP -> FP convert flags
|
||||
|
||||
logic [63:0] CvtResE, CvtResM; // FP <-> int convert result
|
||||
logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this
|
||||
|
||||
logic [63:0] ReadResW;
|
||||
|
||||
// add/cvt signals
|
||||
logic [63:0] FAddResM, FAddResW;
|
||||
logic [4:0] FAddFlgM, FAddFlgW;
|
||||
logic [63:0] CvtResE, CvtResM;
|
||||
logic [4:0] CvtFlgE, CvtFlgM;
|
||||
|
||||
// cmp signals
|
||||
logic CmpNVE, CmpNVM, CmpNVW;
|
||||
logic [63:0] CmpResE, CmpResM, CmpResW;
|
||||
|
||||
// fsgn signals
|
||||
logic [63:0] SgnResE, SgnResM;
|
||||
logic SgnNVE, SgnNVM, SgnNVW;
|
||||
logic [63:0] FResM, FResW;
|
||||
logic [4:0] FFlgM, FFlgW;
|
||||
|
||||
// instantiation of W stage regfile signals
|
||||
logic [63:0] AlignedSrcAM;
|
||||
|
||||
// classify signals
|
||||
logic [63:0] ClassResE, ClassResM;
|
||||
|
||||
// 64-bit FPU result
|
||||
logic [63:0] FPUResultW;
|
||||
logic [4:0] FPUFlagsW;
|
||||
logic [63:0] ClassResE, ClassResM; // classify result
|
||||
|
||||
logic [63:0] CmpResE, CmpResM; // compare result
|
||||
logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid)
|
||||
|
||||
logic [63:0] SgnResE, SgnResM; // sign injection result
|
||||
logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid)
|
||||
|
||||
logic [63:0] FResM, FResW; // selected result that is ready in the memory stage
|
||||
logic [4:0] FFlgM; // selected flag that is ready in the memory stage
|
||||
|
||||
logic [63:0] FPUResultW; // final FP result being written to the FP register
|
||||
|
||||
// other signals
|
||||
logic FDivSqrtDoneE; // is divide done
|
||||
logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit
|
||||
logic FDivClk; // clock for divide/squareroot unit
|
||||
logic [63:0] AlignedSrcAM; // align SrcA to the floating point format
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
//DECODE STAGE
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
// calculate FP control signals
|
||||
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW,
|
||||
// outputs:
|
||||
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
|
||||
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
|
||||
|
||||
// top-level controller for FPU
|
||||
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
|
||||
.FRM_REGW, .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
|
||||
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
|
||||
// FP register file
|
||||
// - can read 3 registers and write 1 register every cycle
|
||||
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
|
||||
.a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), .a4(RdW),
|
||||
.wd4(FPUResultW),
|
||||
// outputs:
|
||||
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
|
||||
|
||||
// regfile instantiation
|
||||
fregfile fregfile (clk, reset, FRegWriteW,
|
||||
InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
|
||||
FPUResultW,
|
||||
FRD1D, FRD2D, FRD3D);
|
||||
|
||||
//*****************
|
||||
// D/E pipe registers
|
||||
//*****************
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// D/E pipeline registers
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
|
||||
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
|
||||
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
|
||||
flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
|
||||
flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
|
||||
{Adr1E, Adr2E, Adr3E});
|
||||
flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
|
||||
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
|
||||
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
|
||||
{Adr1E, Adr2E, Adr3E});
|
||||
flopenrc #(18) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
|
||||
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
//EXECUTION STAGE
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// Hazard unit for FPU
|
||||
// - determines if any forwarding or stalls are needed
|
||||
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM,
|
||||
// outputs:
|
||||
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
|
||||
|
||||
// Hazard unit for FPU
|
||||
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, .FStallD,
|
||||
.FForwardXE, .FForwardYE, .FForwardZE);
|
||||
|
||||
// forwarding muxs
|
||||
mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
|
||||
@ -168,128 +199,190 @@ module fpu (
|
||||
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
|
||||
mux2 #(64) fzmulmux(FPreSrcZE, 64'b0, FOpCtrlE[2], FSrcZE); // Force Z to be 0 for multiply instructions
|
||||
|
||||
unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE),
|
||||
.FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE,
|
||||
.ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.XNaNE, .YNaNE, .ZNaNE,
|
||||
.XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
|
||||
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
|
||||
// first of two-stage instance of floating-point fused multiply-add unit
|
||||
|
||||
// unpacking unit
|
||||
// - splits FP inputs into their various parts
|
||||
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infinity)
|
||||
unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE,
|
||||
// outputs:
|
||||
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
|
||||
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
|
||||
|
||||
// FMA
|
||||
// - two stage FMA
|
||||
// - execute stage - multiplication and addend shifting
|
||||
// - memory stage - addition and rounding
|
||||
// - handles FMA and multiply instructions
|
||||
// - contains some E/M pipeline registers
|
||||
// *** currently handles FLEN and 32 bits(dont know if 32 works with 128 - easy to fix) - change to handle only the supported formats
|
||||
fma fma (.clk, .reset, .FlushM, .StallM,
|
||||
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .
|
||||
ZManE, .XDenormE, .YDenormE,
|
||||
.ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE,
|
||||
.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XManM,
|
||||
.YManM, .ZManM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
|
||||
// .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM,
|
||||
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE,
|
||||
.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM,
|
||||
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM,
|
||||
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
|
||||
.FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]),
|
||||
.FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM);
|
||||
|
||||
// first and only instance of floating-point divider
|
||||
logic fpdivClk;
|
||||
.FmtE, .FmtM, .FrmM,
|
||||
// outputs:
|
||||
.FMAFlgM, .FMAResM);
|
||||
|
||||
// clock gater
|
||||
// - creates a clock that only runs during divide/sqrt instructions
|
||||
// - using the separate clock gives the divide/sqrt unit some time to get set up
|
||||
// *** the module says not to use in synthesis
|
||||
clockgater fpdivclkg(.E(FDivStartE),
|
||||
.SE(1'b0),
|
||||
.CLK(clk),
|
||||
.ECLK(fpdivClk));
|
||||
.ECLK(FDivClk));
|
||||
|
||||
// capture the inputs for div/sqrt
|
||||
// capture the inputs for divide/sqrt
|
||||
// - if not captured any forwarded inputs will change during computation
|
||||
// - this problem is caused by stalling the execute stage
|
||||
// - the other units don't have this problem, only div/sqrt stalls the execute stage
|
||||
flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E),
|
||||
.en(1'b1), .clear(FDivSqrtDoneE),
|
||||
.reset(reset), .clk(HoldInputs));
|
||||
.reset(reset), .clk(FDivBusyE));
|
||||
flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E),
|
||||
.en(1'b1), .clear(FDivSqrtDoneE),
|
||||
.reset(reset), .clk(HoldInputs));
|
||||
//*** add round to nearest ties to max magnitude
|
||||
fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .done(FDivSqrtDoneE), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
|
||||
.P(~FmtE), .FDivBusyE, .HoldInputs,
|
||||
.OvEn(1'b1), .UnEn(1'b1),
|
||||
.start(FDivStartE), .reset, .clk(fpdivClk), .AS_Result(FDivResultM), .Flags(FDivSqrtFlgM));
|
||||
|
||||
// .DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E,
|
||||
// .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM,
|
||||
// .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset);
|
||||
// assign FDivBusyE = 0;
|
||||
|
||||
// first of two-stage instance of floating-point add/cvt unit
|
||||
faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM,
|
||||
.FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM);
|
||||
|
||||
// first and only instance of floating-point comparator
|
||||
fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), .FSrcXE,
|
||||
.FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE,
|
||||
.Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE);
|
||||
|
||||
// first and only instance of floating-point sign converter
|
||||
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE);
|
||||
|
||||
// first and only instance of floating-point classify unit
|
||||
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE);
|
||||
|
||||
fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);
|
||||
.reset(reset), .clk(FDivBusyE));
|
||||
|
||||
// output for store instructions
|
||||
//*** change to use the unpacking unit if possible
|
||||
fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
|
||||
.reset, .clk(FDivClk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1),
|
||||
// outputs:
|
||||
.FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
|
||||
|
||||
|
||||
// add/FP <-> FP convert
|
||||
// - computation is done in two stages
|
||||
// - contains some E/M pipeline registers
|
||||
//*** remove unneeded logic
|
||||
//*** change to use the unpacking unit if possible
|
||||
faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE,
|
||||
// outputs:
|
||||
.FAddResM, .FAddFlgM);
|
||||
|
||||
// compare unit
|
||||
// - computation is done in one stage
|
||||
// - writes to FP file during min/max instructions
|
||||
// - other comparisons write a 1 or 0 to the integer register
|
||||
fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}),
|
||||
.FSrcXE, .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]),
|
||||
.FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE,
|
||||
// outputs:
|
||||
.Invalid(CmpNVE), .CmpResE);
|
||||
|
||||
// sign injection unit
|
||||
// - computation is done in one stage
|
||||
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE,
|
||||
// outputs:
|
||||
.SgnNVE, .SgnResE);
|
||||
|
||||
// classify
|
||||
// - computation is done in one stage
|
||||
// - most of the work is done in the unpacking unit
|
||||
// - result is written to the integer register
|
||||
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE,
|
||||
// outputs:
|
||||
.XSNaNE, .ClassResE);
|
||||
|
||||
fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE,
|
||||
// outputs:
|
||||
.CvtResE, .CvtFlgE);
|
||||
|
||||
// data to be stored in memory - to IEU
|
||||
// - FP uses NaN-boxing format
|
||||
// - if there are any unused bits the most significant bits are filled with 1s
|
||||
assign FWriteDataE = FSrcYE[`XLEN-1:0];
|
||||
|
||||
//*****************
|
||||
|
||||
|
||||
|
||||
|
||||
//***will synth remove registers of values that are always zero?
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// E/M pipe registers
|
||||
//*****************
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
|
||||
// flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM);
|
||||
// flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM);
|
||||
flopenrc #(65) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM});
|
||||
flopenrc #(65) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM});
|
||||
flopenrc #(65) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZManE}, {ZSgnM,ZExpM,ZManM});
|
||||
flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM,
|
||||
flopenrc #(65) EMFpReg2(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM});
|
||||
flopenrc #(65) EMFpReg3(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM});
|
||||
flopenrc #(65) EMFpReg4(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZManE}, {ZSgnM,ZExpM,ZManM});
|
||||
flopenrc #(12) EMFpReg5(clk, reset, FlushM, ~StallM,
|
||||
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
|
||||
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
|
||||
|
||||
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM);
|
||||
flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM);
|
||||
flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, CmpResE, CmpResM);
|
||||
flopenrc #(1) EMRegCmpFlg(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM);
|
||||
|
||||
flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
|
||||
flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
|
||||
flopenrc #(64) EMRegSgnRes(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
|
||||
flopenrc #(1) EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
|
||||
|
||||
flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
|
||||
flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
|
||||
flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
|
||||
flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
|
||||
|
||||
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
|
||||
|
||||
flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM,
|
||||
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
|
||||
{FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
|
||||
|
||||
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
//BEGIN MEMORY STAGE
|
||||
mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
|
||||
mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
|
||||
|
||||
// mux2 #(`XLEN) FSrcXAlignedMux({{`XLEN-32{1'b0}}, FSrcXM[63:32]}, FSrcXM[63:64-`XLEN], FmtM, FSrcXMAligned);
|
||||
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Align SrcA to MSB when single precision
|
||||
mux2 #(64) SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM);
|
||||
mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivSqrtFlgM, FFlgM, FResultSelW, SetFflagsM);
|
||||
|
||||
// select a result that may be written to the FP register
|
||||
mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
|
||||
mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
|
||||
|
||||
//*****************
|
||||
// select the result that may be written to the integer register - to IEU
|
||||
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
|
||||
|
||||
// FPU flag selection - to privileged
|
||||
mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
|
||||
flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW);
|
||||
flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW);
|
||||
flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW);
|
||||
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
|
||||
flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW,
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
|
||||
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
|
||||
flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, FAddResM, FAddResW);
|
||||
flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
|
||||
flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW,
|
||||
{FRegWriteM, FResultSelM, FmtM, FWriteIntM},
|
||||
{FRegWriteW, FResultSelW, FmtW, FWriteIntW});
|
||||
|
||||
//#########################################
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// BEGIN WRITEBACK STAGE
|
||||
//#########################################
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// put ReadData into NaN-boxing format
|
||||
// - if there are any unused bits the most significant bits are filled with 1s
|
||||
// - for load instruction
|
||||
mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
|
||||
mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW);
|
||||
|
||||
// select the result to be written to the FP register
|
||||
mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResW, FResW, FResultSelW, FPUResultW);
|
||||
|
||||
|
||||
end else begin // no F_SUPPORTED; tie outputs low
|
||||
end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
|
||||
assign FStallD = 0;
|
||||
assign FWriteIntE = 0;
|
||||
assign FWriteIntM = 0;
|
||||
@ -299,7 +392,7 @@ module fpu (
|
||||
assign FDivBusyE = 0;
|
||||
assign IllegalFPUInstrD = 1;
|
||||
assign SetFflagsM = 0;
|
||||
end
|
||||
end
|
||||
endgenerate
|
||||
|
||||
endmodule // fpu
|
||||
|
@ -26,10 +26,10 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fregfile (
|
||||
input logic clk, reset,
|
||||
input logic we4,
|
||||
input logic [ 4:0] a1, a2, a3, a4,
|
||||
input logic [63:0] wd4, //KEP `XLEN-1 changed to 63 (lint warning) *** figure out if double can be suported when XLEN = 32
|
||||
input logic clk, reset,
|
||||
input logic we4,
|
||||
input logic [ 4:0] a1, a2, a3, a4,
|
||||
input logic [63:0] wd4,
|
||||
output logic [63:0] rd1, rd2, rd3);
|
||||
|
||||
logic [63:0] rf[31:0];
|
||||
|
@ -1,15 +1,15 @@
|
||||
//performs the fsgnj/fsgnjn/fsgnjx RISCV instructions
|
||||
|
||||
module fsgn (
|
||||
input logic XSgnE, YSgnE,
|
||||
input logic [63:0] FSrcXE,
|
||||
input logic XExpMaxE,
|
||||
input logic FmtE,
|
||||
input logic [1:0] SgnOpCodeE,
|
||||
output logic [63:0] SgnResE,
|
||||
output logic SgnNVE);
|
||||
input logic XSgnE, YSgnE, // X and Y sign bits
|
||||
input logic [63:0] FSrcXE, // X
|
||||
input logic XExpMaxE, // max possible exponent (all ones)
|
||||
input logic FmtE, // precision 1 = double 0 = single
|
||||
input logic [1:0] SgnOpCodeE, // operation control
|
||||
output logic [63:0] SgnResE, // result
|
||||
output logic SgnNVE // invalid flag
|
||||
);
|
||||
|
||||
logic AonesExp;
|
||||
logic ResSgn;
|
||||
|
||||
//op code designation:
|
||||
@ -19,7 +19,12 @@ module fsgn (
|
||||
//10 - fsgnjx - XOR sign values of FSrcXE & FSrcYE
|
||||
//
|
||||
|
||||
// calculate the result's sign
|
||||
assign ResSgn = SgnOpCodeE[1] ? (XSgnE ^ YSgnE) : (YSgnE ^ SgnOpCodeE[0]);
|
||||
|
||||
// format final result based on precision
|
||||
// - uses NaN-boxing format
|
||||
// - if there are any unused bits the most significant bits are filled with 1s
|
||||
assign SgnResE = FmtE ? {ResSgn, FSrcXE[62:0]} : {FSrcXE[63:32], ResSgn, FSrcXE[30:0]};
|
||||
|
||||
//If the exponent is all ones, then the value is either Inf or NaN,
|
||||
|
@ -1,37 +1,22 @@
|
||||
module fsm (done, load_rega, load_regb, load_regc,
|
||||
load_regd, load_regr, load_regs,
|
||||
sel_muxa, sel_muxb, sel_muxr,
|
||||
clk, reset, start, op_type, divBusy, holdInputs);
|
||||
module fsm (
|
||||
|
||||
input clk;
|
||||
input reset;
|
||||
input start;
|
||||
// input error;
|
||||
input op_type;
|
||||
//***can use divbusy insted of holdinputs
|
||||
output done;
|
||||
output load_rega;
|
||||
output load_regb;
|
||||
output load_regc;
|
||||
output load_regd;
|
||||
output load_regr;
|
||||
output load_regs;
|
||||
|
||||
output [2:0] sel_muxa;
|
||||
output [2:0] sel_muxb;
|
||||
output sel_muxr;
|
||||
output logic divBusy,holdInputs;
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic start,
|
||||
input logic op_type,
|
||||
output logic done, // End of cycles
|
||||
output logic load_rega, // enable for regA
|
||||
output logic load_regb, // enable for regB
|
||||
output logic load_regc, // enable for regC
|
||||
output logic load_regd, // enable for regD
|
||||
output logic load_regr, // enable for rem
|
||||
output logic load_regs, // enable for q,qm,qp
|
||||
output logic [2:0] sel_muxa, // Select muxA
|
||||
output logic [2:0] sel_muxb, // Select muxB
|
||||
output logic sel_muxr, // Select rem mux
|
||||
output logic divBusy // calculation is happening
|
||||
);
|
||||
|
||||
reg done; // End of cycles
|
||||
reg load_rega; // enable for regA
|
||||
reg load_regb; // enable for regB
|
||||
reg load_regc; // enable for regC
|
||||
reg load_regd; // enable for regD
|
||||
reg load_regr; // enable for rem
|
||||
reg load_regs; // enable for q,qm,qp
|
||||
reg [2:0] sel_muxa; // Select muxA
|
||||
reg [2:0] sel_muxb; // Select muxB
|
||||
reg sel_muxr; // Select rem mux
|
||||
|
||||
reg [4:0] CURRENT_STATE;
|
||||
reg [4:0] NEXT_STATE;
|
||||
@ -65,7 +50,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b0;
|
||||
holdInputs = 1'b0;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b0;
|
||||
@ -81,7 +65,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b1;
|
||||
load_regc = 1'b0;
|
||||
@ -97,7 +80,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b1;
|
||||
load_regc = 1'b0;
|
||||
@ -114,7 +96,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b1;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b1;
|
||||
@ -130,7 +111,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b1;
|
||||
load_regc = 1'b0;
|
||||
@ -146,7 +126,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b1;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b1;
|
||||
@ -162,7 +141,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b1;
|
||||
load_regc = 1'b0;
|
||||
@ -178,7 +156,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b1;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b1;
|
||||
@ -194,7 +171,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b1;
|
||||
load_regc = 1'b0;
|
||||
@ -210,7 +186,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b1;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b1;
|
||||
@ -226,7 +201,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b0;
|
||||
@ -242,7 +216,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b0;
|
||||
@ -258,7 +231,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b1;
|
||||
divBusy = 1'b0;
|
||||
holdInputs = 1'b0;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b0;
|
||||
@ -274,7 +246,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b0;
|
||||
@ -290,7 +261,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b1;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b1;
|
||||
@ -306,7 +276,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b1;
|
||||
load_regc = 1'b0;
|
||||
@ -322,7 +291,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b0;
|
||||
@ -338,7 +306,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b1;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b1;
|
||||
@ -354,7 +321,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b1;
|
||||
load_regc = 1'b0;
|
||||
@ -370,7 +336,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b0;
|
||||
@ -386,7 +351,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b1;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b1;
|
||||
@ -402,7 +366,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b1;
|
||||
load_regc = 1'b0;
|
||||
@ -418,7 +381,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b0;
|
||||
@ -434,7 +396,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b1;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b1;
|
||||
@ -450,7 +411,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b0;
|
||||
@ -466,7 +426,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b1;
|
||||
holdInputs = 1'b1;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b0;
|
||||
@ -482,7 +441,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b1;
|
||||
divBusy = 1'b0;
|
||||
holdInputs = 1'b0;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b0;
|
||||
@ -498,7 +456,6 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
begin
|
||||
done = 1'b0;
|
||||
divBusy = 1'b0;
|
||||
holdInputs = 1'b0;
|
||||
load_rega = 1'b0;
|
||||
load_regb = 1'b0;
|
||||
load_regc = 1'b0;
|
||||
|
@ -1,461 +0,0 @@
|
||||
|
||||
// `timescale 1ps/1ps
|
||||
// module fsm_div (done, load_rega, load_regb, load_regc,
|
||||
// load_regd, load_regr, load_regs,
|
||||
// sel_muxa, sel_muxb, sel_muxr,
|
||||
// clk, reset, start, error, op_type);
|
||||
|
||||
// input clk;
|
||||
// input reset;
|
||||
// input start;
|
||||
// input error;
|
||||
// input op_type;
|
||||
|
||||
// output done;
|
||||
// output load_rega;
|
||||
// output load_regb;
|
||||
// output load_regc;
|
||||
// output load_regd;
|
||||
// output load_regr;
|
||||
// output load_regs;
|
||||
|
||||
// output [2:0] sel_muxa;
|
||||
// output [2:0] sel_muxb;
|
||||
// output sel_muxr;
|
||||
|
||||
// reg done; // End of cycles
|
||||
// reg load_rega; // enable for regA
|
||||
// reg load_regb; // enable for regB
|
||||
// reg load_regc; // enable for regC
|
||||
// reg load_regd; // enable for regD
|
||||
// reg load_regr; // enable for rem
|
||||
// reg load_regs; // enable for q,qm,qp
|
||||
// reg [2:0] sel_muxa; // Select muxA
|
||||
// reg [2:0] sel_muxb; // Select muxB
|
||||
// reg sel_muxr; // Select rem mux
|
||||
|
||||
// reg [4:0] CURRENT_STATE;
|
||||
// reg [4:0] NEXT_STATE;
|
||||
|
||||
// parameter [4:0]
|
||||
// S0=5'd0, S1=5'd1, S2=5'd2,
|
||||
// S3=5'd3, S4=5'd4, S5=5'd5,
|
||||
// S6=5'd6, S7=5'd7, S8=5'd8,
|
||||
// S9=5'd9, S10=5'd10,
|
||||
// S13=5'd13, S14=5'd14, S15=5'd15,
|
||||
// S16=5'd16, S17=5'd17, S18=5'd18,
|
||||
// S19=5'd19, S20=5'd20, S21=5'd21,
|
||||
// S22=5'd22, S23=5'd23, S24=5'd24,
|
||||
// S25=5'd25, S26=5'd26, S27=5'd27,
|
||||
// S28=5'd28, S29=5'd29, S30=5'd30;
|
||||
|
||||
// always @(posedge clk)
|
||||
// begin
|
||||
// if(reset==1'b1)
|
||||
// CURRENT_STATE<=S0;
|
||||
// else
|
||||
// CURRENT_STATE<=NEXT_STATE;
|
||||
// end
|
||||
|
||||
// always @(*)
|
||||
// begin
|
||||
// case(CURRENT_STATE)
|
||||
// S0: // iteration 0
|
||||
// begin
|
||||
// if (start==1'b0)
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b000;
|
||||
// sel_muxb = 3'b000;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S0;
|
||||
// end
|
||||
// else if (start==1'b1 && op_type==1'b0)
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b1;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b001;
|
||||
// sel_muxb = 3'b001;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S1;
|
||||
// end // if (start==1'b1 && op_type==1'b0)
|
||||
// else if (start==1'b1 && op_type==1'b1)
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b1;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b010;
|
||||
// sel_muxb = 3'b000;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S13;
|
||||
// end
|
||||
// end // case: S0
|
||||
// S1:
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b1;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b1;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b010;
|
||||
// sel_muxb = 3'b000;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S2;
|
||||
// end
|
||||
// S2: // iteration 1
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b1;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b011;
|
||||
// sel_muxb = 3'b011;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S3;
|
||||
// end
|
||||
// S3:
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b1;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b1;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b000;
|
||||
// sel_muxb = 3'b010;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S4;
|
||||
// end
|
||||
// S4: // iteration 2
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b1;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b011;
|
||||
// sel_muxb = 3'b011;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S5;
|
||||
// end
|
||||
// S5:
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b1;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b1;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b000;
|
||||
// sel_muxb = 3'b010;
|
||||
// sel_muxr = 1'b0; // add
|
||||
// NEXT_STATE <= S6;
|
||||
// end
|
||||
// S6: // iteration 3
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b1;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b011;
|
||||
// sel_muxb = 3'b011;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S8;
|
||||
// end
|
||||
// S7:
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b1;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b1;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b000;
|
||||
// sel_muxb = 3'b010;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S8;
|
||||
// end // case: S7
|
||||
// S8: // q,qm,qp
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b1;
|
||||
// sel_muxa = 3'b000;
|
||||
// sel_muxb = 3'b000;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S9;
|
||||
// end
|
||||
// S9: // rem
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b1;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b000;
|
||||
// sel_muxb = 3'b000;
|
||||
// sel_muxr = 1'b1;
|
||||
// NEXT_STATE <= S10;
|
||||
// end
|
||||
// S10: // done
|
||||
// begin
|
||||
// done = 1'b1;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b000;
|
||||
// sel_muxb = 3'b000;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S0;
|
||||
// end
|
||||
// S13: // start of sqrt path
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b1;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b010;
|
||||
// sel_muxb = 3'b001;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S14;
|
||||
// end
|
||||
// S14:
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b1;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b1;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b001;
|
||||
// sel_muxb = 3'b100;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S15;
|
||||
// end
|
||||
// S15: // iteration 1
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b1;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b011;
|
||||
// sel_muxb = 3'b011;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S16;
|
||||
// end
|
||||
// S16:
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b1;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b000;
|
||||
// sel_muxb = 3'b011;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S17;
|
||||
// end
|
||||
// S17:
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b1;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b1;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b100;
|
||||
// sel_muxb = 3'b010;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S18;
|
||||
// end
|
||||
// S18: // iteration 2
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b1;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b011;
|
||||
// sel_muxb = 3'b011;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S19;
|
||||
// end
|
||||
// S19:
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b1;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b000;
|
||||
// sel_muxb = 3'b011;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S20;
|
||||
// end
|
||||
// S20:
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b1;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b1;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b100;
|
||||
// sel_muxb = 3'b010;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S21;
|
||||
// end
|
||||
// S21: // iteration 3
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b1;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b011;
|
||||
// sel_muxb = 3'b011;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S22;
|
||||
// end
|
||||
// S22:
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b1;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b000;
|
||||
// sel_muxb = 3'b011;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S23;
|
||||
// end
|
||||
// S23:
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b1;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b1;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b100;
|
||||
// sel_muxb = 3'b010;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S24;
|
||||
// end
|
||||
// S24: // q,qm,qp
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b1;
|
||||
// sel_muxa = 3'b000;
|
||||
// sel_muxb = 3'b000;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S25;
|
||||
// end
|
||||
// S25: // rem
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b1;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b011;
|
||||
// sel_muxb = 3'b110;
|
||||
// sel_muxr = 1'b1;
|
||||
// NEXT_STATE <= S26;
|
||||
// end
|
||||
// S26: // done
|
||||
// begin
|
||||
// done = 1'b1;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b000;
|
||||
// sel_muxb = 3'b000;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S0;
|
||||
// end
|
||||
// default:
|
||||
// begin
|
||||
// done = 1'b0;
|
||||
// load_rega = 1'b0;
|
||||
// load_regb = 1'b0;
|
||||
// load_regc = 1'b0;
|
||||
// load_regd = 1'b0;
|
||||
// load_regr = 1'b0;
|
||||
// load_regs = 1'b0;
|
||||
// sel_muxa = 3'b000;
|
||||
// sel_muxb = 3'b000;
|
||||
// sel_muxr = 1'b0;
|
||||
// NEXT_STATE <= S0;
|
||||
// end
|
||||
// endcase // case(CURRENT_STATE)
|
||||
// end // always @ (CURRENT_STATE or X)
|
||||
|
||||
// endmodule // fsm
|
@ -1,593 +0,0 @@
|
||||
// Ladner-Fischer Prefix Adder
|
||||
|
||||
module ldf128 (cout, sum, a, b, cin); // 128-bit prefix adder wrapper: pre-computation, prefix tree, post-computation
|
||||
|
||||
input [127:0] a, b; // 128-bit operands
|
||||
input cin; // carry-in, injected as bit 0 of the generate vector below
|
||||
|
||||
output [127:0] sum; // 128-bit result: p[128:1] XOR the prefix-tree outputs
|
||||
output cout; // carry-out of the most significant bit position
|
||||
|
||||
wire [128:0] p,g; // propagate / generate vectors, widened by one bit so index 0 can carry cin
|
||||
wire [127:0] c; // prefix-tree outputs; c[i] connects to tree port c[i+1] (port is declared [128:1])
|
||||
|
||||
// pre-computation: per-bit propagate (a^b) and generate (a&b), shifted up by one position;
|
||||
assign p={a^b,1'b0}; // bit 0 propagate is forced to 0
|
||||
assign g={a&b, cin}; // bit 0 generate is the carry-in
|
||||
|
||||
// prefix tree: consumes the low 128 bits of p and g and drives c (presumably the per-bit carries; tree body not fully visible here)
|
||||
ladner_fischer128 prefix_tree (c, p[127:0], g[127:0]);
|
||||
|
||||
// post-computation: combine propagate bits with the prefix-tree outputs
|
||||
assign sum=p[128:1]^c; // sum bit i = propagate of operand bit i XOR c[i]
|
||||
assign cout=g[128]|(p[128]&c[127]); // top position generates, or propagates the value on c[127]
|
||||
|
||||
endmodule
|
||||
|
||||
// Ladner-Fischer parallel-prefix carry tree over 128 positions.
//
//   p, g : per-position propagate / generate inputs (position 0 is the LSB)
//   c    : c[k+1] = G_k_0, the group generate of positions k down to 0
//
// Built from the `grey` and `black` prefix cells, which are defined outside
// this module. Port order as used below:
//   grey  (g_out,        {g_hi, g_lo}, p_hi)
//   black (g_out, p_out, {g_hi, g_lo}, {p_hi, p_lo})
// NOTE(review): the cell equations are not visible from here -- presumably
// g_out = g_hi | (p_hi & g_lo) and p_out = p_hi & p_lo; confirm against the
// cell definitions.
//
// The network is described with generate loops instead of one hand-written
// instance per cell; the cells and their connections are unchanged.
module ladner_fischer128 (c, p, g);

   input [127:0]  p;
   input [127:0]  g;

   output [128:1] c;

   localparam PAIRS  = 64;  // pair nodes: node i covers positions 2i+1 and 2i
   localparam LEVELS = 6;   // merge levels above the pair level (2^6 = 64)

   // grp_g[s][i] / grp_p[s][i]: group generate / propagate held by pair node
   // i after level s. Once a node's group reaches position 0 (i < 2^s) only
   // its generate is needed, so grp_p[s][i] is driven only for i >= 2^s.
   wire [PAIRS-1:0] grp_g [0:LEVELS];
   wire [PAIRS-1:0] grp_p [0:LEVELS];

   genvar s, i;

   generate

      // Stage 1: combine adjacent positions into 2-bit pair groups.
      for (i = 0; i < PAIRS; i = i + 1) begin : pair
         if (i == 0) begin : to_zero
            grey u (grp_g[0][0], {g[1], g[0]}, p[1]);
         end
         else begin : inner
            black u (grp_g[0][i], grp_p[0][i],
                     {g[2*i+1], g[2*i]}, {p[2*i+1], p[2*i]});
         end
      end

      // Stages 2..7: at level s the nodes are taken in blocks of 2^s. Every
      // node in the upper half of a block is merged with the last node of the
      // lower half; nodes in the lower half keep their level s-1 group.
      for (s = 1; s <= LEVELS; s = s + 1) begin : level
         for (i = 0; i < PAIRS; i = i + 1) begin : node
            if (((i >> (s-1)) & 1) == 0) begin : pass
               assign grp_g[s][i] = grp_g[s-1][i];
               if (i >= (1 << (s-1))) begin : keep_p
                  assign grp_p[s][i] = grp_p[s-1][i];
               end
            end
            else begin : merge
               // Last node of the lower half of this block.
               localparam LO = ((i >> (s-1)) << (s-1)) - 1;
               if (i < (1 << s)) begin : to_zero
                  // Result reaches position 0: generate only.
                  grey u (grp_g[s][i],
                          {grp_g[s-1][i], grp_g[s-1][LO]}, grp_p[s-1][i]);
               end
               else begin : inner
                  black u (grp_g[s][i], grp_p[s][i],
                           {grp_g[s-1][i], grp_g[s-1][LO]},
                           {grp_p[s-1][i], grp_p[s-1][LO]});
               end
            end
         end
      end

      // Extra grey cell stage and final stage (c[k+1] = G_k_0).
      // After the last level grp_g[LEVELS][i] is G_(2i+1)_0. Each even
      // position 2i (i >= 1) needs one more grey cell on top of G_(2i-1)_0;
      // position 0 is just g[0].
      for (i = 0; i < PAIRS; i = i + 1) begin : carry
         assign c[2*i+2] = grp_g[LEVELS][i];
         if (i == 0) begin : lsb
            assign c[1] = g[0];
         end
         else begin : even
            grey u (c[2*i+1], {g[2*i], grp_g[LEVELS][i-1]}, p[2*i]);
         end
      end

   endgenerate

endmodule // ladner_fischer128
|
||||
|
@ -1,289 +0,0 @@
|
||||
// Ladner-Fischer Prefix Adder
|
||||
|
||||
// 64-bit adder built around the ladner_fischer64 parallel-prefix carry tree.
//
//   a, b : 64-bit operands
//   cin  : carry-in
//   sum  : 64-bit sum
//   cout : carry-out
module ldf64 (cout, sum, a, b, cin);

   input [63:0]  a, b;
   input         cin;
   output [63:0] sum;
   output        cout;

   // Pre-computation. Position 0 stands for the carry-in (generate = cin,
   // propagate = 0); operand bit k therefore sits at position k+1.
   wire [64:0]   prop_bits = {a ^ b, 1'b0};
   wire [64:0]   gen_bits  = {a & b, cin};

   // Prefix-tree output. The tree port is declared [64:1]; vectors connect
   // LSB to LSB, so carry[k] receives the tree's c[k+1].
   wire [63:0]   carry;

   // Prefix tree over positions 63..0.
   ladner_fischer64 prefix_tree (.c(carry),
                                 .p(prop_bits[63:0]),
                                 .g(gen_bits[63:0]));

   // Post-computation: each sum bit is its propagate bit XOR the matching
   // tree output; the carry-out folds in the topmost position, which the
   // tree does not cover.
   assign sum  = carry ^ prop_bits[64:1];
   assign cout = (carry[63] & prop_bits[64]) | gen_bits[64];

endmodule
|
||||
|
||||
module ladner_fischer64 (c, p, g);
// ladner_fischer64: carry tree for a 64-position prefix adder.
// Inputs p[63:0], g[63:0] are per-position propagate/generate signals; output c[k+1]
// is the group generate G_k_0 over positions k..0 (see the final stage below).
// Naming: G_i_j / P_i_j are the group generate / propagate over positions i down to j.
// Cell port order as instantiated here:
//   grey  (Gout, {g_hi, g_lo}, p_hi)
//   black (Gout, Pout, {g_hi, g_lo}, {p_hi, p_lo})
// NOTE(review): grey/black are defined outside this view; presumably
// Gout = g_hi | (p_hi & g_lo) and Pout = p_hi & p_lo -- confirm against their source.
// p[0] is never referenced inside this module.
|
||||
|
||||
input [63:0] p;
|
||||
input [63:0] g;
|
||||
|
||||
output [64:1] c;
|
||||
|
||||
logic G_1_0,G_3_2,P_3_2,G_5_4,P_5_4,G_7_6,P_7_6,G_9_8,P_9_8,G_11_10,P_11_10,G_13_12,P_13_12,G_15_14,P_15_14
|
||||
,G_17_16,P_17_16,G_19_18,P_19_18,G_21_20,P_21_20,G_23_22,P_23_22,G_25_24,P_25_24,G_27_26,P_27_26,G_29_28,P_29_28
|
||||
,G_31_30,P_31_30,G_33_32,P_33_32,G_35_34,P_35_34,G_37_36,P_37_36,G_39_38,P_39_38,G_41_40,P_41_40,G_43_42,P_43_42
|
||||
,G_45_44,P_45_44,G_47_46,P_47_46,G_49_48,P_49_48,G_51_50,P_51_50,G_53_52,P_53_52,G_55_54,P_55_54,G_57_56,P_57_56
|
||||
,G_59_58,P_59_58,G_61_60,P_61_60,G_63_62,P_63_62,G_3_0,G_7_4,P_7_4,G_11_8,P_11_8,G_15_12,P_15_12,G_19_16,P_19_16
|
||||
,G_23_20,P_23_20,G_27_24,P_27_24,G_31_28,P_31_28,G_35_32,P_35_32,G_39_36,P_39_36,G_43_40,P_43_40,G_47_44,P_47_44
|
||||
,G_51_48,P_51_48,G_55_52,P_55_52,G_59_56,P_59_56,G_63_60,P_63_60,G_5_0,G_7_0,G_13_8,P_13_8,G_15_8,P_15_8,G_21_16
|
||||
,P_21_16,G_23_16,P_23_16,G_29_24,P_29_24,G_31_24,P_31_24,G_37_32,P_37_32,G_39_32,P_39_32,G_45_40,P_45_40,G_47_40
|
||||
,P_47_40,G_53_48,P_53_48,G_55_48,P_55_48,G_61_56,P_61_56,G_63_56,P_63_56,G_9_0,G_11_0,G_13_0,G_15_0,G_25_16
|
||||
,P_25_16,G_27_16,P_27_16,G_29_16,P_29_16,G_31_16,P_31_16,G_41_32,P_41_32,G_43_32,P_43_32,G_45_32,P_45_32,G_47_32
|
||||
,P_47_32,G_57_48,P_57_48,G_59_48,P_59_48,G_61_48,P_61_48,G_63_48,P_63_48,G_17_0,G_19_0,G_21_0,G_23_0,G_25_0,G_27_0
|
||||
,G_29_0,G_31_0,G_49_32,P_49_32,G_51_32,P_51_32,G_53_32,P_53_32,G_55_32,P_55_32,G_57_32,P_57_32,G_59_32,P_59_32
|
||||
,G_61_32,P_61_32,G_63_32,P_63_32,G_33_0,G_35_0,G_37_0,G_39_0,G_41_0,G_43_0,G_45_0,G_47_0,G_49_0,G_51_0,G_53_0
|
||||
,G_55_0,G_57_0,G_59_0,G_61_0,G_63_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0,G_14_0,G_16_0,G_18_0,G_20_0,G_22_0
|
||||
,G_24_0,G_26_0,G_28_0,G_30_0,G_32_0,G_34_0,G_36_0,G_38_0,G_40_0,G_42_0,G_44_0,G_46_0,G_48_0,G_50_0,G_52_0
|
||||
,G_54_0,G_56_0,G_58_0,G_60_0,G_62_0;
|
||||
// parallel-prefix, Ladner-Fischer
// In each stage below, the "lower" operand of every cell spans 2^(stage-1) positions.
|
||||
|
||||
// Stage 1: merges adjacent single-position g/p inputs (lower operand spans 1 bit) into G/P pairs spanning 2 bits
|
||||
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
|
||||
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
|
||||
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
|
||||
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
|
||||
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
|
||||
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
|
||||
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
|
||||
black b_15_14 (G_15_14, P_15_14, {g[15],g[14]}, {p[15],p[14]});
|
||||
|
||||
black b_17_16 (G_17_16, P_17_16, {g[17],g[16]}, {p[17],p[16]});
|
||||
black b_19_18 (G_19_18, P_19_18, {g[19],g[18]}, {p[19],p[18]});
|
||||
black b_21_20 (G_21_20, P_21_20, {g[21],g[20]}, {p[21],p[20]});
|
||||
black b_23_22 (G_23_22, P_23_22, {g[23],g[22]}, {p[23],p[22]});
|
||||
black b_25_24 (G_25_24, P_25_24, {g[25],g[24]}, {p[25],p[24]});
|
||||
black b_27_26 (G_27_26, P_27_26, {g[27],g[26]}, {p[27],p[26]});
|
||||
black b_29_28 (G_29_28, P_29_28, {g[29],g[28]}, {p[29],p[28]});
|
||||
black b_31_30 (G_31_30, P_31_30, {g[31],g[30]}, {p[31],p[30]});
|
||||
|
||||
black b_33_32 (G_33_32, P_33_32, {g[33],g[32]}, {p[33],p[32]});
|
||||
black b_35_34 (G_35_34, P_35_34, {g[35],g[34]}, {p[35],p[34]});
|
||||
black b_37_36 (G_37_36, P_37_36, {g[37],g[36]}, {p[37],p[36]});
|
||||
black b_39_38 (G_39_38, P_39_38, {g[39],g[38]}, {p[39],p[38]});
|
||||
black b_41_40 (G_41_40, P_41_40, {g[41],g[40]}, {p[41],p[40]});
|
||||
black b_43_42 (G_43_42, P_43_42, {g[43],g[42]}, {p[43],p[42]});
|
||||
black b_45_44 (G_45_44, P_45_44, {g[45],g[44]}, {p[45],p[44]});
|
||||
black b_47_46 (G_47_46, P_47_46, {g[47],g[46]}, {p[47],p[46]});
|
||||
|
||||
black b_49_48 (G_49_48, P_49_48, {g[49],g[48]}, {p[49],p[48]});
|
||||
black b_51_50 (G_51_50, P_51_50, {g[51],g[50]}, {p[51],p[50]});
|
||||
black b_53_52 (G_53_52, P_53_52, {g[53],g[52]}, {p[53],p[52]});
|
||||
black b_55_54 (G_55_54, P_55_54, {g[55],g[54]}, {p[55],p[54]});
|
||||
black b_57_56 (G_57_56, P_57_56, {g[57],g[56]}, {p[57],p[56]});
|
||||
black b_59_58 (G_59_58, P_59_58, {g[59],g[58]}, {p[59],p[58]});
|
||||
black b_61_60 (G_61_60, P_61_60, {g[61],g[60]}, {p[61],p[60]});
|
||||
black b_63_62 (G_63_62, P_63_62, {g[63],g[62]}, {p[63],p[62]});
|
||||
|
||||
// Stage 2: merges adjacent 2-bit groups (lower operand spans 2 bits) into G/P pairs spanning 4 bits
|
||||
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
|
||||
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
|
||||
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
|
||||
black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12});
|
||||
black b_19_16 (G_19_16, P_19_16, {G_19_18,G_17_16}, {P_19_18,P_17_16});
|
||||
black b_23_20 (G_23_20, P_23_20, {G_23_22,G_21_20}, {P_23_22,P_21_20});
|
||||
black b_27_24 (G_27_24, P_27_24, {G_27_26,G_25_24}, {P_27_26,P_25_24});
|
||||
black b_31_28 (G_31_28, P_31_28, {G_31_30,G_29_28}, {P_31_30,P_29_28});
|
||||
|
||||
black b_35_32 (G_35_32, P_35_32, {G_35_34,G_33_32}, {P_35_34,P_33_32});
|
||||
black b_39_36 (G_39_36, P_39_36, {G_39_38,G_37_36}, {P_39_38,P_37_36});
|
||||
black b_43_40 (G_43_40, P_43_40, {G_43_42,G_41_40}, {P_43_42,P_41_40});
|
||||
black b_47_44 (G_47_44, P_47_44, {G_47_46,G_45_44}, {P_47_46,P_45_44});
|
||||
black b_51_48 (G_51_48, P_51_48, {G_51_50,G_49_48}, {P_51_50,P_49_48});
|
||||
black b_55_52 (G_55_52, P_55_52, {G_55_54,G_53_52}, {P_55_54,P_53_52});
|
||||
black b_59_56 (G_59_56, P_59_56, {G_59_58,G_57_56}, {P_59_58,P_57_56});
|
||||
black b_63_60 (G_63_60, P_63_60, {G_63_62,G_61_60}, {P_63_62,P_61_60});
|
||||
|
||||
// Stage 3: lower operand spans 4 bits and is shared by two cells (upper groups of 2 and 4 bits), giving spans of 6 and 8 bits
|
||||
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
|
||||
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
|
||||
black b_13_8 (G_13_8, P_13_8, {G_13_12,G_11_8}, {P_13_12,P_11_8});
|
||||
black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8});
|
||||
black b_21_16 (G_21_16, P_21_16, {G_21_20,G_19_16}, {P_21_20,P_19_16});
|
||||
black b_23_16 (G_23_16, P_23_16, {G_23_20,G_19_16}, {P_23_20,P_19_16});
|
||||
black b_29_24 (G_29_24, P_29_24, {G_29_28,G_27_24}, {P_29_28,P_27_24});
|
||||
black b_31_24 (G_31_24, P_31_24, {G_31_28,G_27_24}, {P_31_28,P_27_24});
|
||||
|
||||
black b_37_32 (G_37_32, P_37_32, {G_37_36,G_35_32}, {P_37_36,P_35_32});
|
||||
black b_39_32 (G_39_32, P_39_32, {G_39_36,G_35_32}, {P_39_36,P_35_32});
|
||||
black b_45_40 (G_45_40, P_45_40, {G_45_44,G_43_40}, {P_45_44,P_43_40});
|
||||
black b_47_40 (G_47_40, P_47_40, {G_47_44,G_43_40}, {P_47_44,P_43_40});
|
||||
black b_53_48 (G_53_48, P_53_48, {G_53_52,G_51_48}, {P_53_52,P_51_48});
|
||||
black b_55_48 (G_55_48, P_55_48, {G_55_52,G_51_48}, {P_55_52,P_51_48});
|
||||
black b_61_56 (G_61_56, P_61_56, {G_61_60,G_59_56}, {P_61_60,P_59_56});
|
||||
black b_63_56 (G_63_56, P_63_56, {G_63_60,G_59_56}, {P_63_60,P_59_56});
|
||||
|
||||
// Stage 4: lower operand spans 8 bits and is shared by four cells, giving spans of 10, 12, 14 and 16 bits
|
||||
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
|
||||
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
|
||||
grey g_13_0 (G_13_0, {G_13_8,G_7_0}, P_13_8);
|
||||
grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8);
|
||||
black b_25_16 (G_25_16, P_25_16, {G_25_24,G_23_16}, {P_25_24,P_23_16});
|
||||
black b_27_16 (G_27_16, P_27_16, {G_27_24,G_23_16}, {P_27_24,P_23_16});
|
||||
black b_29_16 (G_29_16, P_29_16, {G_29_24,G_23_16}, {P_29_24,P_23_16});
|
||||
black b_31_16 (G_31_16, P_31_16, {G_31_24,G_23_16}, {P_31_24,P_23_16});
|
||||
|
||||
black b_41_32 (G_41_32, P_41_32, {G_41_40,G_39_32}, {P_41_40,P_39_32});
|
||||
black b_43_32 (G_43_32, P_43_32, {G_43_40,G_39_32}, {P_43_40,P_39_32});
|
||||
black b_45_32 (G_45_32, P_45_32, {G_45_40,G_39_32}, {P_45_40,P_39_32});
|
||||
black b_47_32 (G_47_32, P_47_32, {G_47_40,G_39_32}, {P_47_40,P_39_32});
|
||||
black b_57_48 (G_57_48, P_57_48, {G_57_56,G_55_48}, {P_57_56,P_55_48});
|
||||
black b_59_48 (G_59_48, P_59_48, {G_59_56,G_55_48}, {P_59_56,P_55_48});
|
||||
black b_61_48 (G_61_48, P_61_48, {G_61_56,G_55_48}, {P_61_56,P_55_48});
|
||||
black b_63_48 (G_63_48, P_63_48, {G_63_56,G_55_48}, {P_63_56,P_55_48});
|
||||
|
||||
// Stage 5: lower operand spans 16 bits (G_15_0 or G/P_47_32) and is shared by eight cells, giving spans of 18 to 32 bits
|
||||
grey g_17_0 (G_17_0, {G_17_16,G_15_0}, P_17_16);
|
||||
grey g_19_0 (G_19_0, {G_19_16,G_15_0}, P_19_16);
|
||||
grey g_21_0 (G_21_0, {G_21_16,G_15_0}, P_21_16);
|
||||
grey g_23_0 (G_23_0, {G_23_16,G_15_0}, P_23_16);
|
||||
grey g_25_0 (G_25_0, {G_25_16,G_15_0}, P_25_16);
|
||||
grey g_27_0 (G_27_0, {G_27_16,G_15_0}, P_27_16);
|
||||
grey g_29_0 (G_29_0, {G_29_16,G_15_0}, P_29_16);
|
||||
grey g_31_0 (G_31_0, {G_31_16,G_15_0}, P_31_16);
|
||||
|
||||
black b_49_32 (G_49_32, P_49_32, {G_49_48,G_47_32}, {P_49_48,P_47_32});
|
||||
black b_51_32 (G_51_32, P_51_32, {G_51_48,G_47_32}, {P_51_48,P_47_32});
|
||||
black b_53_32 (G_53_32, P_53_32, {G_53_48,G_47_32}, {P_53_48,P_47_32});
|
||||
black b_55_32 (G_55_32, P_55_32, {G_55_48,G_47_32}, {P_55_48,P_47_32});
|
||||
black b_57_32 (G_57_32, P_57_32, {G_57_48,G_47_32}, {P_57_48,P_47_32});
|
||||
black b_59_32 (G_59_32, P_59_32, {G_59_48,G_47_32}, {P_59_48,P_47_32});
|
||||
black b_61_32 (G_61_32, P_61_32, {G_61_48,G_47_32}, {P_61_48,P_47_32});
|
||||
black b_63_32 (G_63_32, P_63_32, {G_63_48,G_47_32}, {P_63_48,P_47_32});
|
||||
|
||||
// Stage 6: lower operand is the 32-bit group G_31_0, shared by sixteen grey cells that produce G_k_0 for odd k = 33..63
|
||||
grey g_33_0 (G_33_0, {G_33_32,G_31_0}, P_33_32);
|
||||
grey g_35_0 (G_35_0, {G_35_32,G_31_0}, P_35_32);
|
||||
grey g_37_0 (G_37_0, {G_37_32,G_31_0}, P_37_32);
|
||||
grey g_39_0 (G_39_0, {G_39_32,G_31_0}, P_39_32);
|
||||
grey g_41_0 (G_41_0, {G_41_32,G_31_0}, P_41_32);
|
||||
grey g_43_0 (G_43_0, {G_43_32,G_31_0}, P_43_32);
|
||||
grey g_45_0 (G_45_0, {G_45_32,G_31_0}, P_45_32);
|
||||
grey g_47_0 (G_47_0, {G_47_32,G_31_0}, P_47_32);
|
||||
|
||||
grey g_49_0 (G_49_0, {G_49_32,G_31_0}, P_49_32);
|
||||
grey g_51_0 (G_51_0, {G_51_32,G_31_0}, P_51_32);
|
||||
grey g_53_0 (G_53_0, {G_53_32,G_31_0}, P_53_32);
|
||||
grey g_55_0 (G_55_0, {G_55_32,G_31_0}, P_55_32);
|
||||
grey g_57_0 (G_57_0, {G_57_32,G_31_0}, P_57_32);
|
||||
grey g_59_0 (G_59_0, {G_59_32,G_31_0}, P_59_32);
|
||||
grey g_61_0 (G_61_0, {G_61_32,G_31_0}, P_61_32);
|
||||
grey g_63_0 (G_63_0, {G_63_32,G_31_0}, P_63_32);
|
||||
|
||||
// Extra grey cell stage: the stages above only produce G_k_0 for odd k; each even k (2..62)
// is filled in here from the raw g[k]/p[k] and the already-computed G_(k-1)_0
|
||||
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
|
||||
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
|
||||
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
|
||||
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
|
||||
grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
|
||||
grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
|
||||
grey g_14_0 (G_14_0, {g[14],G_13_0}, p[14]);
|
||||
grey g_16_0 (G_16_0, {g[16],G_15_0}, p[16]);
|
||||
grey g_18_0 (G_18_0, {g[18],G_17_0}, p[18]);
|
||||
grey g_20_0 (G_20_0, {g[20],G_19_0}, p[20]);
|
||||
grey g_22_0 (G_22_0, {g[22],G_21_0}, p[22]);
|
||||
grey g_24_0 (G_24_0, {g[24],G_23_0}, p[24]);
|
||||
grey g_26_0 (G_26_0, {g[26],G_25_0}, p[26]);
|
||||
grey g_28_0 (G_28_0, {g[28],G_27_0}, p[28]);
|
||||
grey g_30_0 (G_30_0, {g[30],G_29_0}, p[30]);
|
||||
grey g_32_0 (G_32_0, {g[32],G_31_0}, p[32]);
|
||||
grey g_34_0 (G_34_0, {g[34],G_33_0}, p[34]);
|
||||
grey g_36_0 (G_36_0, {g[36],G_35_0}, p[36]);
|
||||
grey g_38_0 (G_38_0, {g[38],G_37_0}, p[38]);
|
||||
grey g_40_0 (G_40_0, {g[40],G_39_0}, p[40]);
|
||||
grey g_42_0 (G_42_0, {g[42],G_41_0}, p[42]);
|
||||
grey g_44_0 (G_44_0, {g[44],G_43_0}, p[44]);
|
||||
grey g_46_0 (G_46_0, {g[46],G_45_0}, p[46]);
|
||||
grey g_48_0 (G_48_0, {g[48],G_47_0}, p[48]);
|
||||
grey g_50_0 (G_50_0, {g[50],G_49_0}, p[50]);
|
||||
grey g_52_0 (G_52_0, {g[52],G_51_0}, p[52]);
|
||||
grey g_54_0 (G_54_0, {g[54],G_53_0}, p[54]);
|
||||
grey g_56_0 (G_56_0, {g[56],G_55_0}, p[56]);
|
||||
grey g_58_0 (G_58_0, {g[58],G_57_0}, p[58]);
|
||||
grey g_60_0 (G_60_0, {g[60],G_59_0}, p[60]);
|
||||
grey g_62_0 (G_62_0, {g[62],G_61_0}, p[62]);
|
||||
|
||||
// Final Stage: Apply c_k+1=G_k_0 (c[1] is taken directly from g[0], the single-position case)
|
||||
assign c[1]=g[0];
|
||||
assign c[2]=G_1_0;
|
||||
assign c[3]=G_2_0;
|
||||
assign c[4]=G_3_0;
|
||||
assign c[5]=G_4_0;
|
||||
assign c[6]=G_5_0;
|
||||
assign c[7]=G_6_0;
|
||||
assign c[8]=G_7_0;
|
||||
assign c[9]=G_8_0;
|
||||
|
||||
assign c[10]=G_9_0;
|
||||
assign c[11]=G_10_0;
|
||||
assign c[12]=G_11_0;
|
||||
assign c[13]=G_12_0;
|
||||
assign c[14]=G_13_0;
|
||||
assign c[15]=G_14_0;
|
||||
assign c[16]=G_15_0;
|
||||
assign c[17]=G_16_0;
|
||||
|
||||
assign c[18]=G_17_0;
|
||||
assign c[19]=G_18_0;
|
||||
assign c[20]=G_19_0;
|
||||
assign c[21]=G_20_0;
|
||||
assign c[22]=G_21_0;
|
||||
assign c[23]=G_22_0;
|
||||
assign c[24]=G_23_0;
|
||||
assign c[25]=G_24_0;
|
||||
|
||||
assign c[26]=G_25_0;
|
||||
assign c[27]=G_26_0;
|
||||
assign c[28]=G_27_0;
|
||||
assign c[29]=G_28_0;
|
||||
assign c[30]=G_29_0;
|
||||
assign c[31]=G_30_0;
|
||||
assign c[32]=G_31_0;
|
||||
assign c[33]=G_32_0;
|
||||
|
||||
assign c[34]=G_33_0;
|
||||
assign c[35]=G_34_0;
|
||||
assign c[36]=G_35_0;
|
||||
assign c[37]=G_36_0;
|
||||
assign c[38]=G_37_0;
|
||||
assign c[39]=G_38_0;
|
||||
assign c[40]=G_39_0;
|
||||
assign c[41]=G_40_0;
|
||||
|
||||
assign c[42]=G_41_0;
|
||||
assign c[43]=G_42_0;
|
||||
assign c[44]=G_43_0;
|
||||
assign c[45]=G_44_0;
|
||||
assign c[46]=G_45_0;
|
||||
assign c[47]=G_46_0;
|
||||
assign c[48]=G_47_0;
|
||||
assign c[49]=G_48_0;
|
||||
|
||||
assign c[50]=G_49_0;
|
||||
assign c[51]=G_50_0;
|
||||
assign c[52]=G_51_0;
|
||||
assign c[53]=G_52_0;
|
||||
assign c[54]=G_53_0;
|
||||
assign c[55]=G_54_0;
|
||||
assign c[56]=G_55_0;
|
||||
assign c[57]=G_56_0;
|
||||
|
||||
assign c[58]=G_57_0;
|
||||
assign c[59]=G_58_0;
|
||||
assign c[60]=G_59_0;
|
||||
assign c[61]=G_60_0;
|
||||
assign c[62]=G_61_0;
|
||||
assign c[63]=G_62_0;
|
||||
assign c[64]=G_63_0;
|
||||
|
||||
endmodule // ladner_fischer64
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
// input B0;
|
||||
// input B1;
|
||||
|
||||
|
||||
// output P;
|
||||
// output V;
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -16,7 +16,7 @@
|
||||
// xxxxxL,Rxxxxxxx
|
||||
// where , denotes the rounding boundary. S is the logical OR of all the
|
||||
// bits to the right of R.
|
||||
|
||||
|
||||
module rounder (Result, DenormIO, Flags, rm, P, OvEn,
|
||||
UnEn, exp_valid, sel_inv, Invalid, DenormIn, convert, Asign, Aexp,
|
||||
norm_shift, A, exponent_postsum, A_Norm, B_Norm, exp_A_unmodified, exp_B_unmodified,
|
||||
|
@ -5,48 +5,41 @@
|
||||
// It produces a rounded 52-bit result, Z, the exponent of the rounded
|
||||
// result, Z_exp, and a flag that indicates if the result was rounded,
|
||||
// Inexact. The rounding mode has the following values.
|
||||
// rm Modee
|
||||
// rm Mode
|
||||
// 00 round-to-nearest-even
|
||||
// 01 round-toward-zero
|
||||
// 01 round-toward-zero
|
||||
// 10 round-toward-plus infinity
|
||||
// 11 round-toward-minus infinity
|
||||
// 11 round-toward-minus infinity
|
||||
//
|
||||
|
||||
module rounder_div (Result, DenormIO, Flags, rm, P, OvEn,
|
||||
UnEn, exp_diff, sel_inv, Invalid, DenormIn,
|
||||
SignR, q1, qm1, qp1, q0, qm0, qp0, regr_out);
|
||||
|
||||
input [1:0] rm;
|
||||
input P;
|
||||
input OvEn;
|
||||
input UnEn;
|
||||
input [12:0] exp_diff;
|
||||
input [2:0] sel_inv;
|
||||
input Invalid;
|
||||
input DenormIn;
|
||||
input SignR;
|
||||
module rounder_div (
|
||||
input logic [1:0] rm,
|
||||
input logic P,
|
||||
input logic OvEn,
|
||||
input logic UnEn,
|
||||
input logic [12:0] exp_diff,
|
||||
input logic [2:0] sel_inv,
|
||||
input logic Invalid,
|
||||
input logic SignR,
|
||||
|
||||
input logic [63:0] q1;
|
||||
input logic [63:0] qm1;
|
||||
input logic [63:0] qp1;
|
||||
input logic [63:0] q0;
|
||||
input logic [63:0] qm0;
|
||||
input logic [63:0] qp0;
|
||||
input logic [127:0] regr_out;
|
||||
|
||||
output logic [63:0] Result;
|
||||
output logic DenormIO;
|
||||
output logic [4:0] Flags;
|
||||
|
||||
supply1 vdd;
|
||||
supply0 vss;
|
||||
input logic [63:0] q1,
|
||||
input logic [63:0] qm1,
|
||||
input logic [63:0] qp1,
|
||||
input logic [63:0] q0,
|
||||
input logic [63:0] qm0,
|
||||
input logic [63:0] qp0,
|
||||
input logic [127:0] regr_out,
|
||||
|
||||
output logic [63:0] Result,
|
||||
output logic [4:0] Flags
|
||||
);
|
||||
|
||||
logic Rsign;
|
||||
logic [10:0] Rexp;
|
||||
logic [12:0] Texp;
|
||||
logic [51:0] Rmant;
|
||||
logic [63:0] Tmant;
|
||||
logic [51:0] Smant;
|
||||
logic [10:0] Rexp;
|
||||
logic [12:0] Texp;
|
||||
logic [51:0] Rmant;
|
||||
logic [63:0] Tmant;
|
||||
logic [51:0] Smant;
|
||||
logic Rzero;
|
||||
logic Gdp, Gsp, G;
|
||||
logic UnFlow_SP, UnFlow_DP, UnderFlow;
|
||||
@ -64,10 +57,10 @@ module rounder_div (Result, DenormIO, Flags, rm, P, OvEn,
|
||||
logic Texp_l7o;
|
||||
logic OvCon;
|
||||
logic zero_rem;
|
||||
logic [1:0] mux_mant;
|
||||
logic [1:0] mux_mant;
|
||||
logic sign_rem;
|
||||
logic [63:0] q, qm, qp;
|
||||
logic exp_ovf, exp_ovfSP, exp_ovfDP;
|
||||
logic [63:0] q, qm, qp;
|
||||
logic exp_ovf;
|
||||
|
||||
// Remainder = 0?
|
||||
assign zero_rem = ~(|regr_out);
|
||||
@ -98,7 +91,7 @@ module rounder_div (Result, DenormIO, Flags, rm, P, OvEn,
|
||||
// 1.) we choose any qm0, qp0, q0 (since we shift mant)
|
||||
// 2.) we choose qp and we overflow (for RU)
|
||||
assign exp_ovf = |{qp[62:40], (qp[39:11] & {29{~P}})};
|
||||
assign Texp = exp_diff - {{13{vss}}, ~q1[63]} + {{13{vss}}, mux_mant[1]&qp1[63]&~exp_ovf};
|
||||
assign Texp = exp_diff - {{13{1'b0}}, ~q1[63]} + {{13{1'b0}}, mux_mant[1]&qp1[63]&~exp_ovf};
|
||||
|
||||
// Overflow only occurs for double precision, if Texp[10] to Texp[0] are
|
||||
// all ones. To encourage sharing with single precision overflow detection,
|
||||
@ -130,9 +123,6 @@ module rounder_div (Result, DenormIO, Flags, rm, P, OvEn,
|
||||
assign OverFlow = (P & OvFlow_SP | OvFlow_DP) & Valid;
|
||||
assign Div0 = sel_inv[2]&sel_inv[1]&~sel_inv[0];
|
||||
|
||||
// The DenormIO is set if underflow has occurred or if their was a
|
||||
// denormalized input.
|
||||
assign DenormIO = DenormIn | UnderFlow;
|
||||
|
||||
// The final result is Inexact if any rounding occurred ((i.e., R or S
|
||||
// is one), or (if the result overflows ) or (if the result underflows and the
|
||||
|
@ -1,5 +1,5 @@
|
||||
module sbtm_a0 (input logic [6:0] a,
|
||||
output logic [12:0] y);
|
||||
output logic [12:0] y);
|
||||
always_comb
|
||||
case(a)
|
||||
7'b0000000: y = 13'b1111111100010;
|
||||
|
@ -1,5 +1,5 @@
|
||||
module sbtm_a1 (input logic [6:0] a,
|
||||
output logic [4:0] y);
|
||||
output logic [4:0] y);
|
||||
always_comb
|
||||
case(a)
|
||||
7'b0000000: y = 5'b11100;
|
||||
|
@ -1,5 +1,5 @@
|
||||
module sbtm_a2 (input logic [7:0] a,
|
||||
output logic [13:0] y);
|
||||
output logic [13:0] y);
|
||||
always_comb
|
||||
case(a)
|
||||
8'b01000000: y = 14'b10110100010111;
|
||||
|
@ -1,5 +1,5 @@
|
||||
module sbtm_a3 (input logic [7:0] a,
|
||||
output logic [5:0] y);
|
||||
output logic [5:0] y);
|
||||
always_comb
|
||||
case(a)
|
||||
8'b01000000: y = 6'b100110;
|
||||
|
@ -7,12 +7,12 @@ module sbtm_div (input logic [11:0] a, output logic [10:0] ia_out);
|
||||
logic [2:0] x2_1cmp;
|
||||
// mem outputs
|
||||
logic [12:0] y0;
|
||||
logic [4:0] y1;
|
||||
logic [4:0] y1;
|
||||
// input to CPA
|
||||
logic [14:0] op1;
|
||||
logic [14:0] op2;
|
||||
logic [14:0] p;
|
||||
logic cout;
|
||||
logic cout;
|
||||
|
||||
assign x0 = a[10:7];
|
||||
assign x1 = a[6:4];
|
||||
@ -26,10 +26,8 @@ module sbtm_div (input logic [11:0] a, output logic [10:0] ia_out);
|
||||
// 1s cmp per sbtm/stam
|
||||
assign op2 = x2[3] ? {1'b1, {8{1'b1}}, ~y1, 1'b1} :
|
||||
{1'b0, 8'b0, y1, 1'b1};
|
||||
// CPA
|
||||
// adder #(15) cp1 (op1, op2, 1'b0, p, cout);
|
||||
// CPA
|
||||
assign {cout, p} = op1 + op2;
|
||||
//assign ia_out = {p[14:4], {53{1'b0}}};
|
||||
assign ia_out = p[14:4];
|
||||
|
||||
endmodule // sbtm
|
||||
|
@ -7,12 +7,12 @@ module sbtm_sqrt (input logic [11:0] a, output logic [10:0] y);
|
||||
logic [2:0] x2_1cmp;
|
||||
// mem outputs
|
||||
logic [13:0] y0;
|
||||
logic [5:0] y1;
|
||||
logic [5:0] y1;
|
||||
// input to CPA
|
||||
logic [14:0] op1;
|
||||
logic [14:0] op2;
|
||||
logic [14:0] p;
|
||||
logic cout;
|
||||
logic cout;
|
||||
|
||||
assign x0 = a[11:7];
|
||||
assign x1 = a[6:4];
|
||||
@ -29,7 +29,6 @@ module sbtm_sqrt (input logic [11:0] a, output logic [10:0] y);
|
||||
{8'b0, y1, 1'b1};
|
||||
|
||||
// CPA
|
||||
//adder #(15) cp1 (op1, op2, 1'b0, p, cout);
|
||||
assign {cout, p} = op1 + op2;
|
||||
assign y = p[14:4];
|
||||
|
||||
|
@ -28,7 +28,7 @@ module mux21x64 (Z, A, B, Sel);
|
||||
assign Z = Sel ? B : A;
|
||||
|
||||
endmodule // mux21x64
|
||||
|
||||
|
||||
// The implementation of the barrel shifter was modified to use
|
||||
// fewer gates. It is now implemented using six 64-bit 2-to-1 muxes. The
|
||||
// barrel shifter takes a 64-bit input A and shifts it left by up to
|
||||
|
@ -1,4 +1,4 @@
|
||||
module unpacking (
|
||||
module unpacking (
|
||||
input logic [63:0] X, Y, Z,
|
||||
input logic FmtE,
|
||||
input logic [2:0] FOpCtrlE,
|
||||
@ -25,9 +25,9 @@ module unpacking (
|
||||
assign YSgnE = FmtE ? Y[63] : Y[31];
|
||||
assign ZSgnE = FmtE ? Z[63] : Z[31];
|
||||
|
||||
assign XExpE = FmtE ? X[62:52] : {X[30], {3{~X[30] & XExpNonzero | XExpMaxE}}, X[29:23]};
|
||||
assign YExpE = FmtE ? Y[62:52] : {Y[30], {3{~Y[30] & YExpNonzero | YExpMaxE}}, Y[29:23]};
|
||||
assign ZExpE = FmtE ? Z[62:52] : {Z[30], {3{~Z[30] & ZExpNonzero | ZExpMaxE}}, Z[29:23]};
|
||||
assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]};//{X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]};
|
||||
assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]};//{Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]};
|
||||
assign ZExpE = FmtE ? Z[62:52] : {3'b0, Z[30:23]};//{Z[30], {3{~Z[30]&~ZExpZero|ZExpMaxE}}, Z[29:23]};
|
||||
/* assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]}; // *** maybe convert to full number of bits here?
|
||||
assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]};
|
||||
assign ZExpE = FmtE ? Z[62:52] : {3'b0, Z[30:23]};*/
|
||||
@ -78,7 +78,7 @@ module unpacking (
|
||||
assign YZeroE = YExpZero & YFracZero;
|
||||
assign ZZeroE = ZExpZero & ZFracZero;
|
||||
|
||||
//assign BiasE = FmtE ? 13'h3ff : 13'h7f; // *** is it better to convert to full precision exponents so bias isn't needed?
|
||||
assign BiasE = 13'h3ff; // always use 1023 because exponents are unpacked to double precision
|
||||
assign BiasE = FmtE ? 13'h3ff : 13'h7f; // *** is it better to convert to full precision exponents so bias isn't needed?
|
||||
// assign BiasE = 13'h3ff; // always use 1023 because exponents are unpacked to double precision
|
||||
|
||||
endmodule
|
@ -50,6 +50,12 @@ module hptw
|
||||
output logic WalkerInstrPageFaultF, WalkerLoadPageFaultM,WalkerStorePageFaultM // faults
|
||||
);
|
||||
|
||||
typedef enum {L0_ADR, L0_RD,
|
||||
L1_ADR, L1_RD,
|
||||
L2_ADR, L2_RD,
|
||||
L3_ADR, L3_RD,
|
||||
LEAF, IDLE, FAULT} statetype; // *** placed outside generate statement to remove synthesis errors
|
||||
|
||||
generate
|
||||
if (`MEM_VIRTMEM) begin
|
||||
logic DTLBWalk; // register TLBs translation miss requests
|
||||
@ -66,12 +72,6 @@ module hptw
|
||||
logic [`SVMODE_BITS-1:0] SvMode;
|
||||
logic [`XLEN-1:0] TranslationVAdr;
|
||||
|
||||
|
||||
typedef enum {LEVEL0_SET_ADR, LEVEL0_READ, LEVEL0,
|
||||
LEVEL1_SET_ADR, LEVEL1_READ, LEVEL1,
|
||||
LEVEL2_SET_ADR, LEVEL2_READ, LEVEL2,
|
||||
LEVEL3_SET_ADR, LEVEL3_READ, LEVEL3,
|
||||
LEAF, IDLE, FAULT} statetype;
|
||||
statetype WalkerState, NextWalkerState, InitialWalkerState;
|
||||
|
||||
// Extract bits from CSRs and inputs
|
||||
@ -99,7 +99,7 @@ module hptw
|
||||
|
||||
// Enable and select signals based on states
|
||||
assign StartWalk = (WalkerState == IDLE) & TLBMiss;
|
||||
assign HPTWRead = (WalkerState == LEVEL3_READ) | (WalkerState == LEVEL2_READ) | (WalkerState == LEVEL1_READ) | (WalkerState == LEVEL0_READ);
|
||||
assign HPTWRead = (WalkerState == L3_RD) | (WalkerState == L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD);
|
||||
assign SelPTW = (WalkerState != IDLE) & (WalkerState != FAULT) & (WalkerState != LEAF);
|
||||
assign DTLBWriteM = (WalkerState == LEAF) & DTLBWalk;
|
||||
assign ITLBWriteF = (WalkerState == LEAF) & ~DTLBWalk;
|
||||
@ -113,10 +113,10 @@ module hptw
|
||||
flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType);
|
||||
always_comb
|
||||
case (WalkerState)
|
||||
LEVEL3: NextPageType = 2'b11; // terapage
|
||||
LEVEL2: NextPageType = 2'b10; // gigapage
|
||||
LEVEL1: NextPageType = 2'b01; // megapage
|
||||
LEVEL0: NextPageType = 2'b00; // kilopage
|
||||
L3_RD: NextPageType = 2'b11; // terapage
|
||||
L2_RD: NextPageType = 2'b10; // gigapage
|
||||
L1_RD: NextPageType = 2'b01; // megapage
|
||||
L0_RD: NextPageType = 2'b00; // kilopage
|
||||
default: NextPageType = PageType;
|
||||
endcase
|
||||
|
||||
@ -124,36 +124,36 @@ module hptw
|
||||
if (`XLEN==32) begin // RV32
|
||||
logic [9:0] VPN;
|
||||
logic [`PPN_BITS-1:0] PPN;
|
||||
assign VPN = ((WalkerState == LEVEL1_SET_ADR) | (WalkerState == LEVEL1_READ)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state
|
||||
assign PPN = ((WalkerState == LEVEL1_SET_ADR) | (WalkerState == LEVEL1_READ)) ? BasePageTablePPN : CurrentPPN;
|
||||
assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state
|
||||
assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN;
|
||||
assign TranslationPAdr = {PPN, VPN, 2'b00};
|
||||
end else begin // RV64
|
||||
logic [8:0] VPN;
|
||||
logic [`PPN_BITS-1:0] PPN;
|
||||
always_comb
|
||||
case (WalkerState) // select VPN field based on HPTW state
|
||||
LEVEL3_SET_ADR, LEVEL3_READ: VPN = TranslationVAdr[47:39];
|
||||
LEVEL3, LEVEL2_SET_ADR, LEVEL2_READ: VPN = TranslationVAdr[38:30];
|
||||
LEVEL2, LEVEL1_SET_ADR, LEVEL1_READ: VPN = TranslationVAdr[29:21];
|
||||
L3_ADR, L3_RD: VPN = TranslationVAdr[47:39];
|
||||
L2_ADR, L2_RD: VPN = TranslationVAdr[38:30];
|
||||
L1_ADR, L1_RD: VPN = TranslationVAdr[29:21];
|
||||
default: VPN = TranslationVAdr[20:12];
|
||||
endcase
|
||||
assign PPN = ((WalkerState == LEVEL3_SET_ADR) | (WalkerState == LEVEL3_READ) |
|
||||
(SvMode != `SV48 & ((WalkerState == LEVEL2_SET_ADR) | (WalkerState == LEVEL2_READ)))) ? BasePageTablePPN : CurrentPPN;
|
||||
assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) |
|
||||
(SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN;
|
||||
assign TranslationPAdr = {PPN, VPN, 3'b000};
|
||||
end
|
||||
|
||||
// Initial state and misalignment for RV32/64
|
||||
if (`XLEN == 32) begin
|
||||
assign InitialWalkerState = LEVEL1_SET_ADR;
|
||||
assign InitialWalkerState = L1_ADR;
|
||||
assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0
|
||||
assign Misaligned = ((WalkerState == LEVEL1) & MegapageMisaligned);
|
||||
assign Misaligned = ((WalkerState == L0_ADR) & MegapageMisaligned);
|
||||
end else begin
|
||||
logic GigapageMisaligned, TerapageMisaligned;
|
||||
assign InitialWalkerState = (SvMode == `SV48) ? LEVEL3_SET_ADR : LEVEL2_SET_ADR;
|
||||
assign InitialWalkerState = (SvMode == `SV48) ? L3_ADR : L2_ADR;
|
||||
assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0
|
||||
assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0
|
||||
assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0
|
||||
assign Misaligned = ((WalkerState == LEVEL3) & TerapageMisaligned) | ((WalkerState == LEVEL2) & GigapageMisaligned) | ((WalkerState == LEVEL1) & MegapageMisaligned);
|
||||
assign Misaligned = ((WalkerState == L2_ADR) & TerapageMisaligned) | ((WalkerState == L1_ADR) & GigapageMisaligned) | ((WalkerState == L0_ADR) & MegapageMisaligned);
|
||||
end
|
||||
|
||||
// Page Table Walker FSM
|
||||
@ -166,29 +166,37 @@ module hptw
|
||||
case (WalkerState)
|
||||
IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState;
|
||||
else NextWalkerState = IDLE;
|
||||
LEVEL3_SET_ADR: NextWalkerState = LEVEL3_READ;
|
||||
LEVEL3_READ: if (HPTWStall) NextWalkerState = LEVEL3_READ;
|
||||
else NextWalkerState = LEVEL3;
|
||||
LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
|
||||
else if (ValidNonLeafPTE) NextWalkerState = LEVEL2_SET_ADR;
|
||||
L3_ADR: NextWalkerState = L3_RD; // first access in SV48
|
||||
L3_RD: if (HPTWStall) NextWalkerState = L3_RD;
|
||||
else NextWalkerState = L2_ADR;
|
||||
// LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
|
||||
// else if (ValidNonLeafPTE) NextWalkerState = L2_ADR;
|
||||
// else NextWalkerState = FAULT;
|
||||
L2_ADR: if (InitialWalkerState == L2_ADR) NextWalkerState = L2_RD; // first access in SV39
|
||||
else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cycle for all Lx_ADR superpages
|
||||
else if (ValidNonLeafPTE) NextWalkerState = L2_RD;
|
||||
else NextWalkerState = FAULT;
|
||||
L2_RD: if (HPTWStall) NextWalkerState = L2_RD;
|
||||
else NextWalkerState = L1_ADR;
|
||||
// LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
|
||||
// else if (ValidNonLeafPTE) NextWalkerState = L1_ADR;
|
||||
// else NextWalkerState = FAULT;
|
||||
L1_ADR: if (InitialWalkerState == L1_ADR) NextWalkerState = L1_RD; // first access in SV32
|
||||
else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cycle for all Lx_ADR superpages
|
||||
else if (ValidNonLeafPTE) NextWalkerState = L1_RD;
|
||||
else NextWalkerState = FAULT;
|
||||
L1_RD: if (HPTWStall) NextWalkerState = L1_RD;
|
||||
else NextWalkerState = L0_ADR;
|
||||
// LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
|
||||
// else if (ValidNonLeafPTE) NextWalkerState = L0_ADR;
|
||||
// else NextWalkerState = FAULT;
|
||||
L0_ADR: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cycle for all Lx_ADR superpages
|
||||
else if (ValidNonLeafPTE) NextWalkerState = L0_RD;
|
||||
else NextWalkerState = FAULT;
|
||||
LEVEL2_SET_ADR: NextWalkerState = LEVEL2_READ;
|
||||
LEVEL2_READ: if (HPTWStall) NextWalkerState = LEVEL2_READ;
|
||||
else NextWalkerState = LEVEL2;
|
||||
LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
|
||||
else if (ValidNonLeafPTE) NextWalkerState = LEVEL1_SET_ADR;
|
||||
else NextWalkerState = FAULT;
|
||||
LEVEL1_SET_ADR: NextWalkerState = LEVEL1_READ;
|
||||
LEVEL1_READ: if (HPTWStall) NextWalkerState = LEVEL1_READ;
|
||||
else NextWalkerState = LEVEL1;
|
||||
LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
|
||||
else if (ValidNonLeafPTE) NextWalkerState = LEVEL0_SET_ADR;
|
||||
else NextWalkerState = FAULT;
|
||||
LEVEL0_SET_ADR: NextWalkerState = LEVEL0_READ;
|
||||
LEVEL0_READ: if (HPTWStall) NextWalkerState = LEVEL0_READ;
|
||||
else NextWalkerState = LEVEL0;
|
||||
LEVEL0: if (ValidLeafPTE) NextWalkerState = LEAF;
|
||||
else NextWalkerState = FAULT;
|
||||
L0_RD: if (HPTWStall) NextWalkerState = L0_RD;
|
||||
else NextWalkerState = LEAF;
|
||||
// LEVEL0: if (ValidLeafPTE) NextWalkerState = LEAF;
|
||||
// else NextWalkerState = FAULT;
|
||||
LEAF: NextWalkerState = IDLE;
|
||||
FAULT: if (ITLBMissF & AnyCPUReqM & ~MemAfterIWalkDone) NextWalkerState = FAULT;
|
||||
else NextWalkerState = IDLE;
|
||||
|
@ -67,9 +67,7 @@ module pmpadrdec (
|
||||
assign TORMatch = PAgePMPAdrIn && PAltPMPAdr;
|
||||
|
||||
// Naturally aligned regions
|
||||
|
||||
// verilator lint_off UNOPTFLAT
|
||||
logic [`PA_BITS-1:0] Mask;
|
||||
logic [`PA_BITS-1:0] NAMask;
|
||||
//genvar i;
|
||||
|
||||
// create a mask of which bits to ignore
|
||||
@ -80,23 +78,14 @@ module pmpadrdec (
|
||||
// assign Mask[i] = Mask[i-1] & PMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore
|
||||
// end
|
||||
// endgenerate
|
||||
prioritycircuit #(.ENTRIES(`PA_BITS-2), .FINAL_OP("NONE")) maskgen(.a(~PMPAdr[`PA_BITS-3:0]), .FirstPin(AdrMode==NAPOT), .y(Mask[`PA_BITS-1:2]));
|
||||
assign Mask[1:0] = 2'b11;
|
||||
|
||||
// *** possible experiments:
|
||||
/* PA < PMP addr could be in its own module,
|
||||
preserving hierarchy so we can tell whether this is the culprit on the critical path
|
||||
Should take logarithmic time, so roughly 6 levels of logic rather than 40 should be expected
|
||||
assign NAMask[1:0] = {2'b11};
|
||||
|
||||
update mask generation
|
||||
Should be concurrent with the subtraction/comparison
|
||||
if one is the critical path, the other shouldn't be which makes us think the mask generation is the culprit.
|
||||
prioritythemometer #(`PA_BITS-2) namaskgen(
|
||||
.a({PMPAdr[`PA_BITS-4:0], (AdrMode == NAPOT)}),
|
||||
.y(NAMask[`PA_BITS-1:2]));
|
||||
|
||||
Hopefully just use the priority circuit here
|
||||
*/
|
||||
// verilator lint_on UNOPTFLAT
|
||||
|
||||
assign NAMatch = &((PhysicalAddress ~^ CurrentAdrFull) | Mask);
|
||||
assign NAMatch = &((PhysicalAddress ~^ CurrentAdrFull) | NAMask);
|
||||
|
||||
assign Match = (AdrMode == TOR) ? TORMatch :
|
||||
(AdrMode == NA4 || AdrMode == NAPOT) ? NAMatch :
|
||||
|
@ -69,7 +69,7 @@ module pmpchecker (
|
||||
.PAgePMPAdrOut(PAgePMPAdr),
|
||||
.FirstMatch, .Match, .Active, .L, .X, .W, .R);
|
||||
|
||||
prioritycircuit #(.ENTRIES(`PMP_ENTRIES), .FINAL_OP("AND")) pmppriority(.a(Match), .FirstPin(1'b1), .y(FirstMatch)); // Take the ripple gates/signals out of the pmpadrdec and into another unit.
|
||||
priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // Take the ripple gates/signals out of the pmpadrdec and into another unit.
|
||||
|
||||
// Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region
|
||||
assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L : |Active;
|
||||
|
@ -1,5 +1,5 @@
|
||||
///////////////////////////////////////////
|
||||
// prioritycircuit.sv
|
||||
// priorityonehot.sv
|
||||
//
|
||||
// Written: tfleming@hmc.edu & jtorrey@hmc.edu 7 April 2021
|
||||
// Modified: Teo Ene 15 Apr 2021:
|
||||
@ -30,31 +30,22 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module prioritycircuit #(parameter ENTRIES = 8,
|
||||
parameter FINAL_OP = "AND") (
|
||||
module priorityonehot #(parameter ENTRIES = 8) (
|
||||
input logic [ENTRIES-1:0] a,
|
||||
input logic FirstPin,
|
||||
output logic [ENTRIES-1:0] y
|
||||
);
|
||||
// verilator lint_off UNOPTFLAT
|
||||
|
||||
logic [ENTRIES-1:0] nolower;
|
||||
|
||||
// generate thermometer code mask
|
||||
genvar i;
|
||||
generate
|
||||
assign nolower[0] = FirstPin;
|
||||
assign nolower[0] = 1'b1;
|
||||
for (i=1; i<ENTRIES; i++) begin:therm
|
||||
assign nolower[i] = nolower[i-1] & ~a[i-1];
|
||||
end
|
||||
endgenerate
|
||||
// verilator lint_on UNOPTFLAT
|
||||
|
||||
assign y = a & nolower;
|
||||
|
||||
generate
|
||||
if (FINAL_OP=="AND") begin
|
||||
assign y = a & nolower;
|
||||
end else if (FINAL_OP=="NONE") begin
|
||||
assign y = nolower;
|
||||
end // *** So far these are the only two operations I need to do at the end, but feel free to add more as needed.
|
||||
endgenerate
|
||||
// assign y = a & nolower;
|
||||
endmodule
|
50
wally-pipelined/src/mmu/prioritythermometer.sv
Normal file
50
wally-pipelined/src/mmu/prioritythermometer.sv
Normal file
@ -0,0 +1,50 @@
|
||||
///////////////////////////////////////////
|
||||
// prioritythermometer.sv
|
||||
//
|
||||
// Written: tfleming@hmc.edu & jtorrey@hmc.edu 7 April 2021
|
||||
// Modified: Teo Ene 15 Apr 2021:
|
||||
// Temporarily removed parameterized priority encoder for non-parameterized one
|
||||
// To get synthesis working quickly
|
||||
// Kmacsaigoren@hmc.edu 28 May 2021:
|
||||
// Added working version of parameterized priority encoder.
|
||||
// David_Harris@Hmc.edu switched to one-hot output
|
||||
//
|
||||
// Purpose: Priority circuit producing a thermometer-code output: y[i] is 1 only when a[i] and all lower bits of a are 1
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
||||
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module prioritythemometer #(parameter N = 8) ( // NOTE(review): name is spelled "themometer" (file is prioritythermometer.sv); instantiations use this spelling, so confirm before renaming
|
||||
input logic [N-1:0] a, // N-bit input vector, scanned from bit 0 upward
|
||||
output logic [N-1:0] y // thermometer code: y[i] = &a[i:0]
|
||||
);
|
||||
|
||||
// generate thermometer code mask: y is a run of 1s starting at bit 0 that stops at the first 0 bit of a
|
||||
genvar i;
|
||||
generate
|
||||
assign y[0] = a[0]; // base of the ripple chain: the lowest bit passes straight through
|
||||
for (i=1; i<N; i++) begin
|
||||
assign y[i] = y[i-1] & a[i]; // high only if a[i] and every lower bit of a are high (ripple AND, depth grows linearly with N)
|
||||
end
|
||||
endgenerate
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
@ -39,7 +39,7 @@ module tlblru #(parameter TLB_ENTRIES = 8) (
|
||||
logic AllUsed; // High if the next access causes all RU bits to be 1
|
||||
|
||||
// Find the first line not recently used
|
||||
prioritycircuit #(.ENTRIES(TLB_ENTRIES), .FINAL_OP("AND")) nru(.a(~RUBits), .FirstPin(1'b1), .y(WriteLines));
|
||||
priorityonehot #(TLB_ENTRIES) nru(.a(~RUBits), .y(WriteLines));
|
||||
|
||||
// Track recently used lines, updating on a CAM Hit or TLB write
|
||||
assign WriteEnables = WriteLines & {(TLB_ENTRIES){TLBWrite}};
|
||||
|
512
wally-pipelined/testbench/imperas-boottim.txt
Normal file
512
wally-pipelined/testbench/imperas-boottim.txt
Normal file
@ -0,0 +1,512 @@
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
@ -46,11 +46,15 @@ module testbench();
|
||||
|
||||
string tests32mmu[] = '{
|
||||
"rv32mmu/WALLY-MMU-SV32", "3000"
|
||||
//"rv32mmu/WALLY-PMA", "3000",
|
||||
//"rv32mmu/WALLY-PMA", "3000"
|
||||
};
|
||||
|
||||
string tests64mmu[] = '{
|
||||
"rv64mmu/WALLY-MMU-SV48", "3000",
|
||||
"rv64mmu/WALLY-MMU-SV39", "3000"
|
||||
//"rv64mmu/WALLY-PMA", "3000",
|
||||
//"rv64mmu/WALLY-PMA", "3000"
|
||||
};
|
||||
|
||||
|
||||
@ -558,7 +562,7 @@ string tests32f[] = '{
|
||||
end
|
||||
end
|
||||
|
||||
string signame, memfilename;
|
||||
string signame, memfilename, romfilename;
|
||||
|
||||
logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
|
||||
logic UARTSin, UARTSout;
|
||||
@ -604,7 +608,9 @@ string tests32f[] = '{
|
||||
end
|
||||
// read test vectors into memory
|
||||
memfilename = {"../../imperas-riscv-tests/work/", tests[test], ".elf.memfile"};
|
||||
romfilename = {"../../imperas-riscv-tests/imperas-boottim.txt"};
|
||||
$readmemh(memfilename, dut.uncore.dtim.RAM);
|
||||
$readmemh(romfilename, dut.uncore.bootdtim.bootdtim.RAM);
|
||||
ProgramAddrMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.addr"};
|
||||
ProgramLabelMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.lab"};
|
||||
$display("Read memfile %s", memfilename);
|
||||
@ -886,6 +892,7 @@ module instrNameDecTB(
|
||||
else if (imm == 2) name = "URET";
|
||||
else if (imm == 258) name = "SRET";
|
||||
else if (imm == 770) name = "MRET";
|
||||
else if (funct7 == 9) name = "SFENCE.VMA";
|
||||
else name = "ILLEGAL";
|
||||
10'b1110011_001: name = "CSRRW";
|
||||
10'b1110011_010: name = "CSRRS";
|
||||
|
@ -27,7 +27,7 @@
|
||||
|
||||
module testbench();
|
||||
|
||||
parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*3160000; // # of instructions at which to turn on waves in graphical sim
|
||||
parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*3080000; // # of instructions at which to turn on waves in graphical sim
|
||||
parameter stopICount = `BUSYBEAR*143898 + `BUILDROOT*0000000; // # instructions at which to halt sim completely (set to 0 to let it run as far as it can)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@ -184,9 +184,12 @@ module testbench();
|
||||
scan_file_rf = $fscanf(data_file_rf, "%d\n", regNumExpected);
|
||||
scan_file_rf = $fscanf(data_file_rf, "%x\n", regExpected);
|
||||
force dut.hart.ieu.dp.regf.wd3 = regExpected;
|
||||
// Hack to compensate for QEMU's incorrect MSTATUS
|
||||
// Hack to compensate for QEMU's incorrect MSTATUS (Wally correctly reports SXL and UXL as 2, whereas QEMU sets them to an invalid value of 0)
|
||||
end else if (PCtextW.substr(0,3) == "csrr" && PCtextW.substr(10,16) == "mstatus") begin
|
||||
force dut.hart.ieu.dp.regf.wd3 = dut.hart.ieu.dp.WriteDataW & ~64'ha00000000;
|
||||
// Hack to compensate for QEMU's incorrect SSTATUS (Wally correctly identifies UXL to be 2 whereas QEMU sets it to an invalid value of 0)
|
||||
end else if (PCtextW.substr(0,3) == "csrr" && ((PCtextW.substr(10,16) == "sstatus") || (PCtextW.substr(11,17) == "sstatus"))) begin
|
||||
force dut.hart.ieu.dp.regf.wd3 = dut.hart.ieu.dp.WriteDataW & ~64'h200000000;
|
||||
end else release dut.hart.ieu.dp.regf.wd3;
|
||||
// Hack to compensate for QEMU's correct but different MTVAL (according to spec, storing the faulting instr is an optional feature)
|
||||
if (PCtextW.substr(0,3) == "csrr" && PCtextW.substr(10,14) == "mtval") begin
|
||||
@ -265,7 +268,7 @@ module testbench();
|
||||
|
||||
// Check PCD, InstrD
|
||||
if (~PCDwrong && ~(dut.hart.ifu.PCD === PCDexpected)) begin
|
||||
$display("%0t ps, instr %0d: PC does not equal PC expected: %x, %x", $time, instrs, dut.hart.ifu.PCD, PCDexpected);
|
||||
$display("%0t ps, instr %0d: PCD does not equal PCD expected: %x, %x", $time, instrs, dut.hart.ifu.PCD, PCDexpected);
|
||||
`ERROR
|
||||
end
|
||||
InstrMask = InstrDExpected[1:0] == 2'b11 ? 32'hFFFFFFFF : 32'h0000FFFF;
|
||||
|
Loading…
Reference in New Issue
Block a user