Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main
This commit is contained in:
Ross Thompson 2021-07-26 11:55:00 -05:00
commit ef55b30e99
52 changed files with 1637 additions and 27033 deletions

View File

@ -26,6 +26,7 @@
// include shared configuration
`include "wally-shared.vh"
// `include "../../../config/shared/wally-shared.vh"
`define BUILDROOT 0
`define BUSYBEAR 0

View File

@ -0,0 +1,215 @@
`include "../../../config/rv64icfd/wally-config.vh"
// Self-checking testbench for the two-stage FMA (fma1 / fma2).
//
// Vectors are read from "testFloatNoSpace" with $readmemh. Each entry is
// {X, Y, Z, ans, flags}; in double precision (FmtE = 1) every operand is
// `FLEN bits wide, in single precision (FmtE = 0) every operand is 32 bits
// wide and sits right-aligned in the word (bits [135:8] plus 8 flag bits).
// Single-precision operands are widened to `FLEN bits by filling the upper
// 32 bits with ones before they are unpacked.
//
// Inputs are applied 1 time unit after each rising clock edge and the result
// is checked on the falling edge. The simulation stops on the first mismatch
// or when the first unloaded (all-X) vector is reached.
module testbench3();

  // ---------------------------------------------------------------------
  // Bookkeeping and test-vector storage
  // ---------------------------------------------------------------------
  logic [31:0]        errors=0;
  logic [31:0]        vectornum=0;
  logic [`FLEN*4+7:0] testvectors[6133248:0];
  logic [`FLEN-1:0]   ans;      // expected result from the vector file
  logic [7:0]         flags;    // expected flags (only [4:0] are compared)
  integer             fp;

  // ---------------------------------------------------------------------
  // DUT controls and outputs
  // ---------------------------------------------------------------------
  logic [2:0]         FrmE;
  logic               FmtE;
  logic [2:0]         FOpCtrlE;
  logic [`FLEN-1:0]   FMAResM;
  logic [4:0]         FMAFlgM;

  // signals passed from fma1 to fma2
  logic [2*`NF+1:0]   ProdManE;
  logic [3*`NF+5:0]   AlignedAddendE;
  logic [`NE+1:0]     ProdExpE;
  logic               AddendStickyE;
  logic               KillProdE;

  logic               wnan;     // DUT result is a NaN
  logic               ansnan;   // expected result is a NaN
  logic               clk;

  assign FOpCtrlE = 3'b0;

  // rounding mode:
  //    nearest even    - 000
  //    towards zero    - 001
  //    down            - 010
  //    up              - 011
  //    nearest max mag - 100
  assign FrmE = 3'b000;
  // precision: 1 = double, 0 = single (see the unpacking below)
  assign FmtE = 1'b0;

  // ---------------------------------------------------------------------
  // Operand unpacking
  // ---------------------------------------------------------------------
  logic [`FLEN-1:0]   X, Y, Z;
  logic               XSgnE, YSgnE, ZSgnE;
  logic [`NE-1:0]     XExpE, YExpE, ZExpE;
  logic [`NF-1:0]     XFracE, YFracE, ZFracE;
  logic               XAssumed1E, YAssumed1E, ZAssumed1E;
  logic               XNormE;
  logic               XNaNE, YNaNE, ZNaNE;
  logic               XSNaNE, YSNaNE, ZSNaNE;
  logic               XDenormE, YDenormE, ZDenormE;
  logic               XZeroE, YZeroE, ZZeroE;
  logic [`NE-1:0]     BiasE;
  logic               XInfE, YInfE, ZInfE;
  logic               XExpMaxE;
  //***rename to make significand = 1.frac m = significand
  logic               XFracZero, YFracZero, ZFracZero; // input fraction zero
  logic               XExpZero, YExpZero, ZExpZero;    // input exponent zero
  logic [`FLEN-1:0]   Addend;                          // value to add (Z or zero)
  logic               YExpMaxE, ZExpMaxE;              // input exponent all 1s

  // Z is only used in the FMA, and is set to zero for a multiply operation
  assign Addend = FOpCtrlE[2] ? (`FLEN)'(0) : Z;

  assign XSgnE = FmtE ? X[`FLEN-1] : X[31];
  assign YSgnE = FmtE ? Y[`FLEN-1] : Y[31];
  assign ZSgnE = FmtE ? Addend[`FLEN-1] : Addend[31];

  // single-precision exponents are zero-extended, not re-biased
  assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]};
  assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]};
  assign ZExpE = FmtE ? Addend[62:52] : {3'b0, Addend[30:23]};

  // single-precision fractions are left-aligned in the `NF-bit field
  assign XFracE = FmtE ? X[`NF-1:0] : {X[22:0], 29'b0};
  assign YFracE = FmtE ? Y[`NF-1:0] : {Y[22:0], 29'b0};
  assign ZFracE = FmtE ? Addend[`NF-1:0] : {Addend[22:0], 29'b0};

  // The implicit leading one is present whenever the exponent is non-zero.
  // The Z fields are taken from Addend (not Z) so that they agree with
  // ZSgnE/ZExpE/ZFracE; with FOpCtrlE[2] = 0 the two are identical.
  assign XAssumed1E = FmtE ? |X[62:52] : |X[30:23];
  assign YAssumed1E = FmtE ? |Y[62:52] : |Y[30:23];
  assign ZAssumed1E = FmtE ? |Addend[62:52] : |Addend[30:23];

  assign XExpZero = ~XAssumed1E;
  assign YExpZero = ~YAssumed1E;
  assign ZExpZero = ~ZAssumed1E;

  assign XFracZero = ~|XFracE;
  assign YFracZero = ~|YFracE;
  assign ZFracZero = ~|ZFracE;

  assign XExpMaxE = FmtE ? &X[62:52] : &X[30:23];
  assign YExpMaxE = FmtE ? &Y[62:52] : &Y[30:23];
  assign ZExpMaxE = FmtE ? &Addend[62:52] : &Addend[30:23];

  assign XNormE = ~(XExpMaxE|XExpZero);

  assign XNaNE = XExpMaxE & ~XFracZero;
  assign YNaNE = YExpMaxE & ~YFracZero;
  assign ZNaNE = ZExpMaxE & ~ZFracZero;

  // a signaling NaN has the most significant fraction bit clear
  assign XSNaNE = XNaNE&~XFracE[`NF-1];
  assign YSNaNE = YNaNE&~YFracE[`NF-1];
  assign ZSNaNE = ZNaNE&~ZFracE[`NF-1];

  assign XDenormE = XExpZero & ~XFracZero;
  assign YDenormE = YExpZero & ~YFracZero;
  assign ZDenormE = ZExpZero & ~ZFracZero;

  assign XInfE = XExpMaxE & XFracZero;
  assign YInfE = YExpMaxE & YFracZero;
  assign ZInfE = ZExpMaxE & ZFracZero;

  assign XZeroE = XExpZero & XFracZero;
  assign YZeroE = YExpZero & YFracZero;
  assign ZZeroE = ZExpZero & ZFracZero;

  assign BiasE = FmtE ? {1'b0, {`NE-1{1'b1}}} : 13'h7f;

  // NaN detection on the DUT result and on the expected answer
  assign wnan   = FmtE ? &FMAResM[`FLEN-2:`NF] && |FMAResM[`NF-1:0] : &FMAResM[30:23] && |FMAResM[22:0];
  assign ansnan = FmtE ? &ans[`FLEN-2:`NF] && |ans[`NF-1:0] : &ans[30:23] && |ans[22:0];

  // ---------------------------------------------------------------------
  // instantiate device under test
  // ---------------------------------------------------------------------
  fma1 UUT1(.XManE({XAssumed1E,XFracE}), .YManE({YAssumed1E,YFracE}), .ZManE({ZAssumed1E,ZFracE}), .*);
  fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .ZSgnM(ZSgnE),
            .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE),
            .XManM({XAssumed1E,XFracE}), .YManM({YAssumed1E,YFracE}), .ZManM({ZAssumed1E,ZFracE}),
            .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE),
            .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE),
            .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE),
            .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE),
            .FOpCtrlM(FOpCtrlE[2:0]), .KillProdM(KillProdE), .AddendStickyM(AddendStickyE),
            .ProdExpM(ProdExpE), .AlignedAddendM(AlignedAddendE), .ProdManM(ProdManE),
            .FmtM(FmtE), .FrmM(FrmE), .FMAFlgM, .FMAResM);

  // generate clock
  always
    begin
      clk = 1; #5; clk = 0; #5;
    end

  // at start of test, load vectors
  initial
    begin
      $readmemh("testFloatNoSpace", testvectors);
    end

  // apply test vectors on rising edge of clk
  always @(posedge clk)
    begin
      #1;
      if (FmtE==1'b1) {X, Y, Z, ans, flags} = testvectors[vectornum];
      else begin
        // single precision: fill the upper 32 bits of each operand with ones
        X     = {{32{1'b1}}, testvectors[vectornum][135:104]};
        Y     = {{32{1'b1}}, testvectors[vectornum][103:72]};
        Z     = {{32{1'b1}}, testvectors[vectornum][71:40]};
        ans   = {{32{1'b1}}, testvectors[vectornum][39:8]};
        flags = testvectors[vectornum][7:0];
      end
    end

  // check results on falling edge of clk
  //
  // A vector fails when
  //   - the flags differ, or
  //   - the result is not a NaN and differs from the expected answer, or
  //   - the result is a NaN although a non-NaN answer was expected, or
  //   - both are NaNs but the result is neither the expected NaN nor the
  //     quieted version of one of the NaN inputs (sign bit is ignored).
  always @(negedge clk) begin
    // ---------------- double precision ----------------
    if((FmtE==1'b1) & (FMAFlgM != flags[4:0] || (!wnan && (FMAResM != ans)) || (wnan && !ansnan) ||
        (wnan && ansnan && ~((XNaNE && (FMAResM[`FLEN-2:0] == {XExpE,1'b1,X[`NF-2:0]})) ||
                             (YNaNE && (FMAResM[`FLEN-2:0] == {YExpE,1'b1,Y[`NF-2:0]})) ||
                             (ZNaNE && (FMAResM[`FLEN-2:0] == {ZExpE,1'b1,Z[`NF-2:0]})) ||
                             (FMAResM[`FLEN-2:0] == ans[`FLEN-2:0]))))) begin
      $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
      if(FMAResM == 64'h8000000000000000) $display( "FMAResM=-zero ");
      if(XDenormE) $display( "xdenorm ");
      if(YDenormE) $display( "ydenorm ");
      if(ZDenormE) $display( "zdenorm ");
      if(FMAFlgM[4] != 0) $display( "invld ");
      if(FMAFlgM[2] != 0) $display( "ovrflw ");
      if(FMAFlgM[1] != 0) $display( "unflw ");
      // the sign bit is bit `FLEN-1 (bit `FLEN does not exist)
      if(FMAResM[`FLEN-1] && FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} && FMAResM[`NF-1:0] == 0) $display( "FMAResM=-inf ");
      if(~FMAResM[`FLEN-1] && FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} && FMAResM[`NF-1:0] == 0) $display( "FMAResM=+inf ");
      if(FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} && FMAResM[`NF-1:0] != 0 && ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN ");
      if(FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} && FMAResM[`NF-1:0] != 0 && FMAResM[`NF-1]) $display( "FMAResM=qutNaN ");
      if(ans[`FLEN-1] && ans[`FLEN-2:`NF] == {`NE{1'b1}} && ans[`NF-1:0] == 0) $display( "ans=-inf ");
      if(~ans[`FLEN-1] && ans[`FLEN-2:`NF] == {`NE{1'b1}} && ans[`NF-1:0] == 0) $display( "ans=+inf ");
      if(ans[`FLEN-2:`NF] == {`NE{1'b1}} && ans[`NF-1:0] != 0 && ~ans[`NF-1]) $display( "ans=sigNaN ");
      if(ans[`FLEN-2:`NF] == {`NE{1'b1}} && ans[`NF-1:0] != 0 && ans[`NF-1]) $display( "ans=qutNaN ");
      errors = errors + 1;
      $stop;
    end

    // ---------------- single precision ----------------
    if((FmtE==1'b0) & (FMAFlgM != flags[4:0] || (!wnan && (FMAResM != ans)) || (wnan && !ansnan) ||
        (wnan && ansnan && ~((XNaNE && (FMAResM[30:0] == {X[30:23],1'b1,X[21:0]})) ||
                             (YNaNE && (FMAResM[30:0] == {Y[30:23],1'b1,Y[21:0]})) ||
                             (ZNaNE && (FMAResM[30:0] == {Z[30:23],1'b1,Z[21:0]})) ||
                             (FMAResM[30:0] == ans[30:0]))))) begin
      $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
      // the single-precision value lives in the low 32 bits, so the special
      // values are matched there rather than against 64-bit double patterns
      if(FMAResM[31:0] == 32'h80000000) $display( "FMAResM=-zero ");
      if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
      if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
      if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
      if(FMAFlgM[4] != 0) $display( "invld ");
      if(FMAFlgM[2] != 0) $display( "ovrflw ");
      if(FMAFlgM[1] != 0) $display( "unflw ");
      if(FMAResM[31:0] == 32'hFF800000) $display( "FMAResM=-inf ");
      if(FMAResM[31:0] == 32'h7F800000) $display( "FMAResM=+inf ");
      if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
      if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
      if(ans[31:0] == 32'hFF800000) $display( "ans=-inf ");
      if(ans[31:0] == 32'h7F800000) $display( "ans=+inf ");
      if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
      if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
      errors = errors + 1;
      //if (errors == 10)
      $stop;
    end

    vectornum = vectornum + 1;
    // Entries that $readmemh did not load are all X. The unsized fill literal
    // 'x takes the full width of the array word; a sized literal narrower
    // than the word would be zero-extended and never match.
    if (testvectors[vectornum] === 'x) begin
      $display("%d tests completed with %d errors", vectornum, errors);
      $stop;
    end
  end

endmodule

View File

@ -0,0 +1,3 @@
# Generate 6133248 single-precision fused multiply-add (f32_mulAdd) test
# vectors with Berkeley TestFloat: round to nearest even, tininess detected
# after rounding, fixed seed so the file is reproducible.
testfloat_gen f32_mulAdd -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat
# Delete the spaces between the fields so every line is one contiguous hex
# word; the testbench loads "testFloatNoSpace" with $readmemh.
tr -d ' ' < testFloat > testFloatNoSpace

0
wally-pipelined/src/fpu/adder.sv Executable file → Normal file
View File

View File

@ -1,117 +0,0 @@
// 15-bit parallel-prefix adder built around the kogge_stone prefix tree
// (the tree is Kogge-Stone even though the module is named bk15).
//   a, b : 15-bit operands      cin  : carry in
//   sum  : 15-bit result        cout : carry out
// Presumably {cout, sum} = a + b + cin -- NOTE(review): confirm against the
// generator that produced this netlist.
module bk15 (
  output        cout,
  output [14:0] sum,
  input  [14:0] a, b,
  input         cin
);

  wire [15:0] p, g;   // bit 0 carries the carry-in, bits 15:1 the operands
  wire [15:1] h, c;

  // pre-computation: propagate = a|b, generate = a&b, with cin folded into bit 0
  assign p[15:1] = a | b;
  assign p[0]    = 1'b1;
  assign g[15:1] = a & b;
  assign g[0]    = cin;

  // prefix tree
  kogge_stone prefix_tree (.h(h), .c(c), .p(p[14:0]), .g(g[14:0]));

  // post-computation; h[15] is not assigned inside kogge_stone, so it is formed here
  assign h[15] = g[15] | c[15];
  assign sum   = (p[15:1] ^ h) | (g[15:1] & c);
  assign cout  = p[15] & h[15];

endmodule // bk15
// Hand-wired 15-bit Kogge-Stone prefix network.
//   p, g : per-bit input vectors [14:0]
//   h    : prefix outputs; h[k] = H_k_0 for k = 1..14. h[15] is declared
//          but not assigned inside this module.
//   c    : c[1] = g[0] and c[k+1] = p[k] & H_k_0 for k = 1..14.
// Internal nets are named H_i_j / I_i_j for the cell that combines bit
// positions i down to j. The first stage uses the reduced cells rgry/rblk,
// the later stages use the grey/black cells.
module kogge_stone (h, c, p, g);
input [14:0] p;
input [14:0] g;
output [15:1] h;
output [15:1] c;
logic H_1_0,H_2_1,I_2_1,H_3_2,I_3_2,H_4_3,I_4_3,H_5_4,I_5_4,H_6_5,I_6_5,H_7_6,I_7_6,H_8_7,I_8_7,H_9_8,I_9_8,H_10_9
,I_10_9,H_11_10,I_11_10,H_12_11,I_12_11,H_13_12,I_13_12,H_14_13,I_14_13,H_2_0,H_3_0,H_4_1,I_4_1,H_5_2,I_5_2,H_6_3
,I_6_3,H_7_4,I_7_4,H_8_5,I_8_5,H_9_6,I_9_6,H_10_7,I_10_7,H_11_8,I_11_8,H_12_9,I_12_9,H_13_10,I_13_10,H_14_11,I_14_11
,H_4_0,H_5_0,H_6_0,H_7_0,H_8_1,I_8_1,H_9_2,I_9_2,H_10_3,I_10_3,H_11_4,I_11_4,H_12_5,I_12_5,H_13_6,I_13_6,H_14_7
,I_14_7,H_8_0,H_9_0,H_10_0,H_11_0,H_12_0,H_13_0,H_14_0;
// parallel-prefix, Kogge-Stone
// Stage 1: Generates G/P pairs that span 1 bits
// Each cell ORs two adjacent g bits; the rblk cells also AND two adjacent
// p bits taken one position lower than the g bits.
rgry g_1_0 (H_1_0, {g[1],g[0]});
rblk b_2_1 (H_2_1, I_2_1, {g[2],g[1]}, {p[1],p[0]});
rblk b_3_2 (H_3_2, I_3_2, {g[3],g[2]}, {p[2],p[1]});
rblk b_4_3 (H_4_3, I_4_3, {g[4],g[3]}, {p[3],p[2]});
rblk b_5_4 (H_5_4, I_5_4, {g[5],g[4]}, {p[4],p[3]});
rblk b_6_5 (H_6_5, I_6_5, {g[6],g[5]}, {p[5],p[4]});
rblk b_7_6 (H_7_6, I_7_6, {g[7],g[6]}, {p[6],p[5]});
rblk b_8_7 (H_8_7, I_8_7, {g[8],g[7]}, {p[7],p[6]});
rblk b_9_8 (H_9_8, I_9_8, {g[9],g[8]}, {p[8],p[7]});
rblk b_10_9 (H_10_9, I_10_9, {g[10],g[9]}, {p[9],p[8]});
rblk b_11_10 (H_11_10, I_11_10, {g[11],g[10]}, {p[10],p[9]});
rblk b_12_11 (H_12_11, I_12_11, {g[12],g[11]}, {p[11],p[10]});
rblk b_13_12 (H_13_12, I_13_12, {g[13],g[12]}, {p[12],p[11]});
rblk b_14_13 (H_14_13, I_14_13, {g[14],g[13]}, {p[13],p[12]});
// Stage 2: Generates G/P pairs that span 2 bits
// Grey cells finish the prefixes that already reach bit 0; black cells
// combine each stage-1 pair with the pair two positions below it.
grey g_2_0 (H_2_0, {H_2_1,g[0]}, I_2_1);
grey g_3_0 (H_3_0, {H_3_2,H_1_0}, I_3_2);
black b_4_1 (H_4_1, I_4_1, {H_4_3,H_2_1}, {I_4_3,I_2_1});
black b_5_2 (H_5_2, I_5_2, {H_5_4,H_3_2}, {I_5_4,I_3_2});
black b_6_3 (H_6_3, I_6_3, {H_6_5,H_4_3}, {I_6_5,I_4_3});
black b_7_4 (H_7_4, I_7_4, {H_7_6,H_5_4}, {I_7_6,I_5_4});
black b_8_5 (H_8_5, I_8_5, {H_8_7,H_6_5}, {I_8_7,I_6_5});
black b_9_6 (H_9_6, I_9_6, {H_9_8,H_7_6}, {I_9_8,I_7_6});
black b_10_7 (H_10_7, I_10_7, {H_10_9,H_8_7}, {I_10_9,I_8_7});
black b_11_8 (H_11_8, I_11_8, {H_11_10,H_9_8}, {I_11_10,I_9_8});
black b_12_9 (H_12_9, I_12_9, {H_12_11,H_10_9}, {I_12_11,I_10_9});
black b_13_10 (H_13_10, I_13_10, {H_13_12,H_11_10}, {I_13_12,I_11_10});
black b_14_11 (H_14_11, I_14_11, {H_14_13,H_12_11}, {I_14_13,I_12_11});
// Stage 3: Generates G/P pairs that span 4 bits
// Same pattern with a distance of four positions between combined pairs.
grey g_4_0 (H_4_0, {H_4_1,g[0]}, I_4_1);
grey g_5_0 (H_5_0, {H_5_2,H_1_0}, I_5_2);
grey g_6_0 (H_6_0, {H_6_3,H_2_0}, I_6_3);
grey g_7_0 (H_7_0, {H_7_4,H_3_0}, I_7_4);
black b_8_1 (H_8_1, I_8_1, {H_8_5,H_4_1}, {I_8_5,I_4_1});
black b_9_2 (H_9_2, I_9_2, {H_9_6,H_5_2}, {I_9_6,I_5_2});
black b_10_3 (H_10_3, I_10_3, {H_10_7,H_6_3}, {I_10_7,I_6_3});
black b_11_4 (H_11_4, I_11_4, {H_11_8,H_7_4}, {I_11_8,I_7_4});
black b_12_5 (H_12_5, I_12_5, {H_12_9,H_8_5}, {I_12_9,I_8_5});
black b_13_6 (H_13_6, I_13_6, {H_13_10,H_9_6}, {I_13_10,I_9_6});
black b_14_7 (H_14_7, I_14_7, {H_14_11,H_10_7}, {I_14_11,I_10_7});
// Stage 4: Generates G/P pairs that span 8 bits
// Only grey cells remain: every output of this stage reaches bit 0.
grey g_8_0 (H_8_0, {H_8_1,g[0]}, I_8_1);
grey g_9_0 (H_9_0, {H_9_2,H_1_0}, I_9_2);
grey g_10_0 (H_10_0, {H_10_3,H_2_0}, I_10_3);
grey g_11_0 (H_11_0, {H_11_4,H_3_0}, I_11_4);
grey g_12_0 (H_12_0, {H_12_5,H_4_0}, I_12_5);
grey g_13_0 (H_13_0, {H_13_6,H_5_0}, I_13_6);
grey g_14_0 (H_14_0, {H_14_7,H_6_0}, I_14_7);
// Final Stage: Apply c_k+1=p_k&H_k_0
// c[1] comes straight from g[0]; h[k] exposes the full prefix H_k_0.
assign c[1]=g[0];
assign h[1]=H_1_0; assign c[2]=p[1]&H_1_0;
assign h[2]=H_2_0; assign c[3]=p[2]&H_2_0;
assign h[3]=H_3_0; assign c[4]=p[3]&H_3_0;
assign h[4]=H_4_0; assign c[5]=p[4]&H_4_0;
assign h[5]=H_5_0; assign c[6]=p[5]&H_5_0;
assign h[6]=H_6_0; assign c[7]=p[6]&H_6_0;
assign h[7]=H_7_0; assign c[8]=p[7]&H_7_0;
assign h[8]=H_8_0; assign c[9]=p[8]&H_8_0;
assign h[9]=H_9_0; assign c[10]=p[9]&H_9_0;
assign h[10]=H_10_0; assign c[11]=p[10]&H_10_0;
assign h[11]=H_11_0; assign c[12]=p[11]&H_11_0;
assign h[12]=H_12_0; assign c[13]=p[12]&H_12_0;
assign h[13]=H_13_0; assign c[14]=p[13]&H_13_0;
assign h[14]=H_14_0; assign c[15]=p[14]&H_14_0;
endmodule // kogge_stone

View File

@ -1,43 +0,0 @@
// Black cell: merges two (generate, propagate) pairs into one.
//   gout = gin[1] | (pin[1] & gin[0])
//   pout = pin[1] & pin[0]
module black (
  output       gout, pout,
  input  [1:0] gin, pin
);
  assign gout = (gin[0] & pin[1]) | gin[1];
  assign pout = &pin;
endmodule // black
// Grey cell: generate-only merge, no propagate output.
//   gout = gin[1] | (pin & gin[0])
module grey (
  output       gout,
  input  [1:0] gin,
  input        pin
);
  assign gout = (gin[0] & pin) | gin[1];
endmodule // grey
// Reduced black cell: plain OR of the two generate inputs and plain AND of
// the two propagate inputs.
//   hout = gin[1] | gin[0]
//   iout = pin[1] & pin[0]
module rblk (
  output       hout, iout,
  input  [1:0] gin, pin
);
  assign hout = |gin;
  assign iout = &pin;
endmodule // rblk
// Reduced grey cell: OR of the two generate inputs.
//   hout = gin[1] | gin[0]
module rgry (
  output       hout,
  input  [1:0] gin
);
  assign hout = |gin;
endmodule // rgry

0
wally-pipelined/src/fpu/cla12.sv Executable file → Normal file
View File

0
wally-pipelined/src/fpu/cla52.sv Executable file → Normal file
View File

View File

@ -207,7 +207,7 @@ module cla64 (S, X, Y, Sub);
assign Bbar = B ^ {64{Sub}};
endmodule // cla64
// This module performs 64-bit subtraction. It is used to get the two's complement
// of main addition or subtraction in the floating point adder.

View File

@ -5,19 +5,19 @@
// and modifies the sign of op1. The converted operands are Float1
// and Float2.
module convert_inputs(Float1, Float2, op1, op2, op_type, P);
input [63:0] op1; // 1st input operand (A)
input [63:0] op2; // 2nd input operand (B)
input [3:0] op_type; // Function opcode
input P; // Result Precision (0 for double, 1 for single)
module convert_inputs(
input [63:0] op1, // 1st input operand (A)
input [63:0] op2, // 2nd input operand (B)
input [3:0] op_type, // Function opcode
input P, // Result Precision (0 for double, 1 for single)
output [63:0] Float1; // Converted 1st input operand
output [63:0] Float2; // Converted 2nd input operand
wire conv_SP; // Convert from SP to DP
wire negate; // Operation is negation
wire abs_val; // Operation is absolute value
output [63:0] Float1, // Converted 1st input operand
output [63:0] Float2 // Converted 2nd input operand
);
wire conv_SP; // Convert from SP to DP
wire negate; // Operation is negation
wire abs_val; // Operation is absolute value
wire Zexp1; // One if the exponent of op1 is zero
wire Zexp2; // One if the exponent of op2 is zero
wire Oexp1; // One if the exponent of op1 is all ones
@ -33,14 +33,6 @@ module convert_inputs(Float1, Float2, op1, op2, op_type, P);
assign Zexp2 = ~(|op2[30:23]);
assign Oexp1 = (&op1[30:23]);
assign Oexp2 = (&op2[30:23]);
// assign Zexp1 = ~(op1[62] | op1[61] | op1[60] | op1[59] |
// op1[58] | op1[57] | op1[56] | op1[55]);
// assign Zexp2 = ~(op2[62] | op2[61] | op2[60] | op2[59] |
// op2[58] | op2[57] | op2[56] | op2[55]);
// assign Oexp1 = (op1[62] & op1[61] & op1[60] & op1[59] &
// op1[58] & op1[57] & op1[56] & op1[55]);
// assign Oexp2 = (op2[62] & op2[61] & op2[60] & op2[59] &
// op2[58] & op2[57] & op2[56] &op2[55]);
// Conditionally convert op1. Lower 29 bits are zero for single precision.
assign Float1[62:29] = conv_SP ? {op1[30], {3{(~op1[30]&~Zexp1)|Oexp1}}, op1[29:0]}
@ -57,7 +49,7 @@ module convert_inputs(Float1, Float2, op1, op2, op_type, P);
// is negation (op_type = 101) or absolute value (op_type = 100)
assign negate = op_type[2] & ~op_type[1] & op_type[0];
assign abs_val = op_type[2] & ~op_type[1] & ~op_type[0];
assign abs_val = op_type[2] & ~op_type[1] & ~op_type[0]; //*** remove abs_val
assign Float1[63] = conv_SP ? (op1[31] ^ negate) & ~abs_val : (op1[63] ^ negate) & ~abs_val;
assign Float2[63] = conv_SP ? op2[31] : op2[63];

View File

@ -3,21 +3,22 @@
// it conditionally converts single precision values to double
// precision values and modifies the sign of op1.
// The converted operands are Float1 and Float2.
module convert_inputs_div (Float1, Float2b, op1, op2, op_type, P);
module convert_inputs_div (
input logic [63:0] op1; // 1st input operand (A)
input logic [63:0] op2; // 2nd input operand (B)
input logic P; // Result Precision (0 for double, 1 for single)
input logic op_type; // Operation
input logic [63:0] op1, // 1st input operand (A)
input logic [63:0] op2, // 2nd input operand (B)
input logic P, // Result Precision (0 for double, 1 for single)
input logic op_type, // Operation
output logic [63:0] Float1; // Converted 1st input operand
output logic [63:0] Float2b; // Converted 2nd input operand
output logic [63:0] Float1, // Converted 1st input operand
output logic [63:0] Float2b // Converted 2nd input operand
);
logic [63:0] Float2;
logic Zexp1; // One if the exponent of op1 is zero
logic Zexp2; // One if the exponent of op2 is zero
logic Oexp1; // One if the exponent of op1 is all ones
logic Oexp2; // One if the exponent of op2 is all ones
logic Zexp1; // One if the exponent of op1 is zero
logic Zexp2; // One if the exponent of op2 is zero
logic Oexp1; // One if the exponent of op1 is all ones
logic Oexp2; // One if the exponent of op2 is all ones
// Test if the input exponent is zero, because if it is then the
// exponent of the converted number should be zero.

View File

@ -1,25 +1,21 @@
module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out,
regr_out, d, n, sel_muxa, sel_muxb, sel_muxr, reset, clk, load_rega, load_regb,
load_regc, load_regd, load_regr, load_regs, P, op_type, exp_odd);
module divconv (
input logic [52:0] d, n;
input logic [2:0] sel_muxa, sel_muxb;
input logic sel_muxr;
input logic load_rega, load_regb, load_regc, load_regd;
input logic load_regr, load_regs;
input logic P;
input logic op_type;
input logic exp_odd;
input logic reset;
input logic clk;
input logic [52:0] d, n,
input logic [2:0] sel_muxa, sel_muxb,
input logic sel_muxr,
input logic load_rega, load_regb, load_regc, load_regd,
input logic load_regr, load_regs,
input logic P,
input logic op_type,
input logic exp_odd,
input logic reset,
input logic clk,
output logic [63:0] q1, qp1, qm1;
output logic [63:0] q0, qp0, qm0;
output logic [63:0] rega_out, regb_out, regc_out, regd_out;
output logic [127:0] regr_out;
supply1 vdd;
supply0 vss;
output logic [63:0] q1, qp1, qm1,
output logic [63:0] q0, qp0, qm0,
output logic [63:0] rega_out, regb_out, regc_out, regd_out,
output logic [127:0] regr_out
);
logic [63:0] muxa_out, muxb_out;
logic [10:0] ia_div, ia_sqrt;
@ -36,12 +32,12 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_o
logic [63:0] q_const, qp_const, qm_const;
logic [63:0] d2, n2;
logic [11:0] d3;
logic muxr_out;
logic cout1, cout2, cout3, cout4, cout5, cout6, cout7;
logic muxr_out;
logic cout1, cout2, cout3, cout4, cout5, cout6, cout7;
// Check if exponent is odd for sqrt
// If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA
assign d2 = (exp_odd&op_type) ? {vss,d,10'h0} : {d,11'h0};
assign d2 = (exp_odd&op_type) ? {1'b0,d,10'h0} : {d,11'h0};
assign n2 = op_type ? d2 : {n,11'h0};
// IA div/sqrt
@ -62,10 +58,7 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_o
mux2 #(64) mx4 (q0, q1, q1[63], mcand_q);
mux2 #(64) mx5 (muxb_out, mcand_q, sel_muxr&op_type, mplier);
mux2 #(64) mx6 (muxa_out, mcand_q, sel_muxr, mcand);
// TDM multiplier (carry/save)
multiplier mult1 (mcand, mplier, Sum, Carry);
// Q*D - N (reversed but changed in rounder.v to account for sign reversal)
csa #(128) csa1 (Sum, Carry, constant, Sum2, Carry2);
// Add ulp for subtraction in remainder
mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out);
@ -74,24 +67,17 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_o
mux2 #(64) mx9 ({64'h0000_0000_0000_0A00}, {64'h0000_0140_0000_0000}, P, qp_const);
mux2 #(64) mxA ({64'hFFFF_FFFF_FFFF_F9FF}, {64'hFFFF_FF3F_FFFF_FFFF}, P, qm_const);
// CPA (from CSA)/Remainder addition/subtraction
// adder #(128) cpa1 (Sum2, Carry2, muxr_out, mul_out, cout1);
assign {cout1, mul_out} = Sum2 + Carry2 + muxr_out;
// CPA (from CSA)/Remainder addition/subtraction
assign {cout1, mul_out} = (mcand*mplier) + constant + muxr_out;
// Assuming [1,2) - q1
// adder #(64) cpa2 (regb_out, q_const, 1'b0, q_out1, cout2);
assign {cout2, q_out1} = regb_out + q_const;
// adder #(64) cpa3 (regb_out, qp_const, 1'b0, qp_out1, cout3);
assign {cout3, qp_out1} = regb_out + qp_const;
// adder #(64) cpa4 (regb_out, qm_const, 1'b1, qm_out1, cout4);
assign {cout4, qm_out1} = regb_out + qm_const + 1'b1;
// Assuming [0.5,1) - q0
// adder #(64) cpa5 ({regb_out[62:0], vss}, q_const, 1'b0, q_out0, cout5);
assign {cout5, q_out0} = {regb_out[62:0], vss} + q_const;
// adder #(64) cpa6 ({regb_out[62:0], vss}, qp_const, 1'b0, qp_out0, cout6);
assign {cout6, qp_out0} = {regb_out[62:0], vss} + qp_const;
// adder #(64) cpa7 ({regb_out[62:0], vss}, qm_const, 1'b1, qm_out0, cout7);
assign {cout7, qm_out0} = {regb_out[62:0], vss} + qm_const + 1'b1;
assign {cout5, q_out0} = {regb_out[62:0], 1'b0} + q_const;
assign {cout6, qp_out0} = {regb_out[62:0], 1'b0} + qp_const;
assign {cout7, qm_out0} = {regb_out[62:0], 1'b0} + qm_const + 1'b1;
// One's complement instead of two's complement (for hw efficiency)
assign three = {~mul_out[126], mul_out[126], ~mul_out[125:63]};
@ -114,151 +100,3 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_o
endmodule // divconv
// module adder #(parameter WIDTH=8)
// (input logic [WIDTH-1:0] a, b,
// input logic cin,
// output logic [WIDTH-1:0] y,
// output logic cout);
// assign {cout, y} = a + b + cin;
// endmodule // adder
// module flopenr #(parameter WIDTH = 8)
// (input logic clk, reset, en,
// input logic [WIDTH-1:0] d,
// output logic [WIDTH-1:0] q);
// always_ff @(posedge clk, posedge reset)
// if (reset) q <= #10 0;
// else if (en) q <= #10 d;
// endmodule // flopenr
// module flopr #(parameter WIDTH = 8)
// (input logic clk, reset,
// input logic [WIDTH-1:0] d,
// output logic [WIDTH-1:0] q);
// always_ff @(posedge clk, posedge reset)
// if (reset) q <= #10 0;
// else q <= #10 d;
// endmodule // flopr
// module flopenrc #(parameter WIDTH = 8)
// (input logic clk, reset, en, clear,
// input logic [WIDTH-1:0] d,
// output logic [WIDTH-1:0] q);
// always_ff @(posedge clk, posedge reset)
// if (reset) q <= #10 0;
// else if (en)
// if (clear) q <= #10 0;
// else q <= #10 d;
// endmodule // flopenrc
// module floprc #(parameter WIDTH = 8)
// (input logic clk, reset, clear,
// input logic [WIDTH-1:0] d,
// output logic [WIDTH-1:0] q);
// always_ff @(posedge clk, posedge reset)
// if (reset) q <= #10 0;
// else
// if (clear) q <= #10 0;
// else q <= #10 d;
// endmodule // floprc
// module mux2 #(parameter WIDTH = 8)
// (input logic [WIDTH-1:0] d0, d1,
// input logic s,
// output logic [WIDTH-1:0] y);
// assign y = s ? d1 : d0;
// endmodule // mux2
// module mux3 #(parameter WIDTH = 8)
// (input logic [WIDTH-1:0] d0, d1, d2,
// input logic [1:0] s,
// output logic [WIDTH-1:0] y);
// assign y = s[1] ? d2 : (s[0] ? d1 : d0);
// endmodule // mux3
// module mux4 #(parameter WIDTH = 8)
// (input logic [WIDTH-1:0] d0, d1, d2, d3,
// input logic [1:0] s,
// output logic [WIDTH-1:0] y);
// assign y = s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0);
// endmodule // mux4
// module mux5 #(parameter WIDTH = 8)
// (input logic [WIDTH-1:0] d0, d1, d2, d3, d4,
// input logic [2:0] s,
// output logic [WIDTH-1:0] y);
// always_comb
// casez (s)
// 3'b000 : y = d0;
// 3'b001 : y = d1;
// 3'b010 : y = d2;
// 3'b011 : y = d3;
// 3'b1?? : y = d4;
// endcase // casez (s)
// endmodule // mux5
// module mux6 #(parameter WIDTH = 8)
// (input logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5,
// input logic [2:0] s,
// output logic [WIDTH-1:0] y);
// always_comb
// casez (s)
// 3'b000 : y = d0;
// 3'b001 : y = d1;
// 3'b010 : y = d2;
// 3'b011 : y = d3;
// 3'b10? : y = d4;
// 3'b11? : y = d5;
// endcase // casez (s)
// endmodule // mux6
// Equality comparator: y is 1 when the WIDTH-bit inputs a and b are equal.
module eqcmp #(parameter WIDTH = 8) (
  input  logic [WIDTH-1:0] a, b,
  output logic             y
);
  // equal exactly when no bit position differs
  assign y = ~|(a ^ b);
endmodule // eqcmp
// module fa (input logic a, b, c, output logic sum, carry);
// assign sum = a^b^c;
// assign carry = a&b|a&c|b&c;
// endmodule // fa
// module csa #(parameter WIDTH=8)
// (input logic [WIDTH-1:0] a, b, c,
// output logic [WIDTH-1:0] sum, carry);
// logic [WIDTH:0] carry_temp;
// genvar i;
// generate
// for (i=0;i<WIDTH;i=i+1)
// begin : genbit
// fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
// end
// endgenerate
// assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};
// endmodule // csa

View File

@ -115,6 +115,6 @@ module exception (Ztype, Invalid, Denorm, ANorm, BNorm, Sub, A, B, op_type);
// Determine if the effective operation is subtraction
assign Sub = ~(op_type[3] & ~op_type[0]) & ( (op_type[3] & op_type[0]) | (add_sub & (A[63]^B[63]^op_type[0])) );
endmodule // exception

View File

@ -1,16 +1,13 @@
// Exception logic for the floating point adder. Note: We may
// actually want to move to where the result is computed.
module exception_div (Ztype, Invalid, Denorm, ANorm, BNorm, A, B, op_type);
module exception_div (
input logic [63:0] A; // 1st input operand (op1)
input logic [63:0] B; // 2nd input operand (op2)
input logic op_type; // Determine operation
output logic [2:0] Ztype; // Indicates type of result (Z)
output logic Invalid; // Invalid operation exception
output logic Denorm; // Denormalized input
output logic ANorm; // A is not zero or Denorm
output logic BNorm; // B is not zero or Denorm
input logic [63:0] A, // 1st input operand (op1)
input logic [63:0] B, // 2nd input operand (op2)
input logic op_type, // Determine operation
output logic [2:0] Ztype, // Indicates type of result (Z)
output logic Invalid // Invalid operation exception
);
logic AzeroM; // '1' if the mantissa of A is zero
logic BzeroM; // '1' if the mantissa of B is zero
@ -18,8 +15,6 @@ module exception_div (Ztype, Invalid, Denorm, ANorm, BNorm, A, B, op_type);
logic BzeroE; // '1' if the exponent of B is zero
logic AonesE; // '1' if the exponent of A is all ones
logic BonesE; // '1' if the exponent of B is all ones
logic ADenorm; // '1' if A is a denomalized number
logic BDenorm; // '1' if B is a denomalized number
logic AInf; // '1' if A is infinite
logic BInf; // '1' if B is infinite
logic AZero; // '1' if A is 0
@ -32,11 +27,10 @@ module exception_div (Ztype, Invalid, Denorm, ANorm, BNorm, A, B, op_type);
logic ZInf; // '1' if result Z is an infnity
logic Zero; // '1' if result is zero
parameter [51:0] fifty_two_zeros = 52'h0; // Use parameter?
// Determine if mantissas are all zeros
assign AzeroM = (A[51:0] == fifty_two_zeros);
assign BzeroM = (B[51:0] == fifty_two_zeros);
assign AzeroM = (A[51:0] == 52'h0);
assign BzeroM = (B[51:0] == 52'h0);
// Determine if exponents are all ones or all zeros
assign AonesE = A[62]&A[61]&A[60]&A[59]&A[58]&A[57]&A[56]&A[55]&A[54]&A[53]&A[52];
@ -45,8 +39,6 @@ module exception_div (Ztype, Invalid, Denorm, ANorm, BNorm, A, B, op_type);
assign BzeroE = ~(B[62]|B[61]|B[60]|B[59]|B[58]|B[57]|B[56]|B[55]|B[54]|B[53]|B[52]);
// Determine special cases. Note: Zero is not really a special case.
assign ADenorm = AzeroE & ~AzeroM;
assign BDenorm = BzeroE & ~BzeroM;
assign AInf = AonesE & AzeroM;
assign BInf = BonesE & BzeroM;
assign ANaN = AonesE & ~AzeroM;
@ -56,17 +48,11 @@ module exception_div (Ztype, Invalid, Denorm, ANorm, BNorm, A, B, op_type);
assign AZero = AzeroE & AzeroM;
// B is zero only when both its exponent and its mantissa are zero, the same
// test used for AZero. (Testing BzeroE twice also matched denormalized B.)
assign BZero = BzeroE & BzeroM;
// A and B are normalized if their exponents are not zero.
assign ANorm = ~AzeroE;
assign BNorm = ~BzeroE;
// An "Invalid Operation" exception occurs if (A or B is a signalling NaN)
// or (A and B are both Infinite)
assign Invalid = ASNaN | BSNaN | (((AInf & BInf) | (AZero & BZero))&~op_type) |
(A[63] & op_type);
// The Denorm flag is set if A is denormlized or if B is normalized
assign Denorm = ADenorm | BDenorm;
// The result is a quiet NaN if (an "Invalid Operation" exception occurs)
// or (A is a NaN) or (B is a NaN).

View File

@ -29,14 +29,14 @@
module faddcvt(
input logic clk,
input logic reset,
input logic FlushM,
input logic StallM,
input logic FlushM, // flush the memory stage
input logic StallM, // stall the memory stage
input logic [63:0] FSrcXE, // 1st input operand (A)
input logic [63:0] FSrcYE, // 2nd input operand (B)
input logic [3:0] FOpCtrlE, FOpCtrlM, // Function opcode
input logic FmtE, FmtM, // Result Precision (0 for double, 1 for single)
input logic [2:0] FrmM, // Rounding mode - specify values
output logic [63:0] FAddResM, // Result of operation
input logic FmtE, FmtM, // Result Precision (1 for double, 0 for single)
input logic [2:0] FrmM, // Rounding mode - specify values
output logic [63:0] FAddResM, // Result of operation
output logic [4:0] FAddFlgM); // IEEE exception flags
logic [63:0] AddSumE, AddSumM;
@ -51,7 +51,6 @@ module faddcvt(
logic AddInvalidE, AddInvalidM;
logic AddDenormInE, AddDenormInM;
logic AddSwapE, AddSwapM;
logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2
logic AddSignAE, AddSignAM;
logic AddConvertE, AddConvertM;
logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M;
@ -62,8 +61,9 @@ module faddcvt(
fpuaddcvt1 fpadd1 (.FSrcXE, .FSrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE,
.AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE,
.AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE,
.AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE);
.AddDenormInE, .AddConvertE, .AddSwapE);
// E/M pipeline registers
flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM);
flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM);
flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM);
@ -72,9 +72,9 @@ module faddcvt(
flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM);
flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM);
flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM,
{AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE},
{AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM});
flopenrc #(14) EMRegAdd9(clk, reset, FlushM, ~StallM,
{AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddSignAE},
{AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM});
fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M,
@ -83,53 +83,52 @@ module faddcvt(
.AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM);
endmodule
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, FSrcXE, FSrcYE, FOpCtrlE, FmtE);
module fpuaddcvt1 (
input logic [63:0] FSrcXE, // 1st input operand (A)
input logic [63:0] FSrcYE, // 2nd input operand (B)
input logic [3:0] FOpCtrlE, // Function opcode
input logic FmtE, // Result Precision (1 for double, 0 for single)
input logic [63:0] FSrcXE; // 1st input operand (A)
input logic [63:0] FSrcYE; // 2nd input operand (B)
input logic [3:0] FOpCtrlE; // Function opcode
input logic FmtE; // Result Precision (1 for double, 0 for single)
output logic [63:0] AddFloat1E,
output logic [63:0] AddFloat2E,
output logic [10:0] AddExponentE,
output logic [10:0] AddExpPostSumE,
output logic [11:0] AddExp1DenormE, AddExp2DenormE,//KEP used to be [10:0]
output logic [63:0] AddSumE, AddSumTcE,
output logic [3:0] AddSelInvE,
output logic AddCorrSignE,
output logic AddSignAE,
output logic AddOp1NormE, AddOp2NormE,
output logic AddOpANormE, AddOpBNormE,
output logic AddInvalidE,
output logic AddDenormInE,
output logic AddConvertE,
output logic AddSwapE
);
wire [5:0] ZP_mantissaA;
wire [5:0] ZP_mantissaB;
wire ZV_mantissaA;
wire ZV_mantissaB;
wire P;
assign P = ~FmtE;
wire [63:0] IntValue;
wire [11:0] exp1, exp2;
wire [11:0] exp_diff1, exp_diff2;
wire [11:0] exp_shift;
wire [51:0] mantissaA;
wire [56:0] mantissaA1;
wire [63:0] mantissaA3;
wire [51:0] mantissaB;
wire [56:0] mantissaB1, mantissaB2;
wire [63:0] mantissaB3;
wire exp_gt63;
wire Sticky_out;
wire sub;
wire zeroB;
wire [5:0] align_shift;
output logic [63:0] AddFloat1E;
output logic [63:0] AddFloat2E;
output logic [10:0] AddExponentE;
output logic [10:0] AddExpPostSumE;
output logic [11:0] AddExp1DenormE, AddExp2DenormE;//KEP used to be [10:0]
output logic [63:0] AddSumE, AddSumTcE;
output logic [3:0] AddSelInvE;
output logic AddCorrSignE;
output logic AddSignAE;
output logic AddOp1NormE, AddOp2NormE;
output logic AddOpANormE, AddOpBNormE;
output logic AddInvalidE;
output logic AddDenormInE;
// output logic exp_valid;
output logic AddConvertE;
output logic AddSwapE;
output logic AddNormOvflowE;
wire [5:0] ZP_mantissaA;
wire [5:0] ZP_mantissaB;
wire ZV_mantissaA;
wire ZV_mantissaB;
wire [63:0] IntValue;
wire [11:0] exp1, exp2;
wire [11:0] exp_diff1, exp_diff2;
wire [11:0] exp_shift;
wire [51:0] mantissaA;
wire [56:0] mantissaA1;
wire [63:0] mantissaA3;
wire [51:0] mantissaB;
wire [56:0] mantissaB1, mantissaB2;
wire [63:0] mantissaB3;
wire exp_gt63;
wire Sticky_out;
wire sub;
wire zeroB;
wire [5:0] align_shift;
// Convert the input operands to their appropriate forms based on
// the original operands, the FOpCtrlE, and their precision P.
@ -137,7 +136,7 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
// and the sign of the first operand is set appropriately based on
// whether the operation is absolute value or negation.
convert_inputs conv1 (AddFloat1E, AddFloat2E, FSrcXE, FSrcYE, FOpCtrlE, P);
convert_inputs conv1 (.Float1(AddFloat1E), .Float2(AddFloat2E), .op1(FSrcXE), .op2(FSrcYE), .op_type(FOpCtrlE), .P);
// Test for exceptions and return the "Invalid Operation" and
// "Denormalized" Input Flags. The "AddSelInvE" is used in
@ -247,7 +246,7 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
// Finds normal underflow result to determine whether to round final exponent down
//***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be
assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0]) ? 1'b1 : (AddSumE[63] ? AddSumTcE[52] : AddSumE[52]);
// assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0]) ? 1'b1 : (AddSumE[63] ? AddSumTcE[52] : AddSumE[52]);
endmodule // fpadd
@ -281,32 +280,28 @@ endmodule // fpadd
//
module fpuaddcvt2 (FAddResM, FAddFlgM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);
module fpuaddcvt2 (
input [2:0] FrmM, // Rounding mode - specify values
input [3:0] FOpCtrlM, // Function opcode
input FmtM, // Result Precision (0 for double, 1 for single)
input [63:0] AddSumM, AddSumTcM,
input [63:0] AddFloat1M,
input [63:0] AddFloat2M,
input [11:0] AddExp1DenormM, AddExp2DenormM,
input [10:0] AddExponentM, AddExpPostSumM,
input [3:0] AddSelInvM,
input AddOp1NormM, AddOp2NormM,
input AddOpANormM, AddOpBNormM,
input AddInvalidM,
input AddDenormInM,
input AddSignAM,
input AddCorrSignM,
input AddConvertM,
input AddSwapM,
input [2:0] FrmM; // Rounding mode - specify values
input [3:0] FOpCtrlM; // Function opcode
input FmtM; // Result Precision (0 for double, 1 for single)
// input AddOvEnM; // Overflow trap enabled
// input AddUnEnM; // Underflow trap enabled
input [63:0] AddSumM, AddSumTcM;
input [63:0] AddFloat1M;
input [63:0] AddFloat2M;
input [11:0] AddExp1DenormM, AddExp2DenormM;
input [10:0] AddExponentM, AddExpPostSumM; //exp_pre;
//input exp_valid;
input [3:0] AddSelInvM;
input AddOp1NormM, AddOp2NormM;
input AddOpANormM, AddOpBNormM;
input AddInvalidM;
input AddDenormInM;
input AddSignAM;
input AddCorrSignM;
input AddConvertM;
input AddSwapM;
// input AddNormOvflowM;
output [63:0] FAddResM; // Result of operation
output [4:0] FAddFlgM; // IEEE exception flags
output [63:0] FAddResM, // Result of operation
output [4:0] FAddFlgM // IEEE exception flags
);
wire AddDenormM; // AddDenormM on input or output
wire P;
@ -322,7 +317,6 @@ module fpuaddcvt2 (FAddResM, FAddFlgM, AddSumM, AddSumTcM, AddSelInvM, AddExpPos
wire Sticky_out;
wire sign_corr;
wire zeroB;
wire [10:0] AddExpPostSumM;
wire mantissa_comp;
wire mantissa_comp_sum;
wire mantissa_comp_sum_tc;

View File

@ -2,19 +2,21 @@
`include "wally-config.vh"
module fclassify (
input logic XSgnE,
input logic XNaNE,
input logic XSNaNE,
input logic XNormE,
input logic XDenormE,
input logic XZeroE,
input logic XInfE,
output logic [63:0] ClassResE
input logic XSgnE, // sign bit
input logic XNaNE, // is NaN
input logic XSNaNE, // is signaling NaN
input logic XNormE, // is normal
input logic XDenormE, // is denormal
input logic XZeroE, // is zero
input logic XInfE, // is infinity
output logic [63:0] ClassResE // classify result
);
logic PInf, PZero, PNorm, PDenorm;
logic NInf, NZero, NNorm, NDenorm;
// determine the sub categories
assign PInf = ~XSgnE&XInfE;
assign NInf = XSgnE&XInfE;
assign PNorm = ~XSgnE&XNormE;

View File

@ -1,20 +1,21 @@
module fctrl (
input logic [6:0] Funct7D,
input logic [6:0] OpD,
input logic [4:0] Rs2D,
input logic [2:0] Funct3D,
input logic [2:0] FRM_REGW,
output logic IllegalFPUInstrD,
output logic FRegWriteD,
output logic FDivStartD,
output logic [2:0] FResultSelD,
output logic [3:0] FOpCtrlD,
output logic [1:0] FResSelD,
output logic [1:0] FIntResSelD,
output logic FmtD,
output logic [2:0] FrmD,
output logic FWriteIntD);
input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision
input logic [6:0] OpD, // bits 6:0 of instruction
input logic [4:0] Rs2D, // bits 24:20 of instruction
input logic [2:0] Funct3D, // bits 14:12 of instruction - may contain rounding mode
input logic [2:0] FRM_REGW, // rounding mode from CSR
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic FRegWriteD, // FP register write enable
output logic FDivStartD, // Start division or squareroot
output logic [2:0] FResultSelD, // select result to be written to fp register
output logic [3:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit
output logic [1:0] FResSelD, // select one of the results done in the memory stage
output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
output logic FmtD, // precision - single-0 double-1
output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
output logic FWriteIntD // is the result written to the integer register
);
`define FCTRLW 15
logic [`FCTRLW-1:0] ControlsD;
@ -100,16 +101,43 @@ module fctrl (
endcase
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
// unswizzle control bits
assign {FRegWriteD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD;
// if dynamic rounding, choose FRM_REGW
// rounding modes:
// 000 - round to nearest, ties to even
// 001 - round towards 0 - round to min magnitude
// 010 - round down - round towards negative infinity
// 011 - round up - round towards positive infinity
// 100 - round to nearest, ties to max magnitude - round to nearest, ties away from zero
// 111 - dynamic - choose FRM_REGW as rounding mode
assign FrmD = &Funct3D ? FRM_REGW : Funct3D;
// Precision
// 0-single
// 1-double
// 0-single
// 1-double
assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
// FResultSel:
// 000 - ReadRes - load
// 001 - FMARes - FMA and multiply
// 010 - FAddRes - add and fp to fp
// 011 - FDivRes - divide and squareroot
// 100 - FRes - anything that is written to the fp register and is ready in the memory stage
// FResSel:
// 00 - SrcA - move to fp register
// 01 - SgnRes - sign injection
// 10 - CmpRes - min/max
// 11 - CvtRes - convert to fp
// FIntResSel:
// 00 - CmpRes - less than, equal, or less than or equal
// 01 - FSrcX - move to int register
// 10 - ClassRes - classify
// 11 - CvtRes - convert to signed/unsigned int
// OpCtrl values:
// div/sqrt
// fdiv = ???0
// fsqrt = ???1
@ -120,7 +148,7 @@ module fctrl (
// feq = ?010
// flt = ?001
// fle = ?011
// {?, is min or max, is eq or le, is lt or le}
// {?, is min or max, is eq or le, is lt or le}
//fma/mult
// fmadd = ?000
@ -128,7 +156,7 @@ module fctrl (
// fnmsub = ?010 -(a*b)+c
// fnmadd = ?011 -(a*b)-c
// fmul = ?100
// {?, is mul, is negitive, is sub}
// {?, is mul, negate product, negate addend}
// sgn inj
// fsgnj = ??00
@ -138,37 +166,28 @@ module fctrl (
// add/sub/cnvt
// fadd = 0000
// fsub = 0001
// cnvt
// fcvt.s.d = 0111
// fcvt.d.s = 0111
// Fmt controls the output for fp -> fp
// convert
// fcvt.w.s = 0010
// fcvt.wu.s = 0110
// fcvt.s.w = 0001
// fcvt.s.wu = 0101
// fcvt.s.d = 0000
// fcvt.l.s = 1010
// fcvt.lu.s = 1110
// fcvt.s.l = 1001
// fcvt.s.lu = 1101
// fcvt.w.d = 0010
// fcvt.w.d = 0010
// fcvt.wu.d = 0110
// fcvt.d.w = 0001
// fcvt.d.wu = 0101
// fcvt.d.s = 0000
// fcvt.l.d = 1010
// fcvt.lu.d = 1110
// fcvt.d.l = 1001
// fcvt.d.lu = 1101
// {long, unsigned, to int, from int} Fmt controls the output for fp -> fp
// fmv.w.x = ???0
// fmv.d.x = ???1
// flw = ?000
// fld = ?001
// fsw = ?010
// fsd = ?011
// fmv.x.w = ?100
// fmv.x.d = ?101
// {?, is mv, is store, is double or fmv}
// {long, unsigned, to int, from int}
endmodule

View File

@ -1,36 +1,37 @@
`include "wally-config.vh"
// `include "../../config/rv64icfd/wally-config.vh"
module fcvt (
input logic XSgnE,
input logic [10:0] XExpE,
input logic [52:0] XManE,
input logic XZeroE,
input logic XNaNE,
input logic XInfE,
input logic XDenormE,
input logic [10:0] BiasE,
input logic [`XLEN-1:0] SrcAE, // integer input
input logic [3:0] FOpCtrlE, // chooses which instruction is done (full list below)
input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic FmtE, // precision 1 = double 0 = single
output logic [63:0] CvtResE, // convert final result
output logic [4:0] CvtFlgE); // convert flags {invalid, divide by zero, overflow, underflow, inexact}
input logic XSgnE, // X's sign
input logic [10:0] XExpE, // X's exponent
input logic [52:0] XManE, // X's fraction
input logic XZeroE, // is X zero
input logic XNaNE, // is X NaN
input logic XInfE, // is X infinity
input logic XDenormE, // is X denormalized
input logic [10:0] BiasE, // bias - depends on precision (max exponent/2)
input logic [`XLEN-1:0] SrcAE, // integer input
input logic [3:0] FOpCtrlE, // chooses which instruction is done (full list below)
input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic FmtE, // precision 1 = double 0 = single
output logic [63:0] CvtResE, // convert final result
output logic [4:0] CvtFlgE); // convert flags {invalid, divide by zero, overflow, underflow, inexact}
logic ResSgn; // FP result's sign
logic [10:0] ResExp,TmpExp; // FP result's exponent
logic [51:0] ResFrac; // FP result's fraction
logic [5:0] LZResP; // lz output
logic [7:0] Bits; // how many bits are in the integer result
logic [7:0] SubBits; // subtract these bits from the exponent (FP result)
logic [64+51:0] ShiftedManTmp; // Shifted mantissa
logic [64+51:0] ShiftVal; // value being shifted (to int - XMan, to FP - |integer input|)
logic [64+1:0] ShiftedMan; // shifted mantissa truncated
logic ResSgn; // FP result's sign
logic [10:0] ResExp,TmpExp; // FP result's exponent
logic [51:0] ResFrac; // FP result's fraction
logic [5:0] LZResP; // lz output
logic [7:0] Bits; // how many bits are in the integer result
logic [7:0] SubBits; // subtract these bits from the exponent (FP result)
logic [64+51:0] ShiftedManTmp; // Shifted mantissa
logic [64+51:0] ShiftVal; // value being shifted (to int - XMan, to FP - |integer input|)
logic [64+1:0] ShiftedMan; // shifted mantissa truncated
logic [64:0] RoundedTmp; // full size rounded result - in case of overfow
logic [63:0] Rounded; // rounded result
logic [12:0] ExpVal; // unbiased X exponent
logic [12:0] ShiftCnt; // how much is the mantissa shifted
logic [64-1:0] IntIn; // trimed integer input
logic [64-1:0] PosInt; // absolute value of the integer input
logic [64-1:0] IntIn; // trimed integer input
logic [64-1:0] PosInt; // absolute value of the integer input
logic [63:0] CvtIntRes; // interger result from the fp -> int instructions
logic [63:0] CvtFPRes; // floating point result from the int -> fp instructions
logic Of, Uf; // did the integer result underflow or overflow
@ -61,11 +62,9 @@ module fcvt (
// {long, unsigned, to int, from int}
// calculate signals based off the input and output's size
// assign Bias = FmtE ? 12'h3ff : 12'h7f;
assign Res64 = ((FOpCtrlE==4'b1010 || FOpCtrlE==4'b1110) | (FmtE&(FOpCtrlE==4'b0001 | FOpCtrlE==4'b0101 | FOpCtrlE==4'b0000 | FOpCtrlE==4'b1001 | FOpCtrlE==4'b1101)));
assign In64 = ((FOpCtrlE==4'b1001 || FOpCtrlE==4'b1101) | (FmtE&(FOpCtrlE==4'b0010 | FOpCtrlE==4'b0110 | FOpCtrlE==4'b1010 | FOpCtrlE==4'b1110) | (FOpCtrlE==4'b1101 & ~FmtE)));
//assign SubBits = In64 ? 8'd64 : 8'd32;
assign SubBits = 8'd64;
assign Res64 = (FOpCtrlE[1]&FOpCtrlE[3]) | (FmtE&FOpCtrlE[0]);
assign In64 = (FOpCtrlE[0]&FOpCtrlE[3]) | (FmtE&FOpCtrlE[1]);
assign SubBits = In64 ? 8'd64 : 8'd32;
assign Bits = Res64 ? 8'd64 : 8'd32;
// calculate the unbiased exponent
@ -80,15 +79,6 @@ module fcvt (
// determine the integer's sign
assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0;
// generate
// if(`XLEN == 64)
// lz64 lz(LZResP, LZResV, PosInt);
// else if(`XLEN == 32) begin
// assign LZResP[5] = 1'b0;
// lz32 lz(LZResP[4:0], LZResV, PosInt);
// end
// endgenerate
// Leading one detector
logic [8:0] i;
always_comb begin
@ -98,7 +88,7 @@ module fcvt (
end
// if no leading one was found, set the exponent to zero; otherwise calculate the exponent
assign TmpExp = i==`XLEN ? 0 : BiasE + SubBits - LZResP;
assign TmpExp = i==`XLEN ? 0 : FmtE ? 1023 + SubBits - LZResP : 127 + SubBits - LZResP;

View File

@ -1,256 +0,0 @@
// //
// // File name : fpdiv
// // Title : Floating-Point Divider/Square-Root
// // project : FPU
// // Library : fpdiv
// // Author(s) : James E. Stine, Jr.
// // Purpose : definition of main unit to floating-point div/sqrt
// // notes :
// //
// // Copyright Oklahoma State University
// //
// // Basic Operations
// //
// // Step 1: Load operands, set flags, and convert SP to DP
// // Step 2: Check for special inputs ( +/- Infinity, NaN)
// // Step 3: Exponent Logic
// // Step 4: Divide/Sqrt using Goldschmidt
// // Step 5: Normalize the result.//
// // Shift left until normalized. Normalized when the value to the
// // left of the binrary point is 1.
// // Step 6: Round the result.//
// // Step 7: Put quotient/remainder onto output.
// //
// // `timescale 1ps/1ps
// module fdivsqrt (FDivSqrtDoneE, FDivResultM, FDivSqrtFlgM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
// FDivStartE, reset, clk, FDivBusyE, HoldInputs);
// input [63:0] DivInput1E; // 1st input operand (A)
// input [63:0] DivInput2E; // 2nd input operand (B)
// input [2:0] FrmE; // Rounding mode - specify values
// input DivOpType; // Function opcode
// input FmtE; // Result Precision (0 for double, 1 for single) //***will need to swap this
// input DivOvEn; // Overflow trap enabled
// input DivUnEn; // Underflow trap enabled
// input FDivStartE;
// input reset;
// input clk;
// output [63:0] FDivResultM; // Result of operation
// output [4:0] FDivSqrtFlgM; // IEEE exception flags
// output FDivSqrtDoneE;
// output FDivBusyE, HoldInputs;
// supply1 vdd;
// supply0 vss;
// wire [63:0] Float1;
// wire [63:0] Float2;
// wire [63:0] IntValue;
// wire DivDenormM; // DivDenormM on input or output
// wire [12:0] exp1, exp2, expF;
// wire [12:0] exp_diff, bias;
// wire [13:0] exp_sqrt;
// wire [12:0] exp_s;
// wire [12:0] exp_c;
// wire [10:0] exponent, exp_pre;
// wire [63:0] Result;
// wire [52:0] mantissaA;
// wire [52:0] mantissaB;
// wire [63:0] sum, sum_tc, sum_corr, sum_norm;
// wire [5:0] align_shift;
// wire [5:0] norm_shift;
// wire [2:0] sel_inv;
// wire op1_Norm, op2_Norm;
// wire opA_Norm, opB_Norm;
// wire Invalid;
// wire DenormIn, DenormIO;
// wire [4:0] FlagsIn;
// wire exp_gt63;
// wire Sticky_out;
// wire signResult, sign_corr;
// wire corr_sign;
// wire zeroB;
// wire convert;
// wire swap;
// wire sub;
// wire [63:0] q1, qm1, qp1, q0, qm0, qp0;
// wire [63:0] rega_out, regb_out, regc_out, regd_out;
// wire [127:0] regr_out;
// wire [2:0] sel_muxa, sel_muxb;
// wire sel_muxr;
// wire load_rega, load_regb, load_regc, load_regd, load_regr, load_regs;
// wire donev, sel_muxrv, sel_muxsv;
// wire [1:0] sel_muxav, sel_muxbv;
// wire load_regav, load_regbv, load_regcv;
// wire load_regrv, load_regsv;
// logic exp_cout1, exp_cout2, exp_odd, open;
// // Convert the input operands to their appropriate forms based on
// // the orignal operands, the DivOpType , and their precision FmtE.
// // Single precision inputs are converted to double precision
// // and the sign of the first operand is set appropratiately based on
// // if the operation is absolute value or negation.
// convert_inputs_div divconv1 (Float1, Float2, DivInput1E, DivInput2E, DivOpType, FmtE);
// // Test for exceptions and return the "Invalid Operation" and
// // "Denormalized" Input FDivSqrtFlgM. The "sel_inv" is used in
// // the third pipeline stage to select the result. Also, op1_Norm
// // and op2_Norm are one if DivInput1E and DivInput2E are not zero or denormalized.
// // sub is one if the effective operation is subtaction.
// exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm,
// Float1, Float2, DivOpType);
// // Determine Sign/Mantissa
// assign signResult = ((Float1[63]^Float2[63])&~DivOpType) | Float1[63]&DivOpType;
// assign mantissaA = {vdd, Float1[51:0]};
// assign mantissaB = {vdd, Float2[51:0]};
// // Perform Exponent Subtraction - expA - expB + Bias
// assign exp1 = {2'b0, Float1[62:52]};
// assign exp2 = {2'b0, Float2[62:52]};
// // bias : DP = 2^{11-1}-1 = 1023
// assign bias = {3'h0, 10'h3FF};
// // Divide exponent
// csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c); //***adder
// exp_add explogic1 (exp_cout1, {open, exp_diff}, //***adder?
// {vss, exp_s}, {vss, exp_c}, 1'b1);
// // Sqrt exponent (check if exponent is odd)
// assign exp_odd = Float1[52] ? vss : vdd;
// exp_add explogic2 (exp_cout2, exp_sqrt, //***adder?
// {vss, exp1}, {4'h0, 10'h3ff}, exp_odd);
// // Choose correct exponent
// assign expF = DivOpType ? exp_sqrt[13:1] : exp_diff;
// // Main Goldschmidt/Division Routine
// divconv goldy (q1, qm1, qp1, q0, qm0, qp0,
// rega_out, regb_out, regc_out, regd_out,
// regr_out, mantissaB, mantissaA,
// sel_muxa, sel_muxb, sel_muxr,
// reset, clk,
// load_rega, load_regb, load_regc, load_regd,
// load_regr, load_regs, FmtE, DivOpType, exp_odd);
// // FSM : control divider
// fsm control (FDivSqrtDoneE, load_rega, load_regb, load_regc, load_regd,
// load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
// clk, reset, FDivStartE, DivOpType, FDivBusyE, HoldInputs);
// // Round the mantissa to a 52-bit value, with the leading one
// // removed. The rounding units also handles special cases and
// // set the exception flags.
// //***add max magnitude and swap negitive and positive infinity
// rounder_div divround1 (Result, DenormIO, FlagsIn,
// FrmE, FmtE, DivOvEn, DivUnEn, expF,
// sel_inv, Invalid, DenormIn, signResult,
// q1, qm1, qp1, q0, qm0, qp0, regr_out);
// // Store the final result and the exception flags in registers.
// flopenr #(64) rega (clk, reset, FDivSqrtDoneE, Result, FDivResultM);
// flopenr #(1) regb (clk, reset, FDivSqrtDoneE, DenormIO, DivDenormM);
// flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivSqrtFlgM);
// endmodule // fpadd
// //
// // Brent-Kung Prefix Adder
// // (yes, it is 14 bits as my generator is broken for 13 bits :(
// // assume, synthesizer will delete stuff not needed )
// //
// module exp_add (cout, sum, a, b, cin);
// input [13:0] a, b;
// input cin;
// output [13:0] sum;
// output cout;
// wire [14:0] p,g;
// wire [13:0] c;
// // pre-computation
// assign p={a^b,1'b0};
// assign g={a&b, cin};
// // prefix tree
// brent_kung prefix_tree(c, p[13:0], g[13:0]);
// // post-computation
// assign sum=p[14:1]^c;
// assign cout=g[14]|(p[14]&c[13]);
// endmodule // exp_add
// module brent_kung (c, p, g);
// input [13:0] p;
// input [13:0] g;
// output [14:1] c;
// logic G_1_0, G_3_2,G_5_4,G_7_6,G_9_8,G_11_10,G_13_12,G_3_0,G_7_4,G_11_8;
// logic P_3_2,P_5_4,P_7_6,P_9_8,P_11_10,P_13_12,P_7_4,P_11_8;
// logic G_7_0,G_11_0,G_5_0,G_9_0,G_13_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0;
// // parallel-prefix, Brent-Kung
// // Stage 1: Generates G/FmtE pairs that span 1 bits
// grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
// black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
// black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
// black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
// black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
// black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
// black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
// // Stage 2: Generates G/FmtE pairs that span 2 bits
// grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
// black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
// black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
// // Stage 3: Generates G/FmtE pairs that span 4 bits
// grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
// // Stage 4: Generates G/FmtE pairs that span 8 bits
// // Stage 5: Generates G/FmtE pairs that span 4 bits
// grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
// // Stage 6: Generates G/FmtE pairs that span 2 bits
// grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
// grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
// grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12);
// // Last grey cell stage
// grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
// grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
// grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
// grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
// grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
// grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
// // Final Stage: Apply c_k+1=G_k_0
// assign c[1]=g[0];
// assign c[2]=G_1_0;
// assign c[3]=G_2_0;
// assign c[4]=G_3_0;
// assign c[5]=G_4_0;
// assign c[6]=G_5_0;
// assign c[7]=G_6_0;
// assign c[8]=G_7_0;
// assign c[9]=G_8_0;
// assign c[10]=G_9_0;
// assign c[11]=G_10_0;
// assign c[12]=G_11_0;
// assign c[13]=G_12_0;
// assign c[14]=G_13_0;
// endmodule // brent_kung

View File

@ -26,41 +26,47 @@
`include "wally-config.vh"
module fhazard(
input logic [4:0] Adr1E, Adr2E, Adr3E,
input logic FRegWriteM, FRegWriteW,
input logic [4:0] RdM, RdW,
input logic [2:0] FResultSelM,
output logic FStallD,
output logic [1:0] FForwardXE, FForwardYE, FForwardZE
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
input logic FRegWriteM, FRegWriteW, // is the fp register being written to
input logic [4:0] RdM, RdW, // the adress being written to
input logic [2:0] FResultSelM, // the result being selected
output logic FStallD, // stall the decode stage
output logic [1:0] FForwardXE, FForwardYE, FForwardZE // select a forwarded value
);
always_comb begin
// set ReadData as default
// set defaults
FForwardXE = 2'b00; // choose FRD1E
FForwardYE = 2'b00; // choose FRD2E
FForwardZE = 2'b00; // choose FRD3E
FStallD = 0;
if ((Adr1E == RdM) & FRegWriteM)
// if the result will be FResM
if(FResultSelM == 3'b100) FForwardXE = 2'b10; // choose FResM
else FStallD = 1; // if the result won't be ready stall
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
// if the needed value is in the memory stage - input 1
if ((Adr1E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResultSelM == 3'b100) FForwardXE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
if ((Adr2E == RdM) & FRegWriteM)
// if the result will be FResM
if(FResultSelM == 3'b100) FForwardYE = 2'b10; // choose FResM
else FStallD = 1; // if the result won't be ready stall
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
// if the needed value is in the memory stage - input 2
if ((Adr2E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResultSelM == 3'b100) FForwardYE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
if ((Adr3E == RdM) & FRegWriteM)
// if the result will be FResM
if(FResultSelM == 3'b100) FForwardZE = 2'b10; // choose FResM
else FStallD = 1; // if the result won't be ready stall
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
// if the needed value is in the memory stage - input 3
if ((Adr3E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResultSelM == 3'b100) FForwardZE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
end

View File

@ -26,41 +26,50 @@
// `include "../../../config/rv64icfd/wally-config.vh"
module fma(
input logic clk,
input logic reset,
input logic FlushM,
input logic StallM,
input logic FmtE, FmtM, // precision 1 = double 0 = single
input logic [2:0] FOpCtrlM, FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic XSgnE, YSgnE, ZSgnE,
input logic [`NE-1:0] XExpE, YExpE, ZExpE,
input logic [`NF:0] XManE, YManE, ZManE,
input logic XSgnM, YSgnM, ZSgnM,
input logic [`NE-1:0] XExpM, YExpM, ZExpM, // ***needed
input logic [`NF:0] XManM, YManM, ZManM,
input logic XDenormE, YDenormE, ZDenormE,
input logic XZeroE, YZeroE, ZZeroE,
input logic XNaNM, YNaNM, ZNaNM,
input logic XSNaNM, YSNaNM, ZSNaNM,
input logic XZeroM, YZeroM, ZZeroM,
input logic XInfM, YInfM, ZInfM,
input logic [10:0] BiasE,
output logic [`FLEN-1:0] FMAResM,
output logic [4:0] FMAFlgM);
input logic clk,
input logic reset,
input logic FlushM, // flush the memory stage
input logic StallM, // stall memory stage
input logic FmtE, FmtM, // precision 1 = double 0 = single
input logic [2:0] FOpCtrlM, FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic XSgnE, YSgnE, ZSgnE, // input signs - execute stage
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // input exponents - execute stage
input logic [`NF:0] XManE, YManE, ZManE, // input mantissa - execute stage
input logic XSgnM, YSgnM, ZSgnM, // input signs - memory stage
input logic [`NE-1:0] XExpM, YExpM, ZExpM, // input exponents - memory stage
input logic [`NF:0] XManM, YManM, ZManM, // input mantissa - memory stage
input logic XDenormE, YDenormE, ZDenormE, // is denorm
input logic XZeroE, YZeroE, ZZeroE, // is zero - execute stage
input logic XNaNM, YNaNM, ZNaNM, // is NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // is signaling NaN
input logic XZeroM, YZeroM, ZZeroM, // is zero - memory stage
input logic XInfM, YInfM, ZInfM, // is infinity
input logic [10:0] BiasE, // bias - depends on precison (max exponent/2)
output logic [`FLEN-1:0] FMAResM, // FMA result
output logic [4:0] FMAFlgM); // FMA flags
//fma/mult
// fmadd = ?000
// fmsub = ?001
// fnmsub = ?010 -(a*b)+c
// fnmadd = ?011 -(a*b)-c
// fmul = ?100
// {?, is mul, negate product, negate addend}
// signals transferred between pipeline stages
logic [2*`NF+1:0] ProdManE, ProdManM;
logic [3*`NF+5:0] AlignedAddendE, AlignedAddendM;
logic [`NE+1:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
logic KillProdE, KillProdM;
logic [`NE+1:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
logic KillProdE, KillProdM;
fma1 fma1 (.XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.BiasE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
.FOpCtrlE, .FmtE, .ProdManE, .AlignedAddendE,
.ProdExpE, .AddendStickyE, .KillProdE);
// E/M pipeline registers
flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
@ -82,8 +91,8 @@ module fma1(
// input logic XSgnE, YSgnE, ZSgnE,
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format
input logic [`NF:0] XManE, YManE, ZManE, // fractions in U(0.NF) format]
input logic XDenormE, YDenormE, ZDenormE,
input logic XZeroE, YZeroE, ZZeroE,
input logic XDenormE, YDenormE, ZDenormE, // is the input denormal
input logic XZeroE, YZeroE, ZZeroE, // is the input zero
input logic [`NE-1:0] BiasE,
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic FmtE, // precision 1 = double 0 = single
@ -94,8 +103,8 @@ module fma1(
output logic KillProdE // set the product to zero before addition if the product is too small to matter
);
logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format *** is this enough bits?
logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter U(NF+5.3NF+1)
///////////////////////////////////////////////////////////////////////////////
@ -200,32 +209,33 @@ module fma2(
output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
logic [`NF-1:0] ResultFrac; // Result fraction
logic [`NE-1:0] ResultExp; // Result exponent
logic ResultSgn; // Result sign
logic PSgn; // product sign
logic [`NF-1:0] ResultFrac; // Result fraction
logic [`NE-1:0] ResultExp; // Result exponent
logic ResultSgn; // Result sign
logic PSgn; // product sign
logic [2*`NF+1:0] ProdMan2; // product being added
logic [3*`NF+6:0] AlignedAddend2; // possibly inverted aligned Z
logic [3*`NF+5:0] Sum; // positive sum
logic [3*`NF+6:0] PreSum; // possibly negitive sum
logic [`NE+1:0] SumExp; // exponent of the normalized sum
logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results
logic [`NE+1:0] SumExpTmpMinus1; // SumExpTmp-1
logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow
logic [`NF+2:0] NormSum; // normalized sum
logic [`NE+1:0] SumExp; // exponent of the normalized sum
logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results
logic [`NE+1:0] SumExpTmpMinus1; // SumExpTmp-1
logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow
logic [`NF+2:0] NormSum; // normalized sum
logic [3*`NF+5:0] SumShifted; // sum shifted for normalization
logic [8:0] NormCnt; // output of the leading zero detector //***change this later
logic NormSumSticky; // sticky bit calulated from the normalized sum
logic SumZero; // is the sum zero
logic NegSum; // is the sum negitive
logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z)
logic ResultDenorm; // is the result denormalized
logic Sticky; // Sticky bit
logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding
logic UfPlus1, UfCalcPlus1; // do you add one (for determining underflow flag)
logic Invalid,Underflow,Overflow,Inexact; // flags
logic [8:0] DenormShift; // right shift if the result is denormalized //***change this later
logic SubBySmallNum; // was there supposed to be a subtraction by a small number
logic [8:0] NormCnt; // output of the leading zero detector //***change this later
logic NormSumSticky; // sticky bit calulated from the normalized sum
logic SumZero; // is the sum zero
logic NegSum; // is the sum negitive
logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z)
logic ResultDenorm; // is the result denormalized
logic Sticky; // Sticky bit
logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding
logic UfPlus1, UfCalcPlus1; // do you add one (for determining underflow flag)
logic Invalid,Underflow,Overflow,Inexact; // flags
logic [8:0] DenormShift; // right shift if the result is denormalized //***change this later
logic SubBySmallNum; // was there supposed to be a subtraction by a small number
logic [`FLEN-1:0] Addend; // value to add (Z or zero)
logic ZeroSgn; // the result's sign if the sum is zero
logic ResultSgnTmp; // the result's sign assuming the result is not zero
@ -306,11 +316,12 @@ module fma2(
assign SumZero = ~(|Sum);
// determine the length of the fraction based on precision
//assign FracLen = FmtM ? `NF : 13'd23;
assign FracLen = `NF;
assign FracLen = FmtM ? `NF : 13'd23;
//assign FracLen = `NF;
// Determine if the result is denormal
assign SumExpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - (`NF+4));
assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
// Determine the shift needed for denormal results
@ -458,16 +469,18 @@ module fma2(
// 1) any input is a signaling NaN
// 2) Inf - Inf (unless x or y is NaN)
// 3) 0 * Inf
assign MaxExp = FmtM ? {`NE{1'b1}} : 13'd255;
assign MaxExp = FmtM ? {`NE{1'b1}} : {8{1'b1}};
assign SigNaN = XSNaNM | YSNaNM | ZSNaNM;
assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
// Set Overflow flag if the number is too big to be represented
// - Don't set the overflow flag if an overflowed result isn't output
assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
assign Overflow = FullResultExp >= {MaxExp} & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Set Underflow flag if the number is too small to be represented in normal numbers
// - Don't set the underflow flag if the result is exact
assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Guard|Sticky|UfGuard)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Set Inexact flag if the result is different from what would be output given infinite precision
@ -504,8 +517,8 @@ module fma2(
YNaNM ? YNaNResult :
ZNaNM ? ZNaNResult :
Invalid ? InvalidResult : // has to be before inf
XInfM ? FmtM ? {PSgn, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgn, XExpM[7:0], XManM[51:29]} :
YInfM ? FmtM ? {PSgn, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgn, YExpM[7:0], YManM[51:29]} :
XInfM ? FmtM ? {PSgn, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgn, XExpM[7:0], XManM[51:29]} :
YInfM ? FmtM ? {PSgn, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgn, YExpM[7:0], YManM[51:29]} :
ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} :
Overflow ? OverflowResult :
KillProdM ? KillProdResult : // has to be after Underflow

View File

@ -1,286 +0,0 @@
///////////////////////////////////////////
//
// Written: James.Stine@okstate.edu 1 February 2021
// Modified:
//
// Purpose: FP Add/Sub instructions
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
//
// Basic and Denormalized Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Compare exponents. Swap the operands if exp1 < exp2
// or if (exp1 = exp2 AND mnt1 < mnt2)
// Step 4: Shift the mantissa corresponding to the smaller exponent,
// and extend precision by three bits to the right.
// Step 5: Add or subtract the mantissas.
// Step 6: Normalize the result.
// Shift left until normalized. Normalized when the value to the
// left of the binary point is 1.
// Step 7: Round the result.
// Step 8: Put sum onto output.
//
module fpadd (AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn);

  // Combinational floating-point add/subtract datapath. op_type also selects
  // the other operations handled below (integer-to-float conversion when
  // op_type[2:1] == 2'b01, and a bypass path when op_type[3] is set).
  //
  // Flow: convert_inputs -> exception check -> exponent compare/swap ->
  // mantissa alignment -> 64-bit add/subtract -> leading-zero detect ->
  // normalization shift -> rounder.

  input [63:0] op1;       // 1st input operand (A)
  input [63:0] op2;       // 2nd input operand (B)
  input [2:0]  rm;        // Rounding mode - specify values
  input [3:0]  op_type;   // Function opcode
  input        P;         // Result Precision (0 for double, 1 for single)
  input        OvEn;      // Overflow trap enabled
  input        UnEn;      // Underflow trap enabled

  output [63:0] AS_Result; // Result of operation
  output [4:0]  Flags;     // IEEE exception flags
  output        Denorm;    // Denorm on input or output

  wire [63:0] Float1;
  wire [63:0] Float2;
  wire [63:0] IntValue;
  wire [11:0] exp1, exp2;
  wire [11:0] exp_diff1, exp_diff2;
  wire [10:0] exponent, exp_pre;
  wire [11:0] exp_shift;
  // These two nets were previously undeclared, which made them implicit
  // 1-bit nets and threw away all but the LSB of the adjusted exponent.
  // They are sized to match the 12-bit exp1/exp2 arithmetic that drives them.
  wire [11:0] exp1_denorm, exp2_denorm;
  wire [63:0] Result;
  wire [51:0] mantissaA;
  wire [56:0] mantissaA1;
  wire [63:0] mantissaA3;
  wire [51:0] mantissaB;
  wire [56:0] mantissaB1, mantissaB2;
  wire [63:0] mantissaB3;
  wire [63:0] sum, sum_tc, sum_corr, sum_norm, sum_norm_w_bypass;
  wire [5:0]  align_shift;
  wire [5:0]  norm_shift, norm_shift_denorm;
  wire [3:0]  sel_inv;
  wire        op1_Norm, op2_Norm;
  wire        opA_Norm, opB_Norm;
  wire        Invalid;
  wire        DenormIn, DenormIO;
  wire [4:0]  FlagsIn;
  wire        exp_valid;
  wire        exp_gt63;
  wire        Sticky_out;
  wire        signA, sign_corr;
  wire        corr_sign;
  wire        zeroB;
  wire        convert;
  wire        swap;
  wire        sub;
  wire [10:0] exponent_postsum;
  wire        mantissa_comp;
  wire        mantissa_comp_sum;
  wire        mantissa_comp_sum_tc;
  wire        Float1_sum_comp;
  wire        Float2_sum_comp;
  wire        Float1_sum_tc_comp;
  wire        Float2_sum_tc_comp;
  wire [5:0]  ZP_mantissaA;
  wire [5:0]  ZP_mantissaB;
  wire        ZV_mantissaA;
  wire        ZV_mantissaB;
  wire        normal_underflow;
  wire        normal_overflow;

  // Convert the input operands to their appropriate forms based on
  // the original operands, the op_type, and their precision P.
  // Single precision inputs are converted to double precision
  // and the sign of the first operand is set appropriately based on
  // if the operation is absolute value or negation.
  convert_inputs conv1 (Float1, Float2, op1, op2, op_type, P);

  // Test for exceptions and return the "Invalid Operation" and
  // "Denormalized" Input Flags. The "sel_inv" is used by the rounder
  // to select the result. Also, op1_Norm and op2_Norm are one if op1
  // and op2 are not zero or denormalized.
  // sub is one if the effective operation is subtraction.
  exception exc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, sub,
                  Float1, Float2, op_type);

  // Perform Exponent Subtraction (used for alignment). Both exponent
  // subtractions are performed in parallel. The input values are
  // zero-extended to 12 bits prior to performing the subtraction.
  assign exp1 = {1'b0, Float1[62:52]};
  assign exp2 = {1'b0, Float2[62:52]};
  assign exp_diff1 = exp1 - exp2;
  // With a denormalized input the sign bits take the place of the
  // zero-extension bit in the reverse difference.
  assign exp_diff2 = DenormIn ? ({Float2[63], exp2[10:0]} - {Float1[63], exp1[10:0]}) : exp2 - exp1;

  // The second operand (B) should be set to zero, if op_type does not
  // specify addition or subtraction
  assign zeroB = op_type[2] | op_type[1];

  // Swap operands if zeroB is not one and exp1 < exp2 (exp_diff1 negative).
  // Swapping causes exp2 to be used for the result exponent.
  // Only the exponent of the larger operand is used to determine
  // the final result.
  assign swap = exp_diff1[11] & ~zeroB;
  assign exponent = swap ? exp2[10:0] : exp1[10:0];
  assign exponent_postsum = swap ? exp2[10:0] : exp1[10:0];
  assign mantissaA = swap ? Float2[51:0] : Float1[51:0];
  assign mantissaB = swap ? Float1[51:0] : Float2[51:0];
  assign signA = swap ? Float2[63] : Float1[63];

  // Leading-zero detectors on the two (possibly swapped) 52-bit input
  // mantissas, used to detect leading zeroes on denormalized mantissas.
  lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
  lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);

  // Denormalized exponents created by subtracting the leading zeroes from the original exponents
  assign exp1_denorm = swap ? (exp1 - ZP_mantissaB) : (exp1 - ZP_mantissaA);
  assign exp2_denorm = swap ? (exp2 - ZP_mantissaA) : (exp2 - ZP_mantissaB);

  // Finds normal underflow result to determine whether to round final exponent down
  // Comparison between each float and the resulting sum of the primary cla adder/subtractor and cla subtractor
  assign Float1_sum_comp = (Float1[51:0] > sum[51:0]) ? 1'b0 : 1'b1;
  assign Float2_sum_comp = (Float2[51:0] > sum[51:0]) ? 1'b0 : 1'b1;
  assign Float1_sum_tc_comp = (Float1[51:0] > sum_tc[51:0]) ? 1'b0 : 1'b1;
  assign Float2_sum_tc_comp = (Float2[51:0] > sum_tc[51:0]) ? 1'b0 : 1'b1;

  // Determines the correct Float value to compare based on swap result
  assign mantissa_comp_sum = swap ? Float2_sum_comp : Float1_sum_comp;
  assign mantissa_comp_sum_tc = swap ? Float2_sum_tc_comp : Float1_sum_tc_comp;

  // Determines the correct comparison result based on operation and sign of resulting sum
  assign mantissa_comp = (op_type[0] ^ sum[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;

  // The normal underflow bit is only taken from the comparison when the
  // operand signs are equal (XNOR) and at least one operand is normalized;
  // otherwise it is forced to zero.
  assign normal_underflow = ((Float1[63] ~^ Float2[63]) & (opA_Norm | opB_Norm)) ? mantissa_comp : 1'b0;

  // Determine the alignment shift and limit it to 63. If any bit from
  // exp_shift[6] to exp_shift[11] is one, then shift is set to all ones.
  assign exp_shift = swap ? exp_diff2 : exp_diff1;
  assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9]
                  | exp_shift[8] | exp_shift[7] | exp_shift[6];
  // Only the low six bits of exp_shift are kept (the upper bits are
  // summarized by exp_gt63); the slice makes the truncation explicit.
  assign align_shift = exp_shift[5:0] | {6{exp_gt63}};

  // Unpack the 52-bit mantissas to 57-bit numbers of the form.
  //    001.M[51]M[50] ... M[1]M[0]00
  // Unless the number has an exponent of zero, in which case it
  // is unpacked as
  //    000.00 ... 00
  // This effectively flushes denormalized values to zero.
  // The three bits to the left of the binary point prevent overflow
  // and loss of sign information. The two bits to the right of the
  // original mantissa form the "guard" and "round" bits that are used
  // to round the result.
  assign opA_Norm = swap ? op2_Norm : op1_Norm;
  assign opB_Norm = swap ? op1_Norm : op2_Norm;
  assign mantissaA1 = {2'h0, opA_Norm, mantissaA[51:0]&{52{opA_Norm}}, 2'h0};
  assign mantissaB1 = {2'h0, opB_Norm, mantissaB[51:0]&{52{opB_Norm}}, 2'h0};

  // Perform mantissa alignment using a 57-bit barrel shifter
  // If any of the bits shifted out are one, Sticky_out is set.
  // The size of the barrel shifter could be reduced by two bits
  // by not adding the leading two zeros until after the shift.
  barrel_shifter_r57 bs1 (mantissaB2, Sticky_out, mantissaB1, align_shift);

  // Place either the sign-extended 32-bit value or the original 64-bit value
  // into IntValue (to be used for integer to floating point conversion)
  assign IntValue [31:0] = op1[31:0];
  assign IntValue [63:32] = op_type[0] ? {32{op1[31]}} : op1[63:32];

  // If doing an integer to floating point conversion, mantissaA3 is set to
  // IntValue and the prenormalized exponent is set to 1084. Otherwise,
  // mantissaA3 is simply extended to 64-bits by setting the 7 LSBs to zero,
  // and the exponent value is left unchanged.
  // Under denormalized cases, the exponent before the rounder is set to 1
  // if the normal shift value is 11.
  assign convert = ~op_type[2] & op_type[1];
  assign mantissaA3 = (op_type[3]) ? (op_type[0] ? Float1 : ~Float1) : (DenormIn ? ({12'h0, mantissaA}) : (convert ? IntValue : {mantissaA1, 7'h0}));
  // exp_pre is 11 bits wide, so only the low 11 bits of the 12-bit
  // denormal-adjusted exponents are used.
  assign exp_pre = DenormIn ?
                   ((norm_shift == 6'b001011) ? 11'b00000000001 : (swap ? exp2_denorm[10:0] : exp1_denorm[10:0]))
                   : (convert ? 11'b10000111100 : exponent);

  // Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to
  // 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six
  // zeros. With op_type[3] set, mantissaB3 is the constant 1.
  assign mantissaB3[63:7] = (op_type[3]) ? (57'h0) : (DenormIn ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
  assign mantissaB3[6] = (op_type[3]) ? (1'b0) : (DenormIn ? mantissaB[6] : Sticky_out & ~zeroB);
  assign mantissaB3[5:0] = (op_type[3]) ? (6'h01) : (DenormIn ? mantissaB[5:0] : 6'h0);

  // The sign of the result needs to be corrected if the true
  // operation is subtraction and the input operands were swapped.
  assign corr_sign = ~op_type[2]&~op_type[1]&op_type[0]&swap;

  // 64-bit Mantissa Adder/Subtractor
  cla64 add1 (sum, mantissaA3, mantissaB3, sub); //***adder

  // 64-bit Mantissa Subtractor - to get the two's complement of the
  // result when the sign from the adder/subtractor is negative.
  cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3); //***adder

  // Determine the correct sign of the result
  assign sign_corr = ((corr_sign ^ signA) & ~convert) ^ sum[63];

  // If the sum is negative, use its two's complement instead.
  // This value has to be 64-bits to correctly handle the
  // case 10...00
  assign sum_corr = (DenormIn & (opA_Norm | opB_Norm) & ( ( (Float1[63] ~^ Float2[63]) & op_type[0] ) | ((Float1[63] ^ Float2[63]) & ~op_type[0]) ))
                    ? (sum[63] ? sum : sum_tc) : ( (op_type[3]) ? sum : (sum[63] ? sum_tc : sum));

  // Finds normal overflow result (bit 52 of the positive sum), forced to
  // one when a denormal input makes the sum exactly zero on an add.
  assign normal_overflow = (DenormIn & (sum == 64'h0) & (opA_Norm | opB_Norm) & ~op_type[0]) ? 1'b1 : (sum[63] ? sum_tc[52] : sum[52]);

  // Leading-Zero Detector. Determine the size of the shift needed for
  // normalization. If sum_corr is all zeros, the exp_valid is
  // zero; otherwise, it is one.
  lz64 lzd1 (norm_shift, exp_valid, sum_corr);
  assign norm_shift_denorm = (DenormIn & ( (~opA_Norm & ~opB_Norm) | normal_underflow)) ? (6'h00) : (norm_shift);

  // Barrel shifter used for normalization. It takes as inputs the
  // corrected sum and the normalization shift amount, and outputs
  // the normalized sum.
  barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);
  // With op_type[3] set, the normalizer is bypassed.
  assign sum_norm_w_bypass = (op_type[3]) ? (op_type[0] ? ~sum_corr : sum_corr) : (sum_norm);

  // Round the mantissa to a 52-bit value, with the leading one
  // removed. If the result is a single precision number, the actual
  // mantissa is in the upper 23 bits and the lower 29 bits are zero.
  // At this point, normalization has already been performed, so we know
  // exactly where the rounding point is. The rounding unit also
  // handles special cases and sets the exception flags.
  rounder round1 (Result, DenormIO, FlagsIn, rm, P, OvEn, UnEn, exp_valid,
                  sel_inv, Invalid, DenormIn, convert, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass,
                  exponent_postsum, op1_Norm, op2_Norm, Float1[63:52], Float2[63:52],
                  normal_overflow, normal_underflow, swap, op_type, sum);

  // Drive the outputs directly from the rounder; no registers are
  // instantiated in this module.
  assign AS_Result = Result;
  assign {Denorm, Flags} = {DenormIO, FlagsIn};

endmodule // fpadd

View File

@ -24,133 +24,117 @@
// `timescale 1ps/1ps
// fpdiv: floating-point divide / square-root unit (op_type: fdiv = 0, fsqrt = 1).
// Dataflow visible here: convert_inputs_div produces Float1/Float2 from op1/op2,
// exception_div classifies special cases (sel_inv, Invalid), the result exponent
// is computed from the operand exponents, divconv ("goldy") runs the iteration
// under control of fsm, rounder_div produces Result/FlagsIn, and flopenr
// registers the outputs (presumably enabled by done - confirm flopenr port order).
// NOTE(review): several ports, signals, assigns, and instances are listed twice
// below (e.g. op1, clk, Float1, conv1, exc1, goldy). This looks like two
// revisions interleaved - confirm which set is live before relying on it.
module fpdiv (
input logic [63:0] op1, // 1st input operand (A)
input logic [63:0] op2, // 2nd input operand (B)
input logic [1:0] rm, // Rounding mode - specify values
input logic op_type, // Function opcode (fdiv = 0, fsqrt = 1)
input logic P, // Result Precision (0 for double, 1 for single)
input logic OvEn, // Overflow trap enabled
input logic UnEn, // Underflow trap enabled
input logic start,
input logic reset,
input logic clk,
output logic done,
output logic FDivBusyE,
output logic HoldInputs,
output logic [63:0] AS_Result, // Result of operation
output logic [4:0] Flags); // IEEE exception flags
logic Denorm; // Denorm on input or output
// output done;
input logic clk,
input logic reset,
input logic start,
input logic [63:0] op1, // 1st input operand (A)
input logic [63:0] op2, // 2nd input operand (B)
input logic [1:0] rm, // Rounding mode - specify values
input logic op_type, // Function opcode (fdiv = 0, fsqrt = 1)
input logic P, // Result Precision (0 for double, 1 for single)
input logic OvEn, // Overflow trap enabled
input logic UnEn, // Underflow trap enabled
output logic done,
output logic FDivBusyE,
output logic [63:0] AS_Result, // Result of operation
output logic [4:0] Flags); // IEEE exception flags
supply1 vdd;
supply0 vss;
wire [63:0] Float1;
wire [63:0] Float2;
wire [63:0] IntValue;
logic [63:0] Float1;
logic [63:0] Float2;
wire [12:0] exp1, exp2, expF;
wire [12:0] exp_diff, bias;
wire [13:0] exp_sqrt;
wire [12:0] exp_s;
wire [12:0] exp_c;
logic [12:0] exp1, exp2, expF;
logic [12:0] exp_diff, bias;
logic [13:0] exp_sqrt;
logic [12:0] exp_s;
logic [12:0] exp_c;
wire [10:0] exponent, exp_pre;
wire [63:0] Result;
wire [52:0] mantissaA;
wire [52:0] mantissaB;
wire [63:0] sum, sum_tc, sum_corr, sum_norm;
logic [10:0] exponent;
logic [63:0] Result;
logic [52:0] mantissaA;
logic [52:0] mantissaB;
wire [5:0] align_shift;
wire [5:0] norm_shift;
wire [2:0] sel_inv;
wire op1_Norm, op2_Norm;
wire opA_Norm, opB_Norm;
wire Invalid;
wire DenormIn, DenormIO;
wire [4:0] FlagsIn;
wire exp_gt63;
wire Sticky_out;
wire signResult, sign_corr;
wire corr_sign;
wire zeroB;
wire convert;
wire swap;
wire sub;
logic [2:0] sel_inv;
logic Invalid;
logic [4:0] FlagsIn;
logic signResult;
logic convert;
logic sub;
wire [63:0] q1, qm1, qp1, q0, qm0, qp0;
wire [63:0] rega_out, regb_out, regc_out, regd_out;
wire [127:0] regr_out;
wire [2:0] sel_muxa, sel_muxb;
wire sel_muxr;
wire load_rega, load_regb, load_regc, load_regd, load_regr;
logic [63:0] q1, qm1, qp1, q0, qm0, qp0;
logic [63:0] rega_out, regb_out, regc_out, regd_out;
logic [127:0] regr_out;
logic [2:0] sel_muxa, sel_muxb;
logic sel_muxr;
logic load_rega, load_regb, load_regc, load_regd, load_regr;
wire donev, sel_muxrv, sel_muxsv;
wire [1:0] sel_muxav, sel_muxbv;
wire load_regav, load_regbv, load_regcv;
wire load_regrv, load_regs;
logic exp_cout1, exp_cout2;
logic exp_odd, open;
// logic reset;
logic load_regs;
logic exp_cout1, exp_cout2;
logic exp_odd, open;
// div/sqrt
// fdiv = 0
// fsqrt = 1
// Convert the input operands to their appropriate forms based on
// the original operands, the op_type, and their precision P.
// Single precision inputs are converted to double precision.
// NOTE(review): the earlier wording about setting the sign for absolute
// value or negation appears copied from fpadd; op_type here is a single
// bit selecting div/sqrt - confirm what convert_inputs_div actually does.
convert_inputs_div conv1 (Float1, Float2, op1, op2, op_type, P);
convert_inputs_div conv1 (.op1, .op2, .op_type, .P,
// outputs:
.Float1, .Float2b(Float2));
// Test for exceptions and return the "Invalid Operation" flag.
// The "sel_inv" value is passed to the rounder to select the result.
// NOTE(review): the positional form below also returns DenormIn, op1_Norm
// and op2_Norm; the named form connects only Ztype (sel_inv) and Invalid.
// The old remark that "sub is one if the effective operation is
// subtraction" does not match any connection here.
exception_div exc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm,
Float1, Float2, op_type);
exception_div exc1 (.A(Float1), .B(Float2), .op_type,
// output:
.Ztype(sel_inv), .Invalid);
// Determine Sign/Mantissa
// Result sign is the XOR of the operand signs; each mantissa is the
// 52-bit fraction with a leading one prepended.
assign signResult = (Float1[63]^Float2[63]);
assign mantissaA = {vdd, Float1[51:0]};
assign mantissaB = {vdd, Float2[51:0]};
assign mantissaA = {1'b1, Float1[51:0]};
assign mantissaB = {1'b1, Float2[51:0]};
// Perform Exponent Subtraction - expA - expB + Bias
// Exponents are zero-extended from 11 to 13 bits.
assign exp1 = {2'b0, Float1[62:52]};
assign exp2 = {2'b0, Float2[62:52]};
// bias : DP = 2^{11-1}-1 = 1023
assign bias = {3'h0, 10'h3FF};
// Divide exponent
// The csa form adds exp1 + ~exp2 + bias and the following assign adds the
// +1 carry-in; the second assign writes the same subtraction directly.
csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c);
// adder #(14) explogic1 ({vss, exp_s}, {vss, exp_c}, 1'b1, {open, exp_diff}, exp_cout1);
assign {exp_cout1, open, exp_diff} = {vss, exp_s} + {vss, exp_c} + 1'b1;
assign {exp_cout1, open, exp_diff} = exp1 - exp2 + bias;
// Sqrt exponent (check if exponent is odd)
// exp_odd is 1 when the LSB of the biased exponent (Float1[52]) is 0.
assign exp_odd = Float1[52] ? vss : vdd;
// adder #(14) explogic2 ({vss, exp1}, {4'h0, 10'h3ff}, exp_odd, exp_sqrt, exp_cout2);
assign {exp_cout2, exp_sqrt} = {vss, exp1} + {4'h0, 10'h3ff} + exp_odd;
assign exp_odd = Float1[52] ? 1'b0 : 1'b1;
assign {exp_cout2, exp_sqrt} = {1'b0, exp1} + {4'h0, 10'h3ff} + exp_odd;
// Choose correct exponent
// sqrt uses exp_sqrt[13:1] (the sum halved); divide uses exp_diff.
assign expF = op_type ? exp_sqrt[13:1] : exp_diff;
// Main Goldschmidt/Division Routine
// mantissaB is connected as d and mantissaA as n in the named form.
divconv goldy (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out,
regr_out, mantissaB, mantissaA, sel_muxa, sel_muxb, sel_muxr,
reset, clk, load_rega, load_regb, load_regc, load_regd,
load_regr, load_regs, P, op_type, exp_odd);
divconv goldy (.q1, .qm1, .qp1, .q0, .qm0, .qp0, .rega_out, .regb_out, .regc_out, .regd_out,
.regr_out, .d(mantissaB), .n(mantissaA), .sel_muxa, .sel_muxb, .sel_muxr,
.reset, .clk, .load_rega, .load_regb, .load_regc, .load_regd,
.load_regr, .load_regs, .P, .op_type, .exp_odd);
// FSM : control divider
// Drives the register-load and mux-select signals consumed by divconv,
// plus done and the busy output (FDivBusyE).
fsm control (done, load_rega, load_regb, load_regc, load_regd,
load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
clk, reset, start, op_type, FDivBusyE, HoldInputs);
fsm control (.clk, .reset, .start, .op_type,
// outputs:
.done, .load_rega, .load_regb, .load_regc, .load_regd,
.load_regr, .load_regs, .sel_muxa, .sel_muxb, .sel_muxr,
.divBusy(FDivBusyE));
// Round the mantissa to a 52-bit value, with the leading one
// removed. The rounding unit also handles special cases and
// sets the exception flags.
rounder_div round1 (Result, DenormIO, FlagsIn,
rm, P, OvEn, UnEn, expF,
sel_inv, Invalid, DenormIn, signResult,
q1, qm1, qp1, q0, qm0, qp0, regr_out);
rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF),
.sel_inv, .Invalid, .SignR(signResult),
.q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out,
// outputs:
.Result, .Flags(FlagsIn));
// Store the final result and the exception flags in registers.
flopenr #(64) rega (clk, reset, done, Result, AS_Result);
flopenr #(1) regb (clk, reset, done, DenormIO, Denorm);
flopenr #(64) rega (clk, reset, done, Result, AS_Result);
flopenr #(5) regc (clk, reset, done, FlagsIn, Flags);
endmodule // fpdiv

View File

@ -25,142 +25,173 @@
`include "wally-config.vh"
module fpu (
input logic clk,
input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic [31:0] InstrD,
input logic [`XLEN-1:0] ReadDataW, // Read data from memory
input logic [`XLEN-1:0] SrcAE, // Integer input being processed
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW,
input logic [4:0] RdE, RdM, RdW,
output logic FRegWriteM,
output logic FStallD, // Stall the decode stage
output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM,
output logic FDivBusyE, // Is the divison/sqrt unit busy
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic [4:0] SetFflagsM); // FPU result
// *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS
// *** folder at same level of src for tests fpu tests
// qa.b
// u1.52 - u sunsigned, q signed
generate
if (`F_SUPPORTED | `D_SUPPORTED) begin
// control logic signal instantiation
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division
logic FWriteIntD; // Write to integer register
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // Input3 forwarding mux control signal
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE, FResSelM;
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM;
logic [4:0] Adr1E, Adr2E, Adr3E;
input logic clk,
input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic [31:0] InstrD, // instruction from IFU
input logic [`XLEN-1:0] ReadDataW,// Read data from memory
input logic [`XLEN-1:0] SrcAE, // Integer input being processed (from IEU)
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg (from IEU)
input logic StallE, StallM, StallW, // stall signals from HZU
input logic FlushE, FlushM, FlushW, // flush signals from HZU
input logic [4:0] RdE, RdM, RdW, // which FP register to write to (from IEU)
output logic FRegWriteM, // FP register write enable
output logic FStallD, // Stall the decode stage
output logic FWriteIntE, FWriteIntM, FWriteIntW, // integer register write enable
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic [4:0] SetFflagsM // FMA flags (to privileged unit)
);
//*** make everything FLEN at some point
//*** add the 128 bit support to the if statement when needed
//*** make new tests for fp using testfloat that include flag checking and all rounding modes
//*** what is the format for 16-bit - finding conflicting info online can't find anything specified in spec
//*** only fma/mul and fp <-> int convert flags have been tested. test the others.
// FPU specifics:
// - uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
// single stored in a double: | 32 1s | single precision value |
// - sets the underflow after rounding
generate if (`F_SUPPORTED | `D_SUPPORTED) begin
// control signals
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division or squareroot
logic FWriteIntD; // Write to integer register
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; // Select the result written to the integer resister
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
// regfile signals
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`XLEN-1:0] FSrcXMAligned;
logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding)
logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding)
logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding)
logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding)
logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
// unpacking signals
logic XSgnE, YSgnE, ZSgnE;
logic [10:0] XExpE, YExpE, ZExpE;
logic [52:0] XManE, YManE, ZManE;
logic XNaNE, YNaNE, ZNaNE;
logic XSNaNE, YSNaNE, ZSNaNE;
logic XDenormE, YDenormE, ZDenormE;
logic XZeroE, YZeroE, ZZeroE;
logic [10:0] BiasE;
logic XInfE, YInfE, ZInfE;
logic XExpMaxE;
logic XNormE;
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
logic XSgnM, YSgnM, ZSgnM; // input's sign - memory stage
logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage
logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage
logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage
logic [10:0] BiasE; // bias based on precision (single=7f double=3ff - max expoent/2)
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XDenormE, YDenormE, ZDenormE; // is the input denormalized
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
logic XExpMaxE; // is the exponent all ones (max value)
logic XNormE; // is X normal
logic XSgnM, YSgnM, ZSgnM;
logic [10:0] XExpM, YExpM, ZExpM;
logic [52:0] XManM, YManM, ZManM;
logic XNaNM, YNaNM, ZNaNM;
logic XSNaNM, YSNaNM, ZSNaNM;
logic XZeroM, YZeroM, ZZeroM;
logic XInfM, YInfM, ZInfM;
// div/sqrt signals
logic [63:0] FDivResultM, FDivResultW;
logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW;
logic FDivSqrtDoneE;
logic [63:0] DivInput1E, DivInput2E;
logic HoldInputs; // keep forwarded inputs around during division
// result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
logic [4:0] FDivFlgM, FDivFlgW; // divide/squareroot flags
logic [63:0] FMAResM, FMAResW; // FMA/multiply result
logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply flags
//fpu signals
logic [63:0] FMAResM, FMAResW;
logic [4:0] FMAFlgM, FMAFlgW;
logic [63:0] ReadResW; // read result (load instruction)
logic [63:0] FAddResM, FAddResW; // add/FP -> FP convert result
logic [4:0] FAddFlgM, FAddFlgW; // add/FP -> FP convert flags
logic [63:0] CvtResE, CvtResM; // FP <-> int convert result
logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this
logic [63:0] ReadResW;
// add/cvt signals
logic [63:0] FAddResM, FAddResW;
logic [4:0] FAddFlgM, FAddFlgW;
logic [63:0] CvtResE, CvtResM;
logic [4:0] CvtFlgE, CvtFlgM;
// cmp signals
logic CmpNVE, CmpNVM, CmpNVW;
logic [63:0] CmpResE, CmpResM, CmpResW;
// fsgn signals
logic [63:0] SgnResE, SgnResM;
logic SgnNVE, SgnNVM, SgnNVW;
logic [63:0] FResM, FResW;
logic [4:0] FFlgM, FFlgW;
// instantiation of W stage regfile signals
logic [63:0] AlignedSrcAM;
// classify signals
logic [63:0] ClassResE, ClassResM;
// 64-bit FPU result
logic [63:0] FPUResultW;
logic [4:0] FPUFlagsW;
logic [63:0] ClassResE, ClassResM; // classify result
logic [63:0] CmpResE, CmpResM; // compare result
logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid)
logic [63:0] SgnResE, SgnResM; // sign injection result
logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid)
logic [63:0] FResM, FResW; // selected result that is ready in the memory stage
logic [4:0] FFlgM; // selected flag that is ready in the memory stage
logic [63:0] FPUResultW; // final FP result being written to the FP register
// other signals
logic FDivSqrtDoneE; // is divide done
logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit
logic FDivClk; // clock for divide/squareroot unit
logic [63:0] AlignedSrcAM; // align SrcA to the floating point format
////////////////////////////////////////////////////////////////////////////////////////
//DECODE STAGE
////////////////////////////////////////////////////////////////////////////////////////
// calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW,
// outputs:
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
// top-level controller for FPU
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
.FRM_REGW, .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
// FP register file
// - can read 3 registers and write 1 register every cycle
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
.a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), .a4(RdW),
.wd4(FPUResultW),
// outputs:
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
// regfile instantiation
fregfile fregfile (clk, reset, FRegWriteW,
InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
FPUResultW,
FRD1D, FRD2D, FRD3D);
//*****************
// D/E pipe registers
//*****************
////////////////////////////////////////////////////////////////////////////////////////
// D/E pipeline registers
////////////////////////////////////////////////////////////////////////////////////////
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(18) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
////////////////////////////////////////////////////////////////////////////////////////
//EXECUTION STAGE
////////////////////////////////////////////////////////////////////////////////////////
// Hazard unit for FPU
// - determines if any forwarding or stalls are needed
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM,
// outputs:
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
// Hazard unit for FPU
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, .FStallD,
.FForwardXE, .FForwardYE, .FForwardZE);
// forwarding muxes
mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
@ -168,128 +199,190 @@ module fpu (
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
mux2 #(64) fzmulmux(FPreSrcZE, 64'b0, FOpCtrlE[2], FSrcZE); // Force Z to be 0 for multiply instructions
unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE),
.FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE,
.ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE,
.XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
// first of two-stage instance of floating-point fused multiply-add unit
// unpacking unit
// - splits FP inputs into their various parts
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infinity)
unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE,
// outputs:
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
// FMA
// - two stage FMA
// - execute stage - multiplication and addend shifting
// - memory stage - addition and rounding
// - handles FMA and multiply instructions
// - contains some E/M pipeline registers
// *** currently handles FLEN and 32 bits (don't know if 32 works with 128 - easy to fix) - change to handle only the supported formats
fma fma (.clk, .reset, .FlushM, .StallM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .
ZManE, .XDenormE, .YDenormE,
.ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE,
.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XManM,
.YManM, .ZManM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
// .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE,
.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM,
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM,
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
.FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]),
.FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM);
// first and only instance of floating-point divider
logic fpdivClk;
.FmtE, .FmtM, .FrmM,
// outputs:
.FMAFlgM, .FMAResM);
// clock gater
// - creates a clock that only runs during divide/sqrt instructions
// - using the separate clock gives the divide/sqrt unit some time to get set up
// *** the module says not to use in synthesis
clockgater fpdivclkg(.E(FDivStartE),
.SE(1'b0),
.CLK(clk),
.ECLK(fpdivClk));
.ECLK(FDivClk));
// capture the inputs for div/sqrt
// capture the inputs for divide/sqrt
// - if not captured, any forwarded inputs will change during computation
// - this problem is caused by stalling the execute stage
// - the other units don't have this problem, only div/sqrt stalls the execute stage
flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E),
.en(1'b1), .clear(FDivSqrtDoneE),
.reset(reset), .clk(HoldInputs));
.reset(reset), .clk(FDivBusyE));
flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E),
.en(1'b1), .clear(FDivSqrtDoneE),
.reset(reset), .clk(HoldInputs));
//*** add round to nearest ties to max magnitude
fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .done(FDivSqrtDoneE), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
.P(~FmtE), .FDivBusyE, .HoldInputs,
.OvEn(1'b1), .UnEn(1'b1),
.start(FDivStartE), .reset, .clk(fpdivClk), .AS_Result(FDivResultM), .Flags(FDivSqrtFlgM));
// .DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E,
// .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM,
// .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset);
// assign FDivBusyE = 0;
// first of two-stage instance of floating-point add/cvt unit
faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM,
.FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM);
// first and only instance of floating-point comparator
fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), .FSrcXE,
.FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE,
.Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE);
// first and only instance of floating-point sign converter
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE);
// first and only instance of floating-point classify unit
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE);
fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);
.reset(reset), .clk(FDivBusyE));
// output for store instructions
//*** change to use the unpacking unit if possible
fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
.reset, .clk(FDivClk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1),
// outputs:
.FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
// add/FP <-> FP convert
// - computation is done in two stages
// - contains some E/M pipeline registers
//*** remove unneeded logic
//*** change to use the unpacking unit if possible
faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE,
// outputs:
.FAddResM, .FAddFlgM);
// compare unit
// - computation is done in one stage
// - writes to FP file during min/max instructions
// - other comparisons write a 1 or 0 to the integer register
fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}),
.FSrcXE, .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]),
.FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE,
// outputs:
.Invalid(CmpNVE), .CmpResE);
// sign injection unit
// - computation is done in one stage
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE,
// outputs:
.SgnNVE, .SgnResE);
// classify
// - computation is done in one stage
// - most of the work is done in the unpacking unit
// - result is written to the integer register
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE,
// outputs:
.XSNaNE, .ClassResE);
fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE,
// outputs:
.CvtResE, .CvtFlgE);
// data to be stored in memory - to IEU
// - FP uses NaN-boxing format
// - if there are any unused bits the most significant bits are filled with 1s
assign FWriteDataE = FSrcYE[`XLEN-1:0];
//*****************
//***will synth remove registers of values that are always zero?
////////////////////////////////////////////////////////////////////////////////////////
// E/M pipe registers
//*****************
////////////////////////////////////////////////////////////////////////////////////////
flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
// flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM);
// flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM);
flopenrc #(65) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM});
flopenrc #(65) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM});
flopenrc #(65) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZManE}, {ZSgnM,ZExpM,ZManM});
flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM,
flopenrc #(65) EMFpReg2(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM});
flopenrc #(65) EMFpReg3(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM});
flopenrc #(65) EMFpReg4(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZManE}, {ZSgnM,ZExpM,ZManM});
flopenrc #(12) EMFpReg5(clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM);
flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM);
flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, CmpResE, CmpResM);
flopenrc #(1) EMRegCmpFlg(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM);
flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
flopenrc #(64) EMRegSgnRes(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
flopenrc #(1) EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM,
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
////////////////////////////////////////////////////////////////////////////////////////
//BEGIN MEMORY STAGE
mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
// mux2 #(`XLEN) FSrcXAlignedMux({{`XLEN-32{1'b0}}, FSrcXM[63:32]}, FSrcXM[63:64-`XLEN], FmtM, FSrcXMAligned);
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
////////////////////////////////////////////////////////////////////////////////////////
// Align SrcA to the floating point format (unused most significant bits are filled with 1s)
mux2 #(64) SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM);
mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivSqrtFlgM, FFlgM, FResultSelW, SetFflagsM);
// select a result that may be written to the FP register
mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
//*****************
// select the result that may be written to the integer register - to IEU
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
// FPU flag selection - to privileged
mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
////////////////////////////////////////////////////////////////////////////////////////
// M/W pipe registers
//*****************
flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW);
flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW);
flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW);
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW,
////////////////////////////////////////////////////////////////////////////////////////
flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, FAddResM, FAddResW);
flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM, FWriteIntM},
{FRegWriteW, FResultSelW, FmtW, FWriteIntW});
//#########################################
////////////////////////////////////////////////////////////////////////////////////////
// BEGIN WRITEBACK STAGE
//#########################################
////////////////////////////////////////////////////////////////////////////////////////
// put ReadData into NaN-boxing format
// - if there are any unused bits the most significant bits are filled with 1s
// - for load instruction
mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW);
// select the result to be written to the FP register
mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResW, FResW, FResultSelW, FPUResultW);
end else begin // no F_SUPPORTED; tie outputs low
end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
assign FStallD = 0;
assign FWriteIntE = 0;
assign FWriteIntM = 0;
@ -299,7 +392,7 @@ module fpu (
assign FDivBusyE = 0;
assign IllegalFPUInstrD = 1;
assign SetFflagsM = 0;
end
end
endgenerate
endmodule // fpu

View File

@ -26,10 +26,10 @@
`include "wally-config.vh"
module fregfile (
input logic clk, reset,
input logic we4,
input logic [ 4:0] a1, a2, a3, a4,
input logic [63:0] wd4, //KEP `XLEN-1 changed to 63 (lint warning) *** figure out if double can be supported when XLEN = 32
input logic clk, reset,
input logic we4,
input logic [ 4:0] a1, a2, a3, a4,
input logic [63:0] wd4,
output logic [63:0] rd1, rd2, rd3);
logic [63:0] rf[31:0];

View File

@ -1,15 +1,15 @@
//performs the fsgnj/fsgnjn/fsgnjx RISCV instructions
module fsgn (
input logic XSgnE, YSgnE,
input logic [63:0] FSrcXE,
input logic XExpMaxE,
input logic FmtE,
input logic [1:0] SgnOpCodeE,
output logic [63:0] SgnResE,
output logic SgnNVE);
input logic XSgnE, YSgnE, // X and Y sign bits
input logic [63:0] FSrcXE, // X
input logic XExpMaxE, // max possible exponent (all ones)
input logic FmtE, // precision 1 = double 0 = single
input logic [1:0] SgnOpCodeE, // operation control
output logic [63:0] SgnResE, // result
output logic SgnNVE // invalid flag
);
logic AonesExp;
logic ResSgn;
//op code designation:
@ -19,7 +19,12 @@ module fsgn (
//10 - fsgnjx - XOR sign values of FSrcXE & FSrcYE
//
// calculate the result's sign
assign ResSgn = SgnOpCodeE[1] ? (XSgnE ^ YSgnE) : (YSgnE ^ SgnOpCodeE[0]);
// format final result based on precision
// - uses NaN-boxing format
// - if there are any unused bits the most significant bits are filled with 1s
assign SgnResE = FmtE ? {ResSgn, FSrcXE[62:0]} : {FSrcXE[63:32], ResSgn, FSrcXE[30:0]};
//If the exponent is all ones, then the value is either Inf or NaN,

View File

@ -1,37 +1,22 @@
module fsm (done, load_rega, load_regb, load_regc,
load_regd, load_regr, load_regs,
sel_muxa, sel_muxb, sel_muxr,
clk, reset, start, op_type, divBusy, holdInputs);
module fsm (
input clk;
input reset;
input start;
// input error;
input op_type;
//***can use divbusy instead of holdinputs
output done;
output load_rega;
output load_regb;
output load_regc;
output load_regd;
output load_regr;
output load_regs;
output [2:0] sel_muxa;
output [2:0] sel_muxb;
output sel_muxr;
output logic divBusy,holdInputs;
input logic clk,
input logic reset,
input logic start,
input logic op_type,
output logic done, // End of cycles
output logic load_rega, // enable for regA
output logic load_regb, // enable for regB
output logic load_regc, // enable for regC
output logic load_regd, // enable for regD
output logic load_regr, // enable for rem
output logic load_regs, // enable for q,qm,qp
output logic [2:0] sel_muxa, // Select muxA
output logic [2:0] sel_muxb, // Select muxB
output logic sel_muxr, // Select rem mux
output logic divBusy // calculation is happening
);
reg done; // End of cycles
reg load_rega; // enable for regA
reg load_regb; // enable for regB
reg load_regc; // enable for regC
reg load_regd; // enable for regD
reg load_regr; // enable for rem
reg load_regs; // enable for q,qm,qp
reg [2:0] sel_muxa; // Select muxA
reg [2:0] sel_muxb; // Select muxB
reg sel_muxr; // Select rem mux
reg [4:0] CURRENT_STATE;
reg [4:0] NEXT_STATE;
@ -65,7 +50,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b0;
holdInputs = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -81,7 +65,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
@ -97,7 +80,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
@ -114,7 +96,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
@ -130,7 +111,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
@ -146,7 +126,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
@ -162,7 +141,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
@ -178,7 +156,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
@ -194,7 +171,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
@ -210,7 +186,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
@ -226,7 +201,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -242,7 +216,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -258,7 +231,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b1;
divBusy = 1'b0;
holdInputs = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -274,7 +246,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -290,7 +261,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
@ -306,7 +276,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
@ -322,7 +291,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -338,7 +306,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
@ -354,7 +321,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
@ -370,7 +336,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -386,7 +351,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
@ -402,7 +366,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
@ -418,7 +381,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -434,7 +396,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
@ -450,7 +411,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -466,7 +426,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -482,7 +441,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b1;
divBusy = 1'b0;
holdInputs = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -498,7 +456,6 @@ module fsm (done, load_rega, load_regb, load_regc,
begin
done = 1'b0;
divBusy = 1'b0;
holdInputs = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;

View File

@ -1,461 +0,0 @@
// `timescale 1ps/1ps
// module fsm_div (done, load_rega, load_regb, load_regc,
// load_regd, load_regr, load_regs,
// sel_muxa, sel_muxb, sel_muxr,
// clk, reset, start, error, op_type);
// input clk;
// input reset;
// input start;
// input error;
// input op_type;
// output done;
// output load_rega;
// output load_regb;
// output load_regc;
// output load_regd;
// output load_regr;
// output load_regs;
// output [2:0] sel_muxa;
// output [2:0] sel_muxb;
// output sel_muxr;
// reg done; // End of cycles
// reg load_rega; // enable for regA
// reg load_regb; // enable for regB
// reg load_regc; // enable for regC
// reg load_regd; // enable for regD
// reg load_regr; // enable for rem
// reg load_regs; // enable for q,qm,qp
// reg [2:0] sel_muxa; // Select muxA
// reg [2:0] sel_muxb; // Select muxB
// reg sel_muxr; // Select rem mux
// reg [4:0] CURRENT_STATE;
// reg [4:0] NEXT_STATE;
// parameter [4:0]
// S0=5'd0, S1=5'd1, S2=5'd2,
// S3=5'd3, S4=5'd4, S5=5'd5,
// S6=5'd6, S7=5'd7, S8=5'd8,
// S9=5'd9, S10=5'd10,
// S13=5'd13, S14=5'd14, S15=5'd15,
// S16=5'd16, S17=5'd17, S18=5'd18,
// S19=5'd19, S20=5'd20, S21=5'd21,
// S22=5'd22, S23=5'd23, S24=5'd24,
// S25=5'd25, S26=5'd26, S27=5'd27,
// S28=5'd28, S29=5'd29, S30=5'd30;
// always @(posedge clk)
// begin
// if(reset==1'b1)
// CURRENT_STATE<=S0;
// else
// CURRENT_STATE<=NEXT_STATE;
// end
// always @(*)
// begin
// case(CURRENT_STATE)
// S0: // iteration 0
// begin
// if (start==1'b0)
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b0;
// load_regc = 1'b0;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b000;
// sel_muxb = 3'b000;
// sel_muxr = 1'b0;
// NEXT_STATE <= S0;
// end
// else if (start==1'b1 && op_type==1'b0)
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b1;
// load_regc = 1'b0;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b001;
// sel_muxb = 3'b001;
// sel_muxr = 1'b0;
// NEXT_STATE <= S1;
// end // if (start==1'b1 && op_type==1'b0)
// else if (start==1'b1 && op_type==1'b1)
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b1;
// load_regc = 1'b0;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b010;
// sel_muxb = 3'b000;
// sel_muxr = 1'b0;
// NEXT_STATE <= S13;
// end
// end // case: S0
// S1:
// begin
// done = 1'b0;
// load_rega = 1'b1;
// load_regb = 1'b0;
// load_regc = 1'b1;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b010;
// sel_muxb = 3'b000;
// sel_muxr = 1'b0;
// NEXT_STATE <= S2;
// end
// S2: // iteration 1
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b1;
// load_regc = 1'b0;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b011;
// sel_muxb = 3'b011;
// sel_muxr = 1'b0;
// NEXT_STATE <= S3;
// end
// S3:
// begin
// done = 1'b0;
// load_rega = 1'b1;
// load_regb = 1'b0;
// load_regc = 1'b1;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b000;
// sel_muxb = 3'b010;
// sel_muxr = 1'b0;
// NEXT_STATE <= S4;
// end
// S4: // iteration 2
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b1;
// load_regc = 1'b0;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b011;
// sel_muxb = 3'b011;
// sel_muxr = 1'b0;
// NEXT_STATE <= S5;
// end
// S5:
// begin
// done = 1'b0;
// load_rega = 1'b1;
// load_regb = 1'b0;
// load_regc = 1'b1;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b000;
// sel_muxb = 3'b010;
// sel_muxr = 1'b0; // add
// NEXT_STATE <= S6;
// end
// S6: // iteration 3
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b1;
// load_regc = 1'b0;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b011;
// sel_muxb = 3'b011;
// sel_muxr = 1'b0;
// NEXT_STATE <= S8;
// end
// S7:
// begin
// done = 1'b0;
// load_rega = 1'b1;
// load_regb = 1'b0;
// load_regc = 1'b1;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b000;
// sel_muxb = 3'b010;
// sel_muxr = 1'b0;
// NEXT_STATE <= S8;
// end // case: S7
// S8: // q,qm,qp
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b0;
// load_regc = 1'b0;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b1;
// sel_muxa = 3'b000;
// sel_muxb = 3'b000;
// sel_muxr = 1'b0;
// NEXT_STATE <= S9;
// end
// S9: // rem
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b0;
// load_regc = 1'b0;
// load_regd = 1'b0;
// load_regr = 1'b1;
// load_regs = 1'b0;
// sel_muxa = 3'b000;
// sel_muxb = 3'b000;
// sel_muxr = 1'b1;
// NEXT_STATE <= S10;
// end
// S10: // done
// begin
// done = 1'b1;
// load_rega = 1'b0;
// load_regb = 1'b0;
// load_regc = 1'b0;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b000;
// sel_muxb = 3'b000;
// sel_muxr = 1'b0;
// NEXT_STATE <= S0;
// end
// S13: // start of sqrt path
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b0;
// load_regc = 1'b0;
// load_regd = 1'b1;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b010;
// sel_muxb = 3'b001;
// sel_muxr = 1'b0;
// NEXT_STATE <= S14;
// end
// S14:
// begin
// done = 1'b0;
// load_rega = 1'b1;
// load_regb = 1'b0;
// load_regc = 1'b1;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b001;
// sel_muxb = 3'b100;
// sel_muxr = 1'b0;
// NEXT_STATE <= S15;
// end
// S15: // iteration 1
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b1;
// load_regc = 1'b0;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b011;
// sel_muxb = 3'b011;
// sel_muxr = 1'b0;
// NEXT_STATE <= S16;
// end
// S16:
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b0;
// load_regc = 1'b0;
// load_regd = 1'b1;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b000;
// sel_muxb = 3'b011;
// sel_muxr = 1'b0;
// NEXT_STATE <= S17;
// end
// S17:
// begin
// done = 1'b0;
// load_rega = 1'b1;
// load_regb = 1'b0;
// load_regc = 1'b1;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b100;
// sel_muxb = 3'b010;
// sel_muxr = 1'b0;
// NEXT_STATE <= S18;
// end
// S18: // iteration 2
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b1;
// load_regc = 1'b0;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b011;
// sel_muxb = 3'b011;
// sel_muxr = 1'b0;
// NEXT_STATE <= S19;
// end
// S19:
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b0;
// load_regc = 1'b0;
// load_regd = 1'b1;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b000;
// sel_muxb = 3'b011;
// sel_muxr = 1'b0;
// NEXT_STATE <= S20;
// end
// S20:
// begin
// done = 1'b0;
// load_rega = 1'b1;
// load_regb = 1'b0;
// load_regc = 1'b1;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b100;
// sel_muxb = 3'b010;
// sel_muxr = 1'b0;
// NEXT_STATE <= S21;
// end
// S21: // iteration 3
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b1;
// load_regc = 1'b0;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b011;
// sel_muxb = 3'b011;
// sel_muxr = 1'b0;
// NEXT_STATE <= S22;
// end
// S22:
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b0;
// load_regc = 1'b0;
// load_regd = 1'b1;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b000;
// sel_muxb = 3'b011;
// sel_muxr = 1'b0;
// NEXT_STATE <= S23;
// end
// S23:
// begin
// done = 1'b0;
// load_rega = 1'b1;
// load_regb = 1'b0;
// load_regc = 1'b1;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b100;
// sel_muxb = 3'b010;
// sel_muxr = 1'b0;
// NEXT_STATE <= S24;
// end
// S24: // q,qm,qp
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b0;
// load_regc = 1'b0;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b1;
// sel_muxa = 3'b000;
// sel_muxb = 3'b000;
// sel_muxr = 1'b0;
// NEXT_STATE <= S25;
// end
// S25: // rem
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b0;
// load_regc = 1'b0;
// load_regd = 1'b0;
// load_regr = 1'b1;
// load_regs = 1'b0;
// sel_muxa = 3'b011;
// sel_muxb = 3'b110;
// sel_muxr = 1'b1;
// NEXT_STATE <= S26;
// end
// S26: // done
// begin
// done = 1'b1;
// load_rega = 1'b0;
// load_regb = 1'b0;
// load_regc = 1'b0;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b000;
// sel_muxb = 3'b000;
// sel_muxr = 1'b0;
// NEXT_STATE <= S0;
// end
// default:
// begin
// done = 1'b0;
// load_rega = 1'b0;
// load_regb = 1'b0;
// load_regc = 1'b0;
// load_regd = 1'b0;
// load_regr = 1'b0;
// load_regs = 1'b0;
// sel_muxa = 3'b000;
// sel_muxb = 3'b000;
// sel_muxr = 1'b0;
// NEXT_STATE <= S0;
// end
// endcase // case(CURRENT_STATE)
// end // always @ (CURRENT_STATE or X)
// endmodule // fsm

View File

@ -1,593 +0,0 @@
// Ladner-Fischer Prefix Adder (128-bit top level)
// Computes {cout, sum} = a + b + cin. The carry-in is handed to the prefix
// tree as the generate term of an extra least-significant position, so the
// tree's carry outputs already account for it.
module ldf128 (cout, sum, a, b, cin);
   output         cout;
   output [127:0] sum;
   input  [127:0] a, b;
   input          cin;

   // Per-bit propagate (XOR) and generate (AND) of the two operands.
   wire [127:0] prop_ab = a ^ b;
   wire [127:0] gen_ab  = a & b;
   // Carry vector returned by the prefix tree (tree port c[128:1]).
   wire [127:0] carry;

   // The tree sees the operand terms shifted up by one position, with a zero
   // propagate and cin as the generate in position 0; the top operand bit is
   // handled separately when forming cout below.
   ladner_fischer128 prefix_tree (
      .c (carry),
      .p ({prop_ab[126:0], 1'b0}),
      .g ({gen_ab[126:0], cin})
   );

   // Post-computation: sum bit = propagate XOR incoming carry.
   assign sum  = prop_ab ^ carry;
   // Carry out of the most significant operand bit.
   assign cout = gen_ab[127] | (prop_ab[127] & carry[127]);
endmodule
// 128-position Ladner-Fischer parallel-prefix carry network.
//   p, g : per-position propagate / generate inputs
//   c    : c[k+1] is the group generate over positions k..0
// The odd positions are resolved by a seven-stage prefix tree; every even
// position is then derived from the odd position just below it by one extra
// grey cell. The network is built from the same grey/black cells as a flat
// netlist would use, but is described with generate loops.
module ladner_fischer128 (c, p, g);
   input  [127:0] p;
   input  [127:0] g;
   output [128:1] c;

   // grp_g[s][j] / grp_p[s][j]: group generate / propagate of the span that
   // ends at odd position 2*j+1, as it stands after prefix stage s.
   // grp_p is only driven where a later cell consumes it (spans that have not
   // yet been merged down to position 0); the remaining bits are unused.
   wire [63:0] grp_g [1:7];
   wire [63:0] grp_p [1:7];

   genvar s, j;
   generate
      // Stage 1: merge each adjacent pair of positions (2j+1, 2j).
      for (j = 0; j < 64; j = j + 1) begin : stage1
         if (j == 0) begin : to_zero
            // The lowest pair already reaches position 0: generate only.
            grey u (grp_g[1][j], {g[1], g[0]}, p[1]);
         end else begin : span
            black u (grp_g[1][j], grp_p[1][j],
                     {g[2*j+1], g[2*j]}, {p[2*j+1], p[2*j]});
         end
      end

      // Stages 2..7: at stage s, every column whose index has bit (s-2) set
      // absorbs the span that ends directly below its own span.
      for (s = 2; s <= 7; s = s + 1) begin : stage
         for (j = 0; j < 64; j = j + 1) begin : col
            // Column holding the span directly below this column's span:
            // clear the bits of j below (s-2), then step down by one.
            localparam integer LOWER = ((j >> (s-2)) << (s-2)) - 1;

            if (((j >> (s-2)) & 1) == 0) begin : pass
               // Column is idle in this stage: carry its terms forward.
               assign grp_g[s][j] = grp_g[s-1][j];
               if ((j >> (s-1)) != 0) begin : keep_p
                  assign grp_p[s][j] = grp_p[s-1][j];
               end
            end else if ((j >> (s-1)) == 0) begin : to_zero
               // Merged span reaches position 0: only generate is needed.
               grey u (grp_g[s][j],
                       {grp_g[s-1][j], grp_g[s-1][LOWER]},
                       grp_p[s-1][j]);
            end else begin : span
               black u (grp_g[s][j], grp_p[s][j],
                        {grp_g[s-1][j], grp_g[s-1][LOWER]},
                        {grp_p[s-1][j], grp_p[s-1][LOWER]});
            end
         end
      end

      // Final row: c[k+1] = G_k_0.
      for (j = 0; j < 64; j = j + 1) begin : carry
         // Odd positions come straight out of the prefix tree.
         assign c[2*j+2] = grp_g[7][j];
         if (j == 0) begin : first
            assign c[1] = g[0];
         end else begin : even
            // Even positions: one more grey cell on top of the odd position below.
            grey u (c[2*j+1], {g[2*j], grp_g[7][j-1]}, p[2*j]);
         end
      end
   endgenerate
endmodule // ladner_fischer128

View File

@ -1,289 +0,0 @@
// Ladner-Fischer Prefix Adder (64-bit top level)
// Computes {cout, sum} = a + b + cin. The carry-in is handed to the prefix
// tree as the generate term of an extra least-significant position.
module ldf64 (cout, sum, a, b, cin);
   output        cout;
   output [63:0] sum;
   input  [63:0] a, b;
   input         cin;

   // Per-bit propagate (XOR) and generate (AND) of the two operands.
   wire [63:0] prop_ab = a ^ b;
   wire [63:0] gen_ab  = a & b;
   // Carry vector returned by the prefix tree (tree port c[64:1]).
   wire [63:0] carry;

   // The tree sees the operand terms shifted up by one position, with a zero
   // propagate and cin as the generate in position 0; the top operand bit is
   // handled separately when forming cout below.
   ladner_fischer64 prefix_tree (
      .c (carry),
      .p ({prop_ab[62:0], 1'b0}),
      .g ({gen_ab[62:0], cin})
   );

   // Post-computation: sum bit = propagate XOR incoming carry.
   assign sum  = prop_ab ^ carry;
   // Carry out of the most significant operand bit.
   assign cout = gen_ab[63] | (prop_ab[63] & carry[63]);
endmodule
// 64-position Ladner-Fischer parallel-prefix carry network.
//   p[63:0], g[63:0] : per-position propagate / generate inputs
//   c[64:1]          : carries, with c[k+1] = G_k_0 (group generate over positions k..0)
// Naming: G_i_j / P_i_j are the group generate / propagate over positions i down to j.
// NOTE(review): the grey and black cells are defined outside this module; from the
// port usage here, grey presumably produces only a group G (from two G inputs and
// the upper P) while black produces both group G and group P -- confirm against
// their definitions.
module ladner_fischer64 (c, p, g);
input [63:0] p;
input [63:0] g;
output [64:1] c;
// Intermediate group signals. Groups that reach position 0 only need G (no P),
// because nothing below position 0 can propagate into them.
logic G_1_0,G_3_2,P_3_2,G_5_4,P_5_4,G_7_6,P_7_6,G_9_8,P_9_8,G_11_10,P_11_10,G_13_12,P_13_12,G_15_14,P_15_14
,G_17_16,P_17_16,G_19_18,P_19_18,G_21_20,P_21_20,G_23_22,P_23_22,G_25_24,P_25_24,G_27_26,P_27_26,G_29_28,P_29_28
,G_31_30,P_31_30,G_33_32,P_33_32,G_35_34,P_35_34,G_37_36,P_37_36,G_39_38,P_39_38,G_41_40,P_41_40,G_43_42,P_43_42
,G_45_44,P_45_44,G_47_46,P_47_46,G_49_48,P_49_48,G_51_50,P_51_50,G_53_52,P_53_52,G_55_54,P_55_54,G_57_56,P_57_56
,G_59_58,P_59_58,G_61_60,P_61_60,G_63_62,P_63_62,G_3_0,G_7_4,P_7_4,G_11_8,P_11_8,G_15_12,P_15_12,G_19_16,P_19_16
,G_23_20,P_23_20,G_27_24,P_27_24,G_31_28,P_31_28,G_35_32,P_35_32,G_39_36,P_39_36,G_43_40,P_43_40,G_47_44,P_47_44
,G_51_48,P_51_48,G_55_52,P_55_52,G_59_56,P_59_56,G_63_60,P_63_60,G_5_0,G_7_0,G_13_8,P_13_8,G_15_8,P_15_8,G_21_16
,P_21_16,G_23_16,P_23_16,G_29_24,P_29_24,G_31_24,P_31_24,G_37_32,P_37_32,G_39_32,P_39_32,G_45_40,P_45_40,G_47_40
,P_47_40,G_53_48,P_53_48,G_55_48,P_55_48,G_61_56,P_61_56,G_63_56,P_63_56,G_9_0,G_11_0,G_13_0,G_15_0,G_25_16
,P_25_16,G_27_16,P_27_16,G_29_16,P_29_16,G_31_16,P_31_16,G_41_32,P_41_32,G_43_32,P_43_32,G_45_32,P_45_32,G_47_32
,P_47_32,G_57_48,P_57_48,G_59_48,P_59_48,G_61_48,P_61_48,G_63_48,P_63_48,G_17_0,G_19_0,G_21_0,G_23_0,G_25_0,G_27_0
,G_29_0,G_31_0,G_49_32,P_49_32,G_51_32,P_51_32,G_53_32,P_53_32,G_55_32,P_55_32,G_57_32,P_57_32,G_59_32,P_59_32
,G_61_32,P_61_32,G_63_32,P_63_32,G_33_0,G_35_0,G_37_0,G_39_0,G_41_0,G_43_0,G_45_0,G_47_0,G_49_0,G_51_0,G_53_0
,G_55_0,G_57_0,G_59_0,G_61_0,G_63_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0,G_14_0,G_16_0,G_18_0,G_20_0,G_22_0
,G_24_0,G_26_0,G_28_0,G_30_0,G_32_0,G_34_0,G_36_0,G_38_0,G_40_0,G_42_0,G_44_0,G_46_0,G_48_0,G_50_0,G_52_0
,G_54_0,G_56_0,G_58_0,G_60_0,G_62_0;
// parallel-prefix, Ladner-Fischer
// Stage 1: combines adjacent single positions (distance 1) into 2-wide groups [2k+1:2k]
// (the group touching position 0 uses a grey cell, although its instance is named b_1_0)
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
black b_15_14 (G_15_14, P_15_14, {g[15],g[14]}, {p[15],p[14]});
black b_17_16 (G_17_16, P_17_16, {g[17],g[16]}, {p[17],p[16]});
black b_19_18 (G_19_18, P_19_18, {g[19],g[18]}, {p[19],p[18]});
black b_21_20 (G_21_20, P_21_20, {g[21],g[20]}, {p[21],p[20]});
black b_23_22 (G_23_22, P_23_22, {g[23],g[22]}, {p[23],p[22]});
black b_25_24 (G_25_24, P_25_24, {g[25],g[24]}, {p[25],p[24]});
black b_27_26 (G_27_26, P_27_26, {g[27],g[26]}, {p[27],p[26]});
black b_29_28 (G_29_28, P_29_28, {g[29],g[28]}, {p[29],p[28]});
black b_31_30 (G_31_30, P_31_30, {g[31],g[30]}, {p[31],p[30]});
black b_33_32 (G_33_32, P_33_32, {g[33],g[32]}, {p[33],p[32]});
black b_35_34 (G_35_34, P_35_34, {g[35],g[34]}, {p[35],p[34]});
black b_37_36 (G_37_36, P_37_36, {g[37],g[36]}, {p[37],p[36]});
black b_39_38 (G_39_38, P_39_38, {g[39],g[38]}, {p[39],p[38]});
black b_41_40 (G_41_40, P_41_40, {g[41],g[40]}, {p[41],p[40]});
black b_43_42 (G_43_42, P_43_42, {g[43],g[42]}, {p[43],p[42]});
black b_45_44 (G_45_44, P_45_44, {g[45],g[44]}, {p[45],p[44]});
black b_47_46 (G_47_46, P_47_46, {g[47],g[46]}, {p[47],p[46]});
black b_49_48 (G_49_48, P_49_48, {g[49],g[48]}, {p[49],p[48]});
black b_51_50 (G_51_50, P_51_50, {g[51],g[50]}, {p[51],p[50]});
black b_53_52 (G_53_52, P_53_52, {g[53],g[52]}, {p[53],p[52]});
black b_55_54 (G_55_54, P_55_54, {g[55],g[54]}, {p[55],p[54]});
black b_57_56 (G_57_56, P_57_56, {g[57],g[56]}, {p[57],p[56]});
black b_59_58 (G_59_58, P_59_58, {g[59],g[58]}, {p[59],p[58]});
black b_61_60 (G_61_60, P_61_60, {g[61],g[60]}, {p[61],p[60]});
black b_63_62 (G_63_62, P_63_62, {g[63],g[62]}, {p[63],p[62]});
// Stage 2: combines adjacent 2-wide groups (distance 2) into 4-wide groups [4k+3:4k]
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12});
black b_19_16 (G_19_16, P_19_16, {G_19_18,G_17_16}, {P_19_18,P_17_16});
black b_23_20 (G_23_20, P_23_20, {G_23_22,G_21_20}, {P_23_22,P_21_20});
black b_27_24 (G_27_24, P_27_24, {G_27_26,G_25_24}, {P_27_26,P_25_24});
black b_31_28 (G_31_28, P_31_28, {G_31_30,G_29_28}, {P_31_30,P_29_28});
black b_35_32 (G_35_32, P_35_32, {G_35_34,G_33_32}, {P_35_34,P_33_32});
black b_39_36 (G_39_36, P_39_36, {G_39_38,G_37_36}, {P_39_38,P_37_36});
black b_43_40 (G_43_40, P_43_40, {G_43_42,G_41_40}, {P_43_42,P_41_40});
black b_47_44 (G_47_44, P_47_44, {G_47_46,G_45_44}, {P_47_46,P_45_44});
black b_51_48 (G_51_48, P_51_48, {G_51_50,G_49_48}, {P_51_50,P_49_48});
black b_55_52 (G_55_52, P_55_52, {G_55_54,G_53_52}, {P_55_54,P_53_52});
black b_59_56 (G_59_56, P_59_56, {G_59_58,G_57_56}, {P_59_58,P_57_56});
black b_63_60 (G_63_60, P_63_60, {G_63_62,G_61_60}, {P_63_62,P_61_60});
// Stage 3: within each 8-wide block, extends the upper-half groups down to the block
// base by combining them with the block's lower 4-wide group (distance 4)
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
black b_13_8 (G_13_8, P_13_8, {G_13_12,G_11_8}, {P_13_12,P_11_8});
black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8});
black b_21_16 (G_21_16, P_21_16, {G_21_20,G_19_16}, {P_21_20,P_19_16});
black b_23_16 (G_23_16, P_23_16, {G_23_20,G_19_16}, {P_23_20,P_19_16});
black b_29_24 (G_29_24, P_29_24, {G_29_28,G_27_24}, {P_29_28,P_27_24});
black b_31_24 (G_31_24, P_31_24, {G_31_28,G_27_24}, {P_31_28,P_27_24});
black b_37_32 (G_37_32, P_37_32, {G_37_36,G_35_32}, {P_37_36,P_35_32});
black b_39_32 (G_39_32, P_39_32, {G_39_36,G_35_32}, {P_39_36,P_35_32});
black b_45_40 (G_45_40, P_45_40, {G_45_44,G_43_40}, {P_45_44,P_43_40});
black b_47_40 (G_47_40, P_47_40, {G_47_44,G_43_40}, {P_47_44,P_43_40});
black b_53_48 (G_53_48, P_53_48, {G_53_52,G_51_48}, {P_53_52,P_51_48});
black b_55_48 (G_55_48, P_55_48, {G_55_52,G_51_48}, {P_55_52,P_51_48});
black b_61_56 (G_61_56, P_61_56, {G_61_60,G_59_56}, {P_61_60,P_59_56});
black b_63_56 (G_63_56, P_63_56, {G_63_60,G_59_56}, {P_63_60,P_59_56});
// Stage 4: within each 16-wide block, extends the upper-half groups down to the block
// base by combining them with the block's lower 8-wide group (distance 8)
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
grey g_13_0 (G_13_0, {G_13_8,G_7_0}, P_13_8);
grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8);
black b_25_16 (G_25_16, P_25_16, {G_25_24,G_23_16}, {P_25_24,P_23_16});
black b_27_16 (G_27_16, P_27_16, {G_27_24,G_23_16}, {P_27_24,P_23_16});
black b_29_16 (G_29_16, P_29_16, {G_29_24,G_23_16}, {P_29_24,P_23_16});
black b_31_16 (G_31_16, P_31_16, {G_31_24,G_23_16}, {P_31_24,P_23_16});
black b_41_32 (G_41_32, P_41_32, {G_41_40,G_39_32}, {P_41_40,P_39_32});
black b_43_32 (G_43_32, P_43_32, {G_43_40,G_39_32}, {P_43_40,P_39_32});
black b_45_32 (G_45_32, P_45_32, {G_45_40,G_39_32}, {P_45_40,P_39_32});
black b_47_32 (G_47_32, P_47_32, {G_47_40,G_39_32}, {P_47_40,P_39_32});
black b_57_48 (G_57_48, P_57_48, {G_57_56,G_55_48}, {P_57_56,P_55_48});
black b_59_48 (G_59_48, P_59_48, {G_59_56,G_55_48}, {P_59_56,P_55_48});
black b_61_48 (G_61_48, P_61_48, {G_61_56,G_55_48}, {P_61_56,P_55_48});
black b_63_48 (G_63_48, P_63_48, {G_63_56,G_55_48}, {P_63_56,P_55_48});
// Stage 5: within each 32-wide block, extends the upper-half groups down to the block
// base by combining them with the block's lower 16-wide group (distance 16)
grey g_17_0 (G_17_0, {G_17_16,G_15_0}, P_17_16);
grey g_19_0 (G_19_0, {G_19_16,G_15_0}, P_19_16);
grey g_21_0 (G_21_0, {G_21_16,G_15_0}, P_21_16);
grey g_23_0 (G_23_0, {G_23_16,G_15_0}, P_23_16);
grey g_25_0 (G_25_0, {G_25_16,G_15_0}, P_25_16);
grey g_27_0 (G_27_0, {G_27_16,G_15_0}, P_27_16);
grey g_29_0 (G_29_0, {G_29_16,G_15_0}, P_29_16);
grey g_31_0 (G_31_0, {G_31_16,G_15_0}, P_31_16);
black b_49_32 (G_49_32, P_49_32, {G_49_48,G_47_32}, {P_49_48,P_47_32});
black b_51_32 (G_51_32, P_51_32, {G_51_48,G_47_32}, {P_51_48,P_47_32});
black b_53_32 (G_53_32, P_53_32, {G_53_48,G_47_32}, {P_53_48,P_47_32});
black b_55_32 (G_55_32, P_55_32, {G_55_48,G_47_32}, {P_55_48,P_47_32});
black b_57_32 (G_57_32, P_57_32, {G_57_48,G_47_32}, {P_57_48,P_47_32});
black b_59_32 (G_59_32, P_59_32, {G_59_48,G_47_32}, {P_59_48,P_47_32});
black b_61_32 (G_61_32, P_61_32, {G_61_48,G_47_32}, {P_61_48,P_47_32});
black b_63_32 (G_63_32, P_63_32, {G_63_48,G_47_32}, {P_63_48,P_47_32});
// Stage 6: extends every upper-half (positions 32..63) odd-indexed group down to
// position 0 by combining it with G_31_0 (distance 32)
grey g_33_0 (G_33_0, {G_33_32,G_31_0}, P_33_32);
grey g_35_0 (G_35_0, {G_35_32,G_31_0}, P_35_32);
grey g_37_0 (G_37_0, {G_37_32,G_31_0}, P_37_32);
grey g_39_0 (G_39_0, {G_39_32,G_31_0}, P_39_32);
grey g_41_0 (G_41_0, {G_41_32,G_31_0}, P_41_32);
grey g_43_0 (G_43_0, {G_43_32,G_31_0}, P_43_32);
grey g_45_0 (G_45_0, {G_45_32,G_31_0}, P_45_32);
grey g_47_0 (G_47_0, {G_47_32,G_31_0}, P_47_32);
grey g_49_0 (G_49_0, {G_49_32,G_31_0}, P_49_32);
grey g_51_0 (G_51_0, {G_51_32,G_31_0}, P_51_32);
grey g_53_0 (G_53_0, {G_53_32,G_31_0}, P_53_32);
grey g_55_0 (G_55_0, {G_55_32,G_31_0}, P_55_32);
grey g_57_0 (G_57_0, {G_57_32,G_31_0}, P_57_32);
grey g_59_0 (G_59_0, {G_59_32,G_31_0}, P_59_32);
grey g_61_0 (G_61_0, {G_61_32,G_31_0}, P_61_32);
grey g_63_0 (G_63_0, {G_63_32,G_31_0}, P_63_32);
// Extra grey cell stage: the stages above only produce G_k_0 for odd k; each even k
// is formed from the raw g[k]/p[k] and the already-computed G_(k-1)_0
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
grey g_14_0 (G_14_0, {g[14],G_13_0}, p[14]);
grey g_16_0 (G_16_0, {g[16],G_15_0}, p[16]);
grey g_18_0 (G_18_0, {g[18],G_17_0}, p[18]);
grey g_20_0 (G_20_0, {g[20],G_19_0}, p[20]);
grey g_22_0 (G_22_0, {g[22],G_21_0}, p[22]);
grey g_24_0 (G_24_0, {g[24],G_23_0}, p[24]);
grey g_26_0 (G_26_0, {g[26],G_25_0}, p[26]);
grey g_28_0 (G_28_0, {g[28],G_27_0}, p[28]);
grey g_30_0 (G_30_0, {g[30],G_29_0}, p[30]);
grey g_32_0 (G_32_0, {g[32],G_31_0}, p[32]);
grey g_34_0 (G_34_0, {g[34],G_33_0}, p[34]);
grey g_36_0 (G_36_0, {g[36],G_35_0}, p[36]);
grey g_38_0 (G_38_0, {g[38],G_37_0}, p[38]);
grey g_40_0 (G_40_0, {g[40],G_39_0}, p[40]);
grey g_42_0 (G_42_0, {g[42],G_41_0}, p[42]);
grey g_44_0 (G_44_0, {g[44],G_43_0}, p[44]);
grey g_46_0 (G_46_0, {g[46],G_45_0}, p[46]);
grey g_48_0 (G_48_0, {g[48],G_47_0}, p[48]);
grey g_50_0 (G_50_0, {g[50],G_49_0}, p[50]);
grey g_52_0 (G_52_0, {g[52],G_51_0}, p[52]);
grey g_54_0 (G_54_0, {g[54],G_53_0}, p[54]);
grey g_56_0 (G_56_0, {g[56],G_55_0}, p[56]);
grey g_58_0 (G_58_0, {g[58],G_57_0}, p[58]);
grey g_60_0 (G_60_0, {g[60],G_59_0}, p[60]);
grey g_62_0 (G_62_0, {g[62],G_61_0}, p[62]);
// Final Stage: Apply c_k+1=G_k_0
// (c[1] is taken straight from g[0], since G_0_0 is just g[0])
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
assign c[9]=G_8_0;
assign c[10]=G_9_0;
assign c[11]=G_10_0;
assign c[12]=G_11_0;
assign c[13]=G_12_0;
assign c[14]=G_13_0;
assign c[15]=G_14_0;
assign c[16]=G_15_0;
assign c[17]=G_16_0;
assign c[18]=G_17_0;
assign c[19]=G_18_0;
assign c[20]=G_19_0;
assign c[21]=G_20_0;
assign c[22]=G_21_0;
assign c[23]=G_22_0;
assign c[24]=G_23_0;
assign c[25]=G_24_0;
assign c[26]=G_25_0;
assign c[27]=G_26_0;
assign c[28]=G_27_0;
assign c[29]=G_28_0;
assign c[30]=G_29_0;
assign c[31]=G_30_0;
assign c[32]=G_31_0;
assign c[33]=G_32_0;
assign c[34]=G_33_0;
assign c[35]=G_34_0;
assign c[36]=G_35_0;
assign c[37]=G_36_0;
assign c[38]=G_37_0;
assign c[39]=G_38_0;
assign c[40]=G_39_0;
assign c[41]=G_40_0;
assign c[42]=G_41_0;
assign c[43]=G_42_0;
assign c[44]=G_43_0;
assign c[45]=G_44_0;
assign c[46]=G_45_0;
assign c[47]=G_46_0;
assign c[48]=G_47_0;
assign c[49]=G_48_0;
assign c[50]=G_49_0;
assign c[51]=G_50_0;
assign c[52]=G_51_0;
assign c[53]=G_52_0;
assign c[54]=G_53_0;
assign c[55]=G_54_0;
assign c[56]=G_55_0;
assign c[57]=G_56_0;
assign c[58]=G_57_0;
assign c[59]=G_58_0;
assign c[60]=G_59_0;
assign c[61]=G_60_0;
assign c[62]=G_61_0;
assign c[63]=G_62_0;
assign c[64]=G_63_0;
endmodule // ladner_fischer64

View File

@ -2,7 +2,7 @@
// input B0;
// input B1;
// output P;
// output V;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -16,7 +16,7 @@
// xxxxxL,Rxxxxxxx
// where , denotes the rounding boundary. S is the logical OR of all the
// bits to the right of R.
module rounder (Result, DenormIO, Flags, rm, P, OvEn,
UnEn, exp_valid, sel_inv, Invalid, DenormIn, convert, Asign, Aexp,
norm_shift, A, exponent_postsum, A_Norm, B_Norm, exp_A_unmodified, exp_B_unmodified,

View File

@ -5,48 +5,41 @@
// It produces a rounded 52-bit result, Z, the exponent of the rounded
// result, Z_exp, and a flag that indicates if the result was rounded,
// Inexact. The rounding mode has the following values.
// rm Modee
// rm Mode
// 00 round-to-nearest-even
// 01 round-toward-zero
// 01 round-toward-zero
// 10 round-toward-plus infinity
// 11 round-toward-minus infinity
// 11 round-toward-minus infinity
//
module rounder_div (Result, DenormIO, Flags, rm, P, OvEn,
UnEn, exp_diff, sel_inv, Invalid, DenormIn,
SignR, q1, qm1, qp1, q0, qm0, qp0, regr_out);
input [1:0] rm;
input P;
input OvEn;
input UnEn;
input [12:0] exp_diff;
input [2:0] sel_inv;
input Invalid;
input DenormIn;
input SignR;
module rounder_div (
input logic [1:0] rm,
input logic P,
input logic OvEn,
input logic UnEn,
input logic [12:0] exp_diff,
input logic [2:0] sel_inv,
input logic Invalid,
input logic SignR,
input logic [63:0] q1;
input logic [63:0] qm1;
input logic [63:0] qp1;
input logic [63:0] q0;
input logic [63:0] qm0;
input logic [63:0] qp0;
input logic [127:0] regr_out;
output logic [63:0] Result;
output logic DenormIO;
output logic [4:0] Flags;
supply1 vdd;
supply0 vss;
input logic [63:0] q1,
input logic [63:0] qm1,
input logic [63:0] qp1,
input logic [63:0] q0,
input logic [63:0] qm0,
input logic [63:0] qp0,
input logic [127:0] regr_out,
output logic [63:0] Result,
output logic [4:0] Flags
);
logic Rsign;
logic [10:0] Rexp;
logic [12:0] Texp;
logic [51:0] Rmant;
logic [63:0] Tmant;
logic [51:0] Smant;
logic [10:0] Rexp;
logic [12:0] Texp;
logic [51:0] Rmant;
logic [63:0] Tmant;
logic [51:0] Smant;
logic Rzero;
logic Gdp, Gsp, G;
logic UnFlow_SP, UnFlow_DP, UnderFlow;
@ -64,10 +57,10 @@ module rounder_div (Result, DenormIO, Flags, rm, P, OvEn,
logic Texp_l7o;
logic OvCon;
logic zero_rem;
logic [1:0] mux_mant;
logic [1:0] mux_mant;
logic sign_rem;
logic [63:0] q, qm, qp;
logic exp_ovf, exp_ovfSP, exp_ovfDP;
logic [63:0] q, qm, qp;
logic exp_ovf;
// Remainder = 0?
assign zero_rem = ~(|regr_out);
@ -98,7 +91,7 @@ module rounder_div (Result, DenormIO, Flags, rm, P, OvEn,
// 1.) we choose any qm0, qp0, q0 (since we shift mant)
// 2.) we choose qp and we overflow (for RU)
assign exp_ovf = |{qp[62:40], (qp[39:11] & {29{~P}})};
assign Texp = exp_diff - {{13{vss}}, ~q1[63]} + {{13{vss}}, mux_mant[1]&qp1[63]&~exp_ovf};
assign Texp = exp_diff - {{13{1'b0}}, ~q1[63]} + {{13{1'b0}}, mux_mant[1]&qp1[63]&~exp_ovf};
// Overflow only occurs for double precision, if Texp[10] to Texp[0] are
// all ones. To encourage sharing with single precision overflow detection,
@ -130,9 +123,6 @@ module rounder_div (Result, DenormIO, Flags, rm, P, OvEn,
assign OverFlow = (P & OvFlow_SP | OvFlow_DP) & Valid;
assign Div0 = sel_inv[2]&sel_inv[1]&~sel_inv[0];
// The DenormIO is set if underflow has occurred or if their was a
// denormalized input.
assign DenormIO = DenormIn | UnderFlow;
// The final result is Inexact if any rounding occurred ((i.e., R or S
// is one), or (if the result overflows ) or (if the result underflows and the

View File

@ -1,5 +1,5 @@
module sbtm_a0 (input logic [6:0] a,
output logic [12:0] y);
output logic [12:0] y);
always_comb
case(a)
7'b0000000: y = 13'b1111111100010;

View File

@ -1,5 +1,5 @@
module sbtm_a1 (input logic [6:0] a,
output logic [4:0] y);
output logic [4:0] y);
always_comb
case(a)
7'b0000000: y = 5'b11100;

View File

@ -1,5 +1,5 @@
module sbtm_a2 (input logic [7:0] a,
output logic [13:0] y);
output logic [13:0] y);
always_comb
case(a)
8'b01000000: y = 14'b10110100010111;

View File

@ -1,5 +1,5 @@
module sbtm_a3 (input logic [7:0] a,
output logic [5:0] y);
output logic [5:0] y);
always_comb
case(a)
8'b01000000: y = 6'b100110;

View File

@ -7,12 +7,12 @@ module sbtm_div (input logic [11:0] a, output logic [10:0] ia_out);
logic [2:0] x2_1cmp;
// mem outputs
logic [12:0] y0;
logic [4:0] y1;
logic [4:0] y1;
// input to CPA
logic [14:0] op1;
logic [14:0] op2;
logic [14:0] p;
logic cout;
logic cout;
assign x0 = a[10:7];
assign x1 = a[6:4];
@ -26,10 +26,8 @@ module sbtm_div (input logic [11:0] a, output logic [10:0] ia_out);
// 1s cmp per sbtm/stam
assign op2 = x2[3] ? {1'b1, {8{1'b1}}, ~y1, 1'b1} :
{1'b0, 8'b0, y1, 1'b1};
// CPA
// adder #(15) cp1 (op1, op2, 1'b0, p, cout);
// CPA
assign {cout, p} = op1 + op2;
//assign ia_out = {p[14:4], {53{1'b0}}};
assign ia_out = p[14:4];
endmodule // sbtm

View File

@ -7,12 +7,12 @@ module sbtm_sqrt (input logic [11:0] a, output logic [10:0] y);
logic [2:0] x2_1cmp;
// mem outputs
logic [13:0] y0;
logic [5:0] y1;
logic [5:0] y1;
// input to CPA
logic [14:0] op1;
logic [14:0] op2;
logic [14:0] p;
logic cout;
logic cout;
assign x0 = a[11:7];
assign x1 = a[6:4];
@ -29,7 +29,6 @@ module sbtm_sqrt (input logic [11:0] a, output logic [10:0] y);
{8'b0, y1, 1'b1};
// CPA
//adder #(15) cp1 (op1, op2, 1'b0, p, cout);
assign {cout, p} = op1 + op2;
assign y = p[14:4];

View File

@ -28,7 +28,7 @@ module mux21x64 (Z, A, B, Sel);
assign Z = Sel ? B : A;
endmodule // mux21x64
// The implementation of the barrel shifter was modified to use
// fewer gates. It is now implemented using six 64-bit 2-to-1 muxes. The
// barrel shifter takes a 64-bit input A and shifts it left by up to

View File

@ -1,4 +1,4 @@
module unpacking (
module unpacking (
input logic [63:0] X, Y, Z,
input logic FmtE,
input logic [2:0] FOpCtrlE,
@ -25,9 +25,9 @@ module unpacking (
assign YSgnE = FmtE ? Y[63] : Y[31];
assign ZSgnE = FmtE ? Z[63] : Z[31];
assign XExpE = FmtE ? X[62:52] : {X[30], {3{~X[30] & XExpNonzero | XExpMaxE}}, X[29:23]};
assign YExpE = FmtE ? Y[62:52] : {Y[30], {3{~Y[30] & YExpNonzero | YExpMaxE}}, Y[29:23]};
assign ZExpE = FmtE ? Z[62:52] : {Z[30], {3{~Z[30] & ZExpNonzero | ZExpMaxE}}, Z[29:23]};
assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]};//{X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]};
assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]};//{Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]};
assign ZExpE = FmtE ? Z[62:52] : {3'b0, Z[30:23]};//{Z[30], {3{~Z[30]&~ZExpZero|ZExpMaxE}}, Z[29:23]};
/* assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]}; // *** maybe convert to full number of bits here?
assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]};
assign ZExpE = FmtE ? Z[62:52] : {3'b0, Z[30:23]};*/
@ -78,7 +78,7 @@ module unpacking (
assign YZeroE = YExpZero & YFracZero;
assign ZZeroE = ZExpZero & ZFracZero;
//assign BiasE = FmtE ? 13'h3ff : 13'h7f; // *** is it better to convert to full precision exponents so bias isn't needed?
assign BiasE = 13'h3ff; // always use 1023 because exponents are unpacked to double precision
assign BiasE = FmtE ? 13'h3ff : 13'h7f; // *** is it better to convert to full precision exponents so bias isn't needed?
// assign BiasE = 13'h3ff; // always use 1023 because exponents are unpacked to double precision
endmodule

View File

@ -50,6 +50,12 @@ module hptw
output logic WalkerInstrPageFaultF, WalkerLoadPageFaultM,WalkerStorePageFaultM // faults
);
typedef enum {L0_ADR, L0_RD,
L1_ADR, L1_RD,
L2_ADR, L2_RD,
L3_ADR, L3_RD,
LEAF, IDLE, FAULT} statetype; // *** placed outside generate statement to remove synthesis errors
generate
if (`MEM_VIRTMEM) begin
logic DTLBWalk; // register TLBs translation miss requests
@ -66,12 +72,6 @@ module hptw
logic [`SVMODE_BITS-1:0] SvMode;
logic [`XLEN-1:0] TranslationVAdr;
typedef enum {LEVEL0_SET_ADR, LEVEL0_READ, LEVEL0,
LEVEL1_SET_ADR, LEVEL1_READ, LEVEL1,
LEVEL2_SET_ADR, LEVEL2_READ, LEVEL2,
LEVEL3_SET_ADR, LEVEL3_READ, LEVEL3,
LEAF, IDLE, FAULT} statetype;
statetype WalkerState, NextWalkerState, InitialWalkerState;
// Extract bits from CSRs and inputs
@ -99,7 +99,7 @@ module hptw
// Enable and select signals based on states
assign StartWalk = (WalkerState == IDLE) & TLBMiss;
assign HPTWRead = (WalkerState == LEVEL3_READ) | (WalkerState == LEVEL2_READ) | (WalkerState == LEVEL1_READ) | (WalkerState == LEVEL0_READ);
assign HPTWRead = (WalkerState == L3_RD) | (WalkerState == L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD);
assign SelPTW = (WalkerState != IDLE) & (WalkerState != FAULT) & (WalkerState != LEAF);
assign DTLBWriteM = (WalkerState == LEAF) & DTLBWalk;
assign ITLBWriteF = (WalkerState == LEAF) & ~DTLBWalk;
@ -113,10 +113,10 @@ module hptw
flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType);
always_comb
case (WalkerState)
LEVEL3: NextPageType = 2'b11; // terapage
LEVEL2: NextPageType = 2'b10; // gigapage
LEVEL1: NextPageType = 2'b01; // megapage
LEVEL0: NextPageType = 2'b00; // kilopage
L3_RD: NextPageType = 2'b11; // terapage
L2_RD: NextPageType = 2'b10; // gigapage
L1_RD: NextPageType = 2'b01; // megapage
L0_RD: NextPageType = 2'b00; // kilopage
default: NextPageType = PageType;
endcase
@ -124,36 +124,36 @@ module hptw
if (`XLEN==32) begin // RV32
logic [9:0] VPN;
logic [`PPN_BITS-1:0] PPN;
assign VPN = ((WalkerState == LEVEL1_SET_ADR) | (WalkerState == LEVEL1_READ)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state
assign PPN = ((WalkerState == LEVEL1_SET_ADR) | (WalkerState == LEVEL1_READ)) ? BasePageTablePPN : CurrentPPN;
assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state
assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN;
assign TranslationPAdr = {PPN, VPN, 2'b00};
end else begin // RV64
logic [8:0] VPN;
logic [`PPN_BITS-1:0] PPN;
always_comb
case (WalkerState) // select VPN field based on HPTW state
LEVEL3_SET_ADR, LEVEL3_READ: VPN = TranslationVAdr[47:39];
LEVEL3, LEVEL2_SET_ADR, LEVEL2_READ: VPN = TranslationVAdr[38:30];
LEVEL2, LEVEL1_SET_ADR, LEVEL1_READ: VPN = TranslationVAdr[29:21];
L3_ADR, L3_RD: VPN = TranslationVAdr[47:39];
L2_ADR, L2_RD: VPN = TranslationVAdr[38:30];
L1_ADR, L1_RD: VPN = TranslationVAdr[29:21];
default: VPN = TranslationVAdr[20:12];
endcase
assign PPN = ((WalkerState == LEVEL3_SET_ADR) | (WalkerState == LEVEL3_READ) |
(SvMode != `SV48 & ((WalkerState == LEVEL2_SET_ADR) | (WalkerState == LEVEL2_READ)))) ? BasePageTablePPN : CurrentPPN;
assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) |
(SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN;
assign TranslationPAdr = {PPN, VPN, 3'b000};
end
// Initial state and misalignment for RV32/64
if (`XLEN == 32) begin
assign InitialWalkerState = LEVEL1_SET_ADR;
assign InitialWalkerState = L1_ADR;
assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0
assign Misaligned = ((WalkerState == LEVEL1) & MegapageMisaligned);
assign Misaligned = ((WalkerState == L0_ADR) & MegapageMisaligned);
end else begin
logic GigapageMisaligned, TerapageMisaligned;
assign InitialWalkerState = (SvMode == `SV48) ? LEVEL3_SET_ADR : LEVEL2_SET_ADR;
assign InitialWalkerState = (SvMode == `SV48) ? L3_ADR : L2_ADR;
assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0
assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0
assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0
assign Misaligned = ((WalkerState == LEVEL3) & TerapageMisaligned) | ((WalkerState == LEVEL2) & GigapageMisaligned) | ((WalkerState == LEVEL1) & MegapageMisaligned);
assign Misaligned = ((WalkerState == L2_ADR) & TerapageMisaligned) | ((WalkerState == L1_ADR) & GigapageMisaligned) | ((WalkerState == L0_ADR) & MegapageMisaligned);
end
// Page Table Walker FSM
@ -166,29 +166,37 @@ module hptw
case (WalkerState)
IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState;
else NextWalkerState = IDLE;
LEVEL3_SET_ADR: NextWalkerState = LEVEL3_READ;
LEVEL3_READ: if (HPTWStall) NextWalkerState = LEVEL3_READ;
else NextWalkerState = LEVEL3;
LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
else if (ValidNonLeafPTE) NextWalkerState = LEVEL2_SET_ADR;
L3_ADR: NextWalkerState = L3_RD; // first access in SV48
L3_RD: if (HPTWStall) NextWalkerState = L3_RD;
else NextWalkerState = L2_ADR;
// LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
// else if (ValidNonLeafPTE) NextWalkerState = L2_ADR;
// else NextWalkerState = FAULT;
L2_ADR: if (InitialWalkerState == L2_ADR) NextWalkerState = L2_RD; // first access in SV39
else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages
else if (ValidNonLeafPTE) NextWalkerState = L2_RD;
else NextWalkerState = FAULT;
L2_RD: if (HPTWStall) NextWalkerState = L2_RD;
else NextWalkerState = L1_ADR;
// LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
// else if (ValidNonLeafPTE) NextWalkerState = L1_ADR;
// else NextWalkerState = FAULT;
L1_ADR: if (InitialWalkerState == L1_ADR) NextWalkerState = L1_RD; // first access in SV32
else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages
else if (ValidNonLeafPTE) NextWalkerState = L1_RD;
else NextWalkerState = FAULT;
L1_RD: if (HPTWStall) NextWalkerState = L1_RD;
else NextWalkerState = L0_ADR;
// LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
// else if (ValidNonLeafPTE) NextWalkerState = L0_ADR;
// else NextWalkerState = FAULT;
L0_ADR: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages
else if (ValidNonLeafPTE) NextWalkerState = L0_RD;
else NextWalkerState = FAULT;
LEVEL2_SET_ADR: NextWalkerState = LEVEL2_READ;
LEVEL2_READ: if (HPTWStall) NextWalkerState = LEVEL2_READ;
else NextWalkerState = LEVEL2;
LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
else if (ValidNonLeafPTE) NextWalkerState = LEVEL1_SET_ADR;
else NextWalkerState = FAULT;
LEVEL1_SET_ADR: NextWalkerState = LEVEL1_READ;
LEVEL1_READ: if (HPTWStall) NextWalkerState = LEVEL1_READ;
else NextWalkerState = LEVEL1;
LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
else if (ValidNonLeafPTE) NextWalkerState = LEVEL0_SET_ADR;
else NextWalkerState = FAULT;
LEVEL0_SET_ADR: NextWalkerState = LEVEL0_READ;
LEVEL0_READ: if (HPTWStall) NextWalkerState = LEVEL0_READ;
else NextWalkerState = LEVEL0;
LEVEL0: if (ValidLeafPTE) NextWalkerState = LEAF;
else NextWalkerState = FAULT;
L0_RD: if (HPTWStall) NextWalkerState = L0_RD;
else NextWalkerState = LEAF;
// LEVEL0: if (ValidLeafPTE) NextWalkerState = LEAF;
// else NextWalkerState = FAULT;
LEAF: NextWalkerState = IDLE;
FAULT: if (ITLBMissF & AnyCPUReqM & ~MemAfterIWalkDone) NextWalkerState = FAULT;
else NextWalkerState = IDLE;

View File

@ -67,9 +67,7 @@ module pmpadrdec (
assign TORMatch = PAgePMPAdrIn && PAltPMPAdr;
// Naturally aligned regions
// verilator lint_off UNOPTFLAT
logic [`PA_BITS-1:0] Mask;
logic [`PA_BITS-1:0] NAMask;
//genvar i;
// create a mask of which bits to ignore
@ -80,23 +78,14 @@ module pmpadrdec (
// assign Mask[i] = Mask[i-1] & PMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore
// end
// endgenerate
prioritycircuit #(.ENTRIES(`PA_BITS-2), .FINAL_OP("NONE")) maskgen(.a(~PMPAdr[`PA_BITS-3:0]), .FirstPin(AdrMode==NAPOT), .y(Mask[`PA_BITS-1:2]));
assign Mask[1:0] = 2'b11;
// *** possible experiments:
/* PA < PMP addr could be in its own module,
preserving hierarchy so we can know if this is the culprit on the critical path
Should take logarithmic time, so more like 6 levels than 40 should be expected
assign NAMask[1:0] = {2'b11};
update mask generation
Should be concurrent with the subtraction/comparison
if one is the critical path, the other shouldn't be which makes us think the mask generation is the culprit.
prioritythemometer #(`PA_BITS-2) namaskgen(
.a({PMPAdr[`PA_BITS-4:0], (AdrMode == NAPOT)}),
.y(NAMask[`PA_BITS-1:2]));
Hopefully just use the priority circuit here
*/
// verilator lint_on UNOPTFLAT
assign NAMatch = &((PhysicalAddress ~^ CurrentAdrFull) | Mask);
assign NAMatch = &((PhysicalAddress ~^ CurrentAdrFull) | NAMask);
assign Match = (AdrMode == TOR) ? TORMatch :
(AdrMode == NA4 || AdrMode == NAPOT) ? NAMatch :

View File

@ -69,7 +69,7 @@ module pmpchecker (
.PAgePMPAdrOut(PAgePMPAdr),
.FirstMatch, .Match, .Active, .L, .X, .W, .R);
prioritycircuit #(.ENTRIES(`PMP_ENTRIES), .FINAL_OP("AND")) pmppriority(.a(Match), .FirstPin(1'b1), .y(FirstMatch)); // Take the ripple gates/signals out of the pmpadrdec and into another unit.
priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // Take the ripple gates/signals out of the pmpadrdec and into another unit.
// Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region
assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L : |Active;

View File

@ -1,5 +1,5 @@
///////////////////////////////////////////
// prioritycircuit.sv
// priorityonehot.sv
//
// Written: tfleming@hmc.edu & jtorrey@hmc.edu 7 April 2021
// Modified: Teo Ene 15 Apr 2021:
@ -30,31 +30,22 @@
`include "wally-config.vh"
module prioritycircuit #(parameter ENTRIES = 8,
parameter FINAL_OP = "AND") (
module priorityonehot #(parameter ENTRIES = 8) (
input logic [ENTRIES-1:0] a,
input logic FirstPin,
output logic [ENTRIES-1:0] y
);
// verilator lint_off UNOPTFLAT
logic [ENTRIES-1:0] nolower;
// generate thermometer code mask
genvar i;
generate
assign nolower[0] = FirstPin;
assign nolower[0] = 1'b1;
for (i=1; i<ENTRIES; i++) begin:therm
assign nolower[i] = nolower[i-1] & ~a[i-1];
end
endgenerate
// verilator lint_on UNOPTFLAT
assign y = a & nolower;
generate
if (FINAL_OP=="AND") begin
assign y = a & nolower;
end else if (FINAL_OP=="NONE") begin
assign y = nolower;
end // *** So far these are the only two operations I need to do at the end, but feel free to add more as needed.
endgenerate
// assign y = a & nolower;
endmodule

View File

@ -0,0 +1,50 @@
///////////////////////////////////////////
// prioritythermometer.sv
//
// Written: tfleming@hmc.edu & jtorrey@hmc.edu 7 April 2021
// Modified: Teo Ene 15 Apr 2021:
// Temporarily removed paramterized priority encoder for non-parameterized one
// To get synthesis working quickly
// Kmacsaigoren@hmc.edu 28 May 2021:
// Added working version of parameterized priority encoder.
// David_Harris@Hmc.edu switched to one-hot output
//
// Purpose: Thermometer-code generator: y[i] is 1 only when a[i] and every lower bit of a are 1
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// Thermometer-code generator.
//   N : width of the input and output vectors
//   a : input vector
//   y : y[i] = a[i] & a[i-1] & ... & a[0], so y is a run of 1s starting at
//       bit 0 that stops at the lowest 0 bit of a (all 0s when a[0] is 0)
module prioritythemometer #(parameter N = 8) (
  input  logic [N-1:0] a,
  output logic [N-1:0] y
);

  // Each output bit is expressed directly as an AND-reduction of a[i:0]
  // rather than rippling through y[i-1]. The result is the same function,
  // but y no longer feeds back into itself, which avoids Verilator's
  // UNOPTFLAT warning on the vector and leaves synthesis free to build a
  // tree instead of a linear chain.
  genvar i;
  generate
    for (i = 0; i < N; i++) begin : therm   // labeled so the generated scopes have stable names
      assign y[i] = &a[i:0];
    end
  endgenerate
endmodule

View File

@ -39,7 +39,7 @@ module tlblru #(parameter TLB_ENTRIES = 8) (
logic AllUsed; // High if the next access causes all RU bits to be 1
// Find the first line not recently used
prioritycircuit #(.ENTRIES(TLB_ENTRIES), .FINAL_OP("AND")) nru(.a(~RUBits), .FirstPin(1'b1), .y(WriteLines));
priorityonehot #(TLB_ENTRIES) nru(.a(~RUBits), .y(WriteLines));
// Track recently used lines, updating on a CAM Hit or TLB write
assign WriteEnables = WriteLines & {(TLB_ENTRIES){TLBWrite}};

View File

@ -0,0 +1,512 @@
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000

View File

@ -46,11 +46,15 @@ module testbench();
string tests32mmu[] = '{
"rv32mmu/WALLY-MMU-SV32", "3000"
//"rv32mmu/WALLY-PMA", "3000",
//"rv32mmu/WALLY-PMA", "3000"
};
string tests64mmu[] = '{
"rv64mmu/WALLY-MMU-SV48", "3000",
"rv64mmu/WALLY-MMU-SV39", "3000"
//"rv64mmu/WALLY-PMA", "3000",
//"rv64mmu/WALLY-PMA", "3000"
};
@ -558,7 +562,7 @@ string tests32f[] = '{
end
end
string signame, memfilename;
string signame, memfilename, romfilename;
logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
logic UARTSin, UARTSout;
@ -604,7 +608,9 @@ string tests32f[] = '{
end
// read test vectors into memory
memfilename = {"../../imperas-riscv-tests/work/", tests[test], ".elf.memfile"};
romfilename = {"../../imperas-riscv-tests/imperas-boottim.txt"};
$readmemh(memfilename, dut.uncore.dtim.RAM);
$readmemh(romfilename, dut.uncore.bootdtim.bootdtim.RAM);
ProgramAddrMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.lab"};
$display("Read memfile %s", memfilename);
@ -886,6 +892,7 @@ module instrNameDecTB(
else if (imm == 2) name = "URET";
else if (imm == 258) name = "SRET";
else if (imm == 770) name = "MRET";
else if (funct7 == 9) name = "SFENCE.VMA";
else name = "ILLEGAL";
10'b1110011_001: name = "CSRRW";
10'b1110011_010: name = "CSRRS";

View File

@ -27,7 +27,7 @@
module testbench();
parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*3160000; // # of instructions at which to turn on waves in graphical sim
parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*3080000; // # of instructions at which to turn on waves in graphical sim
parameter stopICount = `BUSYBEAR*143898 + `BUILDROOT*0000000; // # instructions at which to halt sim completely (set to 0 to let it run as far as it can)
///////////////////////////////////////////////////////////////////////////////
@ -184,9 +184,12 @@ module testbench();
scan_file_rf = $fscanf(data_file_rf, "%d\n", regNumExpected);
scan_file_rf = $fscanf(data_file_rf, "%x\n", regExpected);
force dut.hart.ieu.dp.regf.wd3 = regExpected;
// Hack to compensate for QEMU's incorrect MSTATUS
// Hack to compensate for QEMU's incorrect MSTATUS (Wally correctly identifies MXL, SXL to be 2 whereas QEMU sets them to an invalid value of 0)
end else if (PCtextW.substr(0,3) == "csrr" && PCtextW.substr(10,16) == "mstatus") begin
force dut.hart.ieu.dp.regf.wd3 = dut.hart.ieu.dp.WriteDataW & ~64'ha00000000;
// Hack to compensate for QEMU's incorrect SSTATUS (Wally correctly identifies UXL to be 2 whereas QEMU sets it to an invalid value of 0)
end else if (PCtextW.substr(0,3) == "csrr" && ((PCtextW.substr(10,16) == "sstatus") || (PCtextW.substr(11,17) == "sstatus"))) begin
force dut.hart.ieu.dp.regf.wd3 = dut.hart.ieu.dp.WriteDataW & ~64'h200000000;
end else release dut.hart.ieu.dp.regf.wd3;
// Hack to compensate for QEMU's correct but different MTVAL (according to spec, storing the faulting instr is an optional feature)
if (PCtextW.substr(0,3) == "csrr" && PCtextW.substr(10,14) == "mtval") begin
@ -265,7 +268,7 @@ module testbench();
// Check PCD, InstrD
if (~PCDwrong && ~(dut.hart.ifu.PCD === PCDexpected)) begin
$display("%0t ps, instr %0d: PC does not equal PC expected: %x, %x", $time, instrs, dut.hart.ifu.PCD, PCDexpected);
$display("%0t ps, instr %0d: PCD does not equal PCD expected: %x, %x", $time, instrs, dut.hart.ifu.PCD, PCDexpected);
`ERROR
end
InstrMask = InstrDExpected[1:0] == 2'b11 ? 32'hFFFFFFFF : 32'h0000FFFF;