diff --git a/wally-pipelined/src/fpu/fpadd/adder.v b/wally-pipelined/src/fpu/fpadd/adder.v
new file mode 100755
index 00000000..3d4124af
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/adder.v
@@ -0,0 +1,758 @@
+// The following modules make up the basic building blocks that
+// are used by the cla64, cla_sub64, and cla52.
+
+module INVBLOCK ( GIN, GOUT );
+   // Inverter cell: GOUT is the logical complement of GIN.
+   input  GIN;    // signal to be inverted
+   output GOUT;   // ~GIN
+
+   not (GOUT, GIN);
+
+endmodule // INVBLOCK
+
+
+module XXOR1 ( A, B, GIN, SUM );
+   // SUM = XNOR(A, B) XOR GIN, i.e. the complement of A ^ B ^ GIN.
+   input  A, B;   // inputs to the XNOR term
+   input  GIN;    // XORed with the XNOR result
+   output SUM;
+
+   wire   ab_xnor = A ~^ B;
+   assign SUM = ab_xnor ^ GIN;
+
+endmodule // XXOR1
+
+
+module BLOCK0 ( A, B, POUT, GOUT );
+   // Produces two inverted combinations of the same input pair:
+   // the NOR on POUT and the NAND on GOUT.
+   input  A, B;
+   output POUT;   // ~(A | B)
+   output GOUT;   // ~(A & B)
+
+   nor  (POUT, A, B);
+   nand (GOUT, A, B);
+
+endmodule // BLOCK0
+
+
+module BLOCK1 ( PIN1, PIN2, GIN1, GIN2, POUT, GOUT );
+   // POUT is the NOR of the two P inputs.
+   // GOUT is an OR-AND-invert of the G inputs, with PIN2 in the OR term:
+   //   GOUT = ~(GIN2 & (PIN2 | GIN1)).
+   input  PIN1, PIN2;
+   input  GIN1, GIN2;
+   output POUT, GOUT;
+
+   wire   or_term = PIN2 | GIN1;
+
+   nor  (POUT, PIN1, PIN2);
+   nand (GOUT, GIN2, or_term);
+endmodule // BLOCK1
+
+
+module BLOCK2 ( PIN1, PIN2, GIN1, GIN2, POUT, GOUT );
+   // POUT is the NAND of the two P inputs.
+   // GOUT is an AND-OR-invert of the G inputs, with PIN2 in the AND term:
+   //   GOUT = ~(GIN2 | (PIN2 & GIN1)).
+   input  PIN1, PIN2;
+   input  GIN1, GIN2;
+   output POUT, GOUT;
+
+   wire   and_term = PIN2 & GIN1;
+
+   nand (POUT, PIN1, PIN2);
+   nor  (GOUT, GIN2, and_term);
+endmodule // BLOCK2
+
+
+module BLOCK1A ( PIN2, GIN1, GIN2, GOUT );
+   // G-only OR-AND-invert cell: GOUT = ~(GIN2 & (PIN2 | GIN1)).
+   input  PIN2;
+   input  GIN1, GIN2;
+   output GOUT;
+
+   wire   or_term = PIN2 | GIN1;
+   nand (GOUT, GIN2, or_term);
+
+endmodule // BLOCK1A
+
+
+module BLOCK2A ( PIN2, GIN1, GIN2, GOUT );
+   // G-only AND-OR-invert cell: GOUT = ~(GIN2 | (PIN2 & GIN1)).
+   input  PIN2;
+   input  GIN1, GIN2;
+   output GOUT;
+
+   wire   and_term = PIN2 & GIN1;
+   nor  (GOUT, GIN2, and_term);
+
+endmodule // BLOCK2A
+
+module PRESTAGE_64 ( A, B, CIN, POUT, GOUT );
+   // Bit-level front end: one BLOCK0 per bit position plus an inverter on CIN.
+   // Bit i of A/B drives POUT[i] and GOUT[i+1]; GOUT[0] carries the inverted CIN.
+   input [0:63]  A, B;
+   input         CIN;
+   output [0:63] POUT;
+   output [0:64] GOUT;
+
+   // Slot 0 of GOUT: inverted carry-in.
+   INVBLOCK U2 (.GIN(CIN), .GOUT(GOUT[0]));
+   // Slots 1..64 of GOUT and 0..63 of POUT: per-bit BLOCK0 cells.
+   BLOCK0 U10 (.A(A[0]), .B(B[0]), .POUT(POUT[0]), .GOUT(GOUT[1]));
+   BLOCK0 U11 (.A(A[1]), .B(B[1]), .POUT(POUT[1]), .GOUT(GOUT[2]));
+   BLOCK0 U12 (.A(A[2]), .B(B[2]), .POUT(POUT[2]), .GOUT(GOUT[3]));
+   BLOCK0 U13 (.A(A[3]), .B(B[3]), .POUT(POUT[3]), .GOUT(GOUT[4]));
+   BLOCK0 U14 (.A(A[4]), .B(B[4]), .POUT(POUT[4]), .GOUT(GOUT[5]));
+   BLOCK0 U15 (.A(A[5]), .B(B[5]), .POUT(POUT[5]), .GOUT(GOUT[6]));
+   BLOCK0 U16 (.A(A[6]), .B(B[6]), .POUT(POUT[6]), .GOUT(GOUT[7]));
+   BLOCK0 U17 (.A(A[7]), .B(B[7]), .POUT(POUT[7]), .GOUT(GOUT[8]));
+   BLOCK0 U18 (.A(A[8]), .B(B[8]), .POUT(POUT[8]), .GOUT(GOUT[9]));
+   BLOCK0 U19 (.A(A[9]), .B(B[9]), .POUT(POUT[9]), .GOUT(GOUT[10]));
+   BLOCK0 U110 (.A(A[10]), .B(B[10]), .POUT(POUT[10]), .GOUT(GOUT[11]));
+   BLOCK0 U111 (.A(A[11]), .B(B[11]), .POUT(POUT[11]), .GOUT(GOUT[12]));
+   BLOCK0 U112 (.A(A[12]), .B(B[12]), .POUT(POUT[12]), .GOUT(GOUT[13]));
+   BLOCK0 U113 (.A(A[13]), .B(B[13]), .POUT(POUT[13]), .GOUT(GOUT[14]));
+   BLOCK0 U114 (.A(A[14]), .B(B[14]), .POUT(POUT[14]), .GOUT(GOUT[15]));
+   BLOCK0 U115 (.A(A[15]), .B(B[15]), .POUT(POUT[15]), .GOUT(GOUT[16]));
+   BLOCK0 U116 (.A(A[16]), .B(B[16]), .POUT(POUT[16]), .GOUT(GOUT[17]));
+   BLOCK0 U117 (.A(A[17]), .B(B[17]), .POUT(POUT[17]), .GOUT(GOUT[18]));
+   BLOCK0 U118 (.A(A[18]), .B(B[18]), .POUT(POUT[18]), .GOUT(GOUT[19]));
+   BLOCK0 U119 (.A(A[19]), .B(B[19]), .POUT(POUT[19]), .GOUT(GOUT[20]));
+   BLOCK0 U120 (.A(A[20]), .B(B[20]), .POUT(POUT[20]), .GOUT(GOUT[21]));
+   BLOCK0 U121 (.A(A[21]), .B(B[21]), .POUT(POUT[21]), .GOUT(GOUT[22]));
+   BLOCK0 U122 (.A(A[22]), .B(B[22]), .POUT(POUT[22]), .GOUT(GOUT[23]));
+   BLOCK0 U123 (.A(A[23]), .B(B[23]), .POUT(POUT[23]), .GOUT(GOUT[24]));
+   BLOCK0 U124 (.A(A[24]), .B(B[24]), .POUT(POUT[24]), .GOUT(GOUT[25]));
+   BLOCK0 U125 (.A(A[25]), .B(B[25]), .POUT(POUT[25]), .GOUT(GOUT[26]));
+   BLOCK0 U126 (.A(A[26]), .B(B[26]), .POUT(POUT[26]), .GOUT(GOUT[27]));
+   BLOCK0 U127 (.A(A[27]), .B(B[27]), .POUT(POUT[27]), .GOUT(GOUT[28]));
+   BLOCK0 U128 (.A(A[28]), .B(B[28]), .POUT(POUT[28]), .GOUT(GOUT[29]));
+   BLOCK0 U129 (.A(A[29]), .B(B[29]), .POUT(POUT[29]), .GOUT(GOUT[30]));
+   BLOCK0 U130 (.A(A[30]), .B(B[30]), .POUT(POUT[30]), .GOUT(GOUT[31]));
+   BLOCK0 U131 (.A(A[31]), .B(B[31]), .POUT(POUT[31]), .GOUT(GOUT[32]));
+   BLOCK0 U132 (.A(A[32]), .B(B[32]), .POUT(POUT[32]), .GOUT(GOUT[33]));
+   BLOCK0 U133 (.A(A[33]), .B(B[33]), .POUT(POUT[33]), .GOUT(GOUT[34]));
+   BLOCK0 U134 (.A(A[34]), .B(B[34]), .POUT(POUT[34]), .GOUT(GOUT[35]));
+   BLOCK0 U135 (.A(A[35]), .B(B[35]), .POUT(POUT[35]), .GOUT(GOUT[36]));
+   BLOCK0 U136 (.A(A[36]), .B(B[36]), .POUT(POUT[36]), .GOUT(GOUT[37]));
+   BLOCK0 U137 (.A(A[37]), .B(B[37]), .POUT(POUT[37]), .GOUT(GOUT[38]));
+   BLOCK0 U138 (.A(A[38]), .B(B[38]), .POUT(POUT[38]), .GOUT(GOUT[39]));
+   BLOCK0 U139 (.A(A[39]), .B(B[39]), .POUT(POUT[39]), .GOUT(GOUT[40]));
+   BLOCK0 U140 (.A(A[40]), .B(B[40]), .POUT(POUT[40]), .GOUT(GOUT[41]));
+   BLOCK0 U141 (.A(A[41]), .B(B[41]), .POUT(POUT[41]), .GOUT(GOUT[42]));
+   BLOCK0 U142 (.A(A[42]), .B(B[42]), .POUT(POUT[42]), .GOUT(GOUT[43]));
+   BLOCK0 U143 (.A(A[43]), .B(B[43]), .POUT(POUT[43]), .GOUT(GOUT[44]));
+   BLOCK0 U144 (.A(A[44]), .B(B[44]), .POUT(POUT[44]), .GOUT(GOUT[45]));
+   BLOCK0 U145 (.A(A[45]), .B(B[45]), .POUT(POUT[45]), .GOUT(GOUT[46]));
+   BLOCK0 U146 (.A(A[46]), .B(B[46]), .POUT(POUT[46]), .GOUT(GOUT[47]));
+   BLOCK0 U147 (.A(A[47]), .B(B[47]), .POUT(POUT[47]), .GOUT(GOUT[48]));
+   BLOCK0 U148 (.A(A[48]), .B(B[48]), .POUT(POUT[48]), .GOUT(GOUT[49]));
+   BLOCK0 U149 (.A(A[49]), .B(B[49]), .POUT(POUT[49]), .GOUT(GOUT[50]));
+   BLOCK0 U150 (.A(A[50]), .B(B[50]), .POUT(POUT[50]), .GOUT(GOUT[51]));
+   BLOCK0 U151 (.A(A[51]), .B(B[51]), .POUT(POUT[51]), .GOUT(GOUT[52]));
+   BLOCK0 U152 (.A(A[52]), .B(B[52]), .POUT(POUT[52]), .GOUT(GOUT[53]));
+   BLOCK0 U153 (.A(A[53]), .B(B[53]), .POUT(POUT[53]), .GOUT(GOUT[54]));
+   BLOCK0 U154 (.A(A[54]), .B(B[54]), .POUT(POUT[54]), .GOUT(GOUT[55]));
+   BLOCK0 U155 (.A(A[55]), .B(B[55]), .POUT(POUT[55]), .GOUT(GOUT[56]));
+   BLOCK0 U156 (.A(A[56]), .B(B[56]), .POUT(POUT[56]), .GOUT(GOUT[57]));
+   BLOCK0 U157 (.A(A[57]), .B(B[57]), .POUT(POUT[57]), .GOUT(GOUT[58]));
+   BLOCK0 U158 (.A(A[58]), .B(B[58]), .POUT(POUT[58]), .GOUT(GOUT[59]));
+   BLOCK0 U159 (.A(A[59]), .B(B[59]), .POUT(POUT[59]), .GOUT(GOUT[60]));
+   BLOCK0 U160 (.A(A[60]), .B(B[60]), .POUT(POUT[60]), .GOUT(GOUT[61]));
+   BLOCK0 U161 (.A(A[61]), .B(B[61]), .POUT(POUT[61]), .GOUT(GOUT[62]));
+   BLOCK0 U162 (.A(A[62]), .B(B[62]), .POUT(POUT[62]), .GOUT(GOUT[63]));
+   BLOCK0 U163 (.A(A[63]), .B(B[63]), .POUT(POUT[63]), .GOUT(GOUT[64]));
+endmodule // PRESTAGE_64
+
+
+module DBLC_0_64 ( PIN, GIN, POUT, GOUT );
+   // Span-1 combining level: GOUT[k] merges GIN[k] with GIN[k-1], using PIN[k-1] as PIN2.
+   input [0:63]  PIN;
+   input [0:64]  GIN;
+   output [0:62] POUT;
+   output [0:64] GOUT;
+   // Slot 0 has no lower neighbour at this span, so it is only inverted.
+   INVBLOCK U10 (.GIN(GIN[0]), .GOUT(GOUT[0]));
+   // Slot 1 merges with slot 0 and produces no POUT (BLOCK1A).
+   BLOCK1A U21 (.PIN2(PIN[0]), .GIN1(GIN[0]), .GIN2(GIN[1]), .GOUT(GOUT[1]));
+   // Slots 2..64: full BLOCK1 cells; POUT[k-2] combines PIN[k-2] and PIN[k-1].
+   BLOCK1 U32 (.PIN1(PIN[0]), .PIN2(PIN[1]), .GIN1(GIN[1]), .GIN2(GIN[2]), .POUT(POUT[0]), .GOUT(GOUT[2]));
+   BLOCK1 U33 (.PIN1(PIN[1]), .PIN2(PIN[2]), .GIN1(GIN[2]), .GIN2(GIN[3]), .POUT(POUT[1]), .GOUT(GOUT[3]));
+   BLOCK1 U34 (.PIN1(PIN[2]), .PIN2(PIN[3]), .GIN1(GIN[3]), .GIN2(GIN[4]), .POUT(POUT[2]), .GOUT(GOUT[4]));
+   BLOCK1 U35 (.PIN1(PIN[3]), .PIN2(PIN[4]), .GIN1(GIN[4]), .GIN2(GIN[5]), .POUT(POUT[3]), .GOUT(GOUT[5]));
+   BLOCK1 U36 (.PIN1(PIN[4]), .PIN2(PIN[5]), .GIN1(GIN[5]), .GIN2(GIN[6]), .POUT(POUT[4]), .GOUT(GOUT[6]));
+   BLOCK1 U37 (.PIN1(PIN[5]), .PIN2(PIN[6]), .GIN1(GIN[6]), .GIN2(GIN[7]), .POUT(POUT[5]), .GOUT(GOUT[7]));
+   BLOCK1 U38 (.PIN1(PIN[6]), .PIN2(PIN[7]), .GIN1(GIN[7]), .GIN2(GIN[8]), .POUT(POUT[6]), .GOUT(GOUT[8]));
+   BLOCK1 U39 (.PIN1(PIN[7]), .PIN2(PIN[8]), .GIN1(GIN[8]), .GIN2(GIN[9]), .POUT(POUT[7]), .GOUT(GOUT[9]));
+   BLOCK1 U310 (.PIN1(PIN[8]), .PIN2(PIN[9]), .GIN1(GIN[9]), .GIN2(GIN[10]), .POUT(POUT[8]), .GOUT(GOUT[10]));
+   BLOCK1 U311 (.PIN1(PIN[9]), .PIN2(PIN[10]), .GIN1(GIN[10]), .GIN2(GIN[11]), .POUT(POUT[9]), .GOUT(GOUT[11]));
+   BLOCK1 U312 (.PIN1(PIN[10]), .PIN2(PIN[11]), .GIN1(GIN[11]), .GIN2(GIN[12]), .POUT(POUT[10]), .GOUT(GOUT[12]));
+   BLOCK1 U313 (.PIN1(PIN[11]), .PIN2(PIN[12]), .GIN1(GIN[12]), .GIN2(GIN[13]), .POUT(POUT[11]), .GOUT(GOUT[13]));
+   BLOCK1 U314 (.PIN1(PIN[12]), .PIN2(PIN[13]), .GIN1(GIN[13]), .GIN2(GIN[14]), .POUT(POUT[12]), .GOUT(GOUT[14]));
+   BLOCK1 U315 (.PIN1(PIN[13]), .PIN2(PIN[14]), .GIN1(GIN[14]), .GIN2(GIN[15]), .POUT(POUT[13]), .GOUT(GOUT[15]));
+   BLOCK1 U316 (.PIN1(PIN[14]), .PIN2(PIN[15]), .GIN1(GIN[15]), .GIN2(GIN[16]), .POUT(POUT[14]), .GOUT(GOUT[16]));
+   BLOCK1 U317 (.PIN1(PIN[15]), .PIN2(PIN[16]), .GIN1(GIN[16]), .GIN2(GIN[17]), .POUT(POUT[15]), .GOUT(GOUT[17]));
+   BLOCK1 U318 (.PIN1(PIN[16]), .PIN2(PIN[17]), .GIN1(GIN[17]), .GIN2(GIN[18]), .POUT(POUT[16]), .GOUT(GOUT[18]));
+   BLOCK1 U319 (.PIN1(PIN[17]), .PIN2(PIN[18]), .GIN1(GIN[18]), .GIN2(GIN[19]), .POUT(POUT[17]), .GOUT(GOUT[19]));
+   BLOCK1 U320 (.PIN1(PIN[18]), .PIN2(PIN[19]), .GIN1(GIN[19]), .GIN2(GIN[20]), .POUT(POUT[18]), .GOUT(GOUT[20]));
+   BLOCK1 U321 (.PIN1(PIN[19]), .PIN2(PIN[20]), .GIN1(GIN[20]), .GIN2(GIN[21]), .POUT(POUT[19]), .GOUT(GOUT[21]));
+   BLOCK1 U322 (.PIN1(PIN[20]), .PIN2(PIN[21]), .GIN1(GIN[21]), .GIN2(GIN[22]), .POUT(POUT[20]), .GOUT(GOUT[22]));
+   BLOCK1 U323 (.PIN1(PIN[21]), .PIN2(PIN[22]), .GIN1(GIN[22]), .GIN2(GIN[23]), .POUT(POUT[21]), .GOUT(GOUT[23]));
+   BLOCK1 U324 (.PIN1(PIN[22]), .PIN2(PIN[23]), .GIN1(GIN[23]), .GIN2(GIN[24]), .POUT(POUT[22]), .GOUT(GOUT[24]));
+   BLOCK1 U325 (.PIN1(PIN[23]), .PIN2(PIN[24]), .GIN1(GIN[24]), .GIN2(GIN[25]), .POUT(POUT[23]), .GOUT(GOUT[25]));
+   BLOCK1 U326 (.PIN1(PIN[24]), .PIN2(PIN[25]), .GIN1(GIN[25]), .GIN2(GIN[26]), .POUT(POUT[24]), .GOUT(GOUT[26]));
+   BLOCK1 U327 (.PIN1(PIN[25]), .PIN2(PIN[26]), .GIN1(GIN[26]), .GIN2(GIN[27]), .POUT(POUT[25]), .GOUT(GOUT[27]));
+   BLOCK1 U328 (.PIN1(PIN[26]), .PIN2(PIN[27]), .GIN1(GIN[27]), .GIN2(GIN[28]), .POUT(POUT[26]), .GOUT(GOUT[28]));
+   BLOCK1 U329 (.PIN1(PIN[27]), .PIN2(PIN[28]), .GIN1(GIN[28]), .GIN2(GIN[29]), .POUT(POUT[27]), .GOUT(GOUT[29]));
+   BLOCK1 U330 (.PIN1(PIN[28]), .PIN2(PIN[29]), .GIN1(GIN[29]), .GIN2(GIN[30]), .POUT(POUT[28]), .GOUT(GOUT[30]));
+   BLOCK1 U331 (.PIN1(PIN[29]), .PIN2(PIN[30]), .GIN1(GIN[30]), .GIN2(GIN[31]), .POUT(POUT[29]), .GOUT(GOUT[31]));
+   BLOCK1 U332 (.PIN1(PIN[30]), .PIN2(PIN[31]), .GIN1(GIN[31]), .GIN2(GIN[32]), .POUT(POUT[30]), .GOUT(GOUT[32]));
+   BLOCK1 U333 (.PIN1(PIN[31]), .PIN2(PIN[32]), .GIN1(GIN[32]), .GIN2(GIN[33]), .POUT(POUT[31]), .GOUT(GOUT[33]));
+   BLOCK1 U334 (.PIN1(PIN[32]), .PIN2(PIN[33]), .GIN1(GIN[33]), .GIN2(GIN[34]), .POUT(POUT[32]), .GOUT(GOUT[34]));
+   BLOCK1 U335 (.PIN1(PIN[33]), .PIN2(PIN[34]), .GIN1(GIN[34]), .GIN2(GIN[35]), .POUT(POUT[33]), .GOUT(GOUT[35]));
+   BLOCK1 U336 (.PIN1(PIN[34]), .PIN2(PIN[35]), .GIN1(GIN[35]), .GIN2(GIN[36]), .POUT(POUT[34]), .GOUT(GOUT[36]));
+   BLOCK1 U337 (.PIN1(PIN[35]), .PIN2(PIN[36]), .GIN1(GIN[36]), .GIN2(GIN[37]), .POUT(POUT[35]), .GOUT(GOUT[37]));
+   BLOCK1 U338 (.PIN1(PIN[36]), .PIN2(PIN[37]), .GIN1(GIN[37]), .GIN2(GIN[38]), .POUT(POUT[36]), .GOUT(GOUT[38]));
+   BLOCK1 U339 (.PIN1(PIN[37]), .PIN2(PIN[38]), .GIN1(GIN[38]), .GIN2(GIN[39]), .POUT(POUT[37]), .GOUT(GOUT[39]));
+   BLOCK1 U340 (.PIN1(PIN[38]), .PIN2(PIN[39]), .GIN1(GIN[39]), .GIN2(GIN[40]), .POUT(POUT[38]), .GOUT(GOUT[40]));
+   BLOCK1 U341 (.PIN1(PIN[39]), .PIN2(PIN[40]), .GIN1(GIN[40]), .GIN2(GIN[41]), .POUT(POUT[39]), .GOUT(GOUT[41]));
+   BLOCK1 U342 (.PIN1(PIN[40]), .PIN2(PIN[41]), .GIN1(GIN[41]), .GIN2(GIN[42]), .POUT(POUT[40]), .GOUT(GOUT[42]));
+   BLOCK1 U343 (.PIN1(PIN[41]), .PIN2(PIN[42]), .GIN1(GIN[42]), .GIN2(GIN[43]), .POUT(POUT[41]), .GOUT(GOUT[43]));
+   BLOCK1 U344 (.PIN1(PIN[42]), .PIN2(PIN[43]), .GIN1(GIN[43]), .GIN2(GIN[44]), .POUT(POUT[42]), .GOUT(GOUT[44]));
+   BLOCK1 U345 (.PIN1(PIN[43]), .PIN2(PIN[44]), .GIN1(GIN[44]), .GIN2(GIN[45]), .POUT(POUT[43]), .GOUT(GOUT[45]));
+   BLOCK1 U346 (.PIN1(PIN[44]), .PIN2(PIN[45]), .GIN1(GIN[45]), .GIN2(GIN[46]), .POUT(POUT[44]), .GOUT(GOUT[46]));
+   BLOCK1 U347 (.PIN1(PIN[45]), .PIN2(PIN[46]), .GIN1(GIN[46]), .GIN2(GIN[47]), .POUT(POUT[45]), .GOUT(GOUT[47]));
+   BLOCK1 U348 (.PIN1(PIN[46]), .PIN2(PIN[47]), .GIN1(GIN[47]), .GIN2(GIN[48]), .POUT(POUT[46]), .GOUT(GOUT[48]));
+   BLOCK1 U349 (.PIN1(PIN[47]), .PIN2(PIN[48]), .GIN1(GIN[48]), .GIN2(GIN[49]), .POUT(POUT[47]), .GOUT(GOUT[49]));
+   BLOCK1 U350 (.PIN1(PIN[48]), .PIN2(PIN[49]), .GIN1(GIN[49]), .GIN2(GIN[50]), .POUT(POUT[48]), .GOUT(GOUT[50]));
+   BLOCK1 U351 (.PIN1(PIN[49]), .PIN2(PIN[50]), .GIN1(GIN[50]), .GIN2(GIN[51]), .POUT(POUT[49]), .GOUT(GOUT[51]));
+   BLOCK1 U352 (.PIN1(PIN[50]), .PIN2(PIN[51]), .GIN1(GIN[51]), .GIN2(GIN[52]), .POUT(POUT[50]), .GOUT(GOUT[52]));
+   BLOCK1 U353 (.PIN1(PIN[51]), .PIN2(PIN[52]), .GIN1(GIN[52]), .GIN2(GIN[53]), .POUT(POUT[51]), .GOUT(GOUT[53]));
+   BLOCK1 U354 (.PIN1(PIN[52]), .PIN2(PIN[53]), .GIN1(GIN[53]), .GIN2(GIN[54]), .POUT(POUT[52]), .GOUT(GOUT[54]));
+   BLOCK1 U355 (.PIN1(PIN[53]), .PIN2(PIN[54]), .GIN1(GIN[54]), .GIN2(GIN[55]), .POUT(POUT[53]), .GOUT(GOUT[55]));
+   BLOCK1 U356 (.PIN1(PIN[54]), .PIN2(PIN[55]), .GIN1(GIN[55]), .GIN2(GIN[56]), .POUT(POUT[54]), .GOUT(GOUT[56]));
+   BLOCK1 U357 (.PIN1(PIN[55]), .PIN2(PIN[56]), .GIN1(GIN[56]), .GIN2(GIN[57]), .POUT(POUT[55]), .GOUT(GOUT[57]));
+   BLOCK1 U358 (.PIN1(PIN[56]), .PIN2(PIN[57]), .GIN1(GIN[57]), .GIN2(GIN[58]), .POUT(POUT[56]), .GOUT(GOUT[58]));
+   BLOCK1 U359 (.PIN1(PIN[57]), .PIN2(PIN[58]), .GIN1(GIN[58]), .GIN2(GIN[59]), .POUT(POUT[57]), .GOUT(GOUT[59]));
+   BLOCK1 U360 (.PIN1(PIN[58]), .PIN2(PIN[59]), .GIN1(GIN[59]), .GIN2(GIN[60]), .POUT(POUT[58]), .GOUT(GOUT[60]));
+   BLOCK1 U361 (.PIN1(PIN[59]), .PIN2(PIN[60]), .GIN1(GIN[60]), .GIN2(GIN[61]), .POUT(POUT[59]), .GOUT(GOUT[61]));
+   BLOCK1 U362 (.PIN1(PIN[60]), .PIN2(PIN[61]), .GIN1(GIN[61]), .GIN2(GIN[62]), .POUT(POUT[60]), .GOUT(GOUT[62]));
+   BLOCK1 U363 (.PIN1(PIN[61]), .PIN2(PIN[62]), .GIN1(GIN[62]), .GIN2(GIN[63]), .POUT(POUT[61]), .GOUT(GOUT[63]));
+   BLOCK1 U364 (.PIN1(PIN[62]), .PIN2(PIN[63]), .GIN1(GIN[63]), .GIN2(GIN[64]), .POUT(POUT[62]), .GOUT(GOUT[64]));
+endmodule // DBLC_0_64
+
+
+module DBLC_1_64 ( PIN, GIN, POUT, GOUT );
+   // Span-2 combining level: GOUT[k] merges GIN[k] with GIN[k-2], using PIN[k-2] as PIN2.
+   input [0:62]  PIN;
+   input [0:64]  GIN;
+   output [0:60] POUT;
+   output [0:64] GOUT;
+   // Slots 0..1 have no partner at this span, so they are only inverted.
+   INVBLOCK U10 (.GIN(GIN[0]), .GOUT(GOUT[0]));
+   INVBLOCK U11 (.GIN(GIN[1]), .GOUT(GOUT[1]));
+   // Slots 2..3 merge with slots 0..1 and produce no POUT (BLOCK2A).
+   BLOCK2A U22 (.PIN2(PIN[0]), .GIN1(GIN[0]), .GIN2(GIN[2]), .GOUT(GOUT[2]));
+   BLOCK2A U23 (.PIN2(PIN[1]), .GIN1(GIN[1]), .GIN2(GIN[3]), .GOUT(GOUT[3]));
+   // Slots 4..64: full BLOCK2 cells; POUT[k-4] combines PIN[k-4] and PIN[k-2].
+   BLOCK2 U34 (.PIN1(PIN[0]), .PIN2(PIN[2]), .GIN1(GIN[2]), .GIN2(GIN[4]), .POUT(POUT[0]), .GOUT(GOUT[4]));
+   BLOCK2 U35 (.PIN1(PIN[1]), .PIN2(PIN[3]), .GIN1(GIN[3]), .GIN2(GIN[5]), .POUT(POUT[1]), .GOUT(GOUT[5]));
+   BLOCK2 U36 (.PIN1(PIN[2]), .PIN2(PIN[4]), .GIN1(GIN[4]), .GIN2(GIN[6]), .POUT(POUT[2]), .GOUT(GOUT[6]));
+   BLOCK2 U37 (.PIN1(PIN[3]), .PIN2(PIN[5]), .GIN1(GIN[5]), .GIN2(GIN[7]), .POUT(POUT[3]), .GOUT(GOUT[7]));
+   BLOCK2 U38 (.PIN1(PIN[4]), .PIN2(PIN[6]), .GIN1(GIN[6]), .GIN2(GIN[8]), .POUT(POUT[4]), .GOUT(GOUT[8]));
+   BLOCK2 U39 (.PIN1(PIN[5]), .PIN2(PIN[7]), .GIN1(GIN[7]), .GIN2(GIN[9]), .POUT(POUT[5]), .GOUT(GOUT[9]));
+   BLOCK2 U310 (.PIN1(PIN[6]), .PIN2(PIN[8]), .GIN1(GIN[8]), .GIN2(GIN[10]), .POUT(POUT[6]), .GOUT(GOUT[10]));
+   BLOCK2 U311 (.PIN1(PIN[7]), .PIN2(PIN[9]), .GIN1(GIN[9]), .GIN2(GIN[11]), .POUT(POUT[7]), .GOUT(GOUT[11]));
+   BLOCK2 U312 (.PIN1(PIN[8]), .PIN2(PIN[10]), .GIN1(GIN[10]), .GIN2(GIN[12]), .POUT(POUT[8]), .GOUT(GOUT[12]));
+   BLOCK2 U313 (.PIN1(PIN[9]), .PIN2(PIN[11]), .GIN1(GIN[11]), .GIN2(GIN[13]), .POUT(POUT[9]), .GOUT(GOUT[13]));
+   BLOCK2 U314 (.PIN1(PIN[10]), .PIN2(PIN[12]), .GIN1(GIN[12]), .GIN2(GIN[14]), .POUT(POUT[10]), .GOUT(GOUT[14]));
+   BLOCK2 U315 (.PIN1(PIN[11]), .PIN2(PIN[13]), .GIN1(GIN[13]), .GIN2(GIN[15]), .POUT(POUT[11]), .GOUT(GOUT[15]));
+   BLOCK2 U316 (.PIN1(PIN[12]), .PIN2(PIN[14]), .GIN1(GIN[14]), .GIN2(GIN[16]), .POUT(POUT[12]), .GOUT(GOUT[16]));
+   BLOCK2 U317 (.PIN1(PIN[13]), .PIN2(PIN[15]), .GIN1(GIN[15]), .GIN2(GIN[17]), .POUT(POUT[13]), .GOUT(GOUT[17]));
+   BLOCK2 U318 (.PIN1(PIN[14]), .PIN2(PIN[16]), .GIN1(GIN[16]), .GIN2(GIN[18]), .POUT(POUT[14]), .GOUT(GOUT[18]));
+   BLOCK2 U319 (.PIN1(PIN[15]), .PIN2(PIN[17]), .GIN1(GIN[17]), .GIN2(GIN[19]), .POUT(POUT[15]), .GOUT(GOUT[19]));
+   BLOCK2 U320 (.PIN1(PIN[16]), .PIN2(PIN[18]), .GIN1(GIN[18]), .GIN2(GIN[20]), .POUT(POUT[16]), .GOUT(GOUT[20]));
+   BLOCK2 U321 (.PIN1(PIN[17]), .PIN2(PIN[19]), .GIN1(GIN[19]), .GIN2(GIN[21]), .POUT(POUT[17]), .GOUT(GOUT[21]));
+   BLOCK2 U322 (.PIN1(PIN[18]), .PIN2(PIN[20]), .GIN1(GIN[20]), .GIN2(GIN[22]), .POUT(POUT[18]), .GOUT(GOUT[22]));
+   BLOCK2 U323 (.PIN1(PIN[19]), .PIN2(PIN[21]), .GIN1(GIN[21]), .GIN2(GIN[23]), .POUT(POUT[19]), .GOUT(GOUT[23]));
+   BLOCK2 U324 (.PIN1(PIN[20]), .PIN2(PIN[22]), .GIN1(GIN[22]), .GIN2(GIN[24]), .POUT(POUT[20]), .GOUT(GOUT[24]));
+   BLOCK2 U325 (.PIN1(PIN[21]), .PIN2(PIN[23]), .GIN1(GIN[23]), .GIN2(GIN[25]), .POUT(POUT[21]), .GOUT(GOUT[25]));
+   BLOCK2 U326 (.PIN1(PIN[22]), .PIN2(PIN[24]), .GIN1(GIN[24]), .GIN2(GIN[26]), .POUT(POUT[22]), .GOUT(GOUT[26]));
+   BLOCK2 U327 (.PIN1(PIN[23]), .PIN2(PIN[25]), .GIN1(GIN[25]), .GIN2(GIN[27]), .POUT(POUT[23]), .GOUT(GOUT[27]));
+   BLOCK2 U328 (.PIN1(PIN[24]), .PIN2(PIN[26]), .GIN1(GIN[26]), .GIN2(GIN[28]), .POUT(POUT[24]), .GOUT(GOUT[28]));
+   BLOCK2 U329 (.PIN1(PIN[25]), .PIN2(PIN[27]), .GIN1(GIN[27]), .GIN2(GIN[29]), .POUT(POUT[25]), .GOUT(GOUT[29]));
+   BLOCK2 U330 (.PIN1(PIN[26]), .PIN2(PIN[28]), .GIN1(GIN[28]), .GIN2(GIN[30]), .POUT(POUT[26]), .GOUT(GOUT[30]));
+   BLOCK2 U331 (.PIN1(PIN[27]), .PIN2(PIN[29]), .GIN1(GIN[29]), .GIN2(GIN[31]), .POUT(POUT[27]), .GOUT(GOUT[31]));
+   BLOCK2 U332 (.PIN1(PIN[28]), .PIN2(PIN[30]), .GIN1(GIN[30]), .GIN2(GIN[32]), .POUT(POUT[28]), .GOUT(GOUT[32]));
+   BLOCK2 U333 (.PIN1(PIN[29]), .PIN2(PIN[31]), .GIN1(GIN[31]), .GIN2(GIN[33]), .POUT(POUT[29]), .GOUT(GOUT[33]));
+   BLOCK2 U334 (.PIN1(PIN[30]), .PIN2(PIN[32]), .GIN1(GIN[32]), .GIN2(GIN[34]), .POUT(POUT[30]), .GOUT(GOUT[34]));
+   BLOCK2 U335 (.PIN1(PIN[31]), .PIN2(PIN[33]), .GIN1(GIN[33]), .GIN2(GIN[35]), .POUT(POUT[31]), .GOUT(GOUT[35]));
+   BLOCK2 U336 (.PIN1(PIN[32]), .PIN2(PIN[34]), .GIN1(GIN[34]), .GIN2(GIN[36]), .POUT(POUT[32]), .GOUT(GOUT[36]));
+   BLOCK2 U337 (.PIN1(PIN[33]), .PIN2(PIN[35]), .GIN1(GIN[35]), .GIN2(GIN[37]), .POUT(POUT[33]), .GOUT(GOUT[37]));
+   BLOCK2 U338 (.PIN1(PIN[34]), .PIN2(PIN[36]), .GIN1(GIN[36]), .GIN2(GIN[38]), .POUT(POUT[34]), .GOUT(GOUT[38]));
+   BLOCK2 U339 (.PIN1(PIN[35]), .PIN2(PIN[37]), .GIN1(GIN[37]), .GIN2(GIN[39]), .POUT(POUT[35]), .GOUT(GOUT[39]));
+   BLOCK2 U340 (.PIN1(PIN[36]), .PIN2(PIN[38]), .GIN1(GIN[38]), .GIN2(GIN[40]), .POUT(POUT[36]), .GOUT(GOUT[40]));
+   BLOCK2 U341 (.PIN1(PIN[37]), .PIN2(PIN[39]), .GIN1(GIN[39]), .GIN2(GIN[41]), .POUT(POUT[37]), .GOUT(GOUT[41]));
+   BLOCK2 U342 (.PIN1(PIN[38]), .PIN2(PIN[40]), .GIN1(GIN[40]), .GIN2(GIN[42]), .POUT(POUT[38]), .GOUT(GOUT[42]));
+   BLOCK2 U343 (.PIN1(PIN[39]), .PIN2(PIN[41]), .GIN1(GIN[41]), .GIN2(GIN[43]), .POUT(POUT[39]), .GOUT(GOUT[43]));
+   BLOCK2 U344 (.PIN1(PIN[40]), .PIN2(PIN[42]), .GIN1(GIN[42]), .GIN2(GIN[44]), .POUT(POUT[40]), .GOUT(GOUT[44]));
+   BLOCK2 U345 (.PIN1(PIN[41]), .PIN2(PIN[43]), .GIN1(GIN[43]), .GIN2(GIN[45]), .POUT(POUT[41]), .GOUT(GOUT[45]));
+   BLOCK2 U346 (.PIN1(PIN[42]), .PIN2(PIN[44]), .GIN1(GIN[44]), .GIN2(GIN[46]), .POUT(POUT[42]), .GOUT(GOUT[46]));
+   BLOCK2 U347 (.PIN1(PIN[43]), .PIN2(PIN[45]), .GIN1(GIN[45]), .GIN2(GIN[47]), .POUT(POUT[43]), .GOUT(GOUT[47]));
+   BLOCK2 U348 (.PIN1(PIN[44]), .PIN2(PIN[46]), .GIN1(GIN[46]), .GIN2(GIN[48]), .POUT(POUT[44]), .GOUT(GOUT[48]));
+   BLOCK2 U349 (.PIN1(PIN[45]), .PIN2(PIN[47]), .GIN1(GIN[47]), .GIN2(GIN[49]), .POUT(POUT[45]), .GOUT(GOUT[49]));
+   BLOCK2 U350 (.PIN1(PIN[46]), .PIN2(PIN[48]), .GIN1(GIN[48]), .GIN2(GIN[50]), .POUT(POUT[46]), .GOUT(GOUT[50]));
+   BLOCK2 U351 (.PIN1(PIN[47]), .PIN2(PIN[49]), .GIN1(GIN[49]), .GIN2(GIN[51]), .POUT(POUT[47]), .GOUT(GOUT[51]));
+   BLOCK2 U352 (.PIN1(PIN[48]), .PIN2(PIN[50]), .GIN1(GIN[50]), .GIN2(GIN[52]), .POUT(POUT[48]), .GOUT(GOUT[52]));
+   BLOCK2 U353 (.PIN1(PIN[49]), .PIN2(PIN[51]), .GIN1(GIN[51]), .GIN2(GIN[53]), .POUT(POUT[49]), .GOUT(GOUT[53]));
+   BLOCK2 U354 (.PIN1(PIN[50]), .PIN2(PIN[52]), .GIN1(GIN[52]), .GIN2(GIN[54]), .POUT(POUT[50]), .GOUT(GOUT[54]));
+   BLOCK2 U355 (.PIN1(PIN[51]), .PIN2(PIN[53]), .GIN1(GIN[53]), .GIN2(GIN[55]), .POUT(POUT[51]), .GOUT(GOUT[55]));
+   BLOCK2 U356 (.PIN1(PIN[52]), .PIN2(PIN[54]), .GIN1(GIN[54]), .GIN2(GIN[56]), .POUT(POUT[52]), .GOUT(GOUT[56]));
+   BLOCK2 U357 (.PIN1(PIN[53]), .PIN2(PIN[55]), .GIN1(GIN[55]), .GIN2(GIN[57]), .POUT(POUT[53]), .GOUT(GOUT[57]));
+   BLOCK2 U358 (.PIN1(PIN[54]), .PIN2(PIN[56]), .GIN1(GIN[56]), .GIN2(GIN[58]), .POUT(POUT[54]), .GOUT(GOUT[58]));
+   BLOCK2 U359 (.PIN1(PIN[55]), .PIN2(PIN[57]), .GIN1(GIN[57]), .GIN2(GIN[59]), .POUT(POUT[55]), .GOUT(GOUT[59]));
+   BLOCK2 U360 (.PIN1(PIN[56]), .PIN2(PIN[58]), .GIN1(GIN[58]), .GIN2(GIN[60]), .POUT(POUT[56]), .GOUT(GOUT[60]));
+   BLOCK2 U361 (.PIN1(PIN[57]), .PIN2(PIN[59]), .GIN1(GIN[59]), .GIN2(GIN[61]), .POUT(POUT[57]), .GOUT(GOUT[61]));
+   BLOCK2 U362 (.PIN1(PIN[58]), .PIN2(PIN[60]), .GIN1(GIN[60]), .GIN2(GIN[62]), .POUT(POUT[58]), .GOUT(GOUT[62]));
+   BLOCK2 U363 (.PIN1(PIN[59]), .PIN2(PIN[61]), .GIN1(GIN[61]), .GIN2(GIN[63]), .POUT(POUT[59]), .GOUT(GOUT[63]));
+   BLOCK2 U364 (.PIN1(PIN[60]), .PIN2(PIN[62]), .GIN1(GIN[62]), .GIN2(GIN[64]), .POUT(POUT[60]), .GOUT(GOUT[64]));
+endmodule // DBLC_1_64
+
+
+module DBLC_2_64 ( PIN, GIN, POUT, GOUT );
+   // Span-4 combining level: GOUT[k] merges GIN[k] with GIN[k-4], using PIN[k-4] as PIN2.
+   input [0:60]  PIN;
+   input [0:64]  GIN;
+   output [0:56] POUT;
+   output [0:64] GOUT;
+   // Slots 0..3 have no partner at this span, so they are only inverted.
+   INVBLOCK U10 (.GIN(GIN[0]), .GOUT(GOUT[0]));
+   INVBLOCK U11 (.GIN(GIN[1]), .GOUT(GOUT[1]));
+   INVBLOCK U12 (.GIN(GIN[2]), .GOUT(GOUT[2]));
+   INVBLOCK U13 (.GIN(GIN[3]), .GOUT(GOUT[3]));
+   // Slots 4..7 merge with slots 0..3 and produce no POUT (BLOCK1A).
+   BLOCK1A U24 (.PIN2(PIN[0]), .GIN1(GIN[0]), .GIN2(GIN[4]), .GOUT(GOUT[4]));
+   BLOCK1A U25 (.PIN2(PIN[1]), .GIN1(GIN[1]), .GIN2(GIN[5]), .GOUT(GOUT[5]));
+   BLOCK1A U26 (.PIN2(PIN[2]), .GIN1(GIN[2]), .GIN2(GIN[6]), .GOUT(GOUT[6]));
+   BLOCK1A U27 (.PIN2(PIN[3]), .GIN1(GIN[3]), .GIN2(GIN[7]), .GOUT(GOUT[7]));
+   // Slots 8..64: full BLOCK1 cells; POUT[k-8] combines PIN[k-8] and PIN[k-4].
+   BLOCK1 U38 (.PIN1(PIN[0]), .PIN2(PIN[4]), .GIN1(GIN[4]), .GIN2(GIN[8]), .POUT(POUT[0]), .GOUT(GOUT[8]));
+   BLOCK1 U39 (.PIN1(PIN[1]), .PIN2(PIN[5]), .GIN1(GIN[5]), .GIN2(GIN[9]), .POUT(POUT[1]), .GOUT(GOUT[9]));
+   BLOCK1 U310 (.PIN1(PIN[2]), .PIN2(PIN[6]), .GIN1(GIN[6]), .GIN2(GIN[10]), .POUT(POUT[2]), .GOUT(GOUT[10]));
+   BLOCK1 U311 (.PIN1(PIN[3]), .PIN2(PIN[7]), .GIN1(GIN[7]), .GIN2(GIN[11]), .POUT(POUT[3]), .GOUT(GOUT[11]));
+   BLOCK1 U312 (.PIN1(PIN[4]), .PIN2(PIN[8]), .GIN1(GIN[8]), .GIN2(GIN[12]), .POUT(POUT[4]), .GOUT(GOUT[12]));
+   BLOCK1 U313 (.PIN1(PIN[5]), .PIN2(PIN[9]), .GIN1(GIN[9]), .GIN2(GIN[13]), .POUT(POUT[5]), .GOUT(GOUT[13]));
+   BLOCK1 U314 (.PIN1(PIN[6]), .PIN2(PIN[10]), .GIN1(GIN[10]), .GIN2(GIN[14]), .POUT(POUT[6]), .GOUT(GOUT[14]));
+   BLOCK1 U315 (.PIN1(PIN[7]), .PIN2(PIN[11]), .GIN1(GIN[11]), .GIN2(GIN[15]), .POUT(POUT[7]), .GOUT(GOUT[15]));
+   BLOCK1 U316 (.PIN1(PIN[8]), .PIN2(PIN[12]), .GIN1(GIN[12]), .GIN2(GIN[16]), .POUT(POUT[8]), .GOUT(GOUT[16]));
+   BLOCK1 U317 (.PIN1(PIN[9]), .PIN2(PIN[13]), .GIN1(GIN[13]), .GIN2(GIN[17]), .POUT(POUT[9]), .GOUT(GOUT[17]));
+   BLOCK1 U318 (.PIN1(PIN[10]), .PIN2(PIN[14]), .GIN1(GIN[14]), .GIN2(GIN[18]), .POUT(POUT[10]), .GOUT(GOUT[18]));
+   BLOCK1 U319 (.PIN1(PIN[11]), .PIN2(PIN[15]), .GIN1(GIN[15]), .GIN2(GIN[19]), .POUT(POUT[11]), .GOUT(GOUT[19]));
+   BLOCK1 U320 (.PIN1(PIN[12]), .PIN2(PIN[16]), .GIN1(GIN[16]), .GIN2(GIN[20]), .POUT(POUT[12]), .GOUT(GOUT[20]));
+   BLOCK1 U321 (.PIN1(PIN[13]), .PIN2(PIN[17]), .GIN1(GIN[17]), .GIN2(GIN[21]), .POUT(POUT[13]), .GOUT(GOUT[21]));
+   BLOCK1 U322 (.PIN1(PIN[14]), .PIN2(PIN[18]), .GIN1(GIN[18]), .GIN2(GIN[22]), .POUT(POUT[14]), .GOUT(GOUT[22]));
+   BLOCK1 U323 (.PIN1(PIN[15]), .PIN2(PIN[19]), .GIN1(GIN[19]), .GIN2(GIN[23]), .POUT(POUT[15]), .GOUT(GOUT[23]));
+   BLOCK1 U324 (.PIN1(PIN[16]), .PIN2(PIN[20]), .GIN1(GIN[20]), .GIN2(GIN[24]), .POUT(POUT[16]), .GOUT(GOUT[24]));
+   BLOCK1 U325 (.PIN1(PIN[17]), .PIN2(PIN[21]), .GIN1(GIN[21]), .GIN2(GIN[25]), .POUT(POUT[17]), .GOUT(GOUT[25]));
+   BLOCK1 U326 (.PIN1(PIN[18]), .PIN2(PIN[22]), .GIN1(GIN[22]), .GIN2(GIN[26]), .POUT(POUT[18]), .GOUT(GOUT[26]));
+   BLOCK1 U327 (.PIN1(PIN[19]), .PIN2(PIN[23]), .GIN1(GIN[23]), .GIN2(GIN[27]), .POUT(POUT[19]), .GOUT(GOUT[27]));
+   BLOCK1 U328 (.PIN1(PIN[20]), .PIN2(PIN[24]), .GIN1(GIN[24]), .GIN2(GIN[28]), .POUT(POUT[20]), .GOUT(GOUT[28]));
+   BLOCK1 U329 (.PIN1(PIN[21]), .PIN2(PIN[25]), .GIN1(GIN[25]), .GIN2(GIN[29]), .POUT(POUT[21]), .GOUT(GOUT[29]));
+   BLOCK1 U330 (.PIN1(PIN[22]), .PIN2(PIN[26]), .GIN1(GIN[26]), .GIN2(GIN[30]), .POUT(POUT[22]), .GOUT(GOUT[30]));
+   BLOCK1 U331 (.PIN1(PIN[23]), .PIN2(PIN[27]), .GIN1(GIN[27]), .GIN2(GIN[31]), .POUT(POUT[23]), .GOUT(GOUT[31]));
+   BLOCK1 U332 (.PIN1(PIN[24]), .PIN2(PIN[28]), .GIN1(GIN[28]), .GIN2(GIN[32]), .POUT(POUT[24]), .GOUT(GOUT[32]));
+   BLOCK1 U333 (.PIN1(PIN[25]), .PIN2(PIN[29]), .GIN1(GIN[29]), .GIN2(GIN[33]), .POUT(POUT[25]), .GOUT(GOUT[33]));
+   BLOCK1 U334 (.PIN1(PIN[26]), .PIN2(PIN[30]), .GIN1(GIN[30]), .GIN2(GIN[34]), .POUT(POUT[26]), .GOUT(GOUT[34]));
+   BLOCK1 U335 (.PIN1(PIN[27]), .PIN2(PIN[31]), .GIN1(GIN[31]), .GIN2(GIN[35]), .POUT(POUT[27]), .GOUT(GOUT[35]));
+   BLOCK1 U336 (.PIN1(PIN[28]), .PIN2(PIN[32]), .GIN1(GIN[32]), .GIN2(GIN[36]), .POUT(POUT[28]), .GOUT(GOUT[36]));
+   BLOCK1 U337 (.PIN1(PIN[29]), .PIN2(PIN[33]), .GIN1(GIN[33]), .GIN2(GIN[37]), .POUT(POUT[29]), .GOUT(GOUT[37]));
+   BLOCK1 U338 (.PIN1(PIN[30]), .PIN2(PIN[34]), .GIN1(GIN[34]), .GIN2(GIN[38]), .POUT(POUT[30]), .GOUT(GOUT[38]));
+   BLOCK1 U339 (.PIN1(PIN[31]), .PIN2(PIN[35]), .GIN1(GIN[35]), .GIN2(GIN[39]), .POUT(POUT[31]), .GOUT(GOUT[39]));
+   BLOCK1 U340 (.PIN1(PIN[32]), .PIN2(PIN[36]), .GIN1(GIN[36]), .GIN2(GIN[40]), .POUT(POUT[32]), .GOUT(GOUT[40]));
+   BLOCK1 U341 (.PIN1(PIN[33]), .PIN2(PIN[37]), .GIN1(GIN[37]), .GIN2(GIN[41]), .POUT(POUT[33]), .GOUT(GOUT[41]));
+   BLOCK1 U342 (.PIN1(PIN[34]), .PIN2(PIN[38]), .GIN1(GIN[38]), .GIN2(GIN[42]), .POUT(POUT[34]), .GOUT(GOUT[42]));
+   BLOCK1 U343 (.PIN1(PIN[35]), .PIN2(PIN[39]), .GIN1(GIN[39]), .GIN2(GIN[43]), .POUT(POUT[35]), .GOUT(GOUT[43]));
+   BLOCK1 U344 (.PIN1(PIN[36]), .PIN2(PIN[40]), .GIN1(GIN[40]), .GIN2(GIN[44]), .POUT(POUT[36]), .GOUT(GOUT[44]));
+   BLOCK1 U345 (.PIN1(PIN[37]), .PIN2(PIN[41]), .GIN1(GIN[41]), .GIN2(GIN[45]), .POUT(POUT[37]), .GOUT(GOUT[45]));
+   BLOCK1 U346 (.PIN1(PIN[38]), .PIN2(PIN[42]), .GIN1(GIN[42]), .GIN2(GIN[46]), .POUT(POUT[38]), .GOUT(GOUT[46]));
+   BLOCK1 U347 (.PIN1(PIN[39]), .PIN2(PIN[43]), .GIN1(GIN[43]), .GIN2(GIN[47]), .POUT(POUT[39]), .GOUT(GOUT[47]));
+   BLOCK1 U348 (.PIN1(PIN[40]), .PIN2(PIN[44]), .GIN1(GIN[44]), .GIN2(GIN[48]), .POUT(POUT[40]), .GOUT(GOUT[48]));
+   BLOCK1 U349 (.PIN1(PIN[41]), .PIN2(PIN[45]), .GIN1(GIN[45]), .GIN2(GIN[49]), .POUT(POUT[41]), .GOUT(GOUT[49]));
+   BLOCK1 U350 (.PIN1(PIN[42]), .PIN2(PIN[46]), .GIN1(GIN[46]), .GIN2(GIN[50]), .POUT(POUT[42]), .GOUT(GOUT[50]));
+   BLOCK1 U351 (.PIN1(PIN[43]), .PIN2(PIN[47]), .GIN1(GIN[47]), .GIN2(GIN[51]), .POUT(POUT[43]), .GOUT(GOUT[51]));
+   BLOCK1 U352 (.PIN1(PIN[44]), .PIN2(PIN[48]), .GIN1(GIN[48]), .GIN2(GIN[52]), .POUT(POUT[44]), .GOUT(GOUT[52]));
+   BLOCK1 U353 (.PIN1(PIN[45]), .PIN2(PIN[49]), .GIN1(GIN[49]), .GIN2(GIN[53]), .POUT(POUT[45]), .GOUT(GOUT[53]));
+   BLOCK1 U354 (.PIN1(PIN[46]), .PIN2(PIN[50]), .GIN1(GIN[50]), .GIN2(GIN[54]), .POUT(POUT[46]), .GOUT(GOUT[54]));
+   BLOCK1 U355 (.PIN1(PIN[47]), .PIN2(PIN[51]), .GIN1(GIN[51]), .GIN2(GIN[55]), .POUT(POUT[47]), .GOUT(GOUT[55]));
+   BLOCK1 U356 (.PIN1(PIN[48]), .PIN2(PIN[52]), .GIN1(GIN[52]), .GIN2(GIN[56]), .POUT(POUT[48]), .GOUT(GOUT[56]));
+   BLOCK1 U357 (.PIN1(PIN[49]), .PIN2(PIN[53]), .GIN1(GIN[53]), .GIN2(GIN[57]), .POUT(POUT[49]), .GOUT(GOUT[57]));
+   BLOCK1 U358 (.PIN1(PIN[50]), .PIN2(PIN[54]), .GIN1(GIN[54]), .GIN2(GIN[58]), .POUT(POUT[50]), .GOUT(GOUT[58]));
+   BLOCK1 U359 (.PIN1(PIN[51]), .PIN2(PIN[55]), .GIN1(GIN[55]), .GIN2(GIN[59]), .POUT(POUT[51]), .GOUT(GOUT[59]));
+   BLOCK1 U360 (.PIN1(PIN[52]), .PIN2(PIN[56]), .GIN1(GIN[56]), .GIN2(GIN[60]), .POUT(POUT[52]), .GOUT(GOUT[60]));
+   BLOCK1 U361 (.PIN1(PIN[53]), .PIN2(PIN[57]), .GIN1(GIN[57]), .GIN2(GIN[61]), .POUT(POUT[53]), .GOUT(GOUT[61]));
+   BLOCK1 U362 (.PIN1(PIN[54]), .PIN2(PIN[58]), .GIN1(GIN[58]), .GIN2(GIN[62]), .POUT(POUT[54]), .GOUT(GOUT[62]));
+   BLOCK1 U363 (.PIN1(PIN[55]), .PIN2(PIN[59]), .GIN1(GIN[59]), .GIN2(GIN[63]), .POUT(POUT[55]), .GOUT(GOUT[63]));
+   BLOCK1 U364 (.PIN1(PIN[56]), .PIN2(PIN[60]), .GIN1(GIN[60]), .GIN2(GIN[64]), .POUT(POUT[56]), .GOUT(GOUT[64]));
+endmodule // DBLC_2_64
+
+
+module DBLC_3_64 ( PIN, GIN, POUT, GOUT );
+   // Span-8 combining level: GOUT[k] merges GIN[k] with GIN[k-8], using PIN[k-8] as PIN2.
+   input [0:56]  PIN;
+   input [0:64]  GIN;
+   output [0:48] POUT;
+   output [0:64] GOUT;
+   // Slots 0..7 have no partner at this span, so they are only inverted.
+   INVBLOCK U10 (.GIN(GIN[0]), .GOUT(GOUT[0]));
+   INVBLOCK U11 (.GIN(GIN[1]), .GOUT(GOUT[1]));
+   INVBLOCK U12 (.GIN(GIN[2]), .GOUT(GOUT[2]));
+   INVBLOCK U13 (.GIN(GIN[3]), .GOUT(GOUT[3]));
+   INVBLOCK U14 (.GIN(GIN[4]), .GOUT(GOUT[4]));
+   INVBLOCK U15 (.GIN(GIN[5]), .GOUT(GOUT[5]));
+   INVBLOCK U16 (.GIN(GIN[6]), .GOUT(GOUT[6]));
+   INVBLOCK U17 (.GIN(GIN[7]), .GOUT(GOUT[7]));
+   // Slots 8..15 merge with slots 0..7 and produce no POUT (BLOCK2A).
+   BLOCK2A U28 (.PIN2(PIN[0]), .GIN1(GIN[0]), .GIN2(GIN[8]), .GOUT(GOUT[8]));
+   BLOCK2A U29 (.PIN2(PIN[1]), .GIN1(GIN[1]), .GIN2(GIN[9]), .GOUT(GOUT[9]));
+   BLOCK2A U210 (.PIN2(PIN[2]), .GIN1(GIN[2]), .GIN2(GIN[10]), .GOUT(GOUT[10]));
+   BLOCK2A U211 (.PIN2(PIN[3]), .GIN1(GIN[3]), .GIN2(GIN[11]), .GOUT(GOUT[11]));
+   BLOCK2A U212 (.PIN2(PIN[4]), .GIN1(GIN[4]), .GIN2(GIN[12]), .GOUT(GOUT[12]));
+   BLOCK2A U213 (.PIN2(PIN[5]), .GIN1(GIN[5]), .GIN2(GIN[13]), .GOUT(GOUT[13]));
+   BLOCK2A U214 (.PIN2(PIN[6]), .GIN1(GIN[6]), .GIN2(GIN[14]), .GOUT(GOUT[14]));
+   BLOCK2A U215 (.PIN2(PIN[7]), .GIN1(GIN[7]), .GIN2(GIN[15]), .GOUT(GOUT[15]));
+   // Slots 16..64: full BLOCK2 cells; POUT[k-16] combines PIN[k-16] and PIN[k-8].
+   BLOCK2 U316 (.PIN1(PIN[0]), .PIN2(PIN[8]), .GIN1(GIN[8]), .GIN2(GIN[16]), .POUT(POUT[0]), .GOUT(GOUT[16]));
+   BLOCK2 U317 (.PIN1(PIN[1]), .PIN2(PIN[9]), .GIN1(GIN[9]), .GIN2(GIN[17]), .POUT(POUT[1]), .GOUT(GOUT[17]));
+   BLOCK2 U318 (.PIN1(PIN[2]), .PIN2(PIN[10]), .GIN1(GIN[10]), .GIN2(GIN[18]), .POUT(POUT[2]), .GOUT(GOUT[18]));
+   BLOCK2 U319 (.PIN1(PIN[3]), .PIN2(PIN[11]), .GIN1(GIN[11]), .GIN2(GIN[19]), .POUT(POUT[3]), .GOUT(GOUT[19]));
+   BLOCK2 U320 (.PIN1(PIN[4]), .PIN2(PIN[12]), .GIN1(GIN[12]), .GIN2(GIN[20]), .POUT(POUT[4]), .GOUT(GOUT[20]));
+   BLOCK2 U321 (.PIN1(PIN[5]), .PIN2(PIN[13]), .GIN1(GIN[13]), .GIN2(GIN[21]), .POUT(POUT[5]), .GOUT(GOUT[21]));
+   BLOCK2 U322 (.PIN1(PIN[6]), .PIN2(PIN[14]), .GIN1(GIN[14]), .GIN2(GIN[22]), .POUT(POUT[6]), .GOUT(GOUT[22]));
+   BLOCK2 U323 (.PIN1(PIN[7]), .PIN2(PIN[15]), .GIN1(GIN[15]), .GIN2(GIN[23]), .POUT(POUT[7]), .GOUT(GOUT[23]));
+   BLOCK2 U324 (.PIN1(PIN[8]), .PIN2(PIN[16]), .GIN1(GIN[16]), .GIN2(GIN[24]), .POUT(POUT[8]), .GOUT(GOUT[24]));
+   BLOCK2 U325 (.PIN1(PIN[9]), .PIN2(PIN[17]), .GIN1(GIN[17]), .GIN2(GIN[25]), .POUT(POUT[9]), .GOUT(GOUT[25]));
+   BLOCK2 U326 (.PIN1(PIN[10]), .PIN2(PIN[18]), .GIN1(GIN[18]), .GIN2(GIN[26]), .POUT(POUT[10]), .GOUT(GOUT[26]));
+   BLOCK2 U327 (.PIN1(PIN[11]), .PIN2(PIN[19]), .GIN1(GIN[19]), .GIN2(GIN[27]), .POUT(POUT[11]), .GOUT(GOUT[27]));
+   BLOCK2 U328 (.PIN1(PIN[12]), .PIN2(PIN[20]), .GIN1(GIN[20]), .GIN2(GIN[28]), .POUT(POUT[12]), .GOUT(GOUT[28]));
+   BLOCK2 U329 (.PIN1(PIN[13]), .PIN2(PIN[21]), .GIN1(GIN[21]), .GIN2(GIN[29]), .POUT(POUT[13]), .GOUT(GOUT[29]));
+   BLOCK2 U330 (.PIN1(PIN[14]), .PIN2(PIN[22]), .GIN1(GIN[22]), .GIN2(GIN[30]), .POUT(POUT[14]), .GOUT(GOUT[30]));
+   BLOCK2 U331 (.PIN1(PIN[15]), .PIN2(PIN[23]), .GIN1(GIN[23]), .GIN2(GIN[31]), .POUT(POUT[15]), .GOUT(GOUT[31]));
+   BLOCK2 U332 (.PIN1(PIN[16]), .PIN2(PIN[24]), .GIN1(GIN[24]), .GIN2(GIN[32]), .POUT(POUT[16]), .GOUT(GOUT[32]));
+   BLOCK2 U333 (.PIN1(PIN[17]), .PIN2(PIN[25]), .GIN1(GIN[25]), .GIN2(GIN[33]), .POUT(POUT[17]), .GOUT(GOUT[33]));
+   BLOCK2 U334 (.PIN1(PIN[18]), .PIN2(PIN[26]), .GIN1(GIN[26]), .GIN2(GIN[34]), .POUT(POUT[18]), .GOUT(GOUT[34]));
+   BLOCK2 U335 (.PIN1(PIN[19]), .PIN2(PIN[27]), .GIN1(GIN[27]), .GIN2(GIN[35]), .POUT(POUT[19]), .GOUT(GOUT[35]));
+   BLOCK2 U336 (.PIN1(PIN[20]), .PIN2(PIN[28]), .GIN1(GIN[28]), .GIN2(GIN[36]), .POUT(POUT[20]), .GOUT(GOUT[36]));
+   BLOCK2 U337 (.PIN1(PIN[21]), .PIN2(PIN[29]), .GIN1(GIN[29]), .GIN2(GIN[37]), .POUT(POUT[21]), .GOUT(GOUT[37]));
+   BLOCK2 U338 (.PIN1(PIN[22]), .PIN2(PIN[30]), .GIN1(GIN[30]), .GIN2(GIN[38]), .POUT(POUT[22]), .GOUT(GOUT[38]));
+   BLOCK2 U339 (.PIN1(PIN[23]), .PIN2(PIN[31]), .GIN1(GIN[31]), .GIN2(GIN[39]), .POUT(POUT[23]), .GOUT(GOUT[39]));
+   BLOCK2 U340 (.PIN1(PIN[24]), .PIN2(PIN[32]), .GIN1(GIN[32]), .GIN2(GIN[40]), .POUT(POUT[24]), .GOUT(GOUT[40]));
+   BLOCK2 U341 (.PIN1(PIN[25]), .PIN2(PIN[33]), .GIN1(GIN[33]), .GIN2(GIN[41]), .POUT(POUT[25]), .GOUT(GOUT[41]));
+   BLOCK2 U342 (.PIN1(PIN[26]), .PIN2(PIN[34]), .GIN1(GIN[34]), .GIN2(GIN[42]), .POUT(POUT[26]), .GOUT(GOUT[42]));
+   BLOCK2 U343 (.PIN1(PIN[27]), .PIN2(PIN[35]), .GIN1(GIN[35]), .GIN2(GIN[43]), .POUT(POUT[27]), .GOUT(GOUT[43]));
+   BLOCK2 U344 (.PIN1(PIN[28]), .PIN2(PIN[36]), .GIN1(GIN[36]), .GIN2(GIN[44]), .POUT(POUT[28]), .GOUT(GOUT[44]));
+   BLOCK2 U345 (.PIN1(PIN[29]), .PIN2(PIN[37]), .GIN1(GIN[37]), .GIN2(GIN[45]), .POUT(POUT[29]), .GOUT(GOUT[45]));
+   BLOCK2 U346 (.PIN1(PIN[30]), .PIN2(PIN[38]), .GIN1(GIN[38]), .GIN2(GIN[46]), .POUT(POUT[30]), .GOUT(GOUT[46]));
+   BLOCK2 U347 (.PIN1(PIN[31]), .PIN2(PIN[39]), .GIN1(GIN[39]), .GIN2(GIN[47]), .POUT(POUT[31]), .GOUT(GOUT[47]));
+   BLOCK2 U348 (.PIN1(PIN[32]), .PIN2(PIN[40]), .GIN1(GIN[40]), .GIN2(GIN[48]), .POUT(POUT[32]), .GOUT(GOUT[48]));
+   BLOCK2 U349 (.PIN1(PIN[33]), .PIN2(PIN[41]), .GIN1(GIN[41]), .GIN2(GIN[49]), .POUT(POUT[33]), .GOUT(GOUT[49]));
+   BLOCK2 U350 (.PIN1(PIN[34]), .PIN2(PIN[42]), .GIN1(GIN[42]), .GIN2(GIN[50]), .POUT(POUT[34]), .GOUT(GOUT[50]));
+   BLOCK2 U351 (.PIN1(PIN[35]), .PIN2(PIN[43]), .GIN1(GIN[43]), .GIN2(GIN[51]), .POUT(POUT[35]), .GOUT(GOUT[51]));
+   BLOCK2 U352 (.PIN1(PIN[36]), .PIN2(PIN[44]), .GIN1(GIN[44]), .GIN2(GIN[52]), .POUT(POUT[36]), .GOUT(GOUT[52]));
+   BLOCK2 U353 (.PIN1(PIN[37]), .PIN2(PIN[45]), .GIN1(GIN[45]), .GIN2(GIN[53]), .POUT(POUT[37]), .GOUT(GOUT[53]));
+   BLOCK2 U354 (.PIN1(PIN[38]), .PIN2(PIN[46]), .GIN1(GIN[46]), .GIN2(GIN[54]), .POUT(POUT[38]), .GOUT(GOUT[54]));
+   BLOCK2 U355 (.PIN1(PIN[39]), .PIN2(PIN[47]), .GIN1(GIN[47]), .GIN2(GIN[55]), .POUT(POUT[39]), .GOUT(GOUT[55]));
+   BLOCK2 U356 (.PIN1(PIN[40]), .PIN2(PIN[48]), .GIN1(GIN[48]), .GIN2(GIN[56]), .POUT(POUT[40]), .GOUT(GOUT[56]));
+   BLOCK2 U357 (.PIN1(PIN[41]), .PIN2(PIN[49]), .GIN1(GIN[49]), .GIN2(GIN[57]), .POUT(POUT[41]), .GOUT(GOUT[57]));
+   BLOCK2 U358 (.PIN1(PIN[42]), .PIN2(PIN[50]), .GIN1(GIN[50]), .GIN2(GIN[58]), .POUT(POUT[42]), .GOUT(GOUT[58]));
+   BLOCK2 U359 (.PIN1(PIN[43]), .PIN2(PIN[51]), .GIN1(GIN[51]), .GIN2(GIN[59]), .POUT(POUT[43]), .GOUT(GOUT[59]));
+   BLOCK2 U360 (.PIN1(PIN[44]), .PIN2(PIN[52]), .GIN1(GIN[52]), .GIN2(GIN[60]), .POUT(POUT[44]), .GOUT(GOUT[60]));
+   BLOCK2 U361 (.PIN1(PIN[45]), .PIN2(PIN[53]), .GIN1(GIN[53]), .GIN2(GIN[61]), .POUT(POUT[45]), .GOUT(GOUT[61]));
+   BLOCK2 U362 (.PIN1(PIN[46]), .PIN2(PIN[54]), .GIN1(GIN[54]), .GIN2(GIN[62]), .POUT(POUT[46]), .GOUT(GOUT[62]));
+   BLOCK2 U363 (.PIN1(PIN[47]), .PIN2(PIN[55]), .GIN1(GIN[55]), .GIN2(GIN[63]), .POUT(POUT[47]), .GOUT(GOUT[63]));
+   BLOCK2 U364 (.PIN1(PIN[48]), .PIN2(PIN[56]), .GIN1(GIN[56]), .GIN2(GIN[64]), .POUT(POUT[48]), .GOUT(GOUT[64]));
+endmodule // DBLC_3_64
+
+
+module DBLC_4_64 ( PIN, GIN, POUT, GOUT );
+   // Span-16 combining level: GOUT[k] merges GIN[k] with GIN[k-16], using PIN[k-16] as PIN2.
+   input [0:48]  PIN;
+   input [0:64]  GIN;
+   output [0:32] POUT;
+   output [0:64] GOUT;
+   // Slots 0..15 have no partner at this span, so they are only inverted.
+   INVBLOCK U10 (.GIN(GIN[0]), .GOUT(GOUT[0]));
+   INVBLOCK U11 (.GIN(GIN[1]), .GOUT(GOUT[1]));
+   INVBLOCK U12 (.GIN(GIN[2]), .GOUT(GOUT[2]));
+   INVBLOCK U13 (.GIN(GIN[3]), .GOUT(GOUT[3]));
+   INVBLOCK U14 (.GIN(GIN[4]), .GOUT(GOUT[4]));
+   INVBLOCK U15 (.GIN(GIN[5]), .GOUT(GOUT[5]));
+   INVBLOCK U16 (.GIN(GIN[6]), .GOUT(GOUT[6]));
+   INVBLOCK U17 (.GIN(GIN[7]), .GOUT(GOUT[7]));
+   INVBLOCK U18 (.GIN(GIN[8]), .GOUT(GOUT[8]));
+   INVBLOCK U19 (.GIN(GIN[9]), .GOUT(GOUT[9]));
+   INVBLOCK U110 (.GIN(GIN[10]), .GOUT(GOUT[10]));
+   INVBLOCK U111 (.GIN(GIN[11]), .GOUT(GOUT[11]));
+   INVBLOCK U112 (.GIN(GIN[12]), .GOUT(GOUT[12]));
+   INVBLOCK U113 (.GIN(GIN[13]), .GOUT(GOUT[13]));
+   INVBLOCK U114 (.GIN(GIN[14]), .GOUT(GOUT[14]));
+   INVBLOCK U115 (.GIN(GIN[15]), .GOUT(GOUT[15]));
+   // Slots 16..31 merge with slots 0..15 and produce no POUT (BLOCK1A).
+   BLOCK1A U216 (.PIN2(PIN[0]), .GIN1(GIN[0]), .GIN2(GIN[16]), .GOUT(GOUT[16]));
+   BLOCK1A U217 (.PIN2(PIN[1]), .GIN1(GIN[1]), .GIN2(GIN[17]), .GOUT(GOUT[17]));
+   BLOCK1A U218 (.PIN2(PIN[2]), .GIN1(GIN[2]), .GIN2(GIN[18]), .GOUT(GOUT[18]));
+   BLOCK1A U219 (.PIN2(PIN[3]), .GIN1(GIN[3]), .GIN2(GIN[19]), .GOUT(GOUT[19]));
+   BLOCK1A U220 (.PIN2(PIN[4]), .GIN1(GIN[4]), .GIN2(GIN[20]), .GOUT(GOUT[20]));
+   BLOCK1A U221 (.PIN2(PIN[5]), .GIN1(GIN[5]), .GIN2(GIN[21]), .GOUT(GOUT[21]));
+   BLOCK1A U222 (.PIN2(PIN[6]), .GIN1(GIN[6]), .GIN2(GIN[22]), .GOUT(GOUT[22]));
+   BLOCK1A U223 (.PIN2(PIN[7]), .GIN1(GIN[7]), .GIN2(GIN[23]), .GOUT(GOUT[23]));
+   BLOCK1A U224 (.PIN2(PIN[8]), .GIN1(GIN[8]), .GIN2(GIN[24]), .GOUT(GOUT[24]));
+   BLOCK1A U225 (.PIN2(PIN[9]), .GIN1(GIN[9]), .GIN2(GIN[25]), .GOUT(GOUT[25]));
+   BLOCK1A U226 (.PIN2(PIN[10]), .GIN1(GIN[10]), .GIN2(GIN[26]), .GOUT(GOUT[26]));
+   BLOCK1A U227 (.PIN2(PIN[11]), .GIN1(GIN[11]), .GIN2(GIN[27]), .GOUT(GOUT[27]));
+   BLOCK1A U228 (.PIN2(PIN[12]), .GIN1(GIN[12]), .GIN2(GIN[28]), .GOUT(GOUT[28]));
+   BLOCK1A U229 (.PIN2(PIN[13]), .GIN1(GIN[13]), .GIN2(GIN[29]), .GOUT(GOUT[29]));
+   BLOCK1A U230 (.PIN2(PIN[14]), .GIN1(GIN[14]), .GIN2(GIN[30]), .GOUT(GOUT[30]));
+   BLOCK1A U231 (.PIN2(PIN[15]), .GIN1(GIN[15]), .GIN2(GIN[31]), .GOUT(GOUT[31]));
+   // Slots 32..64: full BLOCK1 cells; POUT[k-32] combines PIN[k-32] and PIN[k-16].
+   BLOCK1 U332 (.PIN1(PIN[0]), .PIN2(PIN[16]), .GIN1(GIN[16]), .GIN2(GIN[32]), .POUT(POUT[0]), .GOUT(GOUT[32]));
+   BLOCK1 U333 (.PIN1(PIN[1]), .PIN2(PIN[17]), .GIN1(GIN[17]), .GIN2(GIN[33]), .POUT(POUT[1]), .GOUT(GOUT[33]));
+   BLOCK1 U334 (.PIN1(PIN[2]), .PIN2(PIN[18]), .GIN1(GIN[18]), .GIN2(GIN[34]), .POUT(POUT[2]), .GOUT(GOUT[34]));
+   BLOCK1 U335 (.PIN1(PIN[3]), .PIN2(PIN[19]), .GIN1(GIN[19]), .GIN2(GIN[35]), .POUT(POUT[3]), .GOUT(GOUT[35]));
+   BLOCK1 U336 (.PIN1(PIN[4]), .PIN2(PIN[20]), .GIN1(GIN[20]), .GIN2(GIN[36]), .POUT(POUT[4]), .GOUT(GOUT[36]));
+   BLOCK1 U337 (.PIN1(PIN[5]), .PIN2(PIN[21]), .GIN1(GIN[21]), .GIN2(GIN[37]), .POUT(POUT[5]), .GOUT(GOUT[37]));
+   BLOCK1 U338 (.PIN1(PIN[6]), .PIN2(PIN[22]), .GIN1(GIN[22]), .GIN2(GIN[38]), .POUT(POUT[6]), .GOUT(GOUT[38]));
+   BLOCK1 U339 (.PIN1(PIN[7]), .PIN2(PIN[23]), .GIN1(GIN[23]), .GIN2(GIN[39]), .POUT(POUT[7]), .GOUT(GOUT[39]));
+   BLOCK1 U340 (.PIN1(PIN[8]), .PIN2(PIN[24]), .GIN1(GIN[24]), .GIN2(GIN[40]), .POUT(POUT[8]), .GOUT(GOUT[40]));
+   BLOCK1 U341 (.PIN1(PIN[9]), .PIN2(PIN[25]), .GIN1(GIN[25]), .GIN2(GIN[41]), .POUT(POUT[9]), .GOUT(GOUT[41]));
+   BLOCK1 U342 (.PIN1(PIN[10]), .PIN2(PIN[26]), .GIN1(GIN[26]), .GIN2(GIN[42]), .POUT(POUT[10]), .GOUT(GOUT[42]));
+   BLOCK1 U343 (.PIN1(PIN[11]), .PIN2(PIN[27]), .GIN1(GIN[27]), .GIN2(GIN[43]), .POUT(POUT[11]), .GOUT(GOUT[43]));
+   BLOCK1 U344 (.PIN1(PIN[12]), .PIN2(PIN[28]), .GIN1(GIN[28]), .GIN2(GIN[44]), .POUT(POUT[12]), .GOUT(GOUT[44]));
+   BLOCK1 U345 (.PIN1(PIN[13]), .PIN2(PIN[29]), .GIN1(GIN[29]), .GIN2(GIN[45]), .POUT(POUT[13]), .GOUT(GOUT[45]));
+   BLOCK1 U346 (.PIN1(PIN[14]), .PIN2(PIN[30]), .GIN1(GIN[30]), .GIN2(GIN[46]), .POUT(POUT[14]), .GOUT(GOUT[46]));
+   BLOCK1 U347 (.PIN1(PIN[15]), .PIN2(PIN[31]), .GIN1(GIN[31]), .GIN2(GIN[47]), .POUT(POUT[15]), .GOUT(GOUT[47]));
+   BLOCK1 U348 (.PIN1(PIN[16]), .PIN2(PIN[32]), .GIN1(GIN[32]), .GIN2(GIN[48]), .POUT(POUT[16]), .GOUT(GOUT[48]));
+   BLOCK1 U349 (.PIN1(PIN[17]), .PIN2(PIN[33]), .GIN1(GIN[33]), .GIN2(GIN[49]), .POUT(POUT[17]), .GOUT(GOUT[49]));
+   BLOCK1 U350 (.PIN1(PIN[18]), .PIN2(PIN[34]), .GIN1(GIN[34]), .GIN2(GIN[50]), .POUT(POUT[18]), .GOUT(GOUT[50]));
+   BLOCK1 U351 (.PIN1(PIN[19]), .PIN2(PIN[35]), .GIN1(GIN[35]), .GIN2(GIN[51]), .POUT(POUT[19]), .GOUT(GOUT[51]));
+   BLOCK1 U352 (.PIN1(PIN[20]), .PIN2(PIN[36]), .GIN1(GIN[36]), .GIN2(GIN[52]), .POUT(POUT[20]), .GOUT(GOUT[52]));
+   BLOCK1 U353 (.PIN1(PIN[21]), .PIN2(PIN[37]), .GIN1(GIN[37]), .GIN2(GIN[53]), .POUT(POUT[21]), .GOUT(GOUT[53]));
+   BLOCK1 U354 (.PIN1(PIN[22]), .PIN2(PIN[38]), .GIN1(GIN[38]), .GIN2(GIN[54]), .POUT(POUT[22]), .GOUT(GOUT[54]));
+   BLOCK1 U355 (.PIN1(PIN[23]), .PIN2(PIN[39]), .GIN1(GIN[39]), .GIN2(GIN[55]), .POUT(POUT[23]), .GOUT(GOUT[55]));
+   BLOCK1 U356 (.PIN1(PIN[24]), .PIN2(PIN[40]), .GIN1(GIN[40]), .GIN2(GIN[56]), .POUT(POUT[24]), .GOUT(GOUT[56]));
+   BLOCK1 U357 (.PIN1(PIN[25]), .PIN2(PIN[41]), .GIN1(GIN[41]), .GIN2(GIN[57]), .POUT(POUT[25]), .GOUT(GOUT[57]));
+   BLOCK1 U358 (.PIN1(PIN[26]), .PIN2(PIN[42]), .GIN1(GIN[42]), .GIN2(GIN[58]), .POUT(POUT[26]), .GOUT(GOUT[58]));
+   BLOCK1 U359 (.PIN1(PIN[27]), .PIN2(PIN[43]), .GIN1(GIN[43]), .GIN2(GIN[59]), .POUT(POUT[27]), .GOUT(GOUT[59]));
+   BLOCK1 U360 (.PIN1(PIN[28]), .PIN2(PIN[44]), .GIN1(GIN[44]), .GIN2(GIN[60]), .POUT(POUT[28]), .GOUT(GOUT[60]));
+   BLOCK1 U361 (.PIN1(PIN[29]), .PIN2(PIN[45]), .GIN1(GIN[45]), .GIN2(GIN[61]), .POUT(POUT[29]), .GOUT(GOUT[61]));
+   BLOCK1 U362 (.PIN1(PIN[30]), .PIN2(PIN[46]), .GIN1(GIN[46]), .GIN2(GIN[62]), .POUT(POUT[30]), .GOUT(GOUT[62]));
+   BLOCK1 U363 (.PIN1(PIN[31]), .PIN2(PIN[47]), .GIN1(GIN[47]), .GIN2(GIN[63]), .POUT(POUT[31]), .GOUT(GOUT[63]));
+   BLOCK1 U364 (.PIN1(PIN[32]), .PIN2(PIN[48]), .GIN1(GIN[48]), .GIN2(GIN[64]), .POUT(POUT[32]), .GOUT(GOUT[64]));
+endmodule // DBLC_4_64
+
+
+module DBLC_5_64 ( PIN, GIN, POUT, GOUT );
+   // Span-32 combining level: GOUT[k] merges GIN[k] with GIN[k-32], using PIN[k-32] as PIN2.
+   input [0:32]  PIN;
+   input [0:64]  GIN;
+   output [0:0]  POUT;
+   output [0:64] GOUT;
+   // Slots 0..31 have no partner at this span, so they are only inverted.
+   INVBLOCK U10 (.GIN(GIN[0]), .GOUT(GOUT[0]));
+   INVBLOCK U11 (.GIN(GIN[1]), .GOUT(GOUT[1]));
+   INVBLOCK U12 (.GIN(GIN[2]), .GOUT(GOUT[2]));
+   INVBLOCK U13 (.GIN(GIN[3]), .GOUT(GOUT[3]));
+   INVBLOCK U14 (.GIN(GIN[4]), .GOUT(GOUT[4]));
+   INVBLOCK U15 (.GIN(GIN[5]), .GOUT(GOUT[5]));
+   INVBLOCK U16 (.GIN(GIN[6]), .GOUT(GOUT[6]));
+   INVBLOCK U17 (.GIN(GIN[7]), .GOUT(GOUT[7]));
+   INVBLOCK U18 (.GIN(GIN[8]), .GOUT(GOUT[8]));
+   INVBLOCK U19 (.GIN(GIN[9]), .GOUT(GOUT[9]));
+   INVBLOCK U110 (.GIN(GIN[10]), .GOUT(GOUT[10]));
+   INVBLOCK U111 (.GIN(GIN[11]), .GOUT(GOUT[11]));
+   INVBLOCK U112 (.GIN(GIN[12]), .GOUT(GOUT[12]));
+   INVBLOCK U113 (.GIN(GIN[13]), .GOUT(GOUT[13]));
+   INVBLOCK U114 (.GIN(GIN[14]), .GOUT(GOUT[14]));
+   INVBLOCK U115 (.GIN(GIN[15]), .GOUT(GOUT[15]));
+   INVBLOCK U116 (.GIN(GIN[16]), .GOUT(GOUT[16]));
+   INVBLOCK U117 (.GIN(GIN[17]), .GOUT(GOUT[17]));
+   INVBLOCK U118 (.GIN(GIN[18]), .GOUT(GOUT[18]));
+   INVBLOCK U119 (.GIN(GIN[19]), .GOUT(GOUT[19]));
+   INVBLOCK U120 (.GIN(GIN[20]), .GOUT(GOUT[20]));
+   INVBLOCK U121 (.GIN(GIN[21]), .GOUT(GOUT[21]));
+   INVBLOCK U122 (.GIN(GIN[22]), .GOUT(GOUT[22]));
+   INVBLOCK U123 (.GIN(GIN[23]), .GOUT(GOUT[23]));
+   INVBLOCK U124 (.GIN(GIN[24]), .GOUT(GOUT[24]));
+   INVBLOCK U125 (.GIN(GIN[25]), .GOUT(GOUT[25]));
+   INVBLOCK U126 (.GIN(GIN[26]), .GOUT(GOUT[26]));
+   INVBLOCK U127 (.GIN(GIN[27]), .GOUT(GOUT[27]));
+   INVBLOCK U128 (.GIN(GIN[28]), .GOUT(GOUT[28]));
+   INVBLOCK U129 (.GIN(GIN[29]), .GOUT(GOUT[29]));
+   INVBLOCK U130 (.GIN(GIN[30]), .GOUT(GOUT[30]));
+   INVBLOCK U131 (.GIN(GIN[31]), .GOUT(GOUT[31]));
+   // Slots 32..63 merge with slots 0..31 and produce no POUT (BLOCK2A).
+   BLOCK2A U232 (.PIN2(PIN[0]), .GIN1(GIN[0]), .GIN2(GIN[32]), .GOUT(GOUT[32]));
+   BLOCK2A U233 (.PIN2(PIN[1]), .GIN1(GIN[1]), .GIN2(GIN[33]), .GOUT(GOUT[33]));
+   BLOCK2A U234 (.PIN2(PIN[2]), .GIN1(GIN[2]), .GIN2(GIN[34]), .GOUT(GOUT[34]));
+   BLOCK2A U235 (.PIN2(PIN[3]), .GIN1(GIN[3]), .GIN2(GIN[35]), .GOUT(GOUT[35]));
+   BLOCK2A U236 (.PIN2(PIN[4]), .GIN1(GIN[4]), .GIN2(GIN[36]), .GOUT(GOUT[36]));
+   BLOCK2A U237 (.PIN2(PIN[5]), .GIN1(GIN[5]), .GIN2(GIN[37]), .GOUT(GOUT[37]));
+   BLOCK2A U238 (.PIN2(PIN[6]), .GIN1(GIN[6]), .GIN2(GIN[38]), .GOUT(GOUT[38]));
+   BLOCK2A U239 (.PIN2(PIN[7]), .GIN1(GIN[7]), .GIN2(GIN[39]), .GOUT(GOUT[39]));
+   BLOCK2A U240 (.PIN2(PIN[8]), .GIN1(GIN[8]), .GIN2(GIN[40]), .GOUT(GOUT[40]));
+   BLOCK2A U241 (.PIN2(PIN[9]), .GIN1(GIN[9]), .GIN2(GIN[41]), .GOUT(GOUT[41]));
+   BLOCK2A U242 (.PIN2(PIN[10]), .GIN1(GIN[10]), .GIN2(GIN[42]), .GOUT(GOUT[42]));
+   BLOCK2A U243 (.PIN2(PIN[11]), .GIN1(GIN[11]), .GIN2(GIN[43]), .GOUT(GOUT[43]));
+   BLOCK2A U244 (.PIN2(PIN[12]), .GIN1(GIN[12]), .GIN2(GIN[44]), .GOUT(GOUT[44]));
+   BLOCK2A U245 (.PIN2(PIN[13]), .GIN1(GIN[13]), .GIN2(GIN[45]), .GOUT(GOUT[45]));
+   BLOCK2A U246 (.PIN2(PIN[14]), .GIN1(GIN[14]), .GIN2(GIN[46]), .GOUT(GOUT[46]));
+   BLOCK2A U247 (.PIN2(PIN[15]), .GIN1(GIN[15]), .GIN2(GIN[47]), .GOUT(GOUT[47]));
+   BLOCK2A U248 (.PIN2(PIN[16]), .GIN1(GIN[16]), .GIN2(GIN[48]), .GOUT(GOUT[48]));
+   BLOCK2A U249 (.PIN2(PIN[17]), .GIN1(GIN[17]), .GIN2(GIN[49]), .GOUT(GOUT[49]));
+   BLOCK2A U250 (.PIN2(PIN[18]), .GIN1(GIN[18]), .GIN2(GIN[50]), .GOUT(GOUT[50]));
+   BLOCK2A U251 (.PIN2(PIN[19]), .GIN1(GIN[19]), .GIN2(GIN[51]), .GOUT(GOUT[51]));
+   BLOCK2A U252 (.PIN2(PIN[20]), .GIN1(GIN[20]), .GIN2(GIN[52]), .GOUT(GOUT[52]));
+   BLOCK2A U253 (.PIN2(PIN[21]), .GIN1(GIN[21]), .GIN2(GIN[53]), .GOUT(GOUT[53]));
+   BLOCK2A U254 (.PIN2(PIN[22]), .GIN1(GIN[22]), .GIN2(GIN[54]), .GOUT(GOUT[54]));
+   BLOCK2A U255 (.PIN2(PIN[23]), .GIN1(GIN[23]), .GIN2(GIN[55]), .GOUT(GOUT[55]));
+   BLOCK2A U256 (.PIN2(PIN[24]), .GIN1(GIN[24]), .GIN2(GIN[56]), .GOUT(GOUT[56]));
+   BLOCK2A U257 (.PIN2(PIN[25]), .GIN1(GIN[25]), .GIN2(GIN[57]), .GOUT(GOUT[57]));
+   BLOCK2A U258 (.PIN2(PIN[26]), .GIN1(GIN[26]), .GIN2(GIN[58]), .GOUT(GOUT[58]));
+   BLOCK2A U259 (.PIN2(PIN[27]), .GIN1(GIN[27]), .GIN2(GIN[59]), .GOUT(GOUT[59]));
+   BLOCK2A U260 (.PIN2(PIN[28]), .GIN1(GIN[28]), .GIN2(GIN[60]), .GOUT(GOUT[60]));
+   BLOCK2A U261 (.PIN2(PIN[29]), .GIN1(GIN[29]), .GIN2(GIN[61]), .GOUT(GOUT[61]));
+   BLOCK2A U262 (.PIN2(PIN[30]), .GIN1(GIN[30]), .GIN2(GIN[62]), .GOUT(GOUT[62]));
+   BLOCK2A U263 (.PIN2(PIN[31]), .GIN1(GIN[31]), .GIN2(GIN[63]), .GOUT(GOUT[63]));
+   // Slot 64: the only full BLOCK2 cell; POUT[0] combines PIN[0] and PIN[32].
+   BLOCK2 U364 (.PIN1(PIN[0]), .PIN2(PIN[32]), .GIN1(GIN[32]), .GIN2(GIN[64]), .POUT(POUT[0]), .GOUT(GOUT[64]));
+endmodule // DBLC_5_64
+
+
+module XORSTAGE_64 ( A, B, PBIT, CARRY, SUM, COUT );
+   // Final stage: one XXOR1 cell per bit forms SUM, and one BLOCK1A cell forms COUT.
+   input  [0:63] A;       // first operand
+   input  [0:63] B;       // second operand
+   input         PBIT;    // propagate term from the carry tree
+   input  [0:64] CARRY;   // carry terms from the carry tree
+   // SUM[i] = ~(A[i] ^ B[i]) ^ CARRY[i]   (per the XXOR1 cell)
+   output [0:63] SUM;
+   output        COUT;
+   // COUT = ~(CARRY[64] & (PBIT | CARRY[0]))   (per the BLOCK1A cell)
+   XXOR1 U20 ( .SUM(SUM[0]), .A(A[0]), .B(B[0]), .GIN(CARRY[0]) );
+   XXOR1 U21 ( .SUM(SUM[1]), .A(A[1]), .B(B[1]), .GIN(CARRY[1]) );
+   XXOR1 U22 ( .SUM(SUM[2]), .A(A[2]), .B(B[2]), .GIN(CARRY[2]) );
+   XXOR1 U23 ( .SUM(SUM[3]), .A(A[3]), .B(B[3]), .GIN(CARRY[3]) );
+   XXOR1 U24 ( .SUM(SUM[4]), .A(A[4]), .B(B[4]), .GIN(CARRY[4]) );
+   XXOR1 U25 ( .SUM(SUM[5]), .A(A[5]), .B(B[5]), .GIN(CARRY[5]) );
+   XXOR1 U26 ( .SUM(SUM[6]), .A(A[6]), .B(B[6]), .GIN(CARRY[6]) );
+   XXOR1 U27 ( .SUM(SUM[7]), .A(A[7]), .B(B[7]), .GIN(CARRY[7]) );
+   XXOR1 U28 ( .SUM(SUM[8]), .A(A[8]), .B(B[8]), .GIN(CARRY[8]) );
+   XXOR1 U29 ( .SUM(SUM[9]), .A(A[9]), .B(B[9]), .GIN(CARRY[9]) );
+   XXOR1 U210 ( .SUM(SUM[10]), .A(A[10]), .B(B[10]), .GIN(CARRY[10]) );
+   XXOR1 U211 ( .SUM(SUM[11]), .A(A[11]), .B(B[11]), .GIN(CARRY[11]) );
+   XXOR1 U212 ( .SUM(SUM[12]), .A(A[12]), .B(B[12]), .GIN(CARRY[12]) );
+   XXOR1 U213 ( .SUM(SUM[13]), .A(A[13]), .B(B[13]), .GIN(CARRY[13]) );
+   XXOR1 U214 ( .SUM(SUM[14]), .A(A[14]), .B(B[14]), .GIN(CARRY[14]) );
+   XXOR1 U215 ( .SUM(SUM[15]), .A(A[15]), .B(B[15]), .GIN(CARRY[15]) );
+   XXOR1 U216 ( .SUM(SUM[16]), .A(A[16]), .B(B[16]), .GIN(CARRY[16]) );
+   XXOR1 U217 ( .SUM(SUM[17]), .A(A[17]), .B(B[17]), .GIN(CARRY[17]) );
+   XXOR1 U218 ( .SUM(SUM[18]), .A(A[18]), .B(B[18]), .GIN(CARRY[18]) );
+   XXOR1 U219 ( .SUM(SUM[19]), .A(A[19]), .B(B[19]), .GIN(CARRY[19]) );
+   XXOR1 U220 ( .SUM(SUM[20]), .A(A[20]), .B(B[20]), .GIN(CARRY[20]) );
+   XXOR1 U221 ( .SUM(SUM[21]), .A(A[21]), .B(B[21]), .GIN(CARRY[21]) );
+   XXOR1 U222 ( .SUM(SUM[22]), .A(A[22]), .B(B[22]), .GIN(CARRY[22]) );
+   XXOR1 U223 ( .SUM(SUM[23]), .A(A[23]), .B(B[23]), .GIN(CARRY[23]) );
+   XXOR1 U224 ( .SUM(SUM[24]), .A(A[24]), .B(B[24]), .GIN(CARRY[24]) );
+   XXOR1 U225 ( .SUM(SUM[25]), .A(A[25]), .B(B[25]), .GIN(CARRY[25]) );
+   XXOR1 U226 ( .SUM(SUM[26]), .A(A[26]), .B(B[26]), .GIN(CARRY[26]) );
+   XXOR1 U227 ( .SUM(SUM[27]), .A(A[27]), .B(B[27]), .GIN(CARRY[27]) );
+   XXOR1 U228 ( .SUM(SUM[28]), .A(A[28]), .B(B[28]), .GIN(CARRY[28]) );
+   XXOR1 U229 ( .SUM(SUM[29]), .A(A[29]), .B(B[29]), .GIN(CARRY[29]) );
+   XXOR1 U230 ( .SUM(SUM[30]), .A(A[30]), .B(B[30]), .GIN(CARRY[30]) );
+   XXOR1 U231 ( .SUM(SUM[31]), .A(A[31]), .B(B[31]), .GIN(CARRY[31]) );
+   XXOR1 U232 ( .SUM(SUM[32]), .A(A[32]), .B(B[32]), .GIN(CARRY[32]) );
+   XXOR1 U233 ( .SUM(SUM[33]), .A(A[33]), .B(B[33]), .GIN(CARRY[33]) );
+   XXOR1 U234 ( .SUM(SUM[34]), .A(A[34]), .B(B[34]), .GIN(CARRY[34]) );
+   XXOR1 U235 ( .SUM(SUM[35]), .A(A[35]), .B(B[35]), .GIN(CARRY[35]) );
+   XXOR1 U236 ( .SUM(SUM[36]), .A(A[36]), .B(B[36]), .GIN(CARRY[36]) );
+   XXOR1 U237 ( .SUM(SUM[37]), .A(A[37]), .B(B[37]), .GIN(CARRY[37]) );
+   XXOR1 U238 ( .SUM(SUM[38]), .A(A[38]), .B(B[38]), .GIN(CARRY[38]) );
+   XXOR1 U239 ( .SUM(SUM[39]), .A(A[39]), .B(B[39]), .GIN(CARRY[39]) );
+   XXOR1 U240 ( .SUM(SUM[40]), .A(A[40]), .B(B[40]), .GIN(CARRY[40]) );
+   XXOR1 U241 ( .SUM(SUM[41]), .A(A[41]), .B(B[41]), .GIN(CARRY[41]) );
+   XXOR1 U242 ( .SUM(SUM[42]), .A(A[42]), .B(B[42]), .GIN(CARRY[42]) );
+   XXOR1 U243 ( .SUM(SUM[43]), .A(A[43]), .B(B[43]), .GIN(CARRY[43]) );
+   XXOR1 U244 ( .SUM(SUM[44]), .A(A[44]), .B(B[44]), .GIN(CARRY[44]) );
+   XXOR1 U245 ( .SUM(SUM[45]), .A(A[45]), .B(B[45]), .GIN(CARRY[45]) );
+   XXOR1 U246 ( .SUM(SUM[46]), .A(A[46]), .B(B[46]), .GIN(CARRY[46]) );
+   XXOR1 U247 ( .SUM(SUM[47]), .A(A[47]), .B(B[47]), .GIN(CARRY[47]) );
+   XXOR1 U248 ( .SUM(SUM[48]), .A(A[48]), .B(B[48]), .GIN(CARRY[48]) );
+   XXOR1 U249 ( .SUM(SUM[49]), .A(A[49]), .B(B[49]), .GIN(CARRY[49]) );
+   XXOR1 U250 ( .SUM(SUM[50]), .A(A[50]), .B(B[50]), .GIN(CARRY[50]) );
+   XXOR1 U251 ( .SUM(SUM[51]), .A(A[51]), .B(B[51]), .GIN(CARRY[51]) );
+   XXOR1 U252 ( .SUM(SUM[52]), .A(A[52]), .B(B[52]), .GIN(CARRY[52]) );
+   XXOR1 U253 ( .SUM(SUM[53]), .A(A[53]), .B(B[53]), .GIN(CARRY[53]) );
+   XXOR1 U254 ( .SUM(SUM[54]), .A(A[54]), .B(B[54]), .GIN(CARRY[54]) );
+   XXOR1 U255 ( .SUM(SUM[55]), .A(A[55]), .B(B[55]), .GIN(CARRY[55]) );
+   XXOR1 U256 ( .SUM(SUM[56]), .A(A[56]), .B(B[56]), .GIN(CARRY[56]) );
+   XXOR1 U257 ( .SUM(SUM[57]), .A(A[57]), .B(B[57]), .GIN(CARRY[57]) );
+   XXOR1 U258 ( .SUM(SUM[58]), .A(A[58]), .B(B[58]), .GIN(CARRY[58]) );
+   XXOR1 U259 ( .SUM(SUM[59]), .A(A[59]), .B(B[59]), .GIN(CARRY[59]) );
+   XXOR1 U260 ( .SUM(SUM[60]), .A(A[60]), .B(B[60]), .GIN(CARRY[60]) );
+   XXOR1 U261 ( .SUM(SUM[61]), .A(A[61]), .B(B[61]), .GIN(CARRY[61]) );
+   XXOR1 U262 ( .SUM(SUM[62]), .A(A[62]), .B(B[62]), .GIN(CARRY[62]) );
+   XXOR1 U263 ( .SUM(SUM[63]), .A(A[63]), .B(B[63]), .GIN(CARRY[63]) );
+   BLOCK1A U1 ( .GOUT(COUT), .GIN2(CARRY[64]), .PIN2(PBIT), .GIN1(CARRY[0]) );
+   
+endmodule // XORSTAGE_64
+
+
+module DBLCTREE_64 ( PIN, GIN, GOUT, POUT );
+   // Chains the six tree levels DBLC_0_64 .. DBLC_5_64; each level takes the
+   // propagate/generate vectors produced by the level before it.
+   input  [0:63] PIN;      // propagate terms entering level 0
+   input  [0:64] GIN;      // generate terms entering level 0
+   
+   output [0:64] GOUT;     // generate terms leaving level 5
+   output [0:0]  POUT;     // propagate term leaving level 5
+   
+   // Propagate vectors between levels; each is declared narrower than the
+   // previous one (63, 61, 57, 49 and 33 entries).
+   wire [0:62]   PROP_L0;
+   wire [0:60]   PROP_L1;
+   wire [0:56]   PROP_L2;
+   wire [0:48]   PROP_L3;
+   wire [0:32]   PROP_L4;
+   // Generate vectors between levels; all 65 entries wide.
+   wire [0:64]   GEN_L0, GEN_L1, GEN_L2, GEN_L3, GEN_L4;
+   
+   DBLC_0_64 U_0 ( .POUT(PROP_L0), .GOUT(GEN_L0), .PIN(PIN)    , .GIN(GIN)    );
+   DBLC_1_64 U_1 ( .POUT(PROP_L1), .GOUT(GEN_L1), .PIN(PROP_L0), .GIN(GEN_L0) );
+   DBLC_2_64 U_2 ( .POUT(PROP_L2), .GOUT(GEN_L2), .PIN(PROP_L1), .GIN(GEN_L1) );
+   DBLC_3_64 U_3 ( .POUT(PROP_L3), .GOUT(GEN_L3), .PIN(PROP_L2), .GIN(GEN_L2) );
+   DBLC_4_64 U_4 ( .POUT(PROP_L4), .GOUT(GEN_L4), .PIN(PROP_L3), .GIN(GEN_L3) );
+   DBLC_5_64 U_5 ( .POUT(POUT)   , .GOUT(GOUT)  , .PIN(PROP_L4), .GIN(GEN_L4) );
+   
+endmodule // DBLCTREE_64
+
+
+module DBLCADDER_64_64 ( OPA, OPB, CIN, SUM, COUT );
+   // 64-bit adder assembled from three stages: PRESTAGE_64 -> DBLCTREE_64 -> XORSTAGE_64.
+   input  [0:63] OPA;      // first operand
+   input  [0:63] OPB;      // second operand
+   input         CIN;      // carry-in, consumed by the pre-stage
+   
+   output [0:63] SUM;      // sum bits produced by XORSTAGE_64
+   output        COUT;     // carry-out produced by XORSTAGE_64
+   
+   wire [0:63]   PRE_P;    // propagate terms out of the pre-stage
+   wire [0:64]   PRE_G;    // generate terms out of the pre-stage
+   wire [0:0]    TREE_P;   // propagate term out of the carry tree
+   wire [0:64]   TREE_G;   // generate (carry) terms out of the carry tree
+   
+   PRESTAGE_64 U1 ( .A(OPA), .B(OPB), .CIN(CIN), .POUT(PRE_P), .GOUT(PRE_G) );
+   DBLCTREE_64 U2 ( .PIN(PRE_P), .GIN(PRE_G), .GOUT(TREE_G), .POUT(TREE_P) );
+   XORSTAGE_64 U3 ( .A(OPA[0:63]), .B(OPB[0:63]), .PBIT(TREE_P[0]), .CARRY(TREE_G[0:64]), .SUM(SUM), .COUT(COUT) );
+   
+endmodule // DBLCADDER_64_64
diff --git a/wally-pipelined/src/fpu/fpadd/cla52.v b/wally-pipelined/src/fpu/fpadd/cla52.v
new file mode 100755
index 00000000..00fca299
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/cla52.v
@@ -0,0 +1,44 @@
+// This module implements a 52-bit carry lookahead adder. It is used
+// for rounding in the floating point adder. 
+
+module cla52 (S, CO, X, Y);
+   
+   input  [51:0] X;       // first addend
+   input  [51:0] Y;       // second addend
+   
+   output [51:0] S;       // sum bits 0..51 taken from the 64-bit core
+   output        CO;      // carry out, taken from core sum bit 52
+   
+   // Buses seen by the 64-bit adder core. They are declared [0:63] while the
+   // ports are [51:0], so every connection below is made bit-by-bit
+   // (index i to index i) to keep the index pairing explicit.
+   wire [0:63]   A, B, Q;
+   wire          LOGIC0;  // constant 0 used to pad core operand bits 52..63
+   wire          CIN;     // carry-in to the core, tied to 0
+   wire          CO_64;   // carry out of the 64-bit core; left unused
+   genvar        i;
+   
+   assign LOGIC0 = 0;
+   assign CIN    = 0;
+   
+   DBLCADDER_64_64 U1 (A , B , CIN, Q , CO_64);
+   
+   generate
+      // Indices 0..51: operand bits in, sum bits out.
+      for (i = 0; i < 52; i = i + 1) begin : DATA
+         assign A[i] = X[i];
+         assign B[i] = Y[i];
+         assign S[i] = Q[i];
+      end
+      // Indices 52..63: both core operands are padded with zeros.
+      for (i = 52; i < 64; i = i + 1) begin : PAD
+         assign A[i] = LOGIC0;
+         assign B[i] = LOGIC0;
+      end
+   endgenerate
+   
+   // Operand bits 52 are both 0, so (assuming the core carries from index i
+   // into index i+1) core sum bit 52 equals the carry out of index 51.
+   assign CO = Q[52];
+   
+endmodule //cla52
diff --git a/wally-pipelined/src/fpu/fpadd/cla64.v b/wally-pipelined/src/fpu/fpadd/cla64.v
new file mode 100755
index 00000000..a0809e9d
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/cla64.v
@@ -0,0 +1,62 @@
+// This module implements a 64-bit carry lookahead adder/subtractor. 
+// It is used to perform the primary addition in the floating point
+// adder
+
+module cla64 (S, X, Y, Sub);
+   
+   input  [63:0] X;      // first operand
+   input  [63:0] Y;      // second operand
+   input         Sub;    // 0: Y is used as-is; 1: Y is complemented and the carry-in is 1
+   output [63:0] S;      // sum bits from the adder core
+   
+   wire          CO;     // carry out of the core; not used
+   // Core-side buses are declared [0:63]; connect index i to index i.
+   wire [0:63]   A, B, Q, Bbar;
+   genvar        i;
+   
+   // XOR with Sub replicated 64 times: complements every bit of B when Sub is 1.
+   assign Bbar = B ^ {64{Sub}};
+   
+   // Sub also drives the core's carry-in.
+   DBLCADDER_64_64 U1 (A , Bbar , Sub , Q , CO );
+   
+   generate
+      for (i = 0; i < 64; i = i + 1) begin : BITS
+         assign A[i] = X[i];
+         assign B[i] = Y[i];
+         assign S[i] = Q[i];
+      end
+   endgenerate
+   
+endmodule // cla64
+
+// This module performs 64-bit subtraction. It is used to get the two's complement
+// of main addition or subtraction in the floating point adder. 
+
+module cla_sub64 (S, X, Y);
+   
+   input  [63:0] X;      // minuend
+   input  [63:0] Y;      // subtrahend
+   
+   output [63:0] S;      // core result for X + ~Y with carry-in 1
+   
+   wire          CO;     // carry out of the core; not used
+   wire          VDD;    // constant 1 fed to the core's carry-in
+   // Core-side buses are declared [0:63]; connect index i to index i.
+   wire [0:63]   A, B, Q, Bbar;
+   genvar        i;
+   
+   assign VDD  = 1'b1;
+   assign Bbar = ~B;
+   
+   DBLCADDER_64_64 U1 (A , Bbar , VDD, Q , CO );
+   
+   generate
+      for (i = 0; i < 64; i = i + 1) begin : BITS
+         assign A[i] = X[i];
+         assign B[i] = Y[i];
+         assign S[i] = Q[i];
+      end
+   endgenerate
+   
+endmodule // cla_sub64
\ No newline at end of file
diff --git a/wally-pipelined/src/fpu/fpadd/convert_inputs.v b/wally-pipelined/src/fpu/fpadd/convert_inputs.v
new file mode 100755
index 00000000..7ad93453
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/convert_inputs.v
@@ -0,0 +1,61 @@
+// This module takes as inputs two operands (op1 and op2) 
+// the operation type (op_type) and the result precision (P). 
+// Based on the operation and precision , it conditionally
+// converts single precision values to double precision values
+// and modifies the sign of op1. The converted operands are Float1
+// and Float2.
+
+module convert_inputs(Float1, Float2, op1, op2, op_type, P);
+   // Optionally widens both operands from the single-precision field layout
+   // (8-bit exponent at bits 62:55) to double precision, and adjusts op1's sign.
+   input  [63:0] op1;      // 1st input operand (A)
+   input  [63:0] op2;      // 2nd input operand (B)
+   input  [2:0]  op_type;  // Function opcode
+   input         P;        // Result Precision (0 for double, 1 for single)
+
+   output [63:0] Float1;   // Converted 1st input operand
+   output [63:0] Float2;   // Converted 2nd input operand
+
+   wire          conv_SP;  // Convert from SP to DP
+   wire          negate;   // Operation is negation
+   wire          abs_val;  // Operation is absolute value
+   wire          Zexp1, Zexp2;   // Exponent field 62:55 of op1/op2 is all zeros
+   wire          Oexp1, Oexp2;   // Exponent field 62:55 of op1/op2 is all ones
+   wire          fill1, fill2;   // Bit replicated into the three added exponent bits
+   wire [33:0]   wide1, wide2;   // Bits 62:29 of op1/op2 after widening
+
+   // Widen when exactly one of "op_type is 11X" and "P is 1" holds, i.e.
+   // (op_type is 11X and P is 0) or (op_type is not 11X and P is 1).
+   assign conv_SP = P ^ (op_type[1] & op_type[2]);
+
+   // Classify the 8-bit exponent field of each operand.
+   assign Zexp1 = ~|op1[62:55];
+   assign Zexp2 = ~|op2[62:55];
+   assign Oexp1 =  &op1[62:55];
+   assign Oexp2 =  &op2[62:55];
+
+   // The 8-bit exponent becomes 11 bits by inserting three copies of a fill
+   // bit below its MSB. The fill is the inverted MSB, except that an all-zero
+   // exponent stays all zeros and an all-one exponent stays all ones.
+   assign fill1 = Oexp1 | (~Zexp1 & ~op1[62]);
+   assign fill2 = Oexp2 | (~Zexp2 & ~op2[62]);
+   assign wide1 = {op1[62], {3{fill1}}, op1[61:32]};
+   assign wide2 = {op2[62], {3{fill2}}, op2[61:32]};
+
+   // Bits 62:29 take the widened form when converting and pass through
+   // otherwise. Lower 29 bits are zero for single precision.
+   assign Float1[62:29] = conv_SP ? wide1 : op1[62:29];
+   assign Float2[62:29] = conv_SP ? wide2 : op2[62:29];
+   assign Float1[28:0]  = conv_SP ? 29'b0 : op1[28:0];
+   assign Float2[28:0]  = conv_SP ? 29'b0 : op2[28:0];
+
+   // Sign handling applies to the first operand only: negation
+   // (op_type = 101) flips its sign bit and absolute value (op_type = 100)
+   // clears it. The second operand keeps its sign.
+   assign negate  = op_type[2] & op_type[0] & ~op_type[1];
+   assign abs_val = op_type[2] & ~(op_type[1] | op_type[0]);
+   assign Float1[63] = abs_val ? 1'b0 : (op1[63] ^ negate);
+   assign Float2[63] = op2[63];
+
+endmodule // convert_inputs
+
diff --git a/wally-pipelined/src/fpu/fpadd/exception.v b/wally-pipelined/src/fpu/fpadd/exception.v
new file mode 100755
index 00000000..8f5b1cd4
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/exception.v
@@ -0,0 +1,120 @@
+// Exception logic for the floating point adder. Note: We may 
+// actually want to move this logic to where the result is computed.
+
+module exception (Ztype, Invalid, Denorm, ANorm, BNorm, Sub, A, B, op_type);
+   // Classifies A and B from their fields (exponent 62:52, mantissa 51:0) and derives the flags below.
+   input [63:0] A;              // 1st input operand (op1)
+   input [63:0] B;              // 2nd input operand (op2)
+   input [2:0]  op_type;        // Function opcode
+   output [3:0] Ztype;          // Indicates type of result (Z)
+   output       Invalid;        // Invalid operation exception
+   output       Denorm;         // Denormalized input
+   output       ANorm;          // A's exponent field is non-zero (A is not zero or Denorm)
+   output       BNorm;          // B's exponent field is non-zero (B is not zero or Denorm)
+   output       Sub;            // The effective operation is subtraction
+   wire         AzeroM;         // '1' if the mantissa of A is zero
+   wire         BzeroM;         // '1' if the mantissa of B is zero
+   wire         AzeroE;         // '1' if the exponent of A is zero
+   wire         BzeroE;         // '1' if the exponent of B is zero
+   wire         AonesE;         // '1' if the exponent of A is all ones
+   wire         BonesE;         // '1' if the exponent of B is all ones
+   wire         ADenorm;        // '1' if A is a denormalized number
+   wire         BDenorm;        // '1' if B is a denormalized number
+   wire         AInf;           // '1' if A is infinite
+   wire         BInf;           // '1' if B is infinite
+   wire         AZero;          // '1' if A is 0
+   wire         BZero;          // '1' if B is 0
+   wire         ANaN;           // '1' if A is a not-a-number
+   wire         BNaN;           // '1' if B is a not-a-number
+   wire         ASNaN;          // '1' if A is a signalling not-a-number
+   wire         BSNaN;          // '1' if B is a signalling not-a-number
+   wire         ZQNaN;          // '1' if result Z is a quiet NaN
+   wire         ZPInf;          // '1' if result Z is positive infinity
+   wire         ZNInf;          // '1' if result Z is negative infinity
+   wire         add_sub;        // '1' if operation is add or subtract
+   wire         converts;       // '1' for every op_type other than 00X
+   
+   parameter [51:0]  fifty_two_zeros = 52'h0000000000000; // Use parameter?
+
+   // Is this instruction a convert? As coded, this is 1 whenever op_type[2:1]
+   // is not 00, i.e. for everything except add/subtract.
+   assign converts      = ~(~op_type[1] & ~op_type[2]);
+   
+   // Determine if mantissas are all zeros
+   assign AzeroM = (A[51:0] == fifty_two_zeros);
+   assign BzeroM = (B[51:0] == fifty_two_zeros);
+
+   // Determine if exponents are all ones or all zeros 
+   assign AonesE =  &A[62:52];
+   assign BonesE =  &B[62:52];
+   assign AzeroE = ~|A[62:52];
+   assign BzeroE = ~|B[62:52];
+
+   // Determine special cases. Note: Zero is not really a special case. 
+   assign ADenorm = AzeroE & ~AzeroM;
+   assign BDenorm = BzeroE & ~BzeroM;
+   assign AInf = AonesE & AzeroM;
+   assign BInf = BonesE & BzeroM;
+   assign ANaN = AonesE & ~AzeroM;
+   assign BNaN = BonesE & ~BzeroM;
+   assign ASNaN = ANaN & ~A[51];
+   assign BSNaN = BNaN & ~B[51];
+   assign AZero = AzeroE & AzeroM;
+   assign BZero = BzeroE & BzeroM;   // zero needs exponent AND mantissa zero (was BzeroE & BzeroE)
+
+   // A and B are normalized if their exponents are not zero. 
+   assign ANorm = ~AzeroE;
+   assign BNorm = ~BzeroE;
+
+   // An "Invalid Operation" exception occurs if (A or B is a signalling NaN)
+   // or (A and B are both Infinite and the "effective operation" is 
+   // subtraction). It is only raised for add/subtract (masked by converts).
+   assign add_sub = ~op_type[2] & ~op_type[1];
+   assign Invalid = (ASNaN | BSNaN | 
+                     (add_sub & AInf & BInf & (A[63]^B[63]^op_type[0]))) & ~converts;
+
+   // The Denorm flag is set if (A is denormalized and op_type is not 01X, i.e. not an integer
+   // conversion per the original note) or (B is denormalized and the operation is add/subtract).
+   assign Denorm = ADenorm&(op_type[2]|~op_type[1]) | BDenorm & add_sub;
+
+   // The result is a quiet NaN if (an "Invalid Operation" exception occurs) 
+   // or (A is a NaN) or (B is a NaN and the operation uses B).
+   assign ZQNaN = Invalid | ANaN | (BNaN & add_sub);
+
+   // ZPInf: (A is infinite with A[63] set) or (B is +Inf and the operation is addition)
+   // or (B is -Inf and the operation is subtraction), and the result is not a quiet NaN.
+   // NOTE(review): the A term tests A[63]=1 (a set sign bit) although the name says +Inf - confirm.
+   assign ZPInf = (AInf&A[63] | add_sub&BInf&(~B[63]^op_type[0]))&~ZQNaN;
+
+   // ZNInf: (A is infinite with A[63] clear) or (B is -Inf and the operation is addition)
+   // or (B is +Inf and the operation is subtraction), and the result is not a quiet NaN.
+   // NOTE(review): the A term tests A[63]=0 (a clear sign bit) although the name says -Inf - confirm.
+   assign ZNInf = (AInf&~A[63] | add_sub&BInf&(B[63]^op_type[0]))&~ZQNaN;
+
+   // Set the type of the result as follows (encoding not optimized):
+   // Ztype     Result 
+   //  0000     Normal
+   //  0001     Quiet NaN
+   //  0010     Negative Infinity
+   //  0011     Positive Infinity
+   //  0100     A and B both zero, both effectively positive
+   //  0101     A and B both zero, effective signs differ
+   //  0110     A and B both zero, both effectively negative
+   //  1000     op_type is 110: Convert SP to DP (and vice-versa)
+
+   assign Ztype[0] = ((ZQNaN | ZPInf) & ~(~op_type[2] & op_type[1])) | 
+                     ((AZero & BZero & (A[63]^B[63]^op_type[0])) 
+                      & ~converts);
+   assign Ztype[1] = ((ZNInf | ZPInf) & ~(~op_type[2] & op_type[1])) | 
+                     (((AZero & BZero & A[63] & B[63] & ~op_type[0]) |
+                       (AZero & BZero & A[63] & ~B[63] & op_type[0])) 
+                      & ~converts);
+   assign Ztype[2] = ((AZero & BZero & ~op_type[1] & ~op_type[2]) 
+                      & ~converts);
+   assign Ztype[3] = (op_type[1] & op_type[2] & ~op_type[0]);
+
+   // Determine if the effective operation is subtraction
+   assign Sub = add_sub & (A[63]^B[63]^op_type[0]);
+
+endmodule // exception
+
diff --git a/wally-pipelined/src/fpu/fpadd/f32_add_rd.do b/wally-pipelined/src/fpu/fpadd/f32_add_rd.do
new file mode 100755
index 00000000..607fda62
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f32_add_rd.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f32_add_rd.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f32_add_rd.sv
+
+# Load the testbench. -voptargs=+acc replaces the deprecated -novopt switch.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   38,932 vectors, 389,365ns
+run 405000ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f32_add_rne.do b/wally-pipelined/src/fpu/fpadd/f32_add_rne.do
new file mode 100755
index 00000000..bc5ede61
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f32_add_rne.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f32_add_rne.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f32_add_rne.sv
+
+# Load the testbench. -voptargs=+acc replaces the deprecated -novopt switch.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   39,052 vectors, 390,565ns
+run 405000ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f32_add_ru.do b/wally-pipelined/src/fpu/fpadd/f32_add_ru.do
new file mode 100755
index 00000000..faf652d8
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f32_add_ru.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f32_add_ru.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f32_add_ru.sv
+
+# Load the testbench. -voptargs=+acc replaces the deprecated -novopt switch.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   38,946 vectors, 389,500ns
+run 405000ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f32_add_rz.do b/wally-pipelined/src/fpu/fpadd/f32_add_rz.do
new file mode 100755
index 00000000..f24385db
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f32_add_rz.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f32_add_rz.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f32_add_rz.sv
+
+# Load the testbench. -voptargs=+acc replaces the deprecated -novopt switch.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   39,111 vectors, 391,150ns
+run 405000ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f32_f64_rne.do b/wally-pipelined/src/fpu/fpadd/f32_f64_rne.do
new file mode 100755
index 00000000..4f5cc284
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f32_f64_rne.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f32_f64_rne.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f32_f64_rne.sv
+
+# Load the testbench. -voptargs=+acc replaces the deprecated -novopt switch.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   544 vectors; NOTE(review): the "390,565ns" noted here looks stale next to the 5480ns run
+run 5480ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f32_sub_rd.do b/wally-pipelined/src/fpu/fpadd/f32_sub_rd.do
new file mode 100755
index 00000000..f4e8f6f7
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f32_sub_rd.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f32_sub_rd.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f32_sub_rd.sv
+
+# Load the testbench. -voptargs=+acc replaces the deprecated -novopt switch.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   38,932 vectors, 389,365ns
+run 405000ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f32_sub_rne.do b/wally-pipelined/src/fpu/fpadd/f32_sub_rne.do
new file mode 100755
index 00000000..e8efd2a2
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f32_sub_rne.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f32_sub_rne.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f32_sub_rne.sv
+
+# Load the testbench. -voptargs=+acc replaces the deprecated -novopt switch.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   39,052 vectors, 390,565ns
+run 405000ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f32_sub_ru.do b/wally-pipelined/src/fpu/fpadd/f32_sub_ru.do
new file mode 100755
index 00000000..677584f1
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f32_sub_ru.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f32_sub_ru.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f32_sub_ru.sv
+
+# Load the testbench. -voptargs=+acc replaces the deprecated -novopt switch.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   38,946 vectors, 389,500ns
+run 405000ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f32_sub_rz.do b/wally-pipelined/src/fpu/fpadd/f32_sub_rz.do
new file mode 100755
index 00000000..031da39b
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f32_sub_rz.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f32_sub_rz.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f32_sub_rz.sv
+
+# Load the testbench. -voptargs=+acc replaces the deprecated -novopt switch.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   39,111 vectors, 391,150ns
+run 405000ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f64_add_rd.do b/wally-pipelined/src/fpu/fpadd/f64_add_rd.do
new file mode 100755
index 00000000..cb6005b0
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f64_add_rd.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f64_add_rd.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f64_add_rd.sv
+
+# Load the testbench. -voptargs=+acc replaces the deprecated -novopt switch.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   38,932 vectors, 389,365ns
+run 405000ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f64_add_rne.do b/wally-pipelined/src/fpu/fpadd/f64_add_rne.do
new file mode 100755
index 00000000..c22ba168
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f64_add_rne.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f64_add_rne.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f64_add_rne.sv
+
+# Load the testbench with +acc so signals stay accessible to the wave window.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   39,052 vectors, 390,565ns
+run 405000ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f64_add_ru.do b/wally-pipelined/src/fpu/fpadd/f64_add_ru.do
new file mode 100755
index 00000000..18f340a0
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f64_add_ru.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f64_add_ru.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f64_add_ru.sv
+
+# Load the testbench. -voptargs=+acc replaces the deprecated -novopt switch.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   38,946 vectors, 389,500ns
+run 405000ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f64_add_rz.do b/wally-pipelined/src/fpu/fpadd/f64_add_rz.do
new file mode 100755
index 00000000..b527719e
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f64_add_rz.do
@@ -0,0 +1,58 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f64_add_rz.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f64_add_rz.sv
+
+# Load the testbench with +acc so signals stay accessible to the wave window.
+vsim -voptargs=+acc work.tb
+
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   39,111 vectors, 391,150ns
+# A 100ns run stops far short of the time noted above, so run the full length.
+run 405000ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f64_f32_rne.do b/wally-pipelined/src/fpu/fpadd/f64_f32_rne.do
new file mode 100755
index 00000000..9376da17
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f64_f32_rne.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f64_f32_rne.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f64_f32_rne.sv
+
+# Load the testbench. -voptargs=+acc replaces the deprecated -novopt switch.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   565 vectors; NOTE(review): the "390,565ns" noted here looks stale next to the 5750ns run
+run 5750ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f64_sub_rd.do b/wally-pipelined/src/fpu/fpadd/f64_sub_rd.do
new file mode 100755
index 00000000..fcbbbfcd
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f64_sub_rd.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f64_sub_rd.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f64_sub_rd.sv
+
+# Load the testbench. -voptargs=+acc replaces the deprecated -novopt switch.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   38,927 vectors, 389,315ns
+run 405000ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f64_sub_rne.do b/wally-pipelined/src/fpu/fpadd/f64_sub_rne.do
new file mode 100755
index 00000000..007c92e7
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f64_sub_rne.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f64_sub_rne.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f64_sub_rne.sv
+
+# Load the testbench. -voptargs=+acc replaces the deprecated -novopt switch.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   39,059 vectors, 390,635ns
+run 405000ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f64_sub_ru.do b/wally-pipelined/src/fpu/fpadd/f64_sub_ru.do
new file mode 100755
index 00000000..e5afa415
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f64_sub_ru.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f64_sub_ru.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f64_sub_ru.sv
+
+# Load the testbench. -voptargs=+acc replaces the deprecated -novopt switch.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   38,937 vectors, 389,415ns
+run 405000ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/f64_sub_rz.do b/wally-pipelined/src/fpu/fpadd/f64_sub_rz.do
new file mode 100755
index 00000000..cc807b08
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/f64_sub_rz.do
@@ -0,0 +1,56 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do run.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# Recreate the "work" library from scratch.
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# Compile the design sources together with the tb_f64_sub_rz.sv testbench.
+vlog convert_inputs.v exception.v lzd.v shifter.v adder.v cla52.v cla64.v rounder.v fpadd.v tb_f64_sub_rz.sv
+
+# Load the testbench. -voptargs=+acc replaces the deprecated -novopt switch.
+vsim -voptargs=+acc work.tb
+
+view wave
+
+# Display input and output signals as hexadecimal values.
+# Adds every signal under /tb recursively.
+add wave -hex -r /tb/*
+
+# Set wave output items.
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the simulation.
+#   39,113 vectors, 391,175ns
+run 405000ns
+quit
diff --git a/wally-pipelined/src/fpu/fpadd/fpadd.v b/wally-pipelined/src/fpu/fpadd/fpadd.v
new file mode 100755
index 00000000..7f5f05eb
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/fpadd.v
@@ -0,0 +1,216 @@
+//
+// File name : fpadd
+// Title     : Floating-Point Adder/Subtractor
+// project   : FPU
+// Library   : fpadd
+// Author(s) : James E. Stine, Jr.
+// Purpose   : definition of main unit to floating-point add/sub
+// notes :   
+//
+// Copyright Oklahoma State University
+//
+// Basic Operations
+//
+// Step 1: Load operands, set flags, and convert SP to DP
+// Step 2: Check for special inputs ( +/- Infinity,  NaN)
+// Step 3: Compare exponents.  Swap the operands if exp1 < exp2
+//         or if (exp1 = exp2 AND mnt1 < mnt2)
+// Step 4: Shift the mantissa corresponding to the smaller exponent, 
+//          and extend precision by three bits to the right.
+// Step 5: Add or subtract the mantissas.
+// Step 6: Normalize the result.
+//   Shift left until normalized.  Normalized when the value to the 
+//   left of the binary point is 1.
+// Step 7: Round the result.// 
+// Step 8: Put sum onto output.
+//
+
+
+module fpadd (AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn);
+   // Combinational floating-point add/subtract/convert datapath.
+   input [63:0] op1;            // 1st input operand (A)
+   input [63:0] op2;            // 2nd input operand (B)
+   input [2:0]  rm;             // Rounding mode - specify values 
+   input [2:0]  op_type;        // Function opcode
+   input        P;              // Result Precision (0 for double, 1 for single)
+   input        OvEn;           // Overflow trap enabled
+   input        UnEn;           // Underflow trap enabled
+
+   output [63:0] AS_Result;     // Result of operation
+   output [4:0]  Flags;         // IEEE exception flags 
+   output        Denorm;        // Denorm on input or output   
+
+   wire [63:0]   Float1; 
+   wire [63:0]   Float2;
+   wire [63:0]   IntValue;
+   wire [11:0]   exp1, exp2;
+   wire [11:0]   exp_diff1, exp_diff2;
+   wire [10:0]   exponent, exp_pre;
+   wire [11:0]   exp_shift;
+   wire [63:0]   Result;   
+   wire [51:0]   mantissaA;
+   wire [56:0]   mantissaA1;
+   wire [63:0]   mantissaA3;
+   wire [51:0]   mantissaB; 
+   wire [56:0]   mantissaB1, mantissaB2;
+   wire [63:0]   mantissaB3;
+   wire [63:0]   sum, sum_tc, sum_corr, sum_norm;
+   wire [5:0]    align_shift;
+   wire [5:0]    norm_shift;
+   wire [3:0]    sel_inv;
+   wire          op1_Norm, op2_Norm;
+   wire          opA_Norm, opB_Norm;
+   wire          Invalid;
+   wire          DenormIn, DenormIO;
+   wire [4:0]    FlagsIn;
+   wire          exp_valid;
+   wire          exp_gt63;
+   wire          Sticky_out;
+   wire          signA, sign_corr;
+   wire          corr_sign;
+   wire          zeroB;         
+   wire          convert;
+   wire          swap;
+   wire          sub;
+
+   // Convert the input operands to their appropriate forms based on 
+   // the original operands, the op_type, and their precision P. 
+   // Single precision inputs are converted to double precision 
+   // and the sign of the first operand is set appropriately based on
+   // if the operation is absolute value or negation. 
+
+   convert_inputs conv1 (Float1, Float2, op1, op2, op_type, P);
+
+   // Test for exceptions and return the "Invalid Operation" and
+   // "Denormalized" Input Flags. The "sel_inv" is used in
+   // the third pipeline stage to select the result. Also, op1_Norm
+   // and op2_Norm are one if op1 and op2 are not zero or denormalized.
+   // sub is one if the effective operation is subtraction. 
+
+   exception exc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, sub, 
+                   Float1, Float2, op_type);
+
+   // Perform Exponent Subtraction (used for alignment). For performance
+   // both exponent subtractions are performed in parallel. This was 
+   // changed to a behavior level to allow the tools to try to optimize
+   // the two parallel subtractions. The input values are zero-extended
+   // to 12 bits first, so bit 11 of each difference is its sign. 
+
+   assign exp1 = {1'b0, Float1[62:52]};
+   assign exp2 = {1'b0, Float2[62:52]};
+   assign exp_diff1 = exp1 - exp2;
+   assign exp_diff2 = exp2 - exp1;
+
+   // The second operand (B) should be set to zero, if op_type does not
+   // specify addition or subtraction
+   assign zeroB = op_type[2] | op_type[1];
+
+   // Swap operands if zeroB is not one and exp1 < exp2. 
+   // Swapping causes exp2 to be used for the result exponent. 
+   // Only the exponent of the larger operand is used to determine
+   // the final result. 
+   assign swap = exp_diff1[11] & ~zeroB;
+   assign exponent = swap ? exp2[10:0] : exp1[10:0];
+   assign mantissaA = swap ? Float2[51:0] : Float1[51:0];
+   assign mantissaB = swap ? Float1[51:0] : Float2[51:0];
+   assign signA     = swap ? Float2[63] : Float1[63];   
+
+   // Determine the alignment shift and limit it to 63. If any bit from 
+   // exp_shift[6] to exp_shift[11] is one, then shift is set to all ones. 
+   assign exp_shift = swap ? exp_diff2 : exp_diff1;
+   assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9] 
+     | exp_shift[8] | exp_shift[7] | exp_shift[6];
+   assign align_shift = exp_shift[5:0] | {6{exp_gt63}};  // only the low 6 bits are used
+
+   // Unpack the 52-bit mantissas to 57-bit numbers of the form.
+   //    001.M[51]M[50] ... M[1]M[0]00
+   // Unless the operand is flagged as not normalized (op*_Norm = 0), in
+   // which case it is unpacked as
+   //    000.00 ... 00
+   // This effectively flushes denormalized values to zero. 
+   // The three bits to the left of the binary point prevent overflow
+   // and loss of sign information. The two bits to the right of the 
+   // original mantissa form the "guard" and "round" bits that are used
+   // to round the result. 
+   assign opA_Norm = swap ? op2_Norm : op1_Norm;
+   assign opB_Norm = swap ? op1_Norm : op2_Norm;
+   assign mantissaA1 = {2'h0, opA_Norm, mantissaA[51:0]&{52{opA_Norm}}, 2'h0};
+   assign mantissaB1 = {2'h0, opB_Norm, mantissaB[51:0]&{52{opB_Norm}}, 2'h0};
+
+   // Perform mantissa alignment using a 57-bit barrel shifter 
+   // If any of the bits shifted out are one, Sticky_out is set. 
+   // The size of the barrel shifter could be reduced by two bits
+   // by not adding the leading two zeros until after the shift. 
+   barrel_shifter_r57 bs1 (mantissaB2, Sticky_out, mantissaB1, align_shift);
+
+   // Place either the sign-extended 32-bit value or the original 64-bit value 
+   // into IntValue (to be used for integer to floating point conversion)
+   assign IntValue [31:0] = op1[31:0];
+   assign IntValue [63:32] = op_type[0] ? {32{op1[31]}} : op1[63:32];
+
+   // If doing an integer to floating point conversion, mantissaA3 is set to 
+   // IntValue and the prenormalized exponent is set to 1084. Otherwise, 
+   // mantissaA3 is simply extended to 64-bits by setting the 7 LSBs to zero, 
+   // and the exponent value is left unchanged. 
+   assign convert       = ~op_type[2] & op_type[1];
+   assign mantissaA3    = convert ? IntValue : {mantissaA1, 7'h0};
+   assign exp_pre       = convert ? 11'b10000111100 : exponent;
+
+   // Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to 
+   // 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six  
+   // zeros. 
+   assign mantissaB3[63:7] = mantissaB2 & {57{~zeroB}};
+   assign mantissaB3[6]    = Sticky_out & ~zeroB;
+   assign mantissaB3[5:0]  = 6'h0;
+
+   // The sign of the result needs to be corrected if the true
+   // operation is subtraction and the input operands were swapped. 
+   assign corr_sign = ~op_type[2]&~op_type[1]&op_type[0]&swap;
+
+   // 64-bit Mantissa Adder/Subtractor
+   cla64 add1 (sum, mantissaA3, mantissaB3, sub);
+
+   // 64-bit Mantissa Subtractor - to get the two's complement of the 
+   // result when the sign from the adder/subtractor is negative. 
+   cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3);
+
+   // Determine the correct sign of the result
+   assign sign_corr = ((corr_sign ^ signA) & ~convert) ^ sum[63];   
+
+   // If the sum is negative, use its two's complement instead. 
+   // This value has to be 64-bits to correctly handle the 
+   // case 10...00
+   assign sum_corr = sum[63] ? sum_tc : sum;
+
+   // Leading-Zero Detector. Determine the size of the shift needed for
+   // normalization. If sum_corr is all zeros, then exp_valid is 
+   // zero; otherwise, it is one. 
+   lz64 lzd1 (norm_shift, exp_valid, sum_corr);
+
+   // Barrel shifter used for normalization. It takes as inputs the 
+   // corrected sum and the amount by which the sum should 
+   // be left shifted. It outputs the normalized sum. 
+   barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift);
+
+   // Round the mantissa to a 52-bit value, with the leading one
+   // removed. If the result is a single precision number, the actual 
+   // mantissa is in the upper 23 bits and the lower 29 bits are zero. 
+   // At this point, normalization has already been performed, so we know 
+   // exactly where the rounding point is. The rounding unit also
+   // handles special cases and sets the exception flags.
+
+   // Changed DenormIO -> Denorm and FlagsIn -> Flags in order to
+   // help in processor reservation station detection of load/stores. In
+   // other words, the processor would like to know ahead of time that
+   // if the result is an exception then don't load or store.
+   rounder round1 (Result, DenormIO, FlagsIn, rm, P, OvEn, UnEn, exp_valid, 
+                   sel_inv, Invalid, DenormIn, convert, sign_corr, exp_pre, 
+                   norm_shift, sum_norm);
+
+   // Drive the final result and the exception flags onto the outputs.
+   assign AS_Result = Result;
+   assign {Denorm, Flags} = {DenormIO, FlagsIn};
+
+endmodule // fpadd
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/lzd.v b/wally-pipelined/src/fpu/fpadd/lzd.v
new file mode 100755
index 00000000..b3a14160
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/lzd.v
@@ -0,0 +1,137 @@
+// V. G. Oklobdzija, "Algorithmic design of a hierarchical and modular
+//   leading zero detector circuit," in Electronics Letters, vol. 29,
+//   no. 3, pp. 283-284, 4 Feb. 1993, doi: 10.1049/el:19930193.
+      
+module lz2 (P, V, B0, B1);
+   // Two-bit cell of the leading-zero detector.
+   //   V : high when either input bit is set
+   //   P : high when B0 is set while B1 is clear
+   input  B0, B1;
+   output P, V;
+
+   // NOR of the inverted inputs is the OR; NOR of (B1, ~B0) is B0 & ~B1.
+   assign V = ~(~B0 & ~B1);
+   assign P = ~(B1 | ~B0);
+
+endmodule // lz2
+
+// Note: This module is not made out of two lz2's - why not? (MJS)
+
+module lz4 (ZP, ZV, B0, B1, V0, V1);
+   // Merges the outputs of two lz2 cells into one 2-bit position.
+   //   B0/V0 : position and valid outputs of the first cell
+   //   B1/V1 : position and valid outputs of the second cell
+   // V0 has priority: ZP = {0, B0} when V0 is high, otherwise {1, B1}.
+   input        B0, B1;
+   input        V0, V1;
+   output [1:0] ZP;
+   output       ZV;
+
+   // Upper bit records that the first cell saw no ones; the lower bit is
+   // the position reported by whichever cell was selected.
+   assign ZP = {~V0, (V0 ? B0 : B1)};
+   assign ZV = V1 | V0;
+endmodule // lz4
+
+// Note: This module is not made out of two lz4's - why not? (MJS)
+
+module lz8 (ZP, ZV, B);
+   // Leading-zero detector for an 8-bit value, scanning from B[7] down.
+   //   B  : value to examine
+   //   ZP : leading-zero count (only meaningful while ZV is 1)
+   //   ZV : 1 when at least one bit of B is set
+   input  [7:0] B;
+   output [2:0] ZP;
+   output       ZV;
+
+   // Results for the lower nibble B[3:0]
+   wire         lo_p_hi, lo_v_hi;   // pair B[3:2]
+   wire         lo_p_lo, lo_v_lo;   // pair B[1:0]
+   wire [1:0]   lo_pos;
+   wire         lo_valid;
+
+   // Results for the upper nibble B[7:4]
+   wire         hi_p_hi, hi_v_hi;   // pair B[7:6]
+   wire         hi_p_lo, hi_v_lo;   // pair B[5:4]
+   wire [1:0]   hi_pos;
+   wire         hi_valid;
+
+   lz2 l1 (.P(lo_p_hi), .V(lo_v_hi), .B0(B[2]), .B1(B[3]));
+   lz2 l2 (.P(lo_p_lo), .V(lo_v_lo), .B0(B[0]), .B1(B[1]));
+   lz4 l3 (.ZP(lo_pos), .ZV(lo_valid), .B0(lo_p_hi), .B1(lo_p_lo), .V0(lo_v_hi), .V1(lo_v_lo));
+
+   lz2 l4 (.P(hi_p_hi), .V(hi_v_hi), .B0(B[6]), .B1(B[7]));
+   lz2 l5 (.P(hi_p_lo), .V(hi_v_lo), .B0(B[4]), .B1(B[5]));
+   lz4 l6 (.ZP(hi_pos), .ZV(hi_valid), .B0(hi_p_hi), .B1(hi_p_lo), .V0(hi_v_hi), .V1(hi_v_lo));
+
+   // The upper nibble wins whenever it contains a one.
+   assign ZP = {~hi_valid, (hi_valid ? hi_pos : lo_pos)};
+   assign ZV = hi_valid | lo_valid;
+endmodule // lz8
+
+module lz16 (ZP, ZV, B);
+   // Leading-zero detector for a 16-bit value, built from two lz8 halves.
+   //   ZP : leading-zero count, ZV : 1 when B contains a one
+   input  [15:0] B;
+   output [3:0]  ZP;
+   output        ZV;
+
+   wire [2:0]    lo_pos;     // count reported for B[7:0]
+   wire [2:0]    hi_pos;     // count reported for B[15:8]
+   wire          lo_valid;
+   wire          hi_valid;
+
+   lz8 l1 (.ZP(lo_pos), .ZV(lo_valid), .B(B[7:0]));
+   lz8 l2 (.ZP(hi_pos), .ZV(hi_valid), .B(B[15:8]));
+
+   // Prefer the upper half; use the lower half's count (with the top
+   // count bit set) only when the upper half holds no ones.
+   assign ZP = {~hi_valid, (hi_valid ? hi_pos : lo_pos)};
+   assign ZV = hi_valid | lo_valid;
+endmodule // lz16
+
+module lz32 (ZP, ZV, B);
+   // Leading-zero detector for a 32-bit value, built from two lz16 halves.
+   //   ZP : leading-zero count, ZV : 1 when B contains a one
+   input  [31:0] B;
+   output [4:0]  ZP;
+   output        ZV;
+
+   wire [3:0]    lo_pos;     // count reported for B[15:0]
+   wire [3:0]    hi_pos;     // count reported for B[31:16]
+   wire          lo_valid;
+   wire          hi_valid;
+
+   lz16 l1 (.ZP(lo_pos), .ZV(lo_valid), .B(B[15:0]));
+   lz16 l2 (.ZP(hi_pos), .ZV(hi_valid), .B(B[31:16]));
+
+   // Prefer the upper half; use the lower half's count (with the top
+   // count bit set) only when the upper half holds no ones.
+   assign ZP = {~hi_valid, (hi_valid ? hi_pos : lo_pos)};
+   assign ZV = hi_valid | lo_valid;
+endmodule // lz32
+
+// This module returns the number of leading zeros ZP in the 64-bit 
+// number B. If there are no ones in B, then ZP and ZV are both 0.
+
+module lz64 (ZP, ZV, B);
+   // Leading-zero detector for a 64-bit value, built from two lz32 halves.
+   // When B holds no ones, ZV and every bit of ZP are driven to 0.
+   input  [63:0] B;
+   output [5:0]  ZP;
+   output        ZV;
+
+   wire [4:0]    lo_pos;     // count reported for B[31:0]
+   wire [4:0]    hi_pos;     // count reported for B[63:32]
+   wire          lo_valid;
+   wire          hi_valid;
+
+   lz32 l1 (.ZP(lo_pos), .ZV(lo_valid), .B(B[31:0]));
+   lz32 l2 (.ZP(hi_pos), .ZV(hi_valid), .B(B[63:32]));
+
+   assign ZV = hi_valid | lo_valid;
+   // Same selection as the narrower detectors, then gated with ZV so
+   // that an all-zero input yields a count of zero.
+   assign ZP = {~hi_valid, (hi_valid ? hi_pos : lo_pos)} & {6{ZV}};
+endmodule // lz64
+
diff --git a/wally-pipelined/src/fpu/fpadd/rounder.v b/wally-pipelined/src/fpu/fpadd/rounder.v
new file mode 100755
index 00000000..b994acb3
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/rounder.v
@@ -0,0 +1,214 @@
+// The rounder takes as inputs a 64-bit value to be rounded, A, the 
+// exponent of the value to be rounded, the sign of the final result, Asign, 
+// the precision of the results, P, and the rounding mode, rm. 
+// It produces a rounded 52-bit result, Z, the exponent of the rounded 
+// result, Z_exp, and a flag that indicates if the result was rounded,
+// Inexact. The low two bits of the rounding mode have the following values.
+//	rm[1:0]		Mode
+//      00 		round-to-nearest-even
+//	01 		round-toward-zero
+//      10 		round-toward-plus infinity
+//      11  		round-toward-minus infinity
+// The rounding algorithm determines if '1' should be added to the 
+// truncated significand result, based on three significant bits 
+// (least (L), round (R) and sticky (S)), the rounding mode (rm)
+// and the sign of the final result (Asign). Visually, L and R appear as
+//    xxxxxL,Rxxxxxxx
+// where , denotes the rounding boundary. S is the logical OR of all the
+// bits to the right of R. 
+
+module rounder (Result, DenormIO, Flags, rm, P, OvEn, 
+		UnEn, exp_valid, sel_inv, Invalid, DenormIn, convert, Asign, Aexp, 
+		norm_shift, A);
+   // Rounds A per rm and P, derives the exponent from Aexp and norm_shift, and packs Result and Flags.
+   input  [2:0]  rm;
+   input         P;
+   input         OvEn;
+   input         UnEn;
+   input         exp_valid;
+   input [3:0] 	 sel_inv;
+   input	 Invalid;
+   input	 DenormIn;
+   input         convert;
+   input         Asign;
+   input [10:0]  Aexp;
+   input [5:0] 	 norm_shift;
+   input [63:0]  A;
+   
+   output [63:0] Result;
+   output 	 DenormIO;
+   output [4:0]  Flags;
+   
+   wire          Rsign;
+   wire [10:0] 	 Rexp;
+   wire [11:0] 	 Texp;
+   wire [51:0] 	 Rmant;
+   wire [51:0] 	 Tmant;
+   wire          Rzero;
+   wire          VSS = 1'b0;
+   wire          VDD = 1'b1;
+   wire [51:0] 	 B;			// Value used to add the "ones"
+   wire		 S_SP;			// Single precision sticky bit
+   wire		 S_DP;			// Double precision sticky bit
+   wire		 S;			// Actual sticky bit
+   wire		 R;			// Round bit
+   wire		 L;			// Least significant bit
+   wire		 add_one;		// '1' if one should be added
+   wire		 UnFlow_SP, UnFlow_DP, UnderFlow; 
+   wire		 OvFlow_SP, OvFlow_DP, OverFlow;		
+   wire		 Inexact;
+   wire		 Round_zero;
+   wire		 Infinite;
+   wire		 VeryLarge;
+   wire		 Largest;
+   wire		 Adj_exp;
+   wire		 Valid;
+   wire		 NaN;
+   wire		 Cout;
+   wire		 Texp_l7z;
+   wire		 Texp_l7o;
+   wire		 OvCon;
+
+   // Sticky bits: OR of A[9:0] for double precision, OR of A[38:0] for single
+   assign S_DP= A[9]|A[8]|A[7]|A[6]|A[5]|A[4]|A[3]|A[2]|A[1]|A[0];
+   assign S_SP = S_DP |A[38]|A[37]|A[36]|A[35]|A[34]|A[33]|A[32]|A[31]|A[30]|
+                 A[29]|A[28]|A[27]|A[26]|A[25]|A[24]|A[23]|A[22]|A[21]|A[20]|
+                 A[19]|A[18]|A[17]|A[16]|A[15]|A[14]|A[13]|A[12]|A[11]|A[10];
+
+   // Set the least (L), round (R), and sticky (S) bits based on
+   // the precision: A[40]/A[39] for single, A[11]/A[10] for double. 
+   assign {L, R, S} = P ? {A[40],A[39],S_SP} : {A[11],A[10],S_DP};
+
+   // With rm[2] = 0, add one if ((the rounding mode is round-to-nearest,
+   // rm[1:0] = 00) and (R is one) and (S or L is one)) or ((the rounding
+   // mode is towards plus or minus infinity (rm[1] = 1)) and (the sign
+   // and rm[0] are the same) and (R or S is one)). 
+
+   // With rm[2] = 1 (described by the original author as roundTiesAway),
+   // one is added whenever R is one, independent of L, S and the sign.
+
+   assign add_one = ~rm[2] & ((~rm[1]&~rm[0]&R&(L|S)) | (rm[1]&(Asign^~rm[0])&(R|S))) | (rm[2] & R);
+
+   // Add one using a 52-bit adder. The one is added to the LSB B[0] for
+   // double precision or to B[29] for single precision. 
+   // This could be simplified by using a specialized adder.
+   // The current adder is actually 64-bits. The leading one 
+   // for normalized results (A[63]) is not included in the addition.
+   assign B = {{22{VSS}}, add_one&P, {28{VSS}}, add_one&~P};
+   cla52 add1(Tmant, Cout, A[62:11], B);
+
+   // Now that rounding is done, we compute the final exponent
+   // and test for special cases. 
+
+   // Compute the value of the exponent by subtracting the shift 
+   // value from the previous exponent and then adding 2 + Cout. 
+   // If needed this could be optimized to use a specialized 
+   // adder. 
+
+   assign Texp    = {VSS, Aexp} - {{6{VSS}}, norm_shift} +{{10{VSS}}, VDD, Cout};   
+   
+   // Overflow only occurs for double precision, if Texp[10] to Texp[0] are 
+   // all ones. To encourage sharing with single precision overflow detection,
+   // the lower 7 bits are tested separately. 
+   assign Texp_l7o  = Texp[6]&Texp[5]&Texp[4]&Texp[3]&Texp[2]&Texp[1]&Texp[0];
+   assign OvFlow_DP = Texp[10]&Texp[9]&Texp[8]&Texp[7]&Texp_l7o;
+
+   // Overflow occurs for single precision if Texp[10] is one and 
+   // (Texp[9], Texp[8] or Texp[7] is one, or Texp[6] to Texp[0] 
+   // are all ones). 
+   assign OvFlow_SP = Texp[10]&(Texp[9]|Texp[8]|Texp[7]|Texp_l7o);
+
+   // Underflow occurs for double precision if (Texp[11] is one)  or Texp[10] to 
+   // Texp[0] are all zeros. 
+   assign Texp_l7z  = ~Texp[6]&~Texp[5]&~Texp[4]&~Texp[3]&~Texp[2]&~Texp[1]&~Texp[0];
+   assign UnFlow_DP = Texp[11] | ~Texp[10]&~Texp[9]&~Texp[8]&~Texp[7]&Texp_l7z;
+
+   // Underflow occurs for single precision if Texp[10] is zero and (Texp[9],
+   // Texp[8] or Texp[7] is zero, or Texp[6] to Texp[0] are all zeros). 
+   assign UnFlow_SP = (~Texp[10]&(~Texp[9]|~Texp[8]|~Texp[7]|Texp_l7z));
+   
+   // Set the overflow and underflow flags. They should not be set if
+   // the input was infinite or NaN or the output of the adder is zero.
+   // sel_inv[2:0] = 000 => Valid
+   // sel_inv[2:0] = 001 => NaN
+   assign Valid = (~sel_inv[2]&~sel_inv[1]&~sel_inv[0]);
+   assign NaN   = ~sel_inv[2]&~sel_inv[1]& sel_inv[0];
+   assign UnderFlow = ((P & UnFlow_SP | UnFlow_DP)&Valid&exp_valid) |
+		      (~Aexp[10]&Aexp[9]&Aexp[8]&Aexp[7]&~Aexp[6]
+		       &~Aexp[5]&~Aexp[4]&~Aexp[3]&~Aexp[2]
+		       &~Aexp[1]&~Aexp[0]&sel_inv[3]);  // Aexp == 11'h380 while sel_inv[3] is set
+   assign OverFlow  = (P & OvFlow_SP | OvFlow_DP)&Valid&~UnderFlow&exp_valid;
+
+   // The DenormIO is set if underflow has occurred or if there was a
+   // denormalized input. 
+   assign DenormIO = DenormIn | UnderFlow;
+
+   // The final result is Inexact if any rounding occurred ((i.e., R or S 
+   // is one), or (if the result overflows ) or (if the result underflows and the 
+   // underflow trap is not enabled)) and (value of the result was not previously
+   // set by an exception case). 
+   assign Inexact = (R|S|OverFlow|(UnderFlow&~UnEn))&Valid;
+
+   // Set the exception flags. Flags[4:0] = {UnderFlow, 0, OverFlow, Invalid,
+   // Inexact}; bit 3 is tied low (presumably Div_By_0 -- TODO confirm). 
+   assign Flags = {UnderFlow, VSS, OverFlow, Invalid, Inexact};
+
+   // Determine the final result. 
+
+   // The sign of the final result is one if the result is not zero and
+   // the sign of A is one, or if the result is zero and the rounding 
+   // mode is round-to-minus infinity. The final result is zero, if exp_valid
+   // is zero. If underflow occurs, then the result is set to zero.
+   //   
+   // For Zero (goes equally for subtraction although 
+   // signs may alter operands sign):
+   // -0 + -0 = -0 (always)
+   // +0 + +0 = +0 (always)
+   // -0 + +0 = +0 (for RN, RZ, RU) 
+   // -0 + +0 = -0 (for RD) 
+   assign Rzero = ~exp_valid | UnderFlow;
+   assign Rsign = ((Asign&exp_valid | 
+		    (sel_inv[2]&~sel_inv[1]&sel_inv[0]&rm[1]&rm[0] |
+		     sel_inv[2]&sel_inv[1]&~sel_inv[0] |		  
+		     ~exp_valid&rm[1]&rm[0]&~sel_inv[2] | 
+		     UnderFlow&rm[1]&rm[0]) & ~convert) & ~sel_inv[3]) |
+		  (Asign & sel_inv[3]);
+   
+   // The exponent of the final result is zero if the final result is 
+   // zero or a denorm, all ones if the final result is NaN or Infinite
+   // or overflow occurred and the magnitude of the number is 
+   // rounded away from zero, and all ones with an LSB of zero
+   // if overflow occurred and the magnitude of the number is 
+   // rounded toward zero. If the result is single precision, 
+   // Texp[7] should be inverted. When the Overflow trap is enabled (OvEn = 1)
+   // and overflow occurs and the operation is not conversion, bits 10 and 9 are 
+   // inverted for double precision, and bits 7 and 6 are inverted for single precision. 
+   assign Round_zero = ~rm[1]&rm[0] | ~Asign&rm[0] | Asign&rm[1]&~rm[0];
+   assign VeryLarge = OverFlow & ~OvEn;
+   assign Infinite   = (VeryLarge & ~Round_zero) | (~sel_inv[2] & sel_inv[1]);
+   assign Largest = VeryLarge & Round_zero;
+   assign Adj_exp = OverFlow & OvEn & ~convert;
+   assign Rexp[10:1] = ({10{~Valid}} | 
+			{Texp[10]&~Adj_exp, Texp[9]&~Adj_exp, Texp[8], 
+			 (Texp[7]^P)&~(Adj_exp&P), Texp[6]&~(Adj_exp&P), Texp[5:1]} | 
+		        {10{VeryLarge}})&{10{~Rzero | NaN}};
+   assign Rexp[0]    = ({~Valid} | Texp[0] | Infinite)&(~Rzero | NaN)&~Largest;
+   
+   // If the result is zero or infinity, the mantissa is all zeros. 
+   // If the result is NaN, the mantissa is 10...0
+   // If the result is the largest floating point number, the mantissa
+   // is all ones. Otherwise, the mantissa is not changed. 
+   assign Rmant[51] = Largest | NaN | (Tmant[51]&~Infinite&~Rzero);
+   assign Rmant[50:0] = {51{Largest}} | (Tmant[50:0]&{51{~Infinite&Valid&~Rzero}});
+
+   // For single precision, the 8 least significant bits of the exponent
+   // and 23 most significant bits of the mantissa contain bits used 
+   // for the final result. A double precision result is returned if 
+   // overflow has occurred, the overflow trap is enabled, and a conversion
+   // is being performed (OvCon), even when P requests single precision. 
+   assign OvCon = OverFlow & OvEn & convert;
+   assign Result = (P&~OvCon) ? {Rsign, Rexp[7:0], Rmant[51:29], {32{VSS}}}
+	           : {Rsign, Rexp, Rmant};
+
+endmodule // rounder
+
diff --git a/wally-pipelined/src/fpu/fpadd/shifter.v b/wally-pipelined/src/fpu/fpadd/shifter.v
new file mode 100755
index 00000000..7a85fc6a
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/shifter.v
@@ -0,0 +1,119 @@
+
+// MJS - This module implements a 57-bit 2-to-1 multiplexor, which is
+// used in the barrel shifter for significand alignment.
+
+module mux21x57 (Z, A, B, Sel);
+   // 57-bit wide 2-to-1 multiplexer: Z = A when Sel is 0, B when Sel is 1.
+   input  [56:0] A, B;
+   input         Sel;
+   output [56:0] Z;
+
+   wire          pick_a = ~Sel;
+
+   assign Z = pick_a ? A : B;
+
+endmodule // mux21x57
+
+// MJS - This module implements a 64-bit 2-to-1 multiplexor, which is
+// used in the barrel shifter for significand normalization. 
+
+module mux21x64 (Z, A, B, Sel);
+   // 64-bit wide 2-to-1 multiplexer: Z = A when Sel is 0, B when Sel is 1.
+   input  [63:0] A, B;
+   input         Sel;
+   output [63:0] Z;
+
+   wire          pick_a = ~Sel;
+
+   assign Z = pick_a ? A : B;
+
+endmodule // mux21x64
+
+// The implementation of the barrel shifter was modified to use 
+// fewer gates. It is now implemented using six 64-bit 2-to-1 muxes. The 
+// barrel shifter takes a 64-bit input A and shifts it left by up to 
+// 63 bits, as specified by Shift, to produce a 64-bit output Z. 
+// Bits to the right are filled with zeros. 
+// The 64-bit shifter is implemented using 6 stages that shift by 32, 
+// 16, 8, 4, 2, and 1 bits. 
+
+module barrel_shifter_l64 (Z, A, Shift);
+   // Logical left shifter for a 64-bit value.
+   //   A     : value to be shifted
+   //   Shift : shift distance (0 to 63)
+   //   Z     : A shifted left by Shift; vacated low bits are zero
+   input  [63:0] A;
+   input  [5:0]  Shift;
+   output [63:0] Z;
+
+   // Intermediate results after the 32-, 16-, 8-, 4- and 2-bit stages.
+   wire [63:0]   after32;
+   wire [63:0]   after16;
+   wire [63:0]   after8;
+   wire [63:0]   after4;
+   wire [63:0]   after2;
+
+   // Each stage either passes its input through (Sel = 0) or selects the
+   // copy shifted left by a fixed power of two (Sel = 1). Shift[5] drives
+   // the 32-bit stage and Shift[0] drives the final 1-bit stage.
+   mux21x64 mx01 (.Z(after32), .A(A),       .B({A[31:0],       32'h0}), .Sel(Shift[5]));
+   mux21x64 mx02 (.Z(after16), .A(after32), .B({after32[47:0], 16'h0}), .Sel(Shift[4]));
+   mux21x64 mx03 (.Z(after8),  .A(after16), .B({after16[55:0],  8'h0}), .Sel(Shift[3]));
+   mux21x64 mx04 (.Z(after4),  .A(after8),  .B({after8[59:0],   4'h0}), .Sel(Shift[2]));
+   mux21x64 mx05 (.Z(after2),  .A(after4),  .B({after4[61:0],  2'b00}), .Sel(Shift[1]));
+   mux21x64 mx06 (.Z(Z),       .A(after2),  .B({after2[62:0],   1'b0}), .Sel(Shift[0]));
+
+endmodule // barrel_shifter_l64
+
+// The implementation of the barrel shifter was modified to use 
+// fewer gates. It is now implemented using six 57-bit 2-to-1 muxes. The 
+// barrel shifter takes a 57-bit input A and right shifts it by up to 
+// 63-bits, as specified by Shift, to produce a 57-bit output Z. 
+// It also computes a Sticky bit, which is set to 
+// one if any of the bits that were shifted out was one.
+// Bits shifted into the left are filled with zeros. 
+// The 63 bit shift is implemented using 6 stages of shifts of 32
+// 16, 8, 4, 2, and 1 bits.
+
+module barrel_shifter_r57 (Z, Sticky, A, Shift);
+   // Logical right shifter for a 57-bit value with sticky-bit generation.
+   //   A      : value to be shifted
+   //   Shift  : shift distance (0 to 63)
+   //   Z      : A shifted right by Shift; vacated high bits are zero
+   //   Sticky : 1 when any bit shifted out of Z was a one
+   input  [56:0] A;
+   input  [5:0]  Shift;
+   output [56:0] Z;
+   output        Sticky;
+
+   // Intermediate results after the 32-, 16-, 8-, 4- and 2-bit stages.
+   wire [56:0]   after32;
+   wire [56:0]   after16;
+   wire [56:0]   after8;
+   wire [56:0]   after4;
+   wire [56:0]   after2;
+
+   // Bits discarded by each stage, masked by that stage's select line.
+   wire [62:0]   dropped;
+
+   // Each stage either passes its input through (Sel = 0) or selects the
+   // copy shifted right by a fixed power of two (Sel = 1).
+   mux21x57 mx01 (.Z(after32), .A(A),       .B({32'h0,       A[56:32]}), .Sel(Shift[5]));
+   mux21x57 mx02 (.Z(after16), .A(after32), .B({16'h0, after32[56:16]}), .Sel(Shift[4]));
+   mux21x57 mx03 (.Z(after8),  .A(after16), .B({8'h0,   after16[56:8]}), .Sel(Shift[3]));
+   mux21x57 mx04 (.Z(after4),  .A(after8),  .B({4'h0,    after8[56:4]}), .Sel(Shift[2]));
+   mux21x57 mx05 (.Z(after2),  .A(after4),  .B({2'b00,   after4[56:2]}), .Sel(Shift[1]));
+   mux21x57 mx06 (.Z(Z),       .A(after2),  .B({1'b0,    after2[56:1]}), .Sel(Shift[0]));
+
+   // Collect the low-order bits that each active stage throws away:
+   // 1 + 2 + 4 + 8 + 16 + 32 = 63 candidate bits in total.
+   assign dropped = {        Shift[0]   &  after2[0],
+                      { 2{Shift[1]}} &  after4[1:0],
+                      { 4{Shift[2]}} &  after8[3:0],
+                      { 8{Shift[3]}} & after16[7:0],
+                      {16{Shift[4]}} & after32[15:0],
+                      {32{Shift[5]}} &       A[31:0] };
+
+   assign Sticky = (dropped != 63'h0);
+
+endmodule // barrel_shifter_r57
\ No newline at end of file
diff --git a/wally-pipelined/src/fpu/fpadd/tb.v b/wally-pipelined/src/fpu/fpadd/tb.v
new file mode 100755
index 00000000..e3c65559
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb.v
@@ -0,0 +1,86 @@
+//
+// File name : tb.v
+// Title     : stimulus
+// project   : mult
+// Library   : test
+// Author(s) : James E. Stine, Jr.
+// Purpose   : definition of modules for testbench 
+// notes :   
+//
+// Copyright Oklahoma State University
+//
+
+// Top level stimulus module
+
+module stimulus;
+
+   reg clk;  // Free-running clock that paces the result logging
+
+   // Stimulus registers driving the fpadd inputs
+   reg [63:0]  op1;
+   reg [63:0]  op2;
+   reg [2:0]   rm;        // 3 bits wide to match the fpadd rm port
+   reg [2:0]   op_type;
+   reg         P;
+   reg         OvEn;
+   reg         UnEn;
+
+   wire [63:0] AS_Result;
+   wire [4:0]  Flags;
+   wire        Denorm;
+
+   integer     handle3;
+   integer     desc3;
+
+   // Instantiate the floating-point adder under test
+   fpadd dut (AS_Result, Flags, Denorm, op1, op2, rm, op_type, P , OvEn, UnEn);
+
+   // Clock toggles every 25 time units (50-unit period)
+   initial
+     begin
+        clk = 1'b1;
+        forever #25 clk = ~clk;
+     end
+
+   initial
+     begin
+        handle3 = $fopen("tb.out");
+     end
+   // Log operands and result to tb.out 5 time units after each rising edge.
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #5 $fdisplay(desc3, "%h %h || %h", op1, op2, AS_Result);
+     end
+
+   // Stimulate the Input Signals
+   initial
+     begin
+        // Add your test vectors here
+        $display("%h", AS_Result);
+        #0   rm = 3'b000;
+        #0   op_type = 3'b000;
+        #0   P = 1'b0;
+        #0   OvEn = 1'b0;
+        #0   UnEn = 1'b0;
+        #0   op1 = 64'h4031e147ae147ae1;
+        #0   op2 = 64'h4046e147ae147ae1;
+        $display("%h", AS_Result);
+        #200;
+        #0   rm = 3'b000;
+        #0   op_type = 3'b000;
+        #0   P = 1'b0;
+        #0   OvEn = 1'b0;
+        #0   UnEn = 1'b0;
+        #0   op1 = 64'h4031e147ae147ae1;
+        #0   op2 = 64'h4046e147ae147ae1;
+        $display("%h", AS_Result);
+
+     end
+
+endmodule // stimulus
+
+
+
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f32_add_rd.sv b/wally-pipelined/src/fpu/fpadd/tb_f32_add_rd.sv
new file mode 100755
index 00000000..9b2060cb
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f32_add_rd.sv
@@ -0,0 +1,79 @@
+// testbench
+module tb ();
+
+   logic [31:0]  op1;
+   logic [31:0]  op2;
+   logic [2:0]   rm;                   // 3 bits wide to match the fpadd rm port
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+
+   logic         clk;
+   logic [31:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [103:0] testvectors[50000:0]; // {op1, op2, yexpected, flags_expected}
+   logic [7:0]   flags_expected;
+
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test; 32-bit operands occupy the upper word
+   fpadd dut (result, Flags, Denorm, {op1, 32'h0}, {op2, 32'h0}, 
+              rm, op_type, P, OvEn, UnEn);
+
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+
+   initial
+     begin
+        handle3 = $fopen("f32_add_rd.out");
+        $readmemh("f32_add_rd.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b000;
+        #0  P = 1'b1;
+        #0  rm = 3'b011;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #1; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
+        #5  $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset && testvectors[vectornum] !== 104'bx) 
+       begin // skip during reset and once the vectors are exhausted
+          if (result[63:32] !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+          // Unloaded entries read back as all x. The sentinel below must
+          // be as wide as a vector (104 bits); a narrower x literal is
+          // zero-extended and can never match. The guard above keeps the
+          // all-x entries from being counted as failures.
+
+          vectornum = vectornum + 1;
+          if (testvectors[vectornum] === 104'bx) 
+            begin
+               $display("%d tests completed with %d errors", 
+                        vectornum, errors);
+            end
+       end // if (~reset && ...)
+
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f32_add_rne.sv b/wally-pipelined/src/fpu/fpadd/tb_f32_add_rne.sv
new file mode 100755
index 00000000..49e70bae
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f32_add_rne.sv
@@ -0,0 +1,79 @@
+// testbench
+module tb ();
+
+   logic [31:0]  op1;
+   logic [31:0]  op2;
+   logic [2:0]   rm;                   // 3 bits wide to match the fpadd rm port
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+
+   logic         clk;
+   logic [31:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [103:0] testvectors[50000:0]; // {op1, op2, yexpected, flags_expected}
+   logic [7:0]   flags_expected;
+
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test; 32-bit operands occupy the upper word
+   fpadd dut (result, Flags, Denorm, {op1, 32'h0}, {op2, 32'h0}, 
+              rm, op_type, P, OvEn, UnEn);
+
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+
+   initial
+     begin
+        handle3 = $fopen("f32_add_rne.out");
+        $readmemh("f32_add_rne.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b000;
+        #0  P = 1'b1;
+        #0  rm = 3'b000;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #1; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
+        #5  $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset && testvectors[vectornum] !== 104'bx) 
+       begin // skip during reset and once the vectors are exhausted
+          if (result[63:32] !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+          // Unloaded entries read back as all x. The sentinel below must
+          // be as wide as a vector (104 bits); a narrower x literal is
+          // zero-extended and can never match. The guard above keeps the
+          // all-x entries from being counted as failures.
+
+          vectornum = vectornum + 1;
+          if (testvectors[vectornum] === 104'bx) 
+            begin
+               $display("%d tests completed with %d errors", 
+                        vectornum, errors);
+            end
+       end // if (~reset && ...)
+
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f32_add_ru.sv b/wally-pipelined/src/fpu/fpadd/tb_f32_add_ru.sv
new file mode 100755
index 00000000..c6dabea3
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f32_add_ru.sv
@@ -0,0 +1,79 @@
+// Testbench: applies the f32_add_ru.tv vectors to fpadd and checks result[63:32] against each expected value.
+module tb ();
+   // DUT inputs (op1/op2 are left-justified into 64 bits at the port connection)
+   logic [31:0]  op1;
+   logic [31:0]  op2;
+   logic [1:0]   rm;
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+   // DUT outputs
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+   // testbench state
+   logic         clk;
+   logic [31:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [103:0] testvectors[50000:0]; // array of testvectors
+   logic [7:0]   flags_expected;       // unpacked from each vector but not compared
+   // log file: handle3 comes from $fopen, desc3 is the copy handed to $fdisplay
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test
+   fpadd dut (result, Flags, Denorm, {op1, 32'h0}, {op2, 32'h0},
+              rm, op_type, P, OvEn, UnEn);
+   // free-running clock with a period of 10 time units
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+   // open the log, load the vectors, and hold reset for the first 27 time units
+   initial
+     begin
+        handle3 = $fopen("f32_add_ru.out");
+        $readmemh("f32_add_ru.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+   // rising edge: set the mode, load the current vector 1 unit later, log the outputs 5 units after that
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b000;
+        #0  P = 1'b1;
+        #0  rm = 2'b10;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #1; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
+        #5  $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset)
+       begin // skip during reset
+          if (result[63:32] !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+
+          vectornum = vectornum + 1;
+          // Entries $readmemh did not fill are all x.  'x fills the full vector width; a sized
+          // literal narrower than the vector would be zero-extended and could never match.
+          if (testvectors[vectornum] === 'x)
+            begin
+               $display("%d tests completed with %d errors",
+                        vectornum, errors);
+               #2 $fclose(handle3); // the rising-edge block logs the last vector 1 unit after this edge
+               $stop;
+            end
+       end // if (~reset)
+
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f32_add_rz.sv b/wally-pipelined/src/fpu/fpadd/tb_f32_add_rz.sv
new file mode 100755
index 00000000..95ee9287
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f32_add_rz.sv
@@ -0,0 +1,79 @@
+// Testbench: applies the f32_add_rz.tv vectors to fpadd and checks result[63:32] against each expected value.
+module tb ();
+   // DUT inputs (op1/op2 are left-justified into 64 bits at the port connection)
+   logic [31:0]  op1;
+   logic [31:0]  op2;
+   logic [1:0]   rm;
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+   // DUT outputs
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+   // testbench state
+   logic         clk;
+   logic [31:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [103:0] testvectors[50000:0]; // array of testvectors
+   logic [7:0]   flags_expected;       // unpacked from each vector but not compared
+   // log file: handle3 comes from $fopen, desc3 is the copy handed to $fdisplay
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test
+   fpadd dut (result, Flags, Denorm, {op1, 32'h0}, {op2, 32'h0},
+              rm, op_type, P, OvEn, UnEn);
+   // free-running clock with a period of 10 time units
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+   // open the log, load the vectors, and hold reset for the first 27 time units
+   initial
+     begin
+        handle3 = $fopen("f32_add_rz.out");
+        $readmemh("f32_add_rz.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+   // rising edge: set the mode, load the current vector 1 unit later, log the outputs 5 units after that
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b000;
+        #0  P = 1'b1;
+        #0  rm = 2'b01;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #1; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
+        #5  $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset)
+       begin // skip during reset
+          if (result[63:32] !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+
+          vectornum = vectornum + 1;
+          // Entries $readmemh did not fill are all x.  'x fills the full vector width; a sized
+          // literal narrower than the vector would be zero-extended and could never match.
+          if (testvectors[vectornum] === 'x)
+            begin
+               $display("%d tests completed with %d errors",
+                        vectornum, errors);
+               #2 $fclose(handle3); // the rising-edge block logs the last vector 1 unit after this edge
+               $stop;
+            end
+       end // if (~reset)
+
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f32_f64_rne.sv b/wally-pipelined/src/fpu/fpadd/tb_f32_f64_rne.sv
new file mode 100755
index 00000000..d0766c2b
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f32_f64_rne.sv
@@ -0,0 +1,75 @@
+// Testbench: applies the f32_f64_rne.tv vectors to fpadd (op2 held at zero) and checks result against each expected value.
+module tb ();
+   // DUT inputs (op1 is left-justified into 64 bits at the port connection; op2 is driven to zero)
+   logic [31:0]  op1;
+   logic [63:0]  op2;
+   logic [1:0]   rm;
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+   // DUT outputs
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+   // testbench state
+   logic         clk;
+   logic [63:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [103:0] testvectors[50000:0]; // array of testvectors
+   logic [7:0]   flags_expected;       // unpacked from each vector but not compared
+   // log file: handle3 comes from $fopen, desc3 is the copy handed to $fdisplay
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test
+   fpadd dut (result, Flags, Denorm, {op1, 32'h0}, op2, rm, op_type, P, OvEn, UnEn);
+   // free-running clock with a period of 10 time units
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+   // open the log, load the vectors, and hold reset for the first 27 time units
+   initial
+     begin
+        handle3 = $fopen("f32_f64_rne.out");
+        $readmemh("f32_f64_rne.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+   // rising edge: set the mode and op2, load the current vector 1 unit later, log the outputs 5 units after that
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b110;
+        #0  P = 1'b0;
+        #0  rm = 2'b00;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #0  op2 = 64'h0;
+        #1; {op1, yexpected, flags_expected} = testvectors[vectornum];
+        #5 $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset)
+       begin // skip during reset
+          if (result !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+          vectornum = vectornum + 1;
+          // unfilled entries are all x; 'x spans the full vector width (a narrower sized literal is zero-extended and never matches)
+          if (testvectors[vectornum] === 'x)
+            begin
+               $display("%d tests completed with %d errors", vectornum, errors);
+               #2 $fclose(handle3); // the rising-edge block logs the last vector 1 unit after this edge
+               $stop;
+            end
+       end // if (~reset)
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f32_sub_rd.sv b/wally-pipelined/src/fpu/fpadd/tb_f32_sub_rd.sv
new file mode 100755
index 00000000..366e4d76
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f32_sub_rd.sv
@@ -0,0 +1,79 @@
+// Testbench: applies the f32_sub_rd.tv vectors to fpadd and checks result[63:32] against each expected value.
+module tb ();
+   // DUT inputs (op1/op2 are left-justified into 64 bits at the port connection)
+   logic [31:0]  op1;
+   logic [31:0]  op2;
+   logic [1:0]   rm;
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+   // DUT outputs
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+   // testbench state
+   logic         clk;
+   logic [31:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [103:0] testvectors[50000:0]; // array of testvectors
+   logic [7:0]   flags_expected;       // unpacked from each vector but not compared
+   // log file: handle3 comes from $fopen, desc3 is the copy handed to $fdisplay
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test
+   fpadd dut (result, Flags, Denorm, {op1, 32'h0}, {op2, 32'h0},
+              rm, op_type, P, OvEn, UnEn);
+   // free-running clock with a period of 10 time units
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+   // open the log, load the vectors, and hold reset for the first 27 time units
+   initial
+     begin
+        handle3 = $fopen("f32_sub_rd.out");
+        $readmemh("f32_sub_rd.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+   // rising edge: set the mode, load the current vector 1 unit later, log the outputs 5 units after that
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b001;
+        #0  P = 1'b1;
+        #0  rm = 2'b11;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #1; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
+        #5  $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset)
+       begin // skip during reset
+          if (result[63:32] !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+
+          vectornum = vectornum + 1;
+          // Entries $readmemh did not fill are all x.  'x fills the full vector width; a sized
+          // literal narrower than the vector would be zero-extended and could never match.
+          if (testvectors[vectornum] === 'x)
+            begin
+               $display("%d tests completed with %d errors",
+                        vectornum, errors);
+               #2 $fclose(handle3); // the rising-edge block logs the last vector 1 unit after this edge
+               $stop;
+            end
+       end // if (~reset)
+
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f32_sub_rne.sv b/wally-pipelined/src/fpu/fpadd/tb_f32_sub_rne.sv
new file mode 100755
index 00000000..b8fca359
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f32_sub_rne.sv
@@ -0,0 +1,79 @@
+// Testbench: applies the f32_sub_rne.tv vectors to fpadd and checks result[63:32] against each expected value.
+module tb ();
+   // DUT inputs (op1/op2 are left-justified into 64 bits at the port connection)
+   logic [31:0]  op1;
+   logic [31:0]  op2;
+   logic [1:0]   rm;
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+   // DUT outputs
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+   // testbench state
+   logic         clk;
+   logic [31:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [103:0] testvectors[50000:0]; // array of testvectors
+   logic [7:0]   flags_expected;       // unpacked from each vector but not compared
+   // log file: handle3 comes from $fopen, desc3 is the copy handed to $fdisplay
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test
+   fpadd dut (result, Flags, Denorm, {op1, 32'h0}, {op2, 32'h0},
+              rm, op_type, P, OvEn, UnEn);
+   // free-running clock with a period of 10 time units
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+   // open the log, load the vectors, and hold reset for the first 27 time units
+   initial
+     begin
+        handle3 = $fopen("f32_sub_rne.out");
+        $readmemh("f32_sub_rne.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+   // rising edge: set the mode, load the current vector 1 unit later, log the outputs 5 units after that
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b001;
+        #0  P = 1'b1;
+        #0  rm = 2'b00;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #1; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
+        #5  $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset)
+       begin // skip during reset
+          if (result[63:32] !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+
+          vectornum = vectornum + 1;
+          // Entries $readmemh did not fill are all x.  'x fills the full vector width; a sized
+          // literal narrower than the vector would be zero-extended and could never match.
+          if (testvectors[vectornum] === 'x)
+            begin
+               $display("%d tests completed with %d errors",
+                        vectornum, errors);
+               #2 $fclose(handle3); // the rising-edge block logs the last vector 1 unit after this edge
+               $stop;
+            end
+       end // if (~reset)
+
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f32_sub_ru.sv b/wally-pipelined/src/fpu/fpadd/tb_f32_sub_ru.sv
new file mode 100755
index 00000000..158ff474
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f32_sub_ru.sv
@@ -0,0 +1,79 @@
+// Testbench: applies the f32_sub_ru.tv vectors to fpadd and checks result[63:32] against each expected value.
+module tb ();
+   // DUT inputs (op1/op2 are left-justified into 64 bits at the port connection)
+   logic [31:0]  op1;
+   logic [31:0]  op2;
+   logic [1:0]   rm;
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+   // DUT outputs
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+   // testbench state
+   logic         clk;
+   logic [31:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [103:0] testvectors[50000:0]; // array of testvectors
+   logic [7:0]   flags_expected;       // unpacked from each vector but not compared
+   // log file: handle3 comes from $fopen, desc3 is the copy handed to $fdisplay
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test
+   fpadd dut (result, Flags, Denorm, {op1, 32'h0}, {op2, 32'h0},
+              rm, op_type, P, OvEn, UnEn);
+   // free-running clock with a period of 10 time units
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+   // open the log, load the vectors, and hold reset for the first 27 time units
+   initial
+     begin
+        handle3 = $fopen("f32_sub_ru.out");
+        $readmemh("f32_sub_ru.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+   // rising edge: set the mode, load the current vector 1 unit later, log the outputs 5 units after that
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b001;
+        #0  P = 1'b1;
+        #0  rm = 2'b10;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #1; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
+        #5  $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags); // Flags logged like the other fpadd testbenches
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset)
+       begin // skip during reset
+          if (result[63:32] !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+
+          vectornum = vectornum + 1;
+          // Entries $readmemh did not fill are all x.  'x fills the full vector width; a sized
+          // literal narrower than the vector would be zero-extended and could never match.
+          if (testvectors[vectornum] === 'x)
+            begin
+               $display("%d tests completed with %d errors",
+                        vectornum, errors);
+               #2 $fclose(handle3); // the rising-edge block logs the last vector 1 unit after this edge
+               $stop;
+            end
+       end // if (~reset)
+
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f32_sub_rz.sv b/wally-pipelined/src/fpu/fpadd/tb_f32_sub_rz.sv
new file mode 100755
index 00000000..ef8eb65e
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f32_sub_rz.sv
@@ -0,0 +1,79 @@
+// Testbench: applies the f32_sub_rz.tv vectors to fpadd and checks result[63:32] against each expected value.
+module tb ();
+   // DUT inputs (op1/op2 are left-justified into 64 bits at the port connection)
+   logic [31:0]  op1;
+   logic [31:0]  op2;
+   logic [1:0]   rm;
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+   // DUT outputs
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+   // testbench state
+   logic         clk;
+   logic [31:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [103:0] testvectors[50000:0]; // array of testvectors
+   logic [7:0]   flags_expected;       // unpacked from each vector but not compared
+   // log file: handle3 comes from $fopen, desc3 is the copy handed to $fdisplay
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test
+   fpadd dut (result, Flags, Denorm, {op1, 32'h0}, {op2, 32'h0},
+              rm, op_type, P, OvEn, UnEn);
+   // free-running clock with a period of 10 time units
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+   // open the log, load the vectors, and hold reset for the first 27 time units
+   initial
+     begin
+        handle3 = $fopen("f32_sub_rz.out");
+        $readmemh("f32_sub_rz.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+   // rising edge: set the mode, load the current vector 1 unit later, log the outputs 5 units after that
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b001;
+        #0  P = 1'b1;
+        #0  rm = 2'b01;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #1; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
+        #5  $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset)
+       begin // skip during reset
+          if (result[63:32] !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+
+          vectornum = vectornum + 1;
+          // Entries $readmemh did not fill are all x.  'x fills the full vector width; a sized
+          // literal narrower than the vector would be zero-extended and could never match.
+          if (testvectors[vectornum] === 'x)
+            begin
+               $display("%d tests completed with %d errors",
+                        vectornum, errors);
+               #2 $fclose(handle3); // the rising-edge block logs the last vector 1 unit after this edge
+               $stop;
+            end
+       end // if (~reset)
+
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f64_add_rd.sv b/wally-pipelined/src/fpu/fpadd/tb_f64_add_rd.sv
new file mode 100755
index 00000000..0f37bca4
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f64_add_rd.sv
@@ -0,0 +1,78 @@
+// Testbench: applies the f64_add_rd.tv vectors to fpadd and checks result against each expected value.
+module tb ();
+   // DUT inputs
+   logic [63:0]  op1;
+   logic [63:0]  op2;
+   logic [1:0]   rm;
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+   // DUT outputs
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+   // testbench state
+   logic         clk;
+   logic [63:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [199:0] testvectors[50000:0]; // array of testvectors
+   logic [7:0]   flags_expected;       // unpacked from each vector but not compared
+   // log file: handle3 comes from $fopen, desc3 is the copy handed to $fdisplay
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test
+   fpadd dut (result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn);
+   // free-running clock with a period of 10 time units
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+   // open the log, load the vectors, and hold reset for the first 27 time units
+   initial
+     begin
+        handle3 = $fopen("f64_add_rd.out");
+        $readmemh("f64_add_rd.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+   // rising edge: set the mode, load the current vector 1 unit later, log the outputs 5 units after that
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b000;
+        #0  P = 1'b0;
+        #0  rm = 2'b11;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #1; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
+        #5 $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset)
+       begin // skip during reset
+          if (result !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+
+          vectornum = vectornum + 1;
+          // Entries $readmemh did not fill are all x.  'x fills the full vector width; a sized
+          // literal narrower than the vector would be zero-extended and could never match.
+          if (testvectors[vectornum] === 'x)
+            begin
+               $display("%d tests completed with %d errors",
+                        vectornum, errors);
+               #2 $fclose(handle3); // the rising-edge block logs the last vector 1 unit after this edge
+               $stop;
+            end
+       end // if (~reset)
+
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f64_add_rne.sv b/wally-pipelined/src/fpu/fpadd/tb_f64_add_rne.sv
new file mode 100755
index 00000000..1e473357
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f64_add_rne.sv
@@ -0,0 +1,78 @@
+// Testbench: applies the f64_add_rne.tv vectors to fpadd and checks result against each expected value.
+module tb ();
+   // DUT inputs
+   logic [63:0]  op1;
+   logic [63:0]  op2;
+   logic [1:0]   rm;                   // 2 bits wide, matching the other fpadd testbenches
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+   // DUT outputs
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+   // testbench state
+   logic         clk;
+   logic [63:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [199:0] testvectors[50000:0]; // array of testvectors
+   logic [7:0]   flags_expected;       // unpacked from each vector but not compared
+   // log file: handle3 comes from $fopen, desc3 is the copy handed to $fdisplay
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test
+   fpadd dut (result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn);
+   // free-running clock with a period of 10 time units
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+   // open the log, load the vectors, and hold reset for the first 27 time units
+   initial
+     begin
+        handle3 = $fopen("f64_add_rne.out");
+        $readmemh("f64_add_rne.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+   // rising edge: set the mode, load the current vector 1 unit later, log the outputs 5 units after that
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b000;
+        #0  P = 1'b0;
+        #0  rm = 2'b00;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #1; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
+        #5 $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset)
+       begin // skip during reset
+          if (result !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+
+          vectornum = vectornum + 1;
+          // Entries $readmemh did not fill are all x.  'x fills the full vector width; a sized
+          // literal narrower than the vector would be zero-extended and could never match.
+          if (testvectors[vectornum] === 'x)
+            begin
+               $display("%d tests completed with %d errors",
+                        vectornum, errors);
+               #2 $fclose(handle3); // the rising-edge block logs the last vector 1 unit after this edge
+               $stop;
+            end
+       end // if (~reset)
+
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f64_add_ru.sv b/wally-pipelined/src/fpu/fpadd/tb_f64_add_ru.sv
new file mode 100755
index 00000000..191cca38
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f64_add_ru.sv
@@ -0,0 +1,78 @@
+// Testbench: applies the f64_add_ru.tv vectors to fpadd and checks result against each expected value.
+module tb ();
+   // DUT inputs
+   logic [63:0]  op1;
+   logic [63:0]  op2;
+   logic [1:0]   rm;
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+   // DUT outputs
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+   // testbench state
+   logic         clk;
+   logic [63:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [199:0] testvectors[50000:0]; // array of testvectors
+   logic [7:0]   flags_expected;       // unpacked from each vector but not compared
+   // log file: handle3 comes from $fopen, desc3 is the copy handed to $fdisplay
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test
+   fpadd dut (result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn);
+   // free-running clock with a period of 10 time units
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+   // open the log, load the vectors, and hold reset for the first 27 time units
+   initial
+     begin
+        handle3 = $fopen("f64_add_ru.out");
+        $readmemh("f64_add_ru.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+   // rising edge: set the mode, load the current vector 1 unit later, log the outputs 5 units after that
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b000;
+        #0  P = 1'b0;
+        #0  rm = 2'b10;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #1; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
+        #5 $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset)
+       begin // skip during reset
+          if (result !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+
+          vectornum = vectornum + 1;
+          // Entries $readmemh did not fill are all x.  'x fills the full vector width; a sized
+          // literal narrower than the vector would be zero-extended and could never match.
+          if (testvectors[vectornum] === 'x)
+            begin
+               $display("%d tests completed with %d errors",
+                        vectornum, errors);
+               #2 $fclose(handle3); // the rising-edge block logs the last vector 1 unit after this edge
+               $stop;
+            end
+       end // if (~reset)
+
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f64_add_rz.sv b/wally-pipelined/src/fpu/fpadd/tb_f64_add_rz.sv
new file mode 100755
index 00000000..6a4df797
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f64_add_rz.sv
@@ -0,0 +1,78 @@
+// Testbench: applies the f64_add_rz.tv vectors to fpadd and checks result against each expected value.
+module tb ();
+   // DUT inputs
+   logic [63:0]  op1;
+   logic [63:0]  op2;
+   logic [1:0]   rm;                   // 2 bits wide, matching the other fpadd testbenches
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+   // DUT outputs
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+   // testbench state
+   logic         clk;
+   logic [63:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [199:0] testvectors[50000:0]; // array of testvectors
+   logic [7:0]   flags_expected;       // unpacked from each vector but not compared
+   // log file: handle3 comes from $fopen, desc3 is the copy handed to $fdisplay
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test
+   fpadd dut (result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn);
+   // free-running clock with a period of 10 time units
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+   // open the log, load the vectors, and hold reset for the first 27 time units
+   initial
+     begin
+        handle3 = $fopen("f64_add_rz.out");
+        $readmemh("f64_add_rz.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+   // rising edge: set the mode, load the current vector 1 unit later, log the outputs 5 units after that
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b000;
+        #0  P = 1'b0;
+        #0  rm = 2'b01;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #1; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
+        #5 $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset)
+       begin // skip during reset
+          if (result !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+
+          vectornum = vectornum + 1;
+          // Entries $readmemh did not fill are all x.  'x fills the full vector width; a sized
+          // literal narrower than the vector would be zero-extended and could never match.
+          if (testvectors[vectornum] === 'x)
+            begin
+               $display("%d tests completed with %d errors",
+                        vectornum, errors);
+               #2 $fclose(handle3); // the rising-edge block logs the last vector 1 unit after this edge
+               $stop;
+            end
+       end // if (~reset)
+
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f64_f32_rne.sv b/wally-pipelined/src/fpu/fpadd/tb_f64_f32_rne.sv
new file mode 100755
index 00000000..53eb2598
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f64_f32_rne.sv
@@ -0,0 +1,79 @@
+// Testbench: applies the f64_f32_rne.tv vectors to fpadd (op2 held at zero) and checks result[63:32] against each expected value.
+module tb ();
+   // DUT inputs (op2 is driven to zero on every rising edge)
+   logic [63:0]  op1;
+   logic [63:0]  op2;
+   logic [1:0]   rm;
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+   // DUT outputs
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+   // testbench state
+   logic         clk;
+   logic [31:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [103:0] testvectors[50000:0]; // array of testvectors
+   logic [7:0]   flags_expected;       // unpacked from each vector but not compared
+   // log file: handle3 comes from $fopen, desc3 is the copy handed to $fdisplay
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test
+   fpadd dut (result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn);
+   // free-running clock with a period of 10 time units
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+   // open the log, load the vectors, and hold reset for the first 27 time units
+   initial
+     begin
+        handle3 = $fopen("f64_f32_rne.out");
+        $readmemh("f64_f32_rne.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+   // rising edge: set the mode and op2, load the current vector 1 unit later, log the outputs 5 units after that
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b110;
+        #0  P = 1'b1;
+        #0  rm = 2'b00;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #0  op2 = 64'h0;
+        #1; {op1, yexpected, flags_expected} = testvectors[vectornum];
+        #5 $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset)
+       begin // skip during reset
+          if (result[63:32] !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+
+          vectornum = vectornum + 1;
+          // Entries $readmemh did not fill are all x.  'x fills the full vector width; a sized
+          // literal narrower than the vector would be zero-extended and could never match.
+          if (testvectors[vectornum] === 'x)
+            begin
+               $display("%d tests completed with %d errors",
+                        vectornum, errors);
+               #2 $fclose(handle3); // the rising-edge block logs the last vector 1 unit after this edge
+               $stop;
+            end
+       end // if (~reset)
+
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f64_sub_rd.sv b/wally-pipelined/src/fpu/fpadd/tb_f64_sub_rd.sv
new file mode 100755
index 00000000..a427ebf9
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f64_sub_rd.sv
@@ -0,0 +1,78 @@
+// Testbench: applies the f64_sub_rd.tv vectors to fpadd and checks result against each expected value.
+module tb ();
+   // DUT inputs
+   logic [63:0]  op1;
+   logic [63:0]  op2;
+   logic [1:0]   rm;
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+   // DUT outputs
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+   // testbench state
+   logic         clk;
+   logic [63:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [199:0] testvectors[50000:0]; // array of testvectors
+   logic [7:0]   flags_expected;       // unpacked from each vector but not compared
+   // log file: handle3 comes from $fopen, desc3 is the copy handed to $fdisplay
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test
+   fpadd dut (result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn);
+   // free-running clock with a period of 10 time units
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+   // open the log, load the vectors, and hold reset for the first 27 time units
+   initial
+     begin
+        handle3 = $fopen("f64_sub_rd.out");
+        $readmemh("f64_sub_rd.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+   // rising edge: set the mode, load the current vector 1 unit later, log the outputs 5 units after that
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b001;
+        #0  P = 1'b0;
+        #0  rm = 2'b11;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #1; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
+        #5 $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset)
+       begin // skip during reset
+          if (result !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+
+          vectornum = vectornum + 1;
+          // Entries $readmemh did not fill are all x.  'x fills the full vector width; a sized
+          // literal narrower than the vector would be zero-extended and could never match.
+          if (testvectors[vectornum] === 'x)
+            begin
+               $display("%d tests completed with %d errors",
+                        vectornum, errors);
+               #2 $fclose(handle3); // the rising-edge block logs the last vector 1 unit after this edge
+               $stop;
+            end
+       end // if (~reset)
+
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f64_sub_rne.sv b/wally-pipelined/src/fpu/fpadd/tb_f64_sub_rne.sv
new file mode 100755
index 00000000..dd25bd03
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f64_sub_rne.sv
@@ -0,0 +1,78 @@
+// Testbench: applies the f64_sub_rne.tv vectors to fpadd and checks result against each expected value.
+module tb ();
+   // DUT inputs
+   logic [63:0]  op1;
+   logic [63:0]  op2;
+   logic [1:0]   rm;
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+   // DUT outputs
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+   // testbench state
+   logic         clk;
+   logic [63:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [199:0] testvectors[50000:0]; // array of testvectors
+   logic [7:0]   flags_expected;       // unpacked from each vector but not compared
+   // log file: handle3 comes from $fopen, desc3 is the copy handed to $fdisplay
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test
+   fpadd dut (result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn);
+   // free-running clock with a period of 10 time units
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+   // open the log, load the vectors, and hold reset for the first 27 time units
+   initial
+     begin
+        handle3 = $fopen("f64_sub_rne.out");
+        $readmemh("f64_sub_rne.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+   // rising edge: set the mode, load the current vector 1 unit later, log the outputs 5 units after that
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b001;
+        #0  P = 1'b0;
+        #0  rm = 2'b00;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #1; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
+        #5 $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset)
+       begin // skip during reset
+          if (result !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+
+          vectornum = vectornum + 1;
+          // Entries $readmemh did not fill are all x.  'x fills the full vector width; a sized
+          // literal narrower than the vector would be zero-extended and could never match.
+          if (testvectors[vectornum] === 'x)
+            begin
+               $display("%d tests completed with %d errors",
+                        vectornum, errors);
+               #2 $fclose(handle3); // the rising-edge block logs the last vector 1 unit after this edge
+               $stop;
+            end
+       end // if (~reset)
+
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f64_sub_ru.sv b/wally-pipelined/src/fpu/fpadd/tb_f64_sub_ru.sv
new file mode 100755
index 00000000..d16ea7c2
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f64_sub_ru.sv
@@ -0,0 +1,78 @@
+// Testbench: applies the f64_sub_ru.tv vectors to fpadd and checks result against each expected value.
+module tb ();
+   // DUT inputs
+   logic [63:0]  op1;
+   logic [63:0]  op2;
+   logic [1:0]   rm;
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+   // DUT outputs
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+   // testbench state
+   logic         clk;
+   logic [63:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [199:0] testvectors[50000:0]; // array of testvectors
+   logic [7:0]   flags_expected;       // unpacked from each vector but not compared
+   // log file: handle3 comes from $fopen, desc3 is the copy handed to $fdisplay
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test
+   fpadd dut (result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn);
+   // free-running clock with a period of 10 time units
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+   // open the log, load the vectors, and hold reset for the first 27 time units
+   initial
+     begin
+        handle3 = $fopen("f64_sub_ru.out");
+        $readmemh("f64_sub_ru.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+   // rising edge: set the mode, load the current vector 1 unit later, log the outputs 5 units after that
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b001;
+        #0  P = 1'b0;
+        #0  rm = 2'b10;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #1; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
+        #5 $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset)
+       begin // skip during reset
+          if (result !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+
+          vectornum = vectornum + 1;
+          // Entries $readmemh did not fill are all x.  'x fills the full vector width; a sized
+          // literal narrower than the vector would be zero-extended and could never match.
+          if (testvectors[vectornum] === 'x)
+            begin
+               $display("%d tests completed with %d errors",
+                        vectornum, errors);
+               #2 $fclose(handle3); // the rising-edge block logs the last vector 1 unit after this edge
+               $stop;
+            end
+       end // if (~reset)
+
+endmodule // tb
+
+
diff --git a/wally-pipelined/src/fpu/fpadd/tb_f64_sub_rz.sv b/wally-pipelined/src/fpu/fpadd/tb_f64_sub_rz.sv
new file mode 100755
index 00000000..e68ec215
--- /dev/null
+++ b/wally-pipelined/src/fpu/fpadd/tb_f64_sub_rz.sv
@@ -0,0 +1,78 @@
+// Testbench: applies the f64_sub_rz.tv vectors to fpadd and checks result against each expected value.
+module tb ();
+   // DUT inputs
+   logic [63:0]  op1;
+   logic [63:0]  op2;
+   logic [1:0]   rm;
+   logic [2:0]   op_type;
+   logic         P;
+   logic         OvEn;
+   logic         UnEn;
+   // DUT outputs
+   logic [63:0]  result;
+   logic [4:0]   Flags;
+   logic         Denorm;
+   // testbench state
+   logic         clk;
+   logic [63:0]  yexpected;
+   logic         reset;
+   logic [63:0]  vectornum, errors;    // bookkeeping variables
+   logic [199:0] testvectors[50000:0]; // array of testvectors
+   logic [7:0]   flags_expected;       // unpacked from each vector but not compared
+   // log file: handle3 comes from $fopen, desc3 is the copy handed to $fdisplay
+   integer       handle3;
+   integer       desc3;
+
+   // instantiate device under test
+   fpadd dut (result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn);
+   // free-running clock with a period of 10 time units
+   always
+     begin
+        clk = 1; #5; clk = 0; #5;
+     end
+   // open the log, load the vectors, and hold reset for the first 27 time units
+   initial
+     begin
+        handle3 = $fopen("f64_sub_rz.out");
+        $readmemh("f64_sub_rz.tv", testvectors);
+        vectornum = 0; errors = 0;
+        reset = 1; #27; reset = 0;
+     end
+   // rising edge: set the mode, load the current vector 1 unit later, log the outputs 5 units after that
+   always @(posedge clk)
+     begin
+        desc3 = handle3;
+        #0  op_type = 3'b001;
+        #0  P = 1'b0;
+        #0  rm = 2'b01;
+        #0  OvEn = 1'b0;
+        #0  UnEn = 1'b0;
+        #1; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
+        #5 $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
+     end
+
+   // check results on falling edge of clk
+   always @(negedge clk)
+     if (~reset)
+       begin // skip during reset
+          if (result !== yexpected) begin
+             $display("Error: inputs = %h %h", op1, op2);
+             $display("  outputs = %h (%h expected)", result, yexpected);
+             errors = errors + 1;
+          end
+
+          vectornum = vectornum + 1;
+          // Entries $readmemh did not fill are all x.  'x fills the full vector width; a sized
+          // literal narrower than the vector would be zero-extended and could never match.
+          if (testvectors[vectornum] === 'x)
+            begin
+               $display("%d tests completed with %d errors",
+                        vectornum, errors);
+               #2 $fclose(handle3); // the rising-edge block logs the last vector 1 unit after this edge
+               $stop;
+            end
+       end // if (~reset)
+
+endmodule // tb
+
+