Merge branch 'main' into cache

Conflicts:
	wally-pipelined/testbench/testbench-busybear.sv
This commit is contained in:
Noah Boorstin 2021-03-26 12:26:30 -04:00
commit b5a1691c2b
42 changed files with 20468 additions and 358 deletions

1
.gitignore vendored
View File

@ -15,3 +15,4 @@ wlft*
/imperas-riscv-tests/FunctionRadix_64.addr
/imperas-riscv-tests/FunctionRadix.addr
/imperas-riscv-tests/ProgramMap.txt
/wally-pipelined/busybear-testgen/gdbcombined.txt

View File

@ -25,10 +25,10 @@ def test_config(config, print_res=True):
logname = "wally_"+config+".log"
if config == "busybear":
# Handle busybear separately
cmd = "echo 'quit' | vsim -do wally-busybear.do -c >" + logname
cmd = "vsim -do wally-busybear-batch.do -c >" + logname
os.system(cmd)
# check for success. grep returns 0 if found, 1 if not found
passed = search_log_for_text("# loaded 200000 instructions", logname)
passed = search_log_for_text("# loaded 800000 instructions", logname)
else:
# Any other configuration loads that name from the config folder and runs vsim
cmd = "vsim -c >" + logname +" <<!\ndo wally-pipelined-batch.do ../config/" + config + " " + config + "\n!\n"

View File

@ -0,0 +1,63 @@
# wally-pipelined.do
#
# Modification by Oklahoma State University & Harvey Mudd College
# Use with Testbench
# James Stine, 2008; David Harris 2021
# Go Cowboys!!!!!!
#
# Takes 1:10 to run RV64IC tests using gui
# Use this wally-pipelined.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-pipelined.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-pipelined.do -c
# (omit the "-c" to see the GUI while running from the shell)
onbreak {resume}
# create library
if [file exists work] {
vdel -all
}
vlib work
# compile source files
# suppress spurious warnngs about
# "Extra checking for conflicts with always_comb done at vopt time"
# because vsim will run vopt
# default to config/rv64ic, but allow this to be overridden at the command line. For example:
# do wally-pipelined.do ../config/rv32ic
switch $argc {
0 {vlog +incdir+../config/rv64ic ../testbench/testbench-privileged.sv ../src/*/*.sv -suppress 2583}
1 {vlog +incdir+$1 ../testbench/testbench-privileged.sv ../testbench/function_radix.sv ../src/*/*.sv -suppress 2583}
}
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
vopt +acc work.testbench -o workopt
vsim workopt
view wave
-- display input and output signals as hexidecimal values
do ./wave-dos/default-waves.do
-- Set Wave Output Items
TreeUpdate [SetDefaultTree]
WaveRestoreZoom {0 ps} {100 ps}
configure wave -namecolwidth 250
configure wave -valuecolwidth 140
configure wave -justifyvalue left
configure wave -signalnamewidth 0
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
set DefaultRadix hexadecimal
-- Run the Simulation
#run 4100
run -all
#quit

View File

@ -0,0 +1,50 @@
`include "../../config/rv64icfd/wally-config.vh"
module fputop (
input logic [2:0] FrmW,
input logic reset,
input logic clear,
input logic clk,
input logic [31:0] InstrD,
input logic [`XLEN-1:0] SrcAE,
input logic [`XLEN-1:0] SrcAW,
output logic [31:0] FSROutW,
output logic DivSqrtDoneE,
output logic FInvalInstrD,
output logic [`XLEN-1:0] FPUResultW);
fctrl ();
//regfile instantiation and decode stage
freg3adr ();
//can easily be merged into privledged core
//if necessary
//fcsr ();
//E pipe and execution stage
fpdivsqrt ();
//fma1 ();
fpaddcvt1 fpadd1 ();
fpcmp1 cmp1 ();
fpusgn ();
//M pipe and memory stage
//fma2 ();
fpaddcvt2 fpadd2 ();
fpcmp2 cmp2 ();
//W pipe and writeback stage
//flag signal mux
//result mux
endmodule

View File

@ -0,0 +1,758 @@
// The following module make up the basic building blocks that
// are used by the cla64, cla_sub64, and cla52.
module INVBLOCK ( GIN, GOUT );
input GIN;
output GOUT;
assign GOUT = ~ GIN;
endmodule // INVBLOCK
module XXOR1 ( A, B, GIN, SUM );
input A;
input B;
input GIN;
output SUM;
assign SUM = ( ~ (A ^ B)) ^ GIN;
endmodule // XXOR1
module BLOCK0 ( A, B, POUT, GOUT );
input A;
input B;
output POUT;
output GOUT;
assign POUT = ~ (A | B);
assign GOUT = ~ (A & B);
endmodule // BLOCK0
module BLOCK1 ( PIN1, PIN2, GIN1, GIN2, POUT, GOUT );
input PIN1;
input PIN2;
input GIN1;
input GIN2;
output POUT;
output GOUT;
assign POUT = ~ (PIN1 | PIN2);
assign GOUT = ~ (GIN2 & (PIN2 | GIN1));
endmodule // BLOCK1
module BLOCK2 ( PIN1, PIN2, GIN1, GIN2, POUT, GOUT );
input PIN1;
input PIN2;
input GIN1;
input GIN2;
output POUT;
output GOUT;
assign POUT = ~ (PIN1 & PIN2);
assign GOUT = ~ (GIN2 | (PIN2 & GIN1));
endmodule // BLOCK2
module BLOCK1A ( PIN2, GIN1, GIN2, GOUT );
input PIN2;
input GIN1;
input GIN2;
output GOUT;
assign GOUT = ~ (GIN2 & (PIN2 | GIN1));
endmodule // BLOCK1A
module BLOCK2A ( PIN2, GIN1, GIN2, GOUT );
input PIN2;
input GIN1;
input GIN2;
output GOUT;
assign GOUT = ~ (GIN2 | (PIN2 & GIN1));
endmodule
module PRESTAGE_64 ( A, B, CIN, POUT, GOUT );
input [0:63] A;
input [0:63] B;
input CIN;
output [0:63] POUT;
output [0:64] GOUT;
BLOCK0 U10 (A[0] , B[0] , POUT[0] , GOUT[1] );
BLOCK0 U11 (A[1] , B[1] , POUT[1] , GOUT[2] );
BLOCK0 U12 (A[2] , B[2] , POUT[2] , GOUT[3] );
BLOCK0 U13 (A[3] , B[3] , POUT[3] , GOUT[4] );
BLOCK0 U14 (A[4] , B[4] , POUT[4] , GOUT[5] );
BLOCK0 U15 (A[5] , B[5] , POUT[5] , GOUT[6] );
BLOCK0 U16 (A[6] , B[6] , POUT[6] , GOUT[7] );
BLOCK0 U17 (A[7] , B[7] , POUT[7] , GOUT[8] );
BLOCK0 U18 (A[8] , B[8] , POUT[8] , GOUT[9] );
BLOCK0 U19 (A[9] , B[9] , POUT[9] , GOUT[10] );
BLOCK0 U110 (A[10] , B[10] , POUT[10] , GOUT[11] );
BLOCK0 U111 (A[11] , B[11] , POUT[11] , GOUT[12] );
BLOCK0 U112 (A[12] , B[12] , POUT[12] , GOUT[13] );
BLOCK0 U113 (A[13] , B[13] , POUT[13] , GOUT[14] );
BLOCK0 U114 (A[14] , B[14] , POUT[14] , GOUT[15] );
BLOCK0 U115 (A[15] , B[15] , POUT[15] , GOUT[16] );
BLOCK0 U116 (A[16] , B[16] , POUT[16] , GOUT[17] );
BLOCK0 U117 (A[17] , B[17] , POUT[17] , GOUT[18] );
BLOCK0 U118 (A[18] , B[18] , POUT[18] , GOUT[19] );
BLOCK0 U119 (A[19] , B[19] , POUT[19] , GOUT[20] );
BLOCK0 U120 (A[20] , B[20] , POUT[20] , GOUT[21] );
BLOCK0 U121 (A[21] , B[21] , POUT[21] , GOUT[22] );
BLOCK0 U122 (A[22] , B[22] , POUT[22] , GOUT[23] );
BLOCK0 U123 (A[23] , B[23] , POUT[23] , GOUT[24] );
BLOCK0 U124 (A[24] , B[24] , POUT[24] , GOUT[25] );
BLOCK0 U125 (A[25] , B[25] , POUT[25] , GOUT[26] );
BLOCK0 U126 (A[26] , B[26] , POUT[26] , GOUT[27] );
BLOCK0 U127 (A[27] , B[27] , POUT[27] , GOUT[28] );
BLOCK0 U128 (A[28] , B[28] , POUT[28] , GOUT[29] );
BLOCK0 U129 (A[29] , B[29] , POUT[29] , GOUT[30] );
BLOCK0 U130 (A[30] , B[30] , POUT[30] , GOUT[31] );
BLOCK0 U131 (A[31] , B[31] , POUT[31] , GOUT[32] );
BLOCK0 U132 (A[32] , B[32] , POUT[32] , GOUT[33] );
BLOCK0 U133 (A[33] , B[33] , POUT[33] , GOUT[34] );
BLOCK0 U134 (A[34] , B[34] , POUT[34] , GOUT[35] );
BLOCK0 U135 (A[35] , B[35] , POUT[35] , GOUT[36] );
BLOCK0 U136 (A[36] , B[36] , POUT[36] , GOUT[37] );
BLOCK0 U137 (A[37] , B[37] , POUT[37] , GOUT[38] );
BLOCK0 U138 (A[38] , B[38] , POUT[38] , GOUT[39] );
BLOCK0 U139 (A[39] , B[39] , POUT[39] , GOUT[40] );
BLOCK0 U140 (A[40] , B[40] , POUT[40] , GOUT[41] );
BLOCK0 U141 (A[41] , B[41] , POUT[41] , GOUT[42] );
BLOCK0 U142 (A[42] , B[42] , POUT[42] , GOUT[43] );
BLOCK0 U143 (A[43] , B[43] , POUT[43] , GOUT[44] );
BLOCK0 U144 (A[44] , B[44] , POUT[44] , GOUT[45] );
BLOCK0 U145 (A[45] , B[45] , POUT[45] , GOUT[46] );
BLOCK0 U146 (A[46] , B[46] , POUT[46] , GOUT[47] );
BLOCK0 U147 (A[47] , B[47] , POUT[47] , GOUT[48] );
BLOCK0 U148 (A[48] , B[48] , POUT[48] , GOUT[49] );
BLOCK0 U149 (A[49] , B[49] , POUT[49] , GOUT[50] );
BLOCK0 U150 (A[50] , B[50] , POUT[50] , GOUT[51] );
BLOCK0 U151 (A[51] , B[51] , POUT[51] , GOUT[52] );
BLOCK0 U152 (A[52] , B[52] , POUT[52] , GOUT[53] );
BLOCK0 U153 (A[53] , B[53] , POUT[53] , GOUT[54] );
BLOCK0 U154 (A[54] , B[54] , POUT[54] , GOUT[55] );
BLOCK0 U155 (A[55] , B[55] , POUT[55] , GOUT[56] );
BLOCK0 U156 (A[56] , B[56] , POUT[56] , GOUT[57] );
BLOCK0 U157 (A[57] , B[57] , POUT[57] , GOUT[58] );
BLOCK0 U158 (A[58] , B[58] , POUT[58] , GOUT[59] );
BLOCK0 U159 (A[59] , B[59] , POUT[59] , GOUT[60] );
BLOCK0 U160 (A[60] , B[60] , POUT[60] , GOUT[61] );
BLOCK0 U161 (A[61] , B[61] , POUT[61] , GOUT[62] );
BLOCK0 U162 (A[62] , B[62] , POUT[62] , GOUT[63] );
BLOCK0 U163 (A[63] , B[63] , POUT[63] , GOUT[64] );
INVBLOCK U2 (CIN , GOUT[0] );
endmodule // PRESTAGE_64
module DBLC_0_64 ( PIN, GIN, POUT, GOUT );
input [0:63] PIN;
input [0:64] GIN;
output [0:62] POUT;
output [0:64] GOUT;
INVBLOCK U10 (GIN[0] , GOUT[0] );
BLOCK1A U21 (PIN[0] , GIN[0] , GIN[1] , GOUT[1] );
BLOCK1 U32 (PIN[0] , PIN[1] , GIN[1] , GIN[2] , POUT[0] , GOUT[2] );
BLOCK1 U33 (PIN[1] , PIN[2] , GIN[2] , GIN[3] , POUT[1] , GOUT[3] );
BLOCK1 U34 (PIN[2] , PIN[3] , GIN[3] , GIN[4] , POUT[2] , GOUT[4] );
BLOCK1 U35 (PIN[3] , PIN[4] , GIN[4] , GIN[5] , POUT[3] , GOUT[5] );
BLOCK1 U36 (PIN[4] , PIN[5] , GIN[5] , GIN[6] , POUT[4] , GOUT[6] );
BLOCK1 U37 (PIN[5] , PIN[6] , GIN[6] , GIN[7] , POUT[5] , GOUT[7] );
BLOCK1 U38 (PIN[6] , PIN[7] , GIN[7] , GIN[8] , POUT[6] , GOUT[8] );
BLOCK1 U39 (PIN[7] , PIN[8] , GIN[8] , GIN[9] , POUT[7] , GOUT[9] );
BLOCK1 U310 (PIN[8] , PIN[9] , GIN[9] , GIN[10] , POUT[8] , GOUT[10] );
BLOCK1 U311 (PIN[9] , PIN[10] , GIN[10] , GIN[11] , POUT[9] , GOUT[11] );
BLOCK1 U312 (PIN[10] , PIN[11] , GIN[11] , GIN[12] , POUT[10] , GOUT[12] );
BLOCK1 U313 (PIN[11] , PIN[12] , GIN[12] , GIN[13] , POUT[11] , GOUT[13] );
BLOCK1 U314 (PIN[12] , PIN[13] , GIN[13] , GIN[14] , POUT[12] , GOUT[14] );
BLOCK1 U315 (PIN[13] , PIN[14] , GIN[14] , GIN[15] , POUT[13] , GOUT[15] );
BLOCK1 U316 (PIN[14] , PIN[15] , GIN[15] , GIN[16] , POUT[14] , GOUT[16] );
BLOCK1 U317 (PIN[15] , PIN[16] , GIN[16] , GIN[17] , POUT[15] , GOUT[17] );
BLOCK1 U318 (PIN[16] , PIN[17] , GIN[17] , GIN[18] , POUT[16] , GOUT[18] );
BLOCK1 U319 (PIN[17] , PIN[18] , GIN[18] , GIN[19] , POUT[17] , GOUT[19] );
BLOCK1 U320 (PIN[18] , PIN[19] , GIN[19] , GIN[20] , POUT[18] , GOUT[20] );
BLOCK1 U321 (PIN[19] , PIN[20] , GIN[20] , GIN[21] , POUT[19] , GOUT[21] );
BLOCK1 U322 (PIN[20] , PIN[21] , GIN[21] , GIN[22] , POUT[20] , GOUT[22] );
BLOCK1 U323 (PIN[21] , PIN[22] , GIN[22] , GIN[23] , POUT[21] , GOUT[23] );
BLOCK1 U324 (PIN[22] , PIN[23] , GIN[23] , GIN[24] , POUT[22] , GOUT[24] );
BLOCK1 U325 (PIN[23] , PIN[24] , GIN[24] , GIN[25] , POUT[23] , GOUT[25] );
BLOCK1 U326 (PIN[24] , PIN[25] , GIN[25] , GIN[26] , POUT[24] , GOUT[26] );
BLOCK1 U327 (PIN[25] , PIN[26] , GIN[26] , GIN[27] , POUT[25] , GOUT[27] );
BLOCK1 U328 (PIN[26] , PIN[27] , GIN[27] , GIN[28] , POUT[26] , GOUT[28] );
BLOCK1 U329 (PIN[27] , PIN[28] , GIN[28] , GIN[29] , POUT[27] , GOUT[29] );
BLOCK1 U330 (PIN[28] , PIN[29] , GIN[29] , GIN[30] , POUT[28] , GOUT[30] );
BLOCK1 U331 (PIN[29] , PIN[30] , GIN[30] , GIN[31] , POUT[29] , GOUT[31] );
BLOCK1 U332 (PIN[30] , PIN[31] , GIN[31] , GIN[32] , POUT[30] , GOUT[32] );
BLOCK1 U333 (PIN[31] , PIN[32] , GIN[32] , GIN[33] , POUT[31] , GOUT[33] );
BLOCK1 U334 (PIN[32] , PIN[33] , GIN[33] , GIN[34] , POUT[32] , GOUT[34] );
BLOCK1 U335 (PIN[33] , PIN[34] , GIN[34] , GIN[35] , POUT[33] , GOUT[35] );
BLOCK1 U336 (PIN[34] , PIN[35] , GIN[35] , GIN[36] , POUT[34] , GOUT[36] );
BLOCK1 U337 (PIN[35] , PIN[36] , GIN[36] , GIN[37] , POUT[35] , GOUT[37] );
BLOCK1 U338 (PIN[36] , PIN[37] , GIN[37] , GIN[38] , POUT[36] , GOUT[38] );
BLOCK1 U339 (PIN[37] , PIN[38] , GIN[38] , GIN[39] , POUT[37] , GOUT[39] );
BLOCK1 U340 (PIN[38] , PIN[39] , GIN[39] , GIN[40] , POUT[38] , GOUT[40] );
BLOCK1 U341 (PIN[39] , PIN[40] , GIN[40] , GIN[41] , POUT[39] , GOUT[41] );
BLOCK1 U342 (PIN[40] , PIN[41] , GIN[41] , GIN[42] , POUT[40] , GOUT[42] );
BLOCK1 U343 (PIN[41] , PIN[42] , GIN[42] , GIN[43] , POUT[41] , GOUT[43] );
BLOCK1 U344 (PIN[42] , PIN[43] , GIN[43] , GIN[44] , POUT[42] , GOUT[44] );
BLOCK1 U345 (PIN[43] , PIN[44] , GIN[44] , GIN[45] , POUT[43] , GOUT[45] );
BLOCK1 U346 (PIN[44] , PIN[45] , GIN[45] , GIN[46] , POUT[44] , GOUT[46] );
BLOCK1 U347 (PIN[45] , PIN[46] , GIN[46] , GIN[47] , POUT[45] , GOUT[47] );
BLOCK1 U348 (PIN[46] , PIN[47] , GIN[47] , GIN[48] , POUT[46] , GOUT[48] );
BLOCK1 U349 (PIN[47] , PIN[48] , GIN[48] , GIN[49] , POUT[47] , GOUT[49] );
BLOCK1 U350 (PIN[48] , PIN[49] , GIN[49] , GIN[50] , POUT[48] , GOUT[50] );
BLOCK1 U351 (PIN[49] , PIN[50] , GIN[50] , GIN[51] , POUT[49] , GOUT[51] );
BLOCK1 U352 (PIN[50] , PIN[51] , GIN[51] , GIN[52] , POUT[50] , GOUT[52] );
BLOCK1 U353 (PIN[51] , PIN[52] , GIN[52] , GIN[53] , POUT[51] , GOUT[53] );
BLOCK1 U354 (PIN[52] , PIN[53] , GIN[53] , GIN[54] , POUT[52] , GOUT[54] );
BLOCK1 U355 (PIN[53] , PIN[54] , GIN[54] , GIN[55] , POUT[53] , GOUT[55] );
BLOCK1 U356 (PIN[54] , PIN[55] , GIN[55] , GIN[56] , POUT[54] , GOUT[56] );
BLOCK1 U357 (PIN[55] , PIN[56] , GIN[56] , GIN[57] , POUT[55] , GOUT[57] );
BLOCK1 U358 (PIN[56] , PIN[57] , GIN[57] , GIN[58] , POUT[56] , GOUT[58] );
BLOCK1 U359 (PIN[57] , PIN[58] , GIN[58] , GIN[59] , POUT[57] , GOUT[59] );
BLOCK1 U360 (PIN[58] , PIN[59] , GIN[59] , GIN[60] , POUT[58] , GOUT[60] );
BLOCK1 U361 (PIN[59] , PIN[60] , GIN[60] , GIN[61] , POUT[59] , GOUT[61] );
BLOCK1 U362 (PIN[60] , PIN[61] , GIN[61] , GIN[62] , POUT[60] , GOUT[62] );
BLOCK1 U363 (PIN[61] , PIN[62] , GIN[62] , GIN[63] , POUT[61] , GOUT[63] );
BLOCK1 U364 (PIN[62] , PIN[63] , GIN[63] , GIN[64] , POUT[62] , GOUT[64] );
endmodule // DBLC_0_64
module DBLC_1_64 ( PIN, GIN, POUT, GOUT );
input [0:62] PIN;
input [0:64] GIN;
output [0:60] POUT;
output [0:64] GOUT;
INVBLOCK U10 (GIN[0] , GOUT[0] );
INVBLOCK U11 (GIN[1] , GOUT[1] );
BLOCK2A U22 (PIN[0] , GIN[0] , GIN[2] , GOUT[2] );
BLOCK2A U23 (PIN[1] , GIN[1] , GIN[3] , GOUT[3] );
BLOCK2 U34 (PIN[0] , PIN[2] , GIN[2] , GIN[4] , POUT[0] , GOUT[4] );
BLOCK2 U35 (PIN[1] , PIN[3] , GIN[3] , GIN[5] , POUT[1] , GOUT[5] );
BLOCK2 U36 (PIN[2] , PIN[4] , GIN[4] , GIN[6] , POUT[2] , GOUT[6] );
BLOCK2 U37 (PIN[3] , PIN[5] , GIN[5] , GIN[7] , POUT[3] , GOUT[7] );
BLOCK2 U38 (PIN[4] , PIN[6] , GIN[6] , GIN[8] , POUT[4] , GOUT[8] );
BLOCK2 U39 (PIN[5] , PIN[7] , GIN[7] , GIN[9] , POUT[5] , GOUT[9] );
BLOCK2 U310 (PIN[6] , PIN[8] , GIN[8] , GIN[10] , POUT[6] , GOUT[10] );
BLOCK2 U311 (PIN[7] , PIN[9] , GIN[9] , GIN[11] , POUT[7] , GOUT[11] );
BLOCK2 U312 (PIN[8] , PIN[10] , GIN[10] , GIN[12] , POUT[8] , GOUT[12] );
BLOCK2 U313 (PIN[9] , PIN[11] , GIN[11] , GIN[13] , POUT[9] , GOUT[13] );
BLOCK2 U314 (PIN[10] , PIN[12] , GIN[12] , GIN[14] , POUT[10] , GOUT[14] );
BLOCK2 U315 (PIN[11] , PIN[13] , GIN[13] , GIN[15] , POUT[11] , GOUT[15] );
BLOCK2 U316 (PIN[12] , PIN[14] , GIN[14] , GIN[16] , POUT[12] , GOUT[16] );
BLOCK2 U317 (PIN[13] , PIN[15] , GIN[15] , GIN[17] , POUT[13] , GOUT[17] );
BLOCK2 U318 (PIN[14] , PIN[16] , GIN[16] , GIN[18] , POUT[14] , GOUT[18] );
BLOCK2 U319 (PIN[15] , PIN[17] , GIN[17] , GIN[19] , POUT[15] , GOUT[19] );
BLOCK2 U320 (PIN[16] , PIN[18] , GIN[18] , GIN[20] , POUT[16] , GOUT[20] );
BLOCK2 U321 (PIN[17] , PIN[19] , GIN[19] , GIN[21] , POUT[17] , GOUT[21] );
BLOCK2 U322 (PIN[18] , PIN[20] , GIN[20] , GIN[22] , POUT[18] , GOUT[22] );
BLOCK2 U323 (PIN[19] , PIN[21] , GIN[21] , GIN[23] , POUT[19] , GOUT[23] );
BLOCK2 U324 (PIN[20] , PIN[22] , GIN[22] , GIN[24] , POUT[20] , GOUT[24] );
BLOCK2 U325 (PIN[21] , PIN[23] , GIN[23] , GIN[25] , POUT[21] , GOUT[25] );
BLOCK2 U326 (PIN[22] , PIN[24] , GIN[24] , GIN[26] , POUT[22] , GOUT[26] );
BLOCK2 U327 (PIN[23] , PIN[25] , GIN[25] , GIN[27] , POUT[23] , GOUT[27] );
BLOCK2 U328 (PIN[24] , PIN[26] , GIN[26] , GIN[28] , POUT[24] , GOUT[28] );
BLOCK2 U329 (PIN[25] , PIN[27] , GIN[27] , GIN[29] , POUT[25] , GOUT[29] );
BLOCK2 U330 (PIN[26] , PIN[28] , GIN[28] , GIN[30] , POUT[26] , GOUT[30] );
BLOCK2 U331 (PIN[27] , PIN[29] , GIN[29] , GIN[31] , POUT[27] , GOUT[31] );
BLOCK2 U332 (PIN[28] , PIN[30] , GIN[30] , GIN[32] , POUT[28] , GOUT[32] );
BLOCK2 U333 (PIN[29] , PIN[31] , GIN[31] , GIN[33] , POUT[29] , GOUT[33] );
BLOCK2 U334 (PIN[30] , PIN[32] , GIN[32] , GIN[34] , POUT[30] , GOUT[34] );
BLOCK2 U335 (PIN[31] , PIN[33] , GIN[33] , GIN[35] , POUT[31] , GOUT[35] );
BLOCK2 U336 (PIN[32] , PIN[34] , GIN[34] , GIN[36] , POUT[32] , GOUT[36] );
BLOCK2 U337 (PIN[33] , PIN[35] , GIN[35] , GIN[37] , POUT[33] , GOUT[37] );
BLOCK2 U338 (PIN[34] , PIN[36] , GIN[36] , GIN[38] , POUT[34] , GOUT[38] );
BLOCK2 U339 (PIN[35] , PIN[37] , GIN[37] , GIN[39] , POUT[35] , GOUT[39] );
BLOCK2 U340 (PIN[36] , PIN[38] , GIN[38] , GIN[40] , POUT[36] , GOUT[40] );
BLOCK2 U341 (PIN[37] , PIN[39] , GIN[39] , GIN[41] , POUT[37] , GOUT[41] );
BLOCK2 U342 (PIN[38] , PIN[40] , GIN[40] , GIN[42] , POUT[38] , GOUT[42] );
BLOCK2 U343 (PIN[39] , PIN[41] , GIN[41] , GIN[43] , POUT[39] , GOUT[43] );
BLOCK2 U344 (PIN[40] , PIN[42] , GIN[42] , GIN[44] , POUT[40] , GOUT[44] );
BLOCK2 U345 (PIN[41] , PIN[43] , GIN[43] , GIN[45] , POUT[41] , GOUT[45] );
BLOCK2 U346 (PIN[42] , PIN[44] , GIN[44] , GIN[46] , POUT[42] , GOUT[46] );
BLOCK2 U347 (PIN[43] , PIN[45] , GIN[45] , GIN[47] , POUT[43] , GOUT[47] );
BLOCK2 U348 (PIN[44] , PIN[46] , GIN[46] , GIN[48] , POUT[44] , GOUT[48] );
BLOCK2 U349 (PIN[45] , PIN[47] , GIN[47] , GIN[49] , POUT[45] , GOUT[49] );
BLOCK2 U350 (PIN[46] , PIN[48] , GIN[48] , GIN[50] , POUT[46] , GOUT[50] );
BLOCK2 U351 (PIN[47] , PIN[49] , GIN[49] , GIN[51] , POUT[47] , GOUT[51] );
BLOCK2 U352 (PIN[48] , PIN[50] , GIN[50] , GIN[52] , POUT[48] , GOUT[52] );
BLOCK2 U353 (PIN[49] , PIN[51] , GIN[51] , GIN[53] , POUT[49] , GOUT[53] );
BLOCK2 U354 (PIN[50] , PIN[52] , GIN[52] , GIN[54] , POUT[50] , GOUT[54] );
BLOCK2 U355 (PIN[51] , PIN[53] , GIN[53] , GIN[55] , POUT[51] , GOUT[55] );
BLOCK2 U356 (PIN[52] , PIN[54] , GIN[54] , GIN[56] , POUT[52] , GOUT[56] );
BLOCK2 U357 (PIN[53] , PIN[55] , GIN[55] , GIN[57] , POUT[53] , GOUT[57] );
BLOCK2 U358 (PIN[54] , PIN[56] , GIN[56] , GIN[58] , POUT[54] , GOUT[58] );
BLOCK2 U359 (PIN[55] , PIN[57] , GIN[57] , GIN[59] , POUT[55] , GOUT[59] );
BLOCK2 U360 (PIN[56] , PIN[58] , GIN[58] , GIN[60] , POUT[56] , GOUT[60] );
BLOCK2 U361 (PIN[57] , PIN[59] , GIN[59] , GIN[61] , POUT[57] , GOUT[61] );
BLOCK2 U362 (PIN[58] , PIN[60] , GIN[60] , GIN[62] , POUT[58] , GOUT[62] );
BLOCK2 U363 (PIN[59] , PIN[61] , GIN[61] , GIN[63] , POUT[59] , GOUT[63] );
BLOCK2 U364 (PIN[60] , PIN[62] , GIN[62] , GIN[64] , POUT[60] , GOUT[64] );
endmodule // DBLC_1_64
module DBLC_2_64 ( PIN, GIN, POUT, GOUT );
input [0:60] PIN;
input [0:64] GIN;
output [0:56] POUT;
output [0:64] GOUT;
INVBLOCK U10 (GIN[0] , GOUT[0] );
INVBLOCK U11 (GIN[1] , GOUT[1] );
INVBLOCK U12 (GIN[2] , GOUT[2] );
INVBLOCK U13 (GIN[3] , GOUT[3] );
BLOCK1A U24 (PIN[0] , GIN[0] , GIN[4] , GOUT[4] );
BLOCK1A U25 (PIN[1] , GIN[1] , GIN[5] , GOUT[5] );
BLOCK1A U26 (PIN[2] , GIN[2] , GIN[6] , GOUT[6] );
BLOCK1A U27 (PIN[3] , GIN[3] , GIN[7] , GOUT[7] );
BLOCK1 U38 (PIN[0] , PIN[4] , GIN[4] , GIN[8] , POUT[0] , GOUT[8] );
BLOCK1 U39 (PIN[1] , PIN[5] , GIN[5] , GIN[9] , POUT[1] , GOUT[9] );
BLOCK1 U310 (PIN[2] , PIN[6] , GIN[6] , GIN[10] , POUT[2] , GOUT[10] );
BLOCK1 U311 (PIN[3] , PIN[7] , GIN[7] , GIN[11] , POUT[3] , GOUT[11] );
BLOCK1 U312 (PIN[4] , PIN[8] , GIN[8] , GIN[12] , POUT[4] , GOUT[12] );
BLOCK1 U313 (PIN[5] , PIN[9] , GIN[9] , GIN[13] , POUT[5] , GOUT[13] );
BLOCK1 U314 (PIN[6] , PIN[10] , GIN[10] , GIN[14] , POUT[6] , GOUT[14] );
BLOCK1 U315 (PIN[7] , PIN[11] , GIN[11] , GIN[15] , POUT[7] , GOUT[15] );
BLOCK1 U316 (PIN[8] , PIN[12] , GIN[12] , GIN[16] , POUT[8] , GOUT[16] );
BLOCK1 U317 (PIN[9] , PIN[13] , GIN[13] , GIN[17] , POUT[9] , GOUT[17] );
BLOCK1 U318 (PIN[10] , PIN[14] , GIN[14] , GIN[18] , POUT[10] , GOUT[18] );
BLOCK1 U319 (PIN[11] , PIN[15] , GIN[15] , GIN[19] , POUT[11] , GOUT[19] );
BLOCK1 U320 (PIN[12] , PIN[16] , GIN[16] , GIN[20] , POUT[12] , GOUT[20] );
BLOCK1 U321 (PIN[13] , PIN[17] , GIN[17] , GIN[21] , POUT[13] , GOUT[21] );
BLOCK1 U322 (PIN[14] , PIN[18] , GIN[18] , GIN[22] , POUT[14] , GOUT[22] );
BLOCK1 U323 (PIN[15] , PIN[19] , GIN[19] , GIN[23] , POUT[15] , GOUT[23] );
BLOCK1 U324 (PIN[16] , PIN[20] , GIN[20] , GIN[24] , POUT[16] , GOUT[24] );
BLOCK1 U325 (PIN[17] , PIN[21] , GIN[21] , GIN[25] , POUT[17] , GOUT[25] );
BLOCK1 U326 (PIN[18] , PIN[22] , GIN[22] , GIN[26] , POUT[18] , GOUT[26] );
BLOCK1 U327 (PIN[19] , PIN[23] , GIN[23] , GIN[27] , POUT[19] , GOUT[27] );
BLOCK1 U328 (PIN[20] , PIN[24] , GIN[24] , GIN[28] , POUT[20] , GOUT[28] );
BLOCK1 U329 (PIN[21] , PIN[25] , GIN[25] , GIN[29] , POUT[21] , GOUT[29] );
BLOCK1 U330 (PIN[22] , PIN[26] , GIN[26] , GIN[30] , POUT[22] , GOUT[30] );
BLOCK1 U331 (PIN[23] , PIN[27] , GIN[27] , GIN[31] , POUT[23] , GOUT[31] );
BLOCK1 U332 (PIN[24] , PIN[28] , GIN[28] , GIN[32] , POUT[24] , GOUT[32] );
BLOCK1 U333 (PIN[25] , PIN[29] , GIN[29] , GIN[33] , POUT[25] , GOUT[33] );
BLOCK1 U334 (PIN[26] , PIN[30] , GIN[30] , GIN[34] , POUT[26] , GOUT[34] );
BLOCK1 U335 (PIN[27] , PIN[31] , GIN[31] , GIN[35] , POUT[27] , GOUT[35] );
BLOCK1 U336 (PIN[28] , PIN[32] , GIN[32] , GIN[36] , POUT[28] , GOUT[36] );
BLOCK1 U337 (PIN[29] , PIN[33] , GIN[33] , GIN[37] , POUT[29] , GOUT[37] );
BLOCK1 U338 (PIN[30] , PIN[34] , GIN[34] , GIN[38] , POUT[30] , GOUT[38] );
BLOCK1 U339 (PIN[31] , PIN[35] , GIN[35] , GIN[39] , POUT[31] , GOUT[39] );
BLOCK1 U340 (PIN[32] , PIN[36] , GIN[36] , GIN[40] , POUT[32] , GOUT[40] );
BLOCK1 U341 (PIN[33] , PIN[37] , GIN[37] , GIN[41] , POUT[33] , GOUT[41] );
BLOCK1 U342 (PIN[34] , PIN[38] , GIN[38] , GIN[42] , POUT[34] , GOUT[42] );
BLOCK1 U343 (PIN[35] , PIN[39] , GIN[39] , GIN[43] , POUT[35] , GOUT[43] );
BLOCK1 U344 (PIN[36] , PIN[40] , GIN[40] , GIN[44] , POUT[36] , GOUT[44] );
BLOCK1 U345 (PIN[37] , PIN[41] , GIN[41] , GIN[45] , POUT[37] , GOUT[45] );
BLOCK1 U346 (PIN[38] , PIN[42] , GIN[42] , GIN[46] , POUT[38] , GOUT[46] );
BLOCK1 U347 (PIN[39] , PIN[43] , GIN[43] , GIN[47] , POUT[39] , GOUT[47] );
BLOCK1 U348 (PIN[40] , PIN[44] , GIN[44] , GIN[48] , POUT[40] , GOUT[48] );
BLOCK1 U349 (PIN[41] , PIN[45] , GIN[45] , GIN[49] , POUT[41] , GOUT[49] );
BLOCK1 U350 (PIN[42] , PIN[46] , GIN[46] , GIN[50] , POUT[42] , GOUT[50] );
BLOCK1 U351 (PIN[43] , PIN[47] , GIN[47] , GIN[51] , POUT[43] , GOUT[51] );
BLOCK1 U352 (PIN[44] , PIN[48] , GIN[48] , GIN[52] , POUT[44] , GOUT[52] );
BLOCK1 U353 (PIN[45] , PIN[49] , GIN[49] , GIN[53] , POUT[45] , GOUT[53] );
BLOCK1 U354 (PIN[46] , PIN[50] , GIN[50] , GIN[54] , POUT[46] , GOUT[54] );
BLOCK1 U355 (PIN[47] , PIN[51] , GIN[51] , GIN[55] , POUT[47] , GOUT[55] );
BLOCK1 U356 (PIN[48] , PIN[52] , GIN[52] , GIN[56] , POUT[48] , GOUT[56] );
BLOCK1 U357 (PIN[49] , PIN[53] , GIN[53] , GIN[57] , POUT[49] , GOUT[57] );
BLOCK1 U358 (PIN[50] , PIN[54] , GIN[54] , GIN[58] , POUT[50] , GOUT[58] );
BLOCK1 U359 (PIN[51] , PIN[55] , GIN[55] , GIN[59] , POUT[51] , GOUT[59] );
BLOCK1 U360 (PIN[52] , PIN[56] , GIN[56] , GIN[60] , POUT[52] , GOUT[60] );
BLOCK1 U361 (PIN[53] , PIN[57] , GIN[57] , GIN[61] , POUT[53] , GOUT[61] );
BLOCK1 U362 (PIN[54] , PIN[58] , GIN[58] , GIN[62] , POUT[54] , GOUT[62] );
BLOCK1 U363 (PIN[55] , PIN[59] , GIN[59] , GIN[63] , POUT[55] , GOUT[63] );
BLOCK1 U364 (PIN[56] , PIN[60] , GIN[60] , GIN[64] , POUT[56] , GOUT[64] );
endmodule // DBLC_2_64
module DBLC_3_64 ( PIN, GIN, POUT, GOUT );
input [0:56] PIN;
input [0:64] GIN;
output [0:48] POUT;
output [0:64] GOUT;
INVBLOCK U10 (GIN[0] , GOUT[0] );
INVBLOCK U11 (GIN[1] , GOUT[1] );
INVBLOCK U12 (GIN[2] , GOUT[2] );
INVBLOCK U13 (GIN[3] , GOUT[3] );
INVBLOCK U14 (GIN[4] , GOUT[4] );
INVBLOCK U15 (GIN[5] , GOUT[5] );
INVBLOCK U16 (GIN[6] , GOUT[6] );
INVBLOCK U17 (GIN[7] , GOUT[7] );
BLOCK2A U28 (PIN[0] , GIN[0] , GIN[8] , GOUT[8] );
BLOCK2A U29 (PIN[1] , GIN[1] , GIN[9] , GOUT[9] );
BLOCK2A U210 (PIN[2] , GIN[2] , GIN[10] , GOUT[10] );
BLOCK2A U211 (PIN[3] , GIN[3] , GIN[11] , GOUT[11] );
BLOCK2A U212 (PIN[4] , GIN[4] , GIN[12] , GOUT[12] );
BLOCK2A U213 (PIN[5] , GIN[5] , GIN[13] , GOUT[13] );
BLOCK2A U214 (PIN[6] , GIN[6] , GIN[14] , GOUT[14] );
BLOCK2A U215 (PIN[7] , GIN[7] , GIN[15] , GOUT[15] );
BLOCK2 U316 (PIN[0] , PIN[8] , GIN[8] , GIN[16] , POUT[0] , GOUT[16] );
BLOCK2 U317 (PIN[1] , PIN[9] , GIN[9] , GIN[17] , POUT[1] , GOUT[17] );
BLOCK2 U318 (PIN[2] , PIN[10] , GIN[10] , GIN[18] , POUT[2] , GOUT[18] );
BLOCK2 U319 (PIN[3] , PIN[11] , GIN[11] , GIN[19] , POUT[3] , GOUT[19] );
BLOCK2 U320 (PIN[4] , PIN[12] , GIN[12] , GIN[20] , POUT[4] , GOUT[20] );
BLOCK2 U321 (PIN[5] , PIN[13] , GIN[13] , GIN[21] , POUT[5] , GOUT[21] );
BLOCK2 U322 (PIN[6] , PIN[14] , GIN[14] , GIN[22] , POUT[6] , GOUT[22] );
BLOCK2 U323 (PIN[7] , PIN[15] , GIN[15] , GIN[23] , POUT[7] , GOUT[23] );
BLOCK2 U324 (PIN[8] , PIN[16] , GIN[16] , GIN[24] , POUT[8] , GOUT[24] );
BLOCK2 U325 (PIN[9] , PIN[17] , GIN[17] , GIN[25] , POUT[9] , GOUT[25] );
BLOCK2 U326 (PIN[10] , PIN[18] , GIN[18] , GIN[26] , POUT[10] , GOUT[26] );
BLOCK2 U327 (PIN[11] , PIN[19] , GIN[19] , GIN[27] , POUT[11] , GOUT[27] );
BLOCK2 U328 (PIN[12] , PIN[20] , GIN[20] , GIN[28] , POUT[12] , GOUT[28] );
BLOCK2 U329 (PIN[13] , PIN[21] , GIN[21] , GIN[29] , POUT[13] , GOUT[29] );
BLOCK2 U330 (PIN[14] , PIN[22] , GIN[22] , GIN[30] , POUT[14] , GOUT[30] );
BLOCK2 U331 (PIN[15] , PIN[23] , GIN[23] , GIN[31] , POUT[15] , GOUT[31] );
BLOCK2 U332 (PIN[16] , PIN[24] , GIN[24] , GIN[32] , POUT[16] , GOUT[32] );
BLOCK2 U333 (PIN[17] , PIN[25] , GIN[25] , GIN[33] , POUT[17] , GOUT[33] );
BLOCK2 U334 (PIN[18] , PIN[26] , GIN[26] , GIN[34] , POUT[18] , GOUT[34] );
BLOCK2 U335 (PIN[19] , PIN[27] , GIN[27] , GIN[35] , POUT[19] , GOUT[35] );
BLOCK2 U336 (PIN[20] , PIN[28] , GIN[28] , GIN[36] , POUT[20] , GOUT[36] );
BLOCK2 U337 (PIN[21] , PIN[29] , GIN[29] , GIN[37] , POUT[21] , GOUT[37] );
BLOCK2 U338 (PIN[22] , PIN[30] , GIN[30] , GIN[38] , POUT[22] , GOUT[38] );
BLOCK2 U339 (PIN[23] , PIN[31] , GIN[31] , GIN[39] , POUT[23] , GOUT[39] );
BLOCK2 U340 (PIN[24] , PIN[32] , GIN[32] , GIN[40] , POUT[24] , GOUT[40] );
BLOCK2 U341 (PIN[25] , PIN[33] , GIN[33] , GIN[41] , POUT[25] , GOUT[41] );
BLOCK2 U342 (PIN[26] , PIN[34] , GIN[34] , GIN[42] , POUT[26] , GOUT[42] );
BLOCK2 U343 (PIN[27] , PIN[35] , GIN[35] , GIN[43] , POUT[27] , GOUT[43] );
BLOCK2 U344 (PIN[28] , PIN[36] , GIN[36] , GIN[44] , POUT[28] , GOUT[44] );
BLOCK2 U345 (PIN[29] , PIN[37] , GIN[37] , GIN[45] , POUT[29] , GOUT[45] );
BLOCK2 U346 (PIN[30] , PIN[38] , GIN[38] , GIN[46] , POUT[30] , GOUT[46] );
BLOCK2 U347 (PIN[31] , PIN[39] , GIN[39] , GIN[47] , POUT[31] , GOUT[47] );
BLOCK2 U348 (PIN[32] , PIN[40] , GIN[40] , GIN[48] , POUT[32] , GOUT[48] );
BLOCK2 U349 (PIN[33] , PIN[41] , GIN[41] , GIN[49] , POUT[33] , GOUT[49] );
BLOCK2 U350 (PIN[34] , PIN[42] , GIN[42] , GIN[50] , POUT[34] , GOUT[50] );
BLOCK2 U351 (PIN[35] , PIN[43] , GIN[43] , GIN[51] , POUT[35] , GOUT[51] );
BLOCK2 U352 (PIN[36] , PIN[44] , GIN[44] , GIN[52] , POUT[36] , GOUT[52] );
BLOCK2 U353 (PIN[37] , PIN[45] , GIN[45] , GIN[53] , POUT[37] , GOUT[53] );
BLOCK2 U354 (PIN[38] , PIN[46] , GIN[46] , GIN[54] , POUT[38] , GOUT[54] );
BLOCK2 U355 (PIN[39] , PIN[47] , GIN[47] , GIN[55] , POUT[39] , GOUT[55] );
BLOCK2 U356 (PIN[40] , PIN[48] , GIN[48] , GIN[56] , POUT[40] , GOUT[56] );
BLOCK2 U357 (PIN[41] , PIN[49] , GIN[49] , GIN[57] , POUT[41] , GOUT[57] );
BLOCK2 U358 (PIN[42] , PIN[50] , GIN[50] , GIN[58] , POUT[42] , GOUT[58] );
BLOCK2 U359 (PIN[43] , PIN[51] , GIN[51] , GIN[59] , POUT[43] , GOUT[59] );
BLOCK2 U360 (PIN[44] , PIN[52] , GIN[52] , GIN[60] , POUT[44] , GOUT[60] );
BLOCK2 U361 (PIN[45] , PIN[53] , GIN[53] , GIN[61] , POUT[45] , GOUT[61] );
BLOCK2 U362 (PIN[46] , PIN[54] , GIN[54] , GIN[62] , POUT[46] , GOUT[62] );
BLOCK2 U363 (PIN[47] , PIN[55] , GIN[55] , GIN[63] , POUT[47] , GOUT[63] );
BLOCK2 U364 (PIN[48] , PIN[56] , GIN[56] , GIN[64] , POUT[48] , GOUT[64] );
endmodule // DBLC_3_64
module DBLC_4_64 ( PIN, GIN, POUT, GOUT );
input [0:48] PIN;
input [0:64] GIN;
output [0:32] POUT;
output [0:64] GOUT;
INVBLOCK U10 (GIN[0] , GOUT[0] );
INVBLOCK U11 (GIN[1] , GOUT[1] );
INVBLOCK U12 (GIN[2] , GOUT[2] );
INVBLOCK U13 (GIN[3] , GOUT[3] );
INVBLOCK U14 (GIN[4] , GOUT[4] );
INVBLOCK U15 (GIN[5] , GOUT[5] );
INVBLOCK U16 (GIN[6] , GOUT[6] );
INVBLOCK U17 (GIN[7] , GOUT[7] );
INVBLOCK U18 (GIN[8] , GOUT[8] );
INVBLOCK U19 (GIN[9] , GOUT[9] );
INVBLOCK U110 (GIN[10] , GOUT[10] );
INVBLOCK U111 (GIN[11] , GOUT[11] );
INVBLOCK U112 (GIN[12] , GOUT[12] );
INVBLOCK U113 (GIN[13] , GOUT[13] );
INVBLOCK U114 (GIN[14] , GOUT[14] );
INVBLOCK U115 (GIN[15] , GOUT[15] );
BLOCK1A U216 (PIN[0] , GIN[0] , GIN[16] , GOUT[16] );
BLOCK1A U217 (PIN[1] , GIN[1] , GIN[17] , GOUT[17] );
BLOCK1A U218 (PIN[2] , GIN[2] , GIN[18] , GOUT[18] );
BLOCK1A U219 (PIN[3] , GIN[3] , GIN[19] , GOUT[19] );
BLOCK1A U220 (PIN[4] , GIN[4] , GIN[20] , GOUT[20] );
BLOCK1A U221 (PIN[5] , GIN[5] , GIN[21] , GOUT[21] );
BLOCK1A U222 (PIN[6] , GIN[6] , GIN[22] , GOUT[22] );
BLOCK1A U223 (PIN[7] , GIN[7] , GIN[23] , GOUT[23] );
BLOCK1A U224 (PIN[8] , GIN[8] , GIN[24] , GOUT[24] );
BLOCK1A U225 (PIN[9] , GIN[9] , GIN[25] , GOUT[25] );
BLOCK1A U226 (PIN[10] , GIN[10] , GIN[26] , GOUT[26] );
BLOCK1A U227 (PIN[11] , GIN[11] , GIN[27] , GOUT[27] );
BLOCK1A U228 (PIN[12] , GIN[12] , GIN[28] , GOUT[28] );
BLOCK1A U229 (PIN[13] , GIN[13] , GIN[29] , GOUT[29] );
BLOCK1A U230 (PIN[14] , GIN[14] , GIN[30] , GOUT[30] );
BLOCK1A U231 (PIN[15] , GIN[15] , GIN[31] , GOUT[31] );
BLOCK1 U332 (PIN[0] , PIN[16] , GIN[16] , GIN[32] , POUT[0] , GOUT[32] );
BLOCK1 U333 (PIN[1] , PIN[17] , GIN[17] , GIN[33] , POUT[1] , GOUT[33] );
BLOCK1 U334 (PIN[2] , PIN[18] , GIN[18] , GIN[34] , POUT[2] , GOUT[34] );
BLOCK1 U335 (PIN[3] , PIN[19] , GIN[19] , GIN[35] , POUT[3] , GOUT[35] );
BLOCK1 U336 (PIN[4] , PIN[20] , GIN[20] , GIN[36] , POUT[4] , GOUT[36] );
BLOCK1 U337 (PIN[5] , PIN[21] , GIN[21] , GIN[37] , POUT[5] , GOUT[37] );
BLOCK1 U338 (PIN[6] , PIN[22] , GIN[22] , GIN[38] , POUT[6] , GOUT[38] );
BLOCK1 U339 (PIN[7] , PIN[23] , GIN[23] , GIN[39] , POUT[7] , GOUT[39] );
BLOCK1 U340 (PIN[8] , PIN[24] , GIN[24] , GIN[40] , POUT[8] , GOUT[40] );
BLOCK1 U341 (PIN[9] , PIN[25] , GIN[25] , GIN[41] , POUT[9] , GOUT[41] );
BLOCK1 U342 (PIN[10] , PIN[26] , GIN[26] , GIN[42] , POUT[10] , GOUT[42] );
BLOCK1 U343 (PIN[11] , PIN[27] , GIN[27] , GIN[43] , POUT[11] , GOUT[43] );
BLOCK1 U344 (PIN[12] , PIN[28] , GIN[28] , GIN[44] , POUT[12] , GOUT[44] );
BLOCK1 U345 (PIN[13] , PIN[29] , GIN[29] , GIN[45] , POUT[13] , GOUT[45] );
BLOCK1 U346 (PIN[14] , PIN[30] , GIN[30] , GIN[46] , POUT[14] , GOUT[46] );
BLOCK1 U347 (PIN[15] , PIN[31] , GIN[31] , GIN[47] , POUT[15] , GOUT[47] );
BLOCK1 U348 (PIN[16] , PIN[32] , GIN[32] , GIN[48] , POUT[16] , GOUT[48] );
BLOCK1 U349 (PIN[17] , PIN[33] , GIN[33] , GIN[49] , POUT[17] , GOUT[49] );
BLOCK1 U350 (PIN[18] , PIN[34] , GIN[34] , GIN[50] , POUT[18] , GOUT[50] );
BLOCK1 U351 (PIN[19] , PIN[35] , GIN[35] , GIN[51] , POUT[19] , GOUT[51] );
BLOCK1 U352 (PIN[20] , PIN[36] , GIN[36] , GIN[52] , POUT[20] , GOUT[52] );
BLOCK1 U353 (PIN[21] , PIN[37] , GIN[37] , GIN[53] , POUT[21] , GOUT[53] );
BLOCK1 U354 (PIN[22] , PIN[38] , GIN[38] , GIN[54] , POUT[22] , GOUT[54] );
BLOCK1 U355 (PIN[23] , PIN[39] , GIN[39] , GIN[55] , POUT[23] , GOUT[55] );
BLOCK1 U356 (PIN[24] , PIN[40] , GIN[40] , GIN[56] , POUT[24] , GOUT[56] );
BLOCK1 U357 (PIN[25] , PIN[41] , GIN[41] , GIN[57] , POUT[25] , GOUT[57] );
BLOCK1 U358 (PIN[26] , PIN[42] , GIN[42] , GIN[58] , POUT[26] , GOUT[58] );
BLOCK1 U359 (PIN[27] , PIN[43] , GIN[43] , GIN[59] , POUT[27] , GOUT[59] );
BLOCK1 U360 (PIN[28] , PIN[44] , GIN[44] , GIN[60] , POUT[28] , GOUT[60] );
BLOCK1 U361 (PIN[29] , PIN[45] , GIN[45] , GIN[61] , POUT[29] , GOUT[61] );
BLOCK1 U362 (PIN[30] , PIN[46] , GIN[46] , GIN[62] , POUT[30] , GOUT[62] );
BLOCK1 U363 (PIN[31] , PIN[47] , GIN[47] , GIN[63] , POUT[31] , GOUT[63] );
BLOCK1 U364 (PIN[32] , PIN[48] , GIN[48] , GIN[64] , POUT[32] , GOUT[64] );
endmodule // DBLC_4_64
module DBLC_5_64 ( PIN, GIN, POUT, GOUT );
input [0:32] PIN;
input [0:64] GIN;
output [0:0] POUT;
output [0:64] GOUT;
INVBLOCK U10 (GIN[0] , GOUT[0] );
INVBLOCK U11 (GIN[1] , GOUT[1] );
INVBLOCK U12 (GIN[2] , GOUT[2] );
INVBLOCK U13 (GIN[3] , GOUT[3] );
INVBLOCK U14 (GIN[4] , GOUT[4] );
INVBLOCK U15 (GIN[5] , GOUT[5] );
INVBLOCK U16 (GIN[6] , GOUT[6] );
INVBLOCK U17 (GIN[7] , GOUT[7] );
INVBLOCK U18 (GIN[8] , GOUT[8] );
INVBLOCK U19 (GIN[9] , GOUT[9] );
INVBLOCK U110 (GIN[10] , GOUT[10] );
INVBLOCK U111 (GIN[11] , GOUT[11] );
INVBLOCK U112 (GIN[12] , GOUT[12] );
INVBLOCK U113 (GIN[13] , GOUT[13] );
INVBLOCK U114 (GIN[14] , GOUT[14] );
INVBLOCK U115 (GIN[15] , GOUT[15] );
INVBLOCK U116 (GIN[16] , GOUT[16] );
INVBLOCK U117 (GIN[17] , GOUT[17] );
INVBLOCK U118 (GIN[18] , GOUT[18] );
INVBLOCK U119 (GIN[19] , GOUT[19] );
INVBLOCK U120 (GIN[20] , GOUT[20] );
INVBLOCK U121 (GIN[21] , GOUT[21] );
INVBLOCK U122 (GIN[22] , GOUT[22] );
INVBLOCK U123 (GIN[23] , GOUT[23] );
INVBLOCK U124 (GIN[24] , GOUT[24] );
INVBLOCK U125 (GIN[25] , GOUT[25] );
INVBLOCK U126 (GIN[26] , GOUT[26] );
INVBLOCK U127 (GIN[27] , GOUT[27] );
INVBLOCK U128 (GIN[28] , GOUT[28] );
INVBLOCK U129 (GIN[29] , GOUT[29] );
INVBLOCK U130 (GIN[30] , GOUT[30] );
INVBLOCK U131 (GIN[31] , GOUT[31] );
BLOCK2A U232 (PIN[0] , GIN[0] , GIN[32] , GOUT[32] );
BLOCK2A U233 (PIN[1] , GIN[1] , GIN[33] , GOUT[33] );
BLOCK2A U234 (PIN[2] , GIN[2] , GIN[34] , GOUT[34] );
BLOCK2A U235 (PIN[3] , GIN[3] , GIN[35] , GOUT[35] );
BLOCK2A U236 (PIN[4] , GIN[4] , GIN[36] , GOUT[36] );
BLOCK2A U237 (PIN[5] , GIN[5] , GIN[37] , GOUT[37] );
BLOCK2A U238 (PIN[6] , GIN[6] , GIN[38] , GOUT[38] );
BLOCK2A U239 (PIN[7] , GIN[7] , GIN[39] , GOUT[39] );
BLOCK2A U240 (PIN[8] , GIN[8] , GIN[40] , GOUT[40] );
BLOCK2A U241 (PIN[9] , GIN[9] , GIN[41] , GOUT[41] );
BLOCK2A U242 (PIN[10] , GIN[10] , GIN[42] , GOUT[42] );
BLOCK2A U243 (PIN[11] , GIN[11] , GIN[43] , GOUT[43] );
BLOCK2A U244 (PIN[12] , GIN[12] , GIN[44] , GOUT[44] );
BLOCK2A U245 (PIN[13] , GIN[13] , GIN[45] , GOUT[45] );
BLOCK2A U246 (PIN[14] , GIN[14] , GIN[46] , GOUT[46] );
BLOCK2A U247 (PIN[15] , GIN[15] , GIN[47] , GOUT[47] );
BLOCK2A U248 (PIN[16] , GIN[16] , GIN[48] , GOUT[48] );
BLOCK2A U249 (PIN[17] , GIN[17] , GIN[49] , GOUT[49] );
BLOCK2A U250 (PIN[18] , GIN[18] , GIN[50] , GOUT[50] );
BLOCK2A U251 (PIN[19] , GIN[19] , GIN[51] , GOUT[51] );
BLOCK2A U252 (PIN[20] , GIN[20] , GIN[52] , GOUT[52] );
BLOCK2A U253 (PIN[21] , GIN[21] , GIN[53] , GOUT[53] );
BLOCK2A U254 (PIN[22] , GIN[22] , GIN[54] , GOUT[54] );
BLOCK2A U255 (PIN[23] , GIN[23] , GIN[55] , GOUT[55] );
BLOCK2A U256 (PIN[24] , GIN[24] , GIN[56] , GOUT[56] );
BLOCK2A U257 (PIN[25] , GIN[25] , GIN[57] , GOUT[57] );
BLOCK2A U258 (PIN[26] , GIN[26] , GIN[58] , GOUT[58] );
BLOCK2A U259 (PIN[27] , GIN[27] , GIN[59] , GOUT[59] );
BLOCK2A U260 (PIN[28] , GIN[28] , GIN[60] , GOUT[60] );
BLOCK2A U261 (PIN[29] , GIN[29] , GIN[61] , GOUT[61] );
BLOCK2A U262 (PIN[30] , GIN[30] , GIN[62] , GOUT[62] );
BLOCK2A U263 (PIN[31] , GIN[31] , GIN[63] , GOUT[63] );
BLOCK2 U364 (PIN[0] , PIN[32] , GIN[32] , GIN[64] , POUT[0] , GOUT[64] );
endmodule // DBLC_5_64
module XORSTAGE_64 ( A, B, PBIT, CARRY, SUM, COUT );
input [0:63] A;
input [0:63] B;
input PBIT;
input [0:64] CARRY;
output [0:63] SUM;
output COUT;
XXOR1 U20 (A[0] , B[0] , CARRY[0] , SUM[0] );
XXOR1 U21 (A[1] , B[1] , CARRY[1] , SUM[1] );
XXOR1 U22 (A[2] , B[2] , CARRY[2] , SUM[2] );
XXOR1 U23 (A[3] , B[3] , CARRY[3] , SUM[3] );
XXOR1 U24 (A[4] , B[4] , CARRY[4] , SUM[4] );
XXOR1 U25 (A[5] , B[5] , CARRY[5] , SUM[5] );
XXOR1 U26 (A[6] , B[6] , CARRY[6] , SUM[6] );
XXOR1 U27 (A[7] , B[7] , CARRY[7] , SUM[7] );
XXOR1 U28 (A[8] , B[8] , CARRY[8] , SUM[8] );
XXOR1 U29 (A[9] , B[9] , CARRY[9] , SUM[9] );
XXOR1 U210 (A[10] , B[10] , CARRY[10] , SUM[10] );
XXOR1 U211 (A[11] , B[11] , CARRY[11] , SUM[11] );
XXOR1 U212 (A[12] , B[12] , CARRY[12] , SUM[12] );
XXOR1 U213 (A[13] , B[13] , CARRY[13] , SUM[13] );
XXOR1 U214 (A[14] , B[14] , CARRY[14] , SUM[14] );
XXOR1 U215 (A[15] , B[15] , CARRY[15] , SUM[15] );
XXOR1 U216 (A[16] , B[16] , CARRY[16] , SUM[16] );
XXOR1 U217 (A[17] , B[17] , CARRY[17] , SUM[17] );
XXOR1 U218 (A[18] , B[18] , CARRY[18] , SUM[18] );
XXOR1 U219 (A[19] , B[19] , CARRY[19] , SUM[19] );
XXOR1 U220 (A[20] , B[20] , CARRY[20] , SUM[20] );
XXOR1 U221 (A[21] , B[21] , CARRY[21] , SUM[21] );
XXOR1 U222 (A[22] , B[22] , CARRY[22] , SUM[22] );
XXOR1 U223 (A[23] , B[23] , CARRY[23] , SUM[23] );
XXOR1 U224 (A[24] , B[24] , CARRY[24] , SUM[24] );
XXOR1 U225 (A[25] , B[25] , CARRY[25] , SUM[25] );
XXOR1 U226 (A[26] , B[26] , CARRY[26] , SUM[26] );
XXOR1 U227 (A[27] , B[27] , CARRY[27] , SUM[27] );
XXOR1 U228 (A[28] , B[28] , CARRY[28] , SUM[28] );
XXOR1 U229 (A[29] , B[29] , CARRY[29] , SUM[29] );
XXOR1 U230 (A[30] , B[30] , CARRY[30] , SUM[30] );
XXOR1 U231 (A[31] , B[31] , CARRY[31] , SUM[31] );
XXOR1 U232 (A[32] , B[32] , CARRY[32] , SUM[32] );
XXOR1 U233 (A[33] , B[33] , CARRY[33] , SUM[33] );
XXOR1 U234 (A[34] , B[34] , CARRY[34] , SUM[34] );
XXOR1 U235 (A[35] , B[35] , CARRY[35] , SUM[35] );
XXOR1 U236 (A[36] , B[36] , CARRY[36] , SUM[36] );
XXOR1 U237 (A[37] , B[37] , CARRY[37] , SUM[37] );
XXOR1 U238 (A[38] , B[38] , CARRY[38] , SUM[38] );
XXOR1 U239 (A[39] , B[39] , CARRY[39] , SUM[39] );
XXOR1 U240 (A[40] , B[40] , CARRY[40] , SUM[40] );
XXOR1 U241 (A[41] , B[41] , CARRY[41] , SUM[41] );
XXOR1 U242 (A[42] , B[42] , CARRY[42] , SUM[42] );
XXOR1 U243 (A[43] , B[43] , CARRY[43] , SUM[43] );
XXOR1 U244 (A[44] , B[44] , CARRY[44] , SUM[44] );
XXOR1 U245 (A[45] , B[45] , CARRY[45] , SUM[45] );
XXOR1 U246 (A[46] , B[46] , CARRY[46] , SUM[46] );
XXOR1 U247 (A[47] , B[47] , CARRY[47] , SUM[47] );
XXOR1 U248 (A[48] , B[48] , CARRY[48] , SUM[48] );
XXOR1 U249 (A[49] , B[49] , CARRY[49] , SUM[49] );
XXOR1 U250 (A[50] , B[50] , CARRY[50] , SUM[50] );
XXOR1 U251 (A[51] , B[51] , CARRY[51] , SUM[51] );
XXOR1 U252 (A[52] , B[52] , CARRY[52] , SUM[52] );
XXOR1 U253 (A[53] , B[53] , CARRY[53] , SUM[53] );
XXOR1 U254 (A[54] , B[54] , CARRY[54] , SUM[54] );
XXOR1 U255 (A[55] , B[55] , CARRY[55] , SUM[55] );
XXOR1 U256 (A[56] , B[56] , CARRY[56] , SUM[56] );
XXOR1 U257 (A[57] , B[57] , CARRY[57] , SUM[57] );
XXOR1 U258 (A[58] , B[58] , CARRY[58] , SUM[58] );
XXOR1 U259 (A[59] , B[59] , CARRY[59] , SUM[59] );
XXOR1 U260 (A[60] , B[60] , CARRY[60] , SUM[60] );
XXOR1 U261 (A[61] , B[61] , CARRY[61] , SUM[61] );
XXOR1 U262 (A[62] , B[62] , CARRY[62] , SUM[62] );
XXOR1 U263 (A[63] , B[63] , CARRY[63] , SUM[63] );
BLOCK1A U1 (PBIT , CARRY[0] , CARRY[64] , COUT );
endmodule // XORSTAGE_64
module DBLCTREE_64 ( PIN, GIN, GOUT, POUT );
input [0:63] PIN;
input [0:64] GIN;
output [0:64] GOUT;
output [0:0] POUT;
wire [0:62] INTPROP_0;
wire [0:64] INTGEN_0;
wire [0:60] INTPROP_1;
wire [0:64] INTGEN_1;
wire [0:56] INTPROP_2;
wire [0:64] INTGEN_2;
wire [0:48] INTPROP_3;
wire [0:64] INTGEN_3;
wire [0:32] INTPROP_4;
wire [0:64] INTGEN_4;
DBLC_0_64 U_0 (.PIN(PIN) , .GIN(GIN) , .POUT(INTPROP_0) , .GOUT(INTGEN_0) );
DBLC_1_64 U_1 (.PIN(INTPROP_0) , .GIN(INTGEN_0) , .POUT(INTPROP_1) , .GOUT(INTGEN_1) );
DBLC_2_64 U_2 (.PIN(INTPROP_1) , .GIN(INTGEN_1) , .POUT(INTPROP_2) , .GOUT(INTGEN_2) );
DBLC_3_64 U_3 (.PIN(INTPROP_2) , .GIN(INTGEN_2) , .POUT(INTPROP_3) , .GOUT(INTGEN_3) );
DBLC_4_64 U_4 (.PIN(INTPROP_3) , .GIN(INTGEN_3) , .POUT(INTPROP_4) , .GOUT(INTGEN_4) );
DBLC_5_64 U_5 (.PIN(INTPROP_4) , .GIN(INTGEN_4) , .POUT(POUT) , .GOUT(GOUT) );
endmodule // DBLCTREE_64
module DBLCADDER_64_64 ( OPA, OPB, CIN, SUM, COUT );
input [0:63] OPA;
input [0:63] OPB;
input CIN;
output [0:63] SUM;
output COUT;
wire [0:63] INTPROP;
wire [0:64] INTGEN;
wire [0:0] PBIT;
wire [0:64] CARRY;
PRESTAGE_64 U1 (OPA , OPB , CIN , INTPROP , INTGEN );
DBLCTREE_64 U2 (INTPROP , INTGEN , CARRY , PBIT );
XORSTAGE_64 U3 (OPA[0:63] , OPB[0:63] , PBIT[0] , CARRY[0:64] , SUM , COUT );
endmodule

View File

@ -0,0 +1,620 @@
// Brent-Kung Carry-save Prefix Adder
module bk128 (cout, sum, a, b, cin);
input [127:0] a, b;
input cin;
output [127:0] sum;
output cout;
wire [128:0] p,g,t;
wire [127:0] c;
// pre-computation
assign p={a^b,1'b0};
assign g={a&b, cin};
assign t[1]=p[1];
assign t[2]=p[2];
assign t[3]=p[3]^g[2];
assign t[4]=p[4];
assign t[5]=p[5]^g[4];
assign t[6]=p[6];
assign t[7]=p[7]^g[6];
assign t[8]=p[8];
assign t[9]=p[9]^g[8];
assign t[10]=p[10];
assign t[11]=p[11]^g[10];
assign t[12]=p[12];
assign t[13]=p[13]^g[12];
assign t[14]=p[14];
assign t[15]=p[15]^g[14];
assign t[16]=p[16];
assign t[17]=p[17]^g[16];
assign t[18]=p[18];
assign t[19]=p[19]^g[18];
assign t[20]=p[20];
assign t[21]=p[21]^g[20];
assign t[22]=p[22];
assign t[23]=p[23]^g[22];
assign t[24]=p[24];
assign t[25]=p[25]^g[24];
assign t[26]=p[26];
assign t[27]=p[27]^g[26];
assign t[28]=p[28];
assign t[29]=p[29]^g[28];
assign t[30]=p[30];
assign t[31]=p[31]^g[30];
assign t[32]=p[32];
assign t[33]=p[33]^g[32];
assign t[34]=p[34];
assign t[35]=p[35]^g[34];
assign t[36]=p[36];
assign t[37]=p[37]^g[36];
assign t[38]=p[38];
assign t[39]=p[39]^g[38];
assign t[40]=p[40];
assign t[41]=p[41]^g[40];
assign t[42]=p[42];
assign t[43]=p[43]^g[42];
assign t[44]=p[44];
assign t[45]=p[45]^g[44];
assign t[46]=p[46];
assign t[47]=p[47]^g[46];
assign t[48]=p[48];
assign t[49]=p[49]^g[48];
assign t[50]=p[50];
assign t[51]=p[51]^g[50];
assign t[52]=p[52];
assign t[53]=p[53]^g[52];
assign t[54]=p[54];
assign t[55]=p[55]^g[54];
assign t[56]=p[56];
assign t[57]=p[57]^g[56];
assign t[58]=p[58];
assign t[59]=p[59]^g[58];
assign t[60]=p[60];
assign t[61]=p[61]^g[60];
assign t[62]=p[62];
assign t[63]=p[63]^g[62];
assign t[64]=p[64];
assign t[65]=p[65]^g[64];
assign t[66]=p[66];
assign t[67]=p[67]^g[66];
assign t[68]=p[68];
assign t[69]=p[69]^g[68];
assign t[70]=p[70];
assign t[71]=p[71]^g[70];
assign t[72]=p[72];
assign t[73]=p[73]^g[72];
assign t[74]=p[74];
assign t[75]=p[75]^g[74];
assign t[76]=p[76];
assign t[77]=p[77]^g[76];
assign t[78]=p[78];
assign t[79]=p[79]^g[78];
assign t[80]=p[80];
assign t[81]=p[81]^g[80];
assign t[82]=p[82];
assign t[83]=p[83]^g[82];
assign t[84]=p[84];
assign t[85]=p[85]^g[84];
assign t[86]=p[86];
assign t[87]=p[87]^g[86];
assign t[88]=p[88];
assign t[89]=p[89]^g[88];
assign t[90]=p[90];
assign t[91]=p[91]^g[90];
assign t[92]=p[92];
assign t[93]=p[93]^g[92];
assign t[94]=p[94];
assign t[95]=p[95]^g[94];
assign t[96]=p[96];
assign t[97]=p[97]^g[96];
assign t[98]=p[98];
assign t[99]=p[99]^g[98];
assign t[100]=p[100];
assign t[101]=p[101]^g[100];
assign t[102]=p[102];
assign t[103]=p[103]^g[102];
assign t[104]=p[104];
assign t[105]=p[105]^g[104];
assign t[106]=p[106];
assign t[107]=p[107]^g[106];
assign t[108]=p[108];
assign t[109]=p[109]^g[108];
assign t[110]=p[110];
assign t[111]=p[111]^g[110];
assign t[112]=p[112];
assign t[113]=p[113]^g[112];
assign t[114]=p[114];
assign t[115]=p[115]^g[114];
assign t[116]=p[116];
assign t[117]=p[117]^g[116];
assign t[118]=p[118];
assign t[119]=p[119]^g[118];
assign t[120]=p[120];
assign t[121]=p[121]^g[120];
assign t[122]=p[122];
assign t[123]=p[123]^g[122];
assign t[124]=p[124];
assign t[125]=p[125]^g[124];
assign t[126]=p[126];
assign t[127]=p[127]^g[126];
assign t[128]=p[128];
// prefix tree
brent_kung_cs prefix_tree(c, p[127:0], g[127:0]);
// post-computation
assign sum=p[128:1]^c;
assign cout=g[128]|(p[128]&c[127]);
endmodule
module brent_kung_cs (c, p, g);
input [127:0] p;
input [127:0] g;
output [128:1] c;
// parallel-prefix, Brent-Kung
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
black b_15_14 (G_15_14, P_15_14, {g[15],g[14]}, {p[15],p[14]});
black b_17_16 (G_17_16, P_17_16, {g[17],g[16]}, {p[17],p[16]});
black b_19_18 (G_19_18, P_19_18, {g[19],g[18]}, {p[19],p[18]});
black b_21_20 (G_21_20, P_21_20, {g[21],g[20]}, {p[21],p[20]});
black b_23_22 (G_23_22, P_23_22, {g[23],g[22]}, {p[23],p[22]});
black b_25_24 (G_25_24, P_25_24, {g[25],g[24]}, {p[25],p[24]});
black b_27_26 (G_27_26, P_27_26, {g[27],g[26]}, {p[27],p[26]});
black b_29_28 (G_29_28, P_29_28, {g[29],g[28]}, {p[29],p[28]});
black b_31_30 (G_31_30, P_31_30, {g[31],g[30]}, {p[31],p[30]});
black b_33_32 (G_33_32, P_33_32, {g[33],g[32]}, {p[33],p[32]});
black b_35_34 (G_35_34, P_35_34, {g[35],g[34]}, {p[35],p[34]});
black b_37_36 (G_37_36, P_37_36, {g[37],g[36]}, {p[37],p[36]});
black b_39_38 (G_39_38, P_39_38, {g[39],g[38]}, {p[39],p[38]});
black b_41_40 (G_41_40, P_41_40, {g[41],g[40]}, {p[41],p[40]});
black b_43_42 (G_43_42, P_43_42, {g[43],g[42]}, {p[43],p[42]});
black b_45_44 (G_45_44, P_45_44, {g[45],g[44]}, {p[45],p[44]});
black b_47_46 (G_47_46, P_47_46, {g[47],g[46]}, {p[47],p[46]});
black b_49_48 (G_49_48, P_49_48, {g[49],g[48]}, {p[49],p[48]});
black b_51_50 (G_51_50, P_51_50, {g[51],g[50]}, {p[51],p[50]});
black b_53_52 (G_53_52, P_53_52, {g[53],g[52]}, {p[53],p[52]});
black b_55_54 (G_55_54, P_55_54, {g[55],g[54]}, {p[55],p[54]});
black b_57_56 (G_57_56, P_57_56, {g[57],g[56]}, {p[57],p[56]});
black b_59_58 (G_59_58, P_59_58, {g[59],g[58]}, {p[59],p[58]});
black b_61_60 (G_61_60, P_61_60, {g[61],g[60]}, {p[61],p[60]});
black b_63_62 (G_63_62, P_63_62, {g[63],g[62]}, {p[63],p[62]});
black b_65_64 (G_65_64, P_65_64, {g[65],g[64]}, {p[65],p[64]});
black b_67_66 (G_67_66, P_67_66, {g[67],g[66]}, {p[67],p[66]});
black b_69_68 (G_69_68, P_69_68, {g[69],g[68]}, {p[69],p[68]});
black b_71_70 (G_71_70, P_71_70, {g[71],g[70]}, {p[71],p[70]});
black b_73_72 (G_73_72, P_73_72, {g[73],g[72]}, {p[73],p[72]});
black b_75_74 (G_75_74, P_75_74, {g[75],g[74]}, {p[75],p[74]});
black b_77_76 (G_77_76, P_77_76, {g[77],g[76]}, {p[77],p[76]});
black b_79_78 (G_79_78, P_79_78, {g[79],g[78]}, {p[79],p[78]});
black b_81_80 (G_81_80, P_81_80, {g[81],g[80]}, {p[81],p[80]});
black b_83_82 (G_83_82, P_83_82, {g[83],g[82]}, {p[83],p[82]});
black b_85_84 (G_85_84, P_85_84, {g[85],g[84]}, {p[85],p[84]});
black b_87_86 (G_87_86, P_87_86, {g[87],g[86]}, {p[87],p[86]});
black b_89_88 (G_89_88, P_89_88, {g[89],g[88]}, {p[89],p[88]});
black b_91_90 (G_91_90, P_91_90, {g[91],g[90]}, {p[91],p[90]});
black b_93_92 (G_93_92, P_93_92, {g[93],g[92]}, {p[93],p[92]});
black b_95_94 (G_95_94, P_95_94, {g[95],g[94]}, {p[95],p[94]});
black b_97_96 (G_97_96, P_97_96, {g[97],g[96]}, {p[97],p[96]});
black b_99_98 (G_99_98, P_99_98, {g[99],g[98]}, {p[99],p[98]});
black b_101_100 (G_101_100, P_101_100, {g[101],g[100]}, {p[101],p[100]});
black b_103_102 (G_103_102, P_103_102, {g[103],g[102]}, {p[103],p[102]});
black b_105_104 (G_105_104, P_105_104, {g[105],g[104]}, {p[105],p[104]});
black b_107_106 (G_107_106, P_107_106, {g[107],g[106]}, {p[107],p[106]});
black b_109_108 (G_109_108, P_109_108, {g[109],g[108]}, {p[109],p[108]});
black b_111_110 (G_111_110, P_111_110, {g[111],g[110]}, {p[111],p[110]});
black b_113_112 (G_113_112, P_113_112, {g[113],g[112]}, {p[113],p[112]});
black b_115_114 (G_115_114, P_115_114, {g[115],g[114]}, {p[115],p[114]});
black b_117_116 (G_117_116, P_117_116, {g[117],g[116]}, {p[117],p[116]});
black b_119_118 (G_119_118, P_119_118, {g[119],g[118]}, {p[119],p[118]});
black b_121_120 (G_121_120, P_121_120, {g[121],g[120]}, {p[121],p[120]});
black b_123_122 (G_123_122, P_123_122, {g[123],g[122]}, {p[123],p[122]});
black b_125_124 (G_125_124, P_125_124, {g[125],g[124]}, {p[125],p[124]});
black b_127_126 (G_127_126, P_127_126, {g[127],g[126]}, {p[127],p[126]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12});
black b_19_16 (G_19_16, P_19_16, {G_19_18,G_17_16}, {P_19_18,P_17_16});
black b_23_20 (G_23_20, P_23_20, {G_23_22,G_21_20}, {P_23_22,P_21_20});
black b_27_24 (G_27_24, P_27_24, {G_27_26,G_25_24}, {P_27_26,P_25_24});
black b_31_28 (G_31_28, P_31_28, {G_31_30,G_29_28}, {P_31_30,P_29_28});
black b_35_32 (G_35_32, P_35_32, {G_35_34,G_33_32}, {P_35_34,P_33_32});
black b_39_36 (G_39_36, P_39_36, {G_39_38,G_37_36}, {P_39_38,P_37_36});
black b_43_40 (G_43_40, P_43_40, {G_43_42,G_41_40}, {P_43_42,P_41_40});
black b_47_44 (G_47_44, P_47_44, {G_47_46,G_45_44}, {P_47_46,P_45_44});
black b_51_48 (G_51_48, P_51_48, {G_51_50,G_49_48}, {P_51_50,P_49_48});
black b_55_52 (G_55_52, P_55_52, {G_55_54,G_53_52}, {P_55_54,P_53_52});
black b_59_56 (G_59_56, P_59_56, {G_59_58,G_57_56}, {P_59_58,P_57_56});
black b_63_60 (G_63_60, P_63_60, {G_63_62,G_61_60}, {P_63_62,P_61_60});
black b_67_64 (G_67_64, P_67_64, {G_67_66,G_65_64}, {P_67_66,P_65_64});
black b_71_68 (G_71_68, P_71_68, {G_71_70,G_69_68}, {P_71_70,P_69_68});
black b_75_72 (G_75_72, P_75_72, {G_75_74,G_73_72}, {P_75_74,P_73_72});
black b_79_76 (G_79_76, P_79_76, {G_79_78,G_77_76}, {P_79_78,P_77_76});
black b_83_80 (G_83_80, P_83_80, {G_83_82,G_81_80}, {P_83_82,P_81_80});
black b_87_84 (G_87_84, P_87_84, {G_87_86,G_85_84}, {P_87_86,P_85_84});
black b_91_88 (G_91_88, P_91_88, {G_91_90,G_89_88}, {P_91_90,P_89_88});
black b_95_92 (G_95_92, P_95_92, {G_95_94,G_93_92}, {P_95_94,P_93_92});
black b_99_96 (G_99_96, P_99_96, {G_99_98,G_97_96}, {P_99_98,P_97_96});
black b_103_100 (G_103_100, P_103_100, {G_103_102,G_101_100}, {P_103_102,P_101_100});
black b_107_104 (G_107_104, P_107_104, {G_107_106,G_105_104}, {P_107_106,P_105_104});
black b_111_108 (G_111_108, P_111_108, {G_111_110,G_109_108}, {P_111_110,P_109_108});
black b_115_112 (G_115_112, P_115_112, {G_115_114,G_113_112}, {P_115_114,P_113_112});
black b_119_116 (G_119_116, P_119_116, {G_119_118,G_117_116}, {P_119_118,P_117_116});
black b_123_120 (G_123_120, P_123_120, {G_123_122,G_121_120}, {P_123_122,P_121_120});
black b_127_124 (G_127_124, P_127_124, {G_127_126,G_125_124}, {P_127_126,P_125_124});
// Stage 3: Generates G/P pairs that span 4 bits
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8});
black b_23_16 (G_23_16, P_23_16, {G_23_20,G_19_16}, {P_23_20,P_19_16});
black b_31_24 (G_31_24, P_31_24, {G_31_28,G_27_24}, {P_31_28,P_27_24});
black b_39_32 (G_39_32, P_39_32, {G_39_36,G_35_32}, {P_39_36,P_35_32});
black b_47_40 (G_47_40, P_47_40, {G_47_44,G_43_40}, {P_47_44,P_43_40});
black b_55_48 (G_55_48, P_55_48, {G_55_52,G_51_48}, {P_55_52,P_51_48});
black b_63_56 (G_63_56, P_63_56, {G_63_60,G_59_56}, {P_63_60,P_59_56});
black b_71_64 (G_71_64, P_71_64, {G_71_68,G_67_64}, {P_71_68,P_67_64});
black b_79_72 (G_79_72, P_79_72, {G_79_76,G_75_72}, {P_79_76,P_75_72});
black b_87_80 (G_87_80, P_87_80, {G_87_84,G_83_80}, {P_87_84,P_83_80});
black b_95_88 (G_95_88, P_95_88, {G_95_92,G_91_88}, {P_95_92,P_91_88});
black b_103_96 (G_103_96, P_103_96, {G_103_100,G_99_96}, {P_103_100,P_99_96});
black b_111_104 (G_111_104, P_111_104, {G_111_108,G_107_104}, {P_111_108,P_107_104});
black b_119_112 (G_119_112, P_119_112, {G_119_116,G_115_112}, {P_119_116,P_115_112});
black b_127_120 (G_127_120, P_127_120, {G_127_124,G_123_120}, {P_127_124,P_123_120});
// Stage 4: Generates G/P pairs that span 8 bits
grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8);
black b_31_16 (G_31_16, P_31_16, {G_31_24,G_23_16}, {P_31_24,P_23_16});
black b_47_32 (G_47_32, P_47_32, {G_47_40,G_39_32}, {P_47_40,P_39_32});
black b_63_48 (G_63_48, P_63_48, {G_63_56,G_55_48}, {P_63_56,P_55_48});
black b_79_64 (G_79_64, P_79_64, {G_79_72,G_71_64}, {P_79_72,P_71_64});
black b_95_80 (G_95_80, P_95_80, {G_95_88,G_87_80}, {P_95_88,P_87_80});
black b_111_96 (G_111_96, P_111_96, {G_111_104,G_103_96}, {P_111_104,P_103_96});
black b_127_112 (G_127_112, P_127_112, {G_127_120,G_119_112}, {P_127_120,P_119_112});
// Stage 5: Generates G/P pairs that span 16 bits
grey g_31_0 (G_31_0, {G_31_16,G_15_0}, P_31_16);
black b_63_32 (G_63_32, P_63_32, {G_63_48,G_47_32}, {P_63_48,P_47_32});
black b_95_64 (G_95_64, P_95_64, {G_95_80,G_79_64}, {P_95_80,P_79_64});
black b_127_96 (G_127_96, P_127_96, {G_127_112,G_111_96}, {P_127_112,P_111_96});
// Stage 6: Generates G/P pairs that span 32 bits
grey g_63_0 (G_63_0, {G_63_32,G_31_0}, P_63_32);
black b_127_64 (G_127_64, P_127_64, {G_127_96,G_95_64}, {P_127_96,P_95_64});
// Stage 7: Generates G/P pairs that span 64 bits
grey g_127_0 (G_127_0, {G_127_64,G_63_0}, P_127_64);
// Stage 8: Generates G/P pairs that span 32 bits
grey g_95_0 (G_95_0, {G_95_64,G_63_0}, P_95_64);
// Stage 9: Generates G/P pairs that span 16 bits
grey g_47_0 (G_47_0, {G_47_32,G_31_0}, P_47_32);
grey g_79_0 (G_79_0, {G_79_64,G_63_0}, P_79_64);
grey g_111_0 (G_111_0, {G_111_96,G_95_0}, P_111_96);
// Stage 10: Generates G/P pairs that span 8 bits
grey g_23_0 (G_23_0, {G_23_16,G_15_0}, P_23_16);
grey g_39_0 (G_39_0, {G_39_32,G_31_0}, P_39_32);
grey g_55_0 (G_55_0, {G_55_48,G_47_0}, P_55_48);
grey g_71_0 (G_71_0, {G_71_64,G_63_0}, P_71_64);
grey g_87_0 (G_87_0, {G_87_80,G_79_0}, P_87_80);
grey g_103_0 (G_103_0, {G_103_96,G_95_0}, P_103_96);
grey g_119_0 (G_119_0, {G_119_112,G_111_0}, P_119_112);
// Stage 11: Generates G/P pairs that span 4 bits
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
grey g_19_0 (G_19_0, {G_19_16,G_15_0}, P_19_16);
grey g_27_0 (G_27_0, {G_27_24,G_23_0}, P_27_24);
grey g_35_0 (G_35_0, {G_35_32,G_31_0}, P_35_32);
grey g_43_0 (G_43_0, {G_43_40,G_39_0}, P_43_40);
grey g_51_0 (G_51_0, {G_51_48,G_47_0}, P_51_48);
grey g_59_0 (G_59_0, {G_59_56,G_55_0}, P_59_56);
grey g_67_0 (G_67_0, {G_67_64,G_63_0}, P_67_64);
grey g_75_0 (G_75_0, {G_75_72,G_71_0}, P_75_72);
grey g_83_0 (G_83_0, {G_83_80,G_79_0}, P_83_80);
grey g_91_0 (G_91_0, {G_91_88,G_87_0}, P_91_88);
grey g_99_0 (G_99_0, {G_99_96,G_95_0}, P_99_96);
grey g_107_0 (G_107_0, {G_107_104,G_103_0}, P_107_104);
grey g_115_0 (G_115_0, {G_115_112,G_111_0}, P_115_112);
grey g_123_0 (G_123_0, {G_123_120,G_119_0}, P_123_120);
// Stage 12: Generates G/P pairs that span 2 bits
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12);
grey g_17_0 (G_17_0, {G_17_16,G_15_0}, P_17_16);
grey g_21_0 (G_21_0, {G_21_20,G_19_0}, P_21_20);
grey g_25_0 (G_25_0, {G_25_24,G_23_0}, P_25_24);
grey g_29_0 (G_29_0, {G_29_28,G_27_0}, P_29_28);
grey g_33_0 (G_33_0, {G_33_32,G_31_0}, P_33_32);
grey g_37_0 (G_37_0, {G_37_36,G_35_0}, P_37_36);
grey g_41_0 (G_41_0, {G_41_40,G_39_0}, P_41_40);
grey g_45_0 (G_45_0, {G_45_44,G_43_0}, P_45_44);
grey g_49_0 (G_49_0, {G_49_48,G_47_0}, P_49_48);
grey g_53_0 (G_53_0, {G_53_52,G_51_0}, P_53_52);
grey g_57_0 (G_57_0, {G_57_56,G_55_0}, P_57_56);
grey g_61_0 (G_61_0, {G_61_60,G_59_0}, P_61_60);
grey g_65_0 (G_65_0, {G_65_64,G_63_0}, P_65_64);
grey g_69_0 (G_69_0, {G_69_68,G_67_0}, P_69_68);
grey g_73_0 (G_73_0, {G_73_72,G_71_0}, P_73_72);
grey g_77_0 (G_77_0, {G_77_76,G_75_0}, P_77_76);
grey g_81_0 (G_81_0, {G_81_80,G_79_0}, P_81_80);
grey g_85_0 (G_85_0, {G_85_84,G_83_0}, P_85_84);
grey g_89_0 (G_89_0, {G_89_88,G_87_0}, P_89_88);
grey g_93_0 (G_93_0, {G_93_92,G_91_0}, P_93_92);
grey g_97_0 (G_97_0, {G_97_96,G_95_0}, P_97_96);
grey g_101_0 (G_101_0, {G_101_100,G_99_0}, P_101_100);
grey g_105_0 (G_105_0, {G_105_104,G_103_0}, P_105_104);
grey g_109_0 (G_109_0, {G_109_108,G_107_0}, P_109_108);
grey g_113_0 (G_113_0, {G_113_112,G_111_0}, P_113_112);
grey g_117_0 (G_117_0, {G_117_116,G_115_0}, P_117_116);
grey g_121_0 (G_121_0, {G_121_120,G_119_0}, P_121_120);
grey g_125_0 (G_125_0, {G_125_124,G_123_0}, P_125_124);
// Last grey cell stage
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
grey g_14_0 (G_14_0, {g[14],G_13_0}, p[14]);
grey g_16_0 (G_16_0, {g[16],G_15_0}, p[16]);
grey g_18_0 (G_18_0, {g[18],G_17_0}, p[18]);
grey g_20_0 (G_20_0, {g[20],G_19_0}, p[20]);
grey g_22_0 (G_22_0, {g[22],G_21_0}, p[22]);
grey g_24_0 (G_24_0, {g[24],G_23_0}, p[24]);
grey g_26_0 (G_26_0, {g[26],G_25_0}, p[26]);
grey g_28_0 (G_28_0, {g[28],G_27_0}, p[28]);
grey g_30_0 (G_30_0, {g[30],G_29_0}, p[30]);
grey g_32_0 (G_32_0, {g[32],G_31_0}, p[32]);
grey g_34_0 (G_34_0, {g[34],G_33_0}, p[34]);
grey g_36_0 (G_36_0, {g[36],G_35_0}, p[36]);
grey g_38_0 (G_38_0, {g[38],G_37_0}, p[38]);
grey g_40_0 (G_40_0, {g[40],G_39_0}, p[40]);
grey g_42_0 (G_42_0, {g[42],G_41_0}, p[42]);
grey g_44_0 (G_44_0, {g[44],G_43_0}, p[44]);
grey g_46_0 (G_46_0, {g[46],G_45_0}, p[46]);
grey g_48_0 (G_48_0, {g[48],G_47_0}, p[48]);
grey g_50_0 (G_50_0, {g[50],G_49_0}, p[50]);
grey g_52_0 (G_52_0, {g[52],G_51_0}, p[52]);
grey g_54_0 (G_54_0, {g[54],G_53_0}, p[54]);
grey g_56_0 (G_56_0, {g[56],G_55_0}, p[56]);
grey g_58_0 (G_58_0, {g[58],G_57_0}, p[58]);
grey g_60_0 (G_60_0, {g[60],G_59_0}, p[60]);
grey g_62_0 (G_62_0, {g[62],G_61_0}, p[62]);
grey g_64_0 (G_64_0, {g[64],G_63_0}, p[64]);
grey g_66_0 (G_66_0, {g[66],G_65_0}, p[66]);
grey g_68_0 (G_68_0, {g[68],G_67_0}, p[68]);
grey g_70_0 (G_70_0, {g[70],G_69_0}, p[70]);
grey g_72_0 (G_72_0, {g[72],G_71_0}, p[72]);
grey g_74_0 (G_74_0, {g[74],G_73_0}, p[74]);
grey g_76_0 (G_76_0, {g[76],G_75_0}, p[76]);
grey g_78_0 (G_78_0, {g[78],G_77_0}, p[78]);
grey g_80_0 (G_80_0, {g[80],G_79_0}, p[80]);
grey g_82_0 (G_82_0, {g[82],G_81_0}, p[82]);
grey g_84_0 (G_84_0, {g[84],G_83_0}, p[84]);
grey g_86_0 (G_86_0, {g[86],G_85_0}, p[86]);
grey g_88_0 (G_88_0, {g[88],G_87_0}, p[88]);
grey g_90_0 (G_90_0, {g[90],G_89_0}, p[90]);
grey g_92_0 (G_92_0, {g[92],G_91_0}, p[92]);
grey g_94_0 (G_94_0, {g[94],G_93_0}, p[94]);
grey g_96_0 (G_96_0, {g[96],G_95_0}, p[96]);
grey g_98_0 (G_98_0, {g[98],G_97_0}, p[98]);
grey g_100_0 (G_100_0, {g[100],G_99_0}, p[100]);
grey g_102_0 (G_102_0, {g[102],G_101_0}, p[102]);
grey g_104_0 (G_104_0, {g[104],G_103_0}, p[104]);
grey g_106_0 (G_106_0, {g[106],G_105_0}, p[106]);
grey g_108_0 (G_108_0, {g[108],G_107_0}, p[108]);
grey g_110_0 (G_110_0, {g[110],G_109_0}, p[110]);
grey g_112_0 (G_112_0, {g[112],G_111_0}, p[112]);
grey g_114_0 (G_114_0, {g[114],G_113_0}, p[114]);
grey g_116_0 (G_116_0, {g[116],G_115_0}, p[116]);
grey g_118_0 (G_118_0, {g[118],G_117_0}, p[118]);
grey g_120_0 (G_120_0, {g[120],G_119_0}, p[120]);
grey g_122_0 (G_122_0, {g[122],G_121_0}, p[122]);
grey g_124_0 (G_124_0, {g[124],G_123_0}, p[124]);
grey g_126_0 (G_126_0, {g[126],G_125_0}, p[126]);
// Final Stage: Apply c_k+1=G_k_0
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
assign c[9]=G_8_0;
assign c[10]=G_9_0;
assign c[11]=G_10_0;
assign c[12]=G_11_0;
assign c[13]=G_12_0;
assign c[14]=G_13_0;
assign c[15]=G_14_0;
assign c[16]=G_15_0;
assign c[17]=G_16_0;
assign c[18]=G_17_0;
assign c[19]=G_18_0;
assign c[20]=G_19_0;
assign c[21]=G_20_0;
assign c[22]=G_21_0;
assign c[23]=G_22_0;
assign c[24]=G_23_0;
assign c[25]=G_24_0;
assign c[26]=G_25_0;
assign c[27]=G_26_0;
assign c[28]=G_27_0;
assign c[29]=G_28_0;
assign c[30]=G_29_0;
assign c[31]=G_30_0;
assign c[32]=G_31_0;
assign c[33]=G_32_0;
assign c[34]=G_33_0;
assign c[35]=G_34_0;
assign c[36]=G_35_0;
assign c[37]=G_36_0;
assign c[38]=G_37_0;
assign c[39]=G_38_0;
assign c[40]=G_39_0;
assign c[41]=G_40_0;
assign c[42]=G_41_0;
assign c[43]=G_42_0;
assign c[44]=G_43_0;
assign c[45]=G_44_0;
assign c[46]=G_45_0;
assign c[47]=G_46_0;
assign c[48]=G_47_0;
assign c[49]=G_48_0;
assign c[50]=G_49_0;
assign c[51]=G_50_0;
assign c[52]=G_51_0;
assign c[53]=G_52_0;
assign c[54]=G_53_0;
assign c[55]=G_54_0;
assign c[56]=G_55_0;
assign c[57]=G_56_0;
assign c[58]=G_57_0;
assign c[59]=G_58_0;
assign c[60]=G_59_0;
assign c[61]=G_60_0;
assign c[62]=G_61_0;
assign c[63]=G_62_0;
assign c[64]=G_63_0;
assign c[65]=G_64_0;
assign c[66]=G_65_0;
assign c[67]=G_66_0;
assign c[68]=G_67_0;
assign c[69]=G_68_0;
assign c[70]=G_69_0;
assign c[71]=G_70_0;
assign c[72]=G_71_0;
assign c[73]=G_72_0;
assign c[74]=G_73_0;
assign c[75]=G_74_0;
assign c[76]=G_75_0;
assign c[77]=G_76_0;
assign c[78]=G_77_0;
assign c[79]=G_78_0;
assign c[80]=G_79_0;
assign c[81]=G_80_0;
assign c[82]=G_81_0;
assign c[83]=G_82_0;
assign c[84]=G_83_0;
assign c[85]=G_84_0;
assign c[86]=G_85_0;
assign c[87]=G_86_0;
assign c[88]=G_87_0;
assign c[89]=G_88_0;
assign c[90]=G_89_0;
assign c[91]=G_90_0;
assign c[92]=G_91_0;
assign c[93]=G_92_0;
assign c[94]=G_93_0;
assign c[95]=G_94_0;
assign c[96]=G_95_0;
assign c[97]=G_96_0;
assign c[98]=G_97_0;
assign c[99]=G_98_0;
assign c[100]=G_99_0;
assign c[101]=G_100_0;
assign c[102]=G_101_0;
assign c[103]=G_102_0;
assign c[104]=G_103_0;
assign c[105]=G_104_0;
assign c[106]=G_105_0;
assign c[107]=G_106_0;
assign c[108]=G_107_0;
assign c[109]=G_108_0;
assign c[110]=G_109_0;
assign c[111]=G_110_0;
assign c[112]=G_111_0;
assign c[113]=G_112_0;
assign c[114]=G_113_0;
assign c[115]=G_114_0;
assign c[116]=G_115_0;
assign c[117]=G_116_0;
assign c[118]=G_117_0;
assign c[119]=G_118_0;
assign c[120]=G_119_0;
assign c[121]=G_120_0;
assign c[122]=G_121_0;
assign c[123]=G_122_0;
assign c[124]=G_123_0;
assign c[125]=G_124_0;
assign c[126]=G_125_0;
assign c[127]=G_126_0;
assign c[128]=G_127_0;
endmodule // brent_kung_cs
// Black cell
module black(gout, pout, gin, pin);
input [1:0] gin, pin;
output gout, pout;
assign pout=pin[1]&pin[0];
assign gout=gin[1]|(pin[1]&gin[0]);
endmodule // black
// Grey cell
module grey(gout, gin, pin);
input[1:0] gin;
input pin;
output gout;
assign gout=gin[1]|(pin&gin[0]);
endmodule // grey

View File

@ -0,0 +1,120 @@
// Brent-Kung Carry-save Prefix Adder
module exp_add (cout, sum, a, b, cin);
input [12:0] a, b;
input cin;
output [12:0] sum;
output cout;
wire [13:0] p,g,t;
wire [12:0] c;
// pre-computation
assign p={a^b,1'b0};
assign g={a&b, cin};
assign t[1]=p[1];
assign t[2]=p[2];
assign t[3]=p[3]^g[2];
assign t[4]=p[4];
assign t[5]=p[5]^g[4];
assign t[6]=p[6];
assign t[7]=p[7]^g[6];
assign t[8]=p[8];
assign t[9]=p[9]^g[8];
assign t[10]=p[10];
assign t[11]=p[11]^g[10];
assign t[12]=p[12];
assign t[13]=p[13];
// prefix tree
brent_kung_cs prefix_tree(c, p[12:0], g[12:0]);
// post-computation
assign sum=p[13:1]^c;
assign cout=g[13]|(p[13]&c[12]);
endmodule
module brent_kung_cs (c, p, g);
input [13:0] p;
input [13:0] g;
output [13:1] c;
// parallel-prefix, Brent-Kung
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
// Stage 3: Generates G/P pairs that span 4 bits
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
// Stage 4: Generates G/P pairs that span 8 bits
// Stage 5: Generates G/P pairs that span 4 bits
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
// Stage 6: Generates G/P pairs that span 2 bits
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
// Last grey cell stage
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
// Final Stage: Apply c_k+1=G_k_0
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
assign c[9]=G_8_0;
assign c[10]=G_9_0;
assign c[11]=G_10_0;
assign c[12]=G_11_0;
assign c[13]=G_12_0;
endmodule
// Black cell
module black(gout, pout, gin, pin);
input [1:0] gin, pin;
output gout, pout;
assign pout=pin[1]&pin[0];
assign gout=gin[1]|(pin[1]&gin[0]);
endmodule
// Grey cell
module grey(gout, gin, pin);
input[1:0] gin;
input pin;
output gout;
assign gout=gin[1]|(pin&gin[0]);
endmodule

View File

@ -0,0 +1,109 @@
// Brent-Kung Prefix Adder
module add (cout, sum, a, b, cin);
input [13:0] a, b;
input cin;
output [13:0] sum;
output cout;
wire [14:0] p,g;
wire [13:0] c;
// pre-computation
assign p={a^b,1'b0};
assign g={a&b, cin};
// prefix tree
brent_kung prefix_tree(c, p[13:0], g[13:0]);
// post-computation
assign sum=p[14:1]^c;
assign cout=g[14]|(p[14]&c[13]);
endmodule
module brent_kung (c, p, g);
input [13:0] p;
input [13:0] g;
output [14:1] c;
// parallel-prefix, Brent-Kung
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
// Stage 3: Generates G/P pairs that span 4 bits
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
// Stage 4: Generates G/P pairs that span 8 bits
// Stage 5: Generates G/P pairs that span 4 bits
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
// Stage 6: Generates G/P pairs that span 2 bits
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12);
// Last grey cell stage
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
// Final Stage: Apply c_k+1=G_k_0
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
assign c[9]=G_8_0;
assign c[10]=G_9_0;
assign c[11]=G_10_0;
assign c[12]=G_11_0;
assign c[13]=G_12_0;
assign c[14]=G_13_0;
endmodule
// Black cell
module black(gout, pout, gin, pin);
input [1:0] gin, pin;
output gout, pout;
assign pout=pin[1]&pin[0];
assign gout=gin[1]|(pin[1]&gin[0]);
endmodule
// Grey cell
module grey(gout, gin, pin);
input[1:0] gin;
input pin;
output gout;
assign gout=gin[1]|(pin&gin[0]);
endmodule

View File

@ -0,0 +1,155 @@
// Kogge-Stone Prefix Adder
module bk15 (cout, sum, a, b, cin);
input [14:0] a, b;
input cin;
output [14:0] sum;
output cout;
wire [15:0] p,g;
wire [15:1] h,c;
// pre-computation
assign p={a|b,1'b1};
assign g={a&b, cin};
// prefix tree
kogge_stone prefix_tree(h, c, p[14:0], g[14:0]);
// post-computation
assign h[15]=g[15]|c[15];
assign sum=p[15:1]^h|g[15:1]&c;
assign cout=p[15]&h[15];
endmodule // bk15
module kogge_stone (h, c, p, g);
input [14:0] p;
input [14:0] g;
output [15:1] h;
output [15:1] c;
// parallel-prefix, Kogge-Stone
// Stage 1: Generates G/P pairs that span 1 bits
rgry g_1_0 (H_1_0, {g[1],g[0]});
rblk b_2_1 (H_2_1, I_2_1, {g[2],g[1]}, {p[1],p[0]});
rblk b_3_2 (H_3_2, I_3_2, {g[3],g[2]}, {p[2],p[1]});
rblk b_4_3 (H_4_3, I_4_3, {g[4],g[3]}, {p[3],p[2]});
rblk b_5_4 (H_5_4, I_5_4, {g[5],g[4]}, {p[4],p[3]});
rblk b_6_5 (H_6_5, I_6_5, {g[6],g[5]}, {p[5],p[4]});
rblk b_7_6 (H_7_6, I_7_6, {g[7],g[6]}, {p[6],p[5]});
rblk b_8_7 (H_8_7, I_8_7, {g[8],g[7]}, {p[7],p[6]});
rblk b_9_8 (H_9_8, I_9_8, {g[9],g[8]}, {p[8],p[7]});
rblk b_10_9 (H_10_9, I_10_9, {g[10],g[9]}, {p[9],p[8]});
rblk b_11_10 (H_11_10, I_11_10, {g[11],g[10]}, {p[10],p[9]});
rblk b_12_11 (H_12_11, I_12_11, {g[12],g[11]}, {p[11],p[10]});
rblk b_13_12 (H_13_12, I_13_12, {g[13],g[12]}, {p[12],p[11]});
rblk b_14_13 (H_14_13, I_14_13, {g[14],g[13]}, {p[13],p[12]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_2_0 (H_2_0, {H_2_1,g[0]}, I_2_1);
grey g_3_0 (H_3_0, {H_3_2,H_1_0}, I_3_2);
black b_4_1 (H_4_1, I_4_1, {H_4_3,H_2_1}, {I_4_3,I_2_1});
black b_5_2 (H_5_2, I_5_2, {H_5_4,H_3_2}, {I_5_4,I_3_2});
black b_6_3 (H_6_3, I_6_3, {H_6_5,H_4_3}, {I_6_5,I_4_3});
black b_7_4 (H_7_4, I_7_4, {H_7_6,H_5_4}, {I_7_6,I_5_4});
black b_8_5 (H_8_5, I_8_5, {H_8_7,H_6_5}, {I_8_7,I_6_5});
black b_9_6 (H_9_6, I_9_6, {H_9_8,H_7_6}, {I_9_8,I_7_6});
black b_10_7 (H_10_7, I_10_7, {H_10_9,H_8_7}, {I_10_9,I_8_7});
black b_11_8 (H_11_8, I_11_8, {H_11_10,H_9_8}, {I_11_10,I_9_8});
black b_12_9 (H_12_9, I_12_9, {H_12_11,H_10_9}, {I_12_11,I_10_9});
black b_13_10 (H_13_10, I_13_10, {H_13_12,H_11_10}, {I_13_12,I_11_10});
black b_14_11 (H_14_11, I_14_11, {H_14_13,H_12_11}, {I_14_13,I_12_11});
// Stage 3: Generates G/P pairs that span 4 bits
grey g_4_0 (H_4_0, {H_4_1,g[0]}, I_4_1);
grey g_5_0 (H_5_0, {H_5_2,H_1_0}, I_5_2);
grey g_6_0 (H_6_0, {H_6_3,H_2_0}, I_6_3);
grey g_7_0 (H_7_0, {H_7_4,H_3_0}, I_7_4);
black b_8_1 (H_8_1, I_8_1, {H_8_5,H_4_1}, {I_8_5,I_4_1});
black b_9_2 (H_9_2, I_9_2, {H_9_6,H_5_2}, {I_9_6,I_5_2});
black b_10_3 (H_10_3, I_10_3, {H_10_7,H_6_3}, {I_10_7,I_6_3});
black b_11_4 (H_11_4, I_11_4, {H_11_8,H_7_4}, {I_11_8,I_7_4});
black b_12_5 (H_12_5, I_12_5, {H_12_9,H_8_5}, {I_12_9,I_8_5});
black b_13_6 (H_13_6, I_13_6, {H_13_10,H_9_6}, {I_13_10,I_9_6});
black b_14_7 (H_14_7, I_14_7, {H_14_11,H_10_7}, {I_14_11,I_10_7});
// Stage 4: Generates G/P pairs that span 8 bits
grey g_8_0 (H_8_0, {H_8_1,g[0]}, I_8_1);
grey g_9_0 (H_9_0, {H_9_2,H_1_0}, I_9_2);
grey g_10_0 (H_10_0, {H_10_3,H_2_0}, I_10_3);
grey g_11_0 (H_11_0, {H_11_4,H_3_0}, I_11_4);
grey g_12_0 (H_12_0, {H_12_5,H_4_0}, I_12_5);
grey g_13_0 (H_13_0, {H_13_6,H_5_0}, I_13_6);
grey g_14_0 (H_14_0, {H_14_7,H_6_0}, I_14_7);
// Final Stage: Apply c_k+1=p_k&H_k_0
assign c[1]=g[0];
assign h[1]=H_1_0; assign c[2]=p[1]&H_1_0;
assign h[2]=H_2_0; assign c[3]=p[2]&H_2_0;
assign h[3]=H_3_0; assign c[4]=p[3]&H_3_0;
assign h[4]=H_4_0; assign c[5]=p[4]&H_4_0;
assign h[5]=H_5_0; assign c[6]=p[5]&H_5_0;
assign h[6]=H_6_0; assign c[7]=p[6]&H_6_0;
assign h[7]=H_7_0; assign c[8]=p[7]&H_7_0;
assign h[8]=H_8_0; assign c[9]=p[8]&H_8_0;
assign h[9]=H_9_0; assign c[10]=p[9]&H_9_0;
assign h[10]=H_10_0; assign c[11]=p[10]&H_10_0;
assign h[11]=H_11_0; assign c[12]=p[11]&H_11_0;
assign h[12]=H_12_0; assign c[13]=p[12]&H_12_0;
assign h[13]=H_13_0; assign c[14]=p[13]&H_13_0;
assign h[14]=H_14_0; assign c[15]=p[14]&H_14_0;
endmodule // kogge_stone
// Black cell
module black(gout, pout, gin, pin);
input [1:0] gin, pin;
output gout, pout;
assign pout=pin[1]&pin[0];
assign gout=gin[1]|(pin[1]&gin[0]);
endmodule // black
// Grey cell
module grey(gout, gin, pin);
input[1:0] gin;
input pin;
output gout;
assign gout=gin[1]|(pin&gin[0]);
endmodule // grey
// reduced Black cell
module rblk(hout, iout, gin, pin);
input [1:0] gin, pin;
output hout, iout;
assign iout=pin[1]&pin[0];
assign hout=gin[1]|gin[0];
endmodule // rblk
// reduced Grey cell
module rgry(hout, gin);
input[1:0] gin;
output hout;
assign hout=gin[1]|gin[0];
endmodule // rgry

View File

@ -0,0 +1,331 @@
// This module implements a 12-bit carry lookahead adder. It is used
// for rounding in the floating point adder.
module cla12 (S, CO, X, Y);
input [11:0] X;
input [11:0] Y;
output [11:0] S;
output CO;
wire [0:63] A,B,Q;
wire LOGIC0;
wire CIN;
wire CO_64;
assign LOGIC0 = 0;
assign CIN = 0;
DBLCADDER_64_64 U1 (A , B , CIN, Q , CO_64);
assign A[0] = X[0];
assign B[0] = Y[0];
assign A[1] = X[1];
assign B[1] = Y[1];
assign A[2] = X[2];
assign B[2] = Y[2];
assign A[3] = X[3];
assign B[3] = Y[3];
assign A[4] = X[4];
assign B[4] = Y[4];
assign A[5] = X[5];
assign B[5] = Y[5];
assign A[6] = X[6];
assign B[6] = Y[6];
assign A[7] = X[7];
assign B[7] = Y[7];
assign A[8] = X[8];
assign B[8] = Y[8];
assign A[9] = X[9];
assign B[9] = Y[9];
assign A[10] = X[10];
assign B[10] = Y[10];
assign A[11] = X[11];
assign B[11] = Y[11];
assign A[12] = LOGIC0;
assign B[12] = LOGIC0;
assign A[13] = LOGIC0;
assign B[13] = LOGIC0;
assign A[14] = LOGIC0;
assign B[14] = LOGIC0;
assign A[15] = LOGIC0;
assign B[15] = LOGIC0;
assign A[16] = LOGIC0;
assign B[16] = LOGIC0;
assign A[17] = LOGIC0;
assign B[17] = LOGIC0;
assign A[18] = LOGIC0;
assign B[18] = LOGIC0;
assign A[19] = LOGIC0;
assign B[19] = LOGIC0;
assign A[20] = LOGIC0;
assign B[20] = LOGIC0;
assign A[21] = LOGIC0;
assign B[21] = LOGIC0;
assign A[22] = LOGIC0;
assign B[22] = LOGIC0;
assign A[23] = LOGIC0;
assign B[23] = LOGIC0;
assign A[24] = LOGIC0;
assign B[24] = LOGIC0;
assign A[25] = LOGIC0;
assign B[25] = LOGIC0;
assign A[26] = LOGIC0;
assign B[26] = LOGIC0;
assign A[27] = LOGIC0;
assign B[27] = LOGIC0;
assign A[28] = LOGIC0;
assign B[28] = LOGIC0;
assign A[29] = LOGIC0;
assign B[29] = LOGIC0;
assign A[30] = LOGIC0;
assign B[30] = LOGIC0;
assign A[31] = LOGIC0;
assign B[31] = LOGIC0;
assign A[32] = LOGIC0;
assign B[32] = LOGIC0;
assign A[33] = LOGIC0;
assign B[33] = LOGIC0;
assign A[34] = LOGIC0;
assign B[34] = LOGIC0;
assign A[35] = LOGIC0;
assign B[35] = LOGIC0;
assign A[36] = LOGIC0;
assign B[36] = LOGIC0;
assign A[37] = LOGIC0;
assign B[37] = LOGIC0;
assign A[38] = LOGIC0;
assign B[38] = LOGIC0;
assign A[39] = LOGIC0;
assign B[39] = LOGIC0;
assign A[40] = LOGIC0;
assign B[40] = LOGIC0;
assign A[41] = LOGIC0;
assign B[41] = LOGIC0;
assign A[42] = LOGIC0;
assign B[42] = LOGIC0;
assign A[43] = LOGIC0;
assign B[43] = LOGIC0;
assign A[44] = LOGIC0;
assign B[44] = LOGIC0;
assign A[45] = LOGIC0;
assign B[45] = LOGIC0;
assign A[46] = LOGIC0;
assign B[46] = LOGIC0;
assign A[47] = LOGIC0;
assign B[47] = LOGIC0;
assign A[48] = LOGIC0;
assign B[48] = LOGIC0;
assign A[49] = LOGIC0;
assign B[49] = LOGIC0;
assign A[50] = LOGIC0;
assign B[50] = LOGIC0;
assign A[51] = LOGIC0;
assign B[51] = LOGIC0;
assign A[52] = LOGIC0;
assign B[52] = LOGIC0;
assign A[53] = LOGIC0;
assign B[53] = LOGIC0;
assign A[54] = LOGIC0;
assign B[54] = LOGIC0;
assign A[55] = LOGIC0;
assign B[55] = LOGIC0;
assign A[56] = LOGIC0;
assign B[56] = LOGIC0;
assign A[57] = LOGIC0;
assign B[57] = LOGIC0;
assign A[58] = LOGIC0;
assign B[58] = LOGIC0;
assign A[59] = LOGIC0;
assign B[59] = LOGIC0;
assign A[60] = LOGIC0;
assign B[60] = LOGIC0;
assign A[61] = LOGIC0;
assign B[61] = LOGIC0;
assign A[62] = LOGIC0;
assign B[62] = LOGIC0;
assign A[63] = LOGIC0;
assign B[63] = LOGIC0;
assign S[0] = Q[0];
assign S[1] = Q[1];
assign S[2] = Q[2];
assign S[3] = Q[3];
assign S[4] = Q[4];
assign S[5] = Q[5];
assign S[6] = Q[6];
assign S[7] = Q[7];
assign S[8] = Q[8];
assign S[9] = Q[9];
assign S[10] = Q[10];
assign S[11] = Q[11];
assign CO = Q[12];
endmodule //cla52
// This module implements a 12-bit carry lookahead subtractor. It is used
// for rounding in the floating point adder.
module cla_sub12 (S, X, Y);
input [11:0] X;
input [11:0] Y;
output [11:0] S;
wire [0:63] A,B,Q,Bbar;
wire CO;
wire LOGIC0;
wire VDD;
assign Bbar = ~B;
assign LOGIC0 = 0;
assign VDD = 1;
DBLCADDER_64_64 U1 (A , Bbar , VDD, Q , CO);
assign A[0] = X[0];
assign B[0] = Y[0];
assign A[1] = X[1];
assign B[1] = Y[1];
assign A[2] = X[2];
assign B[2] = Y[2];
assign A[3] = X[3];
assign B[3] = Y[3];
assign A[4] = X[4];
assign B[4] = Y[4];
assign A[5] = X[5];
assign B[5] = Y[5];
assign A[6] = X[6];
assign B[6] = Y[6];
assign A[7] = X[7];
assign B[7] = Y[7];
assign A[8] = X[8];
assign B[8] = Y[8];
assign A[9] = X[9];
assign B[9] = Y[9];
assign A[10] = X[10];
assign B[10] = Y[10];
assign A[11] = X[11];
assign B[11] = Y[11];
assign A[12] = LOGIC0;
assign B[12] = LOGIC0;
assign A[13] = LOGIC0;
assign B[13] = LOGIC0;
assign A[14] = LOGIC0;
assign B[14] = LOGIC0;
assign A[15] = LOGIC0;
assign B[15] = LOGIC0;
assign A[16] = LOGIC0;
assign B[16] = LOGIC0;
assign A[17] = LOGIC0;
assign B[17] = LOGIC0;
assign A[18] = LOGIC0;
assign B[18] = LOGIC0;
assign A[19] = LOGIC0;
assign B[19] = LOGIC0;
assign A[20] = LOGIC0;
assign B[20] = LOGIC0;
assign A[21] = LOGIC0;
assign B[21] = LOGIC0;
assign A[22] = LOGIC0;
assign B[22] = LOGIC0;
assign A[23] = LOGIC0;
assign B[23] = LOGIC0;
assign A[24] = LOGIC0;
assign B[24] = LOGIC0;
assign A[25] = LOGIC0;
assign B[25] = LOGIC0;
assign A[26] = LOGIC0;
assign B[26] = LOGIC0;
assign A[27] = LOGIC0;
assign B[27] = LOGIC0;
assign A[28] = LOGIC0;
assign B[28] = LOGIC0;
assign A[29] = LOGIC0;
assign B[29] = LOGIC0;
assign A[30] = LOGIC0;
assign B[30] = LOGIC0;
assign A[31] = LOGIC0;
assign B[31] = LOGIC0;
assign A[32] = LOGIC0;
assign B[32] = LOGIC0;
assign A[33] = LOGIC0;
assign B[33] = LOGIC0;
assign A[34] = LOGIC0;
assign B[34] = LOGIC0;
assign A[35] = LOGIC0;
assign B[35] = LOGIC0;
assign A[36] = LOGIC0;
assign B[36] = LOGIC0;
assign A[37] = LOGIC0;
assign B[37] = LOGIC0;
assign A[38] = LOGIC0;
assign B[38] = LOGIC0;
assign A[39] = LOGIC0;
assign B[39] = LOGIC0;
assign A[40] = LOGIC0;
assign B[40] = LOGIC0;
assign A[41] = LOGIC0;
assign B[41] = LOGIC0;
assign A[42] = LOGIC0;
assign B[42] = LOGIC0;
assign A[43] = LOGIC0;
assign B[43] = LOGIC0;
assign A[44] = LOGIC0;
assign B[44] = LOGIC0;
assign A[45] = LOGIC0;
assign B[45] = LOGIC0;
assign A[46] = LOGIC0;
assign B[46] = LOGIC0;
assign A[47] = LOGIC0;
assign B[47] = LOGIC0;
assign A[48] = LOGIC0;
assign B[48] = LOGIC0;
assign A[49] = LOGIC0;
assign B[49] = LOGIC0;
assign A[50] = LOGIC0;
assign B[50] = LOGIC0;
assign A[51] = LOGIC0;
assign B[51] = LOGIC0;
assign A[52] = LOGIC0;
assign B[52] = LOGIC0;
assign A[53] = LOGIC0;
assign B[53] = LOGIC0;
assign A[54] = LOGIC0;
assign B[54] = LOGIC0;
assign A[55] = LOGIC0;
assign B[55] = LOGIC0;
assign A[56] = LOGIC0;
assign B[56] = LOGIC0;
assign A[57] = LOGIC0;
assign B[57] = LOGIC0;
assign A[58] = LOGIC0;
assign B[58] = LOGIC0;
assign A[59] = LOGIC0;
assign B[59] = LOGIC0;
assign A[60] = LOGIC0;
assign B[60] = LOGIC0;
assign A[61] = LOGIC0;
assign B[61] = LOGIC0;
assign A[62] = LOGIC0;
assign B[62] = LOGIC0;
assign A[63] = LOGIC0;
assign B[63] = LOGIC0;
assign S[0] = Q[0];
assign S[1] = Q[1];
assign S[2] = Q[2];
assign S[3] = Q[3];
assign S[4] = Q[4];
assign S[5] = Q[5];
assign S[6] = Q[6];
assign S[7] = Q[7];
assign S[8] = Q[8];
assign S[9] = Q[9];
assign S[10] = Q[10];
assign S[11] = Q[11];
assign CO_12 = Q[12];
endmodule //cla_sub52

View File

@ -0,0 +1,408 @@
// This module implements a 52-bit carry lookahead adder. It is used
// for rounding in the floating point adder.
module cla52 (S, CO, X, Y);
input [51:0] X;
input [51:0] Y;
output [51:0] S;
output CO;
wire [0:63] A,B,Q;
wire LOGIC0;
wire CIN;
wire CO_64;
assign LOGIC0 = 0;
assign CIN = 0;
DBLCADDER_64_64 U1 (A , B , CIN, Q , CO_64);
assign A[0] = X[0];
assign B[0] = Y[0];
assign A[1] = X[1];
assign B[1] = Y[1];
assign A[2] = X[2];
assign B[2] = Y[2];
assign A[3] = X[3];
assign B[3] = Y[3];
assign A[4] = X[4];
assign B[4] = Y[4];
assign A[5] = X[5];
assign B[5] = Y[5];
assign A[6] = X[6];
assign B[6] = Y[6];
assign A[7] = X[7];
assign B[7] = Y[7];
assign A[8] = X[8];
assign B[8] = Y[8];
assign A[9] = X[9];
assign B[9] = Y[9];
assign A[10] = X[10];
assign B[10] = Y[10];
assign A[11] = X[11];
assign B[11] = Y[11];
assign A[12] = X[12];
assign B[12] = Y[12];
assign A[13] = X[13];
assign B[13] = Y[13];
assign A[14] = X[14];
assign B[14] = Y[14];
assign A[15] = X[15];
assign B[15] = Y[15];
assign A[16] = X[16];
assign B[16] = Y[16];
assign A[17] = X[17];
assign B[17] = Y[17];
assign A[18] = X[18];
assign B[18] = Y[18];
assign A[19] = X[19];
assign B[19] = Y[19];
assign A[20] = X[20];
assign B[20] = Y[20];
assign A[21] = X[21];
assign B[21] = Y[21];
assign A[22] = X[22];
assign B[22] = Y[22];
assign A[23] = X[23];
assign B[23] = Y[23];
assign A[24] = X[24];
assign B[24] = Y[24];
assign A[25] = X[25];
assign B[25] = Y[25];
assign A[26] = X[26];
assign B[26] = Y[26];
assign A[27] = X[27];
assign B[27] = Y[27];
assign A[28] = X[28];
assign B[28] = Y[28];
assign A[29] = X[29];
assign B[29] = Y[29];
assign A[30] = X[30];
assign B[30] = Y[30];
assign A[31] = X[31];
assign B[31] = Y[31];
assign A[32] = X[32];
assign B[32] = Y[32];
assign A[33] = X[33];
assign B[33] = Y[33];
assign A[34] = X[34];
assign B[34] = Y[34];
assign A[35] = X[35];
assign B[35] = Y[35];
assign A[36] = X[36];
assign B[36] = Y[36];
assign A[37] = X[37];
assign B[37] = Y[37];
assign A[38] = X[38];
assign B[38] = Y[38];
assign A[39] = X[39];
assign B[39] = Y[39];
assign A[40] = X[40];
assign B[40] = Y[40];
assign A[41] = X[41];
assign B[41] = Y[41];
assign A[42] = X[42];
assign B[42] = Y[42];
assign A[43] = X[43];
assign B[43] = Y[43];
assign A[44] = X[44];
assign B[44] = Y[44];
assign A[45] = X[45];
assign B[45] = Y[45];
assign A[46] = X[46];
assign B[46] = Y[46];
assign A[47] = X[47];
assign B[47] = Y[47];
assign A[48] = X[48];
assign B[48] = Y[48];
assign A[49] = X[49];
assign B[49] = Y[49];
assign A[50] = X[50];
assign B[50] = Y[50];
assign A[51] = X[51];
assign B[51] = Y[51];
assign A[52] = LOGIC0;
assign B[52] = LOGIC0;
assign A[53] = LOGIC0;
assign B[53] = LOGIC0;
assign A[54] = LOGIC0;
assign B[54] = LOGIC0;
assign A[55] = LOGIC0;
assign B[55] = LOGIC0;
assign A[56] = LOGIC0;
assign B[56] = LOGIC0;
assign A[57] = LOGIC0;
assign B[57] = LOGIC0;
assign A[58] = LOGIC0;
assign B[58] = LOGIC0;
assign A[59] = LOGIC0;
assign B[59] = LOGIC0;
assign A[60] = LOGIC0;
assign B[60] = LOGIC0;
assign A[61] = LOGIC0;
assign B[61] = LOGIC0;
assign A[62] = LOGIC0;
assign B[62] = LOGIC0;
assign A[63] = LOGIC0;
assign B[63] = LOGIC0;
assign S[0] = Q[0];
assign S[1] = Q[1];
assign S[2] = Q[2];
assign S[3] = Q[3];
assign S[4] = Q[4];
assign S[5] = Q[5];
assign S[6] = Q[6];
assign S[7] = Q[7];
assign S[8] = Q[8];
assign S[9] = Q[9];
assign S[10] = Q[10];
assign S[11] = Q[11];
assign S[12] = Q[12];
assign S[13] = Q[13];
assign S[14] = Q[14];
assign S[15] = Q[15];
assign S[16] = Q[16];
assign S[17] = Q[17];
assign S[18] = Q[18];
assign S[19] = Q[19];
assign S[20] = Q[20];
assign S[21] = Q[21];
assign S[22] = Q[22];
assign S[23] = Q[23];
assign S[24] = Q[24];
assign S[25] = Q[25];
assign S[26] = Q[26];
assign S[27] = Q[27];
assign S[28] = Q[28];
assign S[29] = Q[29];
assign S[30] = Q[30];
assign S[31] = Q[31];
assign S[32] = Q[32];
assign S[33] = Q[33];
assign S[34] = Q[34];
assign S[35] = Q[35];
assign S[36] = Q[36];
assign S[37] = Q[37];
assign S[38] = Q[38];
assign S[39] = Q[39];
assign S[40] = Q[40];
assign S[41] = Q[41];
assign S[42] = Q[42];
assign S[43] = Q[43];
assign S[44] = Q[44];
assign S[45] = Q[45];
assign S[46] = Q[46];
assign S[47] = Q[47];
assign S[48] = Q[48];
assign S[49] = Q[49];
assign S[50] = Q[50];
assign S[51] = Q[51];
assign CO = Q[52];
endmodule //cla52
// This module implements a 52-bit carry lookahead subtractor. It is used
// for rounding in the floating point adder.
module cla_sub52 (S, X, Y);
input [51:0] X;
input [51:0] Y;
output [51:0] S;
wire [0:63] A,B,Q,Bbar;
wire LOGIC0;
wire CIN;
wire CO_52;
assign Bbar = ~B;
assign LOGIC0 = 0;
assign CIN = 0;
DBLCADDER_64_64 U1 (A , Bbar , CIN, Q , CO_64);
assign A[0] = X[0];
assign B[0] = Y[0];
assign A[1] = X[1];
assign B[1] = Y[1];
assign A[2] = X[2];
assign B[2] = Y[2];
assign A[3] = X[3];
assign B[3] = Y[3];
assign A[4] = X[4];
assign B[4] = Y[4];
assign A[5] = X[5];
assign B[5] = Y[5];
assign A[6] = X[6];
assign B[6] = Y[6];
assign A[7] = X[7];
assign B[7] = Y[7];
assign A[8] = X[8];
assign B[8] = Y[8];
assign A[9] = X[9];
assign B[9] = Y[9];
assign A[10] = X[10];
assign B[10] = Y[10];
assign A[11] = X[11];
assign B[11] = Y[11];
assign A[12] = X[12];
assign B[12] = Y[12];
assign A[13] = X[13];
assign B[13] = Y[13];
assign A[14] = X[14];
assign B[14] = Y[14];
assign A[15] = X[15];
assign B[15] = Y[15];
assign A[16] = X[16];
assign B[16] = Y[16];
assign A[17] = X[17];
assign B[17] = Y[17];
assign A[18] = X[18];
assign B[18] = Y[18];
assign A[19] = X[19];
assign B[19] = Y[19];
assign A[20] = X[20];
assign B[20] = Y[20];
assign A[21] = X[21];
assign B[21] = Y[21];
assign A[22] = X[22];
assign B[22] = Y[22];
assign A[23] = X[23];
assign B[23] = Y[23];
assign A[24] = X[24];
assign B[24] = Y[24];
assign A[25] = X[25];
assign B[25] = Y[25];
assign A[26] = X[26];
assign B[26] = Y[26];
assign A[27] = X[27];
assign B[27] = Y[27];
assign A[28] = X[28];
assign B[28] = Y[28];
assign A[29] = X[29];
assign B[29] = Y[29];
assign A[30] = X[30];
assign B[30] = Y[30];
assign A[31] = X[31];
assign B[31] = Y[31];
assign A[32] = X[32];
assign B[32] = Y[32];
assign A[33] = X[33];
assign B[33] = Y[33];
assign A[34] = X[34];
assign B[34] = Y[34];
assign A[35] = X[35];
assign B[35] = Y[35];
assign A[36] = X[36];
assign B[36] = Y[36];
assign A[37] = X[37];
assign B[37] = Y[37];
assign A[38] = X[38];
assign B[38] = Y[38];
assign A[39] = X[39];
assign B[39] = Y[39];
assign A[40] = X[40];
assign B[40] = Y[40];
assign A[41] = X[41];
assign B[41] = Y[41];
assign A[42] = X[42];
assign B[42] = Y[42];
assign A[43] = X[43];
assign B[43] = Y[43];
assign A[44] = X[44];
assign B[44] = Y[44];
assign A[45] = X[45];
assign B[45] = Y[45];
assign A[46] = X[46];
assign B[46] = Y[46];
assign A[47] = X[47];
assign B[47] = Y[47];
assign A[48] = X[48];
assign B[48] = Y[48];
assign A[49] = X[49];
assign B[49] = Y[49];
assign A[50] = X[50];
assign B[50] = Y[50];
assign A[51] = X[51];
assign B[51] = Y[51];
assign A[52] = LOGIC0;
assign B[52] = LOGIC0;
assign A[53] = LOGIC0;
assign B[53] = LOGIC0;
assign A[54] = LOGIC0;
assign B[54] = LOGIC0;
assign A[55] = LOGIC0;
assign B[55] = LOGIC0;
assign A[56] = LOGIC0;
assign B[56] = LOGIC0;
assign A[57] = LOGIC0;
assign B[57] = LOGIC0;
assign A[58] = LOGIC0;
assign B[58] = LOGIC0;
assign A[59] = LOGIC0;
assign B[59] = LOGIC0;
assign A[60] = LOGIC0;
assign B[60] = LOGIC0;
assign A[61] = LOGIC0;
assign B[61] = LOGIC0;
assign A[62] = LOGIC0;
assign B[62] = LOGIC0;
assign A[63] = LOGIC0;
assign B[63] = LOGIC0;
assign S[0] = Q[0];
assign S[1] = Q[1];
assign S[2] = Q[2];
assign S[3] = Q[3];
assign S[4] = Q[4];
assign S[5] = Q[5];
assign S[6] = Q[6];
assign S[7] = Q[7];
assign S[8] = Q[8];
assign S[9] = Q[9];
assign S[10] = Q[10];
assign S[11] = Q[11];
assign S[12] = Q[12];
assign S[13] = Q[13];
assign S[14] = Q[14];
assign S[15] = Q[15];
assign S[16] = Q[16];
assign S[17] = Q[17];
assign S[18] = Q[18];
assign S[19] = Q[19];
assign S[20] = Q[20];
assign S[21] = Q[21];
assign S[22] = Q[22];
assign S[23] = Q[23];
assign S[24] = Q[24];
assign S[25] = Q[25];
assign S[26] = Q[26];
assign S[27] = Q[27];
assign S[28] = Q[28];
assign S[29] = Q[29];
assign S[30] = Q[30];
assign S[31] = Q[31];
assign S[32] = Q[32];
assign S[33] = Q[33];
assign S[34] = Q[34];
assign S[35] = Q[35];
assign S[36] = Q[36];
assign S[37] = Q[37];
assign S[38] = Q[38];
assign S[39] = Q[39];
assign S[40] = Q[40];
assign S[41] = Q[41];
assign S[42] = Q[42];
assign S[43] = Q[43];
assign S[44] = Q[44];
assign S[45] = Q[45];
assign S[46] = Q[46];
assign S[47] = Q[47];
assign S[48] = Q[48];
assign S[49] = Q[49];
assign S[50] = Q[50];
assign S[51] = Q[51];
assign CO_52 = Q[52];
endmodule //cla_sub52

View File

@ -0,0 +1,420 @@
// This module implements a 64-bit carry lookehead adder/subtractor.
// It is used to perform the primary addition in the floating point
// adder
module cla64 (S, X, Y, Sub);
input [63:0] X;
input [63:0] Y;
input Sub;
output [63:0] S;
wire CO;
wire [0:63] A,B,Q, Bbar;
DBLCADDER_64_64 U1 (A , Bbar , Sub , Q , CO );
assign A[0] = X[0];
assign B[0] = Y[0];
assign A[1] = X[1];
assign B[1] = Y[1];
assign A[2] = X[2];
assign B[2] = Y[2];
assign A[3] = X[3];
assign B[3] = Y[3];
assign A[4] = X[4];
assign B[4] = Y[4];
assign A[5] = X[5];
assign B[5] = Y[5];
assign A[6] = X[6];
assign B[6] = Y[6];
assign A[7] = X[7];
assign B[7] = Y[7];
assign A[8] = X[8];
assign B[8] = Y[8];
assign A[9] = X[9];
assign B[9] = Y[9];
assign A[10] = X[10];
assign B[10] = Y[10];
assign A[11] = X[11];
assign B[11] = Y[11];
assign A[12] = X[12];
assign B[12] = Y[12];
assign A[13] = X[13];
assign B[13] = Y[13];
assign A[14] = X[14];
assign B[14] = Y[14];
assign A[15] = X[15];
assign B[15] = Y[15];
assign A[16] = X[16];
assign B[16] = Y[16];
assign A[17] = X[17];
assign B[17] = Y[17];
assign A[18] = X[18];
assign B[18] = Y[18];
assign A[19] = X[19];
assign B[19] = Y[19];
assign A[20] = X[20];
assign B[20] = Y[20];
assign A[21] = X[21];
assign B[21] = Y[21];
assign A[22] = X[22];
assign B[22] = Y[22];
assign A[23] = X[23];
assign B[23] = Y[23];
assign A[24] = X[24];
assign B[24] = Y[24];
assign A[25] = X[25];
assign B[25] = Y[25];
assign A[26] = X[26];
assign B[26] = Y[26];
assign A[27] = X[27];
assign B[27] = Y[27];
assign A[28] = X[28];
assign B[28] = Y[28];
assign A[29] = X[29];
assign B[29] = Y[29];
assign A[30] = X[30];
assign B[30] = Y[30];
assign A[31] = X[31];
assign B[31] = Y[31];
assign A[32] = X[32];
assign B[32] = Y[32];
assign A[33] = X[33];
assign B[33] = Y[33];
assign A[34] = X[34];
assign B[34] = Y[34];
assign A[35] = X[35];
assign B[35] = Y[35];
assign A[36] = X[36];
assign B[36] = Y[36];
assign A[37] = X[37];
assign B[37] = Y[37];
assign A[38] = X[38];
assign B[38] = Y[38];
assign A[39] = X[39];
assign B[39] = Y[39];
assign A[40] = X[40];
assign B[40] = Y[40];
assign A[41] = X[41];
assign B[41] = Y[41];
assign A[42] = X[42];
assign B[42] = Y[42];
assign A[43] = X[43];
assign B[43] = Y[43];
assign A[44] = X[44];
assign B[44] = Y[44];
assign A[45] = X[45];
assign B[45] = Y[45];
assign A[46] = X[46];
assign B[46] = Y[46];
assign A[47] = X[47];
assign B[47] = Y[47];
assign A[48] = X[48];
assign B[48] = Y[48];
assign A[49] = X[49];
assign B[49] = Y[49];
assign A[50] = X[50];
assign B[50] = Y[50];
assign A[51] = X[51];
assign B[51] = Y[51];
assign A[52] = X[52];
assign B[52] = Y[52];
assign A[53] = X[53];
assign B[53] = Y[53];
assign A[54] = X[54];
assign B[54] = Y[54];
assign A[55] = X[55];
assign B[55] = Y[55];
assign A[56] = X[56];
assign B[56] = Y[56];
assign A[57] = X[57];
assign B[57] = Y[57];
assign A[58] = X[58];
assign B[58] = Y[58];
assign A[59] = X[59];
assign B[59] = Y[59];
assign A[60] = X[60];
assign B[60] = Y[60];
assign A[61] = X[61];
assign B[61] = Y[61];
assign A[62] = X[62];
assign B[62] = Y[62];
assign A[63] = X[63];
assign B[63] = Y[63];
assign S[0] = Q[0];
assign S[1] = Q[1];
assign S[2] = Q[2];
assign S[3] = Q[3];
assign S[4] = Q[4];
assign S[5] = Q[5];
assign S[6] = Q[6];
assign S[7] = Q[7];
assign S[8] = Q[8];
assign S[9] = Q[9];
assign S[10] = Q[10];
assign S[11] = Q[11];
assign S[12] = Q[12];
assign S[13] = Q[13];
assign S[14] = Q[14];
assign S[15] = Q[15];
assign S[16] = Q[16];
assign S[17] = Q[17];
assign S[18] = Q[18];
assign S[19] = Q[19];
assign S[20] = Q[20];
assign S[21] = Q[21];
assign S[22] = Q[22];
assign S[23] = Q[23];
assign S[24] = Q[24];
assign S[25] = Q[25];
assign S[26] = Q[26];
assign S[27] = Q[27];
assign S[28] = Q[28];
assign S[29] = Q[29];
assign S[30] = Q[30];
assign S[31] = Q[31];
assign S[32] = Q[32];
assign S[33] = Q[33];
assign S[34] = Q[34];
assign S[35] = Q[35];
assign S[36] = Q[36];
assign S[37] = Q[37];
assign S[38] = Q[38];
assign S[39] = Q[39];
assign S[40] = Q[40];
assign S[41] = Q[41];
assign S[42] = Q[42];
assign S[43] = Q[43];
assign S[44] = Q[44];
assign S[45] = Q[45];
assign S[46] = Q[46];
assign S[47] = Q[47];
assign S[48] = Q[48];
assign S[49] = Q[49];
assign S[50] = Q[50];
assign S[51] = Q[51];
assign S[52] = Q[52];
assign S[53] = Q[53];
assign S[54] = Q[54];
assign S[55] = Q[55];
assign S[56] = Q[56];
assign S[57] = Q[57];
assign S[58] = Q[58];
assign S[59] = Q[59];
assign S[60] = Q[60];
assign S[61] = Q[61];
assign S[62] = Q[62];
assign S[63] = Q[63];
assign Bbar = B ^ {64{Sub}};
endmodule // cla64
// This module performs 64-bit subtraction. It is used to get the two's complement
// of main addition or subtraction in the floating point adder.
module cla_sub64 (S, X, Y);
input [63:0] X;
input [63:0] Y;
output [63:0] S;
wire CO;
wire VDD = 1'b1;
wire [0:63] A,B,Q, Bbar;
DBLCADDER_64_64 U1 (A , Bbar , VDD, Q , CO );
assign A[0] = X[0];
assign B[0] = Y[0];
assign A[1] = X[1];
assign B[1] = Y[1];
assign A[2] = X[2];
assign B[2] = Y[2];
assign A[3] = X[3];
assign B[3] = Y[3];
assign A[4] = X[4];
assign B[4] = Y[4];
assign A[5] = X[5];
assign B[5] = Y[5];
assign A[6] = X[6];
assign B[6] = Y[6];
assign A[7] = X[7];
assign B[7] = Y[7];
assign A[8] = X[8];
assign B[8] = Y[8];
assign A[9] = X[9];
assign B[9] = Y[9];
assign A[10] = X[10];
assign B[10] = Y[10];
assign A[11] = X[11];
assign B[11] = Y[11];
assign A[12] = X[12];
assign B[12] = Y[12];
assign A[13] = X[13];
assign B[13] = Y[13];
assign A[14] = X[14];
assign B[14] = Y[14];
assign A[15] = X[15];
assign B[15] = Y[15];
assign A[16] = X[16];
assign B[16] = Y[16];
assign A[17] = X[17];
assign B[17] = Y[17];
assign A[18] = X[18];
assign B[18] = Y[18];
assign A[19] = X[19];
assign B[19] = Y[19];
assign A[20] = X[20];
assign B[20] = Y[20];
assign A[21] = X[21];
assign B[21] = Y[21];
assign A[22] = X[22];
assign B[22] = Y[22];
assign A[23] = X[23];
assign B[23] = Y[23];
assign A[24] = X[24];
assign B[24] = Y[24];
assign A[25] = X[25];
assign B[25] = Y[25];
assign A[26] = X[26];
assign B[26] = Y[26];
assign A[27] = X[27];
assign B[27] = Y[27];
assign A[28] = X[28];
assign B[28] = Y[28];
assign A[29] = X[29];
assign B[29] = Y[29];
assign A[30] = X[30];
assign B[30] = Y[30];
assign A[31] = X[31];
assign B[31] = Y[31];
assign A[32] = X[32];
assign B[32] = Y[32];
assign A[33] = X[33];
assign B[33] = Y[33];
assign A[34] = X[34];
assign B[34] = Y[34];
assign A[35] = X[35];
assign B[35] = Y[35];
assign A[36] = X[36];
assign B[36] = Y[36];
assign A[37] = X[37];
assign B[37] = Y[37];
assign A[38] = X[38];
assign B[38] = Y[38];
assign A[39] = X[39];
assign B[39] = Y[39];
assign A[40] = X[40];
assign B[40] = Y[40];
assign A[41] = X[41];
assign B[41] = Y[41];
assign A[42] = X[42];
assign B[42] = Y[42];
assign A[43] = X[43];
assign B[43] = Y[43];
assign A[44] = X[44];
assign B[44] = Y[44];
assign A[45] = X[45];
assign B[45] = Y[45];
assign A[46] = X[46];
assign B[46] = Y[46];
assign A[47] = X[47];
assign B[47] = Y[47];
assign A[48] = X[48];
assign B[48] = Y[48];
assign A[49] = X[49];
assign B[49] = Y[49];
assign A[50] = X[50];
assign B[50] = Y[50];
assign A[51] = X[51];
assign B[51] = Y[51];
assign A[52] = X[52];
assign B[52] = Y[52];
assign A[53] = X[53];
assign B[53] = Y[53];
assign A[54] = X[54];
assign B[54] = Y[54];
assign A[55] = X[55];
assign B[55] = Y[55];
assign A[56] = X[56];
assign B[56] = Y[56];
assign A[57] = X[57];
assign B[57] = Y[57];
assign A[58] = X[58];
assign B[58] = Y[58];
assign A[59] = X[59];
assign B[59] = Y[59];
assign A[60] = X[60];
assign B[60] = Y[60];
assign A[61] = X[61];
assign B[61] = Y[61];
assign A[62] = X[62];
assign B[62] = Y[62];
assign A[63] = X[63];
assign B[63] = Y[63];
assign S[0] = Q[0];
assign S[1] = Q[1];
assign S[2] = Q[2];
assign S[3] = Q[3];
assign S[4] = Q[4];
assign S[5] = Q[5];
assign S[6] = Q[6];
assign S[7] = Q[7];
assign S[8] = Q[8];
assign S[9] = Q[9];
assign S[10] = Q[10];
assign S[11] = Q[11];
assign S[12] = Q[12];
assign S[13] = Q[13];
assign S[14] = Q[14];
assign S[15] = Q[15];
assign S[16] = Q[16];
assign S[17] = Q[17];
assign S[18] = Q[18];
assign S[19] = Q[19];
assign S[20] = Q[20];
assign S[21] = Q[21];
assign S[22] = Q[22];
assign S[23] = Q[23];
assign S[24] = Q[24];
assign S[25] = Q[25];
assign S[26] = Q[26];
assign S[27] = Q[27];
assign S[28] = Q[28];
assign S[29] = Q[29];
assign S[30] = Q[30];
assign S[31] = Q[31];
assign S[32] = Q[32];
assign S[33] = Q[33];
assign S[34] = Q[34];
assign S[35] = Q[35];
assign S[36] = Q[36];
assign S[37] = Q[37];
assign S[38] = Q[38];
assign S[39] = Q[39];
assign S[40] = Q[40];
assign S[41] = Q[41];
assign S[42] = Q[42];
assign S[43] = Q[43];
assign S[44] = Q[44];
assign S[45] = Q[45];
assign S[46] = Q[46];
assign S[47] = Q[47];
assign S[48] = Q[48];
assign S[49] = Q[49];
assign S[50] = Q[50];
assign S[51] = Q[51];
assign S[52] = Q[52];
assign S[53] = Q[53];
assign S[54] = Q[54];
assign S[55] = Q[55];
assign S[56] = Q[56];
assign S[57] = Q[57];
assign S[58] = Q[58];
assign S[59] = Q[59];
assign S[60] = Q[60];
assign S[61] = Q[61];
assign S[62] = Q[62];
assign S[63] = Q[63];
assign Bbar = ~B;
endmodule // cla_sub64

View File

@ -0,0 +1,61 @@
// This module takes as inputs two operands (op1 and op2)
// the operation type (op_type) and the result precision (P).
// Based on the operation and precision , it conditionally
// converts single precision values to double precision values
// and modifies the sign of op1. The converted operands are Float1
// and Float2.
module convert_inputs(Float1, Float2, op1, op2, op_type, P);
input [63:0] op1; // 1st input operand (A)
input [63:0] op2; // 2nd input operand (B)
input [3:0] op_type; // Function opcode
input P; // Result Precision (0 for double, 1 for single)
output [63:0] Float1; // Converted 1st input operand
output [63:0] Float2; // Converted 2nd input operand
wire conv_SP; // Convert from SP to DP
wire negate; // Operation is negation
wire abs_val; // Operation is absolute value
wire Zexp1; // One if the exponent of op1 is zero
wire Zexp2; // One if the exponent of op2 is zero
wire Oexp1; // One if the exponent of op1 is all ones
wire Oexp2; // One if the exponent of op2 is all ones
// Convert from single precision to double precision if (op_type is 11X
// and P is 0) or (op_type is not 11X and P is one).
assign conv_SP = (op_type[2]&op_type[1]) ^ P;
// Test if the input exponent is zero, because if it is then the
// exponent of the converted number should be zero.
assign Zexp1 = ~(op1[62] | op1[61] | op1[60] | op1[59] |
op1[58] | op1[57] | op1[56] | op1[55]);
assign Zexp2 = ~(op2[62] | op2[61] | op2[60] | op2[59] |
op2[58] | op2[57] | op2[56] | op2[55]);
assign Oexp1 = (op1[62] & op1[61] & op1[60] & op1[59] &
op1[58] & op1[57] & op1[56] & op1[55]);
assign Oexp2 = (op2[62] & op2[61] & op2[60] & op2[59] &
op2[58] & op2[57] & op2[56] &op2[55]);
// Conditionally convert op1. Lower 29 bits are zero for single precision.
assign Float1[62:29] = conv_SP ? {op1[62], {3{(~op1[62]&~Zexp1)|Oexp1}}, op1[61:32]}
: op1[62:29];
assign Float1[28:0] = op1[28:0] & {29{~conv_SP}};
// Conditionally convert op2. Lower 29 bits are zero for single precision.
assign Float2[62:29] = conv_SP ? {op2[62],
{3{(~op2[62]&~Zexp2)|Oexp2}}, op2[61:32]}
: op2[62:29];
assign Float2[28:0] = op2[28:0] & {29{~conv_SP}};
// Set the sign of Float1 based on its original sign and if the operation
// is negation (op_type = 101) or absolute value (op_type = 100)
assign negate = op_type[2] & ~op_type[1] & op_type[0];
assign abs_val = op_type[2] & ~op_type[1] & ~op_type[0];
assign Float1[63] = (op1[63] ^ negate) & ~abs_val;
assign Float2[63] = op2[63];
endmodule // convert_inputs

View File

@ -0,0 +1,51 @@
// This module takes as inputs two operands (op1 and op2)
// and the result precision (P). Based on the operation and precision,
// it conditionally converts single precision values to double
// precision values and modifies the sign of op1.
// The converted operands are Float1 and Float2.
module convert_inputs_div (Float1, Float2b, op1, op2, op_type, P);
input [63:0] op1; // 1st input operand (A)
input [63:0] op2; // 2nd input operand (B)
input P; // Result Precision (0 for double, 1 for single)
input op_type; // Operation
output [63:0] Float1; // Converted 1st input operand
output [63:0] Float2b; // Converted 2nd input operand
wire [63:0] Float2;
wire Zexp1; // One if the exponent of op1 is zero
wire Zexp2; // One if the exponent of op2 is zero
wire Oexp1; // One if the exponent of op1 is all ones
wire Oexp2; // One if the exponent of op2 is all ones
// Test if the input exponent is zero, because if it is then the
// exponent of the converted number should be zero.
assign Zexp1 = ~(op1[62] | op1[61] | op1[60] | op1[59] |
op1[58] | op1[57] | op1[56] | op1[55]);
assign Zexp2 = ~(op2[62] | op2[61] | op2[60] | op2[59] |
op2[58] | op2[57] | op2[56] | op2[55]);
assign Oexp1 = (op1[62] & op1[61] & op1[60] & op1[59] &
op1[58] & op1[57] & op1[56] & op1[55]);
assign Oexp2 = (op2[62] & op2[61] & op2[60] & op2[59] &
op2[58] & op2[57] & op2[56] &op2[55]);
// Conditionally convert op1. Lower 29 bits are zero for single precision.
assign Float1[62:29] = P ? {op1[62], {3{(~op1[62]&~Zexp1)|Oexp1}}, op1[61:32]}
: op1[62:29];
assign Float1[28:0] = op1[28:0] & {29{~P}};
// Conditionally convert op2. Lower 29 bits are zero for single precision.
assign Float2[62:29] = P ? {op2[62], {3{(~op2[62]&~Zexp2)|Oexp2}}, op2[61:32]}
: op2[62:29];
assign Float2[28:0] = op2[28:0] & {29{~P}};
// Set the sign of Float1 based on its original sign
assign Float1[63] = op1[63];
assign Float2[63] = op2[63];
// For sqrt, assign Float2 same as Float1 for simplicity
assign Float2b = op_type ? Float1 : Float2;
endmodule // convert_inputs

View File

@ -0,0 +1,120 @@
// Exception logic for the floating point adder. Note: We may
// actually want to move to where the result is computed.
module exception (Ztype, Invalid, Denorm, ANorm, BNorm, Sub, A, B, op_type);
input [63:0] A; // 1st input operand (op1)
input [63:0] B; // 2nd input operand (op2)
input [3:0] op_type; // Function opcode
output [3:0] Ztype; // Indicates type of result (Z)
output Invalid; // Invalid operation exception
output Denorm; // Denormalized input
output ANorm; // A is not zero or Denorm
output BNorm; // B is not zero or Denorm
output Sub; // The effective operation is subtraction
wire AzeroM; // '1' if the mantissa of A is zero
wire BzeroM; // '1' if the mantissa of B is zero
wire AzeroE; // '1' if the exponent of A is zero
wire BzeroE; // '1' if the exponent of B is zero
wire AonesE; // '1' if the exponent of A is all ones
wire BonesE; // '1' if the exponent of B is all ones
wire ADenorm; // '1' if A is a denomalized number
wire BDenorm; // '1' if B is a denomalized number
wire AInf; // '1' if A is infinite
wire BInf; // '1' if B is infinite
wire AZero; // '1' if A is 0
wire BZero; // '1' if B is 0
wire ANaN; // '1' if A is a not-a-number
wire BNaN; // '1' if B is a not-a-number
wire ASNaN; // '1' if A is a signalling not-a-number
wire BSNaN; // '1' if B is a signalling not-a-number
wire ZQNaN; // '1' if result Z is a quiet NaN
wire ZPInf; // '1' if result Z positive infnity
wire ZNInf; // '1' if result Z negative infnity
wire add_sub; // '1' if operation is add or subtract
wire converts; // See if there are any converts
parameter [51:0] fifty_two_zeros = 52'h0000000000000; // Use parameter?
// Is this instruction a convert
assign converts = ~(~op_type[1] & ~op_type[2]);
// Determine if mantissas are all zeros
assign AzeroM = (A[51:0] == fifty_two_zeros);
assign BzeroM = (B[51:0] == fifty_two_zeros);
// Determine if exponents are all ones or all zeros
assign AonesE = A[62]&A[61]&A[60]&A[59]&A[58]&A[57]&A[56]&A[55]&A[54]&A[53]&A[52];
assign BonesE = B[62]&B[61]&B[60]&B[59]&B[58]&B[57]&B[56]&B[55]&B[54]&B[53]&B[52];
assign AzeroE = ~(A[62]|A[61]|A[60]|A[59]|A[58]|A[57]|A[56]|A[55]|A[54]|A[53]|A[52]);
assign BzeroE = ~(B[62]|B[61]|B[60]|B[59]|B[58]|B[57]|B[56]|B[55]|B[54]|B[53]|B[52]);
// Determine special cases. Note: Zero is not really a special case.
assign ADenorm = AzeroE & ~AzeroM;
assign BDenorm = BzeroE & ~BzeroM;
assign AInf = AonesE & AzeroM;
assign BInf = BonesE & BzeroM;
assign ANaN = AonesE & ~AzeroM;
assign BNaN = BonesE & ~BzeroM;
assign ASNaN = ANaN & ~A[51];
assign BSNaN = BNaN & ~B[51];
assign AZero = AzeroE & AzeroM;
assign BZero = BzeroE & BzeroE;
// A and B are normalized if their exponents are not zero.
assign ANorm = ~AzeroE;
assign BNorm = ~BzeroE;
// An "Invalid Operation" exception occurs if (A or B is a signalling NaN)
// or (A and B are both Infinite and the "effective operation" is
// subtraction).
assign add_sub = ~op_type[2] & ~op_type[1];
assign Invalid = (ASNaN | BSNaN |
(add_sub & AInf & BInf & (A[63]^B[63]^op_type[0]))) & ~converts;
// The Denorm flag is set if (A is denormlized and the operation is not integer
// conversion ) or (if B is normalized and the operation is addition or subtraction).
assign Denorm = ADenorm&(op_type[2]|~op_type[1]) | BDenorm & add_sub;
// The result is a quiet NaN if (an "Invalid Operation" exception occurs)
// or (A is a NaN) or (B is a NaN and the operation uses B).
assign ZQNaN = Invalid | ANaN | (BNaN & add_sub);
// The result is +Inf if ((A is +Inf) or (B is -Inf and the operation is
// subtraction) or (B is +Inf and the operation is addition)) and (the
// result is not a quiet NaN).
assign ZPInf = (AInf&A[63] | add_sub&BInf&(~B[63]^op_type[0]))&~ZQNaN;
// The result is -Inf if ((A is -Inf) or (B is +Inf and the operation is
// subtraction) or (B is -Inf and the operation is addition)) and the
// result is not a quiet NaN.
assign ZNInf = (AInf&~A[63] | add_sub&BInf&(B[63]^op_type[0]))&~ZQNaN;
// Set the type of the result as follows:
// (needs optimization - got lazy or was late)
// Ztype Result
// 0000 Normal
// 0001 Quiet NaN
// 0010 Negative Infinity
// 0011 Positive Infinity
// 0100 +Bzero and +Azero (and vice-versa)
// 0101 +Bzero and -Azero (and vice-versa)
// 1000 Convert SP to DP (and vice-versa)
assign Ztype[0] = ((ZQNaN | ZPInf) & ~(~op_type[2] & op_type[1])) |
((AZero & BZero & (A[63]^B[63]^op_type[0]))
& ~converts);
assign Ztype[1] = ((ZNInf | ZPInf) & ~(~op_type[2] & op_type[1])) |
(((AZero & BZero & A[63] & B[63] & ~op_type[0]) |
(AZero & BZero & A[63] & ~B[63] & op_type[0]))
& ~converts);
assign Ztype[2] = ((AZero & BZero & ~op_type[1] & ~op_type[2])
& ~converts);
assign Ztype[3] = (op_type[1] & op_type[2] & ~op_type[0]);
// Determine if the effective operation is subtraction
assign Sub = ~(op_type[3] & ~op_type[0]) & ( (op_type[3] & op_type[0]) | (add_sub & (A[63]^B[63]^op_type[0])) );
endmodule // exception

View File

@ -0,0 +1,96 @@
// Exception logic for the floating point adder. Note: We may
// actually want to move to where the result is computed.
module exception_div (Ztype, Invalid, Denorm, ANorm, BNorm, A, B, op_type);
input [63:0] A; // 1st input operand (op1)
input [63:0] B; // 2nd input operand (op2)
input op_type; // Determine operation
output [2:0] Ztype; // Indicates type of result (Z)
output Invalid; // Invalid operation exception
output Denorm; // Denormalized input
output ANorm; // A is not zero or Denorm
output BNorm; // B is not zero or Denorm
wire AzeroM; // '1' if the mantissa of A is zero
wire BzeroM; // '1' if the mantissa of B is zero
wire AzeroE; // '1' if the exponent of A is zero
wire BzeroE; // '1' if the exponent of B is zero
wire AonesE; // '1' if the exponent of A is all ones
wire BonesE; // '1' if the exponent of B is all ones
wire ADenorm; // '1' if A is a denomalized number
wire BDenorm; // '1' if B is a denomalized number
wire AInf; // '1' if A is infinite
wire BInf; // '1' if B is infinite
wire AZero; // '1' if A is 0
wire BZero; // '1' if B is 0
wire ANaN; // '1' if A is a not-a-number
wire BNaN; // '1' if B is a not-a-number
wire ASNaN; // '1' if A is a signalling not-a-number
wire BSNaN; // '1' if B is a signalling not-a-number
wire ZQNaN; // '1' if result Z is a quiet NaN
wire ZInf; // '1' if result Z is an infnity
wire square_root; // '1' if square root operation
wire Zero; // '1' if result is zero
parameter [51:0] fifty_two_zeros = 52'h0; // Use parameter?
// Determine if mantissas are all zeros
assign AzeroM = (A[51:0] == fifty_two_zeros);
assign BzeroM = (B[51:0] == fifty_two_zeros);
// Determine if exponents are all ones or all zeros
assign AonesE = A[62]&A[61]&A[60]&A[59]&A[58]&A[57]&A[56]&A[55]&A[54]&A[53]&A[52];
assign BonesE = B[62]&B[61]&B[60]&B[59]&B[58]&B[57]&B[56]&B[55]&B[54]&B[53]&B[52];
assign AzeroE = ~(A[62]|A[61]|A[60]|A[59]|A[58]|A[57]|A[56]|A[55]|A[54]|A[53]|A[52]);
assign BzeroE = ~(B[62]|B[61]|B[60]|B[59]|B[58]|B[57]|B[56]|B[55]|B[54]|B[53]|B[52]);
// Determine special cases. Note: Zero is not really a special case.
assign ADenorm = AzeroE & ~AzeroM;
assign BDenorm = BzeroE & ~BzeroM;
assign AInf = AonesE & AzeroM;
assign BInf = BonesE & BzeroM;
assign ANaN = AonesE & ~AzeroM;
assign BNaN = BonesE & ~BzeroM;
assign ASNaN = ANaN & A[50];
assign BSNaN = ANaN & A[50];
assign AZero = AzeroE & AzeroM;
assign BZero = BzeroE & BzeroE;
// A and B are normalized if their exponents are not zero.
assign ANorm = ~AzeroE;
assign BNorm = ~BzeroE;
// An "Invalid Operation" exception occurs if (A or B is a signalling NaN)
// or (A and B are both Infinite)
assign Invalid = ASNaN | BSNaN | (((AInf & BInf) | (AZero & BZero))&~op_type) |
(A[63] & op_type);
// The Denorm flag is set if A is denormlized or if B is normalized
assign Denorm = ADenorm | BDenorm;
// The result is a quiet NaN if (an "Invalid Operation" exception occurs)
// or (A is a NaN) or (B is a NaN).
assign ZQNaN = Invalid | ANaN | BNaN;
// The result is zero
assign Zero = (AZero | BInf)&~op_type | AZero&op_type;
// The result is +Inf if ((A is Inf) or (B is 0)) and (the
// result is not a quiet NaN).
assign ZInf = (AInf | BZero)&~ZQNaN&~op_type | AInf&op_type&~ZQNaN;
// Set the type of the result as follows:
// Ztype Result
// 000 Normal
// 001 Quiet NaN
// 010 Infinity
// 011 Zero
// 110 DivZero
assign Ztype[0] = ZQNaN | Zero;
assign Ztype[1] = ZInf | Zero;
assign Ztype[2] = BZero&~op_type;
endmodule // exception

View File

@ -1,29 +0,0 @@
`include "../../config/rv64icfd/wally-config.vh"
module fcsr(
input logic [2:0] frm,
input logic reset,
input logic clear,
input logic clk,
input logic write,
input logic [4:0] flags,
output logic [31:0] readData);
//register I/O assignment
logic [31:0] regInput;
logic [31:0] regOutput;
//no L instruction support
//only last 8 bits used for FCSR
//latching input to write signal
//AND clk and write and remove latch
//for clk-based write
assign regInput = (write) ? {24'h0,frm,flags} : regInput;
floprc #(32) fcsrreg(.clk(clk), .reset(reset), .clear(clear), .d(regInput), .q(regOutput));
assign readData = regOutput;
endmodule

45
wally-pipelined/src/fpu/dev/fctrl.sv Normal file → Executable file
View File

@ -1,4 +1,3 @@
`include "../../config/rv64icfd/wally-config.vh"
module fctrl (
input logic [6:0] Funct7D,
@ -8,9 +7,9 @@ module fctrl (
input logic [2:0] FrmW,
output logic WriteEnD,
output logic DivSqrtStartD,
output logic [2:0] regSelD,
output logic [2:0] writeSelD,
output logic [3:0] OpCtrlD,
//output logic [2:0] regSelD,
output logic [2:0] WriteSelD,
output logic [2:0] OpCtrlD,
output logic FmtD,
output logic WriteIntD);
@ -103,17 +102,11 @@ module fctrl (
logic isSign;
assign isSign = ~Funct7D[6] & ~Funct7D[5] & Funct7D[4] & ~Funct7D[3] & ~Funct7D[2];
assign regSelD[2] = isDivSqrt & ~isFMA;
assign regSelD[1] = isFMA | isCmp;
//AND of Funct7 for sign
assign regSelD[0] = isCmp | isSign;
//write select
assign writeSelD[2] = isDivSqrt & ~isFMA;
assign writeSelD[1] = isFMA | isCmp;
assign WriteSelD[2] = isDivSqrt & ~isFMA;
assign WriteSelD[1] = isFMA | isCmp;
//AND of Funct7 for sign
assign writeSelD[0] = isCmp | isSign;
assign WriteSelD[0] = isCmp | isSign;
//if op is div/sqrt - start div/sqrt
assign DivSqrtStartD = isDivSqrt & ~isFMA;
@ -128,13 +121,25 @@ module fctrl (
//also need to be added later as I find the opcode
//version I used for this repo
assign OpCtrlD[3] = 1'b0;
//if is positive sign injection OR is precision convert
assign OpCtrlD[2] = (isSign & ~FrmW[0]) | (~Funct7D[6] & Funct7D[5] & ~Funct7D[4] & ~Funct7D[3] & ~Funct7D[2] & ~Funct7D[1]);
//if is precision convert OR is sign xor
assign OpCtrlD[1] = (isSign & FrmW[1]) | (~Funct7D[6] & Funct7D[5] & ~Funct7D[4] & ~Funct7D[3] & ~Funct7D[2] & ~Funct7D[1]);
//if is sqrt OR is sub OR is single-precision cmp OR negation
assign OpCtrlD[0] = (isDivSqrt & ~isFMA & Funct7D[6]) | (isAddSub & ~isFMA & Funct7D[2]) | (isCmp & ~isFMA & Funct7D[0]) | (isSign & FrmW[0]);
//let's do separate SOP for each type of operation
// assign OpCtrlD[3] = 1'b0;
//
//
//add/cvt chooses unsigned conversion here
assign OpCtrlD[3] = (isAddSub & Rs2D[0]) | (isFMA & 1'b0) | (isDivSqrt & 1'b0) | (isCmp & 1'b0) | (isSign & 1'b0);
//add/cvt chooses FP/int or int/FP conversion
assign OpCtrlD[2] = (isAddSub & (Funct7D[6] & Funct7D[5] & ~Funct7D[4])) | (isFMA & 1'b0) | (isDivSqrt & 1'b0) | (isCmp & 1'b0) | (isSign & 1'b0);
//compare chooses equals
//sign chooses sgnjx
//add/cvt can chooses between abs/neg functions, but they aren't used in the
//wally-spec
assign OpCtrlD[1] = (isAddSub & 1'b0) | (isFMA & 1'b0) | (isDivSqrt & 1'b0) | (isCmp & FrmW[2]) | (isSign & FrmW[1]);
//divide chooses between div/sqrt
//compare chooses between LT and LE
//sign chooses between sgnj and sgnjn
//add/cvt chooses between add/sub or single-precision conversion
assign OpCtrlD[0] = (isAddSub & (Funct7D[2] | Funct7D[0])) | (isFMA & 1'b0) | (isDivSqrt & Funct7D[5]) | (isCmp & FrmW[1]) | (isSign & FrmW[0]);
//write to integer source if conv to int occurs
//AND of Funct7 for int results

View File

@ -0,0 +1,274 @@
//
// File name : fpadd
// Title : Floating-Point Adder/Subtractor
// project : FPU
// Library : fpadd
// Author(s) : James E. Stine, Jr., Brett Mathis
// Purpose : definition of main unit to floating-point add/sub
// notes :
//
// Copyright Oklahoma State University
// Copyright AFRL
//
// Basic and Denormalized Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Compare exponents. Swap the operands of exp1 < exp2
// or of (exp1 = exp2 AND mnt1 < mnt2)
// Step 4: Shift the mantissa corresponding to the smaller exponent,
// and extend precision by three bits to the right.
// Step 5: Add or subtract the mantissas.
// Step 6: Normalize the result.//
// Shift left until normalized. Normalized when the value to the
// left of the binrary point is 1.
// Step 7: Round the result.//
// Step 8: Put sum onto output.
//
module fpadd (AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn);
input [63:0] op1; // 1st input operand (A)
input [63:0] op2; // 2nd input operand (B)
input [2:0] rm; // Rounding mode - specify values
input [3:0] op_type; // Function opcode
input P; // Result Precision (0 for double, 1 for single)
input OvEn; // Overflow trap enabled
input UnEn; // Underflow trap enabled
output [63:0] AS_Result; // Result of operation
output [4:0] Flags; // IEEE exception flags
output Denorm; // Denorm on input or output
wire [63:0] Float1;
wire [63:0] Float2;
wire [63:0] IntValue;
wire [11:0] exp1, exp2;
wire [11:0] exp_diff1, exp_diff2;
wire [10:0] exponent, exp_pre;
wire [11:0] exp_shift;
wire [63:0] Result;
wire [51:0] mantissaA;
wire [56:0] mantissaA1;
wire [63:0] mantissaA3;
wire [51:0] mantissaB;
wire [56:0] mantissaB1, mantissaB2;
wire [63:0] mantissaB3;
wire [63:0] sum, sum_tc, sum_corr, sum_norm, sum_norm_w_bypass;
wire [5:0] align_shift;
wire [5:0] norm_shift, norm_shift_denorm;
wire [3:0] sel_inv;
wire op1_Norm, op2_Norm;
wire opA_Norm, opB_Norm;
wire Invalid;
wire DenormIn, DenormIO;
wire [4:0] FlagsIn;
wire exp_valid;
wire exp_gt63;
wire Sticky_out;
wire signA, sign_corr;
wire corr_sign;
wire zeroB;
wire convert;
wire swap;
wire sub;
wire [10:0] exponent_postsum;
wire mantissa_comp;
wire mantissa_comp_sum;
wire mantissa_comp_sum_tc;
wire Float1_sum_comp;
wire Float2_sum_comp;
wire Float1_sum_tc_comp;
wire Float2_sum_tc_comp;
wire [5:0] ZP_mantissaA;
wire [5:0] ZP_mantissaB;
wire ZV_mantissaA;
wire ZV_mantissaB;
wire normal_underflow;
wire normal_overflow;
// Convert the input operands to their appropriate forms based on
// the orignal operands, the op_type , and their precision P.
// Single precision inputs are converted to double precision
// and the sign of the first operand is set appropratiately based on
// if the operation is absolute value or negation.
convert_inputs conv1 (Float1, Float2, op1, op2, op_type, P);
// Test for exceptions and return the "Invalid Operation" and
// "Denormalized" Input Flags. The "sel_inv" is used in
// the third pipeline stage to select the result. Also, op1_Norm
// and op2_Norm are one if op1 and op2 are not zero or denormalized.
// sub is one if the effective operation is subtaction.
exception exc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, sub,
Float1, Float2, op_type);
// Perform Exponent Subtraction (used for alignment). For performance
// both exponent subtractions are performed in parallel. This was
// changed to a behavior level to allow the tools to try to optimize
// the two parallel additions. The input values are zero-extended to 12
// bits prior to performing the addition.
assign exp1 = {1'b0, Float1[62:52]};
assign exp2 = {1'b0, Float2[62:52]};
assign exp_diff1 = exp1 - exp2;
assign exp_diff2 = DenormIn ? ({Float2[63], exp2[10:0]} - {Float1[63], exp1[10:0]}): exp2 - exp1;
// The second operand (B) should be set to zero, if op_type does not
// specify addition or subtraction
assign zeroB = op_type[2] | op_type[1];
// Swapped operands if zeroB is not one and exp1 < exp2.
// Swapping causes exp2 to be used for the result exponent.
// Only the exponent of the larger operand is used to determine
// the final result.
assign swap = exp_diff1[11] & ~zeroB;
assign exponent = swap ? exp2[10:0] : exp1[10:0];
assign exponent_postsum = swap ? exp2[10:0] : exp1[10:0];
assign mantissaA = swap ? Float2[51:0] : Float1[51:0];
assign mantissaB = swap ? Float1[51:0] : Float2[51:0];
assign signA = swap ? Float2[63] : Float1[63];
// Leading-Zero Detector. Determine the size of the shift needed for
// normalization. If sum_corrected is all zeros, the exp_valid is
// zero; otherwise, it is one.
// modified to 52 bits to detect leading zeroes on denormalized mantissas
lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
// Denormalized exponents created by subtracting the leading zeroes from the original exponents
assign exp1_denorm = swap ? (exp1 - ZP_mantissaB) : (exp1 - ZP_mantissaA);
assign exp2_denorm = swap ? (exp2 - ZP_mantissaA) : (exp2 - ZP_mantissaB);
// Finds normal underflow result to determine whether to round final exponent down
// Comparison between each float and the resulting sum of the primary cla adder/subtractor and cla subtractor
assign Float1_sum_comp = (Float1[51:0] > sum[51:0]) ? 1'b0 : 1'b1;
assign Float2_sum_comp = (Float2[51:0] > sum[51:0]) ? 1'b0 : 1'b1;
assign Float1_sum_tc_comp = (Float1[51:0] > sum_tc[51:0]) ? 1'b0 : 1'b1;
assign Float2_sum_tc_comp = (Float2[51:0] > sum_tc[51:0]) ? 1'b0 : 1'b1;
// Determines the correct Float value to compare based on swap result
assign mantissa_comp_sum = swap ? Float2_sum_comp : Float1_sum_comp;
assign mantissa_comp_sum_tc = swap ? Float2_sum_tc_comp : Float1_sum_tc_comp;
// Determines the correct comparison result based on operation and sign of resulting sum
assign mantissa_comp = (op_type[0] ^ sum[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;
// If the signs are different and both operands aren't denormalized
// the normal underflow bit is needed and therefore updated.
assign normal_underflow = ((Float1[63] ~^ Float2[63]) & (opA_Norm | opB_Norm)) ? mantissa_comp : 1'b0;
// Determine the alignment shift and limit it to 63. If any bit from
// exp_shift[6] to exp_shift[11] is one, then shift is set to all ones.
assign exp_shift = swap ? exp_diff2 : exp_diff1;
assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9]
| exp_shift[8] | exp_shift[7] | exp_shift[6];
assign align_shift = exp_shift | {6{exp_gt63}};
// Unpack the 52-bit mantissas to 57-bit numbers of the form.
// 001.M[51]M[50] ... M[1]M[0]00
// Unless the number has an exponent of zero, in which case it
// is unpacked as
// 000.00 ... 00
// This effectively flushes denormalized values to zero.
// The three bits of to the left of the binary point prevent overflow
// and loss of sign information. The two bits to the right of the
// original mantissa form the "guard" and "round" bits that are used
// to round the result.
assign opA_Norm = swap ? op2_Norm : op1_Norm;
assign opB_Norm = swap ? op1_Norm : op2_Norm;
assign mantissaA1 = {2'h0, opA_Norm, mantissaA[51:0]&{52{opA_Norm}}, 2'h0};
assign mantissaB1 = {2'h0, opB_Norm, mantissaB[51:0]&{52{opB_Norm}}, 2'h0};
// Perform mantissa alignment using a 57-bit barrel shifter
// If any of the bits shifted out are one, Sticky_out is set.
// The size of the barrel shifter could be reduced by two bits
// by not adding the leading two zeros until after the shift.
barrel_shifter_r57 bs1 (mantissaB2, Sticky_out, mantissaB1, align_shift);
// Place either the sign-extened 32-bit value or the original 64-bit value
// into IntValue (to be used for integer to floating point conversion)
assign IntValue [31:0] = op1[31:0];
assign IntValue [63:32] = op_type[0] ? {32{op1[31]}} : op1[63:32];
// If doing an integer to floating point conversion, mantissaA3 is set to
// IntVal and the prenomalized exponent is set to 1084. Otherwise,
// mantissaA3 is simply extended to 64-bits by setting the 7 LSBs to zero,
// and the exponent value is left unchanged.
// Under denormalized cases, the exponent before the rounder is set to 1
// if the normal shift value is 11.
assign convert = ~op_type[2] & op_type[1];
assign mantissaA3 = (op_type[3]) ? (op_type[0] ? Float1 : ~Float1) : (DenormIn ? ({12'h0, mantissaA}) : (convert ? IntValue : {mantissaA1, 7'h0}));
assign exp_pre = DenormIn ?
((norm_shift == 6'b001011) ? 11'b00000000001 : (swap ? exp2_denorm : exp1_denorm))
: (convert ? 11'b10000111100 : exponent);
// Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to
// 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six
// zeros.
assign mantissaB3[63:7] = (op_type[3]) ? (57'h0) : (DenormIn ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
assign mantissaB3[6] = (op_type[3]) ? (1'b0) : (DenormIn ? mantissaB[6] : Sticky_out & ~zeroB);
assign mantissaB3[5:0] = (op_type[3]) ? (6'h01) : (DenormIn ? mantissaB[5:0] : 6'h0);
// The sign of the result needs to be corrected if the true
// operation is subtraction and the input operands were swapped.
assign corr_sign = ~op_type[2]&~op_type[1]&op_type[0]&swap;
// 64-bit Mantissa Adder/Subtractor
cla64 add1 (sum, mantissaA3, mantissaB3, sub);
// 64-bit Mantissa Subtractor - to get the two's complement of the
// result when the sign from the adder/subtractor is negative.
cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3);
// Determine the correct sign of the result
assign sign_corr = ((corr_sign ^ signA) & ~convert) ^ sum[63];
// If the sum is negative, use its two complement instead.
// This value has to be 64-bits to correctly handle the
// case 10...00
assign sum_corr = (DenormIn & (opA_Norm | opB_Norm) & ( ( (Float1[63] ~^ Float2[63]) & op_type[0] ) | ((Float1[63] ^ Float2[63]) & ~op_type[0]) ))
? (sum[63] ? sum : sum_tc) : ( (op_type[3]) ? sum : (sum[63] ? sum_tc : sum));
// Finds normal underflow result to determine whether to round final exponent down
assign normal_overflow = (DenormIn & (sum == 16'h0) & (opA_Norm | opB_Norm) & ~op_type[0]) ? 1'b1 : (sum[63] ? sum_tc[52] : sum[52]);
// Leading-Zero Detector. Determine the size of the shift needed for
// normalization. If sum_corrected is all zeros, the exp_valid is
// zero; otherwise, it is one.
lz64 lzd1 (norm_shift, exp_valid, sum_corr);
assign norm_shift_denorm = (DenormIn & ( (~opA_Norm & ~opB_Norm) | normal_underflow)) ? (6'h00) : (norm_shift);
// Barell shifter used for normalization. It takes as inputs the
// the corrected sum and the amount by which the sum should
// be right shifted. It outputs the normalized sum.
barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);
assign sum_norm_w_bypass = (op_type[3]) ? (op_type[0] ? ~sum_corr : sum_corr) : (sum_norm);
// Round the mantissa to a 52-bit value, with the leading one
// removed. If the result is a single precision number, the actual
// mantissa is in the upper 23 bits and the lower 29 bits are zero.
// At this point, normalization has already been performed, so we know
// exactly where the rounding point is. The rounding units also
// handles special cases and set the exception flags.
// Changed DenormIO -> Denorm and FlagsIn -> Flags in order to
// help in processor reservation station detection of load/stores. In
// other words, the processor would like to know ahead of time that
// if the result is an exception then don't load or store.
rounder round1 (Result, DenormIO, FlagsIn, rm, P, OvEn, UnEn, exp_valid,
sel_inv, Invalid, DenormIn, convert, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass,
exponent_postsum, op1_Norm, op2_Norm, Float1[63:52], Float2[63:52],
normal_overflow, normal_underflow, swap, op_type, sum);
// Store the final result and the exception flags in registers.
assign AS_Result = Result;
assign {Denorm, Flags} = {DenormIO, FlagsIn};
endmodule // fpadd

View File

@ -0,0 +1,248 @@
//
// File name : fpdiv
// Title : Floating-Point Divider/Square-Root
// project : FPU
// Library : fpdiv
// Author(s) : James E. Stine, Jr.
// Purpose : definition of main unit to floating-point div/sqrt
// notes :
//
// Copyright Oklahoma State University
//
// Basic Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Exponent Logic
// Step 4: Divide/Sqrt using Goldschmidt
// Step 5: Normalize the result.//
// Shift left until normalized. Normalized when the value to the
// left of the binrary point is 1.
// Step 6: Round the result.//
// Step 7: Put quotient/remainder onto output.
//
`timescale 1ps/1ps
module fpdiv (done, AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn,
start, reset, clk);
input [63:0] op1; // 1st input operand (A)
input [63:0] op2; // 2nd input operand (B)
input [1:0] rm; // Rounding mode - specify values
input op_type; // Function opcode
input P; // Result Precision (0 for double, 1 for single)
input OvEn; // Overflow trap enabled
input UnEn; // Underflow trap enabled
input start;
input reset;
input clk;
output [63:0] AS_Result; // Result of operation
output [4:0] Flags; // IEEE exception flags
output Denorm; // Denorm on input or output
output done;
supply1 vdd;
supply0 vss;
wire [63:0] Float1;
wire [63:0] Float2;
wire [63:0] IntValue;
wire [12:0] exp1, exp2, expF;
wire [12:0] exp_diff, bias;
wire [13:0] exp_sqrt;
wire [12:0] exp_s;
wire [12:0] exp_c;
wire [10:0] exponent, exp_pre;
wire [63:0] Result;
wire [52:0] mantissaA;
wire [52:0] mantissaB;
wire [63:0] sum, sum_tc, sum_corr, sum_norm;
wire [5:0] align_shift;
wire [5:0] norm_shift;
wire [2:0] sel_inv;
wire op1_Norm, op2_Norm;
wire opA_Norm, opB_Norm;
wire Invalid;
wire DenormIn, DenormIO;
wire [4:0] FlagsIn;
wire exp_gt63;
wire Sticky_out;
wire signResult, sign_corr;
wire corr_sign;
wire zeroB;
wire convert;
wire swap;
wire sub;
wire [63:0] q1, qm1, qp1, q0, qm0, qp0;
wire [63:0] rega_out, regb_out, regc_out, regd_out;
wire [127:0] regr_out;
wire [2:0] sel_muxa, sel_muxb;
wire sel_muxr;
wire load_rega, load_regb, load_regc, load_regd, load_regr;
wire donev, sel_muxrv, sel_muxsv;
wire [1:0] sel_muxav, sel_muxbv;
wire load_regav, load_regbv, load_regcv;
wire load_regrv, load_regsv;
// Convert the input operands to their appropriate forms based on
// the orignal operands, the op_type , and their precision P.
// Single precision inputs are converted to double precision
// and the sign of the first operand is set appropratiately based on
// if the operation is absolute value or negation.
convert_inputs_div divconv1 (Float1, Float2, op1, op2, op_type, P);
// Test for exceptions and return the "Invalid Operation" and
// "Denormalized" Input Flags. The "sel_inv" is used in
// the third pipeline stage to select the result. Also, op1_Norm
// and op2_Norm are one if op1 and op2 are not zero or denormalized.
// sub is one if the effective operation is subtaction.
exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm,
Float1, Float2, op_type);
// Determine Sign/Mantissa
assign signResult = ((Float1[63]^Float2[63])&~op_type) | Float1[63]&op_type;
assign mantissaA = {vdd, Float1[51:0]};
assign mantissaB = {vdd, Float2[51:0]};
// Perform Exponent Subtraction - expA - expB + Bias
assign exp1 = {2'b0, Float1[62:52]};
assign exp2 = {2'b0, Float2[62:52]};
// bias : DP = 2^{11-1}-1 = 1023
assign bias = {3'h0, 10'h3FF};
// Divide exponent
csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c);
exp_add explogic1 (exp_cout1, {open, exp_diff},
{vss, exp_s}, {vss, exp_c}, 1'b1);
// Sqrt exponent (check if exponent is odd)
assign exp_odd = Float1[52] ? vss : vdd;
exp_add explogic2 (exp_cout2, exp_sqrt,
{vss, exp1}, {4'h0, 10'h3ff}, exp_odd);
// Choose correct exponent
assign expF = op_type ? exp_sqrt[13:1] : exp_diff;
// Main Goldschmidt/Division Routine
divconv goldy (q1, qm1, qp1, q0, qm0, qp0,
rega_out, regb_out, regc_out, regd_out,
regr_out, mantissaB, mantissaA,
sel_muxa, sel_muxb, sel_muxr,
reset, clk,
load_rega, load_regb, load_regc, load_regd,
load_regr, load_regs, P, op_type, exp_odd);
// FSM : control divider
fsm control (done, load_rega, load_regb, load_regc, load_regd,
load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
clk, reset, start, error, op_type);
// Round the mantissa to a 52-bit value, with the leading one
// removed. The rounding units also handles special cases and
// set the exception flags.
rounder_div divround1 (Result, DenormIO, FlagsIn,
rm, P, OvEn, UnEn, expF,
sel_inv, Invalid, DenormIn, signResult,
q1, qm1, qp1, q0, qm0, qp0, regr_out);
// Store the final result and the exception flags in registers.
flopenr #(64) rega (clk, reset, done, Result, AS_Result);
flopenr #(1) regb (clk, reset, done, DenormIO, Denorm);
flopenr #(5) regc (clk, reset, done, FlagsIn, Flags);
endmodule // fpadd
//
// Brent-Kung Prefix Adder
// (yes, it is 14 bits as my generator is broken for 13 bits :(
// assume, synthesizer will delete stuff not needed )
//
module exp_add (cout, sum, a, b, cin);
input [13:0] a, b;
input cin;
output [13:0] sum;
output cout;
wire [14:0] p,g;
wire [13:0] c;
// pre-computation
assign p={a^b,1'b0};
assign g={a&b, cin};
// prefix tree
brent_kung prefix_tree(c, p[13:0], g[13:0]);
// post-computation
assign sum=p[14:1]^c;
assign cout=g[14]|(p[14]&c[13]);
endmodule // exp_add
module brent_kung (c, p, g);
input [13:0] p;
input [13:0] g;
output [14:1] c;
// parallel-prefix, Brent-Kung
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
// Stage 3: Generates G/P pairs that span 4 bits
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
// Stage 4: Generates G/P pairs that span 8 bits
// Stage 5: Generates G/P pairs that span 4 bits
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
// Stage 6: Generates G/P pairs that span 2 bits
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12);
// Last grey cell stage
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
// Final Stage: Apply c_k+1=G_k_0
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
assign c[9]=G_8_0;
assign c[10]=G_9_0;
assign c[11]=G_10_0;
assign c[12]=G_11_0;
assign c[13]=G_12_0;
assign c[14]=G_13_0;
endmodule // brent_kung

View File

@ -0,0 +1,200 @@
//
// File name : fpadd
// Title : Floating-Point Adder/Subtractor
// project : FPU
// Library : fpadd
// Author(s) : James E. Stine, Jr., Brett Mathis
// Purpose : definition of main unit to floating-point add/sub
// notes :
//
// Copyright Oklahoma State University
// Copyright AFRL
//
// Basic and Denormalized Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Compare exponents. Swap the operands of exp1 < exp2
// or of (exp1 = exp2 AND mnt1 < mnt2)
// Step 4: Shift the mantissa corresponding to the smaller exponent,
// and extend precision by three bits to the right.
// Step 5: Add or subtract the mantissas.
// Step 6: Normalize the result.//
// Shift left until normalized. Normalized when the value to the
// left of the binrary point is 1.
// Step 7: Round the result.//
// Step 8: Put sum onto output.
//
module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, op2_Norm, opA_Norm, opB_Norm, Invalid, DenormIn, convert, swap, normal_overflow, signA, Float1, Float2, exp1_denorm, exp2_denorm, exponent, op1, op2, rm, op_type, Pin, OvEn, UnEn);
input [63:0] op1; // 1st input operand (A)
input [63:0] op2; // 2nd input operand (B)
input [2:0] rm; // Rounding mode - specify values
input [3:0] op_type; // Function opcode
input Pin; // Result Precision (0 for double, 1 for single)
input OvEn; // Overflow trap enabled
input UnEn; // Underflow trap enabled
wire P;
assign P = Pin | op_type[2];
wire [63:0] IntValue;
wire [11:0] exp1, exp2;
wire [11:0] exp_diff1, exp_diff2;
wire [11:0] exp_shift;
wire [51:0] mantissaA;
wire [56:0] mantissaA1;
wire [63:0] mantissaA3;
wire [51:0] mantissaB;
wire [56:0] mantissaB1, mantissaB2;
wire [63:0] mantissaB3;
wire exp_gt63;
wire Sticky_out;
wire sub;
wire zeroB;
wire [5:0] align_shift;
output [63:0] Float1;
output [63:0] Float2;
output [10:0] exponent;
output [10:0] exponent_postsum;
output [10:0] exp1_denorm, exp2_denorm;
output [63:0] sum, sum_tc;
output [3:0] sel_inv;
output corr_sign;
output signA;
output op1_Norm, op2_Norm;
output opA_Norm, opB_Norm;
output Invalid;
output DenormIn;
// output exp_valid;
output convert;
output swap;
output normal_overflow;
wire [5:0] ZP_mantissaA;
wire [5:0] ZP_mantissaB;
wire ZV_mantissaA;
wire ZV_mantissaB;
// Convert the input operands to their appropriate forms based on
// the orignal operands, the op_type , and their precision P.
// Single precision inputs are converted to double precision
// and the sign of the first operand is set appropratiately based on
// if the operation is absolute value or negation.
convert_inputs conv1 (Float1, Float2, op1, op2, op_type, P);
// Test for exceptions and return the "Invalid Operation" and
// "Denormalized" Input Flags. The "sel_inv" is used in
// the third pipeline stage to select the result. Also, op1_Norm
// and op2_Norm are one if op1 and op2 are not zero or denormalized.
// sub is one if the effective operation is subtaction.
exception exc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, sub,
Float1, Float2, op_type);
// Perform Exponent Subtraction (used for alignment). For performance
// both exponent subtractions are performed in parallel. This was
// changed to a behavior level to allow the tools to try to optimize
// the two parallel additions. The input values are zero-extended to 12
// bits prior to performing the addition.
assign exp1 = {1'b0, Float1[62:52]};
assign exp2 = {1'b0, Float2[62:52]};
assign exp_diff1 = exp1 - exp2;
assign exp_diff2 = DenormIn ? ({Float2[63], exp2[10:0]} - {Float1[63], exp1[10:0]}): exp2 - exp1;
// The second operand (B) should be set to zero, if op_type does not
// specify addition or subtraction
assign zeroB = op_type[2] | op_type[1];
// Swapped operands if zeroB is not one and exp1 < exp2.
// Swapping causes exp2 to be used for the result exponent.
// Only the exponent of the larger operand is used to determine
// the final result.
assign swap = exp_diff1[11] & ~zeroB;
assign exponent = swap ? exp2[10:0] : exp1[10:0];
assign exponent_postsum = swap ? exp2[10:0] : exp1[10:0];
assign mantissaA = swap ? Float2[51:0] : Float1[51:0];
assign mantissaB = swap ? Float1[51:0] : Float2[51:0];
assign signA = swap ? Float2[63] : Float1[63];
// Leading-Zero Detector. Determine the size of the shift needed for
// normalization. If sum_corrected is all zeros, the exp_valid is
// zero; otherwise, it is one.
// modified to 52 bits to detect leading zeroes on denormalized mantissas
lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
// Denormalized exponents created by subtracting the leading zeroes from the original exponents
assign exp1_denorm = swap ? (exp1 - ZP_mantissaB) : (exp1 - ZP_mantissaA);
assign exp2_denorm = swap ? (exp2 - ZP_mantissaA) : (exp2 - ZP_mantissaB);
// Determine the alignment shift and limit it to 63. If any bit from
// exp_shift[6] to exp_shift[11] is one, then shift is set to all ones.
assign exp_shift = swap ? exp_diff2 : exp_diff1;
assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9]
| exp_shift[8] | exp_shift[7] | exp_shift[6];
assign align_shift = exp_shift | {6{exp_gt63}};
// Unpack the 52-bit mantissas to 57-bit numbers of the form.
// 001.M[51]M[50] ... M[1]M[0]00
// Unless the number has an exponent of zero, in which case it
// is unpacked as
// 000.00 ... 00
// This effectively flushes denormalized values to zero.
// The three bits of to the left of the binary point prevent overflow
// and loss of sign information. The two bits to the right of the
// original mantissa form the "guard" and "round" bits that are used
// to round the result.
assign opA_Norm = swap ? op2_Norm : op1_Norm;
assign opB_Norm = swap ? op1_Norm : op2_Norm;
assign mantissaA1 = {2'h0, opA_Norm, mantissaA[51:0]&{52{opA_Norm}}, 2'h0};
assign mantissaB1 = {2'h0, opB_Norm, mantissaB[51:0]&{52{opB_Norm}}, 2'h0};
// Perform mantissa alignment using a 57-bit barrel shifter
// If any of the bits shifted out are one, Sticky_out is set.
// The size of the barrel shifter could be reduced by two bits
// by not adding the leading two zeros until after the shift.
barrel_shifter_r57 bs1 (mantissaB2, Sticky_out, mantissaB1, align_shift);
// Place either the sign-extened 32-bit value or the original 64-bit value
// into IntValue (to be used for integer to floating point conversion)
assign IntValue [31:0] = op1[31:0];
assign IntValue [63:32] = op_type[0] ? {32{op1[31]}} : op1[63:32];
// If doing an integer to floating point conversion, mantissaA3 is set to
// IntVal and the prenomalized exponent is set to 1084. Otherwise,
// mantissaA3 is simply extended to 64-bits by setting the 7 LSBs to zero,
// and the exponent value is left unchanged.
// Under denormalized cases, the exponent before the rounder is set to 1
// if the normal shift value is 11.
assign convert = ~op_type[2] & op_type[1];
assign mantissaA3 = (op_type[3]) ? (op_type[0] ? Float1 : ~Float1) : (DenormIn ? ({12'h0, mantissaA}) : (convert ? IntValue : {mantissaA1, 7'h0}));
// Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to
// 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six
// zeros.
assign mantissaB3[63:7] = (op_type[3]) ? (57'h0) : (DenormIn ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
assign mantissaB3[6] = (op_type[3]) ? (1'b0) : (DenormIn ? mantissaB[6] : Sticky_out & ~zeroB);
assign mantissaB3[5:0] = (op_type[3]) ? (6'h01) : (DenormIn ? mantissaB[5:0] : 6'h0);
// The sign of the result needs to be corrected if the true
// operation is subtraction and the input operands were swapped.
assign corr_sign = ~op_type[2]&~op_type[1]&op_type[0]&swap;
// 64-bit Mantissa Adder/Subtractor
cla64 add1 (sum, mantissaA3, mantissaB3, sub);
// 64-bit Mantissa Subtractor - to get the two's complement of the
// result when the sign from the adder/subtractor is negative.
cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3);
// Finds normal underflow result to determine whether to round final exponent down
assign normal_overflow = (DenormIn & (sum == 16'h0) & (opA_Norm | opB_Norm) & ~op_type[0]) ? 1'b1 : (sum[63] ? sum_tc[52] : sum[52]);
endmodule // fpadd

View File

@ -0,0 +1,156 @@
//
// File name : fpadd
// Title : Floating-Point Adder/Subtractor
// project : FPU
// Library : fpadd
// Author(s) : James E. Stine, Jr., Brett Mathis
// Purpose : definition of main unit to floating-point add/sub
// notes :
//
// Copyright Oklahoma State University
// Copyright AFRL
//
// Basic and Denormalized Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Compare exponents. Swap the operands of exp1 < exp2
// or of (exp1 = exp2 AND mnt1 < mnt2)
// Step 4: Shift the mantissa corresponding to the smaller exponent,
// and extend precision by three bits to the right.
// Step 5: Add or subtract the mantissas.
// Step 6: Normalize the result.//
// Shift left until normalized. Normalized when the value to the
// left of the binrary point is 1.
// Step 7: Round the result.//
// Step 8: Put sum onto output.
//
module fpuaddcvt2sv (AS_Result, Flags, Denorm, sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, op2_Norm, opA_Norm, opB_Norm, Invalid, DenormIn, exp_valid, convert, swap, normal_overflow, signA, Float1, Float2, exp1_denorm, exp2_denorm, exponent, op1, op2, rm, op_type, Pin, OvEn, UnEn);
input [63:0] op1; // 1st input operand (A)
input [63:0] op2; // 2nd input operand (B)
input [2:0] rm; // Rounding mode - specify values
input [3:0] op_type; // Function opcode
input Pin; // Result Precision (0 for double, 1 for single)
input OvEn; // Overflow trap enabled
input UnEn; // Underflow trap enabled
input [63:0] sum, sum_tc;
input [63:0] Float1;
input [63:0] Float2;
input [10:0] exp1_denorm, exp2_denorm;
input [10:0] exponent, exponent_postsum; //exp_pre;
input exp_valid;
input [3:0] sel_inv;
input op1_Norm, op2_Norm;
input opA_Norm, opB_Norm;
input Invalid;
input DenormIn;
input signA;
input corr_sign;
input convert;
input swap;
input normal_overflow;
output [63:0] AS_Result; // Result of operation
output [4:0] Flags; // IEEE exception flags
output Denorm; // Denorm on input or output
wire P;
assign P = Pin | op_type[2];
wire [10:0] exp_pre;
wire [63:0] Result;
wire [63:0] sum_norm, sum_norm_w_bypass;
wire [5:0] norm_shift, norm_shift_denorm;
wire DenormIO;
wire [4:0] FlagsIn;
wire Sticky_out;
wire sign_corr;
wire zeroB;
wire [10:0] exponent_postsum;
wire mantissa_comp;
wire mantissa_comp_sum;
wire mantissa_comp_sum_tc;
wire Float1_sum_comp;
wire Float2_sum_comp;
wire Float1_sum_tc_comp;
wire Float2_sum_tc_comp;
wire normal_underflow;
wire [63:0] sum_corr;
//exponent value pre-rounding with considerations for denormalized
//cases/conversion cases
assign exp_pre = DenormIn ?
((norm_shift == 6'b001011) ? 11'b00000000001 : (swap ? exp2_denorm : exp1_denorm))
: (convert ? 11'b10000111100 : exponent);
// Finds normal underflow result to determine whether to round final exponent down
// Comparison between each float and the resulting sum of the primary cla adder/subtractor and cla subtractor
assign Float1_sum_comp = (Float1[51:0] > sum[51:0]) ? 1'b0 : 1'b1;
assign Float2_sum_comp = (Float2[51:0] > sum[51:0]) ? 1'b0 : 1'b1;
assign Float1_sum_tc_comp = (Float1[51:0] > sum_tc[51:0]) ? 1'b0 : 1'b1;
assign Float2_sum_tc_comp = (Float2[51:0] > sum_tc[51:0]) ? 1'b0 : 1'b1;
// Determines the correct Float value to compare based on swap result
assign mantissa_comp_sum = swap ? Float2_sum_comp : Float1_sum_comp;
assign mantissa_comp_sum_tc = swap ? Float2_sum_tc_comp : Float1_sum_tc_comp;
// Determines the correct comparison result based on operation and sign of resulting sum
assign mantissa_comp = (op_type[0] ^ sum[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;
// If the signs are different and both operands aren't denormalized
// the normal underflow bit is needed and therefore updated.
assign normal_underflow = ((Float1[63] ~^ Float2[63]) & (opA_Norm | opB_Norm)) ? mantissa_comp : 1'b0;
// Determine the correct sign of the result
assign sign_corr = ((corr_sign ^ signA) & ~convert) ^ sum[63];
// If the sum is negative, use its two complement instead.
// This value has to be 64-bits to correctly handle the
// case 10...00
assign sum_corr = (DenormIn & (opA_Norm | opB_Norm) & ( ( (Float1[63] ~^ Float2[63]) & op_type[0] ) | ((Float1[63] ^ Float2[63]) & ~op_type[0]) ))
? (sum[63] ? sum : sum_tc) : ( (op_type[3]) ? sum : (sum[63] ? sum_tc : sum));
// Finds normal underflow result to determine whether to round final exponent down
assign normal_overflow = (DenormIn & (sum == 16'h0) & (opA_Norm | opB_Norm) & ~op_type[0]) ? 1'b1 : (sum[63] ? sum_tc[52] : sum[52]);
// Leading-Zero Detector. Determine the size of the shift needed for
// normalization. If sum_corrected is all zeros, the exp_valid is
// zero; otherwise, it is one.
lz64 lzd1 (norm_shift, exp_valid, sum_corr);
assign norm_shift_denorm = (DenormIn & ( (~opA_Norm & ~opB_Norm) | normal_underflow)) ? (6'h00) : (norm_shift);
// Barell shifter used for normalization. It takes as inputs the
// the corrected sum and the amount by which the sum should
// be right shifted. It outputs the normalized sum.
barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);
assign sum_norm_w_bypass = (op_type[3]) ? (op_type[0] ? ~sum_corr : sum_corr) : (sum_norm);
// Round the mantissa to a 52-bit value, with the leading one
// removed. If the result is a single precision number, the actual
// mantissa is in the upper 23 bits and the lower 29 bits are zero.
// At this point, normalization has already been performed, so we know
// exactly where the rounding point is. The rounding units also
// handles special cases and set the exception flags.
// Changed DenormIO -> Denorm and FlagsIn -> Flags in order to
// help in processor reservation station detection of load/stores. In
// other words, the processor would like to know ahead of time that
// if the result is an exception then don't load or store.
rounder round1 (Result, DenormIO, FlagsIn, rm, P, OvEn, UnEn, exp_valid,
sel_inv, Invalid, DenormIn, convert, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass,
exponent_postsum, op1_Norm, op2_Norm, Float1[63:52], Float2[63:52],
normal_overflow, normal_underflow, swap, op_type, sum);
// Store the final result and the exception flags in registers.
assign AS_Result = Result;
assign {Denorm, Flags} = {DenormIO, FlagsIn};
endmodule // fpadd

View File

@ -0,0 +1,235 @@
//
// File name : fpcomp.v
// Title : Floating-Point Comparator
// project : FPU
// Library : fpcomp
// Author(s) : James E. Stine
// Purpose : definition of main unit to floating-point comparator
// notes :
//
// Copyright Oklahoma State University
//
// Floating Point Comparator (Algorithm)
//
// 1.) Performs sign-extension if the inputs are 32-bit integers.
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
// 3.) Check for special cases (+0=-0, unordered, and infinite values)
// and correct for sign bits
//
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
// signals, and a 2-bit signal Sel that indicates the type of
// operands being compared as indicated below.
// Sel Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 (unused)
//
// The comparator produces a 2-bit signal FCC, which
// indicates the result of the comparison:
//
// fcc decscription
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
//
// It also produces an invalid operation flag, which is one
// if either of the input operands is a signaling NaN per 754
module fpucmp1 (w, x, ANaN, BNaN, Azero, Bzero, op1, op2, Sel);
input logic [63:0] op1;
input logic [63:0] op2;
input logic [1:0] Sel;
output logic [7:0] w, x;
output logic ANaN, BNaN;
output logic Azero, Bzero;
// Perform magnitude comparison between the 63 least signficant bits
// of the input operands. Only LT and EQ are returned, since GT can
// be determined from these values.
magcompare64b_1 magcomp2 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]});
// Determine final values based on output of magnitude comparison,
// sign bits, and special case testing.
exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, Sel);
endmodule // fpcomp
module magcompare2b (LT, GT, A, B);
input logic [1:0] A;
input logic [1:0] B;
output logic LT;
output logic GT;
// Determine if A < B using a minimized sum-of-products expression
assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
// Determine if A > B using a minimized sum-of-products expression
assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
endmodule // magcompare2b
// 2-bit magnitude comparator
// This module compares two 2-bit values A and B. LT is '1' if A < B
// and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
// this version actually incorporates don't cares into the equation to
// simplify the optimization
module magcompare2c (LT, GT, A, B);
input logic [1:0] A;
input logic [1:0] B;
output logic LT;
output logic GT;
assign LT = B[1] | (!A[1]&B[0]);
assign GT = A[1] | (!B[1]&A[0]);
endmodule // magcompare2b
// This module compares two 64-bit values A and B. LT is '1' if A < B
// and EQ is '1'if A = B. LT and GT are both '0' if A > B.
// This structure was modified so
// that it only does a strict magnitdude comparison, and only
// returns flags for less than (LT) and eqaual to (EQ). It uses a tree
// of 63 2-bit magnitude comparators, followed by one OR gates.
//
// J. E. Stine and M. J. Schulte, "A combined two's complement and
// floating-point comparator," 2005 IEEE International Symposium on
// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
// doi: 10.1109/ISCAS.2005.1464531
module magcompare64b_1 (w, x, A, B);
input logic [63:0] A;
input logic [63:0] B;
logic [31:0] s;
logic [31:0] t;
logic [15:0] u;
logic [15:0] v;
output logic [7:0] w;
output logic [7:0] x;
magcompare2b mag1(s[0], t[0], A[1:0], B[1:0]);
magcompare2b mag2(s[1], t[1], A[3:2], B[3:2]);
magcompare2b mag3(s[2], t[2], A[5:4], B[5:4]);
magcompare2b mag4(s[3], t[3], A[7:6], B[7:6]);
magcompare2b mag5(s[4], t[4], A[9:8], B[9:8]);
magcompare2b mag6(s[5], t[5], A[11:10], B[11:10]);
magcompare2b mag7(s[6], t[6], A[13:12], B[13:12]);
magcompare2b mag8(s[7], t[7], A[15:14], B[15:14]);
magcompare2b mag9(s[8], t[8], A[17:16], B[17:16]);
magcompare2b magA(s[9], t[9], A[19:18], B[19:18]);
magcompare2b magB(s[10], t[10], A[21:20], B[21:20]);
magcompare2b magC(s[11], t[11], A[23:22], B[23:22]);
magcompare2b magD(s[12], t[12], A[25:24], B[25:24]);
magcompare2b magE(s[13], t[13], A[27:26], B[27:26]);
magcompare2b magF(s[14], t[14], A[29:28], B[29:28]);
magcompare2b mag10(s[15], t[15], A[31:30], B[31:30]);
magcompare2b mag11(s[16], t[16], A[33:32], B[33:32]);
magcompare2b mag12(s[17], t[17], A[35:34], B[35:34]);
magcompare2b mag13(s[18], t[18], A[37:36], B[37:36]);
magcompare2b mag14(s[19], t[19], A[39:38], B[39:38]);
magcompare2b mag15(s[20], t[20], A[41:40], B[41:40]);
magcompare2b mag16(s[21], t[21], A[43:42], B[43:42]);
magcompare2b mag17(s[22], t[22], A[45:44], B[45:44]);
magcompare2b mag18(s[23], t[23], A[47:46], B[47:46]);
magcompare2b mag19(s[24], t[24], A[49:48], B[49:48]);
magcompare2b mag1A(s[25], t[25], A[51:50], B[51:50]);
magcompare2b mag1B(s[26], t[26], A[53:52], B[53:52]);
magcompare2b mag1C(s[27], t[27], A[55:54], B[55:54]);
magcompare2b mag1D(s[28], t[28], A[57:56], B[57:56]);
magcompare2b mag1E(s[29], t[29], A[59:58], B[59:58]);
magcompare2b mag1F(s[30], t[30], A[61:60], B[61:60]);
magcompare2b mag20(s[31], t[31], A[63:62], B[63:62]);
magcompare2c mag21(u[0], v[0], t[1:0], s[1:0]);
magcompare2c mag22(u[1], v[1], t[3:2], s[3:2]);
magcompare2c mag23(u[2], v[2], t[5:4], s[5:4]);
magcompare2c mag24(u[3], v[3], t[7:6], s[7:6]);
magcompare2c mag25(u[4], v[4], t[9:8], s[9:8]);
magcompare2c mag26(u[5], v[5], t[11:10], s[11:10]);
magcompare2c mag27(u[6], v[6], t[13:12], s[13:12]);
magcompare2c mag28(u[7], v[7], t[15:14], s[15:14]);
magcompare2c mag29(u[8], v[8], t[17:16], s[17:16]);
magcompare2c mag2A(u[9], v[9], t[19:18], s[19:18]);
magcompare2c mag2B(u[10], v[10], t[21:20], s[21:20]);
magcompare2c mag2C(u[11], v[11], t[23:22], s[23:22]);
magcompare2c mag2D(u[12], v[12], t[25:24], s[25:24]);
magcompare2c mag2E(u[13], v[13], t[27:26], s[27:26]);
magcompare2c mag2F(u[14], v[14], t[29:28], s[29:28]);
magcompare2c mag30(u[15], v[15], t[31:30], s[31:30]);
magcompare2c mag31(w[0], x[0], v[1:0], u[1:0]);
magcompare2c mag32(w[1], x[1], v[3:2], u[3:2]);
magcompare2c mag33(w[2], x[2], v[5:4], u[5:4]);
magcompare2c mag34(w[3], x[3], v[7:6], u[7:6]);
magcompare2c mag35(w[4], x[4], v[9:8], u[9:8]);
magcompare2c mag36(w[5], x[5], v[11:10], u[11:10]);
magcompare2c mag37(w[6], x[6], v[13:12], u[13:12]);
magcompare2c mag38(w[7], x[7], v[15:14], u[15:14]);
endmodule // magcompare64b
// This module takes 64-bits inputs A and B, two magnitude comparison
// flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of
// operands being compared as indicated below.
// Sel Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 bfloat precision numbers
//
// The comparator produces a 2-bit signal fcc, which
// indicates the result of the comparison as follows:
// fcc decscription
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
// It also produces a invalid operation flag, which is one
// if either of the input operands is a signaling NaN.
module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel);
input logic [63:0] A;
input logic [63:0] B;
input logic [1:0] Sel;
logic dp, sp, hp;
output logic ANaN;
output logic BNaN;
output logic Azero;
output logic Bzero;
logic [62:0] sixtythreezeros = 63'h0;
assign dp = !Sel[1]&!Sel[0];
assign sp = !Sel[1]&Sel[0];
assign hp = Sel[1]&!Sel[0];
// Test if A or B is NaN.
assign ANaN = (A[62]&A[61]&A[60]&A[59]&A[58]) &
((sp&A[57]&A[56]&A[55]&(A[54]|A[53])) |
(dp&A[57]&A[56]&A[55]&A[54]&A[53]&A[52]&(A[51]|A[50])) |
(hp&(A[57]|A[56])));
assign BNaN = (B[62]&B[61]&B[60]&B[59]&B[58]) &
((sp&B[57]&B[56]&B[55]&(B[54]|B[53])) |
(dp&B[57]&B[56]&B[55]&B[54]&B[53]&B[52]&(B[51]|B[50])) |
(hp&(B[57]|B[56])));
// Test if A is +0 or -0 when viewed as a floating point number (i.e,
// the 63 least siginficant bits of A are zero).
// Depending on how this synthesizes, it may work better to replace
// this with assign Azero = ~(A[62] | A[61] | ... | A[0])
assign Azero = (A[62:0] == sixtythreezeros);
assign Bzero = (B[62:0] == sixtythreezeros);
endmodule // exception_cmp

View File

@ -0,0 +1,226 @@
//
// File name : fpcomp.v
// Title : Floating-Point Comparator
// project : FPU
// Library : fpcomp
// Author(s) : James E. Stine
// Purpose : definition of main unit to floating-point comparator
// notes :
//
// Copyright Oklahoma State University
//
// Floating Point Comparator (Algorithm)
//
// 1.) Performs sign-extension if the inputs are 32-bit integers.
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
// 3.) Check for special cases (+0=-0, unordered, and infinite values)
// and correct for sign bits
//
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
// signals, and a 2-bit signal Sel that indicates the type of
// operands being compared as indicated below.
// Sel Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 (unused)
//
// The comparator produces a 2-bit signal FCC, which
// indicates the result of the comparison:
//
// fcc decscription
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
//
// It also produces an invalid operation flag, which is one
// if either of the input operands is a signaling NaN per 754
module fpucmp2 (Invalid, FCC, ANaN, BNaN, Azero, Bzero, w, x, Sel, op1, op2);
input logic [63:0] op1;
input logic [63:0] op2;
input logic [1:0] Sel;
input logic [7:0] w, x;
input logic ANaN, BNaN;
input logic Azero, Bzero;
output logic Invalid; // Invalid Operation
output logic [1:0] FCC; // Condition Codes
logic LT; // magnitude op1 < magnitude op2
logic EQ; // magnitude op1 = magnitude op2
// Perform magnitude comparison between the 63 least signficant bits
// of the input operands. Only LT and EQ are returned, since GT can
// be determined from these values.
magcompare64b_2 magcomp2 (LT, EQ, w, x);
// Determine final values based on output of magnitude comparison,
// sign bits, and special case testing.
exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2));
endmodule // fpcomp
/*module magcompare2b (LT, GT, A, B);
input logic [1:0] A;
input logic [1:0] B;
output logic LT;
output logic GT;
// Determine if A < B using a minimized sum-of-products expression
assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
// Determine if A > B using a minimized sum-of-products expression
assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
endmodule*/ // magcompare2b
// 2-bit magnitude comparator
// This module compares two 2-bit values A and B. LT is '1' if A < B
// and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
// this version actually incorporates don't cares into the equation to
// simplify the optimization
module magcompare2c (LT, GT, A, B);
input logic [1:0] A;
input logic [1:0] B;
output logic LT;
output logic GT;
assign LT = B[1] | (!A[1]&B[0]);
assign GT = A[1] | (!B[1]&A[0]);
endmodule // magcompare2b
// This module compares two 64-bit values A and B. LT is '1' if A < B
// and EQ is '1'if A = B. LT and GT are both '0' if A > B.
// This structure was modified so
// that it only does a strict magnitdude comparison, and only
// returns flags for less than (LT) and eqaual to (EQ). It uses a tree
// of 63 2-bit magnitude comparators, followed by one OR gates.
//
// J. E. Stine and M. J. Schulte, "A combined two's complement and
// floating-point comparator," 2005 IEEE International Symposium on
// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
// doi: 10.1109/ISCAS.2005.1464531
module magcompare64b_2 (LT, EQ, w, x);
input logic [7:0] w;
input logic [7:0] x;
logic [3:0] y;
logic [3:0] z;
logic [1:0] a;
logic [1:0] b;
logic GT;
output logic LT;
output logic EQ;
magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]);
magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]);
magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]);
magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]);
magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]);
magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]);
magcompare2c mag3F(LT, GT, b[1:0], a[1:0]);
assign EQ = ~(LT | GT);
endmodule // magcompare64b
// This module takes 64-bits inputs A and B, two magnitude comparison
// flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of
// operands being compared as indicated below.
// Sel Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 bfloat precision numbers
//
// The comparator produces a 2-bit signal fcc, which
// indicates the result of the comparison as follows:
// fcc decscription
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
// It also produces a invalid operation flag, which is one
// if either of the input operands is a signaling NaN.
module exception_cmp_2 (invalid, fcc, LT_mag, EQ_mag, ANaN, BNaN, Azero, Bzero, Sel, A, B);
input logic [63:0] A;
input logic [63:0] B;
input logic LT_mag;
input logic EQ_mag;
input logic [1:0] Sel;
output logic invalid;
output logic [1:0] fcc;
logic dp;
logic sp;
logic hp;
input logic Azero;
input logic Bzero;
input logic ANaN;
input logic BNaN;
logic ASNaN;
logic BSNaN;
logic UO;
logic GT;
logic LT;
logic EQ;
logic [62:0] sixtythreezeros = 63'h0;
assign dp = !Sel[1]&!Sel[0];
assign sp = !Sel[1]&Sel[0];
assign hp = Sel[1]&!Sel[0];
// Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating
// point comparison is being performed.
assign UO = (ANaN | BNaN);
// Test if A or B is a signaling NaN.
assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]);
assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]);
// If either A or B is a signaling NaN the "Invalid Operation"
// exception flag is set to one; otherwise it is zero.
assign invalid = (ASNaN | BSNaN);
// A and B are equal if (their magnitudes are equal) AND ((their signs are
// equal) or (their magnitudes are zero AND they are floating point
// numbers)). Also, A and B are not equal if they are unordered.
assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO);
// A is less than B if (A is negative and B is posiive) OR
// (A and B are positive and the magnitude of A is less than
// the magnitude of B) or (A and B are negative integers and
// the magnitude of A is less than the magnitude of B) or
// (A and B are negative floating point numbers and
// the magnitude of A is greater than the magnitude of B).
// Also, A is not less than B if A and B are equal or unordered.
assign LT = ((~LT_mag & A[63] & B[63]) |
(LT_mag & ~(A[63] & B[63])))&~EQ&~UO;
// A is greater than B when LT, EQ, and UO are are false.
assign GT = ~(LT | EQ | UO);
// Note: it may be possible to optimize the setting of fcc
// a little more, but it is probably not worth the effort.
// Set the bits of fcc based on LT, GT, EQ, and UO
assign fcc[0] = LT | UO;
assign fcc[1] = GT | UO;
endmodule // exception_cmp

489
wally-pipelined/src/fpu/dev/fputop.sv Normal file → Executable file
View File

@ -1,7 +1,7 @@
`include "../../config/rv64icfd/wally-config.vh"
`include "../../../config/rv64icfd/wally-config.vh"
module fputop (
input logic [2:0] FrmW,
input logic [2:0] FrmD,
input logic reset,
input logic clear,
input logic clk,
@ -12,48 +12,465 @@ module fputop (
output logic DivSqrtDoneE,
output logic FInvalInstrD,
output logic [`XLEN-1:0] FPUResultW);
//NOTE:
//For readability and ease of modification, logic signals will be
//instantiated as they occur within the pipeline. This will keep local
//signals, modules, and combinational logic closely defined.
//used for OSU DP-size hardware to wally XLEN interfacing
integer XLENDIFF;
assign XLENDIFF = `XLEN - 64;
integer XLENDIFFN;
assign XLENDIFFN = 63 - `XLEN;
//#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
//BEGIN PIPELINE CONTROL LOGIC
//
logic PipeEnableDE;
logic PipeEnableEM;
logic PipeEnableMW;
logic PipeClearDE;
logic PipeClearEM;
logic PipeClearMW;
//temporarily assign pipe clear and enable signals
//to never flush & always be running
assign PipeClear = 1'b0;
assign PipeEnable = 1'b1;
always_comb begin
PipeEnableDE = PipeEnable;
PipeEnableEM = PipeEnable;
PipeEnableMW = PipeEnable;
PipeClearDE = PipeClear;
PipeClearEM = PipeClear;
PipeClearMW = PipeClear;
end
//
//END PIPELINE CONTROL LOGIC
//#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
//#########################################
//BEGIN DECODE STAGE
//
/*fctrl ();
//regfile instantiation and decode stage
//freg1adr ();
//wally-spec D stage control logic signal instantiation
logic IllegalFPUInstrFaultD;
logic FRegWriteD;
logic [2:0] FResultSelD;
//logic [2:0] FrmD;
logic PD;
logic DivSqrtStartD;
logic [3:0] OpCtrlD;
logic WriteIntD;
//freg2adr ();
//top-level controller for FPU
fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Rs1D(InstrD[19:15]), .FrmW(InstrD[14:12]), .WriteEnD(FRegWriteD), .DivSqrtStartD(DivSqrtStartD), .WriteSelD(FResultSelD), .OpCtrlD(OpCtrlD), .FmtD(PD), .WriteIntD(WriteIntD));
//instantiation of D stage regfile signals (includes some W stage signals
//for easy reference)
logic [2:0] FrmW;
logic WriteEnW;
logic [4:0] RdW, Rs1D, Rs2D, Rs3D;
logic [`XLEN-1:0] WriteDataW;
logic [`XLEN-1:0] ReadData1D, ReadData2D, ReadData3D;
//regfile instantiation
freg3adr fpregfile (FrmW, reset, PipeClear, clk, RdW, WriteEnW, Rs1D, Rs2D, Rs3D, WriteDataW, ReadData1D, ReadData2D, ReadData3D);
always_comb begin
FrmW = InstrD[14:12];
end
//
//END DECODE STAGE
//#########################################
//*****************************************
//BEGIN D/E PIPE
//
//wally-spec E stage control logic signal instantiation
logic FRegWriteE;
logic [2:0] FResultSelE;
logic [2:0] FrmE;
logic PE;
logic DivSqrtStartE;
logic [3:0] OpCtrlE;
//instantiation of E stage regfile signals
logic [4:0] RdE;
logic [`XLEN-1:0] ReadData1E, ReadData2E, ReadData3E;
//instantiation of E/M stage div/sqrt signals
logic DivSqrtDone, DivDenormM;
logic [63:0] DivResultM;
logic [4:0] DivFlagsM;
logic [63:0] DivOp1, DivOp2;
logic [2:0] DivFrm;
logic DivOpType;
logic DivP;
logic DivOvEn, DivUnEn;
logic DivStart;
//instantiate E stage FMA signals here
//instantiation of E stage add/cvt signals
logic [63:0] AddSumE, AddSumTcE;
logic [3:0] AddSelInvE;
logic [10:0] AddExpPostSumE;
logic AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE;
logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE;
logic [63:0] AddFloat1E, AddFloat2E;
logic [10:0] AddExp1DenormE, AddExp2DenormE, AddExponentE;
logic [63:0] AddOp1E, AddOp2E;
logic [2:0] AddRmE;
logic [3:0] AddOpTypeE;
logic AddPE, AddOvEnE, AddUnEnE;
//instantiation of E stage cmp signals
logic [7:0] WE, XE;
logic ANaNE, BNaNE, AzeroE, BzeroE;
logic [63:0] CmpOp1E, CmpOp2E;
logic [1:0] CmpSelE;
//instantiation of E/M stage fsgn signals (due to bypass logic)
logic [63:0] SgnOp1E, SgnOp2E;
logic [1:0] SgnOpCodeE, SgnOpCodeM;
logic [63:0] SgnResultE, SgnResultM;
logic [4:0] SgnFlagsE, SgnFlagsM;
//*****************
//fpregfile D/E pipe registers
//*****************
flopenrc #(64) (clk, reset, PipeClearDE, PipeEnableDE, ReadData1D, ReadData1E);
flopenrc #(64) (clk, reset, PipeClearDE, PipeEnableDE, ReadData2D, ReadData2E);
flopenrc #(64) (clk, reset, PipeClearDE, PipeEnableDE, ReadData3D, ReadData3E);
//*****************
//other D/E pipe registers
//*****************
flopenrc #(1) (clk, reset, PipeClearDE, PipeEnableDE, FRegWriteD, FRegWriteE);
flopenrc #(3) (clk, reset, PipeClearDE, PipeEnableDE, FResultsSelD, FResultsSelE);
flopenrc #(3) (clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE);
flopenrc #(1) (clk, reset, PipeClearDE, PipeEnableDE, PD, PE);
flopenrc #(4) (clk, reset, PipeClearDE, PipeEnableDE, OpCtrlD, OpCtrlE);
flopenrc #(1) (clk, reset, PipeClearDE, PipeEnableDE, DivSqrtStartD, DivSqrtStartE);
//
//END D/E PIPE
//*****************************************
//#########################################
//BEGIN EXECUTION STAGE
//
//fma1 ();
//first and only instance of floating-point divider
fpdivsqrt (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, DivFrm, DivOpType, DivP, DivOvEn, DivUnEn, DivStart, reset, clk);
//first of two-stage instance of floating-point add/cvt unit
fpaddcvt1 fpadd1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, AddOp1E, AddOp2E, AddRmE, AddOpTypeE, AddPE, AddOvEnE, AddUnEnE);
//first of two-stage instance of floating-point comparator
fpcmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, CmpOp1E, CmpOp2E, CmpSelE);
//first and only instance of floating-point sign converter
fpusgn fpsgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SgnOp1, SgnOp2);
//interface between XLEN size datapath and double-precision sized
//floating-point results
//
//define offsets for LSB zero extension or truncation
always_comb begin
//truncate to 64 bits
//(causes warning during compilation - case never reached)
if(`XLEN > 64) begin
DivOp1 <= ReadData1E[`XLEN:`XLEN-64];
DivOp2 <= ReadData2E[`XLEN:`XLEN-64];
AddOp1E <= ReadData1E[`XLEN:`XLEN-64];
AddOp2E <= ReadData2E[`XLEN:`XLEN-64];
CmpOp1E <= ReadData1E[`XLEN:`XLEN-64];
CmpOp2E <= ReadData2E[`XLEN:`XLEN-64];
SgnOp1E <= ReadData1E[`XLEN:`XLEN-64];
SgnOp2E <= ReadData2E[`XLEN:`XLEN-64];
end
//zero extend to 64 bits
else begin
DivOp1 <= {ReadData1E,{64-`XLEN{1'b0}}};
DivOp2 <= {ReadData2E,{64-`XLEN{1'b0}}};
AddOp1E <= {ReadData1E,{64-`XLEN{1'b0}}};
AddOp2E <= {ReadData2E,{64-`XLEN{1'b0}}};
CmpOp1E <= {ReadData1E,{64-`XLEN{1'b0}}};
CmpOp2E <= {ReadData2E,{64-`XLEN{1'b0}}};
SgnOp1E <= {ReadData1E,{64-`XLEN{1'b0}}};
SgnOp2E <= {ReadData2E,{64-`XLEN{1'b0}}};
end
//assign op codes
AddOpTypeE[3:0] <= OpCtrlE[3:0];
CmpSelE[1:0] <= OpCtrlE[1:0];
DivOpType <= OpCtrlE[0];
SgnOpCodeE[1:0] <= OpCtrlE[1:0];
end
//E stage control signal interfacing between wally spec and OSU fp hardware
//op codes
//
//END EXECUTION STAGE
//#########################################
//*****************************************
//BEGIN E/M PIPE
//
//wally-spec M stage control logic signal instantiation
logic FRegWriteM;
logic [2:0] FResultSelM;
logic [2:0] FrmM;
logic PM;
logic [3:0] OpCtrlM;
//instantiate M stage FMA signals here
//instantiation of M stage regfile signals
logic [4:0] RdM;
logic [`XLEN-1:0] ReadData1M, ReadData2M, ReadData3M;
//instantiation of M stage add/cvt signals
logic [63:0] AddResultM;
logic [4:0] AddFlagsM;
logic AddDenormM;
logic [63:0] AddSumM, AddSumTcM;
logic [3:0] AddSelInvM;
logic [10:0] AddExpPostSumM;
logic AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM;
logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM;
logic [63:0] AddFloat1M, AddFloat2M;
logic [10:0] AddExp1DenormM, AddExp2DenormM, AddExponentM;
logic [63:0] AddOp1M, AddOp2M;
logic [2:0] AddRmM;
logic [3:0] AddOpTypeM;
logic AddPM, AddOvEnM, AddUnEnM;
//instantiation of M stage cmp signals
logic CmpInvalidM;
logic [1:0] CmpFCCM;
logic [7:0] WM, XM;
logic ANaNM, BNaNM, AzeroM, BzeroM;
logic [63:0] CmpOp1M, CmpOp2M;
logic [1:0] CmpSelM;
//*****************
//fma E/M pipe registers
//*****************
//*****************
//fpadd E/M pipe registers
//*****************
flopenrc #(64) (clk, reset, PipeClearEM, PipeEnableEM, AddSumE, AddSumM);
flopenrc #(64) (clk, reset, PipeClearEM, PipeEnableEM, AddSumTcE, AddSumTcM);
flopenrc #(4) (clk, reset, PipeClearEM, PipeEnableEM, AddSelInvE, AddSelInvM);
flopenrc #(11) (clk, reset, PipeClearEM, PipeEnableEM, AddExpPostSumE, AddExpPostSumM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, AddCorrSignE, AddCorrSignM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, AddOp1NormE, AddOp1NormM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, AddOp2NormE, AddOp2NormM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, AddOpANormE, AddOpANormM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, AddOpBNormE, AddOpBNormM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, AddInvalidE, AddInvalidM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, AddDenormInE, AddDenormInM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, AddConvertE, AddConvertM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, AddSwapE, AddSwapM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, AddNormOvflowE, AddNormOvflowM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignM);
flopenrc #(64) (clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M);
flopenrc #(64) (clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M);
flopenrc #(11) (clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM);
flopenrc #(11) (clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM);
flopenrc #(11) (clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM);
flopenrc #(64) (clk, reset, PipeClearEM, PipeEnableEM, AddOp1E, AddOp1M);
flopenrc #(64) (clk, reset, PipeClearEM, PipeEnableEM, AddOp2E, AddOp2M);
flopenrc #(3) (clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM);
flopenrc #(4) (clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, AddOvEnE, AddOvEnM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, AddUnEnE, AddUnEnM);
//*****************
//fpcmp E/M pipe registers
//*****************
flopenrc #(8) (clk, reset, PipeClearEM, PipeEnableEM, WE, WM);
flopenrc #(8) (clk, reset, PipeClearEM, PipeEnableEM, XE, XM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, ANaNE, ANaNM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM);
flopenrc #(64) (clk, reset, PipeClearEM, PipeEnableEM, CmpOp1E, CmpOp1M);
flopenrc #(64) (clk, reset, PipeClearEM, PipeEnableEM, CmpOp2E, CmpOp2M);
flopenrc #(2) (clk, reset, PipeClearEM, PipeEnableEM, CmpSelE, CmpSelM);
//put this in for the event we want to delay fsgn - will otherwise bypass
//*****************
//fpsgn E/M pipe registers
//*****************
flopenrc #(2) (clk, reset, PipeClearEM, PipeEnableEM, SgnOpCodeE, SgnOpCodeM);
flopenrc #(64) (clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM);
flopenrc #(5) (clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM);
//*****************
//other E/M pipe registers
//*****************
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, FRegWriteE, FRegWriteM);
flopenrc #(3) (clk, reset, PipeClearEM, PipeEnableEM, FResultsSelE, FResultsSelM);
flopenrc #(3) (clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM);
flopenrc #(1) (clk, reset, PipeClearEM, PipeEnableEM, PE, PM);
flopenrc #(4) (clk, reset, PipeClearEM, PipeEnableEM, OpCtrlE, OpCtrlM);
//
//END E/M PIPE
//*****************************************
//#########################################
//BEGIN MEMORY STAGE
//
//fma2 ();
//second instance of two-stage floating-point add/cvt unit
fpaddcvt2 fpadd2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, AddOp1M, AddOp2M, AddRmM, AddOpTypeM, AddPM, AddOvEnM, AddUnEnM);
//second instance of two-stage floating-point comparator
fpcmp2 fpcmp2 (CmpInvalidM, CmpFCCM, ANaNM, BNaNM, AzeroM, BzeroM, WM, XM, CmpSelM, CmpOp1M, CmpOp2M);
//
//END MEMORY STAGE
//#########################################
//*****************************************
//BEGIN M/W PIPE
//
//freg2adr ();
//wally-spec W stage control logic signal instantiation
logic FRegWriteW;
logic [2:0] FResultSelW;
logic PW;
//freg2adr ();
//instantiate W stage fma signals here
freg3adr ();
//instantiation of W stage div/sqrt signals
logic DivDenormW;
logic [63:0] DivResultW;
logic [4:0] DivFlagsW;
//can easily be merged into privledged core
//if necessary
//fcsr ();
//instantiation of W stage regfile signals
logic [`XLEN-1:0] ReadData1W, ReadData2W, ReadData3W;
//E pipe and execution stage
fpdivsqrt ();
//instantiation of W stage add/cvt signals
logic [63:0] AddResultW;
logic [4:0] AddFlagsW;
logic AddDenormW;
fma1 ();
//instantiation of W stage cmp signals
logic CmpInvalidW;
logic [1:0] CmpFCCW;
fpaddcvt1 ();
fpcmp1 ();
fpsign ();
//M pipe and memory stage
fma2 ();
fpaddcvt2 ();
fpcmp2 ();
//W pipe and writeback stage
//flag signal mux
//*****************
//fma M/W pipe registers
//*****************
//result mux
*/
//*****************
//fpdiv M/W pipe registers
//*****************
flopenrc #(64) (clk, reset, PipeClearMW, PipeEnableMW, DivResultM, DivResultW);
flopenrc #(5) (clk, reset, PipeClearMW, PipeEnableMW, DivFlagsM, DivFlagsW);
flopenrc #(1) (clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW);
//*****************
//fpadd M/W pipe registers
//*****************
flopenrc #(64) (clk, reset, PipeClearMW, PipeEnableMW, AddResultM, AddResultW);
flopenrc #(5) (clk, reset, PipeClearMW, PipeEnableMW, AddFlagsM, AddFlagsW);
flopenrc #(1) (clk, reset, PipeClearMW, PipeEnableMW, AddDenormM, AddDenormW);
//*****************
//fpcmp M/W pipe registers
//*****************
flopenrc #(1) (clk, reset, PipeClearMW, PipeEnableMW, CmpInvalidM, CmpInvalidW);
flopenrc #(2) (clk, reset, PipeClearMW, PipeEnableMW, CmpFCCM, CmpFCCW);
//*****************
//fpsgn M/W pipe registers
//*****************
flopenrc #(64) (clk, reset, PipeClearMW, PipeEnableMw, SgnResultM, SgnResultW);
flopenrc #(5) (clk, reset, PipeClearMw, PipeEnableMw, SgnFlagsM, SgnFlagsW);
//*****************
//other M/W pipe registers
//*****************
flopenrc #(1) (clk, reset, PipeClearMW, PipeEnableMW, FRegWriteM, FRegWriteW);
flopenrc #(3) (clk, reset, PipeClearMW, PipeEnableMW, FResultsSelM, FResultsSelW);
flopenrc #(1) (clk, reset, PipeClearMW, PipeEnableMW, PM, PW);
////END M/W PIPE
//*****************************************
//#########################################
//BEGIN WRITEBACK STAGE
//
//flag signal mux via in-line ternaries
logic [4:0] FPUFlagsW;
//if bit 2 is active set to sign flags - otherwise:
//iff bit one is high - if bit zero is active set to fma flags - otherwise
//set to cmp flags
//iff bit one is low - if bit zero is active set to add/cvt flags - otherwise
//set to div/sqrt flags
assign FPUFlagsW = (FResultSelW[2]) ? (SgnFlagsW) : (
(FResultSelW[1]) ?
( (FResultSelW[0]) ? (5'b00000) : ({CmpInvalidW,4'b0000}) )
: ( (FResultSelW[0]) ? (AddFlagsW) : (DivFlagsW) )
);
//result mux via in-line ternaries
logic [63:0] FPUResultDirW;
//the uses the same logic as for flag signals
assign FPUResultDirW = (FResultSelW[2]) ? (SgnResultW) : (
(FResultSelW[1]) ?
( (FResultSelW[0]) ? (64'b0) : ({62'b0,CmpFCCW}) )
: ( (FResultSelW[0]) ? (AddResultW) : (DivResultW) )
);
//interface between XLEN size datapath and double-precision sized
//floating-point results
//
//define offsets for LSB zero extension or truncation
always_comb begin
//zero extension
if(`XLEN > 64) begin
FPUResultW <= {FPUResultDirW,{XLENDIFF{1'b0}}};
end
//truncate
else begin
FPUResultW <= FPUResultDirW[63:64-`XLEN];
end
end
//
//END WRITEBACK STAGE
//#########################################
endmodule

18
wally-pipelined/src/fpu/dev/freg.sv Normal file → Executable file
View File

@ -1,4 +1,4 @@
`include "../../config/rv64icfd/wally-config.vh"
`include "../../../config/rv64icfd/wally-config.vh"
module freg1adr (
input logic [2:0] frm,
@ -482,10 +482,10 @@ module freg3adr (
5'b00110 : regInput[6] = writeData;
5'b00111 : regInput[7] = writeData;
5'b01000 : regInput[8] = writeData;
5'b01000 : regInput[9] = writeData;
5'b01001 : regInput[10] = writeData;
5'b01010 : regInput[11] = writeData;
5'b01111 : regInput[12] = writeData;
5'b01001 : regInput[9] = writeData;
5'b01010 : regInput[10] = writeData;
5'b01011 : regInput[11] = writeData;
5'b01100 : regInput[12] = writeData;
5'b01101 : regInput[13] = writeData;
5'b01110 : regInput[14] = writeData;
5'b01111 : regInput[15] = writeData;
@ -498,10 +498,10 @@ module freg3adr (
5'b10110 : regInput[22] = writeData;
5'b10111 : regInput[23] = writeData;
5'b11000 : regInput[24] = writeData;
5'b11000 : regInput[25] = writeData;
5'b11001 : regInput[26] = writeData;
5'b11010 : regInput[27] = writeData;
5'b11111 : regInput[28] = writeData;
5'b11001 : regInput[25] = writeData;
5'b11010 : regInput[26] = writeData;
5'b11011 : regInput[27] = writeData;
5'b11100 : regInput[28] = writeData;
5'b11101 : regInput[29] = writeData;
5'b11110 : regInput[30] = writeData;
5'b11111 : regInput[31] = writeData;

View File

@ -0,0 +1,31 @@
//performs the fsgnj/fsgnjn/fsgnjx RISCV instructions
module fpusgn (op_code, Y, Flags, A, B);
input [63:0] A, B;
input [1:0] op_code;
output [63:0] Y;
output [4:0] Flags;
wire AonesExp;
//op code designation:
//
//00 - fsgnj - directly copy over sign value of B
//01 - fsgnjn - negate sign value of B
//10 - fsgnjx - XOR sign values of A & B
//
assign Y[63] = op_code[1] ? (A[63] ^ B[63]) : (B[63] ^ op_code[0]);
assign Y[62:0] = A[62:0];
//If the exponent is all ones, then the value is either Inf or NaN,
//both of which will produce a QNaN/SNaN value of some sort. This will
//set the invalid flag high.
assign AonesExp = A[62]&A[61]&A[60]&A[59]&A[58]&A[57]&A[56]&A[55]&A[54]&A[53]&A[52];
//the only flag that can occur during this operation is invalid
//due to changing sign on already existing NaN
assign Flags = {AonesExp & Y[63], 1'b0, 1'b0, 1'b0, 1'b0};
endmodule

459
wally-pipelined/src/fpu/dev/fsm.v Executable file
View File

@ -0,0 +1,459 @@
module fsm (done, load_rega, load_regb, load_regc,
load_regd, load_regr, load_regs,
sel_muxa, sel_muxb, sel_muxr,
clk, reset, start, error, op_type);
input clk;
input reset;
input start;
input error;
input op_type;
output done;
output load_rega;
output load_regb;
output load_regc;
output load_regd;
output load_regr;
output load_regs;
output [2:0] sel_muxa;
output [2:0] sel_muxb;
output sel_muxr;
reg done; // End of cycles
reg load_rega; // enable for regA
reg load_regb; // enable for regB
reg load_regc; // enable for regC
reg load_regd; // enable for regD
reg load_regr; // enable for rem
reg load_regs; // enable for q,qm,qp
reg [2:0] sel_muxa; // Select muxA
reg [2:0] sel_muxb; // Select muxB
reg sel_muxr; // Select rem mux
reg [4:0] CURRENT_STATE;
reg [4:0] NEXT_STATE;
parameter [4:0]
S0=5'd0, S1=5'd1, S2=5'd2,
S3=5'd3, S4=5'd4, S5=5'd5,
S6=5'd6, S7=5'd7, S8=5'd8,
S9=5'd9, S10=5'd10,
S13=5'd13, S14=5'd14, S15=5'd15,
S16=5'd16, S17=5'd17, S18=5'd18,
S19=5'd19, S20=5'd20, S21=5'd21,
S22=5'd22, S23=5'd23, S24=5'd24,
S25=5'd25, S26=5'd26, S27=5'd27,
S28=5'd28, S29=5'd29, S30=5'd30;
always @(posedge clk)
begin
if(reset==1'b1)
CURRENT_STATE<=S0;
else
CURRENT_STATE<=NEXT_STATE;
end
always @(*)
begin
case(CURRENT_STATE)
S0: // iteration 0
begin
if (start==1'b0)
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
NEXT_STATE <= S0;
end
else if (start==1'b1 && op_type==1'b0)
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b001;
sel_muxb = 3'b001;
sel_muxr = 1'b0;
NEXT_STATE <= S1;
end // if (start==1'b1 && op_type==1'b0)
else if (start==1'b1 && op_type==1'b1)
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b010;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
NEXT_STATE <= S13;
end
end // case: S0
S1:
begin
done = 1'b0;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b010;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
NEXT_STATE <= S2;
end
S2: // iteration 1
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b011;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
NEXT_STATE <= S3;
end
S3:
begin
done = 1'b0;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b010;
sel_muxr = 1'b0;
NEXT_STATE <= S4;
end
S4: // iteration 2
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b011;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
NEXT_STATE <= S5;
end
S5:
begin
done = 1'b0;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b010;
sel_muxr = 1'b0; // add
NEXT_STATE <= S6;
end
S6: // iteration 3
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b011;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
NEXT_STATE <= S8;
end
S7:
begin
done = 1'b0;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b010;
sel_muxr = 1'b0;
NEXT_STATE <= S8;
end // case: S7
S8: // q,qm,qp
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b1;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
NEXT_STATE <= S9;
end
S9: // rem
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b1;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b1;
NEXT_STATE <= S10;
end
S10: // done
begin
done = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
NEXT_STATE <= S0;
end
S13: // start of sqrt path
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b1;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b010;
sel_muxb = 3'b001;
sel_muxr = 1'b0;
NEXT_STATE <= S14;
end
S14:
begin
done = 1'b0;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b001;
sel_muxb = 3'b100;
sel_muxr = 1'b0;
NEXT_STATE <= S15;
end
S15: // iteration 1
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b011;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
NEXT_STATE <= S16;
end
S16:
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b1;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
NEXT_STATE <= S17;
end
S17:
begin
done = 1'b0;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b100;
sel_muxb = 3'b010;
sel_muxr = 1'b0;
NEXT_STATE <= S18;
end
S18: // iteration 2
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b011;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
NEXT_STATE <= S19;
end
S19:
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b1;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
NEXT_STATE <= S20;
end
S20:
begin
done = 1'b0;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b100;
sel_muxb = 3'b010;
sel_muxr = 1'b0;
NEXT_STATE <= S21;
end
S21: // iteration 3
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b011;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
NEXT_STATE <= S22;
end
S22:
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b1;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
NEXT_STATE <= S23;
end
S23:
begin
done = 1'b0;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b100;
sel_muxb = 3'b010;
sel_muxr = 1'b0;
NEXT_STATE <= S24;
end
S24: // q,qm,qp
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b1;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
NEXT_STATE <= S25;
end
S25: // rem
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b1;
load_regs = 1'b0;
sel_muxa = 3'b011;
sel_muxb = 3'b110;
sel_muxr = 1'b1;
NEXT_STATE <= S26;
end
S26: // done
begin
done = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
NEXT_STATE <= S0;
end
default:
begin
done = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
NEXT_STATE <= S0;
end
endcase // case(CURRENT_STATE)
end // always @ (CURRENT_STATE or X)
endmodule // fsm

View File

@ -0,0 +1,543 @@
// Ladner-Fischer Prefix Adder
module ldf128 (cout, sum, a, b, cin);
input [127:0] a, b;
input cin;
output [127:0] sum;
output cout;
wire [128:0] p,g;
wire [127:0] c;
// pre-computation
assign p={a^b,1'b0};
assign g={a&b, cin};
// prefix tree
ladner_fischer128 prefix_tree (c, p[127:0], g[127:0]);
// post-computation
assign sum=p[128:1]^c;
assign cout=g[128]|(p[128]&c[127]);
endmodule
module ladner_fischer128 (c, p, g);
input [127:0] p;
input [127:0] g;
output [128:1] c;
// parallel-prefix, Ladner-Fischer
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
black b_15_14 (G_15_14, P_15_14, {g[15],g[14]}, {p[15],p[14]});
black b_17_16 (G_17_16, P_17_16, {g[17],g[16]}, {p[17],p[16]});
black b_19_18 (G_19_18, P_19_18, {g[19],g[18]}, {p[19],p[18]});
black b_21_20 (G_21_20, P_21_20, {g[21],g[20]}, {p[21],p[20]});
black b_23_22 (G_23_22, P_23_22, {g[23],g[22]}, {p[23],p[22]});
black b_25_24 (G_25_24, P_25_24, {g[25],g[24]}, {p[25],p[24]});
black b_27_26 (G_27_26, P_27_26, {g[27],g[26]}, {p[27],p[26]});
black b_29_28 (G_29_28, P_29_28, {g[29],g[28]}, {p[29],p[28]});
black b_31_30 (G_31_30, P_31_30, {g[31],g[30]}, {p[31],p[30]});
black b_33_32 (G_33_32, P_33_32, {g[33],g[32]}, {p[33],p[32]});
black b_35_34 (G_35_34, P_35_34, {g[35],g[34]}, {p[35],p[34]});
black b_37_36 (G_37_36, P_37_36, {g[37],g[36]}, {p[37],p[36]});
black b_39_38 (G_39_38, P_39_38, {g[39],g[38]}, {p[39],p[38]});
black b_41_40 (G_41_40, P_41_40, {g[41],g[40]}, {p[41],p[40]});
black b_43_42 (G_43_42, P_43_42, {g[43],g[42]}, {p[43],p[42]});
black b_45_44 (G_45_44, P_45_44, {g[45],g[44]}, {p[45],p[44]});
black b_47_46 (G_47_46, P_47_46, {g[47],g[46]}, {p[47],p[46]});
black b_49_48 (G_49_48, P_49_48, {g[49],g[48]}, {p[49],p[48]});
black b_51_50 (G_51_50, P_51_50, {g[51],g[50]}, {p[51],p[50]});
black b_53_52 (G_53_52, P_53_52, {g[53],g[52]}, {p[53],p[52]});
black b_55_54 (G_55_54, P_55_54, {g[55],g[54]}, {p[55],p[54]});
black b_57_56 (G_57_56, P_57_56, {g[57],g[56]}, {p[57],p[56]});
black b_59_58 (G_59_58, P_59_58, {g[59],g[58]}, {p[59],p[58]});
black b_61_60 (G_61_60, P_61_60, {g[61],g[60]}, {p[61],p[60]});
black b_63_62 (G_63_62, P_63_62, {g[63],g[62]}, {p[63],p[62]});
black b_65_64 (G_65_64, P_65_64, {g[65],g[64]}, {p[65],p[64]});
black b_67_66 (G_67_66, P_67_66, {g[67],g[66]}, {p[67],p[66]});
black b_69_68 (G_69_68, P_69_68, {g[69],g[68]}, {p[69],p[68]});
black b_71_70 (G_71_70, P_71_70, {g[71],g[70]}, {p[71],p[70]});
black b_73_72 (G_73_72, P_73_72, {g[73],g[72]}, {p[73],p[72]});
black b_75_74 (G_75_74, P_75_74, {g[75],g[74]}, {p[75],p[74]});
black b_77_76 (G_77_76, P_77_76, {g[77],g[76]}, {p[77],p[76]});
black b_79_78 (G_79_78, P_79_78, {g[79],g[78]}, {p[79],p[78]});
black b_81_80 (G_81_80, P_81_80, {g[81],g[80]}, {p[81],p[80]});
black b_83_82 (G_83_82, P_83_82, {g[83],g[82]}, {p[83],p[82]});
black b_85_84 (G_85_84, P_85_84, {g[85],g[84]}, {p[85],p[84]});
black b_87_86 (G_87_86, P_87_86, {g[87],g[86]}, {p[87],p[86]});
black b_89_88 (G_89_88, P_89_88, {g[89],g[88]}, {p[89],p[88]});
black b_91_90 (G_91_90, P_91_90, {g[91],g[90]}, {p[91],p[90]});
black b_93_92 (G_93_92, P_93_92, {g[93],g[92]}, {p[93],p[92]});
black b_95_94 (G_95_94, P_95_94, {g[95],g[94]}, {p[95],p[94]});
black b_97_96 (G_97_96, P_97_96, {g[97],g[96]}, {p[97],p[96]});
black b_99_98 (G_99_98, P_99_98, {g[99],g[98]}, {p[99],p[98]});
black b_101_100 (G_101_100, P_101_100, {g[101],g[100]}, {p[101],p[100]});
black b_103_102 (G_103_102, P_103_102, {g[103],g[102]}, {p[103],p[102]});
black b_105_104 (G_105_104, P_105_104, {g[105],g[104]}, {p[105],p[104]});
black b_107_106 (G_107_106, P_107_106, {g[107],g[106]}, {p[107],p[106]});
black b_109_108 (G_109_108, P_109_108, {g[109],g[108]}, {p[109],p[108]});
black b_111_110 (G_111_110, P_111_110, {g[111],g[110]}, {p[111],p[110]});
black b_113_112 (G_113_112, P_113_112, {g[113],g[112]}, {p[113],p[112]});
black b_115_114 (G_115_114, P_115_114, {g[115],g[114]}, {p[115],p[114]});
black b_117_116 (G_117_116, P_117_116, {g[117],g[116]}, {p[117],p[116]});
black b_119_118 (G_119_118, P_119_118, {g[119],g[118]}, {p[119],p[118]});
black b_121_120 (G_121_120, P_121_120, {g[121],g[120]}, {p[121],p[120]});
black b_123_122 (G_123_122, P_123_122, {g[123],g[122]}, {p[123],p[122]});
black b_125_124 (G_125_124, P_125_124, {g[125],g[124]}, {p[125],p[124]});
black b_127_126 (G_127_126, P_127_126, {g[127],g[126]}, {p[127],p[126]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12});
black b_19_16 (G_19_16, P_19_16, {G_19_18,G_17_16}, {P_19_18,P_17_16});
black b_23_20 (G_23_20, P_23_20, {G_23_22,G_21_20}, {P_23_22,P_21_20});
black b_27_24 (G_27_24, P_27_24, {G_27_26,G_25_24}, {P_27_26,P_25_24});
black b_31_28 (G_31_28, P_31_28, {G_31_30,G_29_28}, {P_31_30,P_29_28});
black b_35_32 (G_35_32, P_35_32, {G_35_34,G_33_32}, {P_35_34,P_33_32});
black b_39_36 (G_39_36, P_39_36, {G_39_38,G_37_36}, {P_39_38,P_37_36});
black b_43_40 (G_43_40, P_43_40, {G_43_42,G_41_40}, {P_43_42,P_41_40});
black b_47_44 (G_47_44, P_47_44, {G_47_46,G_45_44}, {P_47_46,P_45_44});
black b_51_48 (G_51_48, P_51_48, {G_51_50,G_49_48}, {P_51_50,P_49_48});
black b_55_52 (G_55_52, P_55_52, {G_55_54,G_53_52}, {P_55_54,P_53_52});
black b_59_56 (G_59_56, P_59_56, {G_59_58,G_57_56}, {P_59_58,P_57_56});
black b_63_60 (G_63_60, P_63_60, {G_63_62,G_61_60}, {P_63_62,P_61_60});
black b_67_64 (G_67_64, P_67_64, {G_67_66,G_65_64}, {P_67_66,P_65_64});
black b_71_68 (G_71_68, P_71_68, {G_71_70,G_69_68}, {P_71_70,P_69_68});
black b_75_72 (G_75_72, P_75_72, {G_75_74,G_73_72}, {P_75_74,P_73_72});
black b_79_76 (G_79_76, P_79_76, {G_79_78,G_77_76}, {P_79_78,P_77_76});
black b_83_80 (G_83_80, P_83_80, {G_83_82,G_81_80}, {P_83_82,P_81_80});
black b_87_84 (G_87_84, P_87_84, {G_87_86,G_85_84}, {P_87_86,P_85_84});
black b_91_88 (G_91_88, P_91_88, {G_91_90,G_89_88}, {P_91_90,P_89_88});
black b_95_92 (G_95_92, P_95_92, {G_95_94,G_93_92}, {P_95_94,P_93_92});
black b_99_96 (G_99_96, P_99_96, {G_99_98,G_97_96}, {P_99_98,P_97_96});
black b_103_100 (G_103_100, P_103_100, {G_103_102,G_101_100}, {P_103_102,P_101_100});
black b_107_104 (G_107_104, P_107_104, {G_107_106,G_105_104}, {P_107_106,P_105_104});
black b_111_108 (G_111_108, P_111_108, {G_111_110,G_109_108}, {P_111_110,P_109_108});
black b_115_112 (G_115_112, P_115_112, {G_115_114,G_113_112}, {P_115_114,P_113_112});
black b_119_116 (G_119_116, P_119_116, {G_119_118,G_117_116}, {P_119_118,P_117_116});
black b_123_120 (G_123_120, P_123_120, {G_123_122,G_121_120}, {P_123_122,P_121_120});
black b_127_124 (G_127_124, P_127_124, {G_127_126,G_125_124}, {P_127_126,P_125_124});
// Stage 3: Generates G/P pairs that span 4 bits
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
black b_13_8 (G_13_8, P_13_8, {G_13_12,G_11_8}, {P_13_12,P_11_8});
black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8});
black b_21_16 (G_21_16, P_21_16, {G_21_20,G_19_16}, {P_21_20,P_19_16});
black b_23_16 (G_23_16, P_23_16, {G_23_20,G_19_16}, {P_23_20,P_19_16});
black b_29_24 (G_29_24, P_29_24, {G_29_28,G_27_24}, {P_29_28,P_27_24});
black b_31_24 (G_31_24, P_31_24, {G_31_28,G_27_24}, {P_31_28,P_27_24});
black b_37_32 (G_37_32, P_37_32, {G_37_36,G_35_32}, {P_37_36,P_35_32});
black b_39_32 (G_39_32, P_39_32, {G_39_36,G_35_32}, {P_39_36,P_35_32});
black b_45_40 (G_45_40, P_45_40, {G_45_44,G_43_40}, {P_45_44,P_43_40});
black b_47_40 (G_47_40, P_47_40, {G_47_44,G_43_40}, {P_47_44,P_43_40});
black b_53_48 (G_53_48, P_53_48, {G_53_52,G_51_48}, {P_53_52,P_51_48});
black b_55_48 (G_55_48, P_55_48, {G_55_52,G_51_48}, {P_55_52,P_51_48});
black b_61_56 (G_61_56, P_61_56, {G_61_60,G_59_56}, {P_61_60,P_59_56});
black b_63_56 (G_63_56, P_63_56, {G_63_60,G_59_56}, {P_63_60,P_59_56});
black b_69_64 (G_69_64, P_69_64, {G_69_68,G_67_64}, {P_69_68,P_67_64});
black b_71_64 (G_71_64, P_71_64, {G_71_68,G_67_64}, {P_71_68,P_67_64});
black b_77_72 (G_77_72, P_77_72, {G_77_76,G_75_72}, {P_77_76,P_75_72});
black b_79_72 (G_79_72, P_79_72, {G_79_76,G_75_72}, {P_79_76,P_75_72});
black b_85_80 (G_85_80, P_85_80, {G_85_84,G_83_80}, {P_85_84,P_83_80});
black b_87_80 (G_87_80, P_87_80, {G_87_84,G_83_80}, {P_87_84,P_83_80});
black b_93_88 (G_93_88, P_93_88, {G_93_92,G_91_88}, {P_93_92,P_91_88});
black b_95_88 (G_95_88, P_95_88, {G_95_92,G_91_88}, {P_95_92,P_91_88});
black b_101_96 (G_101_96, P_101_96, {G_101_100,G_99_96}, {P_101_100,P_99_96});
black b_103_96 (G_103_96, P_103_96, {G_103_100,G_99_96}, {P_103_100,P_99_96});
black b_109_104 (G_109_104, P_109_104, {G_109_108,G_107_104}, {P_109_108,P_107_104});
black b_111_104 (G_111_104, P_111_104, {G_111_108,G_107_104}, {P_111_108,P_107_104});
black b_117_112 (G_117_112, P_117_112, {G_117_116,G_115_112}, {P_117_116,P_115_112});
black b_119_112 (G_119_112, P_119_112, {G_119_116,G_115_112}, {P_119_116,P_115_112});
black b_125_120 (G_125_120, P_125_120, {G_125_124,G_123_120}, {P_125_124,P_123_120});
black b_127_120 (G_127_120, P_127_120, {G_127_124,G_123_120}, {P_127_124,P_123_120});
// Stage 4: Generates G/P pairs that span 8 bits
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
grey g_13_0 (G_13_0, {G_13_8,G_7_0}, P_13_8);
grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8);
black b_25_16 (G_25_16, P_25_16, {G_25_24,G_23_16}, {P_25_24,P_23_16});
black b_27_16 (G_27_16, P_27_16, {G_27_24,G_23_16}, {P_27_24,P_23_16});
black b_29_16 (G_29_16, P_29_16, {G_29_24,G_23_16}, {P_29_24,P_23_16});
black b_31_16 (G_31_16, P_31_16, {G_31_24,G_23_16}, {P_31_24,P_23_16});
black b_41_32 (G_41_32, P_41_32, {G_41_40,G_39_32}, {P_41_40,P_39_32});
black b_43_32 (G_43_32, P_43_32, {G_43_40,G_39_32}, {P_43_40,P_39_32});
black b_45_32 (G_45_32, P_45_32, {G_45_40,G_39_32}, {P_45_40,P_39_32});
black b_47_32 (G_47_32, P_47_32, {G_47_40,G_39_32}, {P_47_40,P_39_32});
black b_57_48 (G_57_48, P_57_48, {G_57_56,G_55_48}, {P_57_56,P_55_48});
black b_59_48 (G_59_48, P_59_48, {G_59_56,G_55_48}, {P_59_56,P_55_48});
black b_61_48 (G_61_48, P_61_48, {G_61_56,G_55_48}, {P_61_56,P_55_48});
black b_63_48 (G_63_48, P_63_48, {G_63_56,G_55_48}, {P_63_56,P_55_48});
black b_73_64 (G_73_64, P_73_64, {G_73_72,G_71_64}, {P_73_72,P_71_64});
black b_75_64 (G_75_64, P_75_64, {G_75_72,G_71_64}, {P_75_72,P_71_64});
black b_77_64 (G_77_64, P_77_64, {G_77_72,G_71_64}, {P_77_72,P_71_64});
black b_79_64 (G_79_64, P_79_64, {G_79_72,G_71_64}, {P_79_72,P_71_64});
black b_89_80 (G_89_80, P_89_80, {G_89_88,G_87_80}, {P_89_88,P_87_80});
black b_91_80 (G_91_80, P_91_80, {G_91_88,G_87_80}, {P_91_88,P_87_80});
black b_93_80 (G_93_80, P_93_80, {G_93_88,G_87_80}, {P_93_88,P_87_80});
black b_95_80 (G_95_80, P_95_80, {G_95_88,G_87_80}, {P_95_88,P_87_80});
black b_105_96 (G_105_96, P_105_96, {G_105_104,G_103_96}, {P_105_104,P_103_96});
black b_107_96 (G_107_96, P_107_96, {G_107_104,G_103_96}, {P_107_104,P_103_96});
black b_109_96 (G_109_96, P_109_96, {G_109_104,G_103_96}, {P_109_104,P_103_96});
black b_111_96 (G_111_96, P_111_96, {G_111_104,G_103_96}, {P_111_104,P_103_96});
black b_121_112 (G_121_112, P_121_112, {G_121_120,G_119_112}, {P_121_120,P_119_112});
black b_123_112 (G_123_112, P_123_112, {G_123_120,G_119_112}, {P_123_120,P_119_112});
black b_125_112 (G_125_112, P_125_112, {G_125_120,G_119_112}, {P_125_120,P_119_112});
black b_127_112 (G_127_112, P_127_112, {G_127_120,G_119_112}, {P_127_120,P_119_112});
// Stage 5: Generates G/P pairs that span 16 bits
grey g_17_0 (G_17_0, {G_17_16,G_15_0}, P_17_16);
grey g_19_0 (G_19_0, {G_19_16,G_15_0}, P_19_16);
grey g_21_0 (G_21_0, {G_21_16,G_15_0}, P_21_16);
grey g_23_0 (G_23_0, {G_23_16,G_15_0}, P_23_16);
grey g_25_0 (G_25_0, {G_25_16,G_15_0}, P_25_16);
grey g_27_0 (G_27_0, {G_27_16,G_15_0}, P_27_16);
grey g_29_0 (G_29_0, {G_29_16,G_15_0}, P_29_16);
grey g_31_0 (G_31_0, {G_31_16,G_15_0}, P_31_16);
black b_49_32 (G_49_32, P_49_32, {G_49_48,G_47_32}, {P_49_48,P_47_32});
black b_51_32 (G_51_32, P_51_32, {G_51_48,G_47_32}, {P_51_48,P_47_32});
black b_53_32 (G_53_32, P_53_32, {G_53_48,G_47_32}, {P_53_48,P_47_32});
black b_55_32 (G_55_32, P_55_32, {G_55_48,G_47_32}, {P_55_48,P_47_32});
black b_57_32 (G_57_32, P_57_32, {G_57_48,G_47_32}, {P_57_48,P_47_32});
black b_59_32 (G_59_32, P_59_32, {G_59_48,G_47_32}, {P_59_48,P_47_32});
black b_61_32 (G_61_32, P_61_32, {G_61_48,G_47_32}, {P_61_48,P_47_32});
black b_63_32 (G_63_32, P_63_32, {G_63_48,G_47_32}, {P_63_48,P_47_32});
black b_81_64 (G_81_64, P_81_64, {G_81_80,G_79_64}, {P_81_80,P_79_64});
black b_83_64 (G_83_64, P_83_64, {G_83_80,G_79_64}, {P_83_80,P_79_64});
black b_85_64 (G_85_64, P_85_64, {G_85_80,G_79_64}, {P_85_80,P_79_64});
black b_87_64 (G_87_64, P_87_64, {G_87_80,G_79_64}, {P_87_80,P_79_64});
black b_89_64 (G_89_64, P_89_64, {G_89_80,G_79_64}, {P_89_80,P_79_64});
black b_91_64 (G_91_64, P_91_64, {G_91_80,G_79_64}, {P_91_80,P_79_64});
black b_93_64 (G_93_64, P_93_64, {G_93_80,G_79_64}, {P_93_80,P_79_64});
black b_95_64 (G_95_64, P_95_64, {G_95_80,G_79_64}, {P_95_80,P_79_64});
black b_113_96 (G_113_96, P_113_96, {G_113_112,G_111_96}, {P_113_112,P_111_96});
black b_115_96 (G_115_96, P_115_96, {G_115_112,G_111_96}, {P_115_112,P_111_96});
black b_117_96 (G_117_96, P_117_96, {G_117_112,G_111_96}, {P_117_112,P_111_96});
black b_119_96 (G_119_96, P_119_96, {G_119_112,G_111_96}, {P_119_112,P_111_96});
black b_121_96 (G_121_96, P_121_96, {G_121_112,G_111_96}, {P_121_112,P_111_96});
black b_123_96 (G_123_96, P_123_96, {G_123_112,G_111_96}, {P_123_112,P_111_96});
black b_125_96 (G_125_96, P_125_96, {G_125_112,G_111_96}, {P_125_112,P_111_96});
black b_127_96 (G_127_96, P_127_96, {G_127_112,G_111_96}, {P_127_112,P_111_96});
// Stage 6: Generates G/P pairs that span 32 bits
grey g_33_0 (G_33_0, {G_33_32,G_31_0}, P_33_32);
grey g_35_0 (G_35_0, {G_35_32,G_31_0}, P_35_32);
grey g_37_0 (G_37_0, {G_37_32,G_31_0}, P_37_32);
grey g_39_0 (G_39_0, {G_39_32,G_31_0}, P_39_32);
grey g_41_0 (G_41_0, {G_41_32,G_31_0}, P_41_32);
grey g_43_0 (G_43_0, {G_43_32,G_31_0}, P_43_32);
grey g_45_0 (G_45_0, {G_45_32,G_31_0}, P_45_32);
grey g_47_0 (G_47_0, {G_47_32,G_31_0}, P_47_32);
grey g_49_0 (G_49_0, {G_49_32,G_31_0}, P_49_32);
grey g_51_0 (G_51_0, {G_51_32,G_31_0}, P_51_32);
grey g_53_0 (G_53_0, {G_53_32,G_31_0}, P_53_32);
grey g_55_0 (G_55_0, {G_55_32,G_31_0}, P_55_32);
grey g_57_0 (G_57_0, {G_57_32,G_31_0}, P_57_32);
grey g_59_0 (G_59_0, {G_59_32,G_31_0}, P_59_32);
grey g_61_0 (G_61_0, {G_61_32,G_31_0}, P_61_32);
grey g_63_0 (G_63_0, {G_63_32,G_31_0}, P_63_32);
black b_97_64 (G_97_64, P_97_64, {G_97_96,G_95_64}, {P_97_96,P_95_64});
black b_99_64 (G_99_64, P_99_64, {G_99_96,G_95_64}, {P_99_96,P_95_64});
black b_101_64 (G_101_64, P_101_64, {G_101_96,G_95_64}, {P_101_96,P_95_64});
black b_103_64 (G_103_64, P_103_64, {G_103_96,G_95_64}, {P_103_96,P_95_64});
black b_105_64 (G_105_64, P_105_64, {G_105_96,G_95_64}, {P_105_96,P_95_64});
black b_107_64 (G_107_64, P_107_64, {G_107_96,G_95_64}, {P_107_96,P_95_64});
black b_109_64 (G_109_64, P_109_64, {G_109_96,G_95_64}, {P_109_96,P_95_64});
black b_111_64 (G_111_64, P_111_64, {G_111_96,G_95_64}, {P_111_96,P_95_64});
black b_113_64 (G_113_64, P_113_64, {G_113_96,G_95_64}, {P_113_96,P_95_64});
black b_115_64 (G_115_64, P_115_64, {G_115_96,G_95_64}, {P_115_96,P_95_64});
black b_117_64 (G_117_64, P_117_64, {G_117_96,G_95_64}, {P_117_96,P_95_64});
black b_119_64 (G_119_64, P_119_64, {G_119_96,G_95_64}, {P_119_96,P_95_64});
black b_121_64 (G_121_64, P_121_64, {G_121_96,G_95_64}, {P_121_96,P_95_64});
black b_123_64 (G_123_64, P_123_64, {G_123_96,G_95_64}, {P_123_96,P_95_64});
black b_125_64 (G_125_64, P_125_64, {G_125_96,G_95_64}, {P_125_96,P_95_64});
black b_127_64 (G_127_64, P_127_64, {G_127_96,G_95_64}, {P_127_96,P_95_64});
// Stage 7: Generates G/P pairs that span 64 bits
grey g_65_0 (G_65_0, {G_65_64,G_63_0}, P_65_64);
grey g_67_0 (G_67_0, {G_67_64,G_63_0}, P_67_64);
grey g_69_0 (G_69_0, {G_69_64,G_63_0}, P_69_64);
grey g_71_0 (G_71_0, {G_71_64,G_63_0}, P_71_64);
grey g_73_0 (G_73_0, {G_73_64,G_63_0}, P_73_64);
grey g_75_0 (G_75_0, {G_75_64,G_63_0}, P_75_64);
grey g_77_0 (G_77_0, {G_77_64,G_63_0}, P_77_64);
grey g_79_0 (G_79_0, {G_79_64,G_63_0}, P_79_64);
grey g_81_0 (G_81_0, {G_81_64,G_63_0}, P_81_64);
grey g_83_0 (G_83_0, {G_83_64,G_63_0}, P_83_64);
grey g_85_0 (G_85_0, {G_85_64,G_63_0}, P_85_64);
grey g_87_0 (G_87_0, {G_87_64,G_63_0}, P_87_64);
grey g_89_0 (G_89_0, {G_89_64,G_63_0}, P_89_64);
grey g_91_0 (G_91_0, {G_91_64,G_63_0}, P_91_64);
grey g_93_0 (G_93_0, {G_93_64,G_63_0}, P_93_64);
grey g_95_0 (G_95_0, {G_95_64,G_63_0}, P_95_64);
grey g_97_0 (G_97_0, {G_97_64,G_63_0}, P_97_64);
grey g_99_0 (G_99_0, {G_99_64,G_63_0}, P_99_64);
grey g_101_0 (G_101_0, {G_101_64,G_63_0}, P_101_64);
grey g_103_0 (G_103_0, {G_103_64,G_63_0}, P_103_64);
grey g_105_0 (G_105_0, {G_105_64,G_63_0}, P_105_64);
grey g_107_0 (G_107_0, {G_107_64,G_63_0}, P_107_64);
grey g_109_0 (G_109_0, {G_109_64,G_63_0}, P_109_64);
grey g_111_0 (G_111_0, {G_111_64,G_63_0}, P_111_64);
grey g_113_0 (G_113_0, {G_113_64,G_63_0}, P_113_64);
grey g_115_0 (G_115_0, {G_115_64,G_63_0}, P_115_64);
grey g_117_0 (G_117_0, {G_117_64,G_63_0}, P_117_64);
grey g_119_0 (G_119_0, {G_119_64,G_63_0}, P_119_64);
grey g_121_0 (G_121_0, {G_121_64,G_63_0}, P_121_64);
grey g_123_0 (G_123_0, {G_123_64,G_63_0}, P_123_64);
grey g_125_0 (G_125_0, {G_125_64,G_63_0}, P_125_64);
grey g_127_0 (G_127_0, {G_127_64,G_63_0}, P_127_64);
// Extra grey cell stage
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
grey g_14_0 (G_14_0, {g[14],G_13_0}, p[14]);
grey g_16_0 (G_16_0, {g[16],G_15_0}, p[16]);
grey g_18_0 (G_18_0, {g[18],G_17_0}, p[18]);
grey g_20_0 (G_20_0, {g[20],G_19_0}, p[20]);
grey g_22_0 (G_22_0, {g[22],G_21_0}, p[22]);
grey g_24_0 (G_24_0, {g[24],G_23_0}, p[24]);
grey g_26_0 (G_26_0, {g[26],G_25_0}, p[26]);
grey g_28_0 (G_28_0, {g[28],G_27_0}, p[28]);
grey g_30_0 (G_30_0, {g[30],G_29_0}, p[30]);
grey g_32_0 (G_32_0, {g[32],G_31_0}, p[32]);
grey g_34_0 (G_34_0, {g[34],G_33_0}, p[34]);
grey g_36_0 (G_36_0, {g[36],G_35_0}, p[36]);
grey g_38_0 (G_38_0, {g[38],G_37_0}, p[38]);
grey g_40_0 (G_40_0, {g[40],G_39_0}, p[40]);
grey g_42_0 (G_42_0, {g[42],G_41_0}, p[42]);
grey g_44_0 (G_44_0, {g[44],G_43_0}, p[44]);
grey g_46_0 (G_46_0, {g[46],G_45_0}, p[46]);
grey g_48_0 (G_48_0, {g[48],G_47_0}, p[48]);
grey g_50_0 (G_50_0, {g[50],G_49_0}, p[50]);
grey g_52_0 (G_52_0, {g[52],G_51_0}, p[52]);
grey g_54_0 (G_54_0, {g[54],G_53_0}, p[54]);
grey g_56_0 (G_56_0, {g[56],G_55_0}, p[56]);
grey g_58_0 (G_58_0, {g[58],G_57_0}, p[58]);
grey g_60_0 (G_60_0, {g[60],G_59_0}, p[60]);
grey g_62_0 (G_62_0, {g[62],G_61_0}, p[62]);
grey g_64_0 (G_64_0, {g[64],G_63_0}, p[64]);
grey g_66_0 (G_66_0, {g[66],G_65_0}, p[66]);
grey g_68_0 (G_68_0, {g[68],G_67_0}, p[68]);
grey g_70_0 (G_70_0, {g[70],G_69_0}, p[70]);
grey g_72_0 (G_72_0, {g[72],G_71_0}, p[72]);
grey g_74_0 (G_74_0, {g[74],G_73_0}, p[74]);
grey g_76_0 (G_76_0, {g[76],G_75_0}, p[76]);
grey g_78_0 (G_78_0, {g[78],G_77_0}, p[78]);
grey g_80_0 (G_80_0, {g[80],G_79_0}, p[80]);
grey g_82_0 (G_82_0, {g[82],G_81_0}, p[82]);
grey g_84_0 (G_84_0, {g[84],G_83_0}, p[84]);
grey g_86_0 (G_86_0, {g[86],G_85_0}, p[86]);
grey g_88_0 (G_88_0, {g[88],G_87_0}, p[88]);
grey g_90_0 (G_90_0, {g[90],G_89_0}, p[90]);
grey g_92_0 (G_92_0, {g[92],G_91_0}, p[92]);
grey g_94_0 (G_94_0, {g[94],G_93_0}, p[94]);
grey g_96_0 (G_96_0, {g[96],G_95_0}, p[96]);
grey g_98_0 (G_98_0, {g[98],G_97_0}, p[98]);
grey g_100_0 (G_100_0, {g[100],G_99_0}, p[100]);
grey g_102_0 (G_102_0, {g[102],G_101_0}, p[102]);
grey g_104_0 (G_104_0, {g[104],G_103_0}, p[104]);
grey g_106_0 (G_106_0, {g[106],G_105_0}, p[106]);
grey g_108_0 (G_108_0, {g[108],G_107_0}, p[108]);
grey g_110_0 (G_110_0, {g[110],G_109_0}, p[110]);
grey g_112_0 (G_112_0, {g[112],G_111_0}, p[112]);
grey g_114_0 (G_114_0, {g[114],G_113_0}, p[114]);
grey g_116_0 (G_116_0, {g[116],G_115_0}, p[116]);
grey g_118_0 (G_118_0, {g[118],G_117_0}, p[118]);
grey g_120_0 (G_120_0, {g[120],G_119_0}, p[120]);
grey g_122_0 (G_122_0, {g[122],G_121_0}, p[122]);
grey g_124_0 (G_124_0, {g[124],G_123_0}, p[124]);
grey g_126_0 (G_126_0, {g[126],G_125_0}, p[126]);
// Final Stage: Apply c_k+1=G_k_0
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
assign c[9]=G_8_0;
assign c[10]=G_9_0;
assign c[11]=G_10_0;
assign c[12]=G_11_0;
assign c[13]=G_12_0;
assign c[14]=G_13_0;
assign c[15]=G_14_0;
assign c[16]=G_15_0;
assign c[17]=G_16_0;
assign c[18]=G_17_0;
assign c[19]=G_18_0;
assign c[20]=G_19_0;
assign c[21]=G_20_0;
assign c[22]=G_21_0;
assign c[23]=G_22_0;
assign c[24]=G_23_0;
assign c[25]=G_24_0;
assign c[26]=G_25_0;
assign c[27]=G_26_0;
assign c[28]=G_27_0;
assign c[29]=G_28_0;
assign c[30]=G_29_0;
assign c[31]=G_30_0;
assign c[32]=G_31_0;
assign c[33]=G_32_0;
assign c[34]=G_33_0;
assign c[35]=G_34_0;
assign c[36]=G_35_0;
assign c[37]=G_36_0;
assign c[38]=G_37_0;
assign c[39]=G_38_0;
assign c[40]=G_39_0;
assign c[41]=G_40_0;
assign c[42]=G_41_0;
assign c[43]=G_42_0;
assign c[44]=G_43_0;
assign c[45]=G_44_0;
assign c[46]=G_45_0;
assign c[47]=G_46_0;
assign c[48]=G_47_0;
assign c[49]=G_48_0;
assign c[50]=G_49_0;
assign c[51]=G_50_0;
assign c[52]=G_51_0;
assign c[53]=G_52_0;
assign c[54]=G_53_0;
assign c[55]=G_54_0;
assign c[56]=G_55_0;
assign c[57]=G_56_0;
assign c[58]=G_57_0;
assign c[59]=G_58_0;
assign c[60]=G_59_0;
assign c[61]=G_60_0;
assign c[62]=G_61_0;
assign c[63]=G_62_0;
assign c[64]=G_63_0;
assign c[65]=G_64_0;
assign c[66]=G_65_0;
assign c[67]=G_66_0;
assign c[68]=G_67_0;
assign c[69]=G_68_0;
assign c[70]=G_69_0;
assign c[71]=G_70_0;
assign c[72]=G_71_0;
assign c[73]=G_72_0;
assign c[74]=G_73_0;
assign c[75]=G_74_0;
assign c[76]=G_75_0;
assign c[77]=G_76_0;
assign c[78]=G_77_0;
assign c[79]=G_78_0;
assign c[80]=G_79_0;
assign c[81]=G_80_0;
assign c[82]=G_81_0;
assign c[83]=G_82_0;
assign c[84]=G_83_0;
assign c[85]=G_84_0;
assign c[86]=G_85_0;
assign c[87]=G_86_0;
assign c[88]=G_87_0;
assign c[89]=G_88_0;
assign c[90]=G_89_0;
assign c[91]=G_90_0;
assign c[92]=G_91_0;
assign c[93]=G_92_0;
assign c[94]=G_93_0;
assign c[95]=G_94_0;
assign c[96]=G_95_0;
assign c[97]=G_96_0;
assign c[98]=G_97_0;
assign c[99]=G_98_0;
assign c[100]=G_99_0;
assign c[101]=G_100_0;
assign c[102]=G_101_0;
assign c[103]=G_102_0;
assign c[104]=G_103_0;
assign c[105]=G_104_0;
assign c[106]=G_105_0;
assign c[107]=G_106_0;
assign c[108]=G_107_0;
assign c[109]=G_108_0;
assign c[110]=G_109_0;
assign c[111]=G_110_0;
assign c[112]=G_111_0;
assign c[113]=G_112_0;
assign c[114]=G_113_0;
assign c[115]=G_114_0;
assign c[116]=G_115_0;
assign c[117]=G_116_0;
assign c[118]=G_117_0;
assign c[119]=G_118_0;
assign c[120]=G_119_0;
assign c[121]=G_120_0;
assign c[122]=G_121_0;
assign c[123]=G_122_0;
assign c[124]=G_123_0;
assign c[125]=G_124_0;
assign c[126]=G_125_0;
assign c[127]=G_126_0;
assign c[128]=G_127_0;
endmodule // ladner_fischer

View File

@ -0,0 +1,273 @@
// Ladner-Fischer Prefix Adder
module ldf64 (cout, sum, a, b, cin);
input [63:0] a, b;
input cin;
output [63:0] sum;
output cout;
wire [64:0] p,g;
wire [63:0] c;
// pre-computation
assign p={a^b,1'b0};
assign g={a&b, cin};
// prefix tree
ladner_fischer64 prefix_tree(c, p[63:0], g[63:0]);
// post-computation
assign sum=p[64:1]^c;
assign cout=g[64]|(p[64]&c[63]);
endmodule
module ladner_fischer64 (c, p, g);
input [63:0] p;
input [63:0] g;
output [64:1] c;
// parallel-prefix, Ladner-Fischer
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
black b_15_14 (G_15_14, P_15_14, {g[15],g[14]}, {p[15],p[14]});
black b_17_16 (G_17_16, P_17_16, {g[17],g[16]}, {p[17],p[16]});
black b_19_18 (G_19_18, P_19_18, {g[19],g[18]}, {p[19],p[18]});
black b_21_20 (G_21_20, P_21_20, {g[21],g[20]}, {p[21],p[20]});
black b_23_22 (G_23_22, P_23_22, {g[23],g[22]}, {p[23],p[22]});
black b_25_24 (G_25_24, P_25_24, {g[25],g[24]}, {p[25],p[24]});
black b_27_26 (G_27_26, P_27_26, {g[27],g[26]}, {p[27],p[26]});
black b_29_28 (G_29_28, P_29_28, {g[29],g[28]}, {p[29],p[28]});
black b_31_30 (G_31_30, P_31_30, {g[31],g[30]}, {p[31],p[30]});
black b_33_32 (G_33_32, P_33_32, {g[33],g[32]}, {p[33],p[32]});
black b_35_34 (G_35_34, P_35_34, {g[35],g[34]}, {p[35],p[34]});
black b_37_36 (G_37_36, P_37_36, {g[37],g[36]}, {p[37],p[36]});
black b_39_38 (G_39_38, P_39_38, {g[39],g[38]}, {p[39],p[38]});
black b_41_40 (G_41_40, P_41_40, {g[41],g[40]}, {p[41],p[40]});
black b_43_42 (G_43_42, P_43_42, {g[43],g[42]}, {p[43],p[42]});
black b_45_44 (G_45_44, P_45_44, {g[45],g[44]}, {p[45],p[44]});
black b_47_46 (G_47_46, P_47_46, {g[47],g[46]}, {p[47],p[46]});
black b_49_48 (G_49_48, P_49_48, {g[49],g[48]}, {p[49],p[48]});
black b_51_50 (G_51_50, P_51_50, {g[51],g[50]}, {p[51],p[50]});
black b_53_52 (G_53_52, P_53_52, {g[53],g[52]}, {p[53],p[52]});
black b_55_54 (G_55_54, P_55_54, {g[55],g[54]}, {p[55],p[54]});
black b_57_56 (G_57_56, P_57_56, {g[57],g[56]}, {p[57],p[56]});
black b_59_58 (G_59_58, P_59_58, {g[59],g[58]}, {p[59],p[58]});
black b_61_60 (G_61_60, P_61_60, {g[61],g[60]}, {p[61],p[60]});
black b_63_62 (G_63_62, P_63_62, {g[63],g[62]}, {p[63],p[62]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12});
black b_19_16 (G_19_16, P_19_16, {G_19_18,G_17_16}, {P_19_18,P_17_16});
black b_23_20 (G_23_20, P_23_20, {G_23_22,G_21_20}, {P_23_22,P_21_20});
black b_27_24 (G_27_24, P_27_24, {G_27_26,G_25_24}, {P_27_26,P_25_24});
black b_31_28 (G_31_28, P_31_28, {G_31_30,G_29_28}, {P_31_30,P_29_28});
black b_35_32 (G_35_32, P_35_32, {G_35_34,G_33_32}, {P_35_34,P_33_32});
black b_39_36 (G_39_36, P_39_36, {G_39_38,G_37_36}, {P_39_38,P_37_36});
black b_43_40 (G_43_40, P_43_40, {G_43_42,G_41_40}, {P_43_42,P_41_40});
black b_47_44 (G_47_44, P_47_44, {G_47_46,G_45_44}, {P_47_46,P_45_44});
black b_51_48 (G_51_48, P_51_48, {G_51_50,G_49_48}, {P_51_50,P_49_48});
black b_55_52 (G_55_52, P_55_52, {G_55_54,G_53_52}, {P_55_54,P_53_52});
black b_59_56 (G_59_56, P_59_56, {G_59_58,G_57_56}, {P_59_58,P_57_56});
black b_63_60 (G_63_60, P_63_60, {G_63_62,G_61_60}, {P_63_62,P_61_60});
// Stage 3: Generates G/P pairs that span 4 bits
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
black b_13_8 (G_13_8, P_13_8, {G_13_12,G_11_8}, {P_13_12,P_11_8});
black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8});
black b_21_16 (G_21_16, P_21_16, {G_21_20,G_19_16}, {P_21_20,P_19_16});
black b_23_16 (G_23_16, P_23_16, {G_23_20,G_19_16}, {P_23_20,P_19_16});
black b_29_24 (G_29_24, P_29_24, {G_29_28,G_27_24}, {P_29_28,P_27_24});
black b_31_24 (G_31_24, P_31_24, {G_31_28,G_27_24}, {P_31_28,P_27_24});
black b_37_32 (G_37_32, P_37_32, {G_37_36,G_35_32}, {P_37_36,P_35_32});
black b_39_32 (G_39_32, P_39_32, {G_39_36,G_35_32}, {P_39_36,P_35_32});
black b_45_40 (G_45_40, P_45_40, {G_45_44,G_43_40}, {P_45_44,P_43_40});
black b_47_40 (G_47_40, P_47_40, {G_47_44,G_43_40}, {P_47_44,P_43_40});
black b_53_48 (G_53_48, P_53_48, {G_53_52,G_51_48}, {P_53_52,P_51_48});
black b_55_48 (G_55_48, P_55_48, {G_55_52,G_51_48}, {P_55_52,P_51_48});
black b_61_56 (G_61_56, P_61_56, {G_61_60,G_59_56}, {P_61_60,P_59_56});
black b_63_56 (G_63_56, P_63_56, {G_63_60,G_59_56}, {P_63_60,P_59_56});
// Stage 4: Generates G/P pairs that span 8 bits
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
grey g_13_0 (G_13_0, {G_13_8,G_7_0}, P_13_8);
grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8);
black b_25_16 (G_25_16, P_25_16, {G_25_24,G_23_16}, {P_25_24,P_23_16});
black b_27_16 (G_27_16, P_27_16, {G_27_24,G_23_16}, {P_27_24,P_23_16});
black b_29_16 (G_29_16, P_29_16, {G_29_24,G_23_16}, {P_29_24,P_23_16});
black b_31_16 (G_31_16, P_31_16, {G_31_24,G_23_16}, {P_31_24,P_23_16});
black b_41_32 (G_41_32, P_41_32, {G_41_40,G_39_32}, {P_41_40,P_39_32});
black b_43_32 (G_43_32, P_43_32, {G_43_40,G_39_32}, {P_43_40,P_39_32});
black b_45_32 (G_45_32, P_45_32, {G_45_40,G_39_32}, {P_45_40,P_39_32});
black b_47_32 (G_47_32, P_47_32, {G_47_40,G_39_32}, {P_47_40,P_39_32});
black b_57_48 (G_57_48, P_57_48, {G_57_56,G_55_48}, {P_57_56,P_55_48});
black b_59_48 (G_59_48, P_59_48, {G_59_56,G_55_48}, {P_59_56,P_55_48});
black b_61_48 (G_61_48, P_61_48, {G_61_56,G_55_48}, {P_61_56,P_55_48});
black b_63_48 (G_63_48, P_63_48, {G_63_56,G_55_48}, {P_63_56,P_55_48});
// Stage 5: Generates G/P pairs that span 16 bits
grey g_17_0 (G_17_0, {G_17_16,G_15_0}, P_17_16);
grey g_19_0 (G_19_0, {G_19_16,G_15_0}, P_19_16);
grey g_21_0 (G_21_0, {G_21_16,G_15_0}, P_21_16);
grey g_23_0 (G_23_0, {G_23_16,G_15_0}, P_23_16);
grey g_25_0 (G_25_0, {G_25_16,G_15_0}, P_25_16);
grey g_27_0 (G_27_0, {G_27_16,G_15_0}, P_27_16);
grey g_29_0 (G_29_0, {G_29_16,G_15_0}, P_29_16);
grey g_31_0 (G_31_0, {G_31_16,G_15_0}, P_31_16);
black b_49_32 (G_49_32, P_49_32, {G_49_48,G_47_32}, {P_49_48,P_47_32});
black b_51_32 (G_51_32, P_51_32, {G_51_48,G_47_32}, {P_51_48,P_47_32});
black b_53_32 (G_53_32, P_53_32, {G_53_48,G_47_32}, {P_53_48,P_47_32});
black b_55_32 (G_55_32, P_55_32, {G_55_48,G_47_32}, {P_55_48,P_47_32});
black b_57_32 (G_57_32, P_57_32, {G_57_48,G_47_32}, {P_57_48,P_47_32});
black b_59_32 (G_59_32, P_59_32, {G_59_48,G_47_32}, {P_59_48,P_47_32});
black b_61_32 (G_61_32, P_61_32, {G_61_48,G_47_32}, {P_61_48,P_47_32});
black b_63_32 (G_63_32, P_63_32, {G_63_48,G_47_32}, {P_63_48,P_47_32});
// Stage 6: Generates G/P pairs that span 32 bits
grey g_33_0 (G_33_0, {G_33_32,G_31_0}, P_33_32);
grey g_35_0 (G_35_0, {G_35_32,G_31_0}, P_35_32);
grey g_37_0 (G_37_0, {G_37_32,G_31_0}, P_37_32);
grey g_39_0 (G_39_0, {G_39_32,G_31_0}, P_39_32);
grey g_41_0 (G_41_0, {G_41_32,G_31_0}, P_41_32);
grey g_43_0 (G_43_0, {G_43_32,G_31_0}, P_43_32);
grey g_45_0 (G_45_0, {G_45_32,G_31_0}, P_45_32);
grey g_47_0 (G_47_0, {G_47_32,G_31_0}, P_47_32);
grey g_49_0 (G_49_0, {G_49_32,G_31_0}, P_49_32);
grey g_51_0 (G_51_0, {G_51_32,G_31_0}, P_51_32);
grey g_53_0 (G_53_0, {G_53_32,G_31_0}, P_53_32);
grey g_55_0 (G_55_0, {G_55_32,G_31_0}, P_55_32);
grey g_57_0 (G_57_0, {G_57_32,G_31_0}, P_57_32);
grey g_59_0 (G_59_0, {G_59_32,G_31_0}, P_59_32);
grey g_61_0 (G_61_0, {G_61_32,G_31_0}, P_61_32);
grey g_63_0 (G_63_0, {G_63_32,G_31_0}, P_63_32);
// Extra grey cell stage
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
grey g_14_0 (G_14_0, {g[14],G_13_0}, p[14]);
grey g_16_0 (G_16_0, {g[16],G_15_0}, p[16]);
grey g_18_0 (G_18_0, {g[18],G_17_0}, p[18]);
grey g_20_0 (G_20_0, {g[20],G_19_0}, p[20]);
grey g_22_0 (G_22_0, {g[22],G_21_0}, p[22]);
grey g_24_0 (G_24_0, {g[24],G_23_0}, p[24]);
grey g_26_0 (G_26_0, {g[26],G_25_0}, p[26]);
grey g_28_0 (G_28_0, {g[28],G_27_0}, p[28]);
grey g_30_0 (G_30_0, {g[30],G_29_0}, p[30]);
grey g_32_0 (G_32_0, {g[32],G_31_0}, p[32]);
grey g_34_0 (G_34_0, {g[34],G_33_0}, p[34]);
grey g_36_0 (G_36_0, {g[36],G_35_0}, p[36]);
grey g_38_0 (G_38_0, {g[38],G_37_0}, p[38]);
grey g_40_0 (G_40_0, {g[40],G_39_0}, p[40]);
grey g_42_0 (G_42_0, {g[42],G_41_0}, p[42]);
grey g_44_0 (G_44_0, {g[44],G_43_0}, p[44]);
grey g_46_0 (G_46_0, {g[46],G_45_0}, p[46]);
grey g_48_0 (G_48_0, {g[48],G_47_0}, p[48]);
grey g_50_0 (G_50_0, {g[50],G_49_0}, p[50]);
grey g_52_0 (G_52_0, {g[52],G_51_0}, p[52]);
grey g_54_0 (G_54_0, {g[54],G_53_0}, p[54]);
grey g_56_0 (G_56_0, {g[56],G_55_0}, p[56]);
grey g_58_0 (G_58_0, {g[58],G_57_0}, p[58]);
grey g_60_0 (G_60_0, {g[60],G_59_0}, p[60]);
grey g_62_0 (G_62_0, {g[62],G_61_0}, p[62]);
// Final Stage: Apply c_k+1=G_k_0
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
assign c[9]=G_8_0;
assign c[10]=G_9_0;
assign c[11]=G_10_0;
assign c[12]=G_11_0;
assign c[13]=G_12_0;
assign c[14]=G_13_0;
assign c[15]=G_14_0;
assign c[16]=G_15_0;
assign c[17]=G_16_0;
assign c[18]=G_17_0;
assign c[19]=G_18_0;
assign c[20]=G_19_0;
assign c[21]=G_20_0;
assign c[22]=G_21_0;
assign c[23]=G_22_0;
assign c[24]=G_23_0;
assign c[25]=G_24_0;
assign c[26]=G_25_0;
assign c[27]=G_26_0;
assign c[28]=G_27_0;
assign c[29]=G_28_0;
assign c[30]=G_29_0;
assign c[31]=G_30_0;
assign c[32]=G_31_0;
assign c[33]=G_32_0;
assign c[34]=G_33_0;
assign c[35]=G_34_0;
assign c[36]=G_35_0;
assign c[37]=G_36_0;
assign c[38]=G_37_0;
assign c[39]=G_38_0;
assign c[40]=G_39_0;
assign c[41]=G_40_0;
assign c[42]=G_41_0;
assign c[43]=G_42_0;
assign c[44]=G_43_0;
assign c[45]=G_44_0;
assign c[46]=G_45_0;
assign c[47]=G_46_0;
assign c[48]=G_47_0;
assign c[49]=G_48_0;
assign c[50]=G_49_0;
assign c[51]=G_50_0;
assign c[52]=G_51_0;
assign c[53]=G_52_0;
assign c[54]=G_53_0;
assign c[55]=G_54_0;
assign c[56]=G_55_0;
assign c[57]=G_56_0;
assign c[58]=G_57_0;
assign c[59]=G_58_0;
assign c[60]=G_59_0;
assign c[61]=G_60_0;
assign c[62]=G_61_0;
assign c[63]=G_62_0;
assign c[64]=G_63_0;
endmodule // ladner_fischer

View File

@ -0,0 +1,132 @@
// Brent-Kung Prefix Adder
module exp_add (cout, sum, a, b, cin);
input [12:0] a, b;
input cin;
output [12:0] sum;
output cout;
wire [13:0] p,g;
wire [13:1] h,c;
// pre-computation
assign p={a|b,1'b1};
assign g={a&b, cin};
// prefix tree
brent_kung prefix_tree(h, c, p[12:0], g[12:0]);
// post-computation
assign h[13]=g[13]|c[13];
assign sum=p[13:1]^h|g[13:1]&c;
assign cout=p[13]&h[13];
endmodule
module brent_kung (h, c, p, g);
input [12:0] p;
input [13:0] g;
output [13:1] h;
output [13:1] c;
// parallel-prefix, Brent-Kung
// Stage 1: Generates H/I pairs that span 1 bits
rgry g_1_0 (H_1_0, {g[1],g[0]});
rblk b_3_2 (H_3_2, I_3_2, {g[3],g[2]}, {p[2],p[1]});
rblk b_5_4 (H_5_4, I_5_4, {g[5],g[4]}, {p[4],p[3]});
rblk b_7_6 (H_7_6, I_7_6, {g[7],g[6]}, {p[6],p[5]});
rblk b_9_8 (H_9_8, I_9_8, {g[9],g[8]}, {p[8],p[7]});
rblk b_11_10 (H_11_10, I_11_10, {g[11],g[10]}, {p[10],p[9]});
rblk b_13_12 (H_13_12, I_13_12, {g[13],g[12]}, {p[12],p[11]});
// Stage 2: Generates H/I pairs that span 2 bits
grey g_3_0 (H_3_0, {H_3_2,H_1_0}, I_3_2);
black b_7_4 (H_7_4, I_7_4, {H_7_6,H_5_4}, {I_7_6,I_5_4});
black b_11_8 (H_11_8, I_11_8, {H_11_10,H_9_8}, {I_11_10,I_9_8});
// Stage 3: Generates H/I pairs that span 4 bits
grey g_7_0 (H_7_0, {H_7_4,H_3_0}, I_7_4);
// Stage 4: Generates H/I pairs that span 8 bits
// Stage 5: Generates H/I pairs that span 4 bits
grey g_11_0 (H_11_0, {H_11_8,H_7_0}, I_11_8);
// Stage 6: Generates H/I pairs that span 2 bits
grey g_5_0 (H_5_0, {H_5_4,H_3_0}, I_5_4);
grey g_9_0 (H_9_0, {H_9_8,H_7_0}, I_9_8);
// Last grey cell stage
grey g_2_0 (H_2_0, {g[2],H_1_0}, p[1]);
grey g_4_0 (H_4_0, {g[4],H_3_0}, p[3]);
grey g_6_0 (H_6_0, {g[6],H_5_0}, p[5]);
grey g_8_0 (H_8_0, {g[8],H_7_0}, p[7]);
grey g_10_0 (H_10_0, {g[10],H_9_0}, p[9]);
grey g_12_0 (H_12_0, {g[12],H_11_0}, p[11]);
// Final Stage: Apply c_k+1=p_k&H_k_0
assign c[1]=g[0];
assign h[1]=H_1_0; assign c[2]=p[1]&H_1_0;
assign h[2]=H_2_0; assign c[3]=p[2]&H_2_0;
assign h[3]=H_3_0; assign c[4]=p[3]&H_3_0;
assign h[4]=H_4_0; assign c[5]=p[4]&H_4_0;
assign h[5]=H_5_0; assign c[6]=p[5]&H_5_0;
assign h[6]=H_6_0; assign c[7]=p[6]&H_6_0;
assign h[7]=H_7_0; assign c[8]=p[7]&H_7_0;
assign h[8]=H_8_0; assign c[9]=p[8]&H_8_0;
assign h[9]=H_9_0; assign c[10]=p[9]&H_9_0;
assign h[10]=H_10_0; assign c[11]=p[10]&H_10_0;
assign h[11]=H_11_0; assign c[12]=p[11]&H_11_0;
assign h[12]=H_12_0; assign c[13]=p[12]&H_12_0;
endmodule
// Black cell
module black(gout, pout, gin, pin);
input [1:0] gin, pin;
output gout, pout;
assign pout=pin[1]&pin[0];
assign gout=gin[1]|(pin[1]&gin[0]);
endmodule
// Grey cell
module grey(gout, gin, pin);
input[1:0] gin;
input pin;
output gout;
assign gout=gin[1]|(pin&gin[0]);
endmodule
// reduced Black cell
module rblk(hout, iout, gin, pin);
input [1:0] gin, pin;
output hout, iout;
assign iout=pin[1]&pin[0];
assign hout=gin[1]|gin[0];
endmodule
// reduced Grey cell
module rgry(hout, gin);
input[1:0] gin;
output hout;
assign hout=gin[1]|gin[0];
endmodule

View File

@ -0,0 +1,170 @@
module lz2 (P, V, B0, B1);
input B0;
input B1;
output P;
output V;
assign V = B0 | B1;
assign P = B0 & ~B1;
endmodule // lz2
// Note: This module is not made out of two lz2's - why not? (MJS)
module lz4 (ZP, ZV, B0, B1, V0, V1);
input B0;
input B1;
input V0;
input V1;
output [1:0] ZP;
output ZV;
assign ZP[0] = V0 ? B0 : B1;
assign ZP[1] = ~V0;
assign ZV = V0 | V1;
endmodule // lz4
// Note: This module is not made out of two lz4's - why not? (MJS)
module lz8 (ZP, ZV, B);
input [7:0] B;
wire s1p0;
wire s1v0;
wire s1p1;
wire s1v1;
wire s2p0;
wire s2v0;
wire s2p1;
wire s2v1;
wire [1:0] ZPa;
wire [1:0] ZPb;
wire ZVa;
wire ZVb;
output [2:0] ZP;
output ZV;
lz2 l1(s1p0, s1v0, B[2], B[3]);
lz2 l2(s1p1, s1v1, B[0], B[1]);
lz4 l3(ZPa, ZVa, s1p0, s1p1, s1v0, s1v1);
lz2 l4(s2p0, s2v0, B[6], B[7]);
lz2 l5(s2p1, s2v1, B[4], B[5]);
lz4 l6(ZPb, ZVb, s2p0, s2p1, s2v0, s2v1);
assign ZP[1:0] = ZVb ? ZPb : ZPa;
assign ZP[2] = ~ZVb;
assign ZV = ZVa | ZVb;
endmodule // lz8
module lz16 (ZP, ZV, B);
input [15:0] B;
wire [2:0] ZPa;
wire [2:0] ZPb;
wire ZVa;
wire ZVb;
output [3:0] ZP;
output ZV;
lz8 l1(ZPa, ZVa, B[7:0]);
lz8 l2(ZPb, ZVb, B[15:8]);
assign ZP[2:0] = ZVb ? ZPb : ZPa;
assign ZP[3] = ~ZVb;
assign ZV = ZVa | ZVb;
endmodule // lz16
module lz32 (ZP, ZV, B);
input [31:0] B;
wire [3:0] ZPa;
wire [3:0] ZPb;
wire ZVa;
wire ZVb;
output [4:0] ZP;
output ZV;
lz16 l1(ZPa, ZVa, B[15:0]);
lz16 l2(ZPb, ZVb, B[31:16]);
assign ZP[3:0] = ZVb ? ZPb : ZPa;
assign ZP[4] = ~ZVb;
assign ZV = ZVa | ZVb;
endmodule // lz32
// This module returns the number of leading zeros ZP in the 64-bit
// number B. If there are no ones in B, then ZP and ZV are both 0.
module lz64 (ZP, ZV, B);
input [63:0] B;
wire [4:0] ZPa;
wire [4:0] ZPb;
wire ZVa;
wire ZVb;
output [5:0] ZP;
output ZV;
lz32 l1(ZPa, ZVa, B[31:0]);
lz32 l2(ZPb, ZVb, B[63:32]);
assign ZV = ZVa | ZVb;
assign ZP[4:0] = (ZVb ? ZPb : ZPa) & {5{ZV}};
assign ZP[5] = ~ZVb & ZV;
endmodule // lz64
// This module returns the number of leading zeros ZP in the 52-bit
// number B. If there are no ones in B, then ZP and ZV are both 0.
module lz52 (ZP, ZV, B);
input [51:0] B;
wire [4:0] ZP_32;
wire [3:0] ZP_16;
wire [1:0] ZP_4;
wire ZV_32;
wire ZV_16;
wire ZV_4;
wire ZP_2_1;
wire ZP_2_2;
wire ZV_2_1;
wire ZV_2_2;
output [5:0] ZP;
output ZV;
lz32 l1 (ZP_32, ZV_32, B[51:20]);
lz16 l2 (ZP_16, ZV_16, B[19:4]);
lz2 l3_1 (ZP_2_1, ZV_2_1, B[3], B[2]);
lz2 l3_2 (ZP_2_2, ZV_2_2, B[1], B[0]);
lz4 l3_final (ZP_4, ZV_4, ZP_2_1, ZP_2_2, ZV_2_1, ZV_2_2);
assign ZV = ZV_32 | ZV_16 | ZV_4;
assign ZP[5] = ~ZV_32;
assign ZP[4] = ZV_32 ? ZP_32[4] : ~ZV_16;
assign ZP[3:2] = ZV_32 ? ZP_32[3:2] : (ZV_16 ? ZP_16[3:2] : 2'b0);
assign ZP[1:0] = ZV_32 ? ZP_32[1:0] : (ZV_16 ? ZP_16[1:0] : ZP_4);
endmodule // lz52

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,265 @@
// The rounder takes as inputs a 64-bit value to be rounded, A, the
// exponent of the value to be rounded, the sign of the final result, Sign,
// the precision of the results, P, and the two-bit rounding mode, rm.
// It produces a rounded 52-bit result, Z, the exponent of the rounded
// result, Z_exp, and a flag that indicates if the result was rounded,
// Inexact. The rounding mode has the following values.
// rm Modee
// 00 round-to-nearest-even
// 01 round-toward-zero
// 10 round-toward-plus infinity
// 11 round-toward-minus infinity
// The rounding algorithm determines if '1' should be added to the
// truncated signficant result, based on three significant bits
// (least (L), round (R) and sticky (S)), the rounding mode (rm)
// and the sign of the final result (Sign). Visually, L and R appear as
// xxxxxL,Rxxxxxxx
// where , denotes the rounding boundary. S is the logical OR of all the
// bits to the right of R.
module rounder (Result, DenormIO, Flags, rm, P, OvEn,
UnEn, exp_valid, sel_inv, Invalid, DenormIn, convert, Asign, Aexp,
norm_shift, A, exponent_postsum, A_Norm, B_Norm, exp_A_unmodified, exp_B_unmodified,
normal_overflow, normal_underflow, swap, op_type, sum);
input [2:0] rm;
input P;
input OvEn;
input UnEn;
input exp_valid;
input [3:0] sel_inv;
input Invalid;
input DenormIn;
input convert;
input Asign;
input [10:0] Aexp;
input [5:0] norm_shift;
input [63:0] A;
input [10:0] exponent_postsum;
input A_Norm;
input B_Norm;
input [11:0] exp_A_unmodified;
input [11:0] exp_B_unmodified;
input normal_overflow;
input normal_underflow;
input swap;
input [3:0] op_type;
input [63:0] sum;
output [63:0] Result;
output DenormIO;
output [4:0] Flags;
wire Rsign;
wire Sticky_out;
wire [51:0] ShiftMant;
wire [63:0] ShiftMant_64;
wire [10:0] Rexp;
wire [10:0] Rexp_denorm;
wire [11:0] Texp; //Parallelized for denorm exponent
wire [11:0] Texp_addone; //results
wire [11:0] Texp_subone;
wire [51:0] Rmant;
wire [51:0] Tmant;
wire Rzero;
wire VSS = 1'b0;
wire VDD = 1'b1;
wire [51:0] B; // Value used to add the "ones"
wire [11:0] B_12_overflow; // Value used to add one to exponent
wire [11:0] B_12_underflow; // Value used to subtract one from exponent
wire S_SP; // Single precision sticky bit
wire S_DP; // Double precision sticky bit
wire S; // Actual sticky bit
wire R; // Round bit
wire L; // Least significant bit
wire add_one; // '1' if one should be added
wire UnFlow_SP, UnFlow_DP, UnderFlow;
wire OvFlow_SP, OvFlow_DP, OverFlow;
wire Inexact;
wire Round_zero;
wire Infinite;
wire VeryLarge;
wire Largest;
wire Adj_exp;
wire Valid;
wire NaN;
wire Cout;
wire Cout_overflow;
wire Texp_l7z;
wire Texp_l7o;
wire OvCon;
// Determine the sticky bits for double and single precision
assign S_DP= A[9]|A[8]|A[7]|A[6]|A[5]|A[4]|A[3]|A[2]|A[1]|A[0];
assign S_SP = S_DP |A[38]|A[37]|A[36]|A[35]|A[34]|A[33]|A[32]|A[31]|A[30]|
A[29]|A[28]|A[27]|A[26]|A[25]|A[24]|A[23]|A[22]|A[21]|A[20]|
A[19]|A[18]|A[17]|A[16]|A[15]|A[14]|A[13]|A[12]|A[11]|A[10];
// Set the least (L), round (R), and sticky (S) bits based on
// the precision.
assign {L, R, S} = P ? {A[40],A[39],S_SP} : {A[11],A[10],S_DP};
// Add one if ((the rounding mode is round-to-nearest) and (R is one) and
// (S or L is one)) or ((the rounding mode is towards plus or minus
// infinity (rm[1] = 1)) and (the sign and rm[0] are the same) and
// (R or S is one)).
assign add_one = ~rm[2] & ((~rm[1]&~rm[0]&R&(L|S)) | (rm[1]&(Asign^~rm[0])&(R|S))) | (rm[2] & R);
// Add one using a 52-bit adder. The one is added to the LSB B[0] for
// double precision or to B[29] for single precision.
// This could be simplified by using a specialized adder.
// The current adder is actually 64-bits. The leading one
// for normalized results in not included in the addition.
assign B = {{22{VSS}}, add_one&P, {28{VSS}}, add_one&~P};
assign B_12_overflow = {8'h0, 3'b0, normal_overflow};
assign B_12_underflow = {8'h0, 3'b0, normal_underflow};
cla52 add1(Tmant, Cout, A[62:11], B);
cla12 add1_exp(Texp_addone, Cout_overflow, Texp, B_12_overflow);
cla_sub12 sub1_exp(Texp_subone, Texp, B_12_underflow);
// Now that rounding is done, we compute the final exponent
// and test for special cases.
// Compute the value of the exponent by subtracting the shift
// value from the previous exponent and then adding 2 + cout.
// If needed this could be optimized to used a specialized
// adder.
assign Texp = DenormIn ? ({1'b0, exponent_postsum}) : ({VSS, Aexp} - {{6{VSS}}, norm_shift} +{{10{VSS}}, VDD, Cout});
// Overflow only occurs for double precision, if Texp[10] to Texp[0] are
// all ones. To encourage sharing with single precision overflow detection,
// the lower 7 bits are tested separately.
assign Texp_l7o = Texp[6]&Texp[5]&Texp[4]&Texp[3]&Texp[2]&Texp[1]&Texp[0];
assign OvFlow_DP = Texp[10]&Texp[9]&Texp[8]&Texp[7]&Texp_l7o;
// Overflow occurs for single precision if (Texp[10] is one) and
// ((Texp[9] or Texp[8] or Texp[7]) is one) or (Texp[6] to Texp[0]
// are all ones.
assign OvFlow_SP = Texp[10]&(Texp[9]|Texp[8]|Texp[7]|Texp_l7o);
// Underflow occurs for double precision if (Texp[11] is one) or Texp[10] to
// Texp[0] are all zeros.
assign Texp_l7z = ~Texp[6]&~Texp[5]&~Texp[4]&~Texp[3]&~Texp[2]&~Texp[1]&~Texp[0];
assign UnFlow_DP = Texp[11] | ~Texp[10]&~Texp[9]&~Texp[8]&~Texp[7]&Texp_l7z;
// Underflow occurs for single precision if (Texp[10] is zero) and
// (Texp[9] or Texp[8] or Texp[7]) is zero.
assign UnFlow_SP = (~Texp[10]&(~Texp[9]|~Texp[8]|~Texp[7]|Texp_l7z));
// Set the overflow and underflow flags. They should not be set if
// the input was infinite or NaN or the output of the adder is zero.
// 00 = Valid
// 10 = NaN
assign Valid = (~sel_inv[2]&~sel_inv[1]&~sel_inv[0]);
assign NaN = ~sel_inv[2]&~sel_inv[1]& sel_inv[0];
assign UnderFlow = ((P & UnFlow_SP | UnFlow_DP)&Valid&exp_valid) |
(~Aexp[10]&Aexp[9]&Aexp[8]&Aexp[7]&~Aexp[6]
&~Aexp[5]&~Aexp[4]&~Aexp[3]&~Aexp[2]
&~Aexp[1]&~Aexp[0]&sel_inv[3]);
assign OverFlow = (P & OvFlow_SP | OvFlow_DP)&Valid&~UnderFlow&exp_valid;
// The DenormIO is set if underflow has occurred or if their was a
// denormalized input.
assign DenormIO = DenormIn | UnderFlow;
// The final result is Inexact if any rounding occurred ((i.e., R or S
// is one), or (if the result overflows ) or (if the result underflows and the
// underflow trap is not enabled)) and (value of the result was not previous set
// by an exception case).
assign Inexact = (R|S|OverFlow|(UnderFlow&~UnEn))&Valid;
// Set the IEEE Exception Flags: Inexact, Underflow, Overflow, Div_By_0,
// Invlalid.
assign Flags = {UnderFlow, VSS, OverFlow, Invalid, Inexact};
// Determine the final result.
// The sign of the final result is one if the result is not zero and
// the sign of A is one, or if the result is zero and the the rounding
// mode is round-to-minus infinity. The final result is zero, if exp_valid
// is zero. If underflow occurs, then the result is set to zero.
//
// For Zero (goes equally for subtraction although
// signs may alter operands sign):
// -0 + -0 = -0 (always)
// +0 + +0 = +0 (always)
// -0 + +0 = +0 (for RN, RZ, RU)
// -0 + +0 = -0 (for RD)
assign Rzero = ~exp_valid | UnderFlow;
assign Rsign = DenormIn ?
( ~(op_type[2] | op_type[1] | op_type[0]) ?
( (sum[63] & (A_Norm | B_Norm) & (exp_A_unmodified[11] ^ exp_B_unmodified[11])) ?
~Asign : Asign)
: ( ((A_Norm ^ B_Norm) & (exp_A_unmodified[11] ~^ exp_B_unmodified[11])) ?
(normal_underflow ? ~Asign : Asign) : Asign)
)
: ( ((Asign&exp_valid |
(sel_inv[2]&~sel_inv[1]&sel_inv[0]&rm[1]&rm[0] |
sel_inv[2]&sel_inv[1]&~sel_inv[0] |
~exp_valid&rm[1]&rm[0]&~sel_inv[2] |
UnderFlow&rm[1]&rm[0]) & ~convert) & ~sel_inv[3]) |
(Asign & sel_inv[3]) );
// The exponent of the final result is zero if the final result is
// zero or a denorm, all ones if the final result is NaN or Infinite
// or overflow occurred and the magnitude of the number is
// not rounded toward from zero, and all ones with an LSB of zero
// if overflow occurred and the magnitude of the number is
// rounded toward zero. If the result is single precision,
// Texp[7] shoud be inverted. When the Overflow trap is enabled (OvEn = 1)
// and overflow occurs and the operation is not conversion, bits 10 and 9 are
// inverted for double precision, and bits 7 and 6 are inverted for single precision.
assign Round_zero = ~rm[1]&rm[0] | ~Asign&rm[0] | Asign&rm[1]&~rm[0];
assign VeryLarge = OverFlow & ~OvEn;
assign Infinite = (VeryLarge & ~Round_zero) | (~sel_inv[2] & sel_inv[1]);
assign Largest = VeryLarge & Round_zero;
assign Adj_exp = OverFlow & OvEn & ~convert;
assign Rexp[10:1] = ({10{~Valid}} |
{Texp[10]&~Adj_exp, Texp[9]&~Adj_exp, Texp[8],
(Texp[7]^P)&~(Adj_exp&P), Texp[6]&~(Adj_exp&P), Texp[5:1]} |
{10{VeryLarge}})&{10{~Rzero | NaN}};
assign Rexp[0] = ({~Valid} | Texp[0] | Infinite)&(~Rzero | NaN)&~Largest;
// The denormalized rounded exponent uses the overflow/underflow values
// computed in the fpadd component to round the exponent up or down
// Depending on the operation and the signs of the orignal operands,
// underflow may or may not be needed to round.
assign Rexp_denorm = DenormIn ?
((~op_type[2] & ~op_type[1] & op_type[0]) ?
( ((A_Norm != B_Norm) & (exp_A_unmodified[11] == exp_B_unmodified[11])) ?
( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) )
: ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) )
: ( ((A_Norm != B_Norm) & (exp_A_unmodified[11] != exp_B_unmodified[11])) ?
( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) )
: ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) )
) :
(op_type[3]) ? exp_A_unmodified : Rexp;
// If the result is zero or infinity, the mantissa is all zeros.
// If the result is NaN, the mantissa is 10...0
// If the result the largest floating point number, the mantissa
// is all ones. Otherwise, the mantissa is not changed.
// If operation is denormalized, take the mantissa directly from
// its normalized value.
assign Rmant[51] = Largest | NaN | (Tmant[51]&~Infinite&~Rzero);
assign Rmant[50:0] = {51{Largest}} | (Tmant[50:0]&{51{~Infinite&Valid&~Rzero}});
assign ShiftMant = A[51:0];
// For single precision, the 8 least significant bits of the exponent
// and 23 most significant bits of the mantissa contain bits used
// for the final result. A double precision result is returned if
// overflow has occurred, the overflow trap is enabled, and a conversion
// is being performed.
assign OvCon = OverFlow & OvEn & convert;
assign Result = (op_type[3]) ? {A[63:0]} : (DenormIn ? {Rsign, Rexp_denorm, ShiftMant} : ((P&~OvCon) ? {Rsign, Rexp[7:0], Rmant[51:29], {32{VSS}}}
: {Rsign, Rexp, Rmant}));
endmodule // rounder

View File

@ -0,0 +1,187 @@
//
// The rounder takes as inputs a 64-bit value to be rounded, A, the
// exponent of the value to be rounded, the sign of the final result, Sign,
// the precision of the results, P, and the two-bit rounding mode, rm.
// It produces a rounded 52-bit result, Z, the exponent of the rounded
// result, Z_exp, and a flag that indicates if the result was rounded,
// Inexact. The rounding mode has the following values.
// rm Modee
// 00 round-to-nearest-even
// 01 round-toward-zero
// 10 round-toward-plus infinity
// 11 round-toward-minus infinity
//
module rounder_div (Result, DenormIO, Flags, rm, P, OvEn,
UnEn, exp_diff, sel_inv, Invalid, DenormIn,
SignR, q1, qm1, qp1, q0, qm0, qp0, regr_out);
input [1:0] rm;
input P;
input OvEn;
input UnEn;
input [12:0] exp_diff;
input [2:0] sel_inv;
input Invalid;
input DenormIn;
input SignR;
input [63:0] q1;
input [63:0] qm1;
input [63:0] qp1;
input [63:0] q0;
input [63:0] qm0;
input [63:0] qp0;
input [127:0] regr_out;
output [63:0] Result;
output DenormIO;
output [4:0] Flags;
supply1 vdd;
supply0 vss;
wire Rsign;
wire [10:0] Rexp;
wire [12:0] Texp;
wire [51:0] Rmant;
wire [63:0] Tmant;
wire [51:0] Smant;
wire Rzero;
wire Gdp, Gsp, G;
wire UnFlow_SP, UnFlow_DP, UnderFlow;
wire OvFlow_SP, OvFlow_DP, OverFlow;
wire Inexact;
wire Round_zero;
wire Infinite;
wire VeryLarge;
wire Largest;
wire Div0;
wire Adj_exp;
wire Valid;
wire NaN;
wire Texp_l7z;
wire Texp_l7o;
wire OvCon;
wire [1:0] mux_mant;
wire sign_rem;
wire [63:0] q, qm, qp;
wire exp_ovf, exp_ovfSP, exp_ovfDP;
// Remainder = 0?
assign zero_rem = ~(|regr_out);
// Remainder Sign
assign sign_rem = ~regr_out[127];
// choose correct Guard bit [1,2) or [0,1)
assign Gdp = q1[63] ? q1[10] : q0[10];
assign Gsp = q1[63] ? q1[39] : q0[39];
assign G = P ? Gsp : Gdp;
// Selection of Rounding (from logic/switching)
assign mux_mant[1] = (SignR&rm[1]&rm[0]&G) | (!SignR&rm[1]&!rm[0]&G) |
(!rm[1]&!rm[0]&G&!sign_rem) |
(SignR&rm[1]&rm[0]&!zero_rem&!sign_rem) |
(!SignR&rm[1]&!rm[0]&!zero_rem&!sign_rem);
assign mux_mant[0] = (!SignR&rm[0]&!G&!zero_rem&sign_rem) |
(!rm[1]&rm[0]&!G&!zero_rem&sign_rem) |
(SignR&rm[1]&!rm[0]&!G&!zero_rem&sign_rem);
// Which Q?
mux2 #(64) mx1 (q0, q1, q1[63], q);
mux2 #(64) mx2 (qm0, qm1, q1[63], qm);
mux2 #(64) mx3 (qp0, qp1, q1[63], qp);
// Choose Q, Q+1, Q-1
mux3 #(64) mx4 (q, qm, qp, mux_mant, Tmant);
assign Smant = Tmant[62:11];
// Compute the value of the exponent
// exponent is modified if we choose:
// 1.) we choose any qm0, qp0, q0 (since we shift mant)
// 2.) we choose qp and we overflow (for RU)
assign exp_ovf = |{qp[62:40], (qp[39:11] & {29{~P}})};
assign Texp = exp_diff - {{13{vss}}, ~q1[63]} + {{13{vss}}, mux_mant[1]&qp1[63]&~exp_ovf};
// Overflow only occurs for double precision, if Texp[10] to Texp[0] are
// all ones. To encourage sharing with single precision overflow detection,
// the lower 7 bits are tested separately.
assign Texp_l7o = Texp[6]&Texp[5]&Texp[4]&Texp[3]&Texp[2]&Texp[1]&Texp[0];
assign OvFlow_DP = (~Texp[12]&Texp[11]) | (Texp[10]&Texp[9]&Texp[8]&Texp[7]&Texp_l7o);
// Overflow occurs for single precision if (Texp[10] is one) and
// ((Texp[9] or Texp[8] or Texp[7]) is one) or (Texp[6] to Texp[0]
// are all ones.
assign OvFlow_SP = Texp[10]&(Texp[9]|Texp[8]|Texp[7]|Texp_l7o);
// Underflow occurs for double precision if (Texp[11]/Texp[10] is one) or
// Texp[10] to Texp[0] are all zeros.
assign Texp_l7z = ~Texp[6]&~Texp[5]&~Texp[4]&~Texp[3]&~Texp[2]&~Texp[1]&~Texp[0];
assign UnFlow_DP = (Texp[12]&Texp[11]) | ~Texp[11]&~Texp[10]&~Texp[9]&~Texp[8]&~Texp[7]&Texp_l7z;
// Underflow occurs for single precision if (Texp[10] is zero) and
// (Texp[9] or Texp[8] or Texp[7]) is zero.
assign UnFlow_SP = ~Texp[10]&(~Texp[9]|~Texp[8]|~Texp[7]|Texp_l7z);
// Set the overflow and underflow flags. They should not be set if
// the input was infinite or NaN or the output of the adder is zero.
// 00 = Valid
// 10 = NaN
assign Valid = (~sel_inv[2]&~sel_inv[1]&~sel_inv[0]);
assign NaN = ~sel_inv[1]& sel_inv[0];
assign UnderFlow = (P & UnFlow_SP | UnFlow_DP) & Valid;
assign OverFlow = (P & OvFlow_SP | OvFlow_DP) & Valid;
assign Div0 = sel_inv[2]&sel_inv[1]&~sel_inv[0];
// The DenormIO is set if underflow has occurred or if their was a
// denormalized input.
assign DenormIO = DenormIn | UnderFlow;
// The final result is Inexact if any rounding occurred ((i.e., R or S
// is one), or (if the result overflows ) or (if the result underflows and the
// underflow trap is not enabled)) and (value of the result was not previous set
// by an exception case).
assign Inexact = (G|~zero_rem|OverFlow|(UnderFlow&~UnEn))&Valid;
// Set the IEEE Exception Flags: Inexact, Underflow, Overflow, Div_By_0,
// Invlalid.
assign Flags = {Inexact, UnderFlow, OverFlow, Div0, Invalid};
// Determine sign
assign Rzero = UnderFlow | (~sel_inv[2]&sel_inv[1]&sel_inv[0]);
assign Rsign = SignR;
// The exponent of the final result is zero if the final result is
// zero or a denorm, all ones if the final result is NaN or Infinite
// or overflow occurred and the magnitude of the number is
// not rounded toward from zero, and all ones with an LSB of zero
// if overflow occurred and the magnitude of the number is
// rounded toward zero. If the result is single precision,
// Texp[7] shoud be inverted. When the Overflow trap is enabled (OvEn = 1)
// and overflow occurs and the operation is not conversion, bits 10 and 9 are
// inverted for double precision, and bits 7 and 6 are inverted for single precision.
assign Round_zero = ~rm[1]&rm[0] | ~SignR&rm[0] | SignR&rm[1]&~rm[0];
assign VeryLarge = OverFlow & ~OvEn;
assign Infinite = (VeryLarge & ~Round_zero) | sel_inv[1];
assign Largest = VeryLarge & Round_zero;
assign Adj_exp = OverFlow & OvEn;
assign Rexp[10:1] = ({10{~Valid}} |
{Texp[10]&~Adj_exp, Texp[9]&~Adj_exp, Texp[8],
(Texp[7]^P)&~(Adj_exp&P), Texp[6]&~(Adj_exp&P), Texp[5:1]} |
{10{VeryLarge}})&{10{~Rzero | NaN}};
assign Rexp[0] = ({~Valid} | Texp[0] | Infinite)&(~Rzero | NaN)&~Largest;
// If the result is zero or infinity, the mantissa is all zeros.
// If the result is NaN, the mantissa is 10...0
// If the result the largest floating point number, the mantissa
// is all ones. Otherwise, the mantissa is not changed.
assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero);
assign Rmant[50:0] = {51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}});
// For single precision, the 8 least significant bits of the exponent
// and 23 most significant bits of the mantissa contain bits used
// for the final result. A double precision result is returned if
// overflow has occurred, the overflow trap is enabled, and a conversion
// is being performed.
assign OvCon = OverFlow & OvEn;
assign Result = (P&~OvCon) ? {Rsign, Rexp[7:0], Rmant[51:29], {32{vss}}}
: {Rsign, Rexp, Rmant};
endmodule // rounder

View File

@ -0,0 +1,162 @@
// MJS - This module implements a 57-bit 2-to-1 multiplexor, which is
// used in the barrel shifter for significand alignment.
module mux21x57 (Z, A, B, Sel);
input [56:0] A;
input [56:0] B;
input Sel;
output [56:0] Z;
assign Z = Sel ? B : A;
endmodule // mux21x57
// MJS - This module implements a 64-bit 2-to-1 multiplexor, which is
// used in the barrel shifter for significand normalization.
module mux21x64 (Z, A, B, Sel);
input [63:0] A;
input [63:0] B;
input Sel;
output [63:0] Z;
assign Z = Sel ? B : A;
endmodule // mux21x64
// The implementation of the barrel shifter was modified to use
// fewer gates. It is now implemented using six 64-bit 2-to-1 muxes. The
// barrel shifter takes a 64-bit input A and shifts it left by up to
// 63-bits, as specified by Shift, to produce a 63-bit output Z.
// Bits to the right are filled with zeros.
// The 64 bit shift is implemented using 6 stages of shifts of 32
// 16, 8, 4, 2, and 1 bit shifts.
module barrel_shifter_l64 (Z, A, Shift);
input [63:0] A;
input [5:0] Shift;
wire [63:0] stage1;
wire [63:0] stage2;
wire [63:0] stage3;
wire [63:0] stage4;
wire [63:0] stage5;
wire [31:0] thirtytwozeros = 32'h0;
wire [15:0] sixteenzeros = 16'h0;
wire [ 7:0] eightzeros = 8'h0;
wire [ 3:0] fourzeros = 4'h0;
wire [ 1:0] twozeros = 2'b00;
wire onezero = 1'b0;
output [63:0] Z;
mux21x64 mx01(stage1, A, {A[31:0], thirtytwozeros}, Shift[5]);
mux21x64 mx02(stage2, stage1, {stage1[47:0], sixteenzeros}, Shift[4]);
mux21x64 mx03(stage3, stage2, {stage2[55:0], eightzeros}, Shift[3]);
mux21x64 mx04(stage4, stage3, {stage3[59:0], fourzeros}, Shift[2]);
mux21x64 mx05(stage5, stage4, {stage4[61:0], twozeros}, Shift[1]);
mux21x64 mx06(Z , stage5, {stage5[62:0], onezero}, Shift[0]);
endmodule // barrel_shifter_l63
// The implementation of the barrel shifter was modified to use
// fewer gates. It is now implemented using six 57-bit 2-to-1 muxes. The
// barrel shifter takes a 57-bit input A and right shifts it by up to
// 63-bits, as specified by Shift, to produce a 57-bit output Z.
// It also computes a Sticky bit, which is set to
// one if any of the bits that were shifted out was one.
// Bits shifted into the left are filled with zeros.
// The 63 bit shift is implemented using 6 stages of shifts of 32
// 16, 8, 4, 2, and 1 bits.
module barrel_shifter_r57 (Z, Sticky, A, Shift);
input [56:0] A;
input [5:0] Shift;
output Sticky;
output [56:0] Z;
wire [56:0] stage1;
wire [56:0] stage2;
wire [56:0] stage3;
wire [56:0] stage4;
wire [56:0] stage5;
wire [62:0] sixtythreezeros = 63'h0;
wire [31:0] thirtytwozeros = 32'h0;
wire [15:0] sixteenzeros = 16'h0;
wire [ 7:0] eightzeros = 8'h0;
wire [ 3:0] fourzeros = 4'h0;
wire [ 1:0] twozeros = 2'b00;
wire onezero = 1'b0;
wire [62:0] S;
// Shift operations
mux21x57 mx01(stage1, A, {thirtytwozeros, A[56:32]}, Shift[5]);
mux21x57 mx02(stage2, stage1, {sixteenzeros, stage1[56:16]}, Shift[4]);
mux21x57 mx03(stage3, stage2, {eightzeros, stage2[56:8]}, Shift[3]);
mux21x57 mx04(stage4, stage3, {fourzeros, stage3[56:4]}, Shift[2]);
mux21x57 mx05(stage5, stage4, {twozeros, stage4[56:2]}, Shift[1]);
mux21x57 mx06(Z , stage5, {onezero, stage5[56:1]}, Shift[0]);
// Sticky bit calculation. The Sticky bit is set to one if any of the
// bits that were shifter out were one
assign S[31:0] = {32{Shift[5]}} & A[31:0];
assign S[47:32] = {16{Shift[4]}} & stage1[15:0];
assign S[55:48] = { 8{Shift[3]}} & stage2[7:0];
assign S[59:56] = { 4{Shift[2]}} & stage3[3:0];
assign S[61:60] = { 2{Shift[1]}} & stage4[1:0];
assign S[62] = Shift[0] & stage5[0];
assign Sticky = (S != sixtythreezeros);
endmodule // barrel_shifter_r57
module barrel_shifter_r64 (Z, Sticky, A, Shift);
input [63:0] A;
input [5:0] Shift;
output Sticky;
output [63:0] Z;
wire [63:0] stage1;
wire [63:0] stage2;
wire [63:0] stage3;
wire [63:0] stage4;
wire [63:0] stage5;
wire [62:0] sixtythreezeros = 63'h0;
wire [31:0] thirtytwozeros = 32'h0;
wire [15:0] sixteenzeros = 16'h0;
wire [ 7:0] eightzeros = 8'h0;
wire [ 3:0] fourzeros = 4'h0;
wire [ 1:0] twozeros = 2'b00;
wire onezero = 1'b0;
wire [62:0] S;
// Shift operations
mux21x64 mx01(stage1, A, {thirtytwozeros, A[63:32]}, Shift[5]);
mux21x64 mx02(stage2, stage1, {sixteenzeros, stage1[63:16]}, Shift[4]);
mux21x64 mx03(stage3, stage2, {eightzeros, stage2[63:8]}, Shift[3]);
mux21x64 mx04(stage4, stage3, {fourzeros, stage3[63:4]}, Shift[2]);
mux21x64 mx05(stage5, stage4, {twozeros, stage4[63:2]}, Shift[1]);
mux21x64 mx06(Z , stage5, {onezero, stage5[63:1]}, Shift[0]);
// Sticky bit calculation. The Sticky bit is set to one if any of the
// bits that were shifter out were one
assign S[31:0] = {32{Shift[5]}} & A[31:0];
assign S[47:32] = {16{Shift[4]}} & stage1[15:0];
assign S[55:48] = { 8{Shift[3]}} & stage2[7:0];
assign S[59:56] = { 4{Shift[2]}} & stage3[3:0];
assign S[61:60] = { 2{Shift[1]}} & stage4[1:0];
assign S[62] = Shift[0] & stage5[0];
assign Sticky = (S != sixtythreezeros);
endmodule // barrel_shifter_r64

View File

@ -0,0 +1,112 @@
// Sklansky Prefix Adder
module exp_add (cout, sum, a, b, cin);
input [13:0] a, b;
input cin;
output [13:0] sum;
output cout;
wire [14:0] p,g;
wire [13:0] c;
// pre-computation
assign p={a^b,1'b0};
assign g={a&b, cin};
// prefix tree
sklansky prefix_tree(c, p[13:0], g[13:0]);
// post-computation
assign sum=p[14:1]^c;
assign cout=g[14]|(p[14]&c[13]);
endmodule
module sklansky (c, p, g);
input [14:0] p;
input [14:0] g;
output [14:1] c;
// parallel-prefix, Sklansky
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_6_4 (G_6_4, P_6_4, {g[6],G_5_4}, {p[6],P_5_4});
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_10_8 (G_10_8, P_10_8, {g[10],G_9_8}, {p[10],P_9_8});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
black b_14_12 (G_14_12, P_14_12, {g[14],G_13_12}, {p[14],P_13_12});
black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12});
// Stage 3: Generates G/P pairs that span 4 bits
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_6_0 (G_6_0, {G_6_4,G_3_0}, P_6_4);
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
black b_12_8 (G_12_8, P_12_8, {g[12],G_11_8}, {p[12],P_11_8});
black b_13_8 (G_13_8, P_13_8, {G_13_12,G_11_8}, {P_13_12,P_11_8});
black b_14_8 (G_14_8, P_14_8, {G_14_12,G_11_8}, {P_14_12,P_11_8});
black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8});
// Stage 4: Generates G/P pairs that span 8 bits
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
grey g_10_0 (G_10_0, {G_10_8,G_7_0}, P_10_8);
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
grey g_12_0 (G_12_0, {G_12_8,G_7_0}, P_12_8);
grey g_13_0 (G_13_0, {G_13_8,G_7_0}, P_13_8);
grey g_14_0 (G_14_0, {G_14_8,G_7_0}, P_14_8);
grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8);
// Final Stage: Apply c_k+1=G_k_0
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
assign c[9]=G_8_0;
assign c[10]=G_9_0;
assign c[11]=G_10_0;
assign c[12]=G_11_0;
assign c[13]=G_12_0;
assign c[14]=G_13_0;
endmodule
// Black cell
module black(gout, pout, gin, pin);
input [1:0] gin, pin;
output gout, pout;
assign pout=pin[1]&pin[0];
assign gout=gin[1]|(pin[1]&gin[0]);
endmodule
// Grey cell
module grey(gout, gin, pin);
input[1:0] gin;
input pin;
output gout;
assign gout=gin[1]|(pin&gin[0]);
endmodule

View File

@ -0,0 +1,94 @@
// testbench
module tb ();
reg [63:0] op1;
reg [63:0] op2;
reg [2:0] rm;
reg [3:0] op_type;
reg P;
reg OvEn;
reg UnEn;
wire [63:0] result;
wire [4:0] Flags;
wire Denorm;
reg clk, tb_clk;
reg [63:0] yexpected, yexpected_next;
reg reset;
reg [63:0] vectornum, errors; // bookkeeping variables
reg [199:0] testvectors[49999:0]; // array of testvectors
reg [7:0] flags_expected, flags_expected_next;
integer handle3;
integer desc3;
// instantiate device under test
fpuadd_testpipe dut (result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn, clk);
always
begin
clk = 1;
tb_clk = 1;
#5;
clk = 0;
#5;
clk = 1;
tb_clk = 0;
#5;
clk = 0;
#5;
end
initial
begin
handle3 = $fopen("../../fpuaddcvt/test_vectors/f64_add_rne.txt");
$readmemh("../../fpuaddcvt/test_vectors/f64_add_rne.tv", testvectors);
vectornum = 0; errors = 0;
reset = 1;#10;reset = 0;
//reset = 1; #30; reset = 0;
end
always @(negedge tb_clk)
begin
desc3 = handle3;
#0 op_type = 4'b0000;
#0 P = 1'b0;
#0 rm = 3'b000;
#0 OvEn = 1'b0;
#0 UnEn = 1'b0;
#1; {yexpected_next, flags_expected_next} = {yexpected, flags_expected};
#0; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
#5 $fdisplay(desc3, "%h_%h_%h_%b", op1, op2, result, Flags);
end
// check results on rising edge of clk - actual results calculated on
// negedge
//
// skip initial cycle
always @(posedge tb_clk)
if (~reset)
begin // skip during reset
#1;
if (result !== yexpected) begin
$display("Error: inputs = %h %h", op1, op2);
$display(" outputs = %h (%h expected)", result, yexpected);
errors = errors + 1;
end
//else
//begin
//$display("Good");
// end
vectornum = vectornum + 1;
if (testvectors[vectornum] === 200'bx)
begin
$display("%d tests completed with %d errors",
vectornum, errors);
$stop;
end
end // if (~reset)
endmodule // tb

View File

@ -192,7 +192,7 @@ module testbench_busybear();
always @(dut.HRDATA) begin
#2;
if (dut.hart.MemRWM[1] && HADDR[31:3] != dut.PCF[31:3] && dut.HRDATA !== {64{1'bx}}) begin
if (dut.hart.MemRWM[1] && ~HWRITE && HADDR[31:3] != dut.PCF[31:3] && dut.HRDATA !== {64{1'bx}}) begin
//$display("%0t", $time);
if($feof(data_file_memR)) begin
$display("no more memR data to read");
@ -333,15 +333,8 @@ module testbench_busybear();
`CHECK_CSR2(STVAL, `CSRS)
`CHECK_CSR(STVEC)
initial begin //this is just fun to make causes easier to understand
#38;
force dut.hart.priv.csr.genblk1.csrm.NextCauseM = 0;
#16;
release dut.hart.priv.csr.genblk1.csrm.NextCauseM;
end
initial begin //this is temporary until the bug can be fixed!!!
#18909760;
#11130100;
force dut.hart.ieu.dp.regf.rf[5] = 64'h0000000080000004;
#100;
release dut.hart.ieu.dp.regf.rf[5];
@ -409,19 +402,7 @@ module testbench_busybear();
forcedInstr = 1;
end
else begin
if(dut.hart.ifu.InstrRawD[28:27] != 2'b11 && dut.hart.ifu.InstrRawD[6:0] == 7'b0101111) begin //for now, replace non-SC A instrs with LD
force CheckInstrD = {12'b0, CheckInstrD[19:7], 7'b0000011};
release CheckInstrD;
force dut.hart.ifu.InstrRawD = {12'b0, dut.hart.ifu.InstrRawD[19:7], 7'b0000011};
#7;
release dut.hart.ifu.InstrRawD;
$display("warning: replacing AMO instr %s at PC=%0x with ld", PCtext, dut.hart.ifu.PCD);
warningCount += 1;
forcedInstr = 1;
end
else begin
forcedInstr = 0;
end
forcedInstr = 0;
end
end
end
@ -448,19 +429,7 @@ module testbench_busybear();
forcedInstr = 1;
end
else begin
if(dut.hart.ifu.InstrRawD[28:27] != 2'b11 && dut.hart.ifu.InstrRawD[6:0] == 7'b0101111) begin //for now, replace non-SC A instrs with LD
force CheckInstrD = {12'b0, CheckInstrD[19:7], 7'b0000011};
release CheckInstrD;
force dut.hart.ifu.InstrRawD = {12'b0, dut.hart.ifu.InstrRawD[19:7], 7'b0000011};
#7;
release dut.hart.ifu.InstrRawD;
$display("warning: replacing AMO instr %s at PC=%0x with ld", PCtext, dut.hart.ifu.PCD);
warningCount += 1;
forcedInstr = 1;
end
else begin
forcedInstr = 0;
end
forcedInstr = 0;
end
end
// then expected PC value

View File

@ -1,12 +1,11 @@
///////////////////////////////////////////
// testbench-privileged.sv
// testbench-imperas.sv
//
// Written: Ben Bracker (bbracker@hmc.edu) 11 Feb. 2021, Tiny Modifications: Domenico Ottolia (dottolia@hmc.edu) 16 Mar. 2021
// Based on: testbench-imperas.sv by David Harris
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: Wally Testbench and helper modules
// Applies test programs meant to test peripherals
// These tests assume the processor itself is already working!
// Applies test programs from the Imperas suite
//
// A component of the Wally configurable RISC-V project.
//
@ -28,6 +27,9 @@
`include "wally-config.vh"
module testbench();
parameter DEBUG = 0;
parameter TESTSBP = 0;
logic clk;
logic reset;
@ -36,13 +38,13 @@ module testbench();
logic [`XLEN-1:0] signature[0:10000];
logic [`XLEN-1:0] testadr;
string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
logic [31:0] InstrW;
//logic [31:0] InstrW;
logic [`XLEN-1:0] meminit;
//string tests64i[] =
string tests[] = '{
"rv64p/WALLY-CAUSE", "0"
};
"rv64p/WALLY-CAUSE", "4000"
};
string ProgramAddrMapFile, ProgramLabelMapFile;
logic [`AHBW-1:0] HRDATAEXT;
logic HREADYEXT, HRESPEXT;
logic [31:0] HADDR;
@ -57,9 +59,26 @@ module testbench();
// pick tests based on modes supported
// *** actually I no longer support this
// would need to put this back in if you wanted to test anything other than rv64i
//initial
// if (`XLEN == 64) begin // RV64
// if(TESTSBP) begin
// tests = testsBP64;
// end else begin
// tests = {tests64i};
// if (`C_SUPPORTED) tests = {tests, tests64ic};
// else tests = {tests, tests64iNOc};
// if (`M_SUPPORTED) tests = {tests, tests64m};
// if (`A_SUPPORTED) tests = {tests, tests64a};
// end
// // tests = {tests64a, tests};
// end else begin // RV32
// // *** add the 32 bit bp tests
// tests = {tests32i};
// if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic};
// else tests = {tests, tests32iNOc};
// if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m};
// if (`A_SUPPORTED) tests = {tests, tests32a};
// end
string signame, memfilename;
logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
@ -75,10 +94,10 @@ module testbench();
wallypipelinedsoc dut(.*);
// Track names of instructions
instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE,
dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
dut.hart.ifu.InstrM, InstrW,
InstrDName, InstrEName, InstrMName, InstrWName);
instrTrackerTBPriv it(clk, reset, dut.hart.ieu.dp.FlushE,
dut.hart.ifu.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
dut.hart.ifu.InstrM, dut.hart.ifu.InstrW,
InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
// initialize tests
initial
@ -97,7 +116,10 @@ module testbench();
memfilename = {"../../imperas-riscv-tests/work/", tests[test], ".elf.memfile"};
$readmemh(memfilename, dut.imem.RAM);
$readmemh(memfilename, dut.uncore.dtim.RAM);
reset = 1; # 22; reset = 0;
ProgramAddrMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.lab"};
$display("Read memfile %s", memfilename);
reset = 1; # 42; reset = 0;
end
// generate clock to sequence tests
@ -170,25 +192,43 @@ module testbench();
$readmemh(memfilename, dut.imem.RAM);
$readmemh(memfilename, dut.uncore.dtim.RAM);
$display("Read memfile %s", memfilename);
ProgramAddrMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.lab"};
reset = 1; # 17; reset = 0;
end
end
end
end // always @ (negedge clk)
// track the current function or global label
if (DEBUG == 1) begin : functionRadix
function_radix function_radix(.reset(reset),
.ProgramAddrMapFile(ProgramAddrMapFile),
.ProgramLabelMapFile(ProgramLabelMapFile));
end
// initialize the branch predictor
initial begin
$readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.Predictor.DirPredictor.PHT.memory);
$readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.TargetPredictor.memory.memory);
end
endmodule
/* verilator lint_on STMTDLY */
/* verilator lint_on WIDTH */
module instrTrackerTB(
module instrTrackerTBPriv(
input logic clk, reset, FlushE,
input logic [31:0] InstrD,
input logic [31:0] InstrF, InstrD,
input logic [31:0] InstrE, InstrM,
output logic [31:0] InstrW,
output string InstrDName, InstrEName, InstrMName, InstrWName);
input logic [31:0] InstrW,
// output logic [31:0] InstrW,
output string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
// stage Instr to Writeback for visualization
flopr #(32) InstrWReg(clk, reset, InstrM, InstrW);
// flopr #(32) InstrWReg(clk, reset, InstrM, InstrW);
instrNameDecTB fdec(InstrF, InstrFName);
instrNameDecTB ddec(InstrD, InstrDName);
instrNameDecTB edec(InstrE, InstrEName);
instrNameDecTB mdec(InstrM, InstrMName);
@ -247,10 +287,18 @@ module instrNameDecTB(
else name = "ILLEGAL";
10'b0111011_000: if (funct7 == 7'b0000000) name = "ADDW";
else if (funct7 == 7'b0100000) name = "SUBW";
else if (funct7 == 7'b0000001) name = "MULW";
else name = "ILLEGAL";
10'b0111011_001: if (funct7 == 7'b0000000) name = "SLLW";
else if (funct7 == 7'b0000001) name = "DIVW";
else name = "ILLEGAL";
10'b0111011_001: name = "SLLW";
10'b0111011_101: if (funct7 == 7'b0000000) name = "SRLW";
else if (funct7 == 7'b0100000) name = "SRAW";
else if (funct7 == 7'b0000001) name = "DIVUW";
else name = "ILLEGAL";
10'b0111011_110: if (funct7 == 7'b0000001) name = "REMW";
else name = "ILLEGAL";
10'b0111011_111: if (funct7 == 7'b0000001) name = "REMUW";
else name = "ILLEGAL";
10'b0110011_000: if (funct7 == 7'b0000000) name = "ADD";
else if (funct7 == 7'b0000001) name = "MUL";
@ -263,10 +311,10 @@ module instrNameDecTB(
else if (funct7 == 7'b0000001) name = "MULHSU";
else name = "ILLEGAL";
10'b0110011_011: if (funct7 == 7'b0000000) name = "SLTU";
else if (funct7 == 7'b0000001) name = "DIV";
else if (funct7 == 7'b0000001) name = "MULHU";
else name = "ILLEGAL";
10'b0110011_100: if (funct7 == 7'b0000000) name = "XOR";
else if (funct7 == 7'b0000001) name = "MUL";
else if (funct7 == 7'b0000001) name = "DIV";
else name = "ILLEGAL";
10'b0110011_101: if (funct7 == 7'b0000000) name = "SRL";
else if (funct7 == 7'b0000001) name = "DIVU";
@ -299,6 +347,30 @@ module instrNameDecTB(
10'b1110011_101: name = "CSRRWI";
10'b1110011_110: name = "CSRRSI";
10'b1110011_111: name = "CSRRCI";
10'b0101111_010: if (funct7[6:2] == 5'b00010) name = "LR.W";
else if (funct7[6:2] == 5'b00011) name = "SC.W";
else if (funct7[6:2] == 5'b00001) name = "AMOSWAP.W";
else if (funct7[6:2] == 5'b00000) name = "AMOADD.W";
else if (funct7[6:2] == 5'b00100) name = "AMOAXOR.W";
else if (funct7[6:2] == 5'b01100) name = "AMOAND.W";
else if (funct7[6:2] == 5'b01000) name = "AMOOR.W";
else if (funct7[6:2] == 5'b10000) name = "AMOMIN.W";
else if (funct7[6:2] == 5'b10100) name = "AMOMAX.W";
else if (funct7[6:2] == 5'b11000) name = "AMOMINU.W";
else if (funct7[6:2] == 5'b11100) name = "AMOMAXU.W";
else name = "ILLEGAL";
10'b0101111_011: if (funct7[6:2] == 5'b00010) name = "LR.D";
else if (funct7[6:2] == 5'b00011) name = "SC.D";
else if (funct7[6:2] == 5'b00001) name = "AMOSWAP.D";
else if (funct7[6:2] == 5'b00000) name = "AMOADD.D";
else if (funct7[6:2] == 5'b00100) name = "AMOAXOR.D";
else if (funct7[6:2] == 5'b01100) name = "AMOAND.D";
else if (funct7[6:2] == 5'b01000) name = "AMOOR.D";
else if (funct7[6:2] == 5'b10000) name = "AMOMIN.D";
else if (funct7[6:2] == 5'b10100) name = "AMOMAX.D";
else if (funct7[6:2] == 5'b11000) name = "AMOMINU.D";
else if (funct7[6:2] == 5'b11100) name = "AMOMAXU.D";
else name = "ILLEGAL";
10'b0001111_???: name = "FENCE";
default: name = "ILLEGAL";
endcase

View File

@ -1,8 +1,8 @@
#!/usr/bin/python3
##################################
# testgen-CAUSE.py
# testgen-ADD-SUB.py
#
# dottolia@hmc.edu 16 Mar 2021
# ushakya@hmc.edu & dottolia@hmc.edu 14 Feb 2021
#
# Generate directed and random test vectors for RISC-V Design Validation.
##################################
@ -13,7 +13,6 @@
from datetime import datetime
from random import randint
from random import seed
from enum import Enum
from random import getrandbits
##################################
@ -30,160 +29,85 @@ from random import getrandbits
# # exit(1)
def randRegs():
reg1 = randint(1,30)
reg2 = randint(1,30)
reg3 = randint(1,30)
reg1 = randint(1,31)
reg2 = randint(1,31)
reg3 = randint(1,31)
if (reg1 == 6 or reg2 == 6 or reg3 == 6 or reg1 == reg2):
return randRegs()
else:
return reg1, reg2, reg3
def writeVectors(storecmd):
def writeVector(a, b, storecmd):
global testnum
#expected = computeExpected(a, b, test)
#expected = expected % 2**xlen # drop carry if necessary
#if (expected < 0): # take twos complement
# expected = 2**xlen + expected
csr = "mscratch"
reg1, reg2, reg3 = randRegs()
lines = "\n# Testcase " + str(testnum) + ": " + csr + "\n"
lines = lines + "li x" + str(reg1) + ", MASK_XLEN(" + formatstr.format(a) + ")\n"
lines = lines + "li x" + str(reg2) + ", MASK_XLEN(0)\n"
# t5 gets written with mtvec?
# Page 6 of unpriviledged spec
# For both CSRRS and CSRRC, if rs1=x0, then the instruction will not write to the CSR at all, and so shall not cause any of the side effects
# lines = f"""
expected = a
# li x{reg1}, 0
# csrwi mtvec, 80002000
# .data 00000000
# j _done{testnum}
if test == "csrrw":
lines += test + " x" + str(reg2) + ", " + csr + ", x" + str(reg1) + "\n"
lines += test + " x" + str(reg2) + ", " + csr + ", x" + str(reg1) + "\n"
# _trap{testnum}:
# csrrs x{reg1}, mcause, x0
# ecall
elif test == "csrrs": # at some point, try writing a non-zero value first
lines += "csrrw x0, " + csr + ", x0\n" # set csr to 0
# _done{testnum}:
# add x0, x0, x0
# """
lines += test + " x" + str(reg2) + ", " + csr + ", x" + str(reg1) + "\n"
lines += test + " x" + str(reg2) + ", " + csr + ", x" + str(reg1) + "\n"
elif test == "csrrc": # at some point, try writing a non-one value first
allOnes = "0xFFFFFFFF" if xlen == 32 else "0xFFFFFFFFFFFFFFFF"
#lines =
lines += "li x" + str(reg1) + ", MASK_XLEN(" + allOnes + ")\n"
lines += "csrrw x0, " + csr + ", x" + str(reg1) + "\n" # set csr to all ones
lines += "li x" + str(reg1) + ", MASK_XLEN(" + formatstr.format(a) + ")\n"
lines += test + " x" + str(reg2) + ", " + csr + ", x" + str(reg1) + "\n"
lines += test + " x" + str(reg2) + ", " + csr + ", x" + str(reg1) + "\n"
expected = a ^ 0xFFFFFFFF if xlen == 32 else a ^ 0xFFFFFFFFFFFFFFFF
elif test == "csrrwi":
a = a & 0x1F # imm is only 5 bits
lines += test + " x" + str(reg2) + ", " + csr + ", " + str(a) + "\n"
lines += test + " x" + str(reg2) + ", " + csr + ", " + str(a) + "\n"
expected = a
elif test == "csrrsi": # at some point, try writing a non-zero value first
a = a & 0x1F
lines += "csrrw x0, " + csr + ", x0\n" # set csr to 0
lines += test + " x" + str(reg2) + ", " + csr + ", " + str(a) + "\n"
lines += test + " x" + str(reg2) + ", " + csr + ", " + str(a) + "\n"
expected = a
elif test == "csrrci": # at some point, try writing a non-one value first
a = a & 0x1F
allOnes = "0xFFFFFFFF" if xlen == 32 else "0xFFFFFFFFFFFFFFFF"
lines += "li x" + str(reg1) + ", MASK_XLEN(" + allOnes + ")\n"
lines += "csrrw x0, " + csr + ", x" + str(reg1) + "\n" # set csr to all ones
lines += test + " x" + str(reg2) + ", " + csr + ", " + str(a) + "\n"
lines += test + " x" + str(reg2) + ", " + csr + ", " + str(a) + "\n"
expected = a ^ 0xFFFFFFFF if xlen == 32 else a ^ 0xFFFFFFFFFFFFFFFF
# https://ftp.gnu.org/old-gnu/Manuals/gas-2.9.1/html_chapter/as_7.html
lines = f"""
j _setup
csrrs x31, mcause, x0
csrrs x30, mepc, x0
addi x30, x30, 0x100
csrrw x0, mepc, x30
mret
_setup:
li x2, 0x80000004
csrrw x0, mtvec, x2
"""
lines += storecmd + " x" + str(reg2) + ", " + str(wordsize*testnum) + "(x6)\n"
lines += "RVTEST_IO_ASSERT_GPR_EQ(x7, " + str(reg2) +", "+formatstr.format(expected)+")\n"
f.write(lines)
# # User Software Interrupt
# write(f"""
# li x3, 0x8000000
# {storecmd} x2, 0(x3)
# """, storecmd, True, 0, "u")
# # A supervisor-level software interrupt is triggered on the current hart by writing 1 to its supervisor software interrupt-pending (SSIP) bit in the sip register.
# # page 58 of priv spec
# # Supervisor Software Interrupt
# write(f"""
# li x3, 0x8000000
# {storecmd} x2, 0(x3)
# """, storecmd, True, 0, "s")
# # Machine Software Interrupt
# write(f"""
# li x3, 0x8000000
# {storecmd} x2, 0(x3)
# """, storecmd, True, 3)
# User Timer Interrupt
#write(f"""
# lw x2, mtimecmp
# {storecmd} x2, mtimecmp
#""", storecmd, True, 4, "u")
# # Supervisor Timer Interrupt
#write(f"""
# lw x2, mtimecmp
# {storecmd} x2, mtimecmp
#""", storecmd, True, 5, "s")
# Machine Timer Interrupt
#write(f"""
# lw x2, mtimecmp
# {storecmd} x2, mtimecmp
#""", storecmd, True, 6)
# User external interrupt True, 8
# Supervisor external interrupt True, 9
# Instr Addr Misalign
write(f"""
li x2, 0x00000000
lw x3, 11(x2)
""", storecmd, False, 0)
# Instr Access Fault False, 1
# Not possible in machine mode, because we can access all memory
# Illegal Instruction
# . fill 1, 2, 0 outputs all 0s
write(f"""
.fill 1, 2, 0
""", storecmd, False, 2)
# Breakpoint
write(f"""
ebreak
""", storecmd, False, 3)
# Load Addr Misalign
write(f"""
li x2, 0x00000000
lw x3, 11(x2)
""", storecmd, False, 4)
# Load Access Fault False, 5
# Not possible in machine mode, because we can access all memory
# Store/AMO address misaligned
write(f"""
li x2, 0x00000000
{storecmd} x3, 11(x2)
""", storecmd, False, 6)
# Store/AMO access fault False, 7
# Not possible in machine mode, because we can access all memory
# Environment call from U-mode
# Environment call from S-mode
def write(lines, storecmd, interrupt, code, mode = "m"):
global testnum
# generate expected interrupt code
expected = (0 if not interrupt else (2**31 if xlen == 32 else 2**63)) + code
lines = f"""
# Testcase {testnum}
li x31, 0
{lines}
{storecmd} x31, {str(wordsize*testnum)}(x6)
# RVTEST_IO_ASSERT_GPR_EQ(x0, 0, {formatstr.format(expected)})
"""
#if mode == "s":
# go to supervisor mode
#elif mode == "u":
# go to user mode
f.write(lines)
if (xlen == 32):
line = formatrefstr.format(expected)+"\n"
else:
@ -195,38 +119,15 @@ def write(lines, storecmd, interrupt, code, mode = "m"):
# main body
##################################
# name: (interrupt?, code)
# tests = {
# 'User software interrupt': (1, '0'),
# 'Supervisor software interrupt': (1, '1'),
# 'Machine software interrupt': (1, '3'),
# 'User timer interrupt': (1, '4'),
# 'Supervisor timer interrupt': (1, '5'),
# 'Machine timer interrupt': (1, '7'),
# 'User external interrupt': (1, '8'),
# 'Supervisor external interrupt': (1, '9'),
# 'Machine external interrupt': (1, '11'),
# 'Instruction address misaligned': (0, '0'),
# 'Instruction access fault': (0, '1'),
# 'Illegal instruction': (0, '2'),
# 'Breakpoint': (0, '3'),
# 'Load address misaligned': (0, '4'),
# 'Load access fault': (0, '5'),
# 'Store/AMO address misaligned': (0, '6'),
# 'Store/AMO access fault': (0, '7'),
# 'Environment call from U-mode': (0, '8'),
# 'Environment call from S-mode': (0, '9'),
# 'Environment call from M-mode': (0, '11'),
# 'Instruction page fault': (0, '12'),
# 'Load page fault': (0, '13'),
# 'Store/AMO page fault': (0, '15'),
# }
author = "Domenico Ottolia (dottolia@hmc.edu)"
# change these to suite your tests
# csrrw, csrrs, csrrc, csrrwi, csrrsi, csrrci
tests = ["csrrw"]
author = "ushakya@hmc.edu & dottolia@hmc.edu"
xlens = [32, 64]
numrand = 60;
# setup
seed(0xC395DDEB9173AD42) # make tests reproducible
seed(0xC365DDEB9173AB42) # make tests reproducible
# generate files for each test
for xlen in xlens:
@ -239,41 +140,55 @@ for xlen in xlens:
else:
storecmd = "sd"
wordsize = 8
for test in tests:
corners = [
0, 1, 2, 0x1E, 0x1F, 0xFF,
0x624B3E976C52DD14 % 2**xlen, 2**(xlen-1)-2, 2**(xlen-1)-1,
2**(xlen-1), 2**(xlen-1)+1, 0xC365DDEB9173AB42 % 2**xlen, 2**(xlen)-2, 2**(xlen)-1
]
imperaspath = "../../../imperas-riscv-tests/riscv-test-suite/rv" + str(xlen) + "p/"
basename = "WALLY-CAUSE"
fname = imperaspath + "src/" + basename + ".S"
refname = imperaspath + "references/" + basename + ".reference_output"
testnum = 0
imperaspath = f"""../../../imperas-riscv-tests/riscv-test-suite/rv{xlen}p/"""
basename = "WALLY-CAUSE"
fname = imperaspath + "src/" + basename + ".S"
refname = imperaspath + "references/" + basename + ".reference_output"
testnum = 0
# print custom header part
f = open(fname, "w")
r = open(refname, "w")
line = "///////////////////////////////////////////\n"
f.write(line)
lines="// "+fname+ "\n// " + author + "\n"
f.write(lines)
line ="// Created " + str(datetime.now())
f.write(line)
# print custom header part
f = open(fname, "w")
r = open(refname, "w")
line = "///////////////////////////////////////////\n"
f.write(line)
lines="// "+fname+ "\n// " + author + "\n"
f.write(lines)
line ="// Created " + str(datetime.now())
f.write(line)
# insert generic header
h = open("../testgen_header.S", "r")
for line in h:
f.write(line)
# insert generic header
# h = open("../testgen_header.S", "r")
# for line in h:
# f.write(line)
# print directed and random test vectors
writeVectors(storecmd)
# print directed and random test vectors
for a in corners:
for b in corners:
writeVector(a, b, storecmd)
for i in range(0,numrand):
a = getrandbits(xlen)
b = getrandbits(xlen)
writeVector(a, b, storecmd)
# print footer
h = open("../testgen_footer.S", "r")
for line in h:
f.write(line)
# Finish
lines = ".fill " + str(testnum) + ", " + str(wordsize) + ", -1\n"
lines = lines + "\nRV_COMPLIANCE_DATA_END\n"
f.write(lines)
f.close()
r.close()
# print footer
# h = open("../testgen_footer.S", "r")
# for line in h:
# f.write(line)
# Finish
# lines = ".fill " + str(testnum) + ", " + str(wordsize) + ", -1\n"
# lines = lines + "\nRV_COMPLIANCE_DATA_END\n"
# f.write(lines)
f.close()
r.close()

View File

@ -0,0 +1,636 @@
#!/usr/bin/python
import numpy as np
import string
from datetime import datetime
INSTRUCTION_SIZE = 32
VALID_REGISTERS = [str(x) for x in range(1, 32) if x != 6] # generates list of ints from (1, 31) without 6
# Enumeration for decoding instruction formats
NAME = 0
REG = 1
ADDR = 5
IMM = 6
ZIMM = 7
LABEL = 8
# Enumeration for decoding alignments
BYTE = 1
HALF = INSTRUCTION_SIZE // 16
WORD = INSTRUCTION_SIZE // 8
NONE = 1
wordsize = 8
class InstrGenerator():
class InstrDict():
INSTR_RV32I = \
["lb", "lh", "lw", "lbu", "lhu", "addi", "slli", "slti", "sltiu", "xori", \
"srli", "srai", "ori", "andi", "auipc", "sb", "sh", "sw", "add", "sub", \
"sll", "slt", "sltu", "xor", "srl", "sra", "or", "and", "lui", "beq", \
"bne", "blt", "bge", "bltu", "bgeu", "jal"] #37 #"jalr"
INSTR_RV64M = ["mul", "mulh", "muhsu", "mulhu"] #"div", "divu", "rem", "remu"
#TODO: Add jalr functionality
# Lists of instructions that follow a specific format
INSTR_MEM = ["lb", "lh", "lw", "lbu", "lhu", "sb", "sh", "sw"] #8
INSTR_DEST_SRC1_IMM = ["addi", "slti", "sltiu", "xori", "ori", "andi", "jarl"] #7
INSTR_DEST_SRC1_ZIMM = ["slli", "srli", "srai"] #3
INSTR_DEST_IMM = ["auipc", "lui"] #2
INSTR_DEST_SRC1_SRC2 = ["add", "sub", "sll", "slt", "sltu", "xor", "srl", "sra", "or", "and"]
INSTR_SRC1_SRC2_LABEL = ["beq", "bne", "blt", "bge", "bltu", "bgeu"] #6
INSTR_DEST_LABEL = ["jal"] #1
INSTR_DEST_SRC1_SRC2_64M = ["mul", "mulh", "muhsu", "mulhu", "div", "divu", "rem", "remu"]
def __init__(self):
self.instr = {}
def fillInstructionDictRV32i(self):
self.instr["lb"] = [(NAME, REG, ADDR), BYTE]
self.instr["lh"] = [(NAME, REG, ADDR), HALF]
self.instr["lw"] = [(NAME, REG, ADDR), WORD]
self.instr["lbu"] = [(NAME, REG, ADDR), BYTE]
self.instr["lhu"] = [(NAME, REG, ADDR), HALF]
self.instr["addi"] = [(NAME, REG, REG, IMM), NONE]
self.instr["slli"] = [(NAME, REG, REG, ZIMM), NONE]
self.instr["slti"] = [(NAME, REG, REG, IMM), NONE]
self.instr["sltiu"] = [(NAME, REG, REG, IMM), NONE]
self.instr["xori"] = [(NAME, REG, REG, IMM), NONE]
self.instr["srli"] = [(NAME, REG, REG, ZIMM), NONE]
self.instr["srai"] = [(NAME, REG, REG, ZIMM), NONE]
self.instr["ori"] = [(NAME, REG, REG, IMM), NONE]
self.instr["andi"] = [(NAME, REG, REG, IMM), NONE]
self.instr["auipc"] = [(NAME, REG, IMM), NONE]
self.instr["sb"] = [(NAME, REG, ADDR), BYTE]
self.instr["sh"] = [(NAME, REG, ADDR), HALF]
self.instr["sw"] = [(NAME, REG, ADDR), WORD]
self.instr["add"] = [(NAME, REG, REG, REG), NONE]
self.instr["sub"] = [(NAME, REG, REG, REG), NONE]
self.instr["sll"] = [(NAME, REG, REG, REG), NONE]
self.instr["slt"] = [(NAME, REG, REG, REG), NONE]
self.instr["sltu"] = [(NAME, REG, REG, REG), NONE]
self.instr["xor"] = [(NAME, REG, REG, REG), NONE]
self.instr["srl"] = [(NAME, REG, REG, REG), NONE]
self.instr["sra"] = [(NAME, REG, REG, REG), NONE]
self.instr["or"] = [(NAME, REG, REG, REG), NONE]
self.instr["and"] = [(NAME, REG, REG, REG), NONE]
self.instr["lui"] = [(NAME, REG, IMM), NONE]
self.instr["beq"] = [(NAME, REG, REG, LABEL), NONE]
self.instr["bne"] = [(NAME, REG, REG, LABEL), NONE]
self.instr["blt"] = [(NAME, REG, REG, LABEL), NONE]
self.instr["bge"] = [(NAME, REG, REG, LABEL), NONE]
self.instr["bltu"] = [(NAME, REG, REG, LABEL), NONE]
self.instr["bgeu"] = [(NAME, REG, REG, LABEL), NONE]
#self.instr["jalr"] = [(NAME, REG, REG, IMM), NONE]
self.instr["jal"] = [(NAME, REG, LABEL), WORD]
def fillInstructionDictRV64M(self):
self.instr["mul"] = [(NAME, REG, REG, REG), NONE]
self.instr["mulh"] = [(NAME, REG, REG, REG), NONE]
self.instr["mulhsu"] = [(NAME, REG, REG, REG), NONE]
self.instr["mulhu"] = [(NAME, REG, REG, REG), NONE]
self.instr["div"] = [(NAME, REG, REG, REG), NONE]
self.instr["divu"] = [(NAME, REG, REG, REG), NONE]
self.instr["rem"] = [(NAME, REG, REG, REG), NONE]
self.instr["remu"] = [(NAME, REG, REG, REG), NONE]
class RandomSelect():
LETTER_CHOICES = string.ascii_letters
def randReg(self, regs):
return regs[np.random.randint(0, len(regs))]
def randBinary(self, signed, numBits, valueAlignment):
# use this for corners: xlen = 32 here
corners = [0, 1, 2, 0xFF, 0x624B3E976C52DD14 % 2**numBits, 2**(numBits-1)-2, 2**(numBits-1)-1, \
2**(numBits-1), 2**(numBits-1)+1, 0xC365DDEB9173AB42 % 2**numBits, 2**(numBits)-2, 2**(numBits)-1]
# when not biased don't gen numbers from (|2^(n-2) to 2^(n-2)|)
biased = np.random.randint(0, 3) # on 2 generate random edge case
returnVal = 0
sign = 0
if biased < 2:
# print("unbiased")
if not(signed):
returnVal = np.random.randint(0, 2**(numBits) - 1)
else:
returnVal = np.random.randint(-2**(numBits - 1), 2**(numBits - 1) - 1)
else:
# print("corner")
returnVal = corners[np.random.randint(0, len(corners))]
binReturnVal = bin(returnVal)
if returnVal >= 0:
binReturnVal = binReturnVal[2:] # get rid of 0b
#make binary correct length
while(len(binReturnVal) < numBits):
binReturnVal = "0" + binReturnVal
else:
binReturnVal = binReturnVal[3:] # get rid of -0b
#two's compliment
flipped = ''.join('1' if x == '0' else '0' for x in binReturnVal)
added = bin(int(flipped, 2) + 1)[2:]
while(len(added) < len(flipped)):
added = "0" + added
while(len(added) < numBits):
added = "1" + added
binReturnVal = added
# ensure correct value assignment
if valueAlignment == 1:
return binReturnVal
indexVal = valueAlignment // 2
returnValue = binReturnVal[:-indexVal] + "0"*indexVal
return returnValue
def randHex(self, sign, numBits, divisibleByValue):
return hex(int(self.randBinary(sign, numBits*4, divisibleByValue), 2))
def randDec(self, valueRangeMin, valueRangeMax, divisibleByValue):
valRange = (valueRangeMax - valueRangeMin)//divisibleByValue
return (np.random.randint(0, valRange + 1) * divisibleByValue + valueRangeMin)
def __init__(self, numMemoryRegisters, xlen):
self.instrDict = self.InstrDict()
self.instrDict.fillInstructionDictRV32i()
if (xlen == 64):
self.instrDict.fillInstructionDictRV64M()
self.randSelect = self.RandomSelect()
self.memoryAdrList = []
self.populateMemory()
self.prevLabel = 0
self.memReg = []
self.anyReg = VALID_REGISTERS[:]
self.xlen = xlen
self.test_count = 0
for i in range(0, numMemoryRegisters):
reg = self.randSelect.randReg(self.anyReg)
self.anyReg.remove(reg)
self.memReg.append(reg)
self.PC = int("80000108",16)
def addPC(self, instr): #adapted from BB code
if instr == "li":
self.PC += 8 if (self.xlen == 32) else 20
elif instr == "la":
self.PC += 8
else:
self.PC += 4
def getRandInstruction(self, instr):
return instr[np.random.randint(0, len(instr))]
def getForwardingInstructions(self, instr): #TODO make so that memory register can be manipulated
fields, alignment = self.instrDict.instr[instr]
ld_instr = "lw"
num = np.random.randint(0, 2) # 0 signed, on 1 unsigned
if alignment == BYTE:
if num == 0:
ld_instr = "lbu"
else:
ld_instr = "lb"
if alignment == HALF:
if num == 0:
ld_instr = "lhu"
else:
ld_instr = "lw"
reg1 = self.randSelect.randReg(self.anyReg)
mask = self.randSelect.randHex(True, 3, alignment)
reg_set_instr = "andi x" + reg1 + ", x" + reg1 + ", SEXT_IMM(" + mask + ")" # set register to multiple of 4
rd = self.randSelect.randReg(self.anyReg)
imm1 = self.memoryAdrList[np.random.randint(0, len(self.memoryAdrList))] #load imm
mem_instr_ld = ld_instr + " x" + rd + ", " + imm1 + "(x" + reg1 + ")" #load value to rd
reg2 = self.randSelect.randReg(self.memReg)
imm2 = self.memoryAdrList[np.random.randint(0, len(self.memoryAdrList))] #str imm
mem_instr_str = "sw" + " x" + rd + ", " + imm2 + "(x" + reg2 + ")" #store value of rd
instructions = [reg_set_instr, mem_instr_ld, mem_instr_str]
check_instr = self.genTestInstr(rd)
for check in check_instr:
instructions.append(check)
return instructions
def jumpInstruction(self, instr):
fields, alignment = self.instrDict.instr[instr]
# randomly determine forward or back branch direction
fwd = np.random.randint(0, 2) #fwd on 1, bwd on 0
taken = np.random.randint(0,2) #not taken on 0, taken on 1
reg_pc = self.randSelect.randReg(self.anyReg)
instructions = []
if fwd == 1:
instructions.append(instr + " x" + reg_pc + ", " + str(self.prevLabel) + "f")
numInstr = np.random.randint(0, 6)
# add random alu instructions after jumping before jump point
reg_check = 1
for i in range(0, numInstr):
curr = self.getRandInstruction(self.instrDict.INSTR_DEST_SRC1_SRC2)
rd = self.randSelect.randReg(self.anyReg)
while (rd == reg_pc):
rd = self.randSelect.randReg(self.anyReg)
reg_check = rd
r1 = self.randSelect.randReg(self.anyReg)
r2 = self.randSelect.randReg(self.anyReg)
instructions.append(curr + " x" + rd + ", x" + r1 + ", x" + r2)
instructions.append(str(self.prevLabel) + ":")
self.prevLabel += 1
#make sure jump was taken
check_instr = self.genTestInstr(reg_check)
for check in check_instr:
instructions.append(check)
#check value in pc + 4 reg
check_instr = self.genTestInstr(reg_pc)
for check in check_instr:
instructions.append(check)
else:
reg1 = self.randSelect.randReg(self.anyReg)
reg2 = self.randSelect.randReg(self.anyReg)
while reg2 == reg1:
reg2 = self.randSelect.randReg(self.anyReg)
instructions.append("jal x" + reg1 + ", 1f")
instructions.append("2:")
instructions.append("jal x" + reg2 + ", 3f")
instructions.append("1:")
numInstr = np.random.randint(0,6)
for i in range(0, numInstr):
curr = self.getRandInstruction(self.instrDict.INSTR_DEST_SRC1_SRC2)
rd = self.randSelect.randReg(self.anyReg)
r1 = self.randSelect.randReg(self.anyReg)
r2 = self.randSelect.randReg(self.anyReg)
instructions.append(curr + " x" + rd + ", x" + r1 + ", x" + r2)
#test case here
instructions.append("jal x" + reg2 + ", 2b")
instructions.append("3:")
check_instr = self.genTestInstr(reg1)
for check in check_instr:
instructions.append(check)
# jump to 1
# #2
# jump to 3
# #1
# junk here
# test: jump to 2
# #3
# check answer from 1
return instructions
def branchInstruction(self, instr):
fields, alignment = self.instrDict.instr[instr]
# randomly determine forward or back branch direction
fwd = np.random.randint(0, 2) #fwd on 1, bwd on 0
taken = np.random.randint(0,2) #not taken on 0, taken on 1
instructions = []
# pick 2 registers for branch comparison
reg1 = self.randSelect.randReg(self.anyReg)
reg2 = reg1
while (reg2 == reg1):
reg2 = self.randSelect.randReg(self.anyReg)
if (fwd == 1):
# set r1 and r2 to what they should be to do the branching we want
#TODO this assumed that greater than equal to or lteq are instead jsut equal, will be changed later
if (instr == "beq" and taken==1) or (instr == "bne" and taken==0) or \
(instr == "blt" and taken==0) or (instr == "bge" and taken==1) or \
(instr == "bltu" and taken==0) or (instr == "bgeu" and taken==1): #r1 = r2
instructions.append("mv x" + reg1 + ", x" + reg2)
elif (instr == "beq" and taken==0) or (instr == "bne" and taken==1) or \
(instr == "blt" and taken==1) or (instr == "bltu" and taken==1): #r2 = r1 + 1
instructions.append("addi x" + reg2 + ", x" + reg1 + ", 1")
else:
instructions.append("addi x" + reg2 + ", x" + reg1 + ", -1") #r2 = r1 - 1
# add branching instruction
instructions.append(instr + " x" + reg1 + ", x" + reg2 + ", " + str(self.prevLabel) + "f")
numInstr = np.random.randint(0, 6)
# add random alu instructions after branching before branch point
reg_check = 1
for i in range(0, numInstr):
curr = self.getRandInstruction(self.instrDict.INSTR_DEST_SRC1_SRC2)
rd = self.randSelect.randReg(self.anyReg)
reg_check = rd
r1 = self.randSelect.randReg(self.anyReg)
r2 = self.randSelect.randReg(self.anyReg)
instructions.append(curr + " x" + rd + ", x" + r1 + ", x" + r2)
instructions.append(str(self.prevLabel) + ":")
self.prevLabel += 1
check_instr = self.genTestInstr(reg_check)
for check in check_instr:
instructions.append(check)
return instructions
# Backwards branch case
else:
if (not taken):
if instr == "beq":
randImm = self.randSelect.randHex(True, 1, 1)
while randImm == "0x0":
randImm = self.randSelect.randHex(True, 1, 1)
instructions.append("addi x" + reg2 + ", x" + reg1 + ", MASK_XLEN(" + str(randImm) + ")") #r2 = r1 + randImm (-10, 10) and not 0
elif instr == "bne":
instructions.append("addi x" + reg2 + ", x" + reg1 + ", 0") #r2 = r1 + 0
elif instr == "bltu" or instr == "blt":
randImm = np.random.randint(-10, 0)
randMaskLen = np.random.randint(10, 100)
instructions.append("addi x" + reg1 + ", x0, " + "MASK_XLEN(" + str(randMaskLen) + ")") # set reg1 to be rand positive number, helps stop infinite looping
instructions.append("addi x" + reg2 + ", x" + reg1 + ", MASK_XLEN(" + str(randImm) + ")") #r2 = r1 + randImm (-10, -1)
elif instr == "bgeu" or instr == "bge":
randImm = np.random.randint(1, 11)
randMaskLen = np.random.randint(10, 100)
instructions.append("addi x" + reg1 + ", x0, "+ "MASK_XLEN(" + str(randMaskLen) + ")") # set reg1 to be rand positive number, helps stop infinite looping
instructions.append("addi x" + reg2 + ", x" + reg1 + ", MASK_XLEN(" + str(randImm) + ")") #r2 = r1 + randImm (1, 11)
instructions.append(str(self.prevLabel) + ":")
numInstr = np.random.randint(0, 6)
# add random alu instructions after branching before branch point
reg_check = 1
for i in range(0, numInstr):
curr = self.getRandInstruction(self.instrDict.INSTR_DEST_SRC1_SRC2)
rd = self.randSelect.randReg(self.anyReg)
reg_check = rd
while rd == reg1 or rd==reg2:
rd = self.randSelect.randReg(self.anyReg)
r1 = self.randSelect.randReg(self.anyReg)
r2 = self.randSelect.randReg(self.anyReg)
instructions.append(curr + " x" + rd + ", x" + r1 + ", x" + r2)
check_instr = self.genTestInstr(reg_check)
for check in check_instr:
instructions.append(check)
else:
#setup reg instructions before any branching stuff
if instr == "beq":
numTimesRepeat = 1 #can only be repeated once with the way we are doing this
instructions.append("addi x" + reg2 + ", x" + reg1 + ", MASK_XLEN(" + str(numTimesRepeat) + ")") #r2 = r1 + numTimesRepeat ( - 6)
instructions.append(str(self.prevLabel) + ":")
instructions.append("addi x" + reg2 + ", x" + reg2 + ", -1")
elif instr == "bne":
numTimesRepeat = np.random.randint(2, 6)
instructions.append("addi x" + reg2 + ", x" + reg1 + ", MASK_XLEN(" + str(numTimesRepeat) + ")") #r2 = r1 + numTimesRepeat (2 - 6)
instructions.append(str(self.prevLabel) + ":")
instructions.append("addi x" + reg2 + ", x" + reg2 + ", -1")
elif instr == "bltu":
numTimesRepeat = np.random.randint(2, 6)
instructions.append("ori x" + reg1 + ", x" + reg1 + ", " + "1") # ensure reg1 is not 0
instructions.append("addi x" + reg2 + ", x" + reg1 + ", MASK_XLEN(" + str(numTimesRepeat) + ")") #r2 = r1 - numTimesRepeat (2 - 6)
instructions.append(str(self.prevLabel) + ":")
instructions.append("addi x" + reg2 + ", x" + reg2 + ", -1")
elif instr == "blt":
numTimesRepeat = np.random.randint(2, 6)
instructions.append("addi x" + reg2 + ", x" + reg1 + ", MASK_XLEN(" + str(numTimesRepeat) + ")") #r2 = r1 - numTimesRepeat (2 - 6)
instructions.append(str(self.prevLabel) + ":")
instructions.append("addi x" + reg2 + ", x" + reg2 + ", -1")
elif instr == "bgeu" or instr == "bge":
numTimesRepeat = np.random.randint(2, 6)*(-1)
instructions.append("addi x" + reg2 + ", x" + reg1 + ", MASK_XLEN(" + str(numTimesRepeat) + ")") #r2 = r1 + numTimesRepeat (2 - 6)
instructions.append(str(self.prevLabel) + ":")
instructions.append("addi x" + reg2 + ", x" + reg2 + ", 1")
# Junk instructions
numInstr = np.random.randint(0, 5)
reg_check = 1
for i in range(0, numInstr):
curr = self.getRandInstruction(self.instrDict.INSTR_DEST_SRC1_SRC2)
rd = self.randSelect.randReg(self.anyReg)
reg_check = rd
while rd == reg1 or rd==reg2:
rd = self.randSelect.randReg(self.anyReg)
r1 = self.randSelect.randReg(self.anyReg)
r2 = self.randSelect.randReg(self.anyReg)
instructions.append(curr + " x" + rd + ", x" + r1 + ", x" + r2)
check_instr = self.genTestInstr(reg_check)
for check in check_instr:
instructions.append(check)
instructions.append(instr + " x" + reg1 + ", x" + reg2 + ", " + str(self.prevLabel) + "b")
self.prevLabel += 1
return instructions
# have (1-5) sudo random alu instructions for backwards branch, (0-5) for forward
# randomly determine if branch is taken or not
# if branch not taken,
# we go by specific instruction and do some bit swizzling to make it false
# if branch taken
# add specific instruction to make branch true
# for backwards, have alu instructions change comparison flags to be true
def genTestInstr(self, reg):
out = ["sw x" + str(reg) + ", "+ str(self.test_count) + "(x6)"]
out.append("RVTEST_IO_ASSERT_GPR_EQ(x7, x{}, 0xdeadbeef) ".format(reg))
self.test_count += 4
if (self.test_count == 2044):
# Reset
wreset = "addi x6, x6, MASK_XLEN(2044)"
self.test_count = 0
out.append(wreset)
return out
def genInstruction(self):
num = np.random.randint(0, 20)
instr = ""
if (self.xlen == 64) and (num == 20):
instr = self.getRandInstruction(self.InstrDict.INSTR_RV64M)
else:
instr = self.getRandInstruction(self.InstrDict.INSTR_RV32I)
num = np.random.randint(0, 2) # 0 mem reg, on 1 do forwarding stuff
if instr in self.InstrDict.INSTR_MEM and num == 1:
return self.getForwardingInstructions(instr)
if instr in self.InstrDict.INSTR_SRC1_SRC2_LABEL:
return self.branchInstruction(instr)
if instr in self.InstrDict.INSTR_DEST_LABEL:
return self.jumpInstruction(instr)
fields, alignment = self.instrDict.instr[instr]
output = ""
rd = 1
for field in fields:
if field == NAME:
output += instr + " "
elif field == REG:
rd = self.randSelect.randReg(self.anyReg)
output += "x" + str(rd) + ", "
elif field == IMM and instr != "lui" and instr != "auipc":
output += "SEXT_IMM(" + self.randSelect.randHex(False, 3, alignment) + ")"
elif field == IMM and (instr == "lui" or instr == "auipc"):
output += self.randSelect.randHex(False, 3, alignment)
elif field == ADDR:
output += self.memoryAdrList[np.random.randint(0, len(self.memoryAdrList))]
output += "("
output += "x" + self.randSelect.randReg(self.memReg)
output += ")"
elif field == ZIMM:
if (instr in self.instrDict.INSTR_DEST_SRC1_ZIMM):
output += "SEXT_IMM(" + str(hex(np.random.randint(0, 32))) + ")" # has to be between 0 and 31
else:
output += "SEXT_IMM(" + self.randSelect.randHex(True, 3, alignment) + ")"
elif field == LABEL:
output += str(self.prevLabel)
if output[-2:] == ", ":
output = output[0:-2]
out_instr = [output]
check_instr = self.genTestInstr(rd)
for check in check_instr:
out_instr.append(check)
return out_instr
def setRegisters(self):
out = []
for reg in self.anyReg:
out.append('li x{}, MASK_XLEN({})'.format(reg, self.randSelect.randHex(False, 5, 1)))
return out
def populateMemory(self):
for i in range(0, 5):
val = self.randSelect.randDec(-1000, 1000, WORD)
while val in self.memoryAdrList:
val = self.randSelect.randDec(-1000, 1000, WORD)
self.memoryAdrList.append(str(val))
def generateInstructions(self, numInstructions):
instructions = []
for i in range(0, numInstructions):
instr = self.genInstruction()
for j in instr:
instructions.append(j)
return instructions
class RandInstrGenerator(InstrGenerator):
pass
class HazardInstrGenerator(InstrGenerator):
pass
# xlens = [32, 64]
xlens = [64]
for xlen in xlens:
test = "PIPELINE"
np.random.seed(42)
instrGen = InstrGenerator(10, xlen)
# write instructions to file
imperaspath = "../../imperas-riscv-tests/riscv-test-suite/rv" + str(xlen) + "wally/"
basename = "WALLY-" + test
fname = imperaspath + "src/" + basename + ".S"
refname = imperaspath + "references/" + basename + ".reference_output"
# FOR LOCAL TESTING
# fname = "PIPELINE_TEST.S"
# print custom header part
f = open(fname, "w")
r = open(refname, "w")
line = "///////////////////////////////////////////\n"
f.write(line)
lines="// "+fname+ "\n// " + "Ethan Falicov & Shriya Nadgauda" + "\n"
f.write(lines)
line ="// Created " + str(datetime.now()) + "\n" + "\n"
f.write(line)
line = "// Begin Tests" + "\n"
f.write(line)
# insert generic header
h = open("testgen_header.S", "r")
for line in h:
f.write(line)
f.write("\n")
#set registerss
reg_instr = instrGen.setRegisters()
for instr in reg_instr:
f.write("\t" + instr + "\n")
# write instructions
testnum = 500
instructions = instrGen.generateInstructions(testnum)
for instr in instructions:
f.write("\t" + instr + "\n")
if ("RVTEST_IO_ASSERT_GPR_EQ" in instr):
f.write("\n")
# print footer
h = open("testgen_footer.S", "r")
for line in h:
f.write(line)
# Finish
lines = ".fill " + str(testnum) + ", " + str(xlen//8) + ", -1\n"
lines = lines + "\nRV_COMPLIANCE_DATA_END\n"
f.write(lines)
f.close()
r.close()
print("Done with ", xlen)