mirror of
https://github.com/openhwgroup/cvw
synced 2025-01-30 00:14:28 +00:00
FPU update - missing files
This commit is contained in:
parent
3f61e313d2
commit
72406b8a88
62
wally-pipelined/src/fpu/fclassify.sv
Normal file
62
wally-pipelined/src/fpu/fclassify.sv
Normal file
@ -0,0 +1,62 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
// fclassify: classify a floating-point operand into exactly one of ten
// categories and return the category as a bit mask (the layout matches the
// RISC-V FCLASS result encoding).
//
//   ClassResE bit   category
//   -------------   -----------------
//        0          -Inf
//        1          -Norm
//        2          -Denorm
//        3          -Zero
//        4          +Zero
//        5          +Denorm
//        6          +Norm
//        7          +Inf
//        8          signaling NaN
//        9          quiet NaN
//   ClassResE[63:10] is always zero.
//
// Inputs:
//   SrcXE - operand. A single-precision value is read from SrcXE[63:32].
//   FmtE  - 0: single precision, 1: double precision.
module fclassify (
  input  logic [63:0] SrcXE,
  input  logic        FmtE,      // 0-Single 1-Double
  output logic [63:0] ClassResE
);

  logic [31:0] Single;
  logic [63:0] Double;
  logic        Sgn;
  logic        Inf, NaN, Zero, Norm, Denorm;
  logic        PInf, QNaN, PZero, PNorm, PDenorm;
  logic        NInf, SNaN, NZero, NNorm, NDenorm;
  logic        MaxExp, ExpZero, ManZero, FirstBitFrac;

  // Single and Double precision layouts
  assign Single = SrcXE[63:32];
  assign Double = SrcXE;
  assign Sgn    = SrcXE[63];

  // Field tests, selected by format
  assign ExpZero      = FmtE ? ~|Double[62:52] : ~|Single[30:23]; // exponent all zeros
  assign MaxExp       = FmtE ?  &Double[62:52] :  &Single[30:23]; // exponent all ones
  assign ManZero      = FmtE ? ~|Double[51:0]  : ~|Single[22:0];  // fraction all zeros
  assign FirstBitFrac = FmtE ?   Double[51]    :   Single[22];    // fraction MSB (quiet bit)

  // Determine the type of number. The five types are mutually exclusive.
  assign NaN    = MaxExp  & ~ManZero;
  assign Inf    = MaxExp  &  ManZero;
  assign Zero   = ExpZero &  ManZero;
  assign Denorm = ExpZero & ~ManZero;
  // A normal number has an exponent that is neither all zeros nor all ones.
  // Testing only ~ExpZero would also flag Inf and NaN as normal, which sets
  // two class bits at once.
  assign Norm   = ~ExpZero & ~MaxExp;

  // Determine the sub categories
  assign QNaN    =  FirstBitFrac & NaN;
  assign SNaN    = ~FirstBitFrac & NaN;
  assign PInf    = ~Sgn & Inf;
  assign NInf    =  Sgn & Inf;
  assign PNorm   = ~Sgn & Norm;
  assign NNorm   =  Sgn & Norm;
  assign PDenorm = ~Sgn & Denorm;
  assign NDenorm =  Sgn & Denorm;
  assign PZero   = ~Sgn & Zero;
  assign NZero   =  Sgn & Zero;

  // Combine into the result; bit 0 is the right-most element.
  assign ClassResE = {{54{1'b0}}, QNaN, SNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};

endmodule
|
465
wally-pipelined/src/fpu/fcmp.sv
Executable file
465
wally-pipelined/src/fpu/fcmp.sv
Executable file
@ -0,0 +1,465 @@
|
||||
|
||||
//
|
||||
// File name : fpcomp.v
|
||||
// Title : Floating-Point Comparator
|
||||
// project : FPU
|
||||
// Library : fpcomp
|
||||
// Author(s) : James E. Stine
|
||||
// Purpose : definition of main unit to floating-point comparator
|
||||
// notes :
|
||||
//
|
||||
// Copyright Oklahoma State University
|
||||
//
|
||||
// Floating Point Comparator (Algorithm)
|
||||
//
|
||||
// 1.) Performs sign-extension if the inputs are 32-bit integers.
|
||||
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
|
||||
// 3.) Check for special cases (+0=-0, unordered, and infinite values)
|
||||
// and correct for sign bits
|
||||
//
|
||||
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
|
||||
// signals, and a 2-bit signal FOpCtrlE that indicates the type of
|
||||
// operands being compared as indicated below.
|
||||
// FOpCtrlE Description
|
||||
// 00 double precision numbers
|
||||
// 01 single precision numbers
|
||||
// 10 half precision numbers
|
||||
// 11 (unused)
|
||||
//
|
||||
// The comparator produces a 2-bit signal FCC, which
|
||||
// indicates the result of the comparison:
|
||||
//
|
||||
// fcc description
|
||||
// 00 A = B
|
||||
// 01 A < B
|
||||
// 10 A > B
|
||||
// 11 A and B are unordered (i.e., A or B is NaN)
|
||||
//
|
||||
// It also produces an invalid operation flag, which is one
|
||||
// if either of the input operands is a signaling NaN per 754
|
||||
|
||||
`include "wally-config.vh"
|
||||
// fcmp: top level of the floating-point comparator.
//
// The comparison is split across four sub-blocks:
//   magcomp1 - first levels of the 64-bit comparator tree (produces w, x)
//   exc1     - NaN / zero detection on the raw operands
//   magcomp2 - remaining levels of the comparator tree (produces LT, EQ)
//   exc2     - combines the tree result with signs and special cases, and
//              selects the value driven onto CmpResE according to FOpCtrlE
//
// Both operands enter the comparator tree with their sign bit inverted.
//
// FCC (the 2-bit condition code) is produced by exc2 but is not a port of
// this module; the former port declaration was commented out.
module fcmp (
  input  logic [63:0] op1,
  input  logic [63:0] op2,
  input  logic [2:0]  FOpCtrlE,
  input  logic        FmtE,

  output logic        Invalid,   // Invalid Operation
  output logic [63:0] CmpResE
);

  logic [1:0] FCC;            // Condition Codes (internal only)
  logic [7:0] w, x;           // intermediate comparator-tree vectors
  logic       ANaN, BNaN;     // operand is NaN
  logic       Azero, Bzero;   // operand is +0 or -0
  logic       LT;             // comparator tree: op1 < op2
  logic       EQ;             // comparator tree: op1 = op2

  // Sign-inverted operands fed to the comparator tree
  logic [63:0] CmpA, CmpB;
  assign CmpA = {~op1[63], op1[62:0]};
  assign CmpB = {~op2[63], op2[62:0]};

  // First half of the comparator tree
  magcompare64b_1 magcomp1 (
    .w(w),
    .x(x),
    .A(CmpA),
    .B(CmpB)
  );

  // Special-case detection on the unmodified operands
  exception_cmp_1 exc1 (
    .ANaN(ANaN),
    .BNaN(BNaN),
    .Azero(Azero),
    .Bzero(Bzero),
    .A(op1),
    .B(op2),
    .FOpCtrlE(FOpCtrlE)
  );

  // Second half of the comparator tree. Only LT and EQ are returned,
  // since GT can be determined from these values.
  magcompare64b_2 magcomp2 (
    .LT(LT),
    .EQ(EQ),
    .w(w),
    .x(x)
  );

  // Final result: merge comparator output, sign bits and special cases.
  exception_cmp_2 exc2 (
    .A(op1),
    .B(op2),
    .FmtE(FmtE),
    .LT_mag(LT),
    .EQ_mag(EQ),
    .FOpCtrlE(FOpCtrlE),
    .invalid(Invalid),
    .fcc(FCC),
    .CmpResE(CmpResE),
    .Azero(Azero),
    .Bzero(Bzero),
    .ANaN(ANaN),
    .BNaN(BNaN)
  );

endmodule // fcmp
|
||||
|
||||
// module magcompare2b (LT, GT, A, B);
|
||||
|
||||
// input logic [1:0] A;
|
||||
// input logic [1:0] B;
|
||||
|
||||
// output logic LT;
|
||||
// output logic GT;
|
||||
|
||||
// // Determine if A < B using a minimized sum-of-products expression
|
||||
// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
|
||||
// // Determine if A > B using a minimized sum-of-products expression
|
||||
// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
|
||||
|
||||
// endmodule // magcompare2b
|
||||
|
||||
// 2-bit magnitude comparator
|
||||
// This module compares two 2-bit values A and B. LT is '1' if A < B
|
||||
// and GT is '1' if A > B. LT and GT are both '0' if A = B. However,
|
||||
// this version actually incorporates don't cares into the equation to
|
||||
// simplify the optimization
|
||||
|
||||
// magcompare2c: 2-bit comparator cell whose equations were minimized with
// don't-care terms. For some input pairs (for example A = B = 2'b11) both
// outputs are '1', so it is only meaningful where the surrounding tree never
// presents such pairs, or ignores the result.
//
//   LT = B[1] | (~A[1] & B[0])
//   GT = A[1] | (~B[1] & A[0])
module magcompare2c (
  output logic       LT,
  output logic       GT,
  input  logic [1:0] A,
  input  logic [1:0] B
);

  assign LT = B[1] | (~A[1] & B[0]);
  assign GT = A[1] | (~B[1] & A[0]);

endmodule // magcompare2c
|
||||
|
||||
// This module compares two 64-bit values A and B. LT is '1' if A < B
|
||||
// and EQ is '1' if A = B. LT and EQ are both '0' if A > B.
|
||||
// This structure was modified so
|
||||
// that it only does a strict magnitude comparison, and only
|
||||
// returns flags for less than (LT) and equal to (EQ). It uses a tree
|
||||
// of 63 2-bit magnitude comparators, followed by one OR gate.
|
||||
//
|
||||
// J. E. Stine and M. J. Schulte, "A combined two's complement and
|
||||
// floating-point comparator," 2005 IEEE International Symposium on
|
||||
// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
|
||||
// doi: 10.1109/ISCAS.2005.1464531
|
||||
|
||||
// magcompare64b_1: first three levels of the 64-bit comparator tree.
//
//   level 1: 32 x magcompare2b on bit pairs of A and B      -> s[31:0], t[31:0]
//   level 2: 16 x magcompare2c on pairs of (t, s)           -> u[15:0], v[15:0]
//   level 3:  8 x magcompare2c on pairs of (v, u)           -> w[7:0],  x[7:0]
//
// Note that each level feeds its two result vectors to the next level in
// swapped order (t before s, v before u); this is reproduced exactly.
// The remaining levels live in magcompare64b_2.
//
// magcompare2b is not defined in this file.
module magcompare64b_1 (
  output logic [7:0]  w,
  output logic [7:0]  x,
  input  logic [63:0] A,
  input  logic [63:0] B
);

  logic [31:0] s, t;   // level-1 results
  logic [15:0] u, v;   // level-2 results

  genvar i;
  generate
    // Level 1: compare A and B two bits at a time.
    for (i = 0; i < 32; i++) begin : level1
      magcompare2b cmp (s[i], t[i], A[2*i+1:2*i], B[2*i+1:2*i]);
    end

    // Level 2: merge neighbouring level-1 results.
    for (i = 0; i < 16; i++) begin : level2
      magcompare2c cmp (u[i], v[i], t[2*i+1:2*i], s[2*i+1:2*i]);
    end

    // Level 3: merge neighbouring level-2 results.
    for (i = 0; i < 8; i++) begin : level3
      magcompare2c cmp (w[i], x[i], v[2*i+1:2*i], u[2*i+1:2*i]);
    end
  endgenerate

endmodule // magcompare64b_1
|
||||
|
||||
// This module takes 64-bits inputs A and B, two magnitude comparison
|
||||
// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of
|
||||
// operands being compared as indicated below.
|
||||
// FOpCtrlE Description
|
||||
// 00 double precision numbers
|
||||
// 01 single precision numbers
|
||||
// 10 half precision numbers
|
||||
// 11 bfloat precision numbers
|
||||
//
|
||||
// The comparator produces a 2-bit signal fcc, which
|
||||
// indicates the result of the comparison as follows:
|
||||
// fcc description
|
||||
// 00 A = B
|
||||
// 01 A < B
|
||||
// 10 A > B
|
||||
// 11 A and B are unordered (i.e., A or B is NaN)
|
||||
// It also produces an invalid operation flag, which is one
|
||||
// if either of the input operands is a signaling NaN.
|
||||
|
||||
// exception_cmp_1: detect NaN and zero operands for the comparator.
//
// Outputs:
//   ANaN, BNaN   - operand is a NaN in the selected format (exponent field all
//                  ones and fraction field non-zero)
//   Azero, Bzero - bits [62:0] of the operand are all zero (+0 or -0)
//
// Format selection is decoded from FOpCtrlE[1:0]:
//   00 double  (exponent [62:52], fraction [51:0])
//   01 single  (exponent [62:55], fraction [54:32])
//   10 half    (exponent [62:58], fraction [57:48])
//   11 none selected -> ANaN/BNaN are 0
//
// NOTE(review): FOpCtrlE is also used as the operation select by
// exception_cmp_2 in this file, so decoding the format from its low bits looks
// suspicious - confirm whether the format should come from FmtE instead.
module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE);

  input  logic [63:0] A;
  input  logic [63:0] B;
  input  logic [2:0]  FOpCtrlE;

  output logic        ANaN;
  output logic        BNaN;
  output logic        Azero;
  output logic        Bzero;

  logic dp, sp, hp;

  assign dp = ~FOpCtrlE[1] & ~FOpCtrlE[0];
  assign sp = ~FOpCtrlE[1] &  FOpCtrlE[0];
  assign hp =  FOpCtrlE[1] & ~FOpCtrlE[0];

  // Test if A or B is NaN. The whole fraction field is examined: checking
  // only its top two bits would miss NaNs whose payload sits in lower bits
  // (e.g. double 0x7FF0_0000_0000_0001).
  assign ANaN = (dp & (&A[62:52]) & (|A[51:0]))  |
                (sp & (&A[62:55]) & (|A[54:32])) |
                (hp & (&A[62:58]) & (|A[57:48]));

  assign BNaN = (dp & (&B[62:52]) & (|B[51:0]))  |
                (sp & (&B[62:55]) & (|B[54:32])) |
                (hp & (&B[62:58]) & (|B[57:48]));

  // Test if the operand is +0 or -0 when viewed as a floating point number
  // (i.e. its 63 least significant bits are zero).
  assign Azero = (A[62:0] == 63'h0);
  assign Bzero = (B[62:0] == 63'h0);

endmodule // exception_cmp_1
|
||||
//
|
||||
// File name : fpcomp.v
|
||||
// Title : Floating-Point Comparator
|
||||
// project : FPU
|
||||
// Library : fpcomp
|
||||
// Author(s) : James E. Stine
|
||||
// Purpose : definition of main unit to floating-point comparator
|
||||
// notes :
|
||||
//
|
||||
// Copyright Oklahoma State University
|
||||
//
|
||||
// Floating Point Comparator (Algorithm)
|
||||
//
|
||||
// 1.) Performs sign-extension if the inputs are 32-bit integers.
|
||||
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
|
||||
// 3.) Check for special cases (+0=-0, unordered, and infinite values)
|
||||
// and correct for sign bits
|
||||
//
|
||||
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
|
||||
// signals, and a 2-bit signal FOpCtrlE that indicates the type of
|
||||
// operands being compared as indicated below.
|
||||
// FOpCtrlE Description
|
||||
// 00 double precision numbers
|
||||
// 01 single precision numbers
|
||||
// 10 half precision numbers
|
||||
// 11 (unused)
|
||||
//
|
||||
// The comparator produces a 2-bit signal FCC, which
|
||||
// indicates the result of the comparison:
|
||||
//
|
||||
// fcc description
|
||||
// 00 A = B
|
||||
// 01 A < B
|
||||
// 10 A > B
|
||||
// 11 A and B are unordered (i.e., A or B is NaN)
|
||||
//
|
||||
// It also produces an invalid operation flag, which is one
|
||||
// if either of the input operands is a signaling NaN per 754
|
||||
|
||||
|
||||
/*module magcompare2b (LT, GT, A, B);
|
||||
|
||||
input logic [1:0] A;
|
||||
input logic [1:0] B;
|
||||
|
||||
output logic LT;
|
||||
output logic GT;
|
||||
|
||||
// Determine if A < B using a minimized sum-of-products expression
|
||||
assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
|
||||
// Determine if A > B using a minimized sum-of-products expression
|
||||
assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
|
||||
|
||||
endmodule*/ // magcompare2b
|
||||
|
||||
// 2-bit magnitude comparator
|
||||
// This module compares two 2-bit values A and B. LT is '1' if A < B
|
||||
// and GT is '1' if A > B. LT and GT are both '0' if A = B. However,
|
||||
// this version actually incorporates don't cares into the equation to
|
||||
// simplify the optimization
|
||||
|
||||
// module magcompare2c (LT, GT, A, B);
|
||||
|
||||
// input logic [1:0] A;
|
||||
// input logic [1:0] B;
|
||||
|
||||
// output logic LT;
|
||||
// output logic GT;
|
||||
|
||||
// assign LT = B[1] | (!A[1]&B[0]);
|
||||
// assign GT = A[1] | (!B[1]&A[0]);
|
||||
|
||||
// endmodule // magcompare2b
|
||||
|
||||
// This module compares two 64-bit values A and B. LT is '1' if A < B
|
||||
// and EQ is '1' if A = B. LT and EQ are both '0' if A > B.
|
||||
// This structure was modified so
|
||||
// that it only does a strict magnitude comparison, and only
|
||||
// returns flags for less than (LT) and equal to (EQ). It uses a tree
|
||||
// of 63 2-bit magnitude comparators, followed by one OR gate.
|
||||
//
|
||||
// J. E. Stine and M. J. Schulte, "A combined two's complement and
|
||||
// floating-point comparator," 2005 IEEE International Symposium on
|
||||
// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
|
||||
// doi: 10.1109/ISCAS.2005.1464531
|
||||
|
||||
// magcompare64b_2: last three levels of the 64-bit comparator tree.
//
// Takes the two 8-bit vectors produced by magcompare64b_1 and reduces them
// through 4, then 2, then 1 magcompare2c cells. As in the first half, each
// level passes its two result vectors to the next level in swapped order.
//
// Outputs:
//   LT - final "less than" result of the tree
//   EQ - neither LT nor the internal GT is set
module magcompare64b_2 (LT, EQ, w, x);

  input  logic [7:0] w;
  input  logic [7:0] x;

  output logic       LT;
  output logic       EQ;

  logic [3:0] y, z;   // level-4 results
  logic [1:0] a, b;   // level-5 results
  logic       GT;     // final "greater than"; only used to derive EQ

  // Level 4: 8 -> 4
  magcompare2c mag39 (.LT(y[0]), .GT(z[0]), .A(x[1:0]), .B(w[1:0]));
  magcompare2c mag3A (.LT(y[1]), .GT(z[1]), .A(x[3:2]), .B(w[3:2]));
  magcompare2c mag3B (.LT(y[2]), .GT(z[2]), .A(x[5:4]), .B(w[5:4]));
  magcompare2c mag3C (.LT(y[3]), .GT(z[3]), .A(x[7:6]), .B(w[7:6]));

  // Level 5: 4 -> 2
  magcompare2c mag3D (.LT(a[0]), .GT(b[0]), .A(z[1:0]), .B(y[1:0]));
  magcompare2c mag3E (.LT(a[1]), .GT(b[1]), .A(z[3:2]), .B(y[3:2]));

  // Level 6: 2 -> 1
  magcompare2c mag3F (.LT(LT), .GT(GT), .A(b[1:0]), .B(a[1:0]));

  // Equal when the tree reports neither less-than nor greater-than.
  assign EQ = ~(LT | GT);

endmodule // magcompare64b_2
|
||||
|
||||
// This module takes 64-bits inputs A and B, two magnitude comparison
|
||||
// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of
|
||||
// operands being compared as indicated below.
|
||||
// FOpCtrlE Description
|
||||
// 00 double precision numbers
|
||||
// 01 single precision numbers
|
||||
// 10 half precision numbers
|
||||
// 11 bfloat precision numbers
|
||||
//
|
||||
// The comparator produces a 2-bit signal fcc, which
|
||||
// indicates the result of the comparison as follows:
|
||||
// fcc description
|
||||
// 00 A = B
|
||||
// 01 A < B
|
||||
// 10 A > B
|
||||
// 11 A and B are unordered (i.e., A or B is NaN)
|
||||
// It also produces an invalid operation flag, which is one
|
||||
// if either of the input operands is a signaling NaN.
|
||||
|
||||
// exception_cmp_2: final stage of the floating-point comparator.
//
// Combines the comparator-tree result (LT_mag, EQ_mag) with the operand sign
// bits and the NaN / zero flags to produce:
//   invalid - either operand is a signaling NaN
//   fcc     - 00: A = B, 01: A < B, 10: A > B, 11: unordered
//   CmpResE - result selected by FOpCtrlE[2:0]:
//               111 min  (A if A < B, else B)
//               101 max  (A if A > B, else B)
//               010 A == B   (0 or 1)
//               001 A <  B   (0 or 1)
//               011 A <= B   (0 or 1)
//               others       0
//
// The operand format used for the signaling-NaN test is decoded from
// FOpCtrlE[1:0] (00 double, 01 single, 10 half).
// NOTE(review): FOpCtrlE[2:0] is also the operation select above and FmtE is
// unused in this module, so the format decode looks suspicious - confirm
// whether it should be driven from FmtE.
module exception_cmp_2 (
  input  logic [63:0] A,
  input  logic [63:0] B,
  input  logic        FmtE,
  input  logic        LT_mag,
  input  logic        EQ_mag,
  input  logic [2:0]  FOpCtrlE,

  output logic        invalid,
  output logic [1:0]  fcc,
  output logic [63:0] CmpResE,

  input  logic        Azero,
  input  logic        Bzero,
  input  logic        ANaN,
  input  logic        BNaN);

  logic dp;
  logic sp;
  logic hp;
  logic ASNaN;
  logic BSNaN;
  logic UO;
  logic GT;
  logic LT;
  logic EQ;

  assign dp = ~FOpCtrlE[1] & ~FOpCtrlE[0];
  assign sp = ~FOpCtrlE[1] &  FOpCtrlE[0];
  assign hp =  FOpCtrlE[1] & ~FOpCtrlE[0];

  // Values are unordered if (A is NaN) OR (B is NaN).
  assign UO = ANaN | BNaN;

  // Test if A or B is a signaling NaN. A NaN is signaling when the most
  // significant fraction bit (the quiet bit) is 0: bit 54 for single
  // (fraction [54:32]), bit 51 for double (fraction [51:0]) and bit 57 for
  // half (fraction [57:48]).
  assign ASNaN = ANaN & ((sp & ~A[54]) | (dp & ~A[51]) | (hp & ~A[57]));
  assign BSNaN = BNaN & ((sp & ~B[54]) | (dp & ~B[51]) | (hp & ~B[57]));

  // If either A or B is a signaling NaN the "Invalid Operation"
  // exception flag is set to one; otherwise it is zero.
  assign invalid = ASNaN | BSNaN;

  // A and B are equal if the comparator reports equality or both are zero
  // (so +0 == -0), and they are not unordered.
  assign EQ = (EQ_mag | (Azero & Bzero)) & ~UO;

  // A is less than B if
  //   - both are negative and the comparator does NOT report A < B, or
  //   - at least one is non-negative and the comparator reports A < B,
  // and A and B are neither equal nor unordered.
  assign LT = ((~LT_mag &   A[63] & B[63]) |
               ( LT_mag & ~(A[63] & B[63]))) & ~EQ & ~UO;

  // A is greater than B when LT, EQ, and UO are all false.
  assign GT = ~(LT | EQ | UO);

  // Set the bits of fcc based on LT, GT, EQ, and UO
  assign fcc[0] = LT | UO;
  assign fcc[1] = GT | UO;

  // Select the value returned to the datapath.
  always_comb begin
    case (FOpCtrlE[2:0])
      3'b111:  CmpResE = LT ? A : B;         // min
      3'b101:  CmpResE = GT ? A : B;         // max
      3'b010:  CmpResE = {63'b0, EQ};        // equal
      3'b001:  CmpResE = {63'b0, LT};        // less than
      3'b011:  CmpResE = {63'b0, LT | EQ};   // less than or equal
      default: CmpResE = 64'b0;
    endcase
  end

endmodule // exception_cmp_2
|
256
wally-pipelined/src/fpu/fdivsqrt.sv
Executable file
256
wally-pipelined/src/fpu/fdivsqrt.sv
Executable file
@ -0,0 +1,256 @@
|
||||
//
|
||||
// File name : fpdiv
|
||||
// Title : Floating-Point Divider/Square-Root
|
||||
// project : FPU
|
||||
// Library : fpdiv
|
||||
// Author(s) : James E. Stine, Jr.
|
||||
// Purpose : definition of main unit to floating-point div/sqrt
|
||||
// notes :
|
||||
//
|
||||
// Copyright Oklahoma State University
|
||||
//
|
||||
// Basic Operations
|
||||
//
|
||||
// Step 1: Load operands, set flags, and convert SP to DP
|
||||
// Step 2: Check for special inputs ( +/- Infinity, NaN)
|
||||
// Step 3: Exponent Logic
|
||||
// Step 4: Divide/Sqrt using Goldschmidt
|
||||
// Step 5: Normalize the result.//
|
||||
// Shift left until normalized. Normalized when the value to the
|
||||
// left of the binary point is 1.
|
||||
// Step 6: Round the result.//
|
||||
// Step 7: Put quotient/remainder onto output.
|
||||
//
|
||||
|
||||
// `timescale 1ps/1ps
|
||||
// fdivsqrt: top level of the floating-point divide / square-root unit.
//
// Data flow visible in this module:
//   1. convert_inputs_div produces the internal operands Float1 / Float2.
//   2. exception_div produces sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm.
//   3. The result sign, the two 53-bit mantissas (leading one prepended) and
//      the result exponent are computed here.
//   4. divconv (datapath) and fsm (control) run the iteration.
//   5. rounder_div produces the rounded result and the flags.
//   6. Result and flags are captured in enabled registers when
//      FDivSqrtDoneE is asserted.
//
// DivOpType selects the sqrt exponent path and the sqrt sign rule when 1,
// and the divide path when 0.
module fdivsqrt (FDivSqrtDoneE, FDivResultM, FDivSqrtFlgM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
                 FDivStartE, reset, clk, FDivBusyE, HoldInputs);

  input [63:0]  DivInput1E;    // 1st input operand (A)
  input [63:0]  DivInput2E;    // 2nd input operand (B)
  input [2:0]   FrmE;          // Rounding mode - specify values
  input         DivOpType;     // Function opcode
  input         FmtE;          // Result Precision (0 for double, 1 for single) //***will need to swap this
  input         DivOvEn;       // Overflow trap enabled
  input         DivUnEn;       // Underflow trap enabled

  input         FDivStartE;
  input         reset;
  input         clk;

  output [63:0] FDivResultM;   // Result of operation
  output [4:0]  FDivSqrtFlgM;  // IEEE exception flags
  output        FDivSqrtDoneE;
  output        FDivBusyE, HoldInputs;

  supply1       vdd;
  supply0       vss;

  // Converted operands
  wire [63:0]   Float1;
  wire [63:0]   Float2;

  // Exponent path
  wire [12:0]   exp1, exp2, expF;
  wire [12:0]   exp_diff, bias;
  wire [13:0]   exp_sqrt;
  wire [12:0]   exp_s;
  wire [12:0]   exp_c;
  logic         exp_cout1, exp_cout2, exp_odd, open;

  // Mantissa path
  wire [52:0]   mantissaA;
  wire [52:0]   mantissaB;

  // Exception / rounding
  wire [2:0]    sel_inv;
  wire          op1_Norm, op2_Norm;
  wire          Invalid;
  wire          DenormIn, DenormIO;
  wire          DivDenormM;    // registered DenormIO
  wire [4:0]    FlagsIn;
  wire          signResult;
  wire [63:0]   Result;

  // Datapath <-> rounder
  wire [63:0]   q1, qm1, qp1, q0, qm0, qp0;
  wire [63:0]   rega_out, regb_out, regc_out, regd_out;
  wire [127:0]  regr_out;

  // Control <-> datapath
  wire [2:0]    sel_muxa, sel_muxb;
  wire          sel_muxr;
  wire          load_rega, load_regb, load_regc, load_regd, load_regr, load_regs;

  // Convert the input operands to the internal form used below, based on
  // the original operands, DivOpType and the precision FmtE.
  // (Presumably widens single precision to double - see convert_inputs_div.)
  convert_inputs_div divconv1 (Float1, Float2, DivInput1E, DivInput2E, DivOpType, FmtE);

  // Test for exceptions and return the "Invalid Operation" and
  // "Denormalized" input flags. sel_inv is passed on to the rounder to
  // select the result. op1_Norm / op2_Norm are not used in this module.
  exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm,
                         Float1, Float2, DivOpType);

  // Determine Sign/Mantissa
  //   divide: XOR of the operand signs; sqrt: sign of the first operand
  assign signResult = ((Float1[63]^Float2[63])&~DivOpType) | Float1[63]&DivOpType;
  assign mantissaA = {vdd, Float1[51:0]};
  assign mantissaB = {vdd, Float2[51:0]};

  // Perform Exponent Subtraction - expA - expB + Bias
  assign exp1 = {2'b0, Float1[62:52]};
  assign exp2 = {2'b0, Float2[62:52]};
  // bias : DP = 2^{11-1}-1 = 1023
  assign bias = {3'h0, 10'h3FF};

  // Divide exponent: carry-save reduce exp1 + ~exp2 + bias, then add with
  // carry-in 1 (completing the two's complement of exp2).
  csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c); //***adder
  exp_add explogic1 (exp_cout1, {open, exp_diff}, //***adder?
                     {vss, exp_s}, {vss, exp_c}, 1'b1);

  // Sqrt exponent: exp_odd is the inverse of the exponent LSB and is used
  // as the carry-in of exp1 + 1023.
  assign exp_odd = Float1[52] ? vss : vdd;
  exp_add explogic2 (exp_cout2, exp_sqrt, //***adder?
                     {vss, exp1}, {4'h0, 10'h3ff}, exp_odd);

  // Choose correct exponent (sqrt uses the sum shifted right by one)
  assign expF = DivOpType ? exp_sqrt[13:1] : exp_diff;

  // Main Goldschmidt/Division Routine
  divconv goldy (q1, qm1, qp1, q0, qm0, qp0,
                 rega_out, regb_out, regc_out, regd_out,
                 regr_out, mantissaB, mantissaA,
                 sel_muxa, sel_muxb, sel_muxr,
                 reset, clk,
                 load_rega, load_regb, load_regc, load_regd,
                 load_regr, load_regs, FmtE, DivOpType, exp_odd);

  // FSM : control divider
  fsm control (FDivSqrtDoneE, load_rega, load_regb, load_regc, load_regd,
               load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
               clk, reset, FDivStartE, DivOpType, FDivBusyE, HoldInputs);

  // Round the mantissa to a 52-bit value, with the leading one
  // removed. The rounding unit also handles special cases and
  // sets the exception flags.
  //***add max magnitude and swap negative and positive infinity
  rounder_div divround1 (Result, DenormIO, FlagsIn,
                         FrmE, FmtE, DivOvEn, DivUnEn, expF,
                         sel_inv, Invalid, DenormIn, signResult,
                         q1, qm1, qp1, q0, qm0, qp0, regr_out);

  // Store the final result and the exception flags in registers.
  flopenr #(64) rega (clk, reset, FDivSqrtDoneE, Result, FDivResultM);
  flopenr #(1)  regb (clk, reset, FDivSqrtDoneE, DenormIO, DivDenormM);
  flopenr #(5)  regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivSqrtFlgM);

endmodule // fdivsqrt
|
||||
|
||||
//
|
||||
// Brent-Kung Prefix Adder
|
||||
// (yes, it is 14 bits as my generator is broken for 13 bits :(
|
||||
// assume, synthesizer will delete stuff not needed )
|
||||
//
|
||||
// exp_add: 14-bit adder with carry-in and carry-out, built around the
// brent_kung prefix tree.
//
//   {cout, sum} = a + b + cin
//
// The carry-in is folded into the prefix tree as bit 0 of the generate
// vector, so the propagate/generate vectors are 15 bits wide with the
// operand bits in positions [14:1].
module exp_add (
  output wire        cout,
  output wire [13:0] sum,
  input  wire [13:0] a,
  input  wire [13:0] b,
  input  wire        cin
);

  wire [14:0] prop, gen;
  wire [13:0] carry;      // carry[i] = carry into operand bit i

  // pre-computation
  assign prop = {a ^ b, 1'b0};
  assign gen  = {a & b, cin};

  // prefix tree
  brent_kung prefix_tree (.c(carry), .p(prop[13:0]), .g(gen[13:0]));

  // post-computation
  assign sum  = prop[14:1] ^ carry;
  assign cout = gen[14] | (prop[14] & carry[13]);

endmodule // exp_add
|
||||
|
||||
// brent_kung: 14-bit parallel-prefix carry network (Brent-Kung arrangement).
//
// Inputs are per-bit propagate (p) and generate (g) vectors; output c[k+1]
// is the group generate over bits k..0 (named G_k_0 below), i.e. the carry
// out of bit k.
//
// The grey and black cells are defined elsewhere. From the way they are
// wired here, black returns a combined (G, P) pair and grey returns only
// the combined G - presumably the usual prefix-adder cells; confirm against
// their definitions.
module brent_kung (
  output wire [14:1] c,
  input  wire [13:0] p,
  input  wire [13:0] g
);

  logic G_1_0, G_3_2, G_5_4, G_7_6, G_9_8, G_11_10, G_13_12, G_3_0, G_7_4, G_11_8;
  logic P_3_2, P_5_4, P_7_6, P_9_8, P_11_10, P_13_12, P_7_4, P_11_8;
  logic G_7_0, G_11_0, G_5_0, G_9_0, G_13_0, G_2_0, G_4_0, G_6_0, G_8_0, G_10_0, G_12_0;

  // Stage 1: G/P pairs spanning 2 bits
  grey  b_1_0   (G_1_0,            {g[1],g[0]},   p[1]);
  black b_3_2   (G_3_2,   P_3_2,   {g[3],g[2]},   {p[3],p[2]});
  black b_5_4   (G_5_4,   P_5_4,   {g[5],g[4]},   {p[5],p[4]});
  black b_7_6   (G_7_6,   P_7_6,   {g[7],g[6]},   {p[7],p[6]});
  black b_9_8   (G_9_8,   P_9_8,   {g[9],g[8]},   {p[9],p[8]});
  black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
  black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});

  // Stage 2: G/P pairs spanning 4 bits
  grey  g_3_0   (G_3_0,          {G_3_2,G_1_0},    P_3_2);
  black b_7_4   (G_7_4,  P_7_4,  {G_7_6,G_5_4},    {P_7_6,P_5_4});
  black b_11_8  (G_11_8, P_11_8, {G_11_10,G_9_8},  {P_11_10,P_9_8});

  // Stage 3: group generate spanning 8 bits
  grey  g_7_0   (G_7_0, {G_7_4,G_3_0}, P_7_4);

  // Stage 4: would span 16 bits; empty for a 14-bit tree

  // Stage 5: fan back out - extend G_7_0 by a 4-bit group
  grey  g_11_0  (G_11_0, {G_11_8,G_7_0}, P_11_8);

  // Stage 6: fan back out - extend by 2-bit groups
  grey  g_5_0   (G_5_0,  {G_5_4,G_3_0},    P_5_4);
  grey  g_9_0   (G_9_0,  {G_9_8,G_7_0},    P_9_8);
  grey  g_13_0  (G_13_0, {G_13_12,G_11_0}, P_13_12);

  // Last grey cell stage: extend by a single bit to reach the even positions
  grey  g_2_0   (G_2_0,  {g[2],G_1_0},   p[2]);
  grey  g_4_0   (G_4_0,  {g[4],G_3_0},   p[4]);
  grey  g_6_0   (G_6_0,  {g[6],G_5_0},   p[6]);
  grey  g_8_0   (G_8_0,  {g[8],G_7_0},   p[8]);
  grey  g_10_0  (G_10_0, {g[10],G_9_0},  p[10]);
  grey  g_12_0  (G_12_0, {g[12],G_11_0}, p[12]);

  // Final Stage: Apply c_k+1 = G_k_0 (c[1] is simply g[0]).
  assign c = {G_13_0, G_12_0, G_11_0, G_10_0, G_9_0, G_8_0, G_7_0,
              G_6_0,  G_5_0,  G_4_0,  G_3_0,  G_2_0, G_1_0, g[0]};

endmodule // brent_kung
|
||||
|
67
wally-pipelined/src/fpu/fhazard.sv
Normal file
67
wally-pipelined/src/fpu/fhazard.sv
Normal file
@ -0,0 +1,67 @@
|
||||
///////////////////////////////////////////
|
||||
// fpuhazard.sv
|
||||
//
|
||||
// Written: me@KatherineParry.com 19 May 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Determine forwarding, stalls and flushes for the FPU
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
||||
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fhazard(
  input  logic [4:0] Adr1E, Adr2E, Adr3E,
  input  logic       FWriteEnM, FWriteEnW,
  input  logic [4:0] RdM, RdW,
  input  logic [2:0] FResultSelM,
  output logic       FStallD,
  output logic [1:0] ForwardXE, ForwardYE, ForwardZE
);

  // FPU hazard unit: for each of the three source addresses (Adr1E/Adr2E/Adr3E)
  // pick a forwarding source, and request a stall when a matching result
  // cannot be forwarded yet.
  //
  // Forward*E encoding:
  //   2'b00 - register file read data (FRD1E / FRD2E / FRD3E)
  //   2'b10 - FResM
  //   2'b01 - FPUResult64W
  //
  // NOTE(review): the E/M/W suffixes presumably name pipeline stages
  // (execute / memory / writeback) -- confirm against the instantiating module.

  // A "hit" means the source address equals the destination register of an
  // instruction whose FP write enable is set.
  logic XHitM, YHitM, ZHitM;   // hits against RdM
  logic XHitW, YHitW, ZHitW;   // hits against RdW
  logic ResMSelected;          // FResultSelM selects FResM, so it can be forwarded

  assign XHitM = (Adr1E == RdM) & FWriteEnM;
  assign YHitM = (Adr2E == RdM) & FWriteEnM;
  assign ZHitM = (Adr3E == RdM) & FWriteEnM;

  assign XHitW = (Adr1E == RdW) & FWriteEnW;
  assign YHitW = (Adr2E == RdW) & FWriteEnW;
  assign ZHitW = (Adr3E == RdW) & FWriteEnW;

  assign ResMSelected = (FResultSelM == 3'b100);

  always_comb begin
    // Defaults: no forwarding, no stall.
    ForwardXE = 2'b00;
    ForwardYE = 2'b00;
    ForwardZE = 2'b00;
    FStallD   = 1'b0;

    // Operand X. An RdM hit takes priority over an RdW hit; when the RdM
    // result is not FResM the operand is NOT taken from RdW -- stall instead.
    if (XHitM) begin
      if (ResMSelected) ForwardXE = 2'b10;
      else              FStallD   = 1'b1;
    end else if (XHitW) begin
      ForwardXE = 2'b01;
    end

    // Operand Y: same policy as X.
    if (YHitM) begin
      if (ResMSelected) ForwardYE = 2'b10;
      else              FStallD   = 1'b1;
    end else if (YHitW) begin
      ForwardYE = 2'b01;
    end

    // Operand Z: same policy as X.
    if (ZHitM) begin
      if (ResMSelected) ForwardZE = 2'b10;
      else              FStallD   = 1'b1;
    end else if (ZHitW) begin
      ForwardZE = 2'b01;
    end
  end

endmodule
|
54
wally-pipelined/src/fpu/fregfile.sv
Normal file
54
wally-pipelined/src/fpu/fregfile.sv
Normal file
@ -0,0 +1,54 @@
|
||||
///////////////////////////////////////////
|
||||
// fregfile.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 9 January 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: 4-port register file
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
||||
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fregfile (
  input  logic        clk, reset,
  input  logic        we4,
  input  logic [ 4:0] a1, a2, a3, a4,
  input  logic [63:0] wd4, // KEP: width fixed at 64 instead of `XLEN (lint warning) *** figure out if double can be supported when XLEN = 32
  output logic [63:0] rd1, rd2, rd3);

  // Floating-point register file: 32 entries of 64 bits.
  //   - three combinational read ports  (a1/rd1, a2/rd2, a3/rd3)
  //   - one write port                  (a4/wd4, enabled by we4)
  // Writes are committed on the FALLING edge of clk.
  // Unlike an integer register file, entry 0 is an ordinary writable register.
  logic [63:0] rf[31:0];

  // Asynchronous reset clears every entry.
  // The reset is intended for simulation only, not synthesis.
  always_ff @(negedge clk or posedge reset) begin
    if (reset) begin
      for (int idx = 0; idx < 32; idx++) rf[idx] <= '0;
    end else if (we4) begin
      rf[a4] <= wd4;
    end
  end

  // Read ports; the #2 delay affects simulation timing only.
  assign #2 rd1 = rf[a1];
  assign #2 rd2 = rf[a2];
  assign #2 rd3 = rf[a3];

endmodule // fregfile
|
||||
|
Loading…
Reference in New Issue
Block a user