// diff --git a/wally-pipelined/src/fpu/fclassify.sv b/wally-pipelined/src/fpu/fclassify.sv new file mode 100644 index 000000000..a15edcb4a --- /dev/null +++ b/wally-pipelined/src/fpu/fclassify.sv @@ -0,0 +1,62 @@

`include "wally-config.vh"

// fclassify: floating-point classify.
// Decodes the operand into exactly one of ten classes and returns a one-hot
// mask in ClassResE[9:0]; the upper 54 result bits are always zero.
module fclassify (
  input  logic [63:0] SrcXE,     // operand; a single-precision value is read from bits [63:32]
  input  logic        FmtE,      // 0-Single 1-Double
  output logic [63:0] ClassResE  // one-hot class mask (see bit table below)
  );

  logic [31:0] Single;
  logic [63:0] Double;
  logic Sgn;
  logic Inf, NaN, Zero, Norm, Denorm;
  logic PInf, QNaN, PZero, PNorm, PDenorm;
  logic NInf, SNaN, NZero, NNorm, NDenorm;
  logic MaxExp, ExpZero, ManZero, FirstBitFrac;

  // Single and Double precision layouts
  assign Single = SrcXE[63:32];
  assign Double = SrcXE;
  assign Sgn    = SrcXE[63];

  // basic calculations for readability
  assign ExpZero      = FmtE ? ~|Double[62:52] : ~|Single[30:23];  // exponent all zeros
  assign MaxExp       = FmtE ?  &Double[62:52] :  &Single[30:23];  // exponent all ones
  assign ManZero      = FmtE ? ~|Double[51:0]  : ~|Single[22:0];   // fraction all zeros
  assign FirstBitFrac = FmtE ?   Double[51]    :   Single[22];     // most significant fraction bit

  // determine the type of number
  assign NaN    = MaxExp  & ~ManZero;
  assign Inf    = MaxExp  &  ManZero;
  assign Zero   = ExpZero &  ManZero;
  assign Denorm = ExpZero & ~ManZero;
  // A normal number has an exponent that is neither all zeros nor all ones.
  // The original "~ExpZero" was also true for Inf and NaN, which set a
  // second (+/-Norm) bit alongside the Inf/NaN bit.
  assign Norm   = ~ExpZero & ~MaxExp;

  // determine the sub categories
  assign QNaN    =  FirstBitFrac & NaN;
  assign SNaN    = ~FirstBitFrac & NaN;
  assign PInf    = ~Sgn & Inf;
  assign NInf    =  Sgn & Inf;
  assign PNorm   = ~Sgn & Norm;
  assign NNorm   =  Sgn & Norm;
  assign PDenorm = ~Sgn & Denorm;
  assign NDenorm =  Sgn & Denorm;
  assign PZero   = ~Sgn & Zero;
  assign NZero   =  Sgn & Zero;

  // determine sub category and combine into the result
  //  bit 0 - -Inf
  //  bit 1 - -Norm
  //  bit 2 - -Denorm
  //  bit 3 - -Zero
  //  bit 4 - +Zero
  //  bit 5 - +Denorm
  //  bit 6 - +Norm
  //  bit 7 - +Inf
  //  bit 8 - signaling NaN
  //  bit 9 - quiet NaN
  assign ClassResE = {{54{1'b0}}, QNaN, SNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};

endmodule
// diff --git a/wally-pipelined/src/fpu/fcmp.sv b/wally-pipelined/src/fpu/fcmp.sv new
// file mode 100755 index 000000000..f47d7c9ef --- /dev/null +++ b/wally-pipelined/src/fpu/fcmp.sv @@ -0,0 +1,465 @@

//
// File name    : fpcomp.v
// Title        : Floating-Point Comparator
// project      : FPU
// Library      : fpcomp
// Author(s)    : James E. Stine
// Purpose      : definition of main unit to floating-point comparator
// notes        :
//
// Copyright Oklahoma State University
//
// Floating Point Comparator (Algorithm)
//
// 1.) Performs sign-extension if the inputs are 32-bit integers.
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
// 3.) Check for special cases (+0=-0, unordered, and infinite values)
//     and correct for sign bits
//
// This module takes 64-bit inputs op1 and op2 and the control signal
// FOpCtrlE. The original header describes FOpCtrlE[1:0] as a precision
// select:
//      FOpCtrlE        Description
//      00      double precision numbers
//      01      single precision numbers
//      10      half precision numbers
//      11      (unused)
// NOTE(review): the port is 3 bits wide and exception_cmp_2 decodes
// FOpCtrlE[2:0] as an operation select (min/max/eq/lt/le) while also reusing
// bits [1:0] as the precision select above - confirm the intended encoding.
//
// The comparator produces a 2-bit signal FCC, which
// indicates the result of the comparison:
//
//     fcc      description
//      00      A = B
//      01      A < B
//      10      A > B
//      11      A and B are unordered (i.e., A or B is NaN)
//
// It also produces an invalid operation flag, which is one
// if either of the input operands is a signaling NaN per 754

`include "wally-config.vh"
module fcmp (
   input  logic [63:0] op1,
   input  logic [63:0] op2,
   input  logic [2:0]  FOpCtrlE,
   input  logic        FmtE,

   output logic        Invalid,   // Invalid Operation
   // output logic [1:0]  FCC,    // Condition Codes
   output logic [63:0] CmpResE);

   logic [1:0]  FCC;                  // Condition Codes (internal only)
   logic [7:0]  StageW, StageX;       // partial results handed from magcompare64b_1 to magcompare64b_2
   logic        ANaN, BNaN;
   logic        Azero, Bzero;
   logic        MagLT;                // magnitude op1 < magnitude op2
   logic        MagEQ;                // magnitude op1 = magnitude op2
   logic [63:0] Op1SignFlipped, Op2SignFlipped;

   // The first comparator stage sees each operand with its sign bit inverted.
   assign Op1SignFlipped = {~op1[63], op1[62:0]};
   assign Op2SignFlipped = {~op2[63], op2[62:0]};

   // First half of the magnitude comparison tree.
   magcompare64b_1 magcomp1 (.w(StageW), .x(StageX),
                             .A(Op1SignFlipped), .B(Op2SignFlipped));

   // NaN / zero detection on the raw operands.
   exception_cmp_1 exc1 (.ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero),
                         .A(op1), .B(op2), .FOpCtrlE(FOpCtrlE));

   // Second half of the magnitude comparison tree. Only LT and EQ are
   // returned, since GT can be determined from these values.
   magcompare64b_2 magcomp2 (.LT(MagLT), .EQ(MagEQ), .w(StageW), .x(StageX));

   // Determine final values based on output of magnitude comparison,
   // sign bits, and special case testing.
   exception_cmp_2 exc2 (.A(op1), .B(op2), .FmtE(FmtE),
                         .LT_mag(MagLT), .EQ_mag(MagEQ), .FOpCtrlE(FOpCtrlE),
                         .invalid(Invalid), .fcc(FCC), .CmpResE(CmpResE),
                         .Azero(Azero), .Bzero(Bzero), .ANaN(ANaN), .BNaN(BNaN));

endmodule // fcmp

// module magcompare2b (LT, GT, A, B);

//    input logic [1:0] A;
//    input logic [1:0] B;

//    output logic     LT;
//    output logic     GT;

//    // Determine if A < B  using a minimized sum-of-products expression
//    assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
//    // Determine if A > B  using a minimized sum-of-products expression
//    assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];

// endmodule // magcompare2b

// 2-bit magnitude comparator
// This module compares two 2-bit values A and B. LT is '1' if A < B
// and GT is '1' if A > B. LT and GT are both '0' if A = B.  However,
// this version actually incorporates don't cares into the equation to
// simplify the optimization

module magcompare2c (
   output logic       LT,
   output logic       GT,
   input  logic [1:0] A,
   input  logic [1:0] B);

   assign LT = B[1] | (~A[1] & B[0]);
   assign GT = A[1] | (~B[1] & A[0]);

endmodule // magcompare2c

// This module compares two 64-bit values A and B.
// LT is '1' if A < B
// and EQ is '1' if A = B. LT and GT are both '0' if A > B.
// This structure was modified so
// that it only does a strict magnitude comparison, and only
// returns flags for less than (LT) and equal to (EQ). It uses a tree
// of 63 2-bit magnitude comparators, followed by one OR gates.
//
// J. E. Stine and M. J. Schulte, "A combined two's complement and
// floating-point comparator," 2005 IEEE International Symposium on
// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
// doi: 10.1109/ISCAS.2005.1464531
//
// This module holds the first three levels of that tree: 32 magcompare2b
// cells on the 2-bit slices of A and B, then 16 and 8 magcompare2c cells.
// The eight result pairs (w, x) are finished by magcompare64b_2.
// NOTE(review): the only magcompare2b definitions visible in this file are
// commented out - presumably it is provided by another source file.

module magcompare64b_1 (
   output logic [7:0]  w,
   output logic [7:0]  x,
   input  logic [63:0] A,
   input  logic [63:0] B);

   logic [31:0] s, t;   // level-1 outputs, one pair per 2-bit slice of A/B
   logic [15:0] u, v;   // level-2 outputs

   genvar k;
   generate
      // Level 1: slice k compares A[2k+1:2k] with B[2k+1:2k].
      for (k = 0; k < 32; k++) begin : level1
         magcompare2b cmp (s[k], t[k], A[2*k+1:2*k], B[2*k+1:2*k]);
      end

      // Level 2: pairs of level-1 results; note the (t, s) operand order.
      for (k = 0; k < 16; k++) begin : level2
         magcompare2c cmp (u[k], v[k], t[2*k+1:2*k], s[2*k+1:2*k]);
      end

      // Level 3: pairs of level-2 results; note the (v, u) operand order.
      for (k = 0; k < 8; k++) begin : level3
         magcompare2c cmp (w[k], x[k], v[2*k+1:2*k], u[2*k+1:2*k]);
      end
   endgenerate

endmodule // magcompare64b_1

// This module takes 64-bits inputs A and B, two magnitude comparison
// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as
// indicated below.
//      FOpCtrlE        Description
//      00      double precision numbers
//      01      single precision numbers
//      10      half precision numbers
//      11      bfloat precision numbers
//
// The comparator produces a 2-bit signal fcc, which
// indicates the result of the comparison as follows:
//     fcc      description
//      00      A = B
//      01      A < B
//      10      A > B
//      11      A and B are unordered (i.e., A or B is NaN)
// It also produces a invalid operation flag, which is one
// if either of the input operands is a signaling NaN.
//
// exception_cmp_1 itself only produces the NaN and zero flags for A and B;
// fcc and the invalid flag are produced by exception_cmp_2.

module exception_cmp_1 (
   output logic        ANaN,
   output logic        BNaN,
   output logic        Azero,
   output logic        Bzero,
   input  logic [63:0] A,
   input  logic [63:0] B,
   input  logic [2:0]  FOpCtrlE);

   logic dp, sp, hp;

   // Precision decode from FOpCtrlE[1:0] (bit 2 is not used here).
   assign dp = ~FOpCtrlE[1] & ~FOpCtrlE[0];
   assign sp = ~FOpCtrlE[1] &  FOpCtrlE[0];
   assign hp =  FOpCtrlE[1] & ~FOpCtrlE[0];

   // Test if A or B is NaN: bits [62:58] all ones, the remaining exponent
   // bits for the selected precision all ones, and at least one of the next
   // two lower bits set.
   // NOTE(review): only two fraction bits are examined per precision, so a
   // NaN whose only set fraction bits are lower down is not flagged - confirm
   // this is intended.
   assign ANaN = (&A[62:58]) &
                 ((sp & (&A[57:55]) & (|A[54:53])) |
                  (dp & (&A[57:52]) & (|A[51:50])) |
                  (hp & (|A[57:56])));

   assign BNaN = (&B[62:58]) &
                 ((sp & (&B[57:55]) & (|B[54:53])) |
                  (dp & (&B[57:52]) & (|B[51:50])) |
                  (hp & (|B[57:56])));

   // Test if A is +0 or -0 when viewed as a floating point number (i.e,
   // the 63 least significant bits of A are zero).
   // Depending on how this synthesizes, it may work better to replace
   // this with assign Azero = ~(A[62] | A[61] | ... | A[0])
   assign Azero = (A[62:0] == 63'h0);
   assign Bzero = (B[62:0] == 63'h0);

endmodule // exception_cmp_1
//
// File name    : fpcomp.v
// Title        : Floating-Point Comparator
// project      : FPU
// Library      : fpcomp
// Author(s)    : James E. Stine
// Purpose      : definition of main unit to floating-point comparator
// notes        :
//
// Copyright Oklahoma State University
//
// Floating Point Comparator (Algorithm)
//
// 1.)
Performs sign-extension if the inputs are 32-bit integers. +// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs +// 3.) Check for special cases (+0=-0, unordered, and infinite values) +// and correct for sign bits +// +// This module takes 64-bits inputs op1 and op2, VSS, and VDD +// signals, and a 2-bit signal FOpCtrlE that indicates the type of +// operands being compared as indicated below. +// FOpCtrlE Description +// 00 double precision numbers +// 01 single precision numbers +// 10 half precision numbers +// 11 (unused) +// +// The comparator produces a 2-bit signal FCC, which +// indicates the result of the comparison: +// +// fcc decscription +// 00 A = B +// 01 A < B +// 10 A > B +// 11 A and B are unordered (i.e., A or B is NaN) +// +// It also produces an invalid operation flag, which is one +// if either of the input operands is a signaling NaN per 754 + + +/*module magcompare2b (LT, GT, A, B); + + input logic [1:0] A; + input logic [1:0] B; + + output logic LT; + output logic GT; + + // Determine if A < B using a minimized sum-of-products expression + assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; + // Determine if A > B using a minimized sum-of-products expression + assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; + +endmodule*/ // magcompare2b + +// 2-bit magnitude comparator +// This module compares two 2-bit values A and B. LT is '1' if A < B +// and GT is '1'if A > B. LT and GT are both '0' if A = B. However, +// this version actually incorporates don't cares into the equation to +// simplify the optimization + +// module magcompare2c (LT, GT, A, B); + +// input logic [1:0] A; +// input logic [1:0] B; + +// output logic LT; +// output logic GT; + +// assign LT = B[1] | (!A[1]&B[0]); +// assign GT = A[1] | (!B[1]&A[0]); + +// endmodule // magcompare2b + +// This module compares two 64-bit values A and B. LT is '1' if A < B +// and EQ is '1'if A = B. LT and GT are both '0' if A > B. 
// This structure was modified so
// that it only does a strict magnitude comparison, and only
// returns flags for less than (LT) and equal to (EQ). It uses a tree
// of 63 2-bit magnitude comparators, followed by one OR gates.
//
// J. E. Stine and M. J. Schulte, "A combined two's complement and
// floating-point comparator," 2005 IEEE International Symposium on
// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
// doi: 10.1109/ISCAS.2005.1464531
//
// This module holds the last three levels of the tree (4 + 2 + 1
// magcompare2c cells) and reduces the eight (w, x) pairs produced by
// magcompare64b_1 to the final LT / EQ flags.

module magcompare64b_2 (
   output logic       LT,
   output logic       EQ,
   input  logic [7:0] w,
   input  logic [7:0] x);

   logic [3:0] y, z;   // level-4 outputs
   logic [1:0] a, b;   // level-5 outputs
   logic       GT;     // final greater-than, used only to derive EQ

   genvar k;
   generate
      // Level 4: note the (x, w) operand order.
      for (k = 0; k < 4; k++) begin : level4
         magcompare2c cmp (.LT(y[k]), .GT(z[k]), .A(x[2*k+1:2*k]), .B(w[2*k+1:2*k]));
      end

      // Level 5: note the (z, y) operand order.
      for (k = 0; k < 2; k++) begin : level5
         magcompare2c cmp (.LT(a[k]), .GT(b[k]), .A(z[2*k+1:2*k]), .B(y[2*k+1:2*k]));
      end
   endgenerate

   // Final cell: note the (b, a) operand order.
   magcompare2c mag3F (.LT(LT), .GT(GT), .A(b[1:0]), .B(a[1:0]));

   // Equal when neither less-than nor greater-than.
   assign EQ = ~LT & ~GT;

endmodule // magcompare64b_2

// This module takes 64-bits inputs A and B, two magnitude comparison
// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as indicated below.
//      FOpCtrlE        Description
//      00      double precision numbers
//      01      single precision numbers
//      10      half precision numbers
//      11      bfloat precision numbers
//
// The comparator produces a 2-bit signal fcc, which
// indicates the result of the comparison as follows:
//     fcc      description
//      00      A = B
//      01      A < B
//      10      A > B
//      11      A and B are unordered (i.e., A or B is NaN)
// It also produces a invalid operation flag, which is one
// if either of the input operands is a signaling NaN.

// exception_cmp_2: combines the magnitude-comparison flags, the operand sign
// bits and the NaN/zero flags into the ordered/unordered relation, the
// invalid flag, the condition codes and the final compare/min/max result.
module exception_cmp_2 (
   input  logic [63:0] A,
   input  logic [63:0] B,
   input  logic        FmtE,       // not referenced inside this module
   input  logic        LT_mag,
   input  logic        EQ_mag,
   input  logic [2:0]  FOpCtrlE,

   output logic        invalid,
   output logic [1:0]  fcc,
   output logic [63:0] CmpResE,

   input  logic        Azero,
   input  logic        Bzero,
   input  logic        ANaN,
   input  logic        BNaN);

   logic        dp, sp, hp;
   logic        ASNaN, BSNaN;
   logic        UO;
   logic        GT, LT, EQ;
   logic        BothNeg;
   logic [62:0] sixtythreezeros = 63'h0;   // declared but never read

   // Precision decode from FOpCtrlE[1:0].
   assign dp = ~FOpCtrlE[1] & ~FOpCtrlE[0];
   assign sp = ~FOpCtrlE[1] &  FOpCtrlE[0];
   assign hp =  FOpCtrlE[1] & ~FOpCtrlE[0];

   // Values are unordered if (A is NaN) OR (B is NaN).
   assign UO = ANaN | BNaN;

   // Test if A or B is a signaling NaN: a NaN whose per-precision bit
   // (53 for sp, 50 for dp, 56 for hp) is clear.
   // NOTE(review): these are not the most significant fraction bits of the
   // formats - confirm the intended signaling-NaN convention.
   assign ASNaN = ANaN & ((sp & ~A[53]) | (dp & ~A[50]) | (hp & ~A[56]));
   assign BSNaN = BNaN & ((sp & ~B[53]) | (dp & ~B[50]) | (hp & ~B[56]));

   // If either A or B is a signaling NaN the "Invalid Operation"
   // exception flag is set to one; otherwise it is zero.
   assign invalid = ASNaN | BSNaN;

   // A and B are equal if the magnitude comparison reports equality or both
   // are zero (+0 = -0), and they are not unordered.
   assign EQ = (EQ_mag | (Azero & Bzero)) & ~UO;

   // A is less than B when the magnitude less-than flag is set and the
   // operands are not both negative, or when it is clear and both are
   // negative. Never less-than when equal or unordered.
   assign BothNeg = A[63] & B[63];
   assign LT      = (LT_mag ^ BothNeg) & ~EQ & ~UO;

   // A is greater than B when LT, EQ, and UO are all false.
   assign GT = ~LT & ~EQ & ~UO;

   // Note: it may be possible to optimize the setting of fcc
   // a little more, but it is probably not worth the effort.

   // fcc[1] = GT | UO, fcc[0] = LT | UO
   assign fcc = {GT | UO, LT | UO};

   // Select the result according to the operation encoded in FOpCtrlE.
   always_comb begin
      case (FOpCtrlE[2:0])
         3'b001:  CmpResE = {63'b0, LT};       // less than
         3'b010:  CmpResE = {63'b0, EQ};       // equal
         3'b011:  CmpResE = {63'b0, LT | EQ};  // less than or equal
         3'b101:  CmpResE = GT ? A : B;        // max
         3'b111:  CmpResE = LT ? A : B;        // min
         default: CmpResE = 64'b0;
      endcase
   end

endmodule // exception_cmp_2
// diff --git a/wally-pipelined/src/fpu/fdivsqrt.sv b/wally-pipelined/src/fpu/fdivsqrt.sv new file mode 100755 index 000000000..6d8da23f2 --- /dev/null +++ b/wally-pipelined/src/fpu/fdivsqrt.sv @@ -0,0 +1,256 @@
//
// File name    : fpdiv
// Title        : Floating-Point Divider/Square-Root
// project      : FPU
// Library      : fpdiv
// Author(s)    : James E. Stine, Jr.
// Purpose      : definition of main unit to floating-point div/sqrt
// notes        :
//
// Copyright Oklahoma State University
//
// Basic Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity,  NaN)
// Step 3: Exponent Logic
// Step 4: Divide/Sqrt using Goldschmidt
// Step 5: Normalize the result.//
//   Shift left until normalized.  Normalized when the value to the
//   left of the binary point is 1.
// Step 6: Round the result.//
// Step 7: Put quotient/remainder onto output.
//

// `timescale 1ps/1ps

// fdivsqrt: top level of the floating-point divide / square-root unit.
// Wires together input conversion (convert_inputs_div), exception detection
// (exception_div), the exponent datapath (csa + exp_add), the iterative
// mantissa datapath (divconv) with its controller (fsm), the rounder
// (rounder_div) and the output registers (flopenr).
// All sub-module connections are positional; their port lists are defined
// outside this file section.
module fdivsqrt (FDivSqrtDoneE, FDivResultM, FDivSqrtFlgM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
   FDivStartE, reset, clk, FDivBusyE, HoldInputs);

   input [63:0] DivInput1E;		// 1st input operand (A)
   input [63:0] DivInput2E;		// 2nd input operand (B)
   input [2:0] 	FrmE;		// Rounding mode - specify values
   input 	DivOpType;	// Function opcode (selects the sqrt exponent/sign path when 1)
   input 	FmtE;   		// Result Precision (0 for double, 1 for single) //***will need to swap this
   input 	DivOvEn;		// Overflow trap enabled
   input 	DivUnEn;   	// Underflow trap enabled

   input 	FDivStartE;
   input 	reset;
   input 	clk;

   output [63:0] FDivResultM;	// Result of operation
   output [4:0]  FDivSqrtFlgM;   	// IEEE exception flags
   output 	 FDivSqrtDoneE;
   output      FDivBusyE, HoldInputs;

   // Constant 1 / constant 0 nets used for padding and the implicit leading one.
   supply1 	  vdd;
   supply0 	  vss;

   wire [63:0] 	 Float1;
   wire [63:0] 	 Float2;
   wire [63:0] 	 IntValue;

   wire 	 DivDenormM;    	// DivDenormM on input or output
   wire [12:0] 	 exp1, exp2, expF;
   wire [12:0] 	 exp_diff, bias;
   wire [13:0] 	 exp_sqrt;
   wire [12:0] 	 exp_s;
   wire [12:0] 	 exp_c;

   wire [10:0] 	 exponent, exp_pre;
   wire [63:0] 	 Result;
   wire [52:0] 	 mantissaA;
   wire [52:0] 	 mantissaB;
   wire [63:0] 	 sum, sum_tc, sum_corr, sum_norm;

   wire [5:0] 	 align_shift;
   wire [5:0] 	 norm_shift;
   wire [2:0] 	 sel_inv;
   wire		 op1_Norm, op2_Norm;
   wire		 opA_Norm, opB_Norm;
   wire		 Invalid;
   wire 	 DenormIn, DenormIO;
   wire [4:0] 	 FlagsIn;
   wire 	 exp_gt63;
   wire 	 Sticky_out;
   wire 	 signResult, sign_corr;
   wire 	 corr_sign;
   wire 	 zeroB;
   wire 	 convert;
   wire          swap;
   wire          sub;

   wire [63:0] 	 q1, qm1, qp1, q0, qm0, qp0;
   wire [63:0] 	 rega_out, regb_out, regc_out, regd_out;
   wire [127:0]  regr_out;
   wire [2:0] 	 sel_muxa, sel_muxb;
   wire 	 sel_muxr;
   wire 	 load_rega, load_regb, load_regc, load_regd, load_regr, load_regs;

   wire 	 donev, sel_muxrv, sel_muxsv;
   wire [1:0] 	 sel_muxav, sel_muxbv;
   wire 	 load_regav, load_regbv, load_regcv;
   wire 	 load_regrv, load_regsv;

   // "open" only absorbs the unused top sum bit of explogic1.
   logic exp_cout1, exp_cout2, exp_odd, open;


   // Convert the input operands to their appropriate forms based on
   // the original operands, the DivOpType , and their precision FmtE.
   // Single precision inputs are converted to double precision
   // and the sign of the first operand is set appropriately based on
   // if the operation is absolute value or negation.
   convert_inputs_div divconv1 (Float1, Float2, DivInput1E, DivInput2E, DivOpType, FmtE);

   // Test for exceptions and return the "Invalid Operation" and
   // "Denormalized" Input FDivSqrtFlgM. The "sel_inv" is used in
   // the third pipeline stage to select the result. Also, op1_Norm
   // and op2_Norm are one if DivInput1E and DivInput2E are not zero or denormalized.
   // sub is one if the effective operation is subtraction.
   // NOTE(review): "sub" is declared above but not driven or read in this module.
   exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm,
		      Float1, Float2, DivOpType);

   // Determine Sign/Mantissa
   // Sign: XOR of the operand signs when DivOpType = 0, sign of Float1 when DivOpType = 1.
   assign signResult = ((Float1[63]^Float2[63])&~DivOpType) | Float1[63]&DivOpType;
   // Mantissas with the leading one prepended.
   assign mantissaA = {vdd, Float1[51:0]};
   assign mantissaB = {vdd, Float2[51:0]};
   // Perform Exponent Subtraction - expA - expB + Bias
   assign exp1 = {2'b0, Float1[62:52]};
   assign exp2 = {2'b0, Float2[62:52]};
   // bias : DP = 2^{11-1}-1 = 1023
   assign bias = {3'h0, 10'h3FF};
   // Divide exponent: exp1 + ~exp2 + bias with carry-in 1, i.e. exp1 - exp2 + bias
   csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c); //***adder
   exp_add explogic1 (exp_cout1, {open, exp_diff}, //***adder?
		      {vss, exp_s}, {vss, exp_c}, 1'b1);
   // Sqrt exponent (check if exponent is odd)
   assign exp_odd = Float1[52] ? vss : vdd;
   exp_add explogic2 (exp_cout2, exp_sqrt, //***adder?
		      {vss, exp1}, {4'h0, 10'h3ff}, exp_odd);
   // Choose correct exponent (the sqrt path drops the LSB of exp_sqrt, halving it)
   assign expF = DivOpType ? exp_sqrt[13:1] : exp_diff;

   // Main Goldschmidt/Division Routine
   divconv goldy (q1, qm1, qp1, q0, qm0, qp0,
		  rega_out, regb_out, regc_out, regd_out,
		  regr_out, mantissaB, mantissaA,
		  sel_muxa, sel_muxb, sel_muxr,
		  reset, clk,
		  load_rega, load_regb, load_regc, load_regd,
		  load_regr, load_regs, FmtE, DivOpType, exp_odd);

   // FSM : control divider
   fsm control (FDivSqrtDoneE, load_rega, load_regb, load_regc, load_regd,
		load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
		clk, reset, FDivStartE, DivOpType, FDivBusyE, HoldInputs);

   // Round the mantissa to a 52-bit value, with the leading one
   // removed. The rounding units also handles special cases and
   // set the exception flags.
   //***add max magnitude and swap negative and positive infinity
   rounder_div divround1 (Result, DenormIO, FlagsIn,
   			  FrmE, FmtE, DivOvEn, DivUnEn, expF,
   			  sel_inv, Invalid, DenormIn, signResult,
			  q1, qm1, qp1, q0, qm0, qp0, regr_out);

   // Store the final result and the exception flags in registers.
   // All three registers are enabled by FDivSqrtDoneE.
   flopenr #(64) rega (clk, reset, FDivSqrtDoneE, Result, FDivResultM);
   flopenr #(1) regb (clk, reset, FDivSqrtDoneE, DenormIO, DivDenormM);
   flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivSqrtFlgM);

endmodule // fdivsqrt

//
// Brent-Kung Prefix Adder
//   (yes, it is 14 bits as my generator is broken for 13 bits :(
//    assume, synthesizer will delete stuff not needed )
//
// exp_add: sum = a + b + cin (14 bits) with carry-out cout.
// The carry-in is folded into the prefix tree as generate bit 0, so the
// p/g vectors are one bit wider than the operands.
module exp_add (cout, sum, a, b, cin);

   input [13:0] a, b;
   input cin;

   output [13:0] sum;
   output cout;

   wire [14:0] p,g;
   wire [13:0] c;

   // pre-computation
   // position 0 carries cin (propagate 0, generate cin); operand bit i sits at position i+1
   assign p={a^b,1'b0};
   assign g={a&b, cin};

   // prefix tree
   // only positions 13:0 enter the tree; position 14 is resolved in cout below
   brent_kung prefix_tree(c, p[13:0], g[13:0]);

   // post-computation
   assign sum=p[14:1]^c;
   assign cout=g[14]|(p[14]&c[13]);

endmodule // exp_add

// brent_kung: 14-bit parallel-prefix carry network.
// c[k+1] is the group generate G_k_0 over positions k..0 of the p/g inputs.
// Built from "grey" and "black" cells, which are defined outside this file section.
module brent_kung (c, p, g);

   input [13:0] p;
   input [13:0] g;
   output [14:1] c;

   logic G_1_0, G_3_2,G_5_4,G_7_6,G_9_8,G_11_10,G_13_12,G_3_0,G_7_4,G_11_8;
   logic P_3_2,P_5_4,P_7_6,P_9_8,P_11_10,P_13_12,P_7_4,P_11_8;
   logic G_7_0,G_11_0,G_5_0,G_9_0,G_13_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0;
    // parallel-prefix, Brent-Kung

   // Stage 1: Generates G/P pairs that span 1 bits
   grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
   black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
   black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
   black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
   black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
   black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
   black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});

   // Stage 2: Generates G/P pairs that span 2 bits
   grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
   black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
   black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});

   // Stage 3: Generates G/P pairs that span 4 bits
   grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);

   // Stage 4: Generates G/P pairs that span 8 bits

   // Stage 5: Generates G/P pairs that span 4 bits
   grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);

   // Stage 6: Generates G/P pairs that span 2 bits
   grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
   grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
   grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12);

   // Last grey cell stage
   grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
   grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
   grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
   grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
   grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
   grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);

   // Final Stage: Apply c_k+1=G_k_0
   assign c[1]=g[0];
   assign c[2]=G_1_0;
   assign c[3]=G_2_0;
   assign c[4]=G_3_0;
   assign c[5]=G_4_0;
   assign c[6]=G_5_0;
   assign c[7]=G_6_0;
   assign c[8]=G_7_0;
   assign c[9]=G_8_0;

   assign c[10]=G_9_0;
   assign c[11]=G_10_0;
   assign c[12]=G_11_0;
   assign c[13]=G_12_0;
   assign c[14]=G_13_0;

endmodule // brent_kung

// diff --git a/wally-pipelined/src/fpu/fhazard.sv b/wally-pipelined/src/fpu/fhazard.sv new file mode 100644 index 000000000..53f7dde2c --- /dev/null +++ b/wally-pipelined/src/fpu/fhazard.sv @@ -0,0 +1,67 @@
///////////////////////////////////////////
// fpuhazard.sv
//
// Written: me@KatherineParry.com 19 May 2021
// Modified:
//
// Purpose: Determine forwarding, stalls and flushes for the FPU
//
// A component of the Wally configurable RISC-V project.
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
///////////////////////////////////////////

`include "wally-config.vh"

// fhazard: forwarding and stall decisions for the three FPU source operands.
// For each operand (X/Y/Z <- Adr1E/Adr2E/Adr3E):
//   - a match against RdM with FWriteEnM set takes priority: forward (2'b10)
//     when FResultSelM == 3'b100, otherwise request a stall via FStallD;
//   - otherwise a match against RdW with FWriteEnW set forwards 2'b01;
//   - otherwise the register-file read data is used (2'b00).
module fhazard(
  input  logic [4:0] Adr1E, Adr2E, Adr3E,
  input  logic       FWriteEnM, FWriteEnW,
  input  logic [4:0] RdM, RdW,
  input  logic [2:0] FResultSelM,
  output logic       FStallD,
  output logic [1:0] ForwardXE, ForwardYE, ForwardZE
);

  logic ResultIsFResM;                   // the memory-stage result can be forwarded
  logic HitXM, HitYM, HitZM;             // operand matches the memory-stage destination
  logic HitXW, HitYW, HitZW;             // operand matches the writeback-stage destination

  assign ResultIsFResM = (FResultSelM == 3'b100);

  assign HitXM = (Adr1E == RdM) & FWriteEnM;
  assign HitYM = (Adr2E == RdM) & FWriteEnM;
  assign HitZM = (Adr3E == RdM) & FWriteEnM;

  assign HitXW = (Adr1E == RdW) & FWriteEnW;
  assign HitYW = (Adr2E == RdW) & FWriteEnW;
  assign HitZW = (Adr3E == RdW) & FWriteEnW;

  always_comb begin
    // set ReadData as default
    ForwardXE = 2'b00; // choose FRD1E
    ForwardYE = 2'b00; // choose FRD2E
    ForwardZE = 2'b00; // choose FRD3E
    FStallD   = 1'b0;

    // operand X
    if (HitXM) begin
      if (ResultIsFResM) ForwardXE = 2'b10; // choose FResM
      else               FStallD   = 1'b1;  // if the result won't be ready stall
    end else if (HitXW) begin
      ForwardXE = 2'b01;                    // choose FPUResult64W
    end

    // operand Y
    if (HitYM) begin
      if (ResultIsFResM) ForwardYE = 2'b10; // choose FResM
      else               FStallD   = 1'b1;  // if the result won't be ready stall
    end else if (HitYW) begin
      ForwardYE = 2'b01;                    // choose FPUResult64W
    end

    // operand Z
    if (HitZM) begin
      if (ResultIsFResM) ForwardZE = 2'b10; // choose FResM
      else               FStallD   = 1'b1;  // if the result won't be ready stall
    end else if (HitZW) begin
      ForwardZE = 2'b01;                    // choose FPUResult64W
    end
  end

endmodule
// diff --git a/wally-pipelined/src/fpu/fregfile.sv b/wally-pipelined/src/fpu/fregfile.sv new file mode 100644 index 000000000..78c24b3e6 --- /dev/null +++ b/wally-pipelined/src/fpu/fregfile.sv @@ -0,0 +1,54 @@
///////////////////////////////////////////
// regfile.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: 4-port register file
//
// A component of the Wally configurable RISC-V project.
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
///////////////////////////////////////////

`include "wally-config.vh"

// fregfile: 32-entry x 64-bit floating-point register file.
//   - three combinational read ports (a1/rd1, a2/rd2, a3/rd3), each driven
//     through a #2 delayed continuous assignment
//   - one write port (a4/wd4/we4) that updates on the FALLING edge of clk
//   - asynchronous, active-high reset clears every entry
module fregfile (
  input  logic        clk, reset,
  input  logic        we4,
  input  logic [ 4:0] a1, a2, a3, a4,
  input  logic [63:0] wd4, //KEP `XLEN-1 changed to 63 (lint warning) *** figure out if double can be supported when XLEN = 32
  output logic [63:0] rd1, rd2, rd3);

  logic [63:0] rf[31:0];

  // reset is intended for simulation only, not synthesis
  always_ff @(negedge clk or posedge reset) begin
    if (reset) begin
      for (int idx = 0; idx < 32; idx++)
        rf[idx] <= '0;
    end else if (we4) begin
      rf[a4] <= wd4;
    end
  end

  // combinational reads
  assign #2 rd1 = rf[a1];
  assign #2 rd2 = rf[a2];
  assign #2 rd3 = rf[a3];

endmodule // fregfile