FPU update - missing files

This commit is contained in:
Katherine Parry 2021-07-02 12:53:05 -04:00
parent 3f61e313d2
commit 72406b8a88
5 changed files with 904 additions and 0 deletions

View File

@ -0,0 +1,62 @@
`include "wally-config.vh"
// fclassify: produce the RISC-V FCLASS result mask for a floating-point operand.
//
// Inputs:
//   SrcXE - raw operand bits. A double occupies SrcXE[63:0]; a single is read
//           from the upper word SrcXE[63:32].
//   FmtE  - 0 selects the single-precision layout, 1 the double-precision layout.
// Output:
//   ClassResE - bits [9:0] hold the class mask (layout listed above the final
//               assign), bits [63:10] are zero. Exactly one of bits [9:0] is
//               set for any input.
module fclassify (
input logic [63:0] SrcXE,
input logic FmtE, // 0-Single 1-Double
output logic [63:0] ClassResE
);
logic [31:0] Single;
logic [63:0] Double;
logic Sgn;
logic Inf, NaN, Zero, Norm, Denorm;
logic PInf, QNaN, PZero, PNorm, PDenorm;
logic NInf, SNaN, NZero, NNorm, NDenorm;
logic MaxExp, ExpZero, ManZero, FirstBitFrac;
// Single and Double precision layouts
assign Single = SrcXE[63:32];
assign Double = SrcXE;
// the sign is bit 63 in both layouts because the single sits in the upper word
assign Sgn = SrcXE[63];
// basic calculations for readability
assign ExpZero = FmtE ? ~|Double[62:52] : ~|Single[30:23]; // exponent field is all zeros
assign MaxExp = FmtE ? &Double[62:52] : &Single[30:23];    // exponent field is all ones
assign ManZero = FmtE ? ~|Double[51:0] : ~|Single[22:0];   // fraction field is all zeros
assign FirstBitFrac = FmtE ? Double[51] : Single[22];      // MSB of the fraction (quiet bit)
// determine the type of number
assign NaN = MaxExp & ~ManZero;
assign Inf = MaxExp & ManZero;
assign Zero = ExpZero & ManZero;
assign Denorm= ExpZero & ~ManZero;
// A normal number has an exponent that is neither all zeros nor all ones.
// Excluding MaxExp keeps Inf/NaN from also being reported as normal, so the
// result mask stays one-hot.
assign Norm = ~ExpZero & ~MaxExp;
// determine the sub categories
assign QNaN = FirstBitFrac&NaN;
assign SNaN = ~FirstBitFrac&NaN;
assign PInf = ~Sgn&Inf;
assign NInf = Sgn&Inf;
assign PNorm = ~Sgn&Norm;
assign NNorm = Sgn&Norm;
assign PDenorm = ~Sgn&Denorm;
assign NDenorm = Sgn&Denorm;
assign PZero = ~Sgn&Zero;
assign NZero = Sgn&Zero;
// determine sub category and combine into the result
// bit 0 - -Inf
// bit 1 - -Norm
// bit 2 - -Denorm
// bit 3 - -Zero
// bit 4 - +Zero
// bit 5 - +Denorm
// bit 6 - +Norm
// bit 7 - +Inf
// bit 8 - signaling NaN
// bit 9 - quiet NaN
assign ClassResE = {{54{1'b0}}, QNaN, SNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
endmodule

465
wally-pipelined/src/fpu/fcmp.sv Executable file
View File

@ -0,0 +1,465 @@
//
// File name : fpcomp.v
// Title : Floating-Point Comparator
// project : FPU
// Library : fpcomp
// Author(s) : James E. Stine
// Purpose : definition of main unit to floating-point comparator
// notes :
//
// Copyright Oklahoma State University
//
// Floating Point Comparator (Algorithm)
//
// 1.) Performs sign-extension if the inputs are 32-bit integers.
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
// 3.) Check for special cases (+0=-0, unordered, and infinite values)
// and correct for sign bits
//
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
// signals, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as indicated below.
// FOpCtrlE Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 (unused)
//
// The comparator produces a 2-bit signal FCC, which
// indicates the result of the comparison:
//
// fcc description
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
//
// It also produces an invalid operation flag, which is one
// if either of the input operands is a signaling NaN per 754
`include "wally-config.vh"
// fcmp: floating-point compare / min / max unit.
//
// Ports:
//   op1, op2  - the two operands (raw floating-point bit patterns).
//   FOpCtrlE  - operation select; forwarded unchanged to exception_cmp_2, which
//               decodes it into min / max / eq / lt / le.
//   FmtE      - operand precision. NOTE(review): taken as 0 = single,
//               1 = double, matching the FmtE port comment on fclassify in
//               this same change set - confirm against the decoder.
//   Invalid   - invalid-operation flag from exception_cmp_2.
//   CmpResE   - result from exception_cmp_2.
//
// Structure: the 64-bit magnitude comparison is split into two halves
// (magcompare64b_1 / magcompare64b_2) and the special-case logic into two
// halves (exception_cmp_1 / exception_cmp_2).
module fcmp (
input logic [63:0] op1,
input logic [63:0] op2,
input logic [2:0] FOpCtrlE,
input logic FmtE,
output logic Invalid, // Invalid Operation
// output logic [1:0] FCC, // Condition Codes
output logic [63:0] CmpResE);
logic [1:0] FCC; // Condition Codes (internal only; not exported)
logic [7:0] w, x; // intermediate comparison-tree outputs passed between the two halves
logic ANaN, BNaN;
logic Azero, Bzero;
logic LT; // magnitude op1 < magnitude op2
logic EQ; // magnitude op1 = magnitude op2
logic [2:0] PrecisionSel; // precision code in the encoding exception_cmp_1 decodes

// exception_cmp_1 decodes the low two bits of its last port as a precision
// code (00 = double, 01 = single, 10 = half). Feeding it the operation select
// made NaN detection depend on which instruction was executing, so build the
// precision code from FmtE instead: FmtE=1 -> 00 (double), FmtE=0 -> 01 (single).
assign PrecisionSel = {2'b00, ~FmtE};

// First half of the magnitude comparison. The sign bit of each operand is
// inverted before it enters the comparator tree.
magcompare64b_1 magcomp1 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]});
// Special-case detection: NaN and zero flags for both operands.
exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, PrecisionSel);
// Second half of the magnitude comparison. Only LT and EQ are returned, since
// GT can be determined from these values.
magcompare64b_2 magcomp2 (LT, EQ, w, x);
// Determine final values based on output of magnitude comparison,
// sign bits, and special case testing. All connections are explicit
// (FmtE and CmpResE were previously picked up through a .* wildcard).
exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .FmtE(FmtE), .CmpResE(CmpResE));
endmodule // fcmp
// module magcompare2b (LT, GT, A, B);
// input logic [1:0] A;
// input logic [1:0] B;
// output logic LT;
// output logic GT;
// // Determine if A < B using a minimized sum-of-products expression
// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
// // Determine if A > B using a minimized sum-of-products expression
// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
// endmodule // magcompare2b
// 2-bit magnitude comparator
// This module compares two 2-bit values A and B. LT is '1' if A < B
// and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
// this version actually incorporates don't cares into the equation to
// simplify the optimization
// magcompare2c: 2-bit comparator cell used in the upper levels of the
// comparison tree.
//   LT = B[1] | (~A[1] & B[0])
//   GT = A[1] | (~B[1] & A[0])
// Both outputs can be 1 at once (e.g. A = B = 2'b11); per the description
// above this cell, the equations were simplified using don't-care inputs.
module magcompare2c (
   output logic       LT,
   output logic       GT,
   input  logic [1:0] A,
   input  logic [1:0] B);

   always_comb begin
      LT = (~A[1] & B[0]) | B[1];
      GT = (~B[1] & A[0]) | A[1];
   end
endmodule // magcompare2c
// This module compares two 64-bit values A and B. LT is '1' if A < B
// and EQ is '1' if A = B. LT and EQ are both '0' if A > B.
// This structure was modified so
// that it only does a strict magnitude comparison, and only
// returns flags for less than (LT) and equal to (EQ). It uses a tree
// of 63 2-bit magnitude comparators, followed by one OR gate.
//
// J. E. Stine and M. J. Schulte, "A combined two's complement and
// floating-point comparator," 2005 IEEE International Symposium on
// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
// doi: 10.1109/ISCAS.2005.1464531
// magcompare64b_1: first three levels of the 64-bit comparison tree.
//
//   Level 1: 32 magcompare2b cells, one per 2-bit slice of A and B.
//   Level 2: 16 magcompare2c cells, each combining two adjacent level-1 results.
//   Level 3:  8 magcompare2c cells, each combining two adjacent level-2 results.
//
// Between levels the two result vectors are deliberately swapped: the cell's
// A input takes the previous level's second output and its B input takes the
// previous level's first output, exactly as in the hand-unrolled netlist.
// The eight-bit vectors w and x are the level-3 outputs, consumed by
// magcompare64b_2.
//
// NOTE(review): the cells are now created by generate loops, so their
// hierarchical instance names differ from the original mag1..mag38.
module magcompare64b_1 (
   output logic [7:0]  w,
   output logic [7:0]  x,
   input  logic [63:0] A,
   input  logic [63:0] B);

   logic [31:0] s, t; // level-1 outputs (first / second cell output)
   logic [15:0] u, v; // level-2 outputs (first / second cell output)

   genvar k;
   generate
      // Level 1: compare each 2-bit slice of the operands.
      for (k = 0; k < 32; k = k + 1) begin : level1
         magcompare2b cell2b (s[k], t[k], A[2*k +: 2], B[2*k +: 2]);
      end
      // Level 2: merge pairs of level-1 results (t feeds A, s feeds B).
      for (k = 0; k < 16; k = k + 1) begin : level2
         magcompare2c cell2c (u[k], v[k], t[2*k +: 2], s[2*k +: 2]);
      end
      // Level 3: merge pairs of level-2 results (v feeds A, u feeds B).
      for (k = 0; k < 8; k = k + 1) begin : level3
         magcompare2c cell2c (w[k], x[k], v[2*k +: 2], u[2*k +: 2]);
      end
   endgenerate
endmodule // magcompare64b_1
// This module takes 64-bits inputs A and B, two magnitude comparison
// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as indicated below.
// FOpCtrlE Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 bfloat precision numbers
//
// The comparator produces a 2-bit signal fcc, which
// indicates the result of the comparison as follows:
// fcc description
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
// It also produces a invalid operation flag, which is one
// if either of the input operands is a signaling NaN.
// exception_cmp_1: first half of the comparator's special-case logic.
//
// Inputs:
//   A, B     - operands. Every precision keeps its sign in bit 63 with the
//              exponent directly below it (narrower formats occupy the
//              upper bits of the 64-bit word).
//   FOpCtrlE - precision code; only bits [1:0] are decoded here:
//                00 double, 01 single, 10 half, 11 no format (NaN flags stay 0).
// Outputs:
//   ANaN, BNaN   - operand is a NaN in the selected precision.
//   Azero, Bzero - the 63 low bits of the operand are all zero (+0 or -0).
module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE);
input logic [63:0] A;
input logic [63:0] B;
input logic [2:0] FOpCtrlE;
logic dp, sp, hp;
output logic ANaN;
output logic BNaN;
output logic Azero;
output logic Bzero;
assign dp = ~FOpCtrlE[1] & ~FOpCtrlE[0];
assign sp = ~FOpCtrlE[1] &  FOpCtrlE[0];
assign hp =  FOpCtrlE[1] & ~FOpCtrlE[0];
// Test if A or B is NaN: exponent field all ones AND a non-zero fraction.
// The whole fraction is examined; checking only its top two bits would miss
// NaNs whose payload lives in the lower bits (e.g. 64'h7FF0_0000_0000_0001).
//   double: exponent [62:52], fraction [51:0]
//   single: exponent [62:55], fraction [54:32]
//   half  : exponent [62:58], fraction [57:48]
assign ANaN = (dp & (&A[62:52]) & (|A[51:0]))  |
              (sp & (&A[62:55]) & (|A[54:32])) |
              (hp & (&A[62:58]) & (|A[57:48]));
assign BNaN = (dp & (&B[62:52]) & (|B[51:0]))  |
              (sp & (&B[62:55]) & (|B[54:32])) |
              (hp & (&B[62:58]) & (|B[57:48]));
// Test if A is +0 or -0 when viewed as a floating point number (i.e,
// the 63 least significant bits of A are zero).
// Depending on how this synthesizes, it may work better to replace
// this with assign Azero = ~(A[62] | A[61] | ... | A[0])
assign Azero = (A[62:0] == 63'h0);
assign Bzero = (B[62:0] == 63'h0);
endmodule // exception_cmp_1
//
// File name : fpcomp.v
// Title : Floating-Point Comparator
// project : FPU
// Library : fpcomp
// Author(s) : James E. Stine
// Purpose : definition of main unit to floating-point comparator
// notes :
//
// Copyright Oklahoma State University
//
// Floating Point Comparator (Algorithm)
//
// 1.) Performs sign-extension if the inputs are 32-bit integers.
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
// 3.) Check for special cases (+0=-0, unordered, and infinite values)
// and correct for sign bits
//
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
// signals, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as indicated below.
// FOpCtrlE Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 (unused)
//
// The comparator produces a 2-bit signal FCC, which
// indicates the result of the comparison:
//
// fcc description
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
//
// It also produces an invalid operation flag, which is one
// if either of the input operands is a signaling NaN per 754
/*module magcompare2b (LT, GT, A, B);
input logic [1:0] A;
input logic [1:0] B;
output logic LT;
output logic GT;
// Determine if A < B using a minimized sum-of-products expression
assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
// Determine if A > B using a minimized sum-of-products expression
assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
endmodule*/ // magcompare2b
// 2-bit magnitude comparator
// This module compares two 2-bit values A and B. LT is '1' if A < B
// and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
// this version actually incorporates don't cares into the equation to
// simplify the optimization
// module magcompare2c (LT, GT, A, B);
// input logic [1:0] A;
// input logic [1:0] B;
// output logic LT;
// output logic GT;
// assign LT = B[1] | (!A[1]&B[0]);
// assign GT = A[1] | (!B[1]&A[0]);
// endmodule // magcompare2b
// This module compares two 64-bit values A and B. LT is '1' if A < B
// and EQ is '1' if A = B. LT and EQ are both '0' if A > B.
// This structure was modified so
// that it only does a strict magnitude comparison, and only
// returns flags for less than (LT) and equal to (EQ). It uses a tree
// of 63 2-bit magnitude comparators, followed by one OR gate.
//
// J. E. Stine and M. J. Schulte, "A combined two's complement and
// floating-point comparator," 2005 IEEE International Symposium on
// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
// doi: 10.1109/ISCAS.2005.1464531
// magcompare64b_2: final three levels of the 64-bit comparison tree.
//
// Takes the eight-bit vectors w and x (produced by magcompare64b_1) and
// reduces them through 4 + 2 + 1 magcompare2c cells. As in the first half,
// the two result vectors are swapped on the way into each following level.
//
// Outputs:
//   LT - first output of the root cell.
//   EQ - 1 when neither output of the root cell is set.
//
// NOTE(review): the cells are now created by generate loops, so their
// hierarchical instance names differ from the original mag39..mag3F.
module magcompare64b_2 (
   output logic       LT,
   output logic       EQ,
   input  logic [7:0] w,
   input  logic [7:0] x);

   logic [3:0] l4_first, l4_second; // outputs of the four level-4 cells
   logic [1:0] l5_first, l5_second; // outputs of the two level-5 cells
   logic       root_second;         // second (GT) output of the root cell

   genvar k;
   generate
      // Level 4: x feeds the A input, w feeds the B input.
      for (k = 0; k < 4; k = k + 1) begin : level4
         magcompare2c cell2c (l4_first[k], l4_second[k], x[2*k +: 2], w[2*k +: 2]);
      end
      // Level 5: second outputs feed A, first outputs feed B.
      for (k = 0; k < 2; k = k + 1) begin : level5
         magcompare2c cell2c (l5_first[k], l5_second[k], l4_second[2*k +: 2], l4_first[2*k +: 2]);
      end
   endgenerate

   // Root cell produces the final less-than / greater-than pair.
   magcompare2c root (LT, root_second, l5_second, l5_first);

   // Equal exactly when the root reports neither less-than nor greater-than.
   assign EQ = ~LT & ~root_second;
endmodule // magcompare64b_2
// This module takes 64-bits inputs A and B, two magnitude comparison
// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as indicated below.
// FOpCtrlE Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 bfloat precision numbers
//
// The comparator produces a 2-bit signal fcc, which
// indicates the result of the comparison as follows:
// fcc description
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
// It also produces a invalid operation flag, which is one
// if either of the input operands is a signaling NaN.
// exception_cmp_2: second half of the comparator's special-case logic.
// Combines the magnitude-comparison flags with sign, zero and NaN information
// and produces the final compare / min / max result.
//
// Inputs:
//   A, B           - operands (sign in bit 63 for every precision).
//   FmtE           - operand precision. NOTE(review): taken as 0 = single
//                    (held in bits [63:32]), 1 = double, matching the FmtE
//                    port comment on fclassify - confirm against the decoder.
//   LT_mag, EQ_mag - less-than / equal flags from the magnitude comparator.
//   FOpCtrlE       - operation select:
//                      111 min, 101 max, 010 equal, 001 less than,
//                      011 less than or equal, anything else -> result 0.
//   Azero, Bzero   - operand is +0 or -0.
//   ANaN, BNaN     - operand is a NaN.
// Outputs:
//   invalid - invalid-operation flag: set for any signaling NaN input, and for
//             any NaN input on the less-than / less-or-equal operations.
//   fcc     - 00 A = B, 01 A < B, 10 A > B, 11 unordered.
//   CmpResE - selected operand for min/max, or a 0/1 flag in bit 0 for compares.
module exception_cmp_2 (
input logic [63:0] A,
input logic [63:0] B,
input logic FmtE,
input logic LT_mag,
input logic EQ_mag,
input logic [2:0] FOpCtrlE,
output logic invalid,
output logic [1:0] fcc,
output logic [63:0] CmpResE,
input logic Azero,
input logic Bzero,
input logic ANaN,
input logic BNaN);
logic dp;
logic sp;
logic ASNaN;
logic BSNaN;
logic UO;
logic GT;
logic LT;
logic EQ;
logic SignalingCmp;
logic [63:0] CanonicalNaN;
// Precision comes from FmtE. FOpCtrlE is the operation select (see the case
// statement below), so decoding precision from its low bits made NaN handling
// depend on which instruction was executing.
assign dp = FmtE;
assign sp = ~FmtE;
// Values are unordered if (A is NaN) OR (B is NaN).
assign UO = (ANaN | BNaN);
// Test if A or B is a signaling NaN: a NaN whose quiet bit - the most
// significant fraction bit - is clear. That bit is 54 for a single held in
// the upper word and 51 for a double.
assign ASNaN = ANaN & (sp&~A[54] | dp&~A[51]);
assign BSNaN = BNaN & (sp&~B[54] | dp&~B[51]);
// "less than" and "less than or equal" are signaling comparisons: they raise
// invalid for ANY NaN operand. Equal, min and max raise it only for a
// signaling NaN.
assign SignalingCmp = (FOpCtrlE == 3'b001) | (FOpCtrlE == 3'b011);
assign invalid = ASNaN | BSNaN | (SignalingCmp & UO);
// A and B are equal if (their magnitude comparison says equal) OR (both are
// zero, regardless of sign). Also, A and B are not equal if they are unordered.
assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO);
// A is less than B if (both are negative and the magnitude comparison does
// NOT say less-than) OR (they are not both negative and the magnitude
// comparison says less-than).
// Also, A is not less than B if A and B are equal or unordered.
assign LT = ((~LT_mag & A[63] & B[63]) |
(LT_mag & ~(A[63] & B[63])))&~EQ&~UO;
// A is greater than B when LT, EQ, and UO are false.
assign GT = ~(LT | EQ | UO);
// Set the bits of fcc based on LT, GT, EQ, and UO
assign fcc[0] = LT | UO;
assign fcc[1] = GT | UO;
// Canonical quiet NaN in the active precision, returned by min/max when both
// operands are NaN.
// NOTE(review): for single precision the unused lower word is driven to zero -
// confirm this matches how single results are carried elsewhere.
assign CanonicalNaN = dp ? 64'h7FF8000000000000 : {32'h7FC00000, 32'h0};
always_comb begin
case (FOpCtrlE[2:0])
3'b111: // min
  if (ANaN & BNaN) CmpResE = CanonicalNaN;
  else if (ANaN) CmpResE = B; // only A is NaN: return the number
  else if (BNaN) CmpResE = A; // only B is NaN: return the number
  // -0 counts as smaller than +0: on a tie keep A when A is negative
  else CmpResE = (LT | (EQ & A[63])) ? A : B;
3'b101: // max
  if (ANaN & BNaN) CmpResE = CanonicalNaN;
  else if (ANaN) CmpResE = B;
  else if (BNaN) CmpResE = A;
  // +0 counts as larger than -0: on a tie keep A when A is positive
  else CmpResE = (GT | (EQ & ~A[63])) ? A : B;
3'b010: CmpResE = {63'b0, EQ};//equal
3'b001: CmpResE = {63'b0, LT};//less than
3'b011: CmpResE = {63'b0, LT|EQ};//less than or equal
default: CmpResE = 64'b0;
endcase
end
endmodule // exception_cmp_2

View File

@ -0,0 +1,256 @@
//
// File name : fpdiv
// Title : Floating-Point Divider/Square-Root
// project : FPU
// Library : fpdiv
// Author(s) : James E. Stine, Jr.
// Purpose : definition of main unit to floating-point div/sqrt
// notes :
//
// Copyright Oklahoma State University
//
// Basic Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Exponent Logic
// Step 4: Divide/Sqrt using Goldschmidt
// Step 5: Normalize the result.//
// Shift left until normalized. Normalized when the value to the
// left of the binrary point is 1.
// Step 6: Round the result.//
// Step 7: Put quotient/remainder onto output.
//
// `timescale 1ps/1ps
// fdivsqrt: top level of the floating-point divide / square-root unit.
//
// This module is purely structural. It wires together:
//   - convert_inputs_div : produces Float1/Float2 from the raw operands
//   - exception_div      : produces sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm
//   - csa + exp_add      : exponent arithmetic for divide and for square root
//   - divconv            : the iterative datapath (registers a..d, r)
//   - fsm                : generates the register-load and mux-select controls
//   - rounder_div        : produces the final Result and FlagsIn
//   - flopenr            : output registers for result, denorm flag and flags
// All of those modules are defined outside this file section; their port
// meanings below are inferred from the connected signal names only.
//
// DivOpType selects between the two operations: when it is 1 the square-root
// exponent (exp_sqrt) and the sign of operand 1 are used; when it is 0 the
// divide exponent (exp_diff) and the XOR of both operand signs are used.
//
// NOTE(review): several wires declared below (e.g. IntValue, sum*, align_shift,
// norm_shift, the *v control signals, exponent/exp_pre, zeroB, convert, swap,
// sub) are not referenced anywhere in this module.
module fdivsqrt (FDivSqrtDoneE, FDivResultM, FDivSqrtFlgM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
FDivStartE, reset, clk, FDivBusyE, HoldInputs);
input [63:0] DivInput1E; // 1st input operand (A)
input [63:0] DivInput2E; // 2nd input operand (B)
input [2:0] FrmE; // Rounding mode - specify values
input DivOpType; // Function opcode (1 selects the sqrt exponent/sign path, 0 the divide path)
input FmtE; // Result Precision (0 for double, 1 for single) //***will need to swap this
input DivOvEn; // Overflow trap enabled
input DivUnEn; // Underflow trap enabled
input FDivStartE; // passed to the control FSM
input reset;
input clk;
output [63:0] FDivResultM; // Result of operation
output [4:0] FDivSqrtFlgM; // IEEE exception flags
output FDivSqrtDoneE; // driven by the control FSM; also gates the output registers
output FDivBusyE, HoldInputs; // both driven by the control FSM
// constant 1 and constant 0 nets
supply1 vdd;
supply0 vss;
wire [63:0] Float1;
wire [63:0] Float2;
wire [63:0] IntValue;
wire DivDenormM; // DivDenormM on input or output
wire [12:0] exp1, exp2, expF;
wire [12:0] exp_diff, bias;
wire [13:0] exp_sqrt;
wire [12:0] exp_s;
wire [12:0] exp_c;
wire [10:0] exponent, exp_pre;
wire [63:0] Result;
wire [52:0] mantissaA;
wire [52:0] mantissaB;
wire [63:0] sum, sum_tc, sum_corr, sum_norm;
wire [5:0] align_shift;
wire [5:0] norm_shift;
wire [2:0] sel_inv;
wire op1_Norm, op2_Norm;
wire opA_Norm, opB_Norm;
wire Invalid;
wire DenormIn, DenormIO;
wire [4:0] FlagsIn;
wire exp_gt63;
wire Sticky_out;
wire signResult, sign_corr;
wire corr_sign;
wire zeroB;
wire convert;
wire swap;
wire sub;
wire [63:0] q1, qm1, qp1, q0, qm0, qp0;
wire [63:0] rega_out, regb_out, regc_out, regd_out;
wire [127:0] regr_out;
wire [2:0] sel_muxa, sel_muxb;
wire sel_muxr;
wire load_rega, load_regb, load_regc, load_regd, load_regr, load_regs;
wire donev, sel_muxrv, sel_muxsv;
wire [1:0] sel_muxav, sel_muxbv;
wire load_regav, load_regbv, load_regcv;
wire load_regrv, load_regsv;
logic exp_cout1, exp_cout2, exp_odd, open; // 'open' only absorbs the unused top sum bit of explogic1
// Convert the input operands to their appropriate forms based on
// the original operands, the DivOpType, and their precision FmtE.
// Single precision inputs are converted to double precision.
// NOTE(review): the original comment also mentioned adjusting the sign of the
// first operand for absolute value / negation; that appears to be carried over
// from another unit - confirm against convert_inputs_div.
convert_inputs_div divconv1 (Float1, Float2, DivInput1E, DivInput2E, DivOpType, FmtE);
// Test for exceptions and return the "Invalid Operation" and
// "Denormalized" Input FDivSqrtFlgM. The "sel_inv" is used in
// the third pipeline stage to select the result. Also, op1_Norm
// and op2_Norm are one if DivInput1E and DivInput2E are not zero or denormalized.
// (op1_Norm and op2_Norm are not used further in this module.)
exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm,
Float1, Float2, DivOpType);
// Determine Sign/Mantissa
// Sign: XOR of both operand signs when DivOpType is 0, sign of operand 1 when it is 1.
assign signResult = ((Float1[63]^Float2[63])&~DivOpType) | Float1[63]&DivOpType;
// Mantissas: the 52 fraction bits with a constant 1 prepended as the leading bit.
assign mantissaA = {vdd, Float1[51:0]};
assign mantissaB = {vdd, Float2[51:0]};
// Perform Exponent Subtraction - expA - expB + Bias
// The 11-bit exponent fields are zero-extended to 13 bits.
assign exp1 = {2'b0, Float1[62:52]};
assign exp2 = {2'b0, Float2[62:52]};
// bias : DP = 2^{11-1}-1 = 1023
assign bias = {3'h0, 10'h3FF};
// Divide exponent
// exp1 + ~exp2 + bias is reduced to two vectors (exp_s, exp_c), which are then
// added with a carry-in of 1; ~exp2 plus that carry-in forms the subtraction.
csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c); //***adder
exp_add explogic1 (exp_cout1, {open, exp_diff}, //***adder?
{vss, exp_s}, {vss, exp_c}, 1'b1);
// Sqrt exponent (check if exponent is odd)
// exp_odd is 1 when the least significant exponent bit (Float1[52]) is 0.
assign exp_odd = Float1[52] ? vss : vdd;
// exp1 + 0x3FF + exp_odd; the sum is halved below by dropping bit 0.
exp_add explogic2 (exp_cout2, exp_sqrt, //***adder?
{vss, exp1}, {4'h0, 10'h3ff}, exp_odd);
// Choose correct exponent
assign expF = DivOpType ? exp_sqrt[13:1] : exp_diff;
// Main Goldschmidt/Division Routine
// Note the operand order: mantissaB is passed before mantissaA.
divconv goldy (q1, qm1, qp1, q0, qm0, qp0,
rega_out, regb_out, regc_out, regd_out,
regr_out, mantissaB, mantissaA,
sel_muxa, sel_muxb, sel_muxr,
reset, clk,
load_rega, load_regb, load_regc, load_regd,
load_regr, load_regs, FmtE, DivOpType, exp_odd);
// FSM : control divider
// Drives the done/busy/hold outputs and every load_* / sel_mux* control above.
fsm control (FDivSqrtDoneE, load_rega, load_regb, load_regc, load_regd,
load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
clk, reset, FDivStartE, DivOpType, FDivBusyE, HoldInputs);
// Round the mantissa to a 52-bit value, with the leading one
// removed. The rounding unit also handles special cases and
// sets the exception flags.
//***add max magnitude and swap negative and positive infinity
rounder_div divround1 (Result, DenormIO, FlagsIn,
FrmE, FmtE, DivOvEn, DivUnEn, expF,
sel_inv, Invalid, DenormIn, signResult,
q1, qm1, qp1, q0, qm0, qp0, regr_out);
// Store the final result and the exception flags in registers.
// FDivSqrtDoneE is the third argument of each register (presumably its
// enable - confirm against flopenr). DivDenormM is registered but is not an
// output of this module.
flopenr #(64) rega (clk, reset, FDivSqrtDoneE, Result, FDivResultM);
flopenr #(1) regb (clk, reset, FDivSqrtDoneE, DenormIO, DivDenormM);
flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivSqrtFlgM);
endmodule // fdivsqrt
//
// Brent-Kung Prefix Adder
// (yes, it is 14 bits as my generator is broken for 13 bits :(
// assume, synthesizer will delete stuff not needed )
//
// exp_add: 14-bit adder with carry-in and carry-out, built around the
// brent_kung prefix tree.
//
//   a, b - 14-bit addends
//   cin  - carry-in
//   sum  - 14-bit sum output
//   cout - carry out of the top bit
//
// The carry-in is injected by placing it in generate position 0 of the prefix
// tree (with propagate 0 there), which shifts every operand bit up by one
// position inside the tree.
module exp_add (
   output        cout,
   output [13:0] sum,
   input  [13:0] a,
   input  [13:0] b,
   input         cin);

   wire [13:0] prop;  // per-bit propagate: a ^ b
   wire [13:0] gen;   // per-bit generate : a & b
   wire [13:0] carry; // carry[i] is the carry into bit i (carry[0] equals cin)

   // pre-computation
   assign prop = a ^ b;
   assign gen  = a & b;

   // prefix tree: position 0 carries cin, positions 1..13 carry bits 0..12
   brent_kung prefix_tree (carry, {prop[12:0], 1'b0}, {gen[12:0], cin});

   // post-computation
   assign sum  = prop ^ carry;
   assign cout = gen[13] | (prop[13] & carry[13]);
endmodule // exp_add
// brent_kung: 14-position Brent-Kung parallel-prefix carry network.
//
//   p, g - per-position propagate and generate inputs (positions 0..13)
//   c    - c[k+1] is the group generate over positions k..0, for k = 0..13
//
// Signal naming: G_h_l / P_h_l are the group generate / propagate spanning
// positions h down to l. "black" cells output both a G and a P term; "grey"
// cells output only a G term (the cell definitions live outside this section).
module brent_kung (
   output [14:1] c,
   input  [13:0] p,
   input  [13:0] g);

   // pairwise terms
   logic G_1_0, G_3_2, G_5_4, G_7_6, G_9_8, G_11_10, G_13_12;
   logic        P_3_2, P_5_4, P_7_6, P_9_8, P_11_10, P_13_12;
   // wider intermediate terms
   logic G_7_4, G_11_8;
   logic P_7_4, P_11_8;
   // prefixes that reach down to position 0
   logic G_2_0, G_3_0, G_4_0, G_5_0, G_6_0, G_7_0;
   logic G_8_0, G_9_0, G_10_0, G_11_0, G_12_0, G_13_0;

   // Up-sweep level 1: combine adjacent positions into 2-wide groups
   grey  b_1_0   (G_1_0,            {g[1],  g[0]},  p[1]);
   black b_3_2   (G_3_2,   P_3_2,   {g[3],  g[2]},  {p[3],  p[2]});
   black b_5_4   (G_5_4,   P_5_4,   {g[5],  g[4]},  {p[5],  p[4]});
   black b_7_6   (G_7_6,   P_7_6,   {g[7],  g[6]},  {p[7],  p[6]});
   black b_9_8   (G_9_8,   P_9_8,   {g[9],  g[8]},  {p[9],  p[8]});
   black b_11_10 (G_11_10, P_11_10, {g[11], g[10]}, {p[11], p[10]});
   black b_13_12 (G_13_12, P_13_12, {g[13], g[12]}, {p[13], p[12]});

   // Up-sweep level 2: combine 2-wide groups into 4-wide groups
   grey  g_3_0  (G_3_0,          {G_3_2,   G_1_0}, P_3_2);
   black b_7_4  (G_7_4,  P_7_4,  {G_7_6,   G_5_4}, {P_7_6,   P_5_4});
   black b_11_8 (G_11_8, P_11_8, {G_11_10, G_9_8}, {P_11_10, P_9_8});

   // Up-sweep level 3: 8-wide prefix
   grey g_7_0 (G_7_0, {G_7_4, G_3_0}, P_7_4);

   // Down-sweep: extend existing prefixes by a 4-wide group ...
   grey g_11_0 (G_11_0, {G_11_8, G_7_0}, P_11_8);

   // ... then by 2-wide groups ...
   grey g_5_0  (G_5_0,  {G_5_4,   G_3_0},  P_5_4);
   grey g_9_0  (G_9_0,  {G_9_8,   G_7_0},  P_9_8);
   grey g_13_0 (G_13_0, {G_13_12, G_11_0}, P_13_12);

   // ... and finally by single positions to fill in the even-numbered prefixes
   grey g_2_0  (G_2_0,  {g[2],  G_1_0},  p[2]);
   grey g_4_0  (G_4_0,  {g[4],  G_3_0},  p[4]);
   grey g_6_0  (G_6_0,  {g[6],  G_5_0},  p[6]);
   grey g_8_0  (G_8_0,  {g[8],  G_7_0},  p[8]);
   grey g_10_0 (G_10_0, {g[10], G_9_0},  p[10]);
   grey g_12_0 (G_12_0, {g[12], G_11_0}, p[12]);

   // Final stage: c[k+1] = G_k_0, with c[1] taken directly from g[0]
   assign c = {G_13_0, G_12_0, G_11_0, G_10_0, G_9_0, G_8_0, G_7_0,
               G_6_0,  G_5_0,  G_4_0,  G_3_0,  G_2_0, G_1_0, g[0]};
endmodule // brent_kung

View File

@ -0,0 +1,67 @@
///////////////////////////////////////////
// fpuhazard.sv
//
// Written: me@KatherineParry.com 19 May 2021
// Modified:
//
// Purpose: Determine forwarding, stalls and flushes for the FPU
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// fhazard: forwarding and stall decisions for the three FPU source operands.
//
// Inputs:
//   Adr1E, Adr2E, Adr3E  - source register addresses of the three operands
//   RdM, RdW             - destination register addresses compared against them
//   FWriteEnM, FWriteEnW - write enables qualifying RdM / RdW
//   FResultSelM          - result select; the value 3'b100 marks a result that
//                          can be forwarded (FResM)
// Outputs (per operand X/Y/Z):
//   ForwardXE/YE/ZE = 2'b00  use the register-file read data
//                     2'b01  forward FPUResult64W (match on RdW)
//                     2'b10  forward FResM        (match on RdM)
//   FStallD         = 1 when any operand matches RdM but FResultSelM is not 3'b100
//
// A match on RdM takes priority over a match on RdW.
module fhazard(
input logic [4:0] Adr1E, Adr2E, Adr3E,
input logic FWriteEnM, FWriteEnW,
input logic [4:0] RdM, RdW,
input logic [2:0] FResultSelM,
output logic FStallD,
output logic [1:0] ForwardXE, ForwardYE, ForwardZE
);
  logic ResultFromFResM;            // the RdM result can be forwarded
  logic HitXM, HitYM, HitZM;        // operand matches an enabled write to RdM
  logic HitXW, HitYW, HitZW;        // operand matches an enabled write to RdW

  assign ResultFromFResM = (FResultSelM == 3'b100);
  assign HitXM = (Adr1E == RdM) & FWriteEnM;
  assign HitYM = (Adr2E == RdM) & FWriteEnM;
  assign HitZM = (Adr3E == RdM) & FWriteEnM;
  assign HitXW = (Adr1E == RdW) & FWriteEnW;
  assign HitYW = (Adr2E == RdW) & FWriteEnW;
  assign HitZW = (Adr3E == RdW) & FWriteEnW;

  always_comb begin
    // no stall unless one of the operands requests it below
    FStallD = 1'b0;

    // operand X
    if (HitXM) begin
      if (ResultFromFResM) begin
        ForwardXE = 2'b10;          // choose FResM
      end else begin
        ForwardXE = 2'b00;          // choose FRD1E
        FStallD   = 1'b1;           // result won't be ready: stall
      end
    end else if (HitXW) begin
      ForwardXE = 2'b01;            // choose FPUResult64W
    end else begin
      ForwardXE = 2'b00;            // choose FRD1E
    end

    // operand Y
    if (HitYM) begin
      if (ResultFromFResM) begin
        ForwardYE = 2'b10;          // choose FResM
      end else begin
        ForwardYE = 2'b00;          // choose FRD2E
        FStallD   = 1'b1;           // result won't be ready: stall
      end
    end else if (HitYW) begin
      ForwardYE = 2'b01;            // choose FPUResult64W
    end else begin
      ForwardYE = 2'b00;            // choose FRD2E
    end

    // operand Z
    if (HitZM) begin
      if (ResultFromFResM) begin
        ForwardZE = 2'b10;          // choose FResM
      end else begin
        ForwardZE = 2'b00;          // choose FRD3E
        FStallD   = 1'b1;           // result won't be ready: stall
      end
    end else if (HitZW) begin
      ForwardZE = 2'b01;            // choose FPUResult64W
    end else begin
      ForwardZE = 2'b00;            // choose FRD3E
    end
  end
endmodule

View File

@ -0,0 +1,54 @@
///////////////////////////////////////////
// regfile.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: 4-port register file
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// fregfile: 32-entry x 64-bit floating-point register file with three read
// ports and one write port.
//
//   a1/rd1, a2/rd2, a3/rd3 - combinational read ports (each output is driven
//                            through a #2 continuous-assignment delay)
//   a4/wd4/we4             - write port; the write is performed on the
//                            FALLING edge of clk when we4 is high
//   reset                  - asynchronous, active high; clears every entry
//                            (intended for simulation only, not synthesis)
module fregfile (
input logic clk, reset,
input logic we4,
input logic [ 4:0] a1, a2, a3, a4,
input logic [63:0] wd4, //KEP `XLEN-1 changed to 63 (lint warning) *** figure out if double can be suported when XLEN = 32
output logic [63:0] rd1, rd2, rd3);

  logic [63:0] rf[31:0];

  // write port and reset
  always_ff @(negedge clk or posedge reset) begin
    if (reset) begin
      for (int entry = 0; entry < 32; entry++) begin
        rf[entry] <= '0;
      end
    end else if (we4) begin
      rf[a4] <= wd4;
    end
  end

  // read ports
  assign #2 rd1 = rf[a1];
  assign #2 rd2 = rf[a2];
  assign #2 rd3 = rf[a3];
endmodule // fregfile