From f5bfdf46db3b67fefe66c355a9da9e9871e4347e Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Wed, 14 Jul 2021 17:56:49 -0400 Subject: [PATCH] fpu unpacking unit created --- wally-pipelined/src/fpu/divconv.sv | 211 +++++++++--------- wally-pipelined/src/fpu/faddcvt.sv | 20 +- wally-pipelined/src/fpu/fclassify.sv | 63 +++--- wally-pipelined/src/fpu/fcmp.sv | 170 ++------------ wally-pipelined/src/fpu/fcvt.sv | 66 ++---- wally-pipelined/src/fpu/fhazard.sv | 20 +- wally-pipelined/src/fpu/fma.sv | 211 ++++++------------ wally-pipelined/src/fpu/fpdiv.sv | 7 +- wally-pipelined/src/fpu/fpu.sv | 103 ++++++--- wally-pipelined/src/fpu/fsgn.sv | 22 +- wally-pipelined/src/fpu/fsm_div.v | 2 + wally-pipelined/src/fpu/sbtm.sv | 58 ++--- wally-pipelined/src/fpu/sbtm2.sv | 58 ++--- wally-pipelined/src/fpu/sbtm3.sv | 56 ++--- wally-pipelined/src/fpu/sbtm_a4.sv | 2 +- wally-pipelined/src/fpu/sbtm_a5.sv | 2 +- wally-pipelined/src/fpu/unpacking.sv | 77 +++++++ wally-pipelined/src/ieu/ieu.sv | 5 +- .../src/wally/wallypipelinedhart.sv | 1 + 19 files changed, 520 insertions(+), 634 deletions(-) create mode 100644 wally-pipelined/src/fpu/unpacking.sv diff --git a/wally-pipelined/src/fpu/divconv.sv b/wally-pipelined/src/fpu/divconv.sv index 8fdddaa4..2f5a6df6 100755 --- a/wally-pipelined/src/fpu/divconv.sv +++ b/wally-pipelined/src/fpu/divconv.sv @@ -1,4 +1,3 @@ -`timescale 1ps/1ps module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out, regr_out, d, n, sel_muxa, sel_muxb, sel_muxr, reset, clk, load_rega, load_regb, load_regc, load_regd, load_regr, load_regs, P, op_type, exp_odd); @@ -106,123 +105,123 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_o endmodule // divconv -module adder #(parameter WIDTH=8) - (input logic [WIDTH-1:0] a, b, - input logic cin, - output logic [WIDTH-1:0] y, - output logic cout); +// module adder #(parameter WIDTH=8) +// (input logic [WIDTH-1:0] a, b, +// input logic cin, +// output logic 
[WIDTH-1:0] y, +// output logic cout); - assign {cout, y} = a + b + cin; +// assign {cout, y} = a + b + cin; -endmodule // adder +// endmodule // adder -module flopenr #(parameter WIDTH = 8) - (input logic clk, reset, en, - input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q); +// module flopenr #(parameter WIDTH = 8) +// (input logic clk, reset, en, +// input logic [WIDTH-1:0] d, +// output logic [WIDTH-1:0] q); - always_ff @(posedge clk, posedge reset) - if (reset) q <= #10 0; - else if (en) q <= #10 d; +// always_ff @(posedge clk, posedge reset) +// if (reset) q <= #10 0; +// else if (en) q <= #10 d; -endmodule // flopenr +// endmodule // flopenr -module flopr #(parameter WIDTH = 8) - (input logic clk, reset, - input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q); +// module flopr #(parameter WIDTH = 8) +// (input logic clk, reset, +// input logic [WIDTH-1:0] d, +// output logic [WIDTH-1:0] q); - always_ff @(posedge clk, posedge reset) - if (reset) q <= #10 0; - else q <= #10 d; +// always_ff @(posedge clk, posedge reset) +// if (reset) q <= #10 0; +// else q <= #10 d; -endmodule // flopr +// endmodule // flopr -module flopenrc #(parameter WIDTH = 8) - (input logic clk, reset, en, clear, - input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q); +// module flopenrc #(parameter WIDTH = 8) +// (input logic clk, reset, en, clear, +// input logic [WIDTH-1:0] d, +// output logic [WIDTH-1:0] q); - always_ff @(posedge clk, posedge reset) - if (reset) q <= #10 0; - else if (en) - if (clear) q <= #10 0; - else q <= #10 d; +// always_ff @(posedge clk, posedge reset) +// if (reset) q <= #10 0; +// else if (en) +// if (clear) q <= #10 0; +// else q <= #10 d; -endmodule // flopenrc +// endmodule // flopenrc -module floprc #(parameter WIDTH = 8) - (input logic clk, reset, clear, - input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q); +// module floprc #(parameter WIDTH = 8) +// (input logic clk, reset, clear, +// input logic [WIDTH-1:0] d, +// output logic 
[WIDTH-1:0] q); - always_ff @(posedge clk, posedge reset) - if (reset) q <= #10 0; - else - if (clear) q <= #10 0; - else q <= #10 d; +// always_ff @(posedge clk, posedge reset) +// if (reset) q <= #10 0; +// else +// if (clear) q <= #10 0; +// else q <= #10 d; -endmodule // floprc +// endmodule // floprc -module mux2 #(parameter WIDTH = 8) - (input logic [WIDTH-1:0] d0, d1, - input logic s, - output logic [WIDTH-1:0] y); +// module mux2 #(parameter WIDTH = 8) +// (input logic [WIDTH-1:0] d0, d1, +// input logic s, +// output logic [WIDTH-1:0] y); - assign y = s ? d1 : d0; +// assign y = s ? d1 : d0; -endmodule // mux2 +// endmodule // mux2 -module mux3 #(parameter WIDTH = 8) - (input logic [WIDTH-1:0] d0, d1, d2, - input logic [1:0] s, - output logic [WIDTH-1:0] y); +// module mux3 #(parameter WIDTH = 8) +// (input logic [WIDTH-1:0] d0, d1, d2, +// input logic [1:0] s, +// output logic [WIDTH-1:0] y); - assign y = s[1] ? d2 : (s[0] ? d1 : d0); +// assign y = s[1] ? d2 : (s[0] ? d1 : d0); -endmodule // mux3 +// endmodule // mux3 -module mux4 #(parameter WIDTH = 8) - (input logic [WIDTH-1:0] d0, d1, d2, d3, - input logic [1:0] s, - output logic [WIDTH-1:0] y); +// module mux4 #(parameter WIDTH = 8) +// (input logic [WIDTH-1:0] d0, d1, d2, d3, +// input logic [1:0] s, +// output logic [WIDTH-1:0] y); - assign y = s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0); +// assign y = s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0); -endmodule // mux4 +// endmodule // mux4 -module mux5 #(parameter WIDTH = 8) - (input logic [WIDTH-1:0] d0, d1, d2, d3, d4, - input logic [2:0] s, - output logic [WIDTH-1:0] y); +// module mux5 #(parameter WIDTH = 8) +// (input logic [WIDTH-1:0] d0, d1, d2, d3, d4, +// input logic [2:0] s, +// output logic [WIDTH-1:0] y); - always_comb - casez (s) - 3'b000 : y = d0; - 3'b001 : y = d1; - 3'b010 : y = d2; - 3'b011 : y = d3; - 3'b1?? 
: y = d4; - endcase // casez (s) +// always_comb +// casez (s) +// 3'b000 : y = d0; +// 3'b001 : y = d1; +// 3'b010 : y = d2; +// 3'b011 : y = d3; +// 3'b1?? : y = d4; +// endcase // casez (s) -endmodule // mux5 +// endmodule // mux5 -module mux6 #(parameter WIDTH = 8) - (input logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5, - input logic [2:0] s, - output logic [WIDTH-1:0] y); +// module mux6 #(parameter WIDTH = 8) +// (input logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5, +// input logic [2:0] s, +// output logic [WIDTH-1:0] y); - always_comb - casez (s) - 3'b000 : y = d0; - 3'b001 : y = d1; - 3'b010 : y = d2; - 3'b011 : y = d3; - 3'b10? : y = d4; - 3'b11? : y = d5; - endcase // casez (s) +// always_comb +// casez (s) +// 3'b000 : y = d0; +// 3'b001 : y = d1; +// 3'b010 : y = d2; +// 3'b011 : y = d3; +// 3'b10? : y = d4; +// 3'b11? : y = d5; +// endcase // casez (s) -endmodule // mux6 +// endmodule // mux6 module eqcmp #(parameter WIDTH = 8) (input logic [WIDTH-1:0] a, b, @@ -232,25 +231,25 @@ module eqcmp #(parameter WIDTH = 8) endmodule // eqcmp -module fa (input logic a, b, c, output logic sum, carry); +// module fa (input logic a, b, c, output logic sum, carry); - assign sum = a^b^c; - assign carry = a&b|a&c|b&c; +// assign sum = a^b^c; +// assign carry = a&b|a&c|b&c; -endmodule // fa +// endmodule // fa -module csa #(parameter WIDTH=8) - (input logic [WIDTH-1:0] a, b, c, - output logic [WIDTH-1:0] sum, carry); +// module csa #(parameter WIDTH=8) +// (input logic [WIDTH-1:0] a, b, c, +// output logic [WIDTH-1:0] sum, carry); - logic [WIDTH:0] carry_temp; - genvar i; - generate - for (i=0;i B using a minimized sum-of-products expression -// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; - -// endmodule // magcompare2b // 2-bit magnitude comparator // This module compares two 2-bit values A and B. 
LT is '1' if A < B @@ -198,135 +185,6 @@ module magcompare64b_1 (w, x, A, B); endmodule // magcompare64b -// This module takes 64-bits inputs A and B, two magnitude comparison -// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of -// operands being compared as indicated below. -// FOpCtrlE Description -// 00 double precision numbers -// 01 single precision numbers -// 10 half precision numbers -// 11 bfloat precision numbers -// -// The comparator produces a 2-bit signal fcc, which -// indicates the result of the comparison as follows: -// fcc decscription -// 00 A = B -// 01 A < B -// 10 A > B -// 11 A and B are unordered (i.e., A or B is NaN) -// It also produces a invalid operation flag, which is one -// if either of the input operands is a signaling NaN. - -module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE); - - input logic [63:0] A; - input logic [63:0] B; - input logic [2:0] FOpCtrlE; - - logic dp, sp, hp; - - output logic ANaN; - output logic BNaN; - output logic Azero; - output logic Bzero; - - assign dp = !FOpCtrlE[1]&!FOpCtrlE[0]; - assign sp = !FOpCtrlE[1]&FOpCtrlE[0]; - assign hp = FOpCtrlE[1]&!FOpCtrlE[0]; - - // Test if A or B is NaN. - assign ANaN = (A[62]&A[61]&A[60]&A[59]&A[58]) & - ((sp&A[57]&A[56]&A[55]&(A[54]|A[53])) | - (dp&A[57]&A[56]&A[55]&A[54]&A[53]&A[52]&(A[51]|A[50])) | - (hp&(A[57]|A[56]))); - - assign BNaN = (B[62]&B[61]&B[60]&B[59]&B[58]) & - ((sp&B[57]&B[56]&B[55]&(B[54]|B[53])) | - (dp&B[57]&B[56]&B[55]&B[54]&B[53]&B[52]&(B[51]|B[50])) | - (hp&(B[57]|B[56]))); - - // Test if A is +0 or -0 when viewed as a floating point number (i.e, - // the 63 least siginficant bits of A are zero). - // Depending on how this synthesizes, it may work better to replace - // this with assign Azero = ~(A[62] | A[61] | ... 
| A[0]) - assign Azero = (A[62:0] == 63'h0); - assign Bzero = (B[62:0] == 63'h0); - -endmodule // exception_cmp -// -// File name : fpcomp.v -// Title : Floating-Point Comparator -// project : FPU -// Library : fpcomp -// Author(s) : James E. Stine -// Purpose : definition of main unit to floating-point comparator -// notes : -// -// Copyright Oklahoma State University -// -// Floating Point Comparator (Algorithm) -// -// 1.) Performs sign-extension if the inputs are 32-bit integers. -// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs -// 3.) Check for special cases (+0=-0, unordered, and infinite values) -// and correct for sign bits -// -// This module takes 64-bits inputs op1 and op2, VSS, and VDD -// signals, and a 2-bit signal FOpCtrlE that indicates the type of -// operands being compared as indicated below. -// FOpCtrlE Description -// 00 double precision numbers -// 01 single precision numbers -// 10 half precision numbers -// 11 (unused) -// -// The comparator produces a 2-bit signal FCC, which -// indicates the result of the comparison: -// -// fcc decscription -// 00 A = B -// 01 A < B -// 10 A > B -// 11 A and B are unordered (i.e., A or B is NaN) -// -// It also produces an invalid operation flag, which is one -// if either of the input operands is a signaling NaN per 754 - - -/*module magcompare2b (LT, GT, A, B); - - input logic [1:0] A; - input logic [1:0] B; - - output logic LT; - output logic GT; - - // Determine if A < B using a minimized sum-of-products expression - assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; - // Determine if A > B using a minimized sum-of-products expression - assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; - -endmodule*/ // magcompare2b - -// 2-bit magnitude comparator -// This module compares two 2-bit values A and B. LT is '1' if A < B -// and GT is '1'if A > B. LT and GT are both '0' if A = B. 
However, -// this version actually incorporates don't cares into the equation to -// simplify the optimization - -// module magcompare2c (LT, GT, A, B); - -// input logic [1:0] A; -// input logic [1:0] B; - -// output logic LT; -// output logic GT; - -// assign LT = B[1] | (!A[1]&B[0]); -// assign GT = A[1] | (!B[1]&A[0]); - -// endmodule // magcompare2b - // This module compares two 64-bit values A and B. LT is '1' if A < B // and EQ is '1'if A = B. LT and GT are both '0' if A > B. // This structure was modified so @@ -388,6 +246,8 @@ endmodule // magcompare64b module exception_cmp_2 ( input logic [63:0] A, input logic [63:0] B, + input logic [63:0] FSrcXE, + input logic [63:0] FSrcYE, input logic FmtE, input logic LT_mag, input logic EQ_mag, @@ -456,8 +316,8 @@ module exception_cmp_2 ( always_comb begin case (FOpCtrlE[2:0]) - 3'b111: CmpResE = LT ? A : B;//min - 3'b101: CmpResE = GT ? A : B;//max + 3'b111: CmpResE = LT ? FSrcXE : FSrcYE;//min + 3'b101: CmpResE = GT ? FSrcXE : FSrcYE;//max 3'b010: CmpResE = {63'b0, EQ};//equal 3'b001: CmpResE = {63'b0, LT};//less than 3'b011: CmpResE = {63'b0, LT|EQ};//less than or equal diff --git a/wally-pipelined/src/fpu/fcvt.sv b/wally-pipelined/src/fpu/fcvt.sv index 3c72b5d0..66c574c4 100644 --- a/wally-pipelined/src/fpu/fcvt.sv +++ b/wally-pipelined/src/fpu/fcvt.sv @@ -1,7 +1,15 @@ `include "wally-config.vh" module fcvt ( - input logic [63:0] X, // floating point input + input logic XSgnE, + input logic [10:0] XExpE, + input logic [51:0] XFracE, + input logic XAssumed1E, + input logic XZeroE, + input logic XNaNE, + input logic XInfE, + input logic XDenormE, + input logic [10:0] BiasE, input logic [`XLEN-1:0] SrcAE, // integer input input logic [3:0] FOpCtrlE, // chooses which instruction is done (full list below) input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude @@ -9,15 +17,10 @@ module fcvt ( 
output logic [63:0] CvtResE, // convert final result output logic [4:0] CvtFlgE); // convert flags {invalid, divide by zero, overflow, underflow, inexact} - logic XSgn; // FP input's sign - logic [10:0] XExp; // FP input's exponent - logic [51:0] XFrac; // FP input's fraction logic ResSgn; // FP result's sign logic [10:0] ResExp,TmpExp; // FP result's exponent logic [51:0] ResFrac; // FP result's fraction logic [5:0] LZResP; // lz output - // logic LZResV; - logic [11:0] Bias; // 1023 for double, 127 for single logic [7:0] Bits; // how many bits are in the integer result logic [7:0] SubBits; // subtract these bits from the exponent (FP result) logic [64+51:0] ShiftedManTmp; // Shifted mantissa @@ -31,11 +34,7 @@ module fcvt ( logic [64-1:0] PosInt; // absolute value of the integer input logic [63:0] CvtIntRes; // interger result from the fp -> int instructions logic [63:0] CvtFPRes; // floating point result from the int -> fp instructions - logic XFracZero; // is the fraction of X zero? logic Of, Uf; // did the integer result underflow or overflow - logic XExpZero; // is X's exponent zero - logic XExpMax; // is the exponent all ones - logic XNaN, XDenorm, XInf, XZero; // is X a special value logic Guard, Round, LSB, Sticky; // bits used to determine rounding logic Plus1,CalcPlus1; // do you add one for rounding logic SgnRes; // sign of the floating point result @@ -62,31 +61,15 @@ module fcvt ( // fcvt.d.lu = 1101 // {long, unsigned, to int, from int} - // split the input into it's various parts - assign XSgn = FmtE ? X[63] : X[31]; - assign XExp = FmtE ? X[62:52] : {3'b0, X[30:23]}; - assign XFrac = FmtE ? X[51:0] : {X[23:0], 29'b0}; - - // determine if the exponent and fraction are all zero or ones - assign XExpZero = ~|XExp; - assign XFracZero = ~|XFrac; - assign XExpMax = FmtE ? 
&XExp[10:0] : &XExp[7:0]; - - // determine if X is a special value - assign XNaN = XExpMax & ~XFracZero; - assign XDenorm = XExpZero & ~XFracZero; - assign XInf = XExpMax & XFracZero; - assign XZero = XExpZero & XFracZero; - // calculate signals based off the input and output's size - assign Bias = FmtE ? 12'h3ff : 12'h7f; + // assign Bias = FmtE ? 12'h3ff : 12'h7f; assign Res64 = ((FOpCtrlE==4'b1010 || FOpCtrlE==4'b1110) | (FmtE&(FOpCtrlE==4'b0001 | FOpCtrlE==4'b0101 | FOpCtrlE==4'b0000 | FOpCtrlE==4'b1001 | FOpCtrlE==4'b1101))); assign In64 = ((FOpCtrlE==4'b1001 || FOpCtrlE==4'b1101) | (FmtE&(FOpCtrlE==4'b0010 | FOpCtrlE==4'b0110 | FOpCtrlE==4'b1010 | FOpCtrlE==4'b1110) | (FOpCtrlE==4'b1101 & ~FmtE))); assign SubBits = In64 ? 8'd64 : 8'd32; assign Bits = Res64 ? 8'd64 : 8'd32; // calulate the unbiased exponent - assign ExpVal = XExp - Bias + XDenorm; + assign ExpVal = XExpE - BiasE + XDenormE; //////////////////////////////////////////////////////// @@ -97,11 +80,10 @@ module fcvt ( // determine the integer's sign assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0; - // This did not work \/ // generate - // if(64 == 64) + // if(`XLEN == 64) // lz64 lz(LZResP, LZResV, PosInt); - // else if(64 == 32) begin + // else if(`XLEN == 32) begin // assign LZResP[5] = 1'b0; // lz32 lz(LZResP[4:0], LZResV, PosInt); // end @@ -116,7 +98,7 @@ module fcvt ( end // if no one was found set to zero otherwise calculate the exponent - assign TmpExp = i==`XLEN ? 0 : Bias + SubBits - LZResP; + assign TmpExp = i==`XLEN ? 0 : BiasE + SubBits - LZResP; @@ -126,12 +108,12 @@ module fcvt ( // select the shift value and amount based on operation (to fp or int) assign ShiftCnt = FOpCtrlE[1] ? ExpVal : LZResP; - assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, ~(XDenorm|XZero), XFrac} : {PosInt, 52'b0}; + assign ShiftVal = FOpCtrlE[1] ? 
{{64-2{1'b0}}, XAssumed1E, XFracE} : {PosInt, 52'b0}; // if shift = -1 then shift one bit right for gaurd bit (right shifting twice never rounds) // if the shift is negitive add a bit for sticky bit calculation // otherwise shift left - assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, ~(XDenorm|XZero), XFrac[51:1]} : ShiftCnt[12] ? {{64+51{1'b0}}, ~XZero} : ShiftVal << ShiftCnt; + assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, XAssumed1E, XFracE[51:1]} : ShiftCnt[12] ? {{64+51{1'b0}}, ~XZeroE} : ShiftVal << ShiftCnt; // truncate the shifted mantissa assign ShiftedMan = ShiftedManTmp[64+51:50]; @@ -139,7 +121,7 @@ module fcvt ( // calculate sticky bit // - take into account the possible right shift from before // - the sticky bit calculation covers three diffrent sizes depending on the opperation - assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XFrac[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]); + assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XFracE[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]); // determine guard, round, and least significant bit of the result @@ -152,23 +134,23 @@ module fcvt ( case (FrmE) 3'b000: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky&LSB));//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = (XSgn&FOpCtrlE[1]) | (ResSgn&FOpCtrlE[0]);//round down - 3'b011: CalcPlus1 = (~XSgn&FOpCtrlE[1]) | (~ResSgn&FOpCtrlE[0]);//round up + 3'b010: CalcPlus1 = (XSgnE&FOpCtrlE[1]) | (ResSgn&FOpCtrlE[0]);//round down + 3'b011: CalcPlus1 = (~XSgnE&FOpCtrlE[1]) | (~ResSgn&FOpCtrlE[0]);//round up 3'b100: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky));//round to nearest max magnitude default: CalcPlus1 = 1'bx; endcase end // dont tound if the result is exact - assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZero&FOpCtrlE[1]); + assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZeroE&FOpCtrlE[1]); // round the shifted 
mantissa assign RoundedTmp = ShiftedMan[64+1:2] + Plus1; assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + Plus1 : {{TmpExp, ShiftedMan[64+1:43]} + Plus1, 29'b0} ; // fit the rounded result into the appropriate size and take the 2's complement if needed - assign Rounded = Res64 ? XSgn&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] : - XSgn ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]}; + assign Rounded = Res64 ? XSgnE&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] : + XSgnE ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]}; // extract the MSB and Sign for later use (will be used to determine underflow and overflow) assign RoundMSB = Res64 ? RoundedTmp[64] : RoundedTmp[32]; @@ -176,10 +158,10 @@ module fcvt ( // check if the result overflows - assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgn&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN; + assign Of = (~XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgnE&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgnE&XInfE) | XNaNE; // check if the result underflows (this calculation changes if the result is signed or unsigned) - assign Uf = FOpCtrlE[2] ? XSgn&~XZero | (XSgn&XInf) | (XSgn&~XZero&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgn&XInf) | (XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (XSgn&~RoundSgn&~ShiftCnt[12]); // assign CvtIntRes = (XSgn | ShiftCnt[12]) ? {64{1'b0}} : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded; + assign Uf = FOpCtrlE[2] ? XSgnE&~XZeroE | (XSgnE&XInfE) | (XSgnE&~XZeroE&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgnE&XInfE) | (XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (XSgnE&~RoundSgn&~ShiftCnt[12]); // assign CvtIntRes = (XSgnE | ShiftCnt[12]) ? {64{1'b0}} : (ShiftCnt >= 64) ? 
{64{1'b1}} : Rounded; // calculate the result's sign assign SgnRes = ~FOpCtrlE[3] & FOpCtrlE[1]; diff --git a/wally-pipelined/src/fpu/fhazard.sv b/wally-pipelined/src/fpu/fhazard.sv index e77d6424..2d3e2330 100644 --- a/wally-pipelined/src/fpu/fhazard.sv +++ b/wally-pipelined/src/fpu/fhazard.sv @@ -31,36 +31,36 @@ module fhazard( input logic [4:0] RdM, RdW, input logic [2:0] FResultSelM, output logic FStallD, - output logic [1:0] ForwardXE, ForwardYE, ForwardZE + output logic [1:0] FForwardXE, FForwardYE, FForwardZE ); always_comb begin // set ReadData as default - ForwardXE = 2'b00; // choose FRD1E - ForwardYE = 2'b00; // choose FRD2E - ForwardZE = 2'b00; // choose FRD3E + FForwardXE = 2'b00; // choose FRD1E + FForwardYE = 2'b00; // choose FRD2E + FForwardZE = 2'b00; // choose FRD3E FStallD = 0; if ((Adr1E == RdM) & FRegWriteM) // if the result will be FResM - if(FResultSelM == 3'b100) ForwardXE = 2'b10; // choose FResM + if(FResultSelM == 3'b100) FForwardXE = 2'b10; // choose FResM else FStallD = 1; // if the result won't be ready stall - else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W + else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W if ((Adr2E == RdM) & FRegWriteM) // if the result will be FResM - if(FResultSelM == 3'b100) ForwardYE = 2'b10; // choose FResM + if(FResultSelM == 3'b100) FForwardYE = 2'b10; // choose FResM else FStallD = 1; // if the result won't be ready stall - else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W + else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W if ((Adr3E == RdM) & FRegWriteM) // if the result will be FResM - if(FResultSelM == 3'b100) ForwardZE = 2'b10; // choose FResM + if(FResultSelM == 3'b100) FForwardZE = 2'b10; // choose FResM else FStallD = 1; // if the result won't be ready stall - else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W + else if ((Adr3E == RdW) & 
FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W end diff --git a/wally-pipelined/src/fpu/fma.sv b/wally-pipelined/src/fpu/fma.sv index f9c6b1bd..5bf7785e 100644 --- a/wally-pipelined/src/fpu/fma.sv +++ b/wally-pipelined/src/fpu/fma.sv @@ -3,12 +3,23 @@ module fma( input logic reset, input logic FlushM, input logic StallM, - input logic [63:0] SrcXE, SrcXM, // X - input logic [63:0] SrcYE, SrcYM, // Y - input logic [63:0] SrcZE, SrcZM, // Z input logic FmtE, FmtM, // precision 1 = double 0 = single input logic [2:0] FOpCtrlM, FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude + input logic XSgnE, YSgnE, ZSgnE, + input logic [10:0] XExpE, YExpE, ZExpE, + input logic [51:0] XFracE, YFracE, ZFracE, + input logic XSgnM, YSgnM, ZSgnM, + input logic [10:0] XExpM, YExpM, ZExpM, + input logic [51:0] XFracM, YFracM, ZFracM, + input logic XAssumed1E, YAssumed1E, ZAssumed1E, + input logic XDenormE, YDenormE, ZDenormE, + input logic XZeroE, YZeroE, ZZeroE, + input logic XNaNM, YNaNM, ZNaNM, + input logic XSNaNM, YSNaNM, ZSNaNM, + input logic XZeroM, YZeroM, ZZeroM, + input logic XInfM, YInfM, ZInfM, + input logic [10:0] BiasE, output logic [63:0] FMAResM, output logic [4:0] FMAFlgM); @@ -18,24 +29,23 @@ module fma( logic [12:0] ProdExpE, ProdExpM; logic AddendStickyE, AddendStickyM; logic KillProdE, KillProdM; - logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM; - logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM; - logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM; - fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE, .FmtE, .ProdManE, .AlignedAddendE, - .ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, - .XNaNE, .YNaNE, .ZNaNE ); + fma1 fma1 (.XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, 
.ZFracE, + .BiasE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, + .FOpCtrlE, .FmtE, .ProdManE, .AlignedAddendE, + .ProdExpE, .AddendStickyE, .KillProdE); flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); - flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM, - {AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE}, - {AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM}); + flopenrc #(2) EMRegFma4(clk, reset, FlushM, ~StallM, + {AddendStickyE, KillProdE}, + {AddendStickyM, KillProdM}); - fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM, .FrmM, .FmtM, + fma2 fma2(.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, .YFracM, .ZFracM, + .FOpCtrlM, .FrmM, .FmtM, .ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM, - .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, + .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .FMAResM, .FMAFlgM); endmodule @@ -43,98 +53,27 @@ endmodule module fma1( - - input logic [63:0] X, // X - input logic [63:0] Y, // Y - input logic [63:0] Z, // Z + // input logic XSgnE, YSgnE, ZSgnE, + input logic [10:0] XExpE, YExpE, ZExpE, + input logic [51:0] XFracE, YFracE, ZFracE, + input logic XAssumed1E, YAssumed1E, ZAssumed1E, + input logic XDenormE, YDenormE, ZDenormE, + input logic XZeroE, YZeroE, ZZeroE, + input logic [10:0] BiasE, input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic FmtE, // precision 1 = double 0 = single output logic [105:0] ProdManE, // 1.X frac * 1.Y frac output logic [161:0] AlignedAddendE, // Z aligned for addition 
output logic [12:0] ProdExpE, // X exponent + Y exponent - bias output logic AddendStickyE, // sticky bit that is calculated during alignment - output logic KillProdE, // set the product to zero before addition if the product is too small to matter - output logic XZeroE, YZeroE, ZZeroE, // inputs are zero - output logic XInfE, YInfE, ZInfE, // inputs are infinity - output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN + output logic KillProdE // set the product to zero before addition if the product is too small to matter + ); - logic [51:0] XFrac,YFrac,ZFrac; // input fraction - logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one) - logic [12:0] XExp,YExp,ZExp; // input exponents - logic XSgn,YSgn,ZSgn; // input signs logic [12:0] AlignCnt; // how far to shift the addend to align with the product logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit logic [213:0] ZManPreShifted; // input to the alignment shifter - logic XDenorm, YDenorm, ZDenorm; // inputs are denormal - logic [63:0] Addend; // value to add (Z or zero) - logic [12:0] Bias; // 1023 for double, 127 for single - logic XExpZero, YExpZero, ZExpZero; // input exponent zero - logic XFracZero, YFracZero, ZFracZero; // input fraction zero - logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s - - /////////////////////////////////////////////////////////////////////////////// - // split inputs into the sign bit, fraction, and exponent to handle single or double precision - // - single precision is in the top half of the inputs - /////////////////////////////////////////////////////////////////////////////// - - // Set addend to zero if FMUL instruction - assign Addend = FOpCtrlE[2] ? 64'b0 : Z; - - assign XSgn = X[63]; - assign YSgn = Y[63]; - assign ZSgn = Addend[63]; - - assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]}; - assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]}; - assign ZExp = FmtE ? 
{2'b0, Addend[62:52]} : {5'b0, Addend[62:55]}; - - assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0}; - assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0}; - assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0}; - - assign XMan = {~XExpZero, XFrac}; - assign YMan = {~YExpZero, YFrac}; - assign ZMan = {~ZExpZero, ZFrac}; - - assign Bias = FmtE ? 13'h3ff : 13'h7f; - - - - /////////////////////////////////////////////////////////////////////////////// - // determine if an input is a special value - /////////////////////////////////////////////////////////////////////////////// - - assign XExpZero = ~|XExp; - assign YExpZero = ~|YExp; - assign ZExpZero = ~|ZExp; - - assign XFracZero = ~|XFrac; - assign YFracZero = ~|YFrac; - assign ZFracZero = ~|ZFrac; - - assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0]; - assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0]; - assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0]; - - assign XNaNE = XExpMax & ~XFracZero; - assign YNaNE = YExpMax & ~YFracZero; - assign ZNaNE = ZExpMax & ~ZFracZero; - - assign XDenorm = XExpZero & ~XFracZero; - assign YDenorm = YExpZero & ~YFracZero; - assign ZDenorm = ZExpZero & ~ZFracZero; - - assign XInfE = XExpMax & XFracZero; - assign YInfE = YExpMax & YFracZero; - assign ZInfE = ZExpMax & ZFracZero; - - assign XZeroE = XExpZero & XFracZero; - assign YZeroE = YExpZero & YFracZero; - assign ZZeroE = ZExpZero & ZFracZero; - - - - + + /////////////////////////////////////////////////////////////////////////////// // Calculate the product // - When multipliying two fp numbers, add the exponents @@ -145,11 +84,11 @@ module fma1( // verilator lint_off WIDTH assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 : - XExp + YExp - Bias + XDenorm + YDenorm; + XExpE + YExpE - BiasE + XDenormE + YDenormE; // Calculate the product's mantissa // - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one. 
- assign ProdManE = XMan * YMan; + assign ProdManE = {XAssumed1E, XFracE} * {YAssumed1E, YFracE}; @@ -168,7 +107,7 @@ module fma1( // - positive means the product is larger, so shift Z right // - Denormal numbers have an an exponent value of 1, however they are // represented with an exponent of 0. add one to the exponent if it is a denormal number - assign AlignCnt = ProdExpE - ZExp - ZDenorm; + assign AlignCnt = ProdExpE - ZExpE - ZDenormE; // verilator lint_on WIDTH @@ -177,7 +116,7 @@ module fma1( // |1'b0| addnend | // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) - assign ZManPreShifted = {55'b0, ZMan, 106'b0}; + assign ZManPreShifted = {55'b0, {ZAssumed1E, ZFracE}, 106'b0}; always_comb begin @@ -187,7 +126,7 @@ module fma1( // | addnend | if ($signed(AlignCnt) <= $signed(-13'd56)) begin KillProdE = 1; - ZManShifted = ZManPreShifted;//{107'b0, ZMan, 54'b0}; + ZManShifted = ZManPreShifted;//{107'b0, {~ZAssumed1E, ZFrac}, 54'b0}; AddendStickyE = ~(XZeroE|YZeroE); // If the Addend is shifted left (negitive AlignCnt) @@ -229,10 +168,10 @@ endmodule module fma2( - - input logic [63:0] X, // X - input logic [63:0] Y, // Y - input logic [63:0] Z, // Z + + input logic XSgnM, YSgnM, ZSgnM, + input logic [10:0] XExpM, YExpM, ZExpM, + input logic [51:0] XFracM, YFracM, ZFracM, input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic FmtM, // precision 1 = double 0 = single @@ -244,6 +183,7 @@ module fma2( input logic XZeroM, YZeroM, ZZeroM, // inputs are zero input logic XInfM, YInfM, ZInfM, // inputs are infinity input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN + input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs 
output logic [63:0] FMAResM, // FMA final result output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} @@ -252,8 +192,6 @@ module fma2( logic [51:0] ResultFrac; // Result fraction logic [10:0] ResultExp; // Result exponent logic ResultSgn; // Result sign - logic [10:0] ZExp; // input exponent - logic XSgn, YSgn, ZSgn; // input sign logic PSgn; // product sign logic [105:0] ProdMan2; // product being added logic [162:0] AlignedAddend2; // possibly inverted aligned Z @@ -289,28 +227,10 @@ module fma2( logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results - /////////////////////////////////////////////////////////////////////////////// - // Select input fields - // The following logic duplicates fma1 because it's cheaper to recompute than provide registers - /////////////////////////////////////////////////////////////////////////////// - - // Set addend to zero if FMUL instruction - assign Addend = FOpCtrlM[2] ? 64'b0 : Z; - - // split inputs into the sign bit, and exponent to handle single or double precision - // - single precision is in the top half of the inputs - assign XSgn = X[63]; - assign YSgn = Y[63]; - assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction - - assign ZExp = FmtM ? Addend[62:52] : {3'b0, Addend[62:55]}; - - - - + // Calculate the product's sign // Negate product's sign if FNMADD or FNMSUB - assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1]; + assign PSgn = XSgnM ^ YSgnM ^ FOpCtrlM[1]; @@ -321,7 +241,7 @@ module fma2( // Negate Z when doing one of the following opperations: // -prod + Z // prod - Z - assign InvZ = ZSgn ^ PSgn; + assign InvZ = ZSgnM ^ PSgn; // Choose an inverted or non-inverted addend - the one is added later assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM}; @@ -376,7 +296,7 @@ module fma2( assign FracLen = FmtM ? 
13'd52 : 13'd23; // Determine if the result is denormal - assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56); + assign SumExpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - 13'd56); assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero; // Determine the shift needed for denormal results @@ -501,13 +421,13 @@ module fma2( // Determine the sign if the sum is zero // if cancelation then 0 unless round to -infinity // otherwise psign - assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn; + assign ZeroSgn = (PSgn^ZSgnM)&~Underflow ? FrmM == 3'b010 : PSgn; // is the result negitive // if p - z is the Sum negitive // if -p + z is the Sum positive // if -p - z then the Sum is negitive - assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn); + assign ResultSgnTmp = InvZ&(ZSgnM)&NegSum | InvZ&PSgn&~NegSum | ((ZSgnM)&PSgn); assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp; @@ -525,9 +445,8 @@ module fma2( // 2) Inf - Inf (unless x or y is NaN) // 3) 0 * Inf assign MaxExp = FmtM ? 13'd2047 : 13'd255; - assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) : - (XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]); - assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); + assign SigNaN = XSNaNM | YSNaNM | ZSNaNM; + assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgnM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); // Set Overflow flag if the number is too big to be represented // - Don't set the overflow flag if an overflowed result isn't outputed @@ -555,28 +474,28 @@ module fma2( /////////////////////////////////////////////////////////////////////////////// // Select the result /////////////////////////////////////////////////////////////////////////////// - assign XNaNResult = FmtM ? 
{XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]}; - assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]}; - assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]}; + assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XFracM[50:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XFracM[50:29]}; + assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YFracM[50:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YFracM[50:29]}; + assign ZNaNResult = FmtM ? {ZSgnM, ZExpM, 1'b1, ZFracM[50:0]} : {{32{1'b1}}, ZSgnM, ZExpM[7:0], 1'b1, ZFracM[50:29]}; assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 11'h7fe, {52{1'b1}}} : {ResultSgn, 11'h7ff, 52'b0} : - ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} : - {ResultSgn, 8'hff, 55'b0}; - assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0}; - assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0}; - assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0}; + ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} : + {{32{1'b1}}, ResultSgn, 8'hff, 23'b0}; + assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0}; + assign KillProdResult = FmtM ? 
{ResultSgn, {ZExpM, ZFracM} - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[7:0], ZFracM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}}; + assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}}; assign FMAResM = XNaNM ? XNaNResult : YNaNM ? YNaNResult : ZNaNM ? ZNaNResult : Invalid ? InvalidResult : // has to be before inf - XInfM ? {PSgn, X[62:0]} : - YInfM ? {PSgn, Y[62:0]} : - ZInfM ? {ZSgn, Addend[62:0]} : + XInfM ? FmtM ? {PSgn, XExpM, XFracM} : {{32{1'b1}}, PSgn, XExpM[7:0], XFracM[51:29]} : + YInfM ? FmtM ? {PSgn, YExpM, YFracM} : {{32{1'b1}}, PSgn, YExpM[7:0], YFracM[51:29]} : + ZInfM ? FmtM ? {ZSgnM, ZExpM, ZFracM} : {{32{1'b1}}, ZSgnM, ZExpM[7:0], ZFracM[51:29]} : Overflow ? OverflowResult : KillProdM ? KillProdResult : // has to be after Underflow Underflow & ~ResultDenorm ? UnderflowResult : FmtM ? {ResultSgn, ResultExp, ResultFrac} : - {ResultSgn, ResultExp[7:0], ResultFrac, 3'b0}; + {{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]}; diff --git a/wally-pipelined/src/fpu/fpdiv.sv b/wally-pipelined/src/fpu/fpdiv.sv index 4051f6de..454896a1 100755 --- a/wally-pipelined/src/fpu/fpdiv.sv +++ b/wally-pipelined/src/fpu/fpdiv.sv @@ -22,8 +22,8 @@ // Step 7: Put quotient/remainder onto output. 
// -`timescale 1ps/1ps -module fpdiv (done, AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn, +// `timescale 1ps/1ps +module fpdiv (AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn, start, reset, clk); input [63:0] op1; // 1st input operand (A) @@ -40,7 +40,8 @@ module fpdiv (done, AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, Un output [63:0] AS_Result; // Result of operation output [4:0] Flags; // IEEE exception flags output Denorm; // Denorm on input or output - output done; + logic done; + // output done; supply1 vdd; supply0 vss; diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 77d68591..b7240efd 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -34,6 +34,7 @@ module fpu ( input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg input logic StallE, StallM, StallW, input logic FlushE, FlushM, FlushW, + input logic [4:0] RdE, RdM, RdW, output logic FRegWriteM, output logic FStallD, // Stall the decode stage output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable @@ -52,7 +53,7 @@ module fpu ( logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double logic FDivStartD, FDivStartE; // Start division logic FWriteIntD; // Write to integer register - logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal + logic [1:0] FForwardXE, FForwardYE, FForwardZE; // Input3 forwarding mux control signal logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component logic [1:0] FResSelD, FResSelE, FResSelM; @@ -60,13 +61,34 @@ module fpu ( logic [4:0] Adr1E, Adr2E, Adr3E; // regfile signals - logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage logic [63:0] 
FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [`XLEN-1:0] SrcXMAligned; - logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding) - logic [63:0] SrcYE, SrcYM; // Input 2 to the various units (after forwarding) - logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding) + logic [`XLEN-1:0] FSrcXMAligned; + logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) + logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding) + logic [63:0] FSrcZE; // Input 3 to the various units (after forwarding) + + // unpacking signals + logic XSgnE, YSgnE, ZSgnE; + logic [10:0] XExpE, YExpE, ZExpE; + logic [51:0] XFracE, YFracE, ZFracE; + logic XAssumed1E, YAssumed1E, ZAssumed1E; + logic XNaNE, YNaNE, ZNaNE; + logic XSNaNE, YSNaNE, ZSNaNE; + logic XDenormE, YDenormE, ZDenormE; + logic XZeroE, YZeroE, ZZeroE; + logic [10:0] BiasE; + logic XInfE, YInfE, ZInfE; + logic XExpMaxE; + logic XNormE; + + logic XSgnM, YSgnM, ZSgnM; + logic [10:0] XExpM, YExpM, ZExpM; + logic [51:0] XFracM, YFracM, ZFracM; + logic XNaNM, YNaNM, ZNaNM; + logic XSNaNM, YSNaNM, ZSNaNM; + logic XZeroM, YZeroM, ZZeroM; + logic XInfM, YInfM, ZInfM; // div/sqrt signals logic [63:0] FDivResultM, FDivResultW; @@ -132,25 +154,27 @@ module fpu ( flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, {Adr1E, Adr2E, Adr3E}); flopenrc #(22) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD}, - {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}); + {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD}, + {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}); //EXECUTION STAGE // Hazard unit for FPU fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, 
.FStallD, - .ForwardXE, .ForwardYE, .ForwardZE); + .FForwardXE, .FForwardYE, .FForwardZE); // forwarding muxs - mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, ForwardXE, SrcXE); - mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, ForwardYE, SrcYE); - mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, ForwardZE, SrcZE); + mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); + mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE); + mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FSrcZE); - + unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); // first of two-stage instance of floating-point fused multiply-add unit fma fma (.clk, .reset, .FlushM, .StallM, - .SrcXE, .SrcYE, .SrcZE, .SrcXM, .SrcYM, .SrcZM, + .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, + .XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, .YFracM, .ZFracM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, + // .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, .FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]), .FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM); @@ -163,43 +187,50 @@ module fpu ( .ECLK(fpdivClk)); // capture the inputs for div/sqrt - flopenrc #(64) reg_input1 (.d(SrcXE), .q(DivInput1E), + flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E), .en(~HoldInputs), .clear(FDivSqrtDoneE), .reset(reset), .clk(clk)); - flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E), + flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E), .en(~HoldInputs), .clear(FDivSqrtDoneE), .reset(reset), 
.clk(clk)); - fpdiv fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, - .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, - .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); - + // fpdiv fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, + // .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, + // .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); + assign FDivBusyE = 0; // first of two-stage instance of floating-point add/cvt unit faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, - .SrcXE, .SrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM); + .FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM); // first and only instance of floating-point comparator - fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE); + fcmp fcmp (.op1({XSgnE,XExpE,XFracE}), .op2({YSgnE,YExpE,YFracE}), .FSrcXE, .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE); // first and only instance of floating-point sign converter - fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE); + fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .XExpE, .XFracE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE); // first and only instance of floating-point classify unit - fclassify fclassify (.SrcXE, .FmtE, .ClassResE); + fclassify fclassify (.XSgnE, .XFracE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE); - fcvt fcvt (.X(SrcXE), .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE); + fcvt fcvt (.XSgnE, .XExpE, .XFracE, .XAssumed1E, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE); // output for store instructions - // mux2 #(`XLEN) FWriteDataMux({{`XLEN-32{1'b0}}, SrcYE[63:32]}, SrcYE[63:64-`XLEN], FmtE, FWriteDataE); - assign FWriteDataE = SrcYE[`XLEN-1:0]; + // mux2 #(`XLEN) FWriteDataMux({{`XLEN-32{1'b0}}, 
FSrcYE[63:32]}, FSrcYE[63:64-`XLEN], FmtE, FWriteDataE); + assign FWriteDataE = FSrcYE[`XLEN-1:0]; //***************** // E/M pipe registers //***************** - flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM); - flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM); - flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM); + flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); + // flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM); + // flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM); + flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XFracE}, {XSgnM,XExpM,XFracM}); + flopenrc #(64) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YFracE}, {YSgnM,YExpM,YFracM}); + flopenrc #(64) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZFracE}, {ZSgnM,ZExpM,ZFracM}); + flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM, + {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, + {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); + flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); @@ -212,8 +243,8 @@ module fpu ( flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM); flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM, - {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}, - {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM}); + {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, + {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); @@ -221,8 +252,8 @@ module fpu ( mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM); mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM); - 
// mux2 #(`XLEN) SrcXAlignedMux({{`XLEN-32{1'b0}}, SrcXM[63:32]}, SrcXM[63:64-`XLEN], FmtM, SrcXMAligned); - mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM); + // mux2 #(`XLEN) FSrcXAlignedMux({{`XLEN-32{1'b0}}, FSrcXM[63:32]}, FSrcXM[63:64-`XLEN], FmtM, FSrcXMAligned); + mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM); // Align SrcA to MSB when single precicion mux2 #(64) SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM); @@ -242,8 +273,8 @@ module fpu ( flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW, - {FRegWriteM, FResultSelM, RdM, FmtM, FWriteIntM}, - {FRegWriteW, FResultSelW, RdW, FmtW, FWriteIntW}); + {FRegWriteM, FResultSelM, FmtM, FWriteIntM}, + {FRegWriteW, FResultSelW, FmtW, FWriteIntW}); //######################################### // BEGIN WRITEBACK STAGE diff --git a/wally-pipelined/src/fpu/fsgn.sv b/wally-pipelined/src/fpu/fsgn.sv index 7df9386c..67865b64 100755 --- a/wally-pipelined/src/fpu/fsgn.sv +++ b/wally-pipelined/src/fpu/fsgn.sv @@ -1,30 +1,34 @@ //performs the fsgnj/fsgnjn/fsgnjx RISCV instructions -module fsgn ( - input logic [63:0] SrcXE, SrcYE, +module fsgn ( + input logic XSgnE, YSgnE, + input logic [10:0] XExpE, + input logic [51:0] XFracE, + input logic XExpMaxE, + input logic FmtE, input logic [1:0] SgnOpCodeE, output logic [63:0] SgnResE, output logic SgnNVE); logic AonesExp; + logic ResSgn; //op code designation: // - //00 - fsgnj - directly copy over sign value of SrcYE - //01 - fsgnjn - negate sign value of SrcYE - //10 - fsgnjx - XOR sign values of SrcXE & SrcYE + //00 - fsgnj - directly copy over sign value of FSrcYE + //01 - fsgnjn - negate sign value of FSrcYE + //10 - fsgnjx - XOR sign values of FSrcXE & FSrcYE // - assign SgnResE[63] = SgnOpCodeE[1] ? 
(SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]); - assign SgnResE[62:0] = SrcXE[62:0]; + assign ResSgn = SgnOpCodeE[1] ? (XSgnE ^ YSgnE) : (YSgnE ^ SgnOpCodeE[0]); + assign SgnResE = FmtE ? {ResSgn, XExpE, XFracE} : {{32{1'b1}}, ResSgn, XExpE[7:0], XFracE[51:29]}; //If the exponent is all ones, then the value is either Inf or NaN, //both of which will produce a QNaN/SNaN value of some sort. This will //set the invalid flag high. - assign AonesExp = SrcXE[62]&SrcXE[61]&SrcXE[60]&SrcXE[59]&SrcXE[58]&SrcXE[57]&SrcXE[56]&SrcXE[55]&SrcXE[54]&SrcXE[53]&SrcXE[52]; //the only flag that can occur during this operation is invalid //due to changing sign on already existing NaN - assign SgnNVE = AonesExp & SgnResE[63]; + assign SgnNVE = XExpMaxE & SgnResE[63]; endmodule diff --git a/wally-pipelined/src/fpu/fsm_div.v b/wally-pipelined/src/fpu/fsm_div.v index 77f0dc9a..f9f33877 100755 --- a/wally-pipelined/src/fpu/fsm_div.v +++ b/wally-pipelined/src/fpu/fsm_div.v @@ -1,3 +1,5 @@ + +`timescale 1ps/1ps module fsm_div (done, load_rega, load_regb, load_regc, load_regd, load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr, diff --git a/wally-pipelined/src/fpu/sbtm.sv b/wally-pipelined/src/fpu/sbtm.sv index 7a4fefc0..abd1bba7 100644 --- a/wally-pipelined/src/fpu/sbtm.sv +++ b/wally-pipelined/src/fpu/sbtm.sv @@ -1,33 +1,33 @@ -module sbtm (input logic [11:0] a, output logic [10:0] ia_out); +// module sbtm (input logic [11:0] a, output logic [10:0] ia_out); - // bit partitions - logic [3:0] x0; - logic [2:0] x1; - logic [3:0] x2; - logic [2:0] x2_1cmp; - // mem outputs - logic [12:0] y0; - logic [4:0] y1; - // input to CPA - logic [14:0] op1; - logic [14:0] op2; - logic [14:0] p; +// // bit partitions +// logic [3:0] x0; +// logic [2:0] x1; +// logic [3:0] x2; +// logic [2:0] x2_1cmp; +// // mem outputs +// logic [12:0] y0; +// logic [4:0] y1; +// // input to CPA +// logic [14:0] op1; +// logic [14:0] op2; +// logic [14:0] p; - assign x0 = a[10:7]; - assign x1 = a[6:4]; - assign x2 
= a[3:0]; +// assign x0 = a[10:7]; +// assign x1 = a[6:4]; +// assign x2 = a[3:0]; - sbtm_a0 mem1 ({x0, x1}, y0); - // 1s cmp per sbtm/stam - assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0]; - sbtm_a1 mem2 ({x0, x2_1cmp}, y1); - assign op1 = {1'b0, y0, 1'b0}; - // 1s cmp per sbtm/stam - assign op2 = x2[3] ? {1'b1, {8{1'b1}}, ~y1, 1'b1} : - {1'b0, 8'b0, y1, 1'b1}; - // CPA - adder #(15) cp1 (op1, op2, 1'b0, p, cout); - //assign ia_out = {p[14:4], {53{1'b0}}}; - assign ia_out = p[14:4]; +// sbtm_a0 mem1 ({x0, x1}, y0); +// // 1s cmp per sbtm/stam +// assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0]; +// sbtm_a1 mem2 ({x0, x2_1cmp}, y1); +// assign op1 = {1'b0, y0, 1'b0}; +// // 1s cmp per sbtm/stam +// assign op2 = x2[3] ? {1'b1, {8{1'b1}}, ~y1, 1'b1} : +// {1'b0, 8'b0, y1, 1'b1}; +// // CPA +// adder #(15) cp1 (op1, op2, 1'b0, p, cout); +// //assign ia_out = {p[14:4], {53{1'b0}}}; +// assign ia_out = p[14:4]; -endmodule // sbtm +// endmodule // sbtm diff --git a/wally-pipelined/src/fpu/sbtm2.sv b/wally-pipelined/src/fpu/sbtm2.sv index 3052f60f..e7b9b6c2 100644 --- a/wally-pipelined/src/fpu/sbtm2.sv +++ b/wally-pipelined/src/fpu/sbtm2.sv @@ -1,39 +1,39 @@ -module sbtm2 (input logic [11:0] a, output logic [10:0] y); +// module sbtm2 (input logic [11:0] a, output logic [10:0] y); - // bit partitions - logic [4:0] x0; - logic [2:0] x1; - logic [3:0] x2; - logic [2:0] x2_1cmp; - // mem outputs - logic [12:0] y0; - logic [5:0] y1; - // input to CPA - logic [14:0] op1; - logic [14:0] op2; - logic [14:0] p; - logic cout; +// // bit partitions +// logic [4:0] x0; +// logic [2:0] x1; +// logic [3:0] x2; +// logic [2:0] x2_1cmp; +// // mem outputs +// logic [12:0] y0; +// logic [5:0] y1; +// // input to CPA +// logic [14:0] op1; +// logic [14:0] op2; +// logic [14:0] p; +// logic cout; - assign x0 = a[11:7]; - assign x1 = a[6:4]; - assign x2 = a[3:0]; +// assign x0 = a[11:7]; +// assign x1 = a[6:4]; +// assign x2 = a[3:0]; - sbtm_a2 mem1 ({x0[3:0], x1}, y0); - assign op1 = {1'b0, y0, 
1'b0}; +// sbtm_a2 mem1 ({x0[3:0], x1}, y0); +// assign op1 = {1'b0, y0, 1'b0}; - // 1s cmp per sbtm/stam - assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0]; - sbtm_a3 mem2 ({x0, x2_1cmp}, y1); - // 1s cmp per sbtm/stam - assign op2 = x2[3] ? {{8{1'b1}}, ~y1, 1'b1} : - {8'b0, y1, 1'b1}; +// // 1s cmp per sbtm/stam +// assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0]; +// sbtm_a3 mem2 ({x0, x2_1cmp}, y1); +// // 1s cmp per sbtm/stam +// assign op2 = x2[3] ? {{8{1'b1}}, ~y1, 1'b1} : +// {8'b0, y1, 1'b1}; - // CPA - bk15 cp1 (cout, p, op1, op2, 1'b0); - assign y = p[14:4]; +// // CPA +// bk15 cp1 (cout, p, op1, op2, 1'b0); +// assign y = p[14:4]; -endmodule // sbtm2 +// endmodule // sbtm2 diff --git a/wally-pipelined/src/fpu/sbtm3.sv b/wally-pipelined/src/fpu/sbtm3.sv index f333d285..231bf52e 100755 --- a/wally-pipelined/src/fpu/sbtm3.sv +++ b/wally-pipelined/src/fpu/sbtm3.sv @@ -1,37 +1,37 @@ -module sbtm2 (input logic [11:0] a, output logic [10:0] y); +// module sbtm2 (input logic [11:0] a, output logic [10:0] y); - // bit partitions - logic [4:0] x0; - logic [2:0] x1; - logic [3:0] x2; - logic [2:0] x2_1cmp; - // mem outputs - logic [13:0] y0; - logic [5:0] y1; - // input to CPA - logic [14:0] op1; - logic [14:0] op2; - logic [14:0] p; +// // bit partitions +// logic [4:0] x0; +// logic [2:0] x1; +// logic [3:0] x2; +// logic [2:0] x2_1cmp; +// // mem outputs +// logic [13:0] y0; +// logic [5:0] y1; +// // input to CPA +// logic [14:0] op1; +// logic [14:0] op2; +// logic [14:0] p; - assign x0 = a[11:7]; - assign x1 = a[6:4]; - assign x2 = a[3:0]; +// assign x0 = a[11:7]; +// assign x1 = a[6:4]; +// assign x2 = a[3:0]; - sbtm_a2 mem1 ({x0, x1}, y0); - assign op1 = {y0, 1'b0}; +// sbtm_a2 mem1 ({x0, x1}, y0); +// assign op1 = {y0, 1'b0}; - // 1s cmp per sbtm/stam - assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0]; - sbtm_a3 mem2 ({x0, x2_1cmp}, y1); - // 1s cmp per sbtm/stam - assign op2 = x2[3] ? 
{{8{1'b1}}, ~y1, 1'b1} : - {8'b0, y1, 1'b1}; +// // 1s cmp per sbtm/stam +// assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0]; +// sbtm_a3 mem2 ({x0, x2_1cmp}, y1); +// // 1s cmp per sbtm/stam +// assign op2 = x2[3] ? {{8{1'b1}}, ~y1, 1'b1} : +// {8'b0, y1, 1'b1}; - // CPA - adder #(15) cp1 (op1, op2, 1'b0, p, cout); - assign y = p[14:4]; +// // CPA +// adder #(15) cp1 (op1, op2, 1'b0, p, cout); +// assign y = p[14:4]; -endmodule // sbtm2 +// endmodule // sbtm2 diff --git a/wally-pipelined/src/fpu/sbtm_a4.sv b/wally-pipelined/src/fpu/sbtm_a4.sv index 1553c80d..7ffe4c61 100755 --- a/wally-pipelined/src/fpu/sbtm_a4.sv +++ b/wally-pipelined/src/fpu/sbtm_a4.sv @@ -1,4 +1,4 @@ -module sbtm_a2 (input logic [7:0] a, +module sbtm_a4 (input logic [7:0] a, output logic [13:0] y); always_comb case(a) diff --git a/wally-pipelined/src/fpu/sbtm_a5.sv b/wally-pipelined/src/fpu/sbtm_a5.sv index ff0aaa4b..b2d6d2f8 100755 --- a/wally-pipelined/src/fpu/sbtm_a5.sv +++ b/wally-pipelined/src/fpu/sbtm_a5.sv @@ -1,4 +1,4 @@ -module sbtm_a3 (input logic [7:0] a, +module sbtm_a5 (input logic [7:0] a, output logic [5:0] y); always_comb case(a) diff --git a/wally-pipelined/src/fpu/unpacking.sv b/wally-pipelined/src/fpu/unpacking.sv new file mode 100644 index 00000000..b22d1896 --- /dev/null +++ b/wally-pipelined/src/fpu/unpacking.sv @@ -0,0 +1,77 @@ +module unpacking ( + input logic [63:0] X, Y, Z, + input logic FmtE, + input logic [2:0] FOpCtrlE, + output logic XSgnE, YSgnE, ZSgnE, + output logic [10:0] XExpE, YExpE, ZExpE, + output logic [51:0] XFracE, YFracE, ZFracE, + output logic XAssumed1E, YAssumed1E, ZAssumed1E, + output logic XNormE, + output logic XNaNE, YNaNE, ZNaNE, + output logic XSNaNE, YSNaNE, ZSNaNE, + output logic XDenormE, YDenormE, ZDenormE, + output logic XZeroE, YZeroE, ZZeroE, + output logic [10:0] BiasE, + output logic XInfE, YInfE, ZInfE, + output logic XExpMaxE +); + + logic XFracZero, YFracZero, ZFracZero; // input fraction zero + logic XExpZero, YExpZero, ZExpZero; // 
input exponent zero + logic [63:0] Addend; // value to add (Z or zero) + logic YExpMaxE, ZExpMaxE; // input exponent all 1s + + assign Addend = FOpCtrlE[2] ? 64'b0 : Z; // Z is only used in the FMA, and is set to zero for a multiply operation + assign XSgnE = FmtE ? X[63] : X[31]; + assign YSgnE = FmtE ? Y[63] : Y[31]; + assign ZSgnE = FmtE ? Addend[63]^FOpCtrlE[0] : Addend[31]^FOpCtrlE[0]; + + assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]}; + assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]}; + assign ZExpE = FmtE ? Addend[62:52] : {3'b0, Addend[30:23]}; + + assign XFracE = FmtE ? X[51:0] : {X[22:0], 29'b0}; + assign YFracE = FmtE ? Y[51:0] : {Y[22:0], 29'b0}; + assign ZFracE = FmtE ? Addend[51:0] : {Addend[22:0], 29'b0}; + + assign XAssumed1E = |XExpE; + assign YAssumed1E = |YExpE; + assign ZAssumed1E = |ZExpE; + + assign XExpZero = ~XAssumed1E; + assign YExpZero = ~YAssumed1E; + assign ZExpZero = ~ZAssumed1E; + + assign XFracZero = ~|XFracE; + assign YFracZero = ~|YFracE; + assign ZFracZero = ~|ZFracE; + + assign XExpMaxE = FmtE ? &XExpE[10:0] : &XExpE[7:0]; + assign YExpMaxE = FmtE ? &YExpE[10:0] : &YExpE[7:0]; + assign ZExpMaxE = FmtE ? &ZExpE[10:0] : &ZExpE[7:0]; + + assign XNormE = ~(XExpMaxE|XExpZero); + + assign XNaNE = XExpMaxE & ~XFracZero; + assign YNaNE = YExpMaxE & ~YFracZero; + assign ZNaNE = ZExpMaxE & ~ZFracZero; + + assign XSNaNE = XNaNE&~XFracE[51]; + assign YSNaNE = YNaNE&~YFracE[51]; + assign ZSNaNE = ZNaNE&~ZFracE[51]; + + assign XDenormE = XExpZero & ~XFracZero; + assign YDenormE = YExpZero & ~YFracZero; + assign ZDenormE = ZExpZero & ~ZFracZero; + + assign XInfE = XExpMaxE & XFracZero; + assign YInfE = YExpMaxE & YFracZero; + assign ZInfE = ZExpMaxE & ZFracZero; + + assign XZeroE = XExpZero & XFracZero; + assign YZeroE = YExpZero & YFracZero; + assign ZZeroE = ZExpZero & ZFracZero; + + assign BiasE = FmtE ? 
13'h3ff : 13'h7f; + +endmodule \ No newline at end of file diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index 56e11d0f..e7b13869 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -42,6 +42,7 @@ module ieu ( output logic MulDivE, W64E, output logic [2:0] Funct3E, output logic [`XLEN-1:0] SrcAE, SrcBE, + output logic [4:0] RdE, input logic FWriteIntM, // Memory stage interface @@ -53,12 +54,14 @@ module ieu ( output logic [2:0] Funct3M, // size and signedness to LSU output logic [`XLEN-1:0] SrcAM, // to privilege and fpu + output logic [4:0] RdM, input logic DataAccessFaultM, input logic [`XLEN-1:0] FIntResM, // Writeback stage input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW, input logic FWriteIntW, + output logic [4:0] RdW, // input logic [`XLEN-1:0] PCLinkW, output logic InstrValidM, // hazards @@ -82,7 +85,7 @@ module ieu ( logic InstrValidW; // forwarding signals - logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW; + logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E; logic [1:0] ForwardAE, ForwardBE; logic RegWriteM, RegWriteW; logic MemReadE, CSRReadE; diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index f1387a54..f8db959d 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -94,6 +94,7 @@ module wallypipelinedhart // floating point unit signals logic [2:0] FRM_REGW; logic [1:0] FMemRWM, FMemRWE; + logic [4:0] RdE, RdM, RdW; logic FStallD; logic FWriteIntE, FWriteIntM, FWriteIntW; logic [`XLEN-1:0] FWriteDataE;