diff --git a/wally-pipelined/src/fpu/divconv.sv b/wally-pipelined/src/fpu/divconv.sv index 885964510..7fa89c82e 100755 --- a/wally-pipelined/src/fpu/divconv.sv +++ b/wally-pipelined/src/fpu/divconv.sv @@ -34,88 +34,88 @@ module divconv ( input logic reset, input logic clk, - output logic [63:0] q1, qp1, qm1, - output logic [63:0] q0, qp0, qm0, - output logic [63:0] rega_out, regb_out, regc_out, regd_out, - output logic [127:0] regr_out + output logic [59:0] q1, qp1, qm1, + output logic [59:0] q0, qp0, qm0, + output logic [59:0] rega_out, regb_out, regc_out, regd_out, + output logic [119:0] regr_out ); - logic [63:0] muxa_out, muxb_out; + logic [59:0] muxa_out, muxb_out; logic [10:0] ia_div, ia_sqrt; - logic [63:0] ia_out; - logic [127:0] mul_out; - logic [63:0] q_out1, qm_out1, qp_out1; - logic [63:0] q_out0, qm_out0, qp_out0; - logic [63:0] mcand, mplier, mcand_q; - logic [63:0] twocmp_out; - logic [64:0] three; - logic [127:0] constant, constant2; - logic [63:0] q_const, qp_const, qm_const; - logic [63:0] d2, n2; + logic [59:0] ia_out; + logic [119:0] mul_out; + logic [59:0] q_out1, qm_out1, qp_out1; + logic [59:0] q_out0, qm_out0, qp_out0; + logic [59:0] mcand, mplier, mcand_q; + logic [59:0] twocmp_out; + logic [60:0] three; + logic [119:0] constant, constant2; + logic [59:0] q_const, qp_const, qm_const; + logic [59:0] d2, n2; logic muxr_out; logic cout1, cout2, cout3, cout4, cout5, cout6, cout7; // Check if exponent is odd for sqrt // If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA - assign d2 = (exp_odd&op_type) ? {1'b0,d,10'h0} : {d,11'h0}; - assign n2 = op_type ? d2 : {n,11'h0}; + assign d2 = (exp_odd&op_type) ? {1'b0, d, 6'h0} : {d, 7'h0}; + assign n2 = op_type ? d2 : {n, 7'h0}; // IA div/sqrt sbtm_div ia1 (d[52:41], ia_div); - sbtm_sqrt ia2 (d2[63:52], ia_sqrt); - assign ia_out = op_type ? {ia_sqrt, {53{1'b0}}} : {ia_div, {53{1'b0}}}; + sbtm_sqrt ia2 (d2[59:48], ia_sqrt); + assign ia_out = op_type ? 
{ia_sqrt, {49{1'b0}}} : {ia_div, {49{1'b0}}}; // Choose IA or iteration - mux6 #(64) mx1 (d2, ia_out, rega_out, regc_out, regd_out, regb_out, sel_muxb, muxb_out); - mux5 #(64) mx2 (regc_out, n2, ia_out, regb_out, regd_out, sel_muxa, muxa_out); + mux6 #(60) mx1 (d2, ia_out, rega_out, regc_out, regd_out, regb_out, sel_muxb, muxb_out); + mux5 #(60) mx2 (regc_out, n2, ia_out, regb_out, regd_out, sel_muxa, muxa_out); // Deal with remainder if [0.5, 1) instead of [1, 2) - mux2 #(128) mx3a ({~n, {75{1'b1}}}, {{1'b1}, ~n, {74{1'b1}}}, q1[63], constant2); + mux2 #(120) mx3a ({~n, {67{1'b1}}}, {{1'b1}, ~n, {66{1'b1}}}, q1[59], constant2); // Select Mcand, Remainder/Q'' - mux2 #(128) mx3 (128'h0, constant2, sel_muxr, constant); + mux2 #(120) mx3 (120'h0, constant2, sel_muxr, constant); // Select mcand - remainder should always choose q1 [1,2) because // adjustment of N in the from XX.FFFFFFF - mux2 #(64) mx4 (q0, q1, q1[63], mcand_q); - mux2 #(64) mx5 (muxb_out, mcand_q, sel_muxr&op_type, mplier); - mux2 #(64) mx6 (muxa_out, mcand_q, sel_muxr, mcand); + mux2 #(60) mx4 (q0, q1, q1[59], mcand_q); + mux2 #(60) mx5 (muxb_out, mcand_q, sel_muxr&op_type, mplier); + mux2 #(60) mx6 (muxa_out, mcand_q, sel_muxr, mcand); // Q*D - N (reversed but changed in rounder.v to account for sign reversal) // Add ulp for subtraction in remainder mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out); // Constant for Q'' - mux2 #(64) mx8 ({64'h0000_0000_0000_0200}, {64'h0000_0040_0000_0000}, P, q_const); - mux2 #(64) mx9 ({64'h0000_0000_0000_0A00}, {64'h0000_0140_0000_0000}, P, qp_const); - mux2 #(64) mxA ({64'hFFFF_FFFF_FFFF_F9FF}, {64'hFFFF_FF3F_FFFF_FFFF}, P, qm_const); + mux2 #(60) mx8 ({60'h0000_0000_0000_020}, {60'h0000_0040_0000_000}, P, q_const); + mux2 #(60) mx9 ({60'h0000_0000_0000_0A0}, {60'h0000_0140_0000_000}, P, qp_const); + mux2 #(60) mxA ({60'hFFFF_FFFF_FFFF_F9F}, {60'hFFFF_FF3F_FFFF_FFF}, P, qm_const); // CPA (from CSA)/Remainder addition/subtraction - assign {cout1, mul_out} = 
(mcand*mplier) + constant + {127'b0, muxr_out}; + assign {cout1, mul_out} = (mcand*mplier) + constant + {119'b0, muxr_out}; /* 119 zeros + muxr_out = 120 bits, the width of mul_out */ // Assuming [1,2) - q1 assign {cout2, q_out1} = regb_out + q_const; assign {cout3, qp_out1} = regb_out + qp_const; assign {cout4, qm_out1} = regb_out + qm_const + 1'b1; // Assuming [0.5,1) - q0 - assign {cout5, q_out0} = {regb_out[62:0], 1'b0} + q_const; - assign {cout6, qp_out0} = {regb_out[62:0], 1'b0} + qp_const; - assign {cout7, qm_out0} = {regb_out[62:0], 1'b0} + qm_const + 1'b1; + assign {cout5, q_out0} = {regb_out[58:0], 1'b0} + q_const; + assign {cout6, qp_out0} = {regb_out[58:0], 1'b0} + qp_const; + assign {cout7, qm_out0} = {regb_out[58:0], 1'b0} + qm_const + 1'b1; // One's complement instead of two's complement (for hw efficiency) - assign three = {~mul_out[126], mul_out[126], ~mul_out[125:63]}; - mux2 #(64) mxTC (~mul_out[126:63], three[64:1], op_type, twocmp_out); + assign three = {~mul_out[118], mul_out[118], ~mul_out[117:59]}; + mux2 #(60) mxTC (~mul_out[118:59], three[60:1], op_type, twocmp_out); // regs - flopenr #(64) regc (clk, reset, load_regc, twocmp_out, regc_out); - flopenr #(64) regb (clk, reset, load_regb, mul_out[126:63], regb_out); - flopenr #(64) rega (clk, reset, load_rega, mul_out[126:63], rega_out); - flopenr #(64) regd (clk, reset, load_regd, mul_out[126:63], regd_out); - flopenr #(128) regr (clk, reset, load_regr, mul_out, regr_out); + flopenr #(60) regc (clk, reset, load_regc, twocmp_out, regc_out); + flopenr #(60) regb (clk, reset, load_regb, mul_out[118:59], regb_out); + flopenr #(60) rega (clk, reset, load_rega, mul_out[118:59], rega_out); + flopenr #(60) regd (clk, reset, load_regd, mul_out[118:59], regd_out); + flopenr #(120) regr (clk, reset, load_regr, mul_out, regr_out); // Assuming [1,2) - flopenr #(64) rege (clk, reset, load_regs, {q_out1[63:39], (q_out1[38:10] & {29{~P}}), 10'h0}, q1); - flopenr #(64) regf (clk, reset, load_regs, {qm_out1[63:39], (qm_out1[38:10] & {29{~P}}), 10'h0}, qm1); - flopenr 
#(64) regg (clk, reset, load_regs, {qp_out1[63:39], (qp_out1[38:10] & {29{~P}}), 10'h0}, qp1); + flopenr #(60) rege (clk, reset, load_regs, {q_out1[59:35], (q_out1[34:6] & {29{~P}}), 6'h0}, q1); + flopenr #(60) regf (clk, reset, load_regs, {qm_out1[59:35], (qm_out1[34:6] & {29{~P}}), 6'h0}, qm1); + flopenr #(60) regg (clk, reset, load_regs, {qp_out1[59:35], (qp_out1[34:6] & {29{~P}}), 6'h0}, qp1); // Assuming [0,1) - flopenr #(64) regh (clk, reset, load_regs, {q_out0[63:39], (q_out0[38:10] & {29{~P}}), 10'h0}, q0); - flopenr #(64) regj (clk, reset, load_regs, {qm_out0[63:39], (qm_out0[38:10] & {29{~P}}), 10'h0}, qm0); - flopenr #(64) regk (clk, reset, load_regs, {qp_out0[63:39], (qp_out0[38:10] & {29{~P}}), 10'h0}, qp0); + flopenr #(60) regh (clk, reset, load_regs, {q_out0[59:35], (q_out0[34:6] & {29{~P}}), 6'h0}, q0); + flopenr #(60) regj (clk, reset, load_regs, {qm_out0[59:35], (qm_out0[34:6] & {29{~P}}), 6'h0}, qm0); + flopenr #(60) regk (clk, reset, load_regs, {qp_out0[59:35], (qp_out0[34:6] & {29{~P}}), 6'h0}, qp0); endmodule // divconv diff --git a/wally-pipelined/src/fpu/divconv_pipe.sv b/wally-pipelined/src/fpu/divconv_pipe.sv new file mode 100755 index 000000000..4e3b843d6 --- /dev/null +++ b/wally-pipelined/src/fpu/divconv_pipe.sv @@ -0,0 +1,175 @@ +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 8/1/2018 +// +// Purpose: Convergence unit for pipelined floating point divider/square root top unit (Goldschmidt) +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +`include "wally-config.vh" + +module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out, + regr_out, d, n, sel_muxa, sel_muxb, sel_muxr, reset, clk, + load_rega, load_regb, load_regc, load_regd, load_regr, load_regs, load_regp, + P, op_type, exp_odd); + + input logic [52:0] d, n; + input logic [2:0] sel_muxa, sel_muxb; + input logic sel_muxr; + input logic load_rega, load_regb, load_regc, load_regd; + input logic load_regr, load_regs; + input logic load_regp; + input logic P; + input logic op_type; + input logic exp_odd; + input logic reset; + input logic clk; + + output logic [59:0] q1, qp1, qm1; + output logic [59:0] q0, qp0, qm0; + output logic [59:0] rega_out, regb_out, regc_out, regd_out; + output logic [119:0] regr_out; + + supply1 vdd; + supply0 vss; + + logic [59:0] muxa_out, muxb_out; + logic [10:0] ia_div, ia_sqrt; + logic [59:0] ia_out; + logic [119:0] mul_out; + logic [59:0] q_out1, qm_out1, qp_out1; + logic [59:0] q_out0, qm_out0, qp_out0; + logic [59:0] mcand, mplier, mcand_q; + logic [59:0] twocmp_out; + logic [60:0] three; + logic [119:0] Carry, Carry2; + logic [119:0] Sum, Sum2; + logic [119:0] constant, constant2; + logic [59:0] q_const, qp_const, qm_const; + logic [59:0] d2, n2; + logic [11:0] d3; + + // Check if exponent is odd for sqrt + // If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA + assign d2 = (exp_odd&op_type) ? {vss,d,6'h0} : {d,7'h0}; + assign n2 = op_type ? d2 : {n,7'h0}; + + // IA div/sqrt + sbtm_div ia1 (d[52:41], ia_div); + sbtm_sqrt ia2 (d2[59:48], ia_sqrt); + assign ia_out = op_type ? 
{ia_sqrt, {49{1'b0}}} : {ia_div, {49{1'b0}}}; + + // Choose IA or iteration + mux6 #(60) mx1 (d2, ia_out, rega_out, regc_out, regd_out, regb_out, sel_muxb, muxb_out); + mux5 #(60) mx2 (regc_out, n2, ia_out, regb_out, regd_out, sel_muxa, muxa_out); + + // Deal with remainder if [0.5, 1) instead of [1, 2) + mux2 #(120) mx3a ({~n, {67{1'b1}}}, {{1'b1}, ~n, {66{1'b1}}}, q1[59], constant2); + // Select Mcand, Remainder/Q'' + mux2 #(120) mx3 (120'h0, constant2, sel_muxr, constant); + // Select mcand - remainder should always choose q1 [1,2) because + // adjustment of N in the from XX.FFFFFFF + mux2 #(60) mx4 (q0, q1, q1[59], mcand_q); + mux2 #(60) mx5 (muxb_out, mcand_q, sel_muxr&op_type, mplier); + mux2 #(60) mx6 (muxa_out, mcand_q, sel_muxr, mcand); + // R4 Booth TDM multiplier (carry/save) + redundantmul #(60) bigmul(.a(mcand), .b(mplier), .out0(Sum), .out1(Carry)); + // Q*D - N (reversed but changed in rounder.v to account for sign reversal) + csa #(120) csa1 (Sum, Carry, constant, Sum2, Carry2); + // Add ulp for subtraction in remainder + mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out); + + // Constant for Q'' + mux2 #(60) mx8 ({60'h0000_0000_0000_020}, {60'h0000_0040_0000_000}, P, q_const); + mux2 #(60) mx9 ({60'h0000_0000_0000_0A0}, {60'h0000_0140_0000_000}, P, qp_const); + mux2 #(60) mxA ({60'hFFFF_FFFF_FFFF_F9F}, {60'hFFFF_FF3F_FFFF_FFF}, P, qm_const); + + logic [119:0] Sum_pipe; + logic [119:0] Carry_pipe; + logic muxr_pipe; + logic rega_pipe; + logic regb_pipe; + logic regc_pipe; + logic regd_pipe; + logic regs_pipe; + logic regs_pipe2; + logic regr_pipe; + logic P_pipe; + logic op_type_pipe; + logic [59:0] q_const_pipe; + logic [59:0] qm_const_pipe; + logic [59:0] qp_const_pipe; + logic [59:0] q_const_pipe2; + logic [59:0] qm_const_pipe2; + logic [59:0] qp_const_pipe2; + + // Stage 1 + flopenr #(120) regp1 (clk, reset, load_regp, Sum2, Sum_pipe); + flopenr #(120) regp2 (clk, reset, load_regp, Carry2, Carry_pipe); + flopenr #(1) regp3 (clk, reset, load_regp, 
muxr_out, muxr_pipe); + + flopenr #(1) regp4 (clk, reset, load_regp, load_rega, rega_pipe); + flopenr #(1) regp5 (clk, reset, load_regp, load_regb, regb_pipe); + flopenr #(1) regp6 (clk, reset, load_regp, load_regc, regc_pipe); + flopenr #(1) regp7 (clk, reset, load_regp, load_regd, regd_pipe); + flopenr #(1) regp8 (clk, reset, load_regp, load_regs, regs_pipe); + flopenr #(1) regp9 (clk, reset, load_regp, load_regr, regr_pipe); + flopenr #(1) regpA (clk, reset, load_regp, P, P_pipe); + flopenr #(1) regpB (clk, reset, load_regp, op_type, op_type_pipe); + flopenr #(60) regpC (clk, reset, load_regp, q_const, q_const_pipe); + flopenr #(60) regpD (clk, reset, load_regp, qp_const, qp_const_pipe); + flopenr #(60) regpE (clk, reset, load_regp, qm_const, qm_const_pipe); + + // CPA (from CSA)/Remainder addition/subtraction + assign {cout1, mul_out} = Sum_pipe + Carry_pipe + muxr_pipe; + // One's complement instead of two's complement (for hw efficiency) + assign three = {~mul_out[118] , mul_out[118], ~mul_out[117:59]}; + mux2 #(60) mxTC (~mul_out[118:59], three[60:1], op_type_pipe, twocmp_out); + + // Stage 2 + flopenr #(60) regc (clk, reset, regc_pipe, twocmp_out, regc_out); + flopenr #(60) regb (clk, reset, regb_pipe, mul_out[118:59], regb_out); + flopenr #(60) rega (clk, reset, rega_pipe, mul_out[118:59], rega_out); + flopenr #(60) regd (clk, reset, regd_pipe, mul_out[118:59], regd_out); + flopenr #(120) regr (clk, reset, regr_pipe, mul_out, regr_out); + flopenr #(1) regl (clk, reset, regs_pipe, regs_pipe, regs_pipe2); /* NOTE(review): regs_pipe sits in both the 3rd and 4th port positions (presumably enable and data - confirm against flopenr); if so regs_pipe2 can only ever load a 1 and is cleared by reset alone */ + flopenr #(60) regm (clk, reset, regs_pipe, q_const_pipe, q_const_pipe2); + flopenr #(60) regn (clk, reset, regs_pipe, qp_const_pipe, qp_const_pipe2); + flopenr #(60) rego (clk, reset, regs_pipe, qm_const_pipe, qm_const_pipe2); /* NOTE(review): q_const_pipe2, qp_const_pipe2 and qm_const_pipe2 are not read anywhere in this module; the adders below use q_const, qp_const and qm_const directly - confirm which is intended */ + + // Assuming [1,2) - q1 + assign {cout2, q_out1} = regb_out + q_const; + assign {cout3, qp_out1} = regb_out + qp_const; + assign {cout4, qm_out1} = regb_out + qm_const + 1'b1; + // Assuming [0.5,1) - q0 + assign {cout5, q_out0} = 
{regb_out[58:0], 1'b0} + q_const; + assign {cout6, qp_out0} = {regb_out[58:0], 1'b0} + qp_const; + assign {cout7, qm_out0} = {regb_out[58:0], 1'b0} + qm_const + 1'b1; + + // Stage 3 + // Assuming [1,2) + flopenr #(60) rege (clk, reset, regs_pipe2, {q_out1[59:35], (q_out1[34:6] & {29{~P_pipe}}), 6'h0}, q1); + flopenr #(60) regf (clk, reset, regs_pipe2, {qm_out1[59:35], (qm_out1[34:6] & {29{~P_pipe}}), 6'h0}, qm1); + flopenr #(60) regg (clk, reset, regs_pipe2, {qp_out1[59:35], (qp_out1[34:6] & {29{~P_pipe}}), 6'h0}, qp1); + // Assuming [0.5,1) + flopenr #(60) regh (clk, reset, regs_pipe2, {q_out0[59:35], (q_out0[34:6] & {29{~P_pipe}}), 6'h0}, q0); + flopenr #(60) regj (clk, reset, regs_pipe2, {qm_out0[59:35], (qm_out0[34:6] & {29{~P_pipe}}), 6'h0}, qm0); + flopenr #(60) regk (clk, reset, regs_pipe2, {qp_out0[59:35], (qp_out0[34:6] & {29{~P_pipe}}), 6'h0}, qp0); + +endmodule // divconv_pipe diff --git a/wally-pipelined/src/fpu/fpdiv.sv b/wally-pipelined/src/fpu/fpdiv.sv index 1f1788f9d..571d79c47 100755 --- a/wally-pipelined/src/fpu/fpdiv.sv +++ b/wally-pipelined/src/fpu/fpdiv.sv @@ -61,9 +61,9 @@ module fpdiv ( logic [4:0] FlagsIn; logic signResult; - logic [63:0] q1, qm1, qp1, q0, qm0, qp0; - logic [63:0] rega_out, regb_out, regc_out, regd_out; - logic [127:0] regr_out; + logic [59:0] q1, qm1, qp1, q0, qm0, qp0; + logic [59:0] rega_out, regb_out, regc_out, regd_out; + logic [119:0] regr_out; logic [2:0] sel_muxa, sel_muxb; logic sel_muxr; logic load_rega, load_regb, load_regc, load_regd, load_regr; diff --git a/wally-pipelined/src/fpu/fpdiv_pipe.sv b/wally-pipelined/src/fpu/fpdiv_pipe.sv new file mode 100755 index 000000000..52380d3c6 --- /dev/null +++ b/wally-pipelined/src/fpu/fpdiv_pipe.sv @@ -0,0 +1,172 @@ +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 8/1/2018 +// +// Purpose: Floating point divider/square root top unit pipelined version (Goldschmidt) +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +module fpdiv_pipe ( + input logic clk, + input logic reset, + input logic start, + input logic [63:0] op1, + input logic [63:0] op2, + input logic [1:0] rm, + input logic op_type, + input logic P, + input logic OvEn, + input logic UnEn, + input logic XNaNQ, + input logic YNaNQ, + input logic XZeroQ, + input logic YZeroQ, + input logic XInfQ, + input logic YInfQ, + + output logic done, + output logic FDivBusyE, + output logic [63:0] AS_Result, + output logic [4:0] Flags); + + supply1 vdd; + supply0 vss; + + logic [63:0] Float1; + logic [63:0] Float2; + logic [63:0] IntValue; + + logic [12:0] exp1, exp2, expF; + logic [12:0] exp_diff, bias; + logic [13:0] exp_sqrt; + + logic [63:0] Result; + logic [52:0] mantissaA; + logic [52:0] mantissaB; + + logic [2:0] sel_inv; + logic Invalid; + logic [4:0] FlagsIn; + logic exp_gt63; + logic Sticky_out; + logic signResult, sign_corr; + logic corr_sign; + logic zeroB; + logic convert; + logic swap; + logic sub; + + logic [59:0] q1, qm1, qp1, q0, qm0, qp0; + logic [59:0] rega_out, regb_out, regc_out, regd_out; + logic [119:0] regr_out; + logic [2:0] sel_muxa, sel_muxb; + logic sel_muxr; + logic load_rega, load_regb, load_regc, load_regd, load_regr; + logic load_regp; + + logic donev, sel_muxrv, sel_muxsv; + logic [1:0] sel_muxav, sel_muxbv; + logic load_regav, load_regbv, load_regcv; + logic load_regrv, load_regsv; + + + // op_type : fdiv=0, fsqrt=1 + assign Float1 = op1; + assign Float2 = op_type ? 
op1 : op2; + + // Exception detection + exception_div exc1 (.A(Float1), .B(Float2), .op_type, .Ztype(sel_inv), .Invalid); + + // Determine Sign/Mantissa + assign signResult = ((Float1[63]^Float2[63])&~op_type) | Float1[63]&op_type; + assign mantissaA = {vdd, Float1[51:0]}; + assign mantissaB = {vdd, Float2[51:0]}; + // Early-ending detection + assign early_detection = |mantissaB[31:0]; + + // Perform Exponent Subtraction - expA - expB + Bias + assign exp1 = {2'b0, Float1[62:52]}; + assign exp2 = {2'b0, Float2[62:52]}; + // bias : DP = 2^{11-1}-1 = 1023 + assign bias = {3'h0, 10'h3FF}; + // Divide exponent + assign {exp_cout1, open, exp_diff} = {2'b0, exp1} - {2'b0, exp2} + {2'b0, bias}; + + // Sqrt exponent (check if exponent is odd) + assign exp_odd = Float1[52] ? vss : vdd; + assign {exp_cout2, exp_sqrt} = {1'b0, exp1} + {4'h0, 10'h3ff} + {13'b0, exp_odd}; + + // Choose correct exponent + assign expF = op_type ? exp_sqrt[13:1] : exp_diff; + + logic exp_odd1; + logic P1; + logic op_type1; + logic [12:0] expF1; + logic [52:0] mantissaA1; + logic [52:0] mantissaB1; + logic [2:0] sel_inv1; + logic DenormIn1; + logic signResult1; + logic Invalid1; + + flopenr #(1) rega (clk, reset, 1'b1, exp_odd, exp_odd1); + flopenr #(1) regb (clk, reset, 1'b1, P, P1); + flopenr #(1) regc (clk, reset, 1'b1, op_type, op_type1); + flopenr #(13) regd (clk, reset, 1'b1, expF, expF1); + flopenr #(53) rege (clk, reset, 1'b1, mantissaA, mantissaA1); + flopenr #(53) regf (clk, reset, 1'b1, mantissaB, mantissaB1); + flopenr #(1) regg (clk, reset, 1'b1, start, start1); + flopenr #(3) regh (clk, reset, 1'b1, sel_inv, sel_inv1); + flopenr #(1) regi (clk, reset, 1'b1, DenormIn, DenormIn1); + flopenr #(1) regj (clk, reset, 1'b1, signResult, signResult1); + flopenr #(1) regk (clk, reset, 1'b1, Invalid, Invalid1); + + // Main Goldschmidt/Division Routine + divconv_pipe goldy (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out, + regr_out, mantissaB1, mantissaA1, + sel_muxa, sel_muxb, 
sel_muxr, reset, clk, + load_rega, load_regb, load_regc, load_regd, + load_regr, load_regs, load_regp, + P1, op_type1, exp_odd1); + + // FSM : control divider + fsm_fpdiv_pipe control (.clk, .reset, .start, .op_type, .P, + .done, .load_rega, .load_regb, .load_regc, .load_regd, + .load_regr, .load_regs, .load_regp, + .sel_muxa, .sel_muxb, .sel_muxr, .divBusy(FDivBusyE)); + + + // Round the mantissa to a 52-bit value, with the leading one + // removed. The rounding units also handles special cases and + // set the exception flags. + rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF), + .sel_inv, .Invalid, .SignR(signResult), + .Float1(op1), .Float2(op2), + .XNaNQ, .YNaNQ, .XZeroQ, .YZeroQ, + .XInfQ, .YInfQ, .op_type, + .q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out, + .Result, .Flags(FlagsIn)); + + // Store the final result and the exception flags in registers. + flopenr #(64) regl (clk, reset, done, Result, AS_Result); + flopenr #(5) regn (clk, reset, done, FlagsIn, Flags); + +endmodule // fpdiv_pipe + diff --git a/wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv b/wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv new file mode 100755 index 000000000..66ce0ab7e --- /dev/null +++ b/wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv @@ -0,0 +1,1216 @@ +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 9/28/2021 +// +// Purpose: FSM for floating point divider/square root unit (Goldschmidt) +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +module fsm_fpdiv_pipe ( + input logic clk, + input logic reset, + input logic start, + input logic op_type, + input logic P, + output logic done, + output logic load_rega, + output logic load_regb, + output logic load_regc, + output logic load_regd, + output logic load_regr, + output logic load_regs, + output logic load_regp, + output logic [2:0] sel_muxa, + output logic [2:0] sel_muxb, + output logic sel_muxr, + output logic divBusy + ); + + // div64 : S0-S14 (15 cycles) + // sqrt64 : S15-S35 (21 cycles) + // div32: S36-S47 (12 cycles) + // sqrt32 : S48-S64 (17 cycles) + typedef enum logic [6:0] {S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, + S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, + S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, + S30, S31, S32, S33, S34, S35, S36, S37, S38, S39, + S40, S41, S42, S43, S44, S45, S46, S47, S48, S49, + S50, S51, S52, S53, S54, S55, S56, S57, S58, S59, + S60, S61, S62, S63, S64} statetype; + + statetype current_state, next_state; + + always @(posedge clk) + begin + if (reset == 1'b1) + current_state <= S0; + else + current_state <= next_state; + end + + always @(*) + begin + case(current_state) + S0: // iteration 0 + begin + if (start==1'b0) + begin + done = 1'b0; + divBusy = 1'b0; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b0; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S0; + end + else if (start==1'b1 && op_type==1'b0 && P==1'b0) + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b1; + load_regb = 1'b0; + load_regc = 1'b1; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b010; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S1; + end + else if (start==1'b1 && op_type==1'b0 && P==1'b1) + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b1; + load_regb = 1'b0; + load_regc = 1'b1; + 
load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b010; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S36; + end + else if (start==1'b1 && op_type==1'b1 && P==1'b0) + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b1; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b010; + sel_muxb = 3'b001; + sel_muxr = 1'b0; + next_state <= S15; + end + else if (start==1'b1 && op_type==1'b1 && P==1'b1) + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b1; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b010; + sel_muxb = 3'b001; + sel_muxr = 1'b0; + next_state <= S48; + end + else + begin + done = 1'b0; + divBusy = 1'b0; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b0; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S0; + end + end // case: S0 + // div64 + S1: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b1; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b001; + sel_muxb = 3'b001; + sel_muxr = 1'b0; + next_state <= S2; + end // case: S1 + S2: // iteration 1 + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b1; + load_regb = 1'b0; + load_regc = 1'b1; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b010; + sel_muxr = 1'b0; + next_state <= S3; + end + S3: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b1; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b011; + sel_muxb = 3'b011; + sel_muxr = 1'b0; + next_state <= S4; + end + S4: // iteration 2 + begin + done = 
1'b0; + divBusy = 1'b1; + load_rega = 1'b1; + load_regb = 1'b0; + load_regc = 1'b1; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b010; + sel_muxr = 1'b0; + next_state <= S5; + end + S5: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b1; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b011; + sel_muxb = 3'b011; + sel_muxr = 1'b0; // add + next_state <= S6; + end + S6: // iteration 3 + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b1; + load_regb = 1'b0; + load_regc = 1'b1; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b010; + sel_muxr = 1'b0; + next_state <= S7; + end + S7: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b1; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b011; + sel_muxb = 3'b011; + sel_muxr = 1'b0; + next_state <= S8; + end // case: S7 + S8: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S9; + end // case: S7 + S9: // q,qm,qp + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b1; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S10; + end // case: S9 + S10: // rem + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b1; + next_state <= S11; + end + S11: + begin + done = 
1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b1; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b1; + next_state <= S12; + end // case: S11 + S12: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S13; + end + S13: + begin + done = 1'b1; + divBusy = 1'b0; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S14; + end + S14: + begin + done = 1'b0; + divBusy = 1'b0; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b0; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S0; + end + // sqrt64 + S15: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S16; + end + S16: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b1; + load_regb = 1'b0; + load_regc = 1'b1; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b001; + sel_muxb = 3'b100; + sel_muxr = 1'b0; + next_state <= S17; + end + S17: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b1; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b010; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S18; + end + S18: // iteration 1 + begin + done = 1'b0; + divBusy = 1'b1; + 
load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b1; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b011; + sel_muxr = 1'b0; + next_state <= S19; + end + S19: // iteration 1 + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S20; + end + S20: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b1; + load_regb = 1'b0; + load_regc = 1'b1; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b100; + sel_muxb = 3'b010; + sel_muxr = 1'b0; + next_state <= S21; + end + S21: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b1; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b011; + sel_muxb = 3'b011; + sel_muxr = 1'b0; + next_state <= S22; + end + S22: // iteration 2 + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b1; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b011; + sel_muxr = 1'b0; + next_state <= S23; + end // case: S18 + S23: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S24; + end + S24: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b1; + load_regb = 1'b0; + load_regc = 1'b1; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b100; + sel_muxb = 3'b010; + sel_muxr = 1'b0; + next_state <= S25; + end + S25: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + 
load_regb = 1'b1; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b011; + sel_muxb = 3'b011; + sel_muxr = 1'b0; + next_state <= S26; + end + S26: // iteration 3 + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b1; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b011; + sel_muxr = 1'b0; + next_state <= S27; + end // case: S21 + S27: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S28; + end + S28: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b1; + load_regb = 1'b0; + load_regc = 1'b1; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b100; + sel_muxb = 3'b010; + sel_muxr = 1'b0; + next_state <= S29; + end + S29: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b1; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b011; + sel_muxb = 3'b011; + sel_muxr = 1'b0; + next_state <= S30; + end // case: S23 + S30: // q,qm,qp + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b1; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S31; + end + S31: // rem + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b011; + sel_muxb = 3'b110; + sel_muxr = 1'b1; + next_state <= S32; + end // case: S25 + S32: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 
1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b1; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b011; + sel_muxb = 3'b110; + sel_muxr = 1'b1; + next_state <= S33; + end // case: S32 + S33: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S34; + end + S34: // done + begin + done = 1'b1; + divBusy = 1'b0; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S35; + end // case: S34 + S35: // back to S0 + begin + done = 1'b0; + divBusy = 1'b0; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b0; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S0; + end + // div32 + S36: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b1; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b001; + sel_muxb = 3'b001; + sel_muxr = 1'b0; + next_state <= S37; + end // case: S36 + S37: // iteration 1 + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b1; + load_regb = 1'b0; + load_regc = 1'b1; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b010; + sel_muxr = 1'b0; + next_state <= S38; + end + S38: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b1; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b011; + sel_muxb = 3'b011; + sel_muxr = 1'b0; + next_state <= S39; + end + S39: // iteration 2 + begin + done = 1'b0; + divBusy = 
1'b1; + load_rega = 1'b1; + load_regb = 1'b0; + load_regc = 1'b1; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b010; + sel_muxr = 1'b0; + next_state <= S40; + end + S40: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b1; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b011; + sel_muxb = 3'b011; + sel_muxr = 1'b0; + next_state <= S41; + end + S41: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S42; + end + S42: // q,qm,qp + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b1; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S43; + end // case: S9 + S43: // rem + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b1; + next_state <= S44; + end + S44: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b1; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b1; + next_state <= S45; + end // case: S11 + S45: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S46; + end + S46: // done + begin + done = 1'b1; + divBusy = 1'b0; + 
load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S47; + end + S47: + begin + done = 1'b0; + divBusy = 1'b0; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b0; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S0; + end + // sqrt32 + S48: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S49; + end + S49: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b1; + load_regb = 1'b0; + load_regc = 1'b1; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b001; + sel_muxb = 3'b100; + sel_muxr = 1'b0; + next_state <= S50; + end + S50: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b1; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b010; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S51; + end + S51: // iteration 1 + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b1; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b011; + sel_muxr = 1'b0; + next_state <= S52; + end + S52: // iteration 1 + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S53; + end + S53: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b1; + 
load_regb = 1'b0; + load_regc = 1'b1; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b100; + sel_muxb = 3'b010; + sel_muxr = 1'b0; + next_state <= S54; + end + S54: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b1; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b011; + sel_muxb = 3'b011; + sel_muxr = 1'b0; + next_state <= S55; + end + S55: // iteration 2 + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b1; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b011; + sel_muxr = 1'b0; + next_state <= S56; + end // case: S18 + S56: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S57; + end + S57: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b1; + load_regb = 1'b0; + load_regc = 1'b1; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b100; + sel_muxb = 3'b010; + sel_muxr = 1'b0; + next_state <= S58; + end + S58: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b1; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b011; + sel_muxb = 3'b011; + sel_muxr = 1'b0; + next_state <= S59; + end + S59: // q,qm,qp + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b1; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S60; + end + S60: // rem + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + 
load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b011; + sel_muxb = 3'b110; + sel_muxr = 1'b1; + next_state <= S61; + end // case: S25 + S61: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b1; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b011; + sel_muxb = 3'b110; + sel_muxr = 1'b1; + next_state <= S62; + end // case: S34 + S62: + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S63; + end + S63: // done + begin + done = 1'b1; + divBusy = 1'b0; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b1; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S64; + end // case: S34 + S64: + begin + done = 1'b0; + divBusy = 1'b0; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b0; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S0; + end + default: + begin + done = 1'b0; + divBusy = 1'b0; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b0; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state <= S0; + end + endcase // case(current_state) + end // always @ (current_state or X) + +endmodule // fsm diff --git a/wally-pipelined/src/fpu/rounder_div.sv b/wally-pipelined/src/fpu/rounder_div.sv index 03dcff7a4..66bfe1d30 100755 --- a/wally-pipelined/src/fpu/rounder_div.sv +++ b/wally-pipelined/src/fpu/rounder_div.sv @@ -40,13 +40,13 @@ module rounder_div ( input logic XInfQ, input logic 
YInfQ, input logic op_type, - input logic [63:0] q1, - input logic [63:0] qm1, - input logic [63:0] qp1, - input logic [63:0] q0, - input logic [63:0] qm0, - input logic [63:0] qp0, - input logic [127:0] regr_out, + input logic [59:0] q1, + input logic [59:0] qm1, + input logic [59:0] qp1, + input logic [59:0] q0, + input logic [59:0] qm0, + input logic [59:0] qp0, + input logic [119:0] regr_out, output logic [63:0] Result, output logic [4:0] Flags @@ -56,7 +56,7 @@ module rounder_div ( logic [10:0] Rexp; logic [12:0] Texp; logic [51:0] Rmant; - logic [63:0] Tmant; + logic [59:0] Tmant; logic [51:0] Smant; logic Rzero; logic Gdp, Gsp, G; @@ -77,7 +77,7 @@ module rounder_div ( logic zero_rem; logic [1:0] mux_mant; logic sign_rem; - logic [63:0] q, qm, qp; + logic [59:0] q, qm, qp; logic exp_ovf; logic [50:0] NaN_out; @@ -87,10 +87,10 @@ module rounder_div ( // Remainder = 0? assign zero_rem = ~(|regr_out); // Remainder Sign - assign sign_rem = ~regr_out[127]; + assign sign_rem = ~regr_out[119]; // choose correct Guard bit [1,2) or [0,1) - assign Gdp = q1[63] ? q1[10] : q0[10]; - assign Gsp = q1[63] ? q1[39] : q0[39]; + assign Gdp = q1[59] ? q1[6] : q0[6]; + assign Gsp = q1[59] ? q1[35] : q0[35]; assign G = P ? Gsp : Gdp; // Selection of Rounding (from logic/switching) assign mux_mant[1] = (SignR&rm[1]&rm[0]&G) | (!SignR&rm[1]&!rm[0]&G) | @@ -102,18 +102,18 @@ module rounder_div ( (SignR&rm[1]&!rm[0]&!G&!zero_rem&sign_rem); // Which Q? - mux2 #(64) mx1 (q0, q1, q1[63], q); - mux2 #(64) mx2 (qm0, qm1, q1[63], qm); - mux2 #(64) mx3 (qp0, qp1, q1[63], qp); + mux2 #(60) mx1 (q0, q1, q1[59], q); + mux2 #(60) mx2 (qm0, qm1, q1[59], qm); + mux2 #(60) mx3 (qp0, qp1, q1[59], qp); // Choose Q, Q+1, Q-1 - mux3 #(64) mx4 (q, qm, qp, mux_mant, Tmant); - assign Smant = Tmant[62:11]; + mux3 #(60) mx4 (q, qm, qp, mux_mant, Tmant); + assign Smant = Tmant[58:7]; // Compute the value of the exponent // exponent is modified if we choose: // 1.) 
we choose any qm0, qp0, q0 (since we shift mant) // 2.) we choose qp and we overflow (for RU) - assign exp_ovf = |{qp[62:40], (qp[39:11] & {29{~P}})}; - assign Texp = exp_diff - {{12{1'b0}}, ~q1[63]} + {{12{1'b0}}, mux_mant[1]&qp1[63]&~exp_ovf}; + assign exp_ovf = |{qp[58:36], (qp[35:7] & {29{~P}})}; + assign Texp = exp_diff - {{12{1'b0}}, ~q1[59]} + {{12{1'b0}}, mux_mant[1]&qp1[59]&~exp_ovf}; // Overflow only occurs for double precision, if Texp[10] to Texp[0] are // all ones. To encourage sharing with single precision overflow detection,