From 49721a169b337d36f55f676acb9eccd1401414f4 Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Thu, 21 Oct 2021 13:52:12 -0500 Subject: [PATCH] Clean up some FPU and add pipelined fpdivsqrt to fpu.sv --- wally-pipelined/src/fpu/divconv_pipe.sv | 19 +-- wally-pipelined/src/fpu/fpdiv_pipe.sv | 62 ++++---- wally-pipelined/src/fpu/fpu.sv | 122 ++++++++------- wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv | 181 ++++++++++++---------- 4 files changed, 198 insertions(+), 186 deletions(-) diff --git a/wally-pipelined/src/fpu/divconv_pipe.sv b/wally-pipelined/src/fpu/divconv_pipe.sv index 4e3b843d6..240000c28 100755 --- a/wally-pipelined/src/fpu/divconv_pipe.sv +++ b/wally-pipelined/src/fpu/divconv_pipe.sv @@ -50,6 +50,7 @@ module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, r supply0 vss; logic [59:0] muxa_out, muxb_out; + logic muxr_out; logic [10:0] ia_div, ia_sqrt; logic [59:0] ia_out; logic [119:0] mul_out; @@ -67,8 +68,8 @@ module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, r // Check if exponent is odd for sqrt // If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA - assign d2 = (exp_odd&op_type) ? {vss,d,6'h0} : {d,7'h0}; - assign n2 = op_type ? d2 : {n,7'h0}; + assign d2 = (exp_odd&op_type) ? {vss, d, 6'h0} : {d, 7'h0}; + assign n2 = op_type ? d2 : {n, 7'h0}; // IA div/sqrt sbtm_div ia1 (d[52:41], ia_div); @@ -137,7 +138,7 @@ module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, r flopenr #(60) regpE (clk, reset, load_regp, qm_const, qm_const_pipe); // CPA (from CSA)/Remainder addition/subtraction - assign {cout1, mul_out} = Sum_pipe + Carry_pipe + muxr_pipe; + assign mul_out = Sum_pipe + Carry_pipe + {119'h0, muxr_pipe}; // One's complement instead of two's complement (for hw efficiency) assign three = {~mul_out[118] , mul_out[118], ~mul_out[117:59]}; mux2 #(60) mxTC (~mul_out[118:59], three[60:1], op_type_pipe, twocmp_out); @@ -154,13 +155,13 @@ module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, r flopenr #(60) rego (clk, reset, regs_pipe, qm_const_pipe, qm_const_pipe2); // Assuming [1,2) - q1 - assign {cout2, q_out1} = regb_out + q_const; - assign {cout3, qp_out1} = regb_out + qp_const; - assign {cout4, qm_out1} = regb_out + qm_const + 1'b1; + assign q_out1 = regb_out + q_const; + assign qp_out1 = regb_out + qp_const; + assign qm_out1 = regb_out + qm_const + 1'b1; // Assuming [0.5,1) - q0 - assign {cout5, q_out0} = {regb_out[58:0], 1'b0} + q_const; - assign {cout6, qp_out0} = {regb_out[58:0], 1'b0} + qp_const; - assign {cout7, qm_out0} = {regb_out[58:0], 1'b0} + qm_const + 1'b1; + assign q_out0 = {regb_out[58:0], 1'b0} + q_const; + assign qp_out0 = {regb_out[58:0], 1'b0} + qp_const; + assign qm_out0 = {regb_out[58:0], 1'b0} + qm_const + 1'b1; // Stage 3 // Assuming [1,2) diff --git a/wally-pipelined/src/fpu/fpdiv_pipe.sv b/wally-pipelined/src/fpu/fpdiv_pipe.sv index 52380d3c6..1012bb325 100755 --- a/wally-pipelined/src/fpu/fpdiv_pipe.sv +++ b/wally-pipelined/src/fpu/fpdiv_pipe.sv @@ -78,13 +78,18 @@ module fpdiv_pipe ( logic [2:0] sel_muxa, sel_muxb; logic sel_muxr; logic load_rega, load_regb, load_regc, load_regd, load_regr; - logic load_regp; - - logic donev, sel_muxrv, sel_muxsv; - logic [1:0] sel_muxav, sel_muxbv; - logic load_regav, load_regbv, load_regcv; - logic load_regrv, load_regsv; + logic load_regp, load_regs; + logic exp_odd, exp_odd1; + logic start1; + logic P1; + logic op_type1; + logic [12:0] expF1; + logic [52:0] mantissaA1; + logic [52:0] mantissaB1; + logic [2:0] sel_inv1; + logic signResult1; + logic Invalid1; // op_type : fdiv=0, fsqrt=1 assign Float1 = op1; @@ -94,11 +99,9 @@ module fpdiv_pipe ( exception_div exc1 (.A(Float1), .B(Float2), .op_type, .Ztype(sel_inv), .Invalid); // Determine Sign/Mantissa - assign signResult = ((Float1[63]^Float2[63])&~op_type) | Float1[63]&op_type; + assign signResult = ((Float1[63]^Float2[63])&~op_type); assign mantissaA = {vdd, Float1[51:0]}; assign mantissaB = {vdd, Float2[51:0]}; - // Early-ending detection - assign early_detection = |mantissaB[31:0]; // Perform Exponent Subtraction - expA - expB + Bias assign exp1 = {2'b0, Float1[62:52]}; @@ -106,26 +109,14 @@ module fpdiv_pipe ( // bias : DP = 2^{11-1}-1 = 1023 assign bias = {3'h0, 10'h3FF}; // Divide exponent - assign {exp_cout1, open, exp_diff} = {2'b0, exp1} - {2'b0, exp2} + {2'b0, bias}; + assign exp_diff = {2'b0, exp1} - {2'b0, exp2} + {2'b0, bias}; // Sqrt exponent (check if exponent is odd) - assign exp_odd = Float1[52] ? vss : vdd; - assign {exp_cout2, exp_sqrt} = {1'b0, exp1} + {4'h0, 10'h3ff} + {13'b0, exp_odd}; - + assign exp_odd = Float1[52] ? 1'b0 : 1'b1; + assign exp_sqrt = {1'b0, exp1} + {4'h0, 10'h3ff} + {13'b0, exp_odd}; // Choose correct exponent assign expF = op_type ? exp_sqrt[13:1] : exp_diff; - logic exp_odd1; - logic P1; - logic op_type1; - logic [12:0] expF1; - logic [52:0] mantissaA1; - logic [52:0] mantissaB1; - logic [2:0] sel_inv1; - logic DenormIn1; - logic signResult1; - logic Invalid1; - flopenr #(1) rega (clk, reset, 1'b1, exp_odd, exp_odd1); flopenr #(1) regb (clk, reset, 1'b1, P, P1); flopenr #(1) regc (clk, reset, 1'b1, op_type, op_type1); @@ -134,33 +125,32 @@ module fpdiv_pipe ( flopenr #(53) regf (clk, reset, 1'b1, mantissaB, mantissaB1); flopenr #(1) regg (clk, reset, 1'b1, start, start1); flopenr #(3) regh (clk, reset, 1'b1, sel_inv, sel_inv1); - flopenr #(1) regi (clk, reset, 1'b1, DenormIn, DenormIn1); flopenr #(1) regj (clk, reset, 1'b1, signResult, signResult1); flopenr #(1) regk (clk, reset, 1'b1, Invalid, Invalid1); // Main Goldschmidt/Division Routine - divconv_pipe goldy (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out, - regr_out, mantissaB1, mantissaA1, - sel_muxa, sel_muxb, sel_muxr, reset, clk, - load_rega, load_regb, load_regc, load_regd, - load_regr, load_regs, load_regp, - P1, op_type1, exp_odd1); + divconv_pipe goldy (.q1, .qm1, .qp1, .q0, .qm0, .qp0, + .rega_out, .regb_out, .regc_out, .regd_out, + .regr_out, .d(mantissaB1), .n(mantissaA1), + .sel_muxa, .sel_muxb, .sel_muxr, .reset, .clk, + .load_rega, .load_regb, .load_regc, .load_regd, + .load_regr, .load_regs, .load_regp, + .P(P1), .op_type(op_type1), .exp_odd(exp_odd1)); // FSM : control divider - fsm_fpdiv_pipe control (.clk, .reset, .start, .op_type, .P, + fsm_fpdiv_pipe control (.clk, .reset, .start(start), .op_type(op_type1), .P(P1), .done, .load_rega, .load_regb, .load_regc, .load_regd, .load_regr, .load_regs, .load_regp, .sel_muxa, .sel_muxb, .sel_muxr, .divBusy(FDivBusyE)); - // Round the mantissa to a 52-bit value, with the leading one // removed. The rounding units also handles special cases and // set the exception flags. - rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF), - .sel_inv, .Invalid, .SignR(signResult), + rounder_div round1 (.rm, .P(P1), .OvEn(1'b0), .UnEn(1'b0), .exp_diff(expF1), + .sel_inv(sel_inv1), .Invalid(Invalid1), .SignR(signResult1), .Float1(op1), .Float2(op2), .XNaNQ, .YNaNQ, .XZeroQ, .YZeroQ, - .XInfQ, .YInfQ, .op_type, + .XInfQ, .YInfQ, .op_type(op_type1), .q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out, .Result, .Flags(FlagsIn)); diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index fd91b1b2f..da7163fea 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -66,67 +66,69 @@ module fpu ( logic FDivStartD, FDivStartE; // Start division or squareroot logic FWriteIntD; // Write to integer register logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals - logic [1:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register - logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component - logic [2:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage - logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; // Select the result written to the integer resister - logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input + logic [1:0] FResultSelD, FResultSelE; // Select the result written to FP register + logic [1:0] FResultSelM, FResultSelW; // Select the result written to FP register + logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component + logic [2:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage + logic [1:0] FIntResSelD, FIntResSelE; // Select the result written to the integer resister + logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input // regfile signals - logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) - logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) - logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) + logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [63:0] FSrcXE; // Input 1 to the various units (after forwarding) + logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) + logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) // unpacking signals - logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage - logic XSgnM, YSgnM; // input's sign - memory stage - logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage - logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage - logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage - logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage - logic [10:0] BiasE; // bias based on precision (single=7f double=3ff - max expoent/2) - logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage - logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage - logic XNaNQ, YNaNQ; // is the input a NaN - divide - logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage - logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage - logic XDenormE, YDenormE, ZDenormE; // is the input denormalized - logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage - logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage - logic XZeroQ, YZeroQ; // is the input zero - divide - logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage - logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage - logic XInfQ, YInfQ; // is the input infinity - divide - logic XExpMaxE; // is the exponent all ones (max value) - logic XNormE; // is normal + logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage + logic XSgnM, YSgnM; // input's sign - memory stage + logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage + logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage + logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage + logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage + logic [10:0] BiasE; // bias based on precision (single=7f double=3ff) + logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage + logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage + logic XNaNQ, YNaNQ; // is the input a NaN - divide + logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage + logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage + logic XDenormE, YDenormE, ZDenormE; // is the input denormalized + logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage + logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage + logic XZeroQ, YZeroQ; // is the input zero - divide + logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage + logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage + logic XInfQ, YInfQ; // is the input infinity - divide + logic XExpMaxE; // is the exponent all ones (max value) + logic XNormE; // is normal + logic FmtQ; + logic FDivStartQ; // result and flag signals - logic [63:0] FDivResM, FDivResW; // divide/squareroot result - logic [4:0] FDivFlgM, FDivFlgW; // divide/squareroot flags - logic [63:0] FMAResM, FMAResW; // FMA/multiply result - logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply result - logic [63:0] ReadResW; // read result (load instruction) - logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result - logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags - logic [63:0] CvtResE, CvtResM; // FP <-> int convert result - logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this - logic [63:0] ClassResE, ClassResM; // classify result - logic [63:0] CmpResE, CmpResM; // compare result - logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid) - logic [63:0] SgnResE, SgnResM; // sign injection result - logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid) - logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage - logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage + logic [63:0] FDivResM, FDivResW; // divide/squareroot result + logic [4:0] FDivFlgM, FDivFlgW; // divide/squareroot flags + logic [63:0] FMAResM, FMAResW; // FMA/multiply result + logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply result + logic [63:0] ReadResW; // read result (load instruction) + logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result + logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags + logic [63:0] CvtResE, CvtResM; // FP <-> int convert result + logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this + logic [63:0] ClassResE, ClassResM; // classify result + logic [63:0] CmpResE, CmpResM; // compare result + logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid) + logic [63:0] SgnResE, SgnResM; // sign injection result + logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid) + logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage + logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage logic [`XLEN-1:0] FIntResE; - logic [63:0] FPUResultW; // final FP result being written to the FP register - + logic [63:0] FPUResultW; // final FP result being written to the FP register // other signals - logic FDivSqrtDoneE; // is divide done - logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit - logic FDivClk; // clock for divide/squareroot unit - logic [63:0] AlignedSrcAE; // align SrcA to the floating point format + logic FDivSqrtDoneE; // is divide done + logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit + logic FDivClk; // clock for divide/squareroot unit + logic [63:0] AlignedSrcAE; // align SrcA to the floating point format // DECODE STAGE @@ -198,14 +200,14 @@ module fpu ( floprc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E), .clear(FDivSqrtDoneE), .reset(reset), .clk(FDivBusyE)); - floprc #(6) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE}), - .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ}), + floprc #(7) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE}), + .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ}), .clear(FDivSqrtDoneE), - .reset(reset), .clk(FDivBusyE)); - fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), - .reset, .clk(clk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1), - .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, - .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); + .reset(reset), .clk(FDivBusyE)); + fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), + .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), + .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, + .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); // convert from signle to double and vice versa cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE); diff --git a/wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv b/wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv index 66ce0ab7e..95438e012 100755 --- a/wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv +++ b/wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv @@ -42,7 +42,7 @@ module fsm_fpdiv_pipe ( output logic divBusy ); - // div64 : S0-S14 (15 cycles) + // div64 : S1-S14 (14 cycles) // sqrt64 : S15-S35 (21 cycles) // div32: S36-S47 (12 cycles) // sqrt32 : S48-S64 (17 cycles) @@ -52,7 +52,7 @@ module fsm_fpdiv_pipe ( S30, S31, S32, S33, S34, S35, S36, S37, S38, S39, S40, S41, S42, S43, S44, S45, S46, S47, S48, S49, S50, S51, S52, S53, S54, S55, S56, S57, S58, S59, - S60, S61, S62, S63, S64} statetype; + S60, S61, S62, S63, S64, S65} statetype; statetype current_state, next_state; @@ -72,7 +72,7 @@ module fsm_fpdiv_pipe ( if (start==1'b0) begin done = 1'b0; - divBusy = 1'b0; + divBusy = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -83,9 +83,28 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S0; + next_state = S0; + end // if (start==1'b0) + else + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b0; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state = S65; end - else if (start==1'b1 && op_type==1'b0 && P==1'b0) + end + S65: + begin + if (op_type==1'b0 && P==1'b0) begin done = 1'b0; divBusy = 1'b1; @@ -99,9 +118,9 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b010; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S1; + next_state = S1; end - else if (start==1'b1 && op_type==1'b0 && P==1'b1) + else if (op_type==1'b0 && P==1'b1) begin done = 1'b0; divBusy = 1'b1; @@ -115,9 +134,9 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b010; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S36; + next_state = S36; end - else if (start==1'b1 && op_type==1'b1 && P==1'b0) + else if (op_type==1'b1 && P==1'b0) begin done = 1'b0; divBusy = 1'b1; @@ -131,9 +150,9 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b010; sel_muxb = 3'b001; sel_muxr = 1'b0; - next_state <= S15; + next_state = S15; end - else if (start==1'b1 && op_type==1'b1 && P==1'b1) + else if (op_type==1'b1 && P==1'b1) begin done = 1'b0; divBusy = 1'b1; @@ -147,7 +166,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b010; sel_muxb = 3'b001; sel_muxr = 1'b0; - next_state <= S48; + next_state = S48; end else begin @@ -163,7 +182,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S0; + next_state = S0; end end // case: S0 // div64 @@ -181,7 +200,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b001; sel_muxb = 3'b001; sel_muxr = 1'b0; - next_state <= S2; + next_state = S2; end // case: S1 S2: // iteration 1 begin @@ -197,7 +216,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S3; + next_state = S3; end S3: begin @@ -213,7 +232,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S4; + next_state = S4; end S4: // iteration 2 begin @@ -229,7 +248,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S5; + next_state = S5; end S5: begin @@ -245,7 +264,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; // add - next_state <= S6; + next_state = S6; end S6: // iteration 3 begin @@ -261,7 +280,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S7; + next_state = S7; end S7: begin @@ -277,7 +296,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S8; + next_state = S8; end // case: S7 S8: begin @@ -293,7 +312,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S9; + next_state = S9; end // case: S7 S9: // q,qm,qp begin @@ -309,7 +328,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S10; + next_state = S10; end // case: S9 S10: // rem begin @@ -325,7 +344,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b1; - next_state <= S11; + next_state = S11; end S11: begin @@ -341,7 +360,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b1; - next_state <= S12; + next_state = S12; end // case: S11 S12: begin @@ -357,7 +376,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S13; + next_state = S13; end S13: begin @@ -373,7 +392,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S14; + next_state = S14; end S14: begin @@ -389,7 +408,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S0; + next_state = S0; end // sqrt64 S15: @@ -406,7 +425,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S16; + next_state = S16; end S16: begin @@ -422,7 +441,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b001; sel_muxb = 3'b100; sel_muxr = 1'b0; - next_state <= S17; + next_state = S17; end S17: begin @@ -438,7 +457,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b010; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S18; + next_state = S18; end S18: // iteration 1 begin @@ -454,7 +473,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S19; + next_state = S19; end S19: // iteration 1 begin @@ -470,7 +489,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S20; + next_state = S20; end S20: begin @@ -486,7 +505,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b100; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S21; + next_state = S21; end S21: begin @@ -502,7 +521,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S22; + next_state = S22; end S22: // iteration 2 begin @@ -518,7 +537,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S23; + next_state = S23; end // case: S18 S23: begin @@ -534,7 +553,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S24; + next_state = S24; end S24: begin @@ -550,7 +569,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b100; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S25; + next_state = S25; end S25: begin @@ -566,7 +585,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S26; + next_state = S26; end S26: // iteration 3 begin @@ -582,7 +601,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S27; + next_state = S27; end // case: S21 S27: begin @@ -598,7 +617,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S28; + next_state = S28; end S28: begin @@ -614,7 +633,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b100; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S29; + next_state = S29; end S29: begin @@ -630,7 +649,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S30; + next_state = S30; end // case: S23 S30: // q,qm,qp begin @@ -646,7 +665,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S31; + next_state = S31; end S31: // rem begin @@ -662,7 +681,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b110; sel_muxr = 1'b1; - next_state <= S32; + next_state = S32; end // case: S25 S32: begin @@ -678,8 +697,8 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b110; sel_muxr = 1'b1; - next_state <= S33; - end // case: S34 + next_state = S33; + end S33: begin done = 1'b0; @@ -694,7 +713,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S34; + next_state = S34; end S34: // done begin @@ -710,9 +729,9 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S35; - end // case: S34 - S34: + next_state = S35; + end + S35: begin done = 1'b0; divBusy = 1'b0; @@ -726,7 +745,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S0; + next_state = S0; end // div32 S36: @@ -743,7 +762,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b001; sel_muxb = 3'b001; sel_muxr = 1'b0; - next_state <= S37; + next_state = S37; end // case: S1 S37: // iteration 1 begin @@ -759,7 +778,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S38; + next_state = S38; end S38: begin @@ -775,7 +794,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S39; + next_state = S39; end S39: // iteration 2 begin @@ -791,7 +810,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S40; + next_state = S40; end S40: begin @@ -807,7 +826,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S41; + next_state = S41; end S41: begin @@ -823,7 +842,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S42; + next_state = S42; end S42: // q,qm,qp begin @@ -839,7 +858,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S43; + next_state = S43; end // case: S9 S43: // rem begin @@ -855,7 +874,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b1; - next_state <= S44; + next_state = S44; end S44: begin @@ -871,7 +890,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b1; - next_state <= S45; + next_state = S45; end // case: S11 S45: begin @@ -887,7 +906,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S46; + next_state = S46; end S46: // done begin @@ -903,7 +922,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S47; + next_state = S47; end S47: begin @@ -919,7 +938,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S0; + next_state = S0; end // sqrt32 S48: @@ -936,7 +955,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S49; + next_state = S49; end S49: begin @@ -952,7 +971,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b001; sel_muxb = 3'b100; sel_muxr = 1'b0; - next_state <= S50; + next_state = S50; end S50: begin @@ -968,7 +987,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b010; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S51; + next_state = S51; end S51: // iteration 1 begin @@ -984,7 +1003,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S52; + next_state = S52; end S52: // iteration 1 begin @@ -1000,7 +1019,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S53; + next_state = S53; end S53: begin @@ -1016,7 +1035,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b100; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S54; + next_state = S54; end S54: begin @@ -1032,7 +1051,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S55; + next_state = S55; end S55: // iteration 2 begin @@ -1048,7 +1067,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S56; + next_state = S56; end // case: S18 S56: begin @@ -1064,7 +1083,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S57; + next_state = S57; end S57: begin @@ -1080,7 +1099,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b100; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S58; + next_state = S58; end S58: begin @@ -1096,7 +1115,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S59; + next_state = S59; end S59: // q,qm,qp begin @@ -1112,7 +1131,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S60; + next_state = S60; end S60: // rem begin @@ -1128,7 +1147,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b110; sel_muxr = 1'b1; - next_state <= S61; + next_state = S61; end // case: S25 S61: begin @@ -1144,7 +1163,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b110; sel_muxr = 1'b1; - next_state <= S62; + next_state = S62; end // case: S34 S62: begin @@ -1160,7 +1179,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S63; + next_state = S63; end S63: // done begin @@ -1176,7 +1195,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S64; + next_state = S64; end // case: S34 S64: begin @@ -1192,7 +1211,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S0; + next_state = S0; end default: begin @@ -1208,7 +1227,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S0; + next_state = S0; end endcase // case(current_state) end // always @ (current_state or X)