From 49721a169b337d36f55f676acb9eccd1401414f4 Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Thu, 21 Oct 2021 13:52:12 -0500 Subject: [PATCH 1/6] Clean up some FPU and add pipelined fpdivsqrt to fpu.sv --- wally-pipelined/src/fpu/divconv_pipe.sv | 19 +-- wally-pipelined/src/fpu/fpdiv_pipe.sv | 62 ++++---- wally-pipelined/src/fpu/fpu.sv | 122 ++++++++------- wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv | 181 ++++++++++++---------- 4 files changed, 198 insertions(+), 186 deletions(-) diff --git a/wally-pipelined/src/fpu/divconv_pipe.sv b/wally-pipelined/src/fpu/divconv_pipe.sv index 4e3b843d6..240000c28 100755 --- a/wally-pipelined/src/fpu/divconv_pipe.sv +++ b/wally-pipelined/src/fpu/divconv_pipe.sv @@ -50,6 +50,7 @@ module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, r supply0 vss; logic [59:0] muxa_out, muxb_out; + logic muxr_out; logic [10:0] ia_div, ia_sqrt; logic [59:0] ia_out; logic [119:0] mul_out; @@ -67,8 +68,8 @@ module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, r // Check if exponent is odd for sqrt // If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA - assign d2 = (exp_odd&op_type) ? {vss,d,6'h0} : {d,7'h0}; - assign n2 = op_type ? d2 : {n,7'h0}; + assign d2 = (exp_odd&op_type) ? {vss, d, 6'h0} : {d, 7'h0}; + assign n2 = op_type ? d2 : {n, 7'h0}; // IA div/sqrt sbtm_div ia1 (d[52:41], ia_div); @@ -137,7 +138,7 @@ module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, r flopenr #(60) regpE (clk, reset, load_regp, qm_const, qm_const_pipe); // CPA (from CSA)/Remainder addition/subtraction - assign {cout1, mul_out} = Sum_pipe + Carry_pipe + muxr_pipe; + assign mul_out = Sum_pipe + Carry_pipe + {119'h0, muxr_pipe}; // One's complement instead of two's complement (for hw efficiency) assign three = {~mul_out[118] , mul_out[118], ~mul_out[117:59]}; mux2 #(60) mxTC (~mul_out[118:59], three[60:1], op_type_pipe, twocmp_out); @@ -154,13 +155,13 @@ module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, r flopenr #(60) rego (clk, reset, regs_pipe, qm_const_pipe, qm_const_pipe2); // Assuming [1,2) - q1 - assign {cout2, q_out1} = regb_out + q_const; - assign {cout3, qp_out1} = regb_out + qp_const; - assign {cout4, qm_out1} = regb_out + qm_const + 1'b1; + assign q_out1 = regb_out + q_const; + assign qp_out1 = regb_out + qp_const; + assign qm_out1 = regb_out + qm_const + 1'b1; // Assuming [0.5,1) - q0 - assign {cout5, q_out0} = {regb_out[58:0], 1'b0} + q_const; - assign {cout6, qp_out0} = {regb_out[58:0], 1'b0} + qp_const; - assign {cout7, qm_out0} = {regb_out[58:0], 1'b0} + qm_const + 1'b1; + assign q_out0 = {regb_out[58:0], 1'b0} + q_const; + assign qp_out0 = {regb_out[58:0], 1'b0} + qp_const; + assign qm_out0 = {regb_out[58:0], 1'b0} + qm_const + 1'b1; // Stage 3 // Assuming [1,2) diff --git a/wally-pipelined/src/fpu/fpdiv_pipe.sv b/wally-pipelined/src/fpu/fpdiv_pipe.sv index 52380d3c6..1012bb325 100755 --- a/wally-pipelined/src/fpu/fpdiv_pipe.sv +++ b/wally-pipelined/src/fpu/fpdiv_pipe.sv @@ -78,13 +78,18 @@ module fpdiv_pipe ( logic [2:0] sel_muxa, sel_muxb; logic sel_muxr; logic load_rega, load_regb, load_regc, load_regd, load_regr; - logic load_regp; - - logic donev, sel_muxrv, sel_muxsv; - logic [1:0] sel_muxav, sel_muxbv; - logic load_regav, load_regbv, load_regcv; - logic load_regrv, load_regsv; + logic load_regp, load_regs; + logic exp_odd, exp_odd1; + logic start1; + logic P1; + logic op_type1; + logic [12:0] expF1; + logic [52:0] mantissaA1; + logic [52:0] mantissaB1; + logic [2:0] sel_inv1; + logic signResult1; + logic Invalid1; // op_type : fdiv=0, fsqrt=1 assign Float1 = op1; @@ -94,11 +99,9 @@ module fpdiv_pipe ( exception_div exc1 (.A(Float1), .B(Float2), .op_type, .Ztype(sel_inv), .Invalid); // Determine Sign/Mantissa - assign signResult = ((Float1[63]^Float2[63])&~op_type) | Float1[63]&op_type; + assign signResult = ((Float1[63]^Float2[63])&~op_type); assign mantissaA = {vdd, Float1[51:0]}; assign mantissaB = {vdd, Float2[51:0]}; - // Early-ending detection - assign early_detection = |mantissaB[31:0]; // Perform Exponent Subtraction - expA - expB + Bias assign exp1 = {2'b0, Float1[62:52]}; @@ -106,26 +109,14 @@ module fpdiv_pipe ( // bias : DP = 2^{11-1}-1 = 1023 assign bias = {3'h0, 10'h3FF}; // Divide exponent - assign {exp_cout1, open, exp_diff} = {2'b0, exp1} - {2'b0, exp2} + {2'b0, bias}; + assign exp_diff = {2'b0, exp1} - {2'b0, exp2} + {2'b0, bias}; // Sqrt exponent (check if exponent is odd) - assign exp_odd = Float1[52] ? vss : vdd; - assign {exp_cout2, exp_sqrt} = {1'b0, exp1} + {4'h0, 10'h3ff} + {13'b0, exp_odd}; - + assign exp_odd = Float1[52] ? 1'b0 : 1'b1; + assign exp_sqrt = {1'b0, exp1} + {4'h0, 10'h3ff} + {13'b0, exp_odd}; // Choose correct exponent assign expF = op_type ? exp_sqrt[13:1] : exp_diff; - logic exp_odd1; - logic P1; - logic op_type1; - logic [12:0] expF1; - logic [52:0] mantissaA1; - logic [52:0] mantissaB1; - logic [2:0] sel_inv1; - logic DenormIn1; - logic signResult1; - logic Invalid1; - flopenr #(1) rega (clk, reset, 1'b1, exp_odd, exp_odd1); flopenr #(1) regb (clk, reset, 1'b1, P, P1); flopenr #(1) regc (clk, reset, 1'b1, op_type, op_type1); @@ -134,33 +125,32 @@ module fpdiv_pipe ( flopenr #(53) regf (clk, reset, 1'b1, mantissaB, mantissaB1); flopenr #(1) regg (clk, reset, 1'b1, start, start1); flopenr #(3) regh (clk, reset, 1'b1, sel_inv, sel_inv1); - flopenr #(1) regi (clk, reset, 1'b1, DenormIn, DenormIn1); flopenr #(1) regj (clk, reset, 1'b1, signResult, signResult1); flopenr #(1) regk (clk, reset, 1'b1, Invalid, Invalid1); // Main Goldschmidt/Division Routine - divconv_pipe goldy (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out, - regr_out, mantissaB1, mantissaA1, - sel_muxa, sel_muxb, sel_muxr, reset, clk, - load_rega, load_regb, load_regc, load_regd, - load_regr, load_regs, load_regp, - P1, op_type1, exp_odd1); + divconv_pipe goldy (.q1, .qm1, .qp1, .q0, .qm0, .qp0, + .rega_out, .regb_out, .regc_out, .regd_out, + .regr_out, .d(mantissaB1), .n(mantissaA1), + .sel_muxa, .sel_muxb, .sel_muxr, .reset, .clk, + .load_rega, .load_regb, .load_regc, .load_regd, + .load_regr, .load_regs, .load_regp, + .P(P1), .op_type(op_type1), .exp_odd(exp_odd1)); // FSM : control divider - fsm_fpdiv_pipe control (.clk, .reset, .start, .op_type, .P, + fsm_fpdiv_pipe control (.clk, .reset, .start(start), .op_type(op_type1), .P(P1), .done, .load_rega, .load_regb, .load_regc, .load_regd, .load_regr, .load_regs, .load_regp, .sel_muxa, .sel_muxb, .sel_muxr, .divBusy(FDivBusyE)); - // Round the mantissa to a 52-bit value, with the leading one // removed. The rounding units also handles special cases and // set the exception flags. - rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF), - .sel_inv, .Invalid, .SignR(signResult), + rounder_div round1 (.rm, .P(P1), .OvEn(1'b0), .UnEn(1'b0), .exp_diff(expF1), + .sel_inv(sel_inv1), .Invalid(Invalid1), .SignR(signResult1), .Float1(op1), .Float2(op2), .XNaNQ, .YNaNQ, .XZeroQ, .YZeroQ, - .XInfQ, .YInfQ, .op_type, + .XInfQ, .YInfQ, .op_type(op_type1), .q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out, .Result, .Flags(FlagsIn)); diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index fd91b1b2f..da7163fea 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -66,67 +66,69 @@ module fpu ( logic FDivStartD, FDivStartE; // Start division or squareroot logic FWriteIntD; // Write to integer register logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals - logic [1:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register - logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component - logic [2:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage - logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; // Select the result written to the integer resister - logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input + logic [1:0] FResultSelD, FResultSelE; // Select the result written to FP register + logic [1:0] FResultSelM, FResultSelW; // Select the result written to FP register + logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component + logic [2:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage + logic [1:0] FIntResSelD, FIntResSelE; // Select the result written to the integer resister + logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input // regfile signals - logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) - logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) - logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) + logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [63:0] FSrcXE; // Input 1 to the various units (after forwarding) + logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) + logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) // unpacking signals - logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage - logic XSgnM, YSgnM; // input's sign - memory stage - logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage - logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage - logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage - logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage - logic [10:0] BiasE; // bias based on precision (single=7f double=3ff - max expoent/2) - logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage - logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage - logic XNaNQ, YNaNQ; // is the input a NaN - divide - logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage - logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage - logic XDenormE, YDenormE, ZDenormE; // is the input denormalized - logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage - logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage - logic XZeroQ, YZeroQ; // is the input zero - divide - logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage - logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage - logic XInfQ, YInfQ; // is the input infinity - divide - logic XExpMaxE; // is the exponent all ones (max value) - logic XNormE; // is normal + logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage + logic XSgnM, YSgnM; // input's sign - memory stage + logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage + logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage + logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage + logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage + logic [10:0] BiasE; // bias based on precision (single=7f double=3ff) + logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage + logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage + logic XNaNQ, YNaNQ; // is the input a NaN - divide + logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage + logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage + logic XDenormE, YDenormE, ZDenormE; // is the input denormalized + logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage + logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage + logic XZeroQ, YZeroQ; // is the input zero - divide + logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage + logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage + logic XInfQ, YInfQ; // is the input infinity - divide + logic XExpMaxE; // is the exponent all ones (max value) + logic XNormE; // is normal + logic FmtQ; + logic FDivStartQ; // result and flag signals - logic [63:0] FDivResM, FDivResW; // divide/squareroot result - logic [4:0] FDivFlgM, FDivFlgW; // divide/squareroot flags - logic [63:0] FMAResM, FMAResW; // FMA/multiply result - logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply result - logic [63:0] ReadResW; // read result (load instruction) - logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result - logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags - logic [63:0] CvtResE, CvtResM; // FP <-> int convert result - logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this - logic [63:0] ClassResE, ClassResM; // classify result - logic [63:0] CmpResE, CmpResM; // compare result - logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid) - logic [63:0] SgnResE, SgnResM; // sign injection result - logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid) - logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage - logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage + logic [63:0] FDivResM, FDivResW; // divide/squareroot result + logic [4:0] FDivFlgM, FDivFlgW; // divide/squareroot flags + logic [63:0] FMAResM, FMAResW; // FMA/multiply result + logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply result + logic [63:0] ReadResW; // read result (load instruction) + logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result + logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags + logic [63:0] CvtResE, CvtResM; // FP <-> int convert result + logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this + logic [63:0] ClassResE, ClassResM; // classify result + logic [63:0] CmpResE, CmpResM; // compare result + logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid) + logic [63:0] SgnResE, SgnResM; // sign injection result + logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid) + logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage + logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage logic [`XLEN-1:0] FIntResE; - logic [63:0] FPUResultW; // final FP result being written to the FP register - + logic [63:0] FPUResultW; // final FP result being written to the FP register // other signals - logic FDivSqrtDoneE; // is divide done - logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit - logic FDivClk; // clock for divide/squareroot unit - logic [63:0] AlignedSrcAE; // align SrcA to the floating point format + logic FDivSqrtDoneE; // is divide done + logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit + logic FDivClk; // clock for divide/squareroot unit + logic [63:0] AlignedSrcAE; // align SrcA to the floating point format // DECODE STAGE @@ -198,14 +200,14 @@ module fpu ( floprc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E), .clear(FDivSqrtDoneE), .reset(reset), .clk(FDivBusyE)); - floprc #(6) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE}), - .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ}), + floprc #(7) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE}), + .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ}), .clear(FDivSqrtDoneE), - .reset(reset), .clk(FDivBusyE)); - fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), - .reset, .clk(clk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1), - .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, - .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); + .reset(reset), .clk(FDivBusyE)); + fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), + .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), + .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, + .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); // convert from signle to double and vice versa cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE); diff --git a/wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv b/wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv index 66ce0ab7e..95438e012 100755 --- a/wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv +++ b/wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv @@ -42,7 +42,7 @@ module fsm_fpdiv_pipe ( output logic divBusy ); - // div64 : S0-S14 (15 cycles) + // div64 : S1-S14 (14 cycles) // sqrt64 : S15-S35 (21 cycles) // div32: S36-S47 (12 cycles) // sqrt32 : S48-S64 (17 cycles) @@ -52,7 +52,7 @@ module fsm_fpdiv_pipe ( S30, S31, S32, S33, S34, S35, S36, S37, S38, S39, S40, S41, S42, S43, S44, S45, S46, S47, S48, S49, S50, S51, S52, S53, S54, S55, S56, S57, S58, S59, - S60, S61, S62, S63, S64} statetype; + S60, S61, S62, S63, S64, S65} statetype; statetype current_state, next_state; @@ -72,7 +72,7 @@ module fsm_fpdiv_pipe ( if (start==1'b0) begin done = 1'b0; - divBusy = 1'b0; + divBusy = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -83,9 +83,28 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S0; + next_state = S0; + end // if (start==1'b0) + else + begin + done = 1'b0; + divBusy = 1'b1; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b0; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state = S65; end - else if (start==1'b1 && op_type==1'b0 && P==1'b0) + end + S65: + begin + if (op_type==1'b0 && P==1'b0) begin done = 1'b0; divBusy = 1'b1; @@ -99,9 +118,9 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b010; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S1; + next_state = S1; end - else if (start==1'b1 && op_type==1'b0 && P==1'b1) + else if (op_type==1'b0 && P==1'b1) begin done = 1'b0; divBusy = 1'b1; @@ -115,9 +134,9 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b010; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S36; + next_state = S36; end - else if (start==1'b1 && op_type==1'b1 && P==1'b0) + else if (op_type==1'b1 && P==1'b0) begin done = 1'b0; divBusy = 1'b1; @@ -131,9 +150,9 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b010; sel_muxb = 3'b001; sel_muxr = 1'b0; - next_state <= S15; + next_state = S15; end - else if (start==1'b1 && op_type==1'b1 && P==1'b1) + else if (op_type==1'b1 && P==1'b1) begin done = 1'b0; divBusy = 1'b1; @@ -147,7 +166,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b010; sel_muxb = 3'b001; sel_muxr = 1'b0; - next_state <= S48; + next_state = S48; end else begin @@ -163,7 +182,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S0; + next_state = S0; end end // case: S0 // div64 @@ -181,7 +200,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b001; sel_muxb = 3'b001; sel_muxr = 1'b0; - next_state <= S2; + next_state = S2; end // case: S1 S2: // iteration 1 begin @@ -197,7 +216,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S3; + next_state = S3; end S3: begin @@ -213,7 +232,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S4; + next_state = S4; end S4: // iteration 2 begin @@ -229,7 +248,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S5; + next_state = S5; end S5: begin @@ -245,7 +264,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; // add - next_state <= S6; + next_state = S6; end S6: // iteration 3 begin @@ -261,7 +280,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S7; + next_state = S7; end S7: begin @@ -277,7 +296,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S8; + next_state = S8; end // case: S7 S8: begin @@ -293,7 +312,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S9; + next_state = S9; end // case: S7 S9: // q,qm,qp begin @@ -309,7 +328,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S10; + next_state = S10; end // case: S9 S10: // rem begin @@ -325,7 +344,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b1; - next_state <= S11; + next_state = S11; end S11: begin @@ -341,7 +360,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b1; - next_state <= S12; + next_state = S12; end // case: S11 S12: begin @@ -357,7 +376,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S13; + next_state = S13; end S13: begin @@ -373,7 +392,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S14; + next_state = S14; end S14: begin @@ -389,7 +408,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S0; + next_state = S0; end // sqrt64 S15: @@ -406,7 +425,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S16; + next_state = S16; end S16: begin @@ -422,7 +441,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b001; sel_muxb = 3'b100; sel_muxr = 1'b0; - next_state <= S17; + next_state = S17; end S17: begin @@ -438,7 +457,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b010; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S18; + next_state = S18; end S18: // iteration 1 begin @@ -454,7 +473,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S19; + next_state = S19; end S19: // iteration 1 begin @@ -470,7 +489,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S20; + next_state = S20; end S20: begin @@ -486,7 +505,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b100; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S21; + next_state = S21; end S21: begin @@ -502,7 +521,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S22; + next_state = S22; end S22: // iteration 2 begin @@ -518,7 +537,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S23; + next_state = S23; end // case: S18 S23: begin @@ -534,7 +553,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S24; + next_state = S24; end S24: begin @@ -550,7 +569,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b100; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S25; + next_state = S25; end S25: begin @@ -566,7 +585,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S26; + next_state = S26; end S26: // iteration 3 begin @@ -582,7 +601,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S27; + next_state = S27; end // case: S21 S27: begin @@ -598,7 +617,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S28; + next_state = S28; end S28: begin @@ -614,7 +633,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b100; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S29; + next_state = S29; end S29: begin @@ -630,7 +649,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S30; + next_state = S30; end // case: S23 S30: // q,qm,qp begin @@ -646,7 +665,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S31; + next_state = S31; end S31: // rem begin @@ -662,7 +681,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b110; sel_muxr = 1'b1; - next_state <= S32; + next_state = S32; end // case: S25 S32: begin @@ -678,8 +697,8 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b110; sel_muxr = 1'b1; - next_state <= S33; - end // case: S34 + next_state = S33; + end S33: begin done = 1'b0; @@ -694,7 +713,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S34; + next_state = S34; end S34: // done begin @@ -710,9 +729,9 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S35; - end // case: S34 - S34: + next_state = S35; + end + S35: begin done = 1'b0; divBusy = 1'b0; @@ -726,7 +745,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S0; + next_state = S0; end // div32 S36: @@ -743,7 +762,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b001; sel_muxb = 3'b001; sel_muxr = 1'b0; - next_state <= S37; + next_state = S37; end // case: S1 S37: // iteration 1 begin @@ -759,7 +778,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S38; + next_state = S38; end S38: begin @@ -775,7 +794,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S39; + next_state = S39; end S39: // iteration 2 begin @@ -791,7 +810,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S40; + next_state = S40; end S40: begin @@ -807,7 +826,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S41; + next_state = S41; end S41: begin @@ -823,7 +842,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S42; + next_state = S42; end S42: // q,qm,qp begin @@ -839,7 +858,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S43; + next_state = S43; end // case: S9 S43: // rem begin @@ -855,7 +874,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b1; - next_state <= S44; + next_state = S44; end S44: begin @@ -871,7 +890,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b1; - next_state <= S45; + next_state = S45; end // case: S11 S45: begin @@ -887,7 +906,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S46; + next_state = S46; end S46: // done begin @@ -903,7 +922,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S47; + next_state = S47; end S47: begin @@ -919,7 +938,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S0; + next_state = S0; end // sqrt32 S48: @@ -936,7 +955,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S49; + next_state = S49; end S49: begin @@ -952,7 +971,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b001; sel_muxb = 3'b100; sel_muxr = 1'b0; - next_state <= S50; + next_state = S50; end S50: begin @@ -968,7 +987,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b010; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S51; + next_state = S51; end S51: // iteration 1 begin @@ -984,7 +1003,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S52; + next_state = S52; end S52: // iteration 1 begin @@ -1000,7 +1019,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S53; + next_state = S53; end S53: begin @@ -1016,7 +1035,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b100; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S54; + next_state = S54; end S54: begin @@ -1032,7 +1051,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S55; + next_state = S55; end S55: // iteration 2 begin @@ -1048,7 +1067,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S56; + next_state = S56; end // case: S18 S56: begin @@ -1064,7 +1083,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S57; + next_state = S57; end S57: begin @@ -1080,7 +1099,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b100; sel_muxb = 3'b010; sel_muxr = 1'b0; - next_state <= S58; + next_state = S58; end S58: begin @@ -1096,7 +1115,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - next_state <= S59; + next_state = S59; end S59: // q,qm,qp begin @@ -1112,7 +1131,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S60; + next_state = S60; end S60: // rem begin @@ -1128,7 +1147,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b110; sel_muxr = 1'b1; - next_state <= S61; + next_state = S61; end // case: S25 S61: begin @@ -1144,7 +1163,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b011; sel_muxb = 3'b110; sel_muxr = 1'b1; - next_state <= S62; + next_state = S62; end // case: S34 S62: begin @@ -1160,7 +1179,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S63; + next_state = S63; end S63: // done begin @@ -1176,7 +1195,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S64; + next_state = S64; end // case: S34 S64: begin @@ -1192,7 +1211,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S0; + next_state = S0; end default: begin @@ -1208,7 +1227,7 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state <= S0; + next_state = S0; end endcase // case(current_state) end // always @ (current_state or X) From 0e0a107a98842f8bcfcd6acb65944ff45b0c607d Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Thu, 21 Oct 2021 15:19:22 -0500 Subject: [PATCH 2/6] Get rid of lint warning - still need more testing though --- wally-pipelined/src/fpu/fpdiv_pipe.sv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wally-pipelined/src/fpu/fpdiv_pipe.sv b/wally-pipelined/src/fpu/fpdiv_pipe.sv index 1012bb325..e02e91b9e 100755 --- a/wally-pipelined/src/fpu/fpdiv_pipe.sv +++ b/wally-pipelined/src/fpu/fpdiv_pipe.sv @@ -53,6 +53,7 @@ module fpdiv_pipe ( logic [63:0] IntValue; logic [12:0] exp1, exp2, expF; + logic [14:0] exp_pre_diff; logic [12:0] exp_diff, bias; logic [13:0] exp_sqrt; @@ -109,7 +110,8 @@ module fpdiv_pipe ( // bias : DP = 2^{11-1}-1 = 1023 assign bias = {3'h0, 10'h3FF}; // Divide exponent - assign exp_diff = {2'b0, exp1} - {2'b0, exp2} + {2'b0, bias}; + assign exp_pre_diff = {2'b0, exp1} - {2'b0, exp2} + {2'b0, bias}; + assign exp_diff = exp_pre_diff[12:0]; // Sqrt exponent (check if exponent is odd) assign exp_odd = Float1[52] ? 1'b0 : 1'b1; From 00cc1e0c5c5f45fc2e8ed438423a5978e8e23083 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 22 Oct 2021 10:03:12 -0700 Subject: [PATCH 3/6] put the FMA priority encoders into their own module --- wally-pipelined/src/fpu/fma.sv | 31 +++++++++++++++--------------- wally-pipelined/testbench/tests.vh | 16 +++++++-------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/wally-pipelined/src/fpu/fma.sv b/wally-pipelined/src/fpu/fma.sv index 5dcfe883d..1f196a315 100644 --- a/wally-pipelined/src/fpu/fma.sv +++ b/wally-pipelined/src/fpu/fma.sv @@ -381,15 +381,10 @@ module posloa( // Apply function to determine Leading pattern - logic [3*`NF+6:0] pf; - assign pf = T^{Z[3*`NF+5:0], 1'b0}; + logic [3*`NF+6:0] f; + assign f = T^{Z[3*`NF+5:0], 1'b0}; - logic [8:0] i; - always_comb begin - i = 0; - while (~pf[3*`NF+6-i] && $unsigned(i) <= $unsigned(9'd3*9'd`NF+9'd6)) i = i+1; // search for leading one - PCnt = i; - end + lzc lzc(.f, .Cnt(PCnt)); endmodule @@ -410,17 +405,23 @@ module negloa( logic [3*`NF+6:0] f; assign f = T^{~Z, 1'b0}; - logic [8:0] i; - always_comb begin - i = 0; - while (~f[3*`NF+6-i] && $unsigned(i) <= $unsigned(9'd3*9'd`NF+9'd6)) i = i+1; // search for leading one - NCnt = i; - end + lzc lzc(.f, .Cnt(NCnt)); endmodule - +module lzc( + input logic [3*`NF+6:0] f, + output logic [8:0] Cnt // normalization shift count for the negitive result +); + + logic [8:0] i; + always_comb begin + i = 0; + while (~f[3*`NF+6-i] && $unsigned(i) <= $unsigned(9'd3*9'd`NF+9'd6)) i = i+1; // search for leading one + Cnt = i; + end +endmodule diff --git a/wally-pipelined/testbench/tests.vh b/wally-pipelined/testbench/tests.vh index ac1136265..a82e22830 100644 --- a/wally-pipelined/testbench/tests.vh +++ b/wally-pipelined/testbench/tests.vh @@ -85,10 +85,10 @@ string imperas32f[] = '{ "rv64f/I-FSW-01", "2000", "rv64f/I-FCLASS-S-01", "2000", "rv64f/I-FADD-S-01", "2000", -// "rv64f/I-FCVT-S-L-01", "2000", -// "rv64f/I-FCVT-S-LU-01", "2000", -// "rv64f/I-FCVT-S-W-01", "2000", -// "rv64f/I-FCVT-S-WU-01", "2000", + "rv64f/I-FCVT-S-L-01", "2000", + "rv64f/I-FCVT-S-LU-01", "2000", + "rv64f/I-FCVT-S-W-01", "2000", + "rv64f/I-FCVT-S-WU-01", "2000", "rv64f/I-FCVT-L-S-01", "2000", "rv64f/I-FCVT-LU-S-01", "2000", "rv64f/I-FCVT-W-S-01", "2000", @@ -135,11 +135,11 @@ string imperas32f[] = '{ "rv64d/I-FSGNJX-D-01", "2000", "rv64d/I-FSQRT-D-01", "2000", "rv64d/I-FSUB-D-01", "2000", -// "rv64d/I-FCVT-D-L-01", "2000", -// "rv64d/I-FCVT-D-LU-01", "2000", + "rv64d/I-FCVT-D-L-01", "2000", + "rv64d/I-FCVT-D-LU-01", "2000", "rv64d/I-FCVT-D-S-01", "2000", -// "rv64d/I-FCVT-D-W-01", "2000", -// "rv64d/I-FCVT-D-WU-01", "2000", + "rv64d/I-FCVT-D-W-01", "2000", + "rv64d/I-FCVT-D-WU-01", "2000", "rv64d/I-FCVT-L-D-01", "2000", "rv64d/I-FCVT-LU-D-01", "2000", "rv64d/I-FCVT-S-D-01", "2000", From a60e19dc3f2b8a2e1507bfb64d8c2e98242c6008 Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Fri, 22 Oct 2021 13:41:50 -0500 Subject: [PATCH 4/6] Modify register before fpdivsqrt to be synthesizable for FPGAs and better in tune for ASIC clocking --- wally-pipelined/src/fpu/fpdiv_pipe.sv | 9 +- wally-pipelined/src/fpu/fpu.sv | 28 +-- wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv | 238 +++++++++++++++------- 3 files changed, 184 insertions(+), 91 deletions(-) diff --git a/wally-pipelined/src/fpu/fpdiv_pipe.sv b/wally-pipelined/src/fpu/fpdiv_pipe.sv index e02e91b9e..8a7bc685f 100755 --- a/wally-pipelined/src/fpu/fpdiv_pipe.sv +++ b/wally-pipelined/src/fpu/fpdiv_pipe.sv @@ -42,6 +42,7 @@ module fpdiv_pipe ( output logic done, output logic FDivBusyE, + output logic load_preload, output logic [63:0] AS_Result, output logic [4:0] Flags); @@ -137,18 +138,18 @@ module fpdiv_pipe ( .sel_muxa, .sel_muxb, .sel_muxr, .reset, .clk, .load_rega, .load_regb, .load_regc, .load_regd, .load_regr, .load_regs, .load_regp, - .P(P1), .op_type(op_type1), .exp_odd(exp_odd1)); + .P(P), .op_type(op_type1), .exp_odd(exp_odd1)); // FSM : control divider - fsm_fpdiv_pipe control (.clk, .reset, .start(start), .op_type(op_type1), .P(P1), + fsm_fpdiv_pipe control (.clk, .reset, .start(start), .op_type(op_type1), .P(P), .done, .load_rega, .load_regb, .load_regc, .load_regd, - .load_regr, .load_regs, .load_regp, + .load_regr, .load_regs, .load_regp, .load_preload, .sel_muxa, .sel_muxb, .sel_muxr, .divBusy(FDivBusyE)); // Round the mantissa to a 52-bit value, with the leading one // removed. The rounding units also handles special cases and // set the exception flags. - rounder_div round1 (.rm, .P(P1), .OvEn(1'b0), .UnEn(1'b0), .exp_diff(expF1), + rounder_div round1 (.rm, .P(P), .OvEn(1'b0), .UnEn(1'b0), .exp_diff(expF1), .sel_inv(sel_inv1), .Invalid(Invalid1), .SignR(signResult1), .Float1(op1), .Float2(op2), .XNaNQ, .YNaNQ, .XZeroQ, .YZeroQ, diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index da7163fea..810eac6f7 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -103,7 +103,8 @@ module fpu ( logic XExpMaxE; // is the exponent all ones (max value) logic XNormE; // is normal logic FmtQ; - logic FDivStartQ; + logic FDivStartQ; + logic FOpCtrlQ; // result and flag signals logic [63:0] FDivResM, FDivResW; // divide/squareroot result @@ -128,6 +129,7 @@ module fpu ( logic FDivSqrtDoneE; // is divide done logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit logic FDivClk; // clock for divide/squareroot unit + logic load_preload; // enable for FF on fpdivsqrt logic [63:0] AlignedSrcAE; // align SrcA to the floating point format // DECODE STAGE @@ -194,19 +196,19 @@ module fpu ( .FMAFlgM, .FMAResM); // fpdivsqrt using Goldschmidt's iteration - floprc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E), - .clear(FDivSqrtDoneE), - .reset(reset), .clk(FDivBusyE)); - floprc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E), - .clear(FDivSqrtDoneE), - .reset(reset), .clk(FDivBusyE)); - floprc #(7) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE}), - .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ}), - .clear(FDivSqrtDoneE), - .reset(reset), .clk(FDivBusyE)); - fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), + flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E), + .clear(FDivSqrtDoneE), .en(load_preload), + .reset(reset), .clk(clk)); + flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E), + .clear(FDivSqrtDoneE), .en(load_preload), + .reset(reset), .clk(clk)); + flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE, FOpCtrlE[0]}), + .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}), + .clear(FDivSqrtDoneE), .en(load_preload), + .reset(reset), .clk(clk)); + fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), - .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, + .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); // convert from signle to double and vice versa diff --git a/wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv b/wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv index 95438e012..c1ab43c19 100755 --- a/wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv +++ b/wally-pipelined/src/fpu/fsm_fpdiv_pipe.sv @@ -28,7 +28,8 @@ module fsm_fpdiv_pipe ( input logic start, input logic op_type, input logic P, - output logic done, + output logic done, + output logic load_preload, output logic load_rega, output logic load_regb, output logic load_regc, @@ -52,7 +53,7 @@ module fsm_fpdiv_pipe ( S30, S31, S32, S33, S34, S35, S36, S37, S38, S39, S40, S41, S42, S43, S44, S45, S46, S47, S48, S49, S50, S51, S52, S53, S54, S55, S56, S57, S58, S59, - S60, S61, S62, S63, S64, S65} statetype; + S60, S61, S62, S63, S64, S65, S66} statetype; statetype current_state, next_state; @@ -73,6 +74,7 @@ module fsm_fpdiv_pipe ( begin done = 1'b0; divBusy = 1'b0; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -89,6 +91,7 @@ module fsm_fpdiv_pipe ( begin done = 1'b0; divBusy = 1'b1; + load_preload = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -99,15 +102,33 @@ module fsm_fpdiv_pipe ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - next_state = S65; + next_state = S66; end - end + end // case: S0 + S66: + begin + done = 1'b0; + divBusy = 1'b1; + load_preload = 1'b0; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + load_regp = 1'b0; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; + next_state = S65; + end // if (start==1'b0) S65: begin if (op_type==1'b0 && P==1'b0) begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -123,7 +144,8 @@ module fsm_fpdiv_pipe ( else if (op_type==1'b0 && P==1'b1) begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -139,7 +161,8 @@ module fsm_fpdiv_pipe ( else if (op_type==1'b1 && P==1'b0) begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -155,7 +178,8 @@ module fsm_fpdiv_pipe ( else if (op_type==1'b1 && P==1'b1) begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -171,7 +195,8 @@ module fsm_fpdiv_pipe ( else begin done = 1'b0; - divBusy = 1'b0; + divBusy = 1'b0; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -189,7 +214,8 @@ module fsm_fpdiv_pipe ( S1: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -205,7 +231,8 @@ module fsm_fpdiv_pipe ( S2: // iteration 1 begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -221,7 +248,8 @@ module fsm_fpdiv_pipe ( S3: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -237,7 +265,8 @@ module fsm_fpdiv_pipe ( S4: // iteration 2 begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -253,7 +282,8 @@ module fsm_fpdiv_pipe ( S5: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -269,7 +299,8 @@ module fsm_fpdiv_pipe ( S6: // iteration 3 begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -285,7 +316,8 @@ module fsm_fpdiv_pipe ( S7: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -301,7 +333,8 @@ module fsm_fpdiv_pipe ( S8: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -317,7 +350,8 @@ module fsm_fpdiv_pipe ( S9: // q,qm,qp begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -333,7 +367,8 @@ module fsm_fpdiv_pipe ( S10: // rem begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -349,7 +384,8 @@ module fsm_fpdiv_pipe ( S11: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -365,7 +401,8 @@ module fsm_fpdiv_pipe ( S12: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -381,7 +418,8 @@ module fsm_fpdiv_pipe ( S13: begin done = 1'b1; - divBusy = 1'b0; + divBusy = 1'b0; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -397,7 +435,8 @@ module fsm_fpdiv_pipe ( S14: begin done = 1'b0; - divBusy = 1'b0; + divBusy = 1'b0; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -414,7 +453,8 @@ module fsm_fpdiv_pipe ( S15: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -430,7 +470,8 @@ module fsm_fpdiv_pipe ( S16: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -446,7 +487,8 @@ module fsm_fpdiv_pipe ( S17: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -462,7 +504,8 @@ module fsm_fpdiv_pipe ( S18: // iteration 1 begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -478,7 +521,8 @@ module fsm_fpdiv_pipe ( S19: // iteration 1 begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -494,7 +538,8 @@ module fsm_fpdiv_pipe ( S20: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -510,7 +555,8 @@ module fsm_fpdiv_pipe ( S21: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -526,7 +572,8 @@ module fsm_fpdiv_pipe ( S22: // iteration 2 begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -542,7 +589,8 @@ module fsm_fpdiv_pipe ( S23: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -558,7 +606,8 @@ module fsm_fpdiv_pipe ( S24: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -574,7 +623,8 @@ module fsm_fpdiv_pipe ( S25: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -590,7 +640,8 @@ module fsm_fpdiv_pipe ( S26: // iteration 3 begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -606,7 +657,8 @@ module fsm_fpdiv_pipe ( S27: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -622,7 +674,8 @@ module fsm_fpdiv_pipe ( S28: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -638,7 +691,8 @@ module fsm_fpdiv_pipe ( S29: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -654,7 +708,8 @@ module fsm_fpdiv_pipe ( S30: // q,qm,qp begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -670,7 +725,8 @@ module fsm_fpdiv_pipe ( S31: // rem begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -686,7 +742,8 @@ module fsm_fpdiv_pipe ( S32: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -702,7 +759,8 @@ module fsm_fpdiv_pipe ( S33: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -718,7 +776,8 @@ module fsm_fpdiv_pipe ( S34: // done begin done = 1'b1; - divBusy = 1'b0; + divBusy = 1'b0; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -734,7 +793,8 @@ module fsm_fpdiv_pipe ( S35: begin done = 1'b0; - divBusy = 1'b0; + divBusy = 1'b0; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -751,7 +811,8 @@ module fsm_fpdiv_pipe ( S36: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -767,7 +828,8 @@ module fsm_fpdiv_pipe ( S37: // iteration 1 begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -783,7 +845,8 @@ module fsm_fpdiv_pipe ( S38: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -799,7 +862,8 @@ module fsm_fpdiv_pipe ( S39: // iteration 2 begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -815,7 +879,8 @@ module fsm_fpdiv_pipe ( S40: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -831,7 +896,8 @@ module fsm_fpdiv_pipe ( S41: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -847,7 +913,8 @@ module fsm_fpdiv_pipe ( S42: // q,qm,qp begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -863,7 +930,8 @@ module fsm_fpdiv_pipe ( S43: // rem begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -879,7 +947,8 @@ module fsm_fpdiv_pipe ( S44: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -895,7 +964,8 @@ module fsm_fpdiv_pipe ( S45: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -911,7 +981,8 @@ module fsm_fpdiv_pipe ( S46: // done begin done = 1'b1; - divBusy = 1'b0; + divBusy = 1'b0; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -927,7 +998,8 @@ module fsm_fpdiv_pipe ( S47: begin done = 1'b0; - divBusy = 1'b0; + divBusy = 1'b0; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -944,7 +1016,8 @@ module fsm_fpdiv_pipe ( S48: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -960,7 +1033,8 @@ module fsm_fpdiv_pipe ( S49: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -976,7 +1050,8 @@ module fsm_fpdiv_pipe ( S50: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -992,7 +1067,8 @@ module fsm_fpdiv_pipe ( S51: // iteration 1 begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -1008,7 +1084,8 @@ module fsm_fpdiv_pipe ( S52: // iteration 1 begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -1024,7 +1101,8 @@ module fsm_fpdiv_pipe ( S53: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -1040,7 +1118,8 @@ module fsm_fpdiv_pipe ( S54: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -1056,7 +1135,8 @@ module fsm_fpdiv_pipe ( S55: // iteration 2 begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -1072,7 +1152,8 @@ module fsm_fpdiv_pipe ( S56: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -1088,7 +1169,8 @@ module fsm_fpdiv_pipe ( S57: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -1104,7 +1186,8 @@ module fsm_fpdiv_pipe ( S58: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -1120,7 +1203,8 @@ module fsm_fpdiv_pipe ( S59: // q,qm,qp begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -1136,7 +1220,8 @@ module fsm_fpdiv_pipe ( S60: // rem begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -1152,7 +1237,8 @@ module fsm_fpdiv_pipe ( S61: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -1168,7 +1254,8 @@ module fsm_fpdiv_pipe ( S62: begin done = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -1184,7 +1271,8 @@ module fsm_fpdiv_pipe ( S63: // done begin done = 1'b1; - divBusy = 1'b0; + divBusy = 1'b0; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -1200,7 +1288,8 @@ module fsm_fpdiv_pipe ( S64: begin done = 1'b0; - divBusy = 1'b0; + divBusy = 1'b0; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -1216,7 +1305,8 @@ module fsm_fpdiv_pipe ( default: begin done = 1'b0; - divBusy = 1'b0; + divBusy = 1'b0; + load_preload = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; From c2f4b49b1597d37b0a283d0d3c0df2d7a5b0e6bb Mon Sep 17 00:00:00 2001 From: kipmacsaigoren Date: Fri, 22 Oct 2021 15:18:25 -0500 Subject: [PATCH 5/6] removed reduntant definitions for FPU in MISA. --- wally-pipelined/config/rv64g/wally-config.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/config/rv64g/wally-config.vh b/wally-pipelined/config/rv64g/wally-config.vh index 605c2535e..d0ead2cdc 100644 --- a/wally-pipelined/config/rv64g/wally-config.vh +++ b/wally-pipelined/config/rv64g/wally-config.vh @@ -36,7 +36,7 @@ `define XLEN 64 // MISA RISC-V configuration per specification -`define MISA (32'h00000104 | 0 << 5 | 0 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 | 1 <<3 | 1 << 5) +`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 ) `define ZICSR_SUPPORTED 1 `define ZIFENCEI_SUPPORTED 1 `define COUNTERS 32