diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 8f10611af..dd291ecea 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -38,7 +38,8 @@ module fctrl ( input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision input logic [6:0] OpD, // bits 6:0 of instruction input logic [4:0] Rs2D, // bits 24:20 of instruction - input logic [2:0] Funct3D, // bits 14:12 of instruction - may contain rounding mode + input logic [2:0] Funct3D, Funct3E, // bits 14:12 of instruction - may contain rounding mode + input logic MDUE, input logic [2:0] FRM_REGW, // rounding mode from CSR input logic [1:0] STATUS_FS, // is FPU enabled? input logic FDivBusyE, // is the divider busy @@ -61,7 +62,7 @@ module fctrl ( logic [`FCTRLW-1:0] ControlsD; logic IllegalFPUInstrD, IllegalFPUInstrE; logic FRegWriteD; // FP register write enable - logic DivStartD; // integer register write enable + logic FDivStartD, FDivStartE, IDivStartE; // division start signals: floating-point (D and E stages) and integer (E stage) logic FWriteIntD; // integer register write enable logic FRegWriteE; // FP register write enable logic [2:0] OpCtrlD; // Select which opperation to do in each component @@ -169,7 +170,7 @@ module fctrl ( endcase // unswizzle control bits - assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, DivStartD, IllegalFPUInstrD} = ControlsD; + assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD; // rounding modes: // 000 - round to nearest, ties to even @@ -264,7 +265,12 @@ module fctrl ( {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, IllegalFPUInstrE}); flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, {Adr1E, Adr2E, Adr3E}); - flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE); + flopenrc #(1) DEFDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE); + if (`M_SUPPORTED) begin +
assign IDivStartE = MDUE & Funct3E[2]; + assign DivStartE = FDivStartE | IDivStartE; // integer or floating-point division + end else assign DivStartE = FDivStartE; + assign FCvtIntE = (FResSelE == 2'b01); // E/M pipleine register diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 3f6426a2a..19679aa55 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -67,17 +67,18 @@ module fdivsqrt( logic SpecialCaseM; fdivsqrtpreproc fdivsqrtpreproc( - .clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), - .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc); + .clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), + .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, + .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E); fdivsqrtfsm fdivsqrtfsm( .clk, .reset, .FmtE, .XsE, .SqrtE, - .DivBusy, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, + .DivBusy, .DivStartE,.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XInfE, .YInfE, .WZero, .SpecialCaseM); fdivsqrtiter fdivsqrtiter( .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .SqrtM, .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, - .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, + .DivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .DivBusy); fdivsqrtpostproc fdivsqrtpostproc(.WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .SpecialCaseM, .QmM, .WZero, .DivSM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv index db11dcefd..8dc188c6b 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -37,7 +37,7 @@ module fdivsqrtfsm( input logic XInfE, YInfE, input logic XZeroE, YZeroE, input logic XNaNE, YNaNE, - input logic DivStart, + input logic DivStartE, input logic XsE, 
input logic SqrtE, input logic StallE, @@ -101,8 +101,8 @@ module fdivsqrtfsm( always_ff @(posedge clk) begin if (reset) begin state <= #1 IDLE; - end else if (DivStart&~StallE) begin - step <= cycles; // *** this should be adjusted to depend on the precision; sqrt should use one fewer step becasue firststep=1 + end else if (DivStartE&~StallE) begin + step <= cycles; // $display("Setting Nf = %d fbits %d cycles = %d FmtE %d FPSIZES = %d Q_NF = %d num = %d denom = %d\n", Nf, fbits, cycles, FmtE, `FPSIZES, `Q_NF, // (fbits +(`LOGR*`DIVCOPIES)-1), (`LOGR*`DIVCOPIES)); if (SpecialCaseE) state <= #1 DONE; diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv index b0beae6d9..d13d706f4 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -32,7 +32,7 @@ module fdivsqrtiter( input logic clk, - input logic DivStart, + input logic DivStartE, input logic DivBusy, input logic [`NE-1:0] Xe, Ye, input logic XZeroE, YZeroE, @@ -90,19 +90,19 @@ module fdivsqrtiter( // Initialize C to -1 for sqrt and -R for division logic [1:0] initCSqrt, initCDiv2, initCDiv4, initCUpper; - assign initCSqrt = 2'b11; - assign initCDiv2 = 2'b10; - assign initCDiv4 = 2'b00; // *** not sure why this works; seems like it should be 00 for initializing to -4 + assign initCSqrt = 2'b11; // -1 + assign initCDiv2 = 2'b10; // -2 + assign initCDiv4 = 2'b00; // -4 assign initCUpper = SqrtE ? initCSqrt : (`RADIX == 4) ? 
initCDiv4 : initCDiv2; assign initC = {initCUpper, {`DIVb{1'b0}}}; - mux2 #(`DIVb+4) wsmux(NextWSN, X, DivStart, WSN); - flopen #(`DIVb+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]); - mux2 #(`DIVb+4) wcmux(NextWCN, '0, DivStart, WCN); - flopen #(`DIVb+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); - flopen #(`DIVN-1) dflop(clk, DivStart, Dpreproc, D); - mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, DivStart, CMux); - flopen #(`DIVb+2) cflop(clk, DivStart|DivBusy, CMux, C[0]); + mux2 #(`DIVb+4) wsmux(NextWSN, X, DivStartE, WSN); + flopen #(`DIVb+4) wsflop(clk, DivStartE|DivBusy, WSN, WS[0]); + mux2 #(`DIVb+4) wcmux(NextWCN, '0, DivStartE, WCN); + flopen #(`DIVb+4) wcflop(clk, DivStartE|DivBusy, WCN, WC[0]); + flopen #(`DIVN-1) dflop(clk, DivStartE, Dpreproc, D); + mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, DivStartE, CMux); + flopen #(`DIVb+2) cflop(clk, DivStartE|DivBusy, CMux, C[0]); // Divisor Selections // - choose the negitive version of what's being selected @@ -139,10 +139,10 @@ module fdivsqrtiter( // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division assign initU = SqrtE ? {1'b1, {(`DIVb){1'b0}}} : 0; assign initUM = SqrtE ? 
0 : {1'b1, {(`DIVb){1'b0}}}; - mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, DivStart, UMux); - mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, DivStart, UMMux); - flopen #(`DIVb+1) UReg(clk, DivStart|DivBusy, UMux, U[0]); - flopen #(`DIVb+1) UMReg(clk, DivStart|DivBusy, UMMux, UM[0]); + mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, DivStartE, UMux); + mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, DivStartE, UMMux); + flopen #(`DIVb+1) UReg(clk, DivStartE|DivBusy, UMux, U[0]); + flopen #(`DIVb+1) UMReg(clk, DivStartE|DivBusy, UMMux, UM[0]); assign FirstWS = WS[0]; assign FirstWC = WC[0]; diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 2a6f6a9e2..d1f9b93ba 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -32,12 +32,15 @@ module fdivsqrtpreproc ( input logic clk, - input logic DivStart, + input logic DivStartE, input logic [`NF:0] Xm, Ym, input logic [`NE-1:0] Xe, Ye, input logic [`FMTBITS-1:0] Fmt, input logic Sqrt, input logic XZero, + input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B + input logic [2:0] Funct3E, Funct3M, + input logic MDUE, W64E, output logic [`NE+1:0] QeM, output logic [`DIVb+3:0] X, output logic [`DIVN-2:0] Dpreproc @@ -76,7 +79,7 @@ module fdivsqrtpreproc ( // DIVRESLEN = DIVLEN or DIVLEN+2 // r = 1 or 2 // DIVRESLEN/(r*`DIVCOPIES) - flopen #(`NE+2) expflop(clk, DivStart, Qe, QeM); + flopen #(`NE+2) expflop(clk, DivStartE, Qe, QeM); expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .XZeroCnt, .YZeroCnt, .Qe); endmodule diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 459b891d0..dcc0db6d5 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -163,7 +163,8 @@ module fpu ( ////////////////////////////////////////////////////////////////////////////////////////// // 
calculate FP control signals - fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD, + fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), + .Funct3E, .MDUE, .InstrD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, .reset, .clk, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, .YEnForwardE, .ZEnForwardE, .DivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .XEnE, .YEnE, .ZEnE,