diff --git a/pipelined/config/rv64gc/wally-config.vh b/pipelined/config/rv64gc/wally-config.vh index 8e3ec34df..e56fb7a21 100644 --- a/pipelined/config/rv64gc/wally-config.vh +++ b/pipelined/config/rv64gc/wally-config.vh @@ -139,7 +139,7 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define BPRED_ENABLED 1 +`define BPRED_ENABLED 0 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 `define BPRED_SIZE 10 diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 044ebdcc3..c21ab754b 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -63,7 +63,7 @@ module fdivsqrt( logic [`DIVb:0] FirstU, FirstUM; logic [`DIVb+1:0] FirstC; logic Firstun; - logic WZeroM, AZeroM, BZeroM, AZeroE, BZeroE; + logic WZeroE, AZeroM, BZeroM, AZeroE, BZeroE; logic SpecialCaseM, MDUM; logic [`DIVBLEN:0] nE, nM, mM; logic CalcOTFCSwapE, OTFCSwapE, ALTBM, AsM; @@ -80,15 +80,16 @@ module fdivsqrt( .FDivBusyE, .FDivStartE, .IDivStartE, .IFDivStartE, .FDivDoneE, .StallM, .FlushE, /*.DivDone, */ .XZeroE, .YZeroE, .AZeroE, .BZeroE, .XNaNE, .YNaNE, .MDUE, - .XInfE, .YInfE, .WZeroM, .SpecialCaseM); + .XInfE, .YInfE, .WZeroE, .SpecialCaseM); fdivsqrtiter fdivsqrtiter( .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .MDUE, .SqrtE, // .SqrtM, .X,.DPreproc, .FirstWS(WS), .FirstWC(WC), .IFDivStartE, .CalcOTFCSwapE, .OTFCSwapE, .FDivBusyE); fdivsqrtpostproc fdivsqrtpostproc( - .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .MDUM, + .clk, .reset, .StallM, + .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .MDUE, .Firstun, .SqrtM, .SpecialCaseM, .RemOpM(Funct3M[1]), .ForwardedSrcAM, .nM, .ALTBM, .mM, .BZeroM, .AsM, .OTFCSwapEM(OTFCSwapE), - .QmM, .WZeroM, .DivSM, .FPIntDivResultM); + .QmM, .WZeroE, .DivSM, .FPIntDivResultM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv new 
file mode 100644 index 000000000..fe95c50da --- /dev/null +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv @@ -0,0 +1,76 @@ +/////////////////////////////////////////// +// fdivsqrtexpcalc.sv +// +// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu +// Modified: 13 January 2022 +// +// Purpose: Exponent calculation for the Combined Divide and Square Root Floating Point and Integer Unit +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module fdivsqrtexpcalc( + input logic [`FMTBITS-1:0] Fmt, + input logic [`NE-1:0] Xe, Ye, + input logic Sqrt, + input logic XZeroE, + input logic [`DIVBLEN:0] ell, m, + output logic [`NE+1:0] Qe + ); + logic [`NE-2:0] Bias; + logic [`NE+1:0] SXExp; + logic [`NE+1:0] SExp; + logic [`NE+1:0] DExp; + + if (`FPSIZES == 1) begin + assign Bias = (`NE-1)'(`BIAS); + + end else if (`FPSIZES == 2) begin + assign Bias = Fmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); + + end else if (`FPSIZES == 3) begin + always_comb + case (Fmt) + `FMT: Bias = (`NE-1)'(`BIAS); + `FMT1: Bias = (`NE-1)'(`BIAS1); + `FMT2: Bias = (`NE-1)'(`BIAS2); + default: Bias = 'x; + endcase + + end else if (`FPSIZES == 4) begin + always_comb + case (Fmt) + 2'h3: Bias = (`NE-1)'(`Q_BIAS); + 2'h1: Bias = (`NE-1)'(`D_BIAS); + 2'h0: Bias = (`NE-1)'(`S_BIAS); + 2'h2: Bias = (`NE-1)'(`H_BIAS); + endcase + end + assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS); + assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias}; + // correct exponent for denormalized input's normalization shifts + assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZeroE}}; + + assign Qe = Sqrt ? 
SExp : DExp; +endmodule diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv index d287416ee..f63168290 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -43,7 +43,7 @@ module fdivsqrtfsm( input logic SqrtE, input logic StallM, input logic FlushE, - input logic WZeroM, + input logic WZeroE, input logic MDUE, input logic [`DIVBLEN:0] nE, output logic IFDivStartE, @@ -116,7 +116,8 @@ module fdivsqrtfsm( if (SpecialCaseE) state <= #1 DONE; else state <= #1 BUSY; end else if (state == BUSY) begin - if (step == 1 /*| WZeroM */) state <= #1 DONE; // finished steps or terminate early on zero residual +// if (step == 1 | WZeroE) state <= #1 DONE; // finished steps or terminate early on zero residual + if (step == 1) state <= #1 DONE; // finished all steps; early termination on zero residual (WZeroE) is currently disabled step <= step - 1; end else if (state == DONE) begin if (StallM) state <= #1 DONE; diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index e963df4a7..8eaf98afa 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -31,16 +31,19 @@ `include "wally-config.vh" module fdivsqrtpostproc( + input logic clk, reset, + input logic StallM, input logic [`DIVb+3:0] WS, WC, input logic [`DIVb-1:0] D, input logic [`DIVb:0] FirstU, FirstUM, input logic [`DIVb+1:0] FirstC, + input logic SqrtE, MDUE, input logic Firstun, SqrtM, SpecialCaseM, OTFCSwapEM, input logic [`XLEN-1:0] ForwardedSrcAM, - input logic RemOpM, ALTBM, BZeroM, AsM, MDUM, + input logic RemOpM, ALTBM, BZeroM, AsM, input logic [`DIVBLEN:0] nM, mM, output logic [`DIVb:0] QmM, - output logic WZeroM, + output logic WZeroE, output logic DivSM, output logic [`XLEN-1:0] FPIntDivResultM ); @@ -48,37 +51,56 @@ module fdivsqrtpostproc( logic [`DIVb+3:0] W, Sum, DM; logic [`DIVb:0] PreQmM; logic NegStickyM, PostIncM; - logic weq0; + 
logic weq0E; logic [`DIVBLEN:0] NormShiftM; logic [`DIVb:0] IntQuotM, NormQuotM; logic [`DIVb+3:0] IntRemM, NormRemM; logic signed [`DIVb+3:0] PreResultM, PreFPIntDivResultM; + logic WZeroM; - // check for early termination on an exact result. If the result is not exact, the sticky should be set - aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0); + ////////////////////////// + // Execute Stage: Detect early termination for an exact result + ////////////////////////// - if (`RADIX == 2) begin - logic [`DIVb+3:0] FZero; + // check for early termination on an exact result. + aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0E); + + if (`RADIX == 2) begin: R2EarlyTerm + logic [`DIVb+3:0] FZeroE; logic [`DIVb+2:0] FirstK; - logic wfeq0; + logic wfeq0E; logic [`DIVb+3:0] WCF, WSF; assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1)); - assign FZero = (SqrtM & ~MDUM) ? {FirstUM[`DIVb], FirstUM, 2'b0} | {FirstK,1'b0} : {3'b001,D,1'b0}; - csa #(`DIVb+4) fadd(WS, WC, FZero, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero}; - aplusbeq0 #(`DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0); - assign WZeroM = weq0|(wfeq0 & Firstun); + assign FZeroE = (SqrtE & ~MDUE) ? 
{FirstUM[`DIVb], FirstUM, 2'b0} | {FirstK,1'b0} : {3'b001,D,1'b0}; + csa #(`DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero}; + aplusbeq0 #(`DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E); + assign WZeroE = weq0E|(wfeq0E & Firstun); end else begin - assign WZeroM = weq0; + assign WZeroE = weq0E; end + + ////////////////////////// + // E/M Pipeline register + ////////////////////////// + + flopenr #(1) WZeroMReg(clk, reset, ~StallM, WZeroE, WZeroM); + + ////////////////////////// + // Memory Stage: Postprocessing + ////////////////////////// + + // If the result is not exact, the sticky should be set assign DivSM = ~WZeroM & ~(SpecialCaseM & SqrtM); // ***unsure why SpecialCaseM has to be gated by SqrtM, but otherwise fails regression on divide - // Determine if sticky bit is negative + // Determine if sticky bit is negative // *** look for ways to optimize this assign Sum = WC + WS; assign W = $signed(Sum) >>> `LOGR; assign NegStickyM = W[`DIVb+3]; assign DM = {4'b0001, D}; + // *** put conditionals on integer division hardware, move to its own module + // Integer division: sign handling for div and rem always_comb if (~AsM) @@ -92,7 +114,8 @@ module fdivsqrtpostproc( PostIncM = 0; end else - if (NegStickyM | weq0) begin +// if (NegStickyM | weq0) begin // *** old code, replaced by the one below in the right stage and more comprehensive + if (NegStickyM | WZeroM) begin NormQuotM = FirstU; NormRemM = W; PostIncM = 0; @@ -111,13 +134,14 @@ module fdivsqrtpostproc( IntQuotM = '0; IntRemM = {{(`DIVb-`XLEN+4){1'b0}}, ForwardedSrcAM}; end else if (WZeroM) begin - if (weq0) begin + // *** dh: 12/26: don't understand this logic and why weq0 inside WZero check. 
Need a divide by 0 check here +/* if (weq0) begin */ IntQuotM = FirstU; IntRemM = '0; - end else begin +/* end else begin IntQuotM = FirstUM; IntRemM = '0; - end + end */ end else begin IntQuotM = NormQuotM; IntRemM = NormRemM; diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index c68cd25d4..6711441f5 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -114,6 +114,8 @@ module fdivsqrtpreproc ( else assign PreShiftX = Sqrt ? {2'b11, SqrtX, 1'b0} : DivX; assign X = MDUE ? DivX >> RightShiftX : PreShiftX; + fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZeroE, .ell, .m(mE), .Qe(QeE)); + // radix 2 radix 4 // 1 copies DIVLEN+2 DIVLEN+2/2 // 2 copies DIVLEN+2/2 DIVLEN+2/2*2 @@ -134,51 +136,7 @@ module fdivsqrtpreproc ( flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); flopen #(`XLEN) srcareg(clk, IFDivStartE, ForwardedSrcAE, ForwardedSrcAM); - expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZeroE, .ell, .m(mE), .Qe(QeE)); + endmodule -module expcalc( - input logic [`FMTBITS-1:0] Fmt, - input logic [`NE-1:0] Xe, Ye, - input logic Sqrt, - input logic XZeroE, - input logic [`DIVBLEN:0] ell, m, - output logic [`NE+1:0] Qe - ); - logic [`NE-2:0] Bias; - logic [`NE+1:0] SXExp; - logic [`NE+1:0] SExp; - logic [`NE+1:0] DExp; - - if (`FPSIZES == 1) begin - assign Bias = (`NE-1)'(`BIAS); - - end else if (`FPSIZES == 2) begin - assign Bias = Fmt ? 
(`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); - - end else if (`FPSIZES == 3) begin - always_comb - case (Fmt) - `FMT: Bias = (`NE-1)'(`BIAS); - `FMT1: Bias = (`NE-1)'(`BIAS1); - `FMT2: Bias = (`NE-1)'(`BIAS2); - default: Bias = 'x; - endcase - - end else if (`FPSIZES == 4) begin - always_comb - case (Fmt) - 2'h3: Bias = (`NE-1)'(`Q_BIAS); - 2'h1: Bias = (`NE-1)'(`D_BIAS); - 2'h0: Bias = (`NE-1)'(`S_BIAS); - 2'h2: Bias = (`NE-1)'(`H_BIAS); - endcase - end - assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS); - assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias}; - // correct exponent for denormalized input's normalization shifts - assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZeroE}}; - - assign Qe = Sqrt ? SExp : DExp; -endmodule \ No newline at end of file diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 3da4523ac..df9857c0a 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1191,8 +1191,8 @@ string imperas32f[] = '{ "rv64i_m/F/src/fsub_b4-01.S", "rv64i_m/F/src/fsub_b5-01.S", "rv64i_m/F/src/fsub_b7-01.S", - "rv64i_m/F/src/fsub_b8-01.S" - // "rv64i_m/F/src/fsw-align-01.S" + "rv64i_m/F/src/fsub_b8-01.S", + "rv64i_m/F/src/fsw-align-01.S" }; @@ -1279,8 +1279,8 @@ string imperas32f[] = '{ "rv64i_m/D/src/fle.d_b19-01.S", "rv64i_m/D/src/flt.d_b1-01.S", "rv64i_m/D/src/flt.d_b19-01.S", - "rv64i_m/D/src/fld-align-01.S", //missing right now from top of tree, should be returned when it comes back - "rv64i_m/D/src/fsd-align-01.S", //https://github.com/riscv-non-isa/riscv-arch-test/issues/266 + "rv64i_m/D/src/fld-align-01.S", + "rv64i_m/D/src/fsd-align-01.S", "rv64i_m/D/src/fmadd.d_b14-01.S", "rv64i_m/D/src/fmadd.d_b16-01.S", "rv64i_m/D/src/fmadd.d_b17-01.S", @@ -1551,8 +1551,8 @@ string imperas32f[] = '{ "rv32i_m/F/src/fsub_b4-01.S", "rv32i_m/F/src/fsub_b5-01.S", "rv32i_m/F/src/fsub_b7-01.S", - 
"rv32i_m/F/src/fsub_b8-01.S" - // "rv32i_m/F/src/fsw-align-01.S" + "rv32i_m/F/src/fsub_b8-01.S", + "rv32i_m/F/src/fsw-align-01.S" }; string arch32d[] = '{ @@ -1618,8 +1618,8 @@ string imperas32f[] = '{ "rv32i_m/D/src/fle.d_b19-01.S", "rv32i_m/D/src/flt.d_b1-01.S", "rv32i_m/D/src/flt.d_b19-01.S", - "rv32i_m/D/src/fld-align-01.S", //missing right now from top of tree, should be returned when it comes back - "rv32i_m/D/src/fsd-align-01.S", //https://github.com/riscv-non-isa/riscv-arch-test/issues/266 + "rv32i_m/D/src/fld-align-01.S", + "rv32i_m/D/src/fsd-align-01.S", "rv32i_m/D/src/fmadd.d_b14-01.S", "rv32i_m/D/src/fmadd.d_b16-01.S", "rv32i_m/D/src/fmadd.d_b17-01.S",