diff --git a/.gitignore b/.gitignore index 94fb14c3..d4e9acd5 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ __pycache__/ #External repos addins/riscv-arch-test/Makefile.include addins/riscv-tests/target +addins/TestFloat-3e/build/Linux-x86_64-GCC/* benchmarks/embench/wally*.json #vsim work files to ignore diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index e6726eef..cc24c42f 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -110,7 +110,7 @@ // division constants `define RADIX 32'h2 -`define DIVCOPIES 32'h1 +`define DIVCOPIES 32'h4 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : `NF+3) // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input `define DIVN (`NF<`XLEN ? `XLEN : (`NF + 3)) // length of input diff --git a/pipelined/radixcopiesmultiregression.sh b/pipelined/radixcopiesmultiregression.sh new file mode 100755 index 00000000..1953b1da --- /dev/null +++ b/pipelined/radixcopiesmultiregression.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +configFile=config/shared/wally-shared.vh + +searchRadix="define RADIX 32'".. +searchCopies="define DIVCOPIES 32'".. 
+ +currRadix="define RADIX 32'h2" +currCopies="define DIVCOPIES 32'h1" +sed -i "s/$searchRadix/$currRadix/" $configFile +sed -i "s/$searchCopies/$currCopies/" $configFile +echo regression on Radix :$currRadix: and Copies :$currCopies: +./regression/regression-wally + +currRadix="define RADIX 32'h2" +currCopies="define DIVCOPIES 32'h2" +sed -i "s/$searchRadix/$currRadix/" $configFile +sed -i "s/$searchCopies/$currCopies/" $configFile +echo regression on Radix :$currRadix: and Copies :$currCopies: +./regression/regression-wally + +currRadix="define RADIX 32'h2" +currCopies="define DIVCOPIES 32'h4" +sed -i "s/$searchRadix/$currRadix/" $configFile +sed -i "s/$searchCopies/$currCopies/" $configFile +echo regression on Radix :$currRadix: and Copies :$currCopies: +./regression/regression-wally + +currRadix="define RADIX 32'h4" +currCopies="define DIVCOPIES 32'h1" +sed -i "s/$searchRadix/$currRadix/" $configFile +sed -i "s/$searchCopies/$currCopies/" $configFile +echo regression on Radix :$currRadix: and Copies :$currCopies: +./regression/regression-wally + +currRadix="define RADIX 32'h4" +currCopies="define DIVCOPIES 32'h2" +sed -i "s/$searchRadix/$currRadix/" $configFile +sed -i "s/$searchCopies/$currCopies/" $configFile +echo regression on Radix :$currRadix: and Copies :$currCopies: +./regression/regression-wally + +currRadix="define RADIX 32'h4" +currCopies="define DIVCOPIES 32'h4" +sed -i "s/$searchRadix/$currRadix/" $configFile +sed -i "s/$searchCopies/$currCopies/" $configFile +echo regression on Radix :$currRadix: and Copies :$currCopies: +./regression/regression-wally \ No newline at end of file diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 8bca167d..6e29ce94 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -81,9 +81,9 @@ module fctrl ( (Fmt == 2'b10 & `ZFH_SUPPORTED) | (Fmt == 2'b11 & `Q_SUPPORTED)); always_comb if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled - ControlsD = 
`FCTRLW'b0_0_00_xx_0xx_0_1_0; + ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; else if (OpD != 7'b0000111 & OpD != 7'b0100111 & ~SupportedFmt) - ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // for anything other than loads and stores, check for supported format + ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // for anything other than loads and stores, check for supported format else case(OpD) // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt 7'b0000111: case(Funct3D) @@ -94,7 +94,7 @@ module fctrl ( else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // flq not supported 3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // flh else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // flh not supported - default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // non-implemented instruction + default: ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction endcase 7'b0100111: case(Funct3D) 3'b010: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsw @@ -104,7 +104,7 @@ module fctrl ( else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // fsq not supported 3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsh else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // fsh not supported - default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // non-implemented instruction + default: ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction endcase 7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0; // fmadd 7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0; // fmsub @@ -120,23 +120,23 @@ module fctrl ( 3'b000: ControlsD = `FCTRLW'b1_0_00_xx_000_0_0_0; // fsgnj 3'b001: ControlsD = `FCTRLW'b1_0_00_xx_001_0_0_0; // fsgnjn 3'b010: ControlsD = `FCTRLW'b1_0_00_xx_010_0_0_0; // fsgnjx - default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // non-implemented instruction + default: ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction endcase 7'b00101??: case(Funct3D) 3'b000: ControlsD = 
`FCTRLW'b1_0_00_xx_110_0_0_0; // fmin 3'b001: ControlsD = `FCTRLW'b1_0_00_xx_101_0_0_0; // fmax - default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // non-implemented instruction + default: ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction endcase 7'b10100??: case(Funct3D) 3'b010: ControlsD = `FCTRLW'b0_1_00_xx_010_0_0_0; // feq 3'b001: ControlsD = `FCTRLW'b0_1_00_xx_001_0_0_0; // flt 3'b000: ControlsD = `FCTRLW'b0_1_00_xx_011_0_0_0; // fle - default: ControlsD = `FCTRLW'b0_0_00_xx_0xx__0_1_0; // non-implemented instruction + default: ControlsD = `FCTRLW'b0_0_00_xx_000__0_1_0; // non-implemented instruction endcase 7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_10_xx_000_0_0_0; // fclass else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_xx_000_0_0_0; // fmv.x.w to int reg else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_xx_000_0_0_0; // fmv.x.d to int reg - else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // non-implemented instruction + else ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction 7'b1101000: case(Rs2D[1:0]) 2'b00: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.s.w w->s 2'b01: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.s.wu wu->s @@ -165,7 +165,7 @@ module fctrl ( endcase 7'b1111001: ControlsD = `FCTRLW'b1_0_00_xx_011_0_0_0; // fmv.d.x to fp reg 7'b0100001: ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0; // fcvt.d.s - default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // non-implemented instruction + default: ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction endcase default: ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction endcase diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 0d742246..ee84c532 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -68,27 +68,27 @@ module fdivsqrt( logic [`DIVBLEN:0] nE, nM, mM; logic NegQuotM, 
ALTBM, AsM, W64M; logic DivStartE; - logic [`XLEN-1:0] ForwardedSrcAM; + logic [`XLEN-1:0] AM; fdivsqrtpreproc fdivsqrtpreproc( .clk, .IFDivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), - .Sqrt(SqrtE), .Ym(YmE), .XZeroE, .X, .DPreproc, .ForwardedSrcAM, .MDUM, .W64M, + .Sqrt(SqrtE), .Ym(YmE), .XZeroE, .X, .DPreproc, .AM, .MDUM, .W64M, .nE, .nM, .mM, .NegQuotM, .ALTBM, .AZeroM, .BZeroM, .AZeroE, .BZeroE, .AsM, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .MDUE, .W64E); fdivsqrtfsm fdivsqrtfsm( .clk, .reset, .FmtE, .XsE, .SqrtE, .nE, - .FDivBusyE, .FDivStartE, .IDivStartE, .IFDivStartE, .FDivDoneE, .StallM, .FlushE, /*.DivDone, */ + .FDivBusyE, .FDivStartE, .IDivStartE, .IFDivStartE, .FDivDoneE, .StallM, .FlushE, .XZeroE, .YZeroE, .AZeroE, .BZeroE, .XNaNE, .YNaNE, .MDUE, .XInfE, .YInfE, .WZeroE, .SpecialCaseM); fdivsqrtiter fdivsqrtiter( - .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, // .SqrtM, + .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .X,.DPreproc, .FirstWS(WS), .FirstWC(WC), .IFDivStartE, .FDivBusyE); fdivsqrtpostproc fdivsqrtpostproc( .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .Firstun, - .SqrtM, .SpecialCaseM, .RemOpM(Funct3M[1]), .ForwardedSrcAM, + .SqrtM, .SpecialCaseM, .RemOpM(Funct3M[1]), .AM, .nM, .ALTBM, .mM, .BZeroM, .AsM, .NegQuotM, .W64M, .QmM, .WZeroE, .DivSM, .FPIntDivResultM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv index fe95c50d..bba177c5 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv @@ -34,7 +34,7 @@ module fdivsqrtexpcalc( input logic [`FMTBITS-1:0] Fmt, input logic [`NE-1:0] Xe, Ye, input logic Sqrt, - input logic XZeroE, + input logic XZero, input logic [`DIVBLEN:0] ell, m, output logic [`NE+1:0] Qe ); @@ -70,7 +70,7 @@ module fdivsqrtexpcalc( assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - 
(`NE+2)'(`BIAS); assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias}; // correct exponent for denormalized input's normalization shifts - assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZeroE}}; + assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}}; assign Qe = Sqrt ? SExp : DExp; endmodule diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv index 743715b3..a5735ba3 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -65,8 +65,10 @@ module fdivsqrtfsm( // terminate immediately on special cases assign FSpecialCaseE = XZeroE | (YZeroE&~SqrtE) | XInfE | YInfE | XNaNE | YNaNE | (XsE&SqrtE); - assign ISpecialCaseE = AZeroE | BZeroE; // *** why is AZeroE part of this. Should other special cases be considered? - assign SpecialCaseE = MDUE ? ISpecialCaseE : FSpecialCaseE; + if (`IDIV_ON_FPU) begin + assign ISpecialCaseE = AZeroE | BZeroE; // *** why is AZeroE part of this. Should other special cases be considered? + assign SpecialCaseE = MDUE ? ISpecialCaseE : FSpecialCaseE; + end else assign SpecialCaseE = FSpecialCaseE; flopenr #(1) SpecialCaseReg(clk, reset, ~StallM, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc // DIVN = `NF+3 @@ -103,7 +105,8 @@ module fdivsqrtfsm( always_comb begin if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - cycles = MDUE ? (nE + 1) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); + if (`IDIV_ON_FPU) cycles = MDUE ? 
((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); + else cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); end /* verilator lint_on WIDTH */ diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 6a93c1f8..8bedd384 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -39,7 +39,7 @@ module fdivsqrtpostproc( input logic [`DIVb+1:0] FirstC, input logic SqrtE, input logic Firstun, SqrtM, SpecialCaseM, NegQuotM, - input logic [`XLEN-1:0] ForwardedSrcAM, + input logic [`XLEN-1:0] AM, input logic RemOpM, ALTBM, BZeroM, AsM, W64M, input logic [`DIVBLEN:0] nM, mM, output logic [`DIVb:0] QmM, @@ -98,79 +98,80 @@ module fdivsqrtpostproc( // Determine if sticky bit is negative // *** look for ways to optimize this. Shift shouldn't be needed. assign Sum = WC + WS; - assign W = $signed(Sum) >>> `LOGR; - assign NegStickyM = W[`DIVb+3]; - assign DM = {4'b0001, D}; - - // *** put conditionals on integer division hardware, move to its own module - - // Integer division: sign handling for div and rem - always_comb - if (~AsM) - if (NegStickyM) begin - NormQuotM = FirstUM; - NormRemM = W + DM; - end else begin - NormQuotM = FirstU; - NormRemM = W; - end - else - if (NegStickyM) begin - NormQuotM = FirstUM; - NormRemM = -(W + DM); - end else begin - NormQuotM = FirstU; - NormRemM = -W; - end - - // Integer division: Special cases - always_comb - if (ALTBM) begin - IntQuotM = '0; - IntRemM = {{(`DIVb-`XLEN+4){1'b0}}, ForwardedSrcAM}; - end else begin - logic [`DIVb+3:0] PreIntQuotM; - if (WZeroM) begin - if (weq0M) begin - PreIntQuotM = {3'b000, FirstU}; - IntRemM = '0; - end else begin - PreIntQuotM = {3'b000, FirstUM}; - IntRemM = '0; - end - end else begin - PreIntQuotM = {3'b000, NormQuotM}; - IntRemM = NormRemM; - end - // flip sign if necessary - if (NegQuotM) IntQuotM = -PreIntQuotM; - else IntQuotM = PreIntQuotM; - end + 
assign NegStickyM = Sum[`DIVb+3]; - always_comb - if (RemOpM) begin - NormShiftM = ALTBM ? '0 : (mM + (`DIVBLEN+1)'(`DIVa)); // no postshift if forwarding input A to remainder - PreResultM = IntRemM; - end else begin - NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR))); - PreResultM = IntQuotM; - /* - if (~ALTBM & NegQuotM) begin - PreResultM = {3'b111, -IntQuotM}; - end else begin - PreResultM = {3'b000, IntQuotM}; - end*/ - //PreResultM = {IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM}; // Suspicious Sign Extender - end - - - // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted - - assign PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM); - assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? ForwardedSrcAM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases - // *** conditional on RV64 - assign FPIntDivResultM = (W64M ? {{(`XLEN-32){SpecialFPIntDivResultM[31]}}, SpecialFPIntDivResultM[31:0]} : SpecialFPIntDivResultM[`XLEN-1:0]); // Sign extending in case of W64 - assign PreQmM = NegStickyM ? FirstUM : FirstU; // Select U or U-1 depending on negative sticky bit assign QmM = SqrtM ? 
(PreQmM << 1) : PreQmM; + + if (`IDIV_ON_FPU) begin + assign W = $signed(Sum) >>> `LOGR; + assign DM = {4'b0001, D}; + + // Integer division: sign handling for div and rem + always_comb + if (~AsM) + if (NegStickyM) begin + NormQuotM = FirstUM; + NormRemM = W + DM; + end else begin + NormQuotM = FirstU; + NormRemM = W; + end + else + if (NegStickyM) begin + NormQuotM = FirstUM; + NormRemM = -(W + DM); + end else begin + NormQuotM = FirstU; + NormRemM = -W; + end + + // Integer division: Special cases + always_comb + if (ALTBM) begin + IntQuotM = '0; + IntRemM = {{(`DIVb-`XLEN+4){1'b0}}, AM}; + end else begin + logic [`DIVb+3:0] PreIntQuotM; + if (WZeroM) begin + if (weq0M) begin + PreIntQuotM = {3'b000, FirstU}; + IntRemM = '0; + end else begin + PreIntQuotM = {3'b000, FirstUM}; + IntRemM = '0; + end + end else begin + PreIntQuotM = {3'b000, NormQuotM}; + IntRemM = NormRemM; + end + // flip sign if necessary + if (NegQuotM) IntQuotM = -PreIntQuotM; + else IntQuotM = PreIntQuotM; + end + + always_comb + if (RemOpM) begin + NormShiftM = ALTBM ? '0 : (mM + (`DIVBLEN+1)'(`DIVa)); // no postshift if forwarding input A to remainder + PreResultM = IntRemM; + end else begin + NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR))); + PreResultM = IntQuotM; + /* + if (~ALTBM & NegQuotM) begin + PreResultM = {3'b111, -IntQuotM}; + end else begin + PreResultM = {3'b000, IntQuotM}; + end*/ + //PreResultM = {IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM}; // Suspicious Sign Extender + end + + + // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted + + assign PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM); + assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? AM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases + // *** conditional on RV64 + assign FPIntDivResultM = (W64M ? 
{{(`XLEN-32){SpecialFPIntDivResultM[31]}}, SpecialFPIntDivResultM[31:0]} : SpecialFPIntDivResultM[`XLEN-1:0]); // Sign extending in case of W64 + end endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 1f4ac4ea..b3f42a7c 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -47,7 +47,7 @@ module fdivsqrtpreproc ( output logic [`NE+1:0] QeM, output logic [`DIVb+3:0] X, output logic [`DIVb-1:0] DPreproc, - output logic [`XLEN-1:0] ForwardedSrcAM + output logic [`XLEN-1:0] AM ); logic [`DIVb-1:0] XPreproc; @@ -56,9 +56,6 @@ module fdivsqrtpreproc ( logic [`NE+1:0] QeE; // Intdiv signals logic [`DIVb-1:0] IFNormLenX, IFNormLenD; - logic [`XLEN-1:0] PosA, PosB; - logic AsE, BsE, ALTBE, NegQuotE; - logic [`XLEN-1:0] A64, B64, A64Src; logic [`DIVBLEN:0] mE; logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX; logic [`DIVBLEN:0] pPlusr, pPrCeil, p, ell; @@ -69,77 +66,98 @@ module fdivsqrtpreproc ( // ***can probably merge X LZC with conversion // cout the number of leading zeros - // *** W64 muxes conditional on RV64 - assign AsE = ~Funct3E[0] & (W64E ? ForwardedSrcAE[31] : ForwardedSrcAE[`XLEN-1]); - assign BsE = ~Funct3E[0] & (W64E ? ForwardedSrcBE[31] : ForwardedSrcBE[`XLEN-1]); - assign A64 = W64E ? {{(`XLEN-32){AsE}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE; - assign B64 = W64E ? {{(`XLEN-32){BsE}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE; - assign A64Src = W64E ? {{(`XLEN-32){ForwardedSrcAE[31]}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE; + if (`IDIV_ON_FPU) begin + logic signedDiv; + logic AsE, BsE, ALTBE, NegQuotE; + logic [`XLEN-1:0] AE, BE; + logic [`XLEN-1:0] PosA, PosB; - assign NegQuotE = (AsE ^ BsE) & MDUE; - - assign PosA = AsE ? -A64 : A64; - assign PosB = BsE ? -B64 : B64; - assign AZeroE = W64E ? ~(|ForwardedSrcAE[31:0]) : ~(|ForwardedSrcAE); - assign BZeroE = W64E ? 
~(|ForwardedSrcBE[31:0]) : ~(|ForwardedSrcBE); + // Extract inputs, signs, zero, depending on W64 mode if applicable + assign signedDiv = ~Funct3E[0]; + if (`XLEN==64) begin // 64-bit, supports W64 + assign AsE = signedDiv & (W64E ? ForwardedSrcAE[31] : ForwardedSrcAE[`XLEN-1]); + assign BsE = signedDiv & (W64E ? ForwardedSrcBE[31] : ForwardedSrcBE[`XLEN-1]); + assign AE = W64E ? {{(`XLEN-32){AsE}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE; + assign BE = W64E ? {{(`XLEN-32){BsE}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE; + assign AZeroE = W64E ? ~(|ForwardedSrcAE[31:0]) : ~(|ForwardedSrcAE); + assign BZeroE = W64E ? ~(|ForwardedSrcBE[31:0]) : ~(|ForwardedSrcBE); + end else begin // 32 bits only + assign AsE = signedDiv & ForwardedSrcAE[`XLEN-1]; + assign BsE = signedDiv & ForwardedSrcBE[`XLEN-1]; + assign AE = ForwardedSrcAE; + assign BE = ForwardedSrcBE; + assign AZeroE = ~(|ForwardedSrcAE); + assign BZeroE = ~(|ForwardedSrcBE); + end - assign IFNormLenX = MDUE ? {PosA, {(`DIVb-`XLEN){1'b0}}} : {Xm, {(`DIVb-`NF-1){1'b0}}}; - assign IFNormLenD = MDUE ? {PosB, {(`DIVb-`XLEN){1'b0}}} : {Ym, {(`DIVb-`NF-1){1'b0}}}; + // Quotient is negative + assign NegQuotE = (AsE ^ BsE) & MDUE; + + // Force inputs to be positive + assign PosA = AsE ? -AE : AE; + assign PosB = BsE ? -BE : BE; + + // Select integer or floating point inputs + assign IFNormLenX = MDUE ? {PosA, {(`DIVb-`XLEN){1'b0}}} : {Xm, {(`DIVb-`NF-1){1'b0}}}; + assign IFNormLenD = MDUE ? {PosB, {(`DIVb-`XLEN){1'b0}}} : {Ym, {(`DIVb-`NF-1){1'b0}}}; + + // Difference in number of leading zeros + assign ZeroDiff = mE - ell; + assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B + assign p = ALTBE ? 
'0 : ZeroDiff; + + /* verilator lint_off WIDTH */ + // right shift amount to complete in discrete number of steps + assign pPlusr = (`DIVBLEN)'(`LOGR) + p; + assign pPrTrunc = pPlusr % `RK; + assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN{1'b0}}, |(pPrTrunc)}; + assign nE = (pPrCeil * (`DIVBLEN+1)'(`DIVCOPIES)) - {{(`DIVBLEN){1'b0}}, 1'b1}; + assign IntBits = (`DIVBLEN)'(`LOGR) + p - {{(`DIVBLEN){1'b0}}, 1'b1}; + assign RightShiftX = ((`DIVBLEN)'(`RK) - 1) - (IntBits % `RK); + /* verilator lint_on WIDTH */ + + // Select integer or floating-point operands + assign NumZeroE = MDUE ? AZeroE : XZeroE; + assign X = MDUE ? DivX >> RightShiftX : PreShiftX; + + // pipeline registers + flopen #(1) mdureg(clk, IFDivStartE, MDUE, MDUM); + flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); + flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); + flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); + flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); + flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM); + flopen #(1) azeroreg(clk, IFDivStartE, AZeroE, AZeroM); + flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); + flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); + flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM); + + end else begin + assign IFNormLenX = {Xm, {(`DIVb-`NF-1){1'b0}}}; + assign IFNormLenD = {Ym, {(`DIVb-`NF-1){1'b0}}}; + assign NumZeroE = XZeroE; + assign X = PreShiftX; + end + + // count leading zeros for denorm FP and to normalize integer inputs lzc #(`DIVb) lzcX (IFNormLenX, ell); lzc #(`DIVb) lzcY (IFNormLenD, mE); - assign XPreproc = IFNormLenX << (ell + {{`DIVBLEN{1'b0}}, 1'b1}); // had issue with (`DIVBLEN+1)'(~MDUE) so using this instead - assign DPreproc = IFNormLenD << (mE + {{`DIVBLEN{1'b0}}, 1'b1}); // replaced ~MDUE with 1 bc we always want that extra left shift - - assign ZeroDiff = mE - ell; - assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B - assign p = ALTBE ? 
'0 : ZeroDiff; - -/* verilator lint_off WIDTH */ - assign pPlusr = (`DIVBLEN)'(`LOGR) + p; - assign pPrTrunc = pPlusr % `RK; -//assign pPrTrunc = (`LOGRK == 0) ? 0 : pPlusr[`LOGRK-1:0]; - assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN{1'b0}}, |(pPrTrunc)}; - assign nE = (pPrCeil * (`DIVBLEN+1)'(`DIVCOPIES)) - {{(`DIVBLEN){1'b0}}, 1'b1}; - assign IntBits = (`DIVBLEN)'(`LOGR) + p - {{(`DIVBLEN){1'b0}}, 1'b1}; - assign RightShiftX = ((`DIVBLEN)'(`RK) - 1) - (IntBits % `RK); -//assign RightShiftX = (`LOGRK == 0) ? 0 : ((`DIVBLEN)'(`RK) - 1) - {{(`DIVBLEN - `RK){1'b0}}, IntBits[`LOGRK-1:0]}; -/* verilator lint_on WIDTH */ - - assign NumZeroE = MDUE ? AZeroE : XZeroE; + // Normalization shift + assign XPreproc = IFNormLenX << (ell + {{`DIVBLEN{1'b0}}, 1'b1}); + assign DPreproc = IFNormLenD << (mE + {{`DIVBLEN{1'b0}}, 1'b1}); + // append leading 1 (for nonzero inputs) and zero-extend assign SqrtX = (Xe[0]^ell[0]) ? {1'b0, ~NumZeroE, XPreproc[`DIVb-1:1]} : {~NumZeroE, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF assign DivX = {3'b000, ~NumZeroE, XPreproc}; // *** explain why X is shifted between radices (initial assignment of WS=RX) if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX} : DivX; else assign PreShiftX = Sqrt ? {2'b11, SqrtX, 1'b0} : DivX; - assign X = MDUE ? 
DivX >> RightShiftX : PreShiftX; - fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZeroE, .ell, .m(mE), .Qe(QeE)); + // Floating-point exponent + fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); - // radix 2 radix 4 - // 1 copies DIVLEN+2 DIVLEN+2/2 - // 2 copies DIVLEN+2/2 DIVLEN+2/2*2 - // 4 copies DIVLEN+2/4 DIVLEN+2/2*4 - // 8 copies DIVLEN+2/8 DIVLEN+2/2*8 - - // DIVRESLEN = DIVLEN or DIVLEN+2 - // r = 1 or 2 - // DIVRESLEN/(r*`DIVCOPIES) - - flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM); - flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); - flopen #(1) azeroreg(clk, IFDivStartE, AZeroE, AZeroM); - flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); - flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); - flopen #(1) mdureg(clk, IFDivStartE, MDUE, MDUM); - flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); - flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); - flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); - flopen #(`XLEN) srcareg(clk, IFDivStartE, A64Src, ForwardedSrcAM); - - endmodule diff --git a/pipelined/src/muldiv/intdivrestoring.sv b/pipelined/src/mdu/intdivrestoring.sv similarity index 100% rename from pipelined/src/muldiv/intdivrestoring.sv rename to pipelined/src/mdu/intdivrestoring.sv diff --git a/pipelined/src/muldiv/intdivrestoringstep.sv b/pipelined/src/mdu/intdivrestoringstep.sv similarity index 100% rename from pipelined/src/muldiv/intdivrestoringstep.sv rename to pipelined/src/mdu/intdivrestoringstep.sv diff --git a/pipelined/src/muldiv/muldiv.sv b/pipelined/src/mdu/mdu.sv similarity index 98% rename from pipelined/src/muldiv/muldiv.sv rename to pipelined/src/mdu/mdu.sv index 96d47173..04fbcd64 100644 --- a/pipelined/src/muldiv/muldiv.sv +++ b/pipelined/src/mdu/mdu.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// muldiv.sv +// mdu.sv // // Written: David_Harris@hmc.edu 9 January 2021 // Modified: @@ -30,7 
+30,7 @@ `include "wally-config.vh" -module muldiv ( +module mdu ( input logic clk, reset, // Execute Stage interface // input logic [`XLEN-1:0] SrcAE, SrcBE, @@ -94,6 +94,6 @@ module muldiv ( // Writeback stage pipeline register flopenrc #(`XLEN) MDUResultWReg(clk, reset, FlushW, ~StallW, MDUResultM, MDUResultW); -endmodule // muldiv +endmodule // mdu diff --git a/pipelined/src/muldiv/mul.sv b/pipelined/src/mdu/mul.sv similarity index 100% rename from pipelined/src/muldiv/mul.sv rename to pipelined/src/mdu/mul.sv diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index 6519f823..24ef3dbe 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -370,7 +370,7 @@ module wallypipelinedcore ( assign BigEndianM = 0; end if (`M_SUPPORTED) begin:mdu - muldiv mdu( + mdu mdu( .clk, .reset, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E, diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index b5c93dc9..c20dd3ad 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -82,7 +82,7 @@ module testbenchfp; logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by logic [`DIVb:0] Quot; logic CvtResDenormUfE; - logic DivStart, FDivBusyE; + logic DivStart, FDivBusyE, OldFDivBusyE; logic reset = 1'b0; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`DURLEN-1:0] Dur; @@ -689,12 +689,12 @@ module testbenchfp; .Xe(Xe), .Ye(Ye), .Ze(Ze), .Xm(Xm), .Ym(Ym), .Zm(Zm), .XZero, .YZero, .ZZero, .Ss, .Se, - .OpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .InvA, .SCnt, .As, .Ps, + .OpCtrl(OpCtrlVal), .Sm, .InvA, .SCnt, .As, .Ps, .ZmSticky); end postprocess postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]), - .Ze(Ze), .ZDenorm(ZDenorm), .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp), + .ZDenorm(ZDenorm), .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp), .Xm(Xm), .Ym(Ym), .Zm(Zm), 
.CvtCe(CvtCalcExpE), .DivS(DivSticky), .FmaSs(Ss), .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE), @@ -719,8 +719,8 @@ module testbenchfp; fdivsqrt fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]), .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .FDivStartE(DivStart), .IDivStartE(1'b0), .MDUE(1'b0), .W64E(1'b0), - .StallE(1'b0), .StallM(1'b0), .DivSM(DivSticky), .FDivBusyE, .QeM(DivCalcExp), - .QmM(Quot), .DivDone); + .StallM(1'b0), .DivSM(DivSticky), .FDivBusyE, .QeM(DivCalcExp), + .QmM(Quot)); end assign CmpFlg[3:0] = 0; @@ -811,6 +811,9 @@ end logic ResMatch, FlagMatch, CheckNow; +always @(posedge clk) + OldFDivBusyE = FDivBusyE; + // check results on falling edge of clk always @(negedge clk) begin @@ -883,6 +886,7 @@ always @(negedge clk) begin ResMatch = (Res === Ans | NaNGood | NaNGood === 1'bx); FlagMatch = (ResFlg === AnsFlg | AnsFlg === 5'bx); divsqrtop = OpCtrlVal == `SQRT_OPCTRL | OpCtrlVal == `DIV_OPCTRL; + assign DivDone = OldFDivBusyE & ~FDivBusyE; //assign divsqrtop = OpCtrl[TestNum] == `SQRT_OPCTRL | OpCtrl[TestNum] == `DIV_OPCTRL; CheckNow = (DivDone | ~divsqrtop) & (UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT); diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index df9857c0..61e45d9e 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -138,6 +138,7 @@ string tvpaths[] = '{ string imperas32f[] = '{ `IMPERASTEST, + "rv32i_m/F/FSQRT-S-DYN-RDN-01", "rv32i_m/F/FADD-S-DYN-RDN-01", "rv32i_m/F/FADD-S-DYN-RMM-01", "rv32i_m/F/FADD-S-DYN-RNE-01", @@ -1198,8 +1199,6 @@ string imperas32f[] = '{ string arch64d[] = '{ `RISCVARCHTEST, - "rv64i_m/D/src/fsqrt.d_b1-01.S", - "rv64i_m/D/src/fdiv.d_b20-01.S", "rv64i_m/D/src/fadd.d_b10-01.S", "rv64i_m/D/src/fadd.d_b1-01.S", 
"rv64i_m/D/src/fadd.d_b11-01.S",