From 961cf91482b1da5bc617c1036cadb988981ae8da Mon Sep 17 00:00:00 2001 From: Jacob Pease Date: Tue, 26 Sep 2023 15:16:45 -0500 Subject: [PATCH 01/62] Linux Makefile: Fixed find utility crashing for real this time. --- linux/Makefile | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/linux/Makefile b/linux/Makefile index 433bf0e73..093913eee 100644 --- a/linux/Makefile +++ b/linux/Makefile @@ -27,23 +27,6 @@ BINARIES := fw_jump.elf vmlinux busybox OBJDUMPS := $(foreach name, $(BINARIES), $(basename $(name) .elf)) OBJDUMPS := $(foreach name, $(OBJDUMPS), $(DIS)/$(name).objdump) -# LINUXDIR := $(shell ls $(BUILDROOT)/output/build | grep -e '^linux-[0-9]\+\.[0-9]\+\.[0-9]\+$$' ) -# LINUXDIR := $(BUILDROOT)/output/build/$(LINUXDIR) -# BUSYBOXDIR := $(shell ls $(BUILDROOT)/output/build | grep -e '^linux-[0-9]\+\.[0-9]\+\.[0-9]\+$$' ) -# BUSYBOXDIR := $(BUILDROOT)/output/build/$(BUSYBOXDIR) - -# Gets Linux and Busybox output folders for objedect dumps -# LINUXDIR ?= $(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$") -# BUSYBOXDIR ?= $(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/busybox-[0-9]+\.[0-9]+\.[0-9]+$$") - -define linuxDir = -$(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$") -endef - -define busyboxDir = -$(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/busybox-[0-9]+\.[0-9]+\.[0-9]+$$") -endef - .PHONY: all generate disassemble install clean cleanDTB cleanDriver test # Generate all device trees ------------------------------------------- @@ -59,8 +42,7 @@ all: # Temp rule for debugging test: - @echo $(linuxDir) - @echo $(busyboxDir) + echo $(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$") generate: $(DTB) $(IMAGES) @@ -87,11 +69,13 @@ $(DIS)/%.objdump: $(IMAGES)/%.elf $(DIS)/%.objdump: $(IMAGES)/% 
riscv64-unknown-elf-objdump -S $< >> $@ -$(IMAGES)/vmlinux: $(call linuxDir)/vmlinux - cp $< $@ +$(IMAGES)/vmlinux: + linuxDir=$$(find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$") ;\ + cp $$linuxDir/vmlinux $@ ;\ -$(IMAGES)/busybox: $(call busyboxDir)/busybox - cp $< $@ +$(IMAGES)/busybox: + busyboxDir=$$(find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/busybox-[0-9]+\.[0-9]+\.[0-9]+$$") ;\ + cp $$busyboxDir/busybox $@ ;\ # Generating new Buildroot directories -------------------------------- From 2b1c604016b5ddd878b64b18debadf6ea4e676e5 Mon Sep 17 00:00:00 2001 From: Jacob Pease Date: Tue, 17 Oct 2023 14:13:18 -0500 Subject: [PATCH 02/62] Slight modification to testbench.sv --- testbench/testbench.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/testbench/testbench.sv b/testbench/testbench.sv index 74077e547..dd83f7610 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -387,6 +387,7 @@ module testbench; assign SDCCmd = SDCCmdOE ? SDCCmdOut : 1'bz; assign SDCCmdIn = SDCCmd; + assign SDCDat = sd_dat_reg_t ? sd_dat_reg_o : sd_dat_i; assign SDCDatIn = SDCDat; -----/\----- EXCLUDED -----/\----- */ assign SDCIntr = '0; From 4c106215f44cecc055a2e88fe47d0b683d0282e0 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 08:46:55 -0800 Subject: [PATCH 03/62] Started cleaning up shifting leading 1 in fdivsqrt --- config/shared/config-shared.vh | 18 +++++++++--------- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 18 +++++++++--------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 48f02b848..acc7996cb 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -94,15 +94,15 @@ localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2); localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 
S_BIAS : H_BIAS); // division constants -localparam DIVN = (((NF+2 Date: Fri, 10 Nov 2023 09:11:15 -0800 Subject: [PATCH 04/62] fdivsqrt parameter cleanup --- config/shared/config-shared.vh | 13 ++++++------- config/shared/parameter-defs.vh | 3 +-- src/cvw.sv | 1 - src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 3 ++- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 18 +++++++++--------- 5 files changed, 18 insertions(+), 20 deletions(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index acc7996cb..17b1ede83 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -94,15 +94,14 @@ localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2); localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS); // division constants -localparam DIVN = ((NF+2>> NormShiftM); diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 8f3c477c4..0e716ac20 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -48,7 +48,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] AM ); - logic [P.DIVb-1:0] Xfract, Dfract; + logic [P.DIVb:0] Xfract, Dfract; logic [P.DIVb:0] PreSqrtX; logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [P.NE+1:0] QeE; // Quotient Exponent (FP only) @@ -103,12 +103,12 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( ////////////////////////////////////////////////////// // count leading zeros for Subnorm FP and to normalize integer inputs - lzc #(P.DIVb) lzcX (IFX[P.DIVb:1], ell); - lzc #(P.DIVb) lzcY (IFD[P.DIVb:1], mE); + lzc #(P.DIVb+1) lzcX (IFX, ell); + lzc #(P.DIVb+1) lzcY (IFD, mE); // Normalization shift: shift off leading one - assign Xfract = (IFX[P.DIVb:1] << ell) << 1; - assign Dfract = (IFD[P.DIVb:1] << mE) << 1; + assign Xfract = (IFX << ell); + assign Dfract = (IFD << mE); 
////////////////////////////////////////////////////// // Integer Right Shift to digit boundary @@ -158,10 +158,10 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( // it comes out in the wash and gives the right answer. Investigate later if possible. ////////////////////////////////////////////////////// - assign DivX = {3'b000, ~NumerZeroE, Xfract}; + assign DivX = {3'b000, Xfract}; // Sqrt is initialized on step one as R(X-1), so depends on Radix - mux2 #(P.DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[P.DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); + mux2 #(P.DIVb+1) sqrtxmux(Xfract, {1'b0, Xfract[P.DIVb:1]}, (Xe[0] ^ ell[0]), PreSqrtX); if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); @@ -176,8 +176,8 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( assign X = PreShiftX; end - // Divisior register - flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D); + // Divisior register + flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dfract}, D); // Floating-point exponent fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); From 255873a50cbc1b5af537130097fb318f0d17d8e8 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 11:21:02 -0800 Subject: [PATCH 05/62] Divsqrt cleanup: change Q to U, commenting code --- src/fpu/fdivsqrt/fdivsqrt.sv | 8 +++---- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 2 +- src/fpu/fdivsqrt/fdivsqrtexpcalc.sv | 11 ++++++--- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 12 +++++----- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 36 +++++++++++++++++----------- src/fpu/fpu.sv | 12 +++++----- src/fpu/postproc/divshiftcalc.sv | 28 +++++++++++----------- src/fpu/postproc/postprocess.sv | 12 +++++----- src/fpu/postproc/round.sv | 6 ++--- src/fpu/postproc/shiftcorrection.sv | 8 +++---- 10 files changed, 74 insertions(+), 61 deletions(-) diff --git 
a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index 5c5fa0f57..60e42f457 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -45,8 +45,8 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( input logic IntDivE, W64E, output logic DivStickyM, output logic FDivBusyE, IFDivStartE, FDivDoneE, - output logic [P.NE+1:0] QeM, - output logic [P.DIVb:0] QmM, + output logic [P.NE+1:0] UeM, // Exponent result + output logic [P.DIVb:0] UmM, // Significand result output logic [P.XLEN-1:0] FIntDivResultM ); @@ -74,7 +74,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), - .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE, + .FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, .BZeroM, .nM, .mM, .AM, @@ -94,7 +94,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( fdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, - .QmM, .WZeroE, .DivStickyM, + .UmM, .WZeroE, .DivStickyM, // Int-specific .nM, .mM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM, .FIntDivResultM); diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index ed28c9355..2122317fe 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -68,7 +68,7 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( if (SqrtE) fbits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1; is it related to DIVCOPIES logic below? 
// if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES); + if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : (fbits -1)/(P.RK) + 1; else CyclesE = (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES); end /* verilator lint_on WIDTH */ diff --git a/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv b/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv index 5531276df..113f2b2dd 100644 --- a/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv @@ -32,8 +32,9 @@ module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) ( input logic Sqrt, input logic XZero, input logic [P.DIVBLEN:0] ell, m, - output logic [P.NE+1:0] Qe + output logic [P.NE+1:0] Ue ); + logic [P.NE-2:0] Bias; logic [P.NE+1:0] SXExp; logic [P.NE+1:0] SExp; @@ -63,10 +64,14 @@ module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) ( 2'h2: Bias = (P.NE-1)'(P.H_BIAS); endcase end + + // Square root exponent = (Xe - l - bias) / 2 + bias; l accounts for subnorms assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS); assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias}; - // correct exponent for subnormal input's normalization shifts + // division exponent = (Xe-l) - (Ye-m) + bias; l and m account for subnorms assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias}); - assign Qe = Sqrt ? SExp : DExp; + + // Select square root or division exponent + assign Ue = Sqrt ? 
SExp : DExp; endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 9f887d4ab..2b9be54a7 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -38,14 +38,14 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] AM, input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M, input logic [P.DIVBLEN:0] nM, mM, - output logic [P.DIVb:0] QmM, + output logic [P.DIVb:0] UmM, // result significand output logic WZeroE, output logic DivStickyM, output logic [P.XLEN-1:0] FIntDivResultM ); logic [P.DIVb+3:0] W, Sum; - logic [P.DIVb:0] PreQmM; + logic [P.DIVb:0] PreUmM; logic NegStickyM; logic weq0E, WZeroM; logic [P.XLEN-1:0] IntDivResultM; @@ -91,17 +91,17 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( // Determine if sticky bit is negative // *** look for ways to optimize this. Shift shouldn't be needed. assign Sum = WC + WS; assign NegStickyM = Sum[P.DIVb+3]; - mux2 #(P.DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit - mux2 #(P.DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM); + mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit + mux2 #(P.DIVb+1) ummux(PreUmM, (PreUmM << 1), SqrtM, UmM); - // Integer quotient or remainder correctoin, normalization, and special cases + // Integer quotient or remainder correction, normalization, and special cases if (P.IDIV_ON_FPU) begin:intpostproc // Int supported logic [P.DIVBLEN:0] NormShiftM; logic [P.DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM; logic signed [P.DIVb+3:0] PreResultM, PreIntResultM; assign W = $signed(Sum) >>> P.LOGR; - assign UnsignedQuotM = {3'b000, PreQmM}; + assign UnsignedQuotM = {3'b000, PreUmM}; // Integer remainder: sticky and sign correction muxes assign NegQuotM = AsM ^ BsM; // Integer Quotient is negative diff --git 
a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 0e716ac20..2255aafb1 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -35,7 +35,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( input logic SqrtE, input logic XZeroE, input logic [2:0] Funct3E, - output logic [P.NE+1:0] QeM, + output logic [P.NE+1:0] UeM, output logic [P.DIVb+3:0] X, D, // Int-specific input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B @@ -48,10 +48,10 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] AM ); - logic [P.DIVb:0] Xfract, Dfract; + logic [P.DIVb:0] Xnorm, Dnorm; logic [P.DIVb:0] PreSqrtX; logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed - logic [P.NE+1:0] QeE; // Quotient Exponent (FP only) + logic [P.NE+1:0] UeE; // Result Exponent (FP only) logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs logic NumerZeroE; // Numerator is zero (X or A) @@ -106,9 +106,9 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( lzc #(P.DIVb+1) lzcX (IFX, ell); lzc #(P.DIVb+1) lzcY (IFD, mE); - // Normalization shift: shift off leading one - assign Xfract = (IFX << ell); - assign Dfract = (IFD << mE); + // Normalization shift: shift leading one into most significant bit + assign Xnorm = (IFX << ell); + assign Dnorm = (IFD << mE); ////////////////////////////////////////////////////// // Integer Right Shift to digit boundary @@ -133,10 +133,11 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.LOGRK-1:0] IntTrunc, RightShiftX; logic [P.DIVBLEN:0] TotalIntBits, IntSteps; /* verilator lint_off WIDTH */ + // n = k*ceil((r+p)/rk) - 1 assign TotalIntBits = P.LOGR + p; // Total number of result bits (r 
integer bits plus p fractional bits) assign IntTrunc = TotalIntBits % P.RK; // Truncation check for ceiling operator assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div - assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits + assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits = total digits - 1 integer digit assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps /* verilator lint_on WIDTH */ @@ -150,18 +151,25 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( ////////////////////////////////////////////////////// // Floating-Point Preprocessing - // append leading 1 (for nonzero inputs) + // Extend to Q4.b format // shift square root to be in range [1/4, 1) // Normalized numbers are shifted right by 1 if the exponent is odd // Subnormal numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. // NOTE: there might be a discrepancy that X is never right shifted by 2. However - // it comes out in the wash and gives the right answer. Investigate later if possible. + // it comes out in the wash and gives the right answer. Investigate later if possible. *** ////////////////////////////////////////////////////// - assign DivX = {3'b000, Xfract}; + assign DivX = {3'b000, Xnorm}; // Zero-extend numerator for division // Sqrt is initialized on step one as R(X-1), so depends on Radix - mux2 #(P.DIVb+1) sqrtxmux(Xfract, {1'b0, Xfract[P.DIVb:1]}, (Xe[0] ^ ell[0]), PreSqrtX); + // If X = 0, then special case logic sets sqrt = 0 so this portion doesn't matter + // Otherwise, X has a leading 1 after possible normalization shift and is now in range [1, 2) + // Next X is shifted right by 1 or 2 bits to range [1/4, 1) and exponent will be adjusted accordingly to be even + // Now (X-1) is negative. 
Formed by placing all 1s in all four integer bits (in Q4.b) form, keeping X in fraciton bits + // Then multiply by R is left shift by r (1 or 2 for radix 2 or 4) + // For Radix 2, this gives 3 leading 1s, followed by the fraction bits + // For Radix 4, this gives 2 leading 1s, followed by the fraction bits (and a zero in the lsb) + mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, (Xe[0] ^ ell[0]), PreSqrtX); if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); @@ -177,11 +185,11 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( end // Divisior register - flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dfract}, D); + flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D); // Floating-point exponent - fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); - flopen #(P.NE+2) expreg(clk, IFDivStartE, QeE, QeM); + fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Ue(UeE)); + flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM); // Number of FSM cycles (to FSM) fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE); diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index f71999471..ffd9cf49a 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -133,8 +133,8 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU) // divide signals - logic [P.DIVb:0] QmM; // fdivsqrt signifcand - logic [P.NE+1:0] QeM; // fdivsqrt exponent + logic [P.DIVb:0] UmM; // fdivsqrt signifcand + logic [P.NE+1:0] UeM; // fdivsqrt exponent logic DivStickyM; // fdivsqrt sticky bit logic FDivDoneE, IFDivStartE; // fdivsqrt control signals logic [P.XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU) @@ -242,8 +242,8 @@ module fpu import cvw::*; #(parameter cvw_t P) ( fdivsqrt #(P) 
fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E, - .StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM, - .QmM, .FIntDivResultM); + .StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .UeM, + .UmM, .FIntDivResultM); // compare: fmin/fmax, flt/fle/feq fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), @@ -326,9 +326,9 @@ module fpu import cvw::*; #(parameter cvw_t P) ( ////////////////////////////////////////////////////////////////////////////////////////// postprocess #(P) postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), - .FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM), + .FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivUm(UmM), .FmaSs(SsM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), - .FmaSm(SmM), .DivQe(QeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), + .FmaSm(SmM), .DivUe(UeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), .CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); diff --git a/src/fpu/postproc/divshiftcalc.sv b/src/fpu/postproc/divshiftcalc.sv index d560714db..380f8f5e6 100644 --- a/src/fpu/postproc/divshiftcalc.sv +++ b/src/fpu/postproc/divshiftcalc.sv @@ -27,8 +27,8 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module divshiftcalc import 
cvw::*; #(parameter cvw_t P) ( - input logic [P.DIVb:0] DivQm, // divsqrt significand - input logic [P.NE+1:0] DivQe, // divsqrt exponent + input logic [P.DIVb:0] DivUm, // divsqrt significand + input logic [P.NE+1:0] DivUe, // divsqrt exponent output logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount output logic [P.NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input output logic DivResSubnorm, // is the divsqrt result subnormal @@ -41,23 +41,23 @@ module divshiftcalc import cvw::*; #(parameter cvw_t P) ( // is the result subnormal // if the exponent is 1 then the result needs to be normalized then the result is Subnormalizes - assign DivResSubnorm = DivQe[P.NE+1]|(~|DivQe[P.NE+1:0]); + assign DivResSubnorm = DivUe[P.NE+1]|(~|DivUe[P.NE+1:0]); // if the result is subnormal - // 00000000x.xxxxxx... Exp = DivQe - // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 - // .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1 + // 00000000x.xxxxxx... Exp = DivUe + // .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1 + // .00xxxxxxxxxxxxx... << DivUe+NF+1 Exp = +1 // .0000xxxxxxxxxxx... >> 1 Exp = 1 - // Left shift amount = DivQe+NF+1-1 - assign DivSubnormShift = (P.NE+2)'(P.NF)+DivQe; + // Left shift amount = DivUe+NF+1-1 + assign DivSubnormShift = (P.NE+2)'(P.NF)+DivUe; assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1]; // if the result is normalized - // 00000000x.xxxxxx... Exp = DivQe - // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 - // 00000000.xxxxxxx... << NF Exp = DivQe+1 - // 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards) - // 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after) + // 00000000x.xxxxxx... Exp = DivUe + // .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1 + // 00000000.xxxxxxx... << NF Exp = DivUe+1 + // 00000000x.xxxxxx... << NF Exp = DivUe (extra shift done afterwards) + // 00000000xx.xxxxx... << 1? 
Exp = DivUe-1 (determined after) // inital Left shift amount = NF // shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit assign NormShift = (P.LOGNORMSHIFTSZ)'(P.NF); @@ -68,5 +68,5 @@ module divshiftcalc import cvw::*; #(parameter cvw_t P) ( assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift; // pre-shift the divider result for normalization - assign DivShiftIn = {{P.NF{1'b0}}, DivQm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}}; + assign DivShiftIn = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}}; endmodule diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv index ee96b34d2..05db352cd 100644 --- a/src/fpu/postproc/postprocess.sv +++ b/src/fpu/postproc/postprocess.sv @@ -48,8 +48,8 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( input logic [$clog2(3*P.NF+5)-1:0] FmaSCnt, // the normalization shift count //divide signals input logic DivSticky, // divider sticky bit - input logic [P.NE+1:0] DivQe, // divsqrt exponent - input logic [P.DIVb:0] DivQm, // divsqrt significand + input logic [P.NE+1:0] DivUe, // divsqrt exponent + input logic [P.DIVb:0] DivUm, // divsqrt significand // conversion signals input logic CvtCs, // the result's sign input logic [P.NE:0] CvtCe, // the calculated expoent @@ -91,7 +91,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( // division singals logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input - logic [P.NE+1:0] Qe; // divsqrt corrected exponent after corretion shift + logic [P.NE+1:0] Ue; // divsqrt corrected exponent after corretion shift logic DivByZero; // divide by zero flag logic DivResSubnorm; // is the divsqrt result subnormal logic DivSubnormShiftPos; // is the divsqrt subnorm shift amout positive (not underflowed) @@ -146,7 +146,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( fmashiftcalc #(P) fmashiftcalc(.FmaSm, .FmaSCnt, 
.Fmt, .NormSumExp, .FmaSe, .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc #(P) divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn); + divshiftcalc #(P) divshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn); // select which unit's output to shift always_comb @@ -174,7 +174,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( // correct for LZA/divsqrt error shiftcorrection #(P) shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp, - .DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf); + .DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivUe, .Ue, .FmaSZero, .Shifted, .FmaMe, .Mf); /////////////////////////////////////////////////////////////////////////////// // Rounding @@ -189,7 +189,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( // calulate result sign used in rounding unit roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms); - round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe, + round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Ue, .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt, .CvtResUf, .DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me); diff --git a/src/fpu/postproc/round.sv b/src/fpu/postproc/round.sv index 0a5d9ecc5..e01ff376b 100644 --- a/src/fpu/postproc/round.sv +++ b/src/fpu/postproc/round.sv @@ -39,7 +39,7 @@ module round import cvw::*; #(parameter cvw_t P) ( // divsqrt input logic DivOp, // is a division opperation being done input logic DivSticky, // divsqrt sticky bit - input logic [P.NE+1:0] Qe, // the divsqrt calculated expoent + input logic [P.NE+1:0] Ue, // the divsqrt calculated expoent // cvt input logic CvtOp, // is a convert opperation being done input logic ToInt, // is the cvt op a cvt to integer @@ -300,8 +300,8 @@ module round import cvw::*; 
#(parameter cvw_t P) ( case(PostProcSel) 2'b10: Me = FmaMe; // fma 2'b00: Me = {CvtCe[P.NE], CvtCe}&{P.NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt - // 2'b01: Me = DivDone ? Qe : '0; // divide - 2'b01: Me = Qe; // divide + // 2'b01: Me = DivDone ? Ue : '0; // divide + 2'b01: Me = Ue; // divide default: Me = '0; endcase diff --git a/src/fpu/postproc/shiftcorrection.sv b/src/fpu/postproc/shiftcorrection.sv index 9e0473667..f5860b42d 100644 --- a/src/fpu/postproc/shiftcorrection.sv +++ b/src/fpu/postproc/shiftcorrection.sv @@ -31,7 +31,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( // divsqrt input logic DivOp, // is it a divsqrt opperation input logic DivResSubnorm, // is the divsqrt result subnormal - input logic [P.NE+1:0] DivQe, // the divsqrt result's exponent + input logic [P.NE+1:0] DivUe, // the divsqrt result's exponent input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed) //fma input logic FmaOp, // is it an fma opperation @@ -41,7 +41,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( // output output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum output logic [P.CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction - output logic [P.NE+1:0] Qe // corrected exponent for divider + output logic [P.NE+1:0] Ue // corrected exponent for divider ); logic [3*P.NF+3:0] CorrSumShifted; // the shifted sum after LZA correction @@ -61,7 +61,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( // correct the shifting of the divsqrt caused by producing a result in (2, .5] range // condition: if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Subnorm) - assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1)); + assign LeftShiftQm = (LZAPlus1|(DivUe==1&~LZAPlus1)); assign CorrQm0 = Shifted[P.NORMSHIFTSZ-3:P.NORMSHIFTSZ-P.CORRSHIFTSZ-2]; assign CorrQm1 = Shifted[P.NORMSHIFTSZ-2:P.NORMSHIFTSZ-P.CORRSHIFTSZ-1]; mux2 #(P.CORRSHIFTSZ) 
divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted); @@ -87,5 +87,5 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( // the quotent is in the range [.5,2) if there is no early termination // if the quotent < 1 and not Subnormal then subtract 1 to account for the normalization shift - assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(P.NE+1)'(0), ~LZAPlus1}; + assign Ue = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1}; endmodule From 8f87860146fa2f58cc6d3cc42020d4199d0334b2 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 11:25:54 -0800 Subject: [PATCH 06/62] Reduced duplicated logic in fdivsqrtcycles --- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index 2122317fe..e9fbc6042 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -33,7 +33,10 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( input logic [P.DIVBLEN:0] nE, output logic [P.DURLEN-1:0] CyclesE ); + logic [P.DURLEN+1:0] Nf, fbits; // number of fractional bits + logic [P.DURLEN-1:0] fpcycles; // number of cycles for floating-point operation + // DIVN = P.NF+3 // NS = NF + 1 // N = NS or NS+2 for div/sqrt. @@ -68,8 +71,10 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( if (SqrtE) fbits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1; is it related to DIVCOPIES logic below? // if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (P.IDIV_ON_FPU) CyclesE = IntDivE ? 
((nE + 1)/P.DIVCOPIES) : (fbits -1)/(P.RK) + 1; - else CyclesE = (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES); + assign fpcycles = (fbits-1)/(P.RK) + 1; + + if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : fpcycles; + else CyclesE = fpcycles; end /* verilator lint_on WIDTH */ From 2903791820e56cc02516ad24fab358b6f9d35ba7 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 14:00:27 -0800 Subject: [PATCH 07/62] Simplified cycle count logic --- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 18 +++++++++--------- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 22 +++++++++++++--------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index e9fbc6042..df581701b 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -30,12 +30,12 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] FmtE, input logic SqrtE, input logic IntDivE, - input logic [P.DIVBLEN:0] nE, + input logic [P.DIVBLEN:0] IntResultBits, output logic [P.DURLEN-1:0] CyclesE ); - logic [P.DURLEN+1:0] Nf, fbits; // number of fractional bits - logic [P.DURLEN-1:0] fpcycles; // number of cycles for floating-point operation + logic [P.DURLEN+1:0] Nf, FPResultBits; // number of fractional bits + logic [P.DIVBLEN:0] ResultBits; // number of result bits; // DIVN = P.NF+3 // NS = NF + 1 @@ -68,13 +68,13 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( endcase always_comb begin - if (SqrtE) fbits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1; is it related to DIVCOPIES logic below? 
- // if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 - else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - assign fpcycles = (fbits-1)/(P.RK) + 1; + if (SqrtE) FPResultBits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1 rather than +2; is it related to DIVCOPIES logic below? + else FPResultBits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : fpcycles; - else CyclesE = fpcycles; + if (P.IDIV_ON_FPU) ResultBits = IntDivE ? IntResultBits : FPResultBits; + else ResultBits = FPResultBits; + + assign CyclesE = (ResultBits-1)/(P.RK) + 1; end /* verilator lint_on WIDTH */ diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 2255aafb1..ab0941aca 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -54,6 +54,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.NE+1:0] UeE; // Result Exponent (FP only) logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs + logic [P.DIVBLEN:0] IntResultBits; // bits in integer result logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division logic SignedDivE; // signed division @@ -122,7 +123,11 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( // calculate number of fractional bits p assign ZeroDiff = mE - ell; // Difference in number of leading zeros assign ALTBE = ZeroDiff[P.DIVBLEN]; // A less than B (A has more leading zeros) - mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p); + mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, 
p); + + /* verilator lint_off WIDTH */ + assign IntResultBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) + /* verilator lint_on WIDTH */ // Integer special cases (terminate immediately) assign ISpecialCaseE = BZeroE | ALTBE; @@ -131,15 +136,14 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( if (P.LOGRK > 0) begin // more than 1 bit per cycle logic [P.LOGRK-1:0] IntTrunc, RightShiftX; - logic [P.DIVBLEN:0] TotalIntBits, IntSteps; + logic [P.DIVBLEN:0] IntSteps; /* verilator lint_off WIDTH */ // n = k*ceil((r+p)/rk) - 1 - assign TotalIntBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) - assign IntTrunc = TotalIntBits % P.RK; // Truncation check for ceiling operator - assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div - assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits = total digits - 1 integer digit - assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount - assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps + assign IntTrunc = IntResultBits % P.RK; // Truncation check for ceiling operator + assign IntSteps = (IntResultBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div + assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits = total digits - 1 integer digit + assign RightShiftX = P.RK - 1 - ((IntResultBits - 1) % P.RK); // Right shift amount + assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps /* verilator lint_on WIDTH */ end else begin // radix 2 1 copy doesn't require shifting assign nE = p; @@ -192,7 +196,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM); // Number of FSM cycles (to FSM) - fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE); + fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, 
.IntResultBits, .CyclesE); if (P.IDIV_ON_FPU) begin:intpipelineregs // pipeline registers From b315ead57507cc884d070c0f77e0a05f875f705b Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 14:28:57 -0800 Subject: [PATCH 08/62] Simplified IntDivNormShift --- src/fpu/fdivsqrt/fdivsqrt.sv | 6 ++-- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 9 +++++- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 4 +-- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 41 ++++++++++++++-------------- 4 files changed, 33 insertions(+), 27 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index 60e42f457..751486f86 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -67,7 +67,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( // Integer div/rem signals logic BZeroM; // Denominator is zero logic IntDivM; // Integer operation - logic [P.DIVBLEN:0] nM, mM; // Shift amounts + logic [P.DIVBLEN:0] mM, IntDivNormShiftM; // Shift amounts logic ALTBM, AsM, BsM, W64M; // Special handling for postprocessor logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor logic ISpecialCaseE; // Integer div/remainder special cases @@ -77,7 +77,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( .FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, - .BZeroM, .nM, .mM, .AM, + .BZeroM, .IntDivNormShiftM, .mM, .AM, .IntDivM, .W64M, .ALTBM, .AsM, .BsM); fdivsqrtfsm #(P) fdivsqrtfsm( // FSM @@ -96,6 +96,6 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, .UmM, .WZeroE, .DivStickyM, // Int-specific - .nM, .mM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM, + .IntDivNormShiftM, .mM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM, .FIntDivResultM); endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index df581701b..bba6e8005 100644 --- 
a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -67,6 +67,13 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( P.Q_FMT: Nf = P.Q_NF; endcase + // Cycle logic + // P.DIVCOPIES = k. P.LOGR = log(R) = r. P.RK = rk. + // Integer division needs p fractional + r integer result bits + // FP Division needs at least Nf fractional bits + 2 guard/round bits and one integer digit (LOG R integer bits) = Nf + 2 + r bits + // FP Sqrt needs at least Nf fractional bits, 2 guard/round bits, and *** shift bits + // The datapath produces rk bits per cycle, so Cycles = ceil (ResultBits / rk) + always_comb begin if (SqrtE) FPResultBits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1 rather than +2; is it related to DIVCOPIES logic below? else FPResultBits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs @@ -74,7 +81,7 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( if (P.IDIV_ON_FPU) ResultBits = IntDivE ? 
IntResultBits : FPResultBits; else ResultBits = FPResultBits; - assign CyclesE = (ResultBits-1)/(P.RK) + 1; + assign CyclesE = (ResultBits-1)/(P.RK) + 1; // ceil (ResultBits/rk) end /* verilator lint_on WIDTH */ diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 2b9be54a7..58649e3a8 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -37,7 +37,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( input logic Firstun, SqrtM, SpecialCaseM, input logic [P.XLEN-1:0] AM, input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M, - input logic [P.DIVBLEN:0] nM, mM, + input logic [P.DIVBLEN:0] mM, IntDivNormShiftM, output logic [P.DIVb:0] UmM, // result significand output logic WZeroE, output logic DivStickyM, @@ -111,7 +111,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( // Select quotient or remainder and do normalization shift localparam DIVa = (P.DIVb+1-P.XLEN); // used for idiv on fpu: Shift residual right by b - (XLEN-1) to put remainder in lsbs of integer result - mux2 #(P.DIVBLEN+1) normshiftmux(((P.DIVBLEN+1)'(P.DIVb) - (nM * (P.DIVBLEN+1)'(P.LOGR))), (mM + (P.DIVBLEN+1)'(DIVa)), RemOpM, NormShiftM); + mux2 #(P.DIVBLEN+1) normshiftmux(IntDivNormShiftM, (mM + (P.DIVBLEN+1)'(DIVa)), RemOpM, NormShiftM); mux2 #(P.DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM); assign PreIntResultM = $signed(PreResultM >>> NormShiftM); diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index ab0941aca..35757e480 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -42,7 +42,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( input logic IntDivE, W64E, output logic ISpecialCaseE, output logic [P.DURLEN-1:0] CyclesE, - output logic [P.DIVBLEN:0] nM, mM, + output logic [P.DIVBLEN:0] mM, IntDivNormShiftM, output logic ALTBM, IntDivM, W64M, output logic AsM, BsM, BZeroM, 
output logic [P.XLEN-1:0] AM @@ -53,7 +53,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [P.NE+1:0] UeE; // Result Exponent (FP only) logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input - logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs + logic [P.DIVBLEN:0] mE, ell; // Leading zeros of inputs logic [P.DIVBLEN:0] IntResultBits; // bits in integer result logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division @@ -126,27 +126,21 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p); /* verilator lint_off WIDTH */ - assign IntResultBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) + assign IntResultBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) /* verilator lint_on WIDTH */ // Integer special cases (terminate immediately) assign ISpecialCaseE = BZeroE | ALTBE; - // calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps - + // calculate right shift amount RightShiftX to complete in discrete number of steps if (P.LOGRK > 0) begin // more than 1 bit per cycle logic [P.LOGRK-1:0] IntTrunc, RightShiftX; logic [P.DIVBLEN:0] IntSteps; - /* verilator lint_off WIDTH */ - // n = k*ceil((r+p)/rk) - 1 - assign IntTrunc = IntResultBits % P.RK; // Truncation check for ceiling operator - assign IntSteps = (IntResultBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div - assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits = total digits - 1 integer digit + /* verilator lint_offf WIDTH */ assign RightShiftX = P.RK - 1 - ((IntResultBits - 1) % P.RK); // Right shift amount - assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to 
complete in nE steps + assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in n steps /* verilator lint_on WIDTH */ end else begin // radix 2 1 copy doesn't require shifting - assign nE = p; assign DivXShifted = DivX; end end else begin @@ -199,17 +193,22 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBits, .CyclesE); if (P.IDIV_ON_FPU) begin:intpipelineregs + logic [P.DIVBLEN:0] IntDivNormShiftE; + /* verilator lint_off WIDTH */ + assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. rn = Cycles * r * k - r ***explain + /* verilator lint_on WIDTH */ + // pipeline registers - flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); - flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); - flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); - flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); - flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM); - flopen #(P.DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); - flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); - flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM); + flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); + flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); + flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); + flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); + flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM); + flopen #(P.DIVBLEN+1) nsreg(clk, IFDivStartE, IntDivNormShiftE, IntDivNormShiftM); + flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); + flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM); if (P.XLEN==64) - flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); + flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); end endmodule From 3108b58290d7dfe0f05d1ee47d5c7b078873b453 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 14:55:36 -0800 Subject: [PATCH 09/62] Simplified integer postnormalization shift --- 
src/fpu/fdivsqrt/fdivsqrt.sv | 6 +++--- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 7 ++----- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 12 ++++++++---- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index 751486f86..ac5c2c338 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -67,7 +67,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( // Integer div/rem signals logic BZeroM; // Denominator is zero logic IntDivM; // Integer operation - logic [P.DIVBLEN:0] mM, IntDivNormShiftM; // Shift amounts + logic [P.DIVBLEN:0] IntNormShiftM; // Integer normalizatoin shift amount logic ALTBM, AsM, BsM, W64M; // Special handling for postprocessor logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor logic ISpecialCaseE; // Integer div/remainder special cases @@ -77,7 +77,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( .FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, - .BZeroM, .IntDivNormShiftM, .mM, .AM, + .BZeroM, .IntNormShiftM, .AM, .IntDivM, .W64M, .ALTBM, .AsM, .BsM); fdivsqrtfsm #(P) fdivsqrtfsm( // FSM @@ -96,6 +96,6 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, .UmM, .WZeroE, .DivStickyM, // Int-specific - .IntDivNormShiftM, .mM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM, + .IntNormShiftM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM, .FIntDivResultM); endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 58649e3a8..3b6115201 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -37,7 +37,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( input logic Firstun, SqrtM, SpecialCaseM, input logic [P.XLEN-1:0] AM, input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M, - 
input logic [P.DIVBLEN:0] mM, IntDivNormShiftM, + input logic [P.DIVBLEN:0] IntNormShiftM, output logic [P.DIVb:0] UmM, // result significand output logic WZeroE, output logic DivStickyM, @@ -96,7 +96,6 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( // Integer quotient or remainder correction, normalization, and special cases if (P.IDIV_ON_FPU) begin:intpostproc // Int supported - logic [P.DIVBLEN:0] NormShiftM; logic [P.DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM; logic signed [P.DIVb+3:0] PreResultM, PreIntResultM; @@ -110,10 +109,8 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( mux2 #(P.DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM); // Select quotient or remainder and do normalization shift - localparam DIVa = (P.DIVb+1-P.XLEN); // used for idiv on fpu: Shift residual right by b - (XLEN-1) to put remainder in lsbs of integer result - mux2 #(P.DIVBLEN+1) normshiftmux(IntDivNormShiftM, (mM + (P.DIVBLEN+1)'(DIVa)), RemOpM, NormShiftM); mux2 #(P.DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM); - assign PreIntResultM = $signed(PreResultM >>> NormShiftM); + assign PreIntResultM = $signed(PreResultM >>> IntNormShiftM); // special case logic // terminates immediately when B is Zero (div 0) or |A| has more leading 0s than |B| diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 35757e480..137f54d99 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -42,7 +42,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( input logic IntDivE, W64E, output logic ISpecialCaseE, output logic [P.DURLEN-1:0] CyclesE, - output logic [P.DIVBLEN:0] mM, IntDivNormShiftM, + output logic [P.DIVBLEN:0] IntNormShiftM, output logic ALTBM, IntDivM, W64M, output logic AsM, BsM, BZeroM, output logic [P.XLEN-1:0] AM @@ -193,10 +193,15 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( fdivsqrtcycles #(P) 
cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBits, .CyclesE); if (P.IDIV_ON_FPU) begin:intpipelineregs - logic [P.DIVBLEN:0] IntDivNormShiftE; + logic [P.DIVBLEN:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE; + logic RemOpE; + /* verilator lint_off WIDTH */ assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. rn = Cycles * r * k - r ***explain + assign IntRemNormShiftE = mE + (P.DIVb+1-P.XLEN); // m + b - (N-1) for remainder normalization shift /* verilator lint_on WIDTH */ + assign RemOpE = Funct3E[1]; + mux2 #(P.DIVBLEN+1) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE); // pipeline registers flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); @@ -204,8 +209,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM); - flopen #(P.DIVBLEN+1) nsreg(clk, IFDivStartE, IntDivNormShiftE, IntDivNormShiftM); - flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); + flopen #(P.DIVBLEN+1) nsreg(clk, IFDivStartE, IntNormShiftE, IntNormShiftM); flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM); if (P.XLEN==64) flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); From 03864642a7c434f7a638522e5c70845a62142a15 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 16:42:32 -0800 Subject: [PATCH 10/62] fdivsqrt cleanup --- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 2 +- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 3b6115201..e9fd2fd2c 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -118,7 +118,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( if (BZeroM) begin // Divide by zero if (RemOpM) IntDivResultM = AM; else 
IntDivResultM = {(P.XLEN){1'b1}}; - end else if (ALTBM) begin // Numerator is zero + end else if (ALTBM) begin // Numerator is small if (RemOpM) IntDivResultM = AM; else IntDivResultM = '0; end else IntDivResultM = PreIntResultM[P.XLEN-1:0]; diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 137f54d99..66ba957e8 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -198,7 +198,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( /* verilator lint_off WIDTH */ assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. rn = Cycles * r * k - r ***explain - assign IntRemNormShiftE = mE + (P.DIVb+1-P.XLEN); // m + b - (N-1) for remainder normalization shift + assign IntRemNormShiftE = mE + (P.DIVb-(P.XLEN-1)); // m + b - (N-1) for remainder normalization shift /* verilator lint_on WIDTH */ assign RemOpE = Funct3E[1]; mux2 #(P.DIVBLEN+1) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE); From 7d0d9dcebe704a464cb156e635bf2d215762daa6 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 18:01:13 -0800 Subject: [PATCH 11/62] divider cleanup --- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 18 +++++++++--------- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 8 ++++---- src/fpu/unpackinput.sv | 6 ------ 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index bba6e8005..d5c571940 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -30,12 +30,12 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] FmtE, input logic SqrtE, input logic IntDivE, - input logic [P.DIVBLEN:0] IntResultBits, + input logic [P.DIVBLEN:0] IntResultBitsE, output logic [P.DURLEN-1:0] CyclesE ); - logic [P.DURLEN+1:0] Nf, FPResultBits; // number of fractional bits - logic 
[P.DIVBLEN:0] ResultBits; // number of result bits; + logic [P.DURLEN+1:0] Nf, FPResultBitsE; // number of fractional bits + logic [P.DIVBLEN:0] ResultBitsE; // number of result bits; // DIVN = P.NF+3 // NS = NF + 1 @@ -72,16 +72,16 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( // Integer division needs p fractional + r integer result bits // FP Division needs at least Nf fractional bits + 2 guard/round bits and one integer digit (LOG R integer bits) = Nf + 2 + r bits // FP Sqrt needs at least Nf fractional bits, 2 guard/round bits, and *** shift bits - // The datapath produces rk bits per cycle, so Cycles = ceil (ResultBits / rk) + // The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk) always_comb begin - if (SqrtE) FPResultBits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1 rather than +2; is it related to DIVCOPIES logic below? - else FPResultBits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs + if (SqrtE) FPResultBitsE = Nf + 2 + 0; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1 and +0 rather than +2; is it related to DIVCOPIES logic below? + else FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (P.IDIV_ON_FPU) ResultBits = IntDivE ? IntResultBits : FPResultBits; - else ResultBits = FPResultBits; + if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? 
IntResultBitsE : FPResultBitsE; + else ResultBitsE = FPResultBitsE; - assign CyclesE = (ResultBits-1)/(P.RK) + 1; // ceil (ResultBits/rk) + assign CyclesE = (ResultBitsE-1)/(P.RK) + 1; // ceil (ResultBitsE/rk) end /* verilator lint_on WIDTH */ diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 66ba957e8..e950a40bd 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -54,7 +54,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.NE+1:0] UeE; // Result Exponent (FP only) logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input logic [P.DIVBLEN:0] mE, ell; // Leading zeros of inputs - logic [P.DIVBLEN:0] IntResultBits; // bits in integer result + logic [P.DIVBLEN:0] IntResultBitsE; // bits in integer result logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division logic SignedDivE; // signed division @@ -126,7 +126,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p); /* verilator lint_off WIDTH */ - assign IntResultBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) + assign IntResultBitsE = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) /* verilator lint_on WIDTH */ // Integer special cases (terminate immediately) @@ -137,7 +137,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.LOGRK-1:0] IntTrunc, RightShiftX; logic [P.DIVBLEN:0] IntSteps; /* verilator lint_offf WIDTH */ - assign RightShiftX = P.RK - 1 - ((IntResultBits - 1) % P.RK); // Right shift amount + assign RightShiftX = P.RK - 1 - ((IntResultBitsE - 1) % P.RK); // Right shift amount assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in n steps /* verilator lint_on WIDTH */ end else begin // radix 2 1 copy doesn't require 
shifting @@ -190,7 +190,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM); // Number of FSM cycles (to FSM) - fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBits, .CyclesE); + fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE); if (P.IDIV_ON_FPU) begin:intpipelineregs logic [P.DIVBLEN:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE; diff --git a/src/fpu/unpackinput.sv b/src/fpu/unpackinput.sv index c551e8173..b3d7f901e 100644 --- a/src/fpu/unpackinput.sv +++ b/src/fpu/unpackinput.sv @@ -83,7 +83,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) ( assign BadNaNBox = ~(Fmt|(&In[P.FLEN-1:P.LEN1])); // Check NaN boxing always_comb if (BadNaNBox) begin -// PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]}; PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}}; end else PostBox = In; @@ -143,8 +142,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) ( if (BadNaNBox) begin case (Fmt) P.FMT: PostBox = In; -// P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]}; -// P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, In[P.LEN2-P.NE2-3:0]}; P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}}; P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, {(P.LEN2-P.NE2-2){1'b0}}}; default: PostBox = 'x; @@ -230,9 +227,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) ( if (BadNaNBox) begin case (Fmt) 2'b11: PostBox = In; -// 2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, In[P.D_LEN-P.D_NE-3:0]}; -// 2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, In[P.S_LEN-P.S_NE-3:0]}; -// 2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, In[P.H_LEN-P.H_NE-3:0]}; 2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 
1'b1, {(P.D_NE+1){1'b1}}, {(P.D_LEN-P.D_NE-2){1'b0}}}; 2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, {(P.S_LEN-P.S_NE-2){1'b0}}}; 2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, {(P.H_LEN-P.H_NE-2){1'b0}}}; From 3cae2385ab00e31887656ccf8c81bdbd75124396 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 18:19:41 -0800 Subject: [PATCH 12/62] Simplified out LOGRK parameter --- config/shared/config-shared.vh | 5 ++--- config/shared/parameter-defs.vh | 1 - src/cvw.sv | 1 - src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 5 ++--- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 17b1ede83..10b56f24e 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -97,11 +97,10 @@ localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS); localparam DIVN = ((NF+2 Date: Fri, 10 Nov 2023 21:06:24 -0600 Subject: [PATCH 15/62] Add bestSynths.csv that are the initial values. 
If this is re-run after ppaAnalysis.py is run, more refinement can be made --- synthDC/ppa/bestSynths.csv | 180 ++++++++++++++++++++++++++++--------- 1 file changed, 140 insertions(+), 40 deletions(-) diff --git a/synthDC/ppa/bestSynths.csv b/synthDC/ppa/bestSynths.csv index 7e3d35569..d57ac6924 100644 --- a/synthDC/ppa/bestSynths.csv +++ b/synthDC/ppa/bestSynths.csv @@ -1,24 +1,74 @@ Module,Tech,Width,Target Freq,Delay,Area,L Power (nW),D energy (nJ) -priorityencoder,sky90,8,7683,0.12508649056358195,50.960001,24.761,0.010685929975270078 -priorityencoder,sky90,16,5773,0.16977016282695304,136.220003,77.243,0.021773774467348 -priorityencoder,sky90,32,4500,0.2218912222222222,372.400007,189.626,0.04371111111111111 -priorityencoder,sky90,64,4098,0.2439914738897023,797.720015,382.205,0.07393850658857981 -priorityencoder,sky90,128,3409,0.2933331557641537,1602.300031,610.009,0.1261366969785861 -add,sky90,8,3658,0.27337042810278844,253.820005,154.438,0.10825587752870422 -add,sky90,16,2942,0.3393218266485384,722.260013,485.109,0.32460910944935417 -add,sky90,32,2468,0.40496338573743923,1440.600027,714.057,0.6580226904376014 -add,sky90,64,2139,0.4674681813931744,2781.240054,1050.0,0.9392239364188874 -add,sky90,128,1885,0.5304949787798409,6186.740118,2230.0,2.1480106100795755 +binencoder,sky130,8,1000,1.0000,50.960001,24.761,0.010685929975270078 +binencoder,sky130,16,1000,1.0000,136.220003,77.243,0.021773774467348 +binencoder,sky130,32,1000,1.0000,372.400007,189.626,0.04371111111111111 +binencoder,sky130,64,1000,1.0000,797.720015,382.205,0.07393850658857981 +binencoder,sky130,128,1000,1.0000,1602.300031,610.009,0.1261366969785861 +adder,sky130,8,1000,1.0000,253.820005,154.438,0.10825587752870422 +adder,sky130,16,1000,1.0000,722.260013,485.109,0.32460910944935417 +adder,sky130,32,1000,1.0000,1440.600027,714.057,0.6580226904376014 +adder,sky130,64,1000,1.0000,2781.240054,1050.0,0.9392239364188874 +adder,sky130,128,1000,1.0000,6186.740118,2230.0,2.1480106100795755 
+csa,sky130,8,1000,1.0000,266.560005,154.202,0.13650573115665163 +csa,sky130,16,1000,1.0000,533.12001,308.404,0.27263530601922104 +csa,sky130,32,1000,1.0000,1066.240021,616.808,0.5448072247308093 +csa,sky130,64,1000,1.0000,2132.480042,1230.0,1.0905412240768841 +csa,sky130,128,1000,1.0000,4264.960083,2470.0,2.178553363682347 +shifter,sky130,8,1000,1.0000,259.700005,196.451,0.07534088282874972 +shifter,sky130,16,1000,1.0000,666.400006,558.433,0.19552906110283155 +shifter,sky130,32,1000,1.0000,1475.880027,768.262,0.3807431082700759 +shifter,sky130,64,1000,1.0000,3914.120062,2680.0,1.144802541988198 +shifter,sky130,128,1000,1.0000,9192.400136,6080.0,2.9008914525432616 +comparator,sky130,8,1000,1.0000,200.900004,136.6,0.05001033271337053 +comparator,sky130,16,1000,1.0000,358.680007,189.253,0.06321553011448482 +comparator,sky130,32,1000,1.0000,690.900013,315.709,0.10771793448084398 +comparator,sky130,64,1000,1.0000,1372.980026,508.393,0.2048577820389901 +comparator,sky130,128,1000,1.0000,2744.980052,796.047,0.34396273737011823 +flop,sky130,8,1000,1.0000,133.279999,64.8145,0.193835 +flop,sky130,16,1000,1.0000,266.5599975,129.629,0.38715000000000005 +flop,sky130,32,1000,1.0000,533.119995,259.258,0.7723000000000001 +flop,sky130,64,1000,1.0000,1066.23999,520.0,1.54955 +flop,sky130,128,1000,1.0000,2132.4799805,1035.0,3.094 +mux2,sky130,8,1000,1.0000,63.700001,21.541,0.01932440083034535 +mux2,sky130,16,1000,1.0000,119.560002,32.354,0.03884536082474227 +mux2,sky130,32,1000,1.0000,375.340008,259.372,0.13671796921846893 +mux2,sky130,64,1000,1.0000,479.220009,115.22,0.15148539160324087 +mux2,sky130,128,1000,1.0000,1302.420025,767.078,0.4665334665334665 +mux4,sky130,8,1000,1.0000,148.960002,66.984,0.04026661024121879 +mux4,sky130,16,1000,1.0000,392.0,398.313,0.1037037037037037 +mux4,sky130,32,1000,1.0000,594.860011,331.197,0.131617289946576 +mux4,sky130,64,1000,1.0000,899.640016,344.331,0.2862533692722372 +mux4,sky130,128,1000,1.0000,2013.900038,818.249,0.6094182825484764 
+mux8,sky130,8,1000,1.0000,287.140006,116.648,0.06089260808926081 +mux8,sky130,16,1000,1.0000,582.120003,282.366,0.14455681142177274 +mux8,sky130,32,1000,1.0000,1319.079995,670.683,0.35777218376337316 +mux8,sky130,64,1000,1.0000,2132.48004,808.482,0.44287680660701995 +mux8,sky130,128,1000,1.0000,4575.620089,1830.0,0.9786276715410572 +mul,sky130,8,1000,1.0000,2194.220041,1440.0,1.421374045801527 +mul,sky130,16,1000,1.0000,7519.540137,4940.0,6.376128385155466 +mul,sky130,32,1000,1.0000,25200.700446,14900.0,24.931847968545217 +mul,sky130,64,1000,1.0000,86011.661365,42600.0,88.84651898734177 +mul,sky130,128,1000,1.0000,296198.144128,114000.0,273.3148854961832 +binencoder,sky90,8,7683,0.12508649056358195,50.960001,24.761,0.010685929975270078 +binencoder,sky90,16,5773,0.16977016282695304,136.220003,77.243,0.021773774467348 +binencoder,sky90,32,4500,0.2218912222222222,372.400007,189.626,0.04371111111111111 +binencoder,sky90,64,4098,0.2439914738897023,797.720015,382.205,0.07393850658857981 +binencoder,sky90,128,3409,0.2933331557641537,1602.300031,610.009,0.1261366969785861 +adder,sky90,8,3658,0.27337042810278844,253.820005,154.438,0.10825587752870422 +adder,sky90,16,2942,0.3393218266485384,722.260013,485.109,0.32460910944935417 +adder,sky90,32,2468,0.40496338573743923,1440.600027,714.057,0.6580226904376014 +adder,sky90,64,2139,0.4674681813931744,2781.240054,1050.0,0.9392239364188874 +adder,sky90,128,1885,0.5304949787798409,6186.740118,2230.0,2.1480106100795755 csa,sky90,8,5758,0.16536141368530738,266.560005,154.202,0.13650573115665163 csa,sky90,16,5931,0.1654056314280897,533.12001,308.404,0.27263530601922104 csa,sky90,32,5758,0.16536141368530738,1066.240021,616.808,0.5448072247308093 csa,sky90,64,5931,0.1654056314280897,2132.480042,1230.0,1.0905412240768841 csa,sky90,128,5931,0.1654056314280897,4264.960083,2470.0,2.178553363682347 -shiftleft,sky90,8,4327,0.23025600254217704,259.700005,196.451,0.07534088282874972 
-shiftleft,sky90,16,3355,0.29803959314456036,666.400006,558.433,0.19552906110283155 -shiftleft,sky90,32,2503,0.39951757530962845,1475.880027,768.262,0.3807431082700759 -shiftleft,sky90,64,2203,0.45385946391284615,3914.120062,2680.0,1.144802541988198 -shiftleft,sky90,128,1907,0.5242938489774515,9192.400136,6080.0,2.9008914525432616 +shifter,sky90,8,4327,0.23025600254217704,259.700005,196.451,0.07534088282874972 +shifter,sky90,16,3355,0.29803959314456036,666.400006,558.433,0.19552906110283155 +shifter,sky90,32,2503,0.39951757530962845,1475.880027,768.262,0.3807431082700759 +shifter,sky90,64,2203,0.45385946391284615,3914.120062,2680.0,1.144802541988198 +shifter,sky90,128,1907,0.5242938489774515,9192.400136,6080.0,2.9008914525432616 comparator,sky90,8,4839,0.20629126741062204,200.900004,136.6,0.05001033271337053 comparator,sky90,16,4018,0.24806303982080635,358.680007,189.253,0.06321553011448482 comparator,sky90,32,3602,0.276293542476402,690.900013,315.709,0.10771793448084398 @@ -44,31 +94,31 @@ mux8,sky90,16,3362,0.295237998810232,582.120003,282.366,0.14455681142177274 mux8,sky90,32,3178,0.3140553102580239,1319.079995,670.683,0.35777218376337316 mux8,sky90,64,2906,0.3440756228492774,2132.48004,808.482,0.44287680660701995 mux8,sky90,128,2667,0.3749401308586427,4575.620089,1830.0,0.9786276715410572 -mult,sky90,8,1310,0.7631557786259543,2194.220041,1440.0,1.421374045801527 -mult,sky90,16,997,1.0029260270812437,7519.540137,4940.0,6.376128385155466 -mult,sky90,32,763,1.3106129895150722,25200.700446,14900.0,24.931847968545217 -mult,sky90,64,632,1.5822664810126583,86011.661365,42600.0,88.84651898734177 -mult,sky90,128,524,1.9083759465648855,296198.144128,114000.0,273.3148854961832 -priorityencoder,tsmc28,8,31335,0.031912196106590074,8.316,34.836,0.001716929950534546 -priorityencoder,tsmc28,16,21253,0.04703118086858326,21.672,78.026,0.004008845810003294 -priorityencoder,tsmc28,32,16464,0.06071258114674442,61.614,207.499,0.009323372206025266 
-priorityencoder,tsmc28,64,13804,0.07239877021153289,137.466,425.592,0.01847290640394089 -priorityencoder,tsmc28,128,11440,0.0874065874125874,317.646,973.649,0.041171328671328666 -add,tsmc28,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546 -add,tsmc28,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698 -add,tsmc28,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594 -add,tsmc28,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008 -add,tsmc28,128,7354,0.13597341881968997,907.452008,4360.0,0.3451183029643731 +mul,sky90,8,1310,0.7631557786259543,2194.220041,1440.0,1.421374045801527 +mul,sky90,16,997,1.0029260270812437,7519.540137,4940.0,6.376128385155466 +mul,sky90,32,763,1.3106129895150722,25200.700446,14900.0,24.931847968545217 +mul,sky90,64,632,1.5822664810126583,86011.661365,42600.0,88.84651898734177 +mul,sky90,128,524,1.9083759465648855,296198.144128,114000.0,273.3148854961832 +binencoder,tsmc28,8,31335,0.031912196106590074,8.316,34.836,0.001716929950534546 +binencoder,tsmc28,16,21253,0.04703118086858326,21.672,78.026,0.004008845810003294 +binencoder,tsmc28,32,16464,0.06071258114674442,61.614,207.499,0.009323372206025266 +binencoder,tsmc28,64,13804,0.07239877021153289,137.466,425.592,0.01847290640394089 +binencoder,tsmc28,128,11440,0.0874065874125874,317.646,973.649,0.041171328671328666 +adder,tsmc28,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546 +adder,tsmc28,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698 +adder,tsmc28,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594 +adder,tsmc28,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008 +adder,tsmc28,128,7354,0.13597341881968997,907.452008,4360.0,0.3451183029643731 csa,tsmc28,8,24524,0.040663382319360626,52.416,482.462,0.02173381177621921 csa,tsmc28,16,24524,0.040663382319360626,104.832,964.99,0.04346762355243842 csa,tsmc28,32,24524,0.040663382319360626,209.664,1930.0,0.08677214157559941 
csa,tsmc28,64,24524,0.040663382319360626,419.327999,3860.0,0.17342195400424076 csa,tsmc28,128,24524,0.040663382319360626,838.655998,7720.0,0.3471701190670363 -shiftleft,tsmc28,8,15202,0.0656078183133798,50.652,367.074,0.016991185370346006 -shiftleft,tsmc28,16,11804,0.08465604506946797,127.511999,602.29,0.03388681802778719 -shiftleft,tsmc28,32,9587,0.10430391697089808,384.803997,1940.0,0.10180452696359654 -shiftleft,tsmc28,64,8272,0.12086674854932303,1041.263998,5460.0,0.2895309477756286 -shiftleft,tsmc28,128,7023,0.14238329232521713,1836.953994,8670.0,0.566566994162039 +shifter,tsmc28,8,15202,0.0656078183133798,50.652,367.074,0.016991185370346006 +shifter,tsmc28,16,11804,0.08465604506946797,127.511999,602.29,0.03388681802778719 +shifter,tsmc28,32,9587,0.10430391697089808,384.803997,1940.0,0.10180452696359654 +shifter,tsmc28,64,8272,0.12086674854932303,1041.263998,5460.0,0.2895309477756286 +shifter,tsmc28,128,7023,0.14238329232521713,1836.953994,8670.0,0.566566994162039 comparator,tsmc28,8,17422,0.05733769130983814,35.784,170.595,0.009488003673516243 comparator,tsmc28,16,13736,0.07273839778683751,54.558,250.167,0.014349155503785673 comparator,tsmc28,32,12139,0.08236710865804432,145.782,622.975,0.03567015404893319 @@ -94,8 +144,58 @@ mux8,tsmc28,16,12264,0.08147446510110894,128.771998,548.714,0.02666340508806262 mux8,tsmc28,32,11713,0.08517122410996329,172.115999,823.633,0.046956373260479814 mux8,tsmc28,64,11014,0.09067453550027238,304.163999,1460.0,0.08498274922825495 mux8,tsmc28,128,10474,0.09542350830628223,683.045996,2820.0,0.15705556616383426 -mult,tsmc28,8,5200,0.1922996923076923,577.206,4340.0,0.37769230769230766 -mult,tsmc28,16,3819,0.26184265147944485,1634.472002,11800.0,1.4553548049227547 -mult,tsmc28,32,3033,0.3295775611605671,6343.721998,47200.0,6.303330036267723 -mult,tsmc28,64,2390,0.4184090418410042,16045.092071,109000.0,18.54602510460251 -mult,tsmc28,128,1868,0.5353279057815846,44272.49428,262000.0,50.01177730192719 
+mul,tsmc28,8,5200,0.1922996923076923,577.206,4340.0,0.37769230769230766 +mul,tsmc28,16,3819,0.26184265147944485,1634.472002,11800.0,1.4553548049227547 +mul,tsmc28,32,3033,0.3295775611605671,6343.721998,47200.0,6.303330036267723 +mul,tsmc28,64,2390,0.4184090418410042,16045.092071,109000.0,18.54602510460251 +mul,tsmc28,128,1868,0.5353279057815846,44272.49428,262000.0,50.01177730192719 +binencoder,tsmc28psyn,8,31335,0.031912196106590074,8.316,34.836,0.001716929950534546 +binencoder,tsmc28psyn,16,21253,0.04703118086858326,21.672,78.026,0.004008845810003294 +binencoder,tsmc28psyn,32,16464,0.06071258114674442,61.614,207.499,0.009323372206025266 +binencoder,tsmc28psyn,64,13804,0.07239877021153289,137.466,425.592,0.01847290640394089 +binencoder,tsmc28psyn,128,11440,0.0874065874125874,317.646,973.649,0.041171328671328666 +adder,tsmc28psyn,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546 +adder,tsmc28psyn,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698 +adder,tsmc28psyn,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594 +adder,tsmc28psyn,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008 +adder,tsmc28psyn,128,7354,0.13597341881968997,907.452008,4360.0,0.3451183029643731 +csa,tsmc28psyn,8,24524,0.040663382319360626,52.416,482.462,0.02173381177621921 +csa,tsmc28psyn,16,24524,0.040663382319360626,104.832,964.99,0.04346762355243842 +csa,tsmc28psyn,32,24524,0.040663382319360626,209.664,1930.0,0.08677214157559941 +csa,tsmc28psyn,64,24524,0.040663382319360626,419.327999,3860.0,0.17342195400424076 +csa,tsmc28psyn,128,24524,0.040663382319360626,838.655998,7720.0,0.3471701190670363 +shifter,tsmc28psyn,8,15202,0.0656078183133798,50.652,367.074,0.016991185370346006 +shifter,tsmc28psyn,16,11804,0.08465604506946797,127.511999,602.29,0.03388681802778719 +shifter,tsmc28psyn,32,9587,0.10430391697089808,384.803997,1940.0,0.10180452696359654 +shifter,tsmc28psyn,64,8272,0.12086674854932303,1041.263998,5460.0,0.2895309477756286 
+shifter,tsmc28psyn,128,7023,0.14238329232521713,1836.953994,8670.0,0.566566994162039 +comparator,tsmc28psyn,8,17422,0.05733769130983814,35.784,170.595,0.009488003673516243 +comparator,tsmc28psyn,16,13736,0.07273839778683751,54.558,250.167,0.014349155503785673 +comparator,tsmc28psyn,32,12139,0.08236710865804432,145.782,622.975,0.03567015404893319 +comparator,tsmc28psyn,64,11080,0.09024670758122744,294.21,1250.0,0.0684115523465704 +comparator,tsmc28psyn,128,9371,0.10671119720414043,558.432,2400.0,0.12794792444776437 +flop,tsmc28psyn,8,10,0.048889000000002625,15.12,78.6345,0.027246000000000003 +flop,tsmc28psyn,16,10,0.048889000000002625,30.24,157.29,0.054290000000000005 +flop,tsmc28psyn,32,10,0.048889000000002625,60.4799995,314.5805,0.10908000000000001 +flop,tsmc28psyn,64,10,0.048889000000002625,120.959999,630.0,0.21765500000000004 +flop,tsmc28psyn,128,10,0.048889000000002625,241.919998,1260.0,0.43579999999999997 +mux2,tsmc28psyn,8,29614,0.03374481252110488,16.758,114.564,0.005436617815897886 +mux2,tsmc28psyn,16,18767,0.053046021580433735,15.75,88.025,0.005142004582511856 +mux2,tsmc28psyn,32,17903,0.05585556035301346,32.130001,171.146,0.009897782494553985 +mux2,tsmc28psyn,64,18568,0.05371109651012495,91.35,523.884,0.027574321413183972 +mux2,tsmc28psyn,128,16637,0.05991099044298852,176.525999,941.106,0.05012923002945243 +mux4,tsmc28psyn,8,18151,0.055092383284667513,27.971999,133.963,0.008032615282904523 +mux4,tsmc28psyn,16,16486,0.06057952759917506,39.438,186.231,0.012556108213029236 +mux4,tsmc28psyn,32,15196,0.06580579126085812,69.174,324.969,0.023229797315082915 +mux4,tsmc28psyn,64,13926,0.07180612868016659,137.465999,648.086,0.04574177796926612 +mux4,tsmc28psyn,128,13090,0.07636619404125286,294.335997,1420.0,0.09358288770053477 +mux8,tsmc28psyn,8,12902,0.07750336319950395,44.604,214.286,0.0117501162610448 +mux8,tsmc28psyn,16,12264,0.08147446510110894,128.771998,548.714,0.02666340508806262 
+mux8,tsmc28psyn,32,11713,0.08517122410996329,172.115999,823.633,0.046956373260479814 +mux8,tsmc28psyn,64,11014,0.09067453550027238,304.163999,1460.0,0.08498274922825495 +mux8,tsmc28psyn,128,10474,0.09542350830628223,683.045996,2820.0,0.15705556616383426 +mul,tsmc28psyn,8,5200,0.1922996923076923,577.206,4340.0,0.37769230769230766 +mul,tsmc28psyn,16,3819,0.26184265147944485,1634.472002,11800.0,1.4553548049227547 +mul,tsmc28psyn,32,3033,0.3295775611605671,6343.721998,47200.0,6.303330036267723 +mul,tsmc28psyn,64,2390,0.4184090418410042,16045.092071,109000.0,18.54602510460251 +mul,tsmc28psyn,128,1868,0.5353279057815846,44272.49428,262000.0,50.01177730192719 From 65e536e4014c77c479f56dbda54a7fc27b545d89 Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Fri, 10 Nov 2023 21:07:36 -0600 Subject: [PATCH 16/62] Update ppa/ppaSynth.py for sky130 and better sweep parameterization --- synthDC/ppa/ppaSynth.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/synthDC/ppa/ppaSynth.py b/synthDC/ppa/ppaSynth.py index d9d07c10d..528c851a0 100755 --- a/synthDC/ppa/ppaSynth.py +++ b/synthDC/ppa/ppaSynth.py @@ -12,8 +12,6 @@ from ppaAnalyze import synthsfromcsv def runCommand(module, width, tech, freq): command = "make synth DESIGN={} WIDTH={} TECH={} DRIVE=INV FREQ={} MAXOPT=1 MAXCORES=1".format(module, width, tech, freq) - print('here we go') - subprocess.Popen(command, shell=True) def deleteRedundant(synthsToRun): @@ -61,15 +59,15 @@ if __name__ == '__main__': ##### Run specific syntheses widths = [8, 16, 32, 64, 128] - modules = ['mult', 'add', 'shiftleft', 'flop', 'comparator', 'priorityencoder', 'add', 'csa', 'mux2', 'mux4', 'mux8'] - techs = ['sky90', 'tsmc28'] + modules = ['mul', 'adder', 'shifter', 'flop', 'comparator', 'binencoder', 'csa', 'mux2', 'mux4', 'mux8'] + techs = ['sky90', 'sky130', 'tsmc28', 'tsmc28psyn'] freqs = [5000] synthsToRun = allCombos(widths, modules, techs, freqs) ##### Run a sweep based on best delay found in existing 
syntheses - module = 'add' + module = 'adder' width = 32 - tech = 'sky90' + tech = 'tsmc28psyn' synthsToRun = freqSweep(module, width, tech) ##### Only do syntheses for which a run doesn't already exist From 7b79d8edeb59d676fcef4b54d841d00ff2930f02 Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Fri, 10 Nov 2023 21:10:35 -0600 Subject: [PATCH 17/62] Update scripts/synth.tcl to add with parameter for width and also checks wrapper to see if running CONFIG=rv32e to run without WIDTH --- synthDC/Makefile | 6 +++--- synthDC/scripts/synth.tcl | 14 +++++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/synthDC/Makefile b/synthDC/Makefile index e7918e3dc..8e1b09d01 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -20,11 +20,11 @@ export MAXCORES ?= 1 export MAXOPT ?= 0 export DRIVE ?= FLOP export USESRAM ?= 0 - +export WIDTH ?= 32 time := $(shell date +%F-%H-%M) hash := $(shell git rev-parse --short HEAD) -export OUTPUTDIR := runs/$(DESIGN)_$(CONFIG)_$(MOD)_$(TECH)nm_$(FREQ)_MHz_$(time)_$(TITLE)_$(hash) +export OUTPUTDIR := runs/$(DESIGN)_$(WIDTH)_$(CONFIG)_$(MOD)_$(TECH)_$(FREQ)_MHz_$(time)_$(TITLE)_$(hash) export SAIFPOWER ?= 0 OLDCONFIGDIR ?= ${WALLY}/config @@ -147,4 +147,4 @@ clean: rm -f power.saif rm -f Synopsys_stack_trace_*.txt rm -f crte_*.txt - \ No newline at end of file + diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl index 9be076edf..cd4d6ff27 100755 --- a/synthDC/scripts/synth.tcl +++ b/synthDC/scripts/synth.tcl @@ -18,7 +18,6 @@ suppress_message {VER-274} # Enable Multicore set_host_options -max_cores $::env(MAXCORES) - # get outputDir and configDir from environment (Makefile) set outputDir $::env(OUTPUTDIR) set cfg $::env(CONFIGDIR) @@ -26,6 +25,7 @@ set hdl_src "../src" set saifpower $::env(SAIFPOWER) set maxopt $::env(MAXOPT) set drive $::env(DRIVE) +set width $::env(WIDTH) eval file copy -force [glob ${cfg}/*.vh] {$outputDir/hdl/} eval file copy -force [glob ${hdl_src}/cvw.sv] {$outputDir/hdl/} @@ 
-88,7 +88,13 @@ if { [shell_is_in_topographical_mode] } { #set alib_library_analysis_path ./$outputDir define_design_lib WORK -path ./$outputDir/WORK analyze -f sverilog -lib WORK $my_verilog_files -elaborate $my_toplevel -lib WORK +# If wrapper=0, we want to run against a specific module and pass +# width to DC +if { $wrapper == 1 } { + elaborate $my_toplevel -lib WORK +} else { + elaborate $my_toplevel -lib WORK -parameters WIDTH=$width +} # Set the current_design current_design $my_toplevel @@ -308,6 +314,8 @@ set filename [format "%s%s" $outputDir "/reports/mindelay.rep"] redirect $filename { report_timing -capacitance -transition_time -nets -delay_type min -nworst 1 } set filename [format "%s%s" $outputDir "/reports/per_module_timing.rep"] +redirect -append $filename { echo "\n\n\n//// Critical paths through Stall ////\n\n\n" } +redirect -append $filename { report_timing -capacitance -transition_time -nets -through {Stall*} -nworst 1 } redirect -append $filename { echo "\n\n\n//// Critical paths through ifu ////\n\n\n" } redirect -append $filename { report_timing -capacitance -transition_time -nets -through {ifu/*} -nworst 1 } redirect -append $filename { echo "\n\n\n//// Critical paths through ieu ////\n\n\n" } @@ -445,4 +453,4 @@ set t2 [clock seconds] set t [expr $t2 - $t1] echo [expr $t/60] -quit \ No newline at end of file +quit From 448ced00c51cbe2b3d2433bec633c4d51b988206 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 11 Nov 2023 04:05:34 -0800 Subject: [PATCH 18/62] Fixed testbench-fp to reflect signal name changes --- config/shared/config-shared.vh | 9 +++------ testbench/testbench-fp.sv | 10 +++++----- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 12967764f..61bf461eb 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -95,16 +95,13 @@ localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 
S_BIAS : H_BIAS); // intermediate division parameters not directly used in Divider localparam FPDIVN = NF+3; // length of floating-point inputs: Ns + 2 = Nf + 3 for 1 integer bit, Nf fracitonal bits, 2 extra bits to shift sqrt into [1/4, 1)] -localparam DIVN = ((FPDIVN Date: Sat, 11 Nov 2023 05:58:53 -0800 Subject: [PATCH 19/62] Bug fixes related to size of fpdivsqrt bit count and number of cycles --- config/shared/config-shared.vh | 6 ++++-- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 2 +- testbench/testbench-fp.sv | 3 +++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 61bf461eb..cc230ef3e 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -93,6 +93,7 @@ localparam NF2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NF : H_NF); localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2); localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS); + // intermediate division parameters not directly used in Divider localparam FPDIVN = NF+3; // length of floating-point inputs: Ns + 2 = Nf + 3 for 1 integer bit, Nf fracitonal bits, 2 extra bits to shift sqrt into [1/4, 1)] localparam DIVN = ((FPDIVN Date: Sat, 11 Nov 2023 15:50:06 -0800 Subject: [PATCH 20/62] Cleaned up number of bits in fdivsqrt --- config/shared/config-shared.vh | 23 +++++++++-------- src/fpu/fdivsqrt/fdivsqrt.sv | 2 +- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 11 +++----- src/fpu/fdivsqrt/fdivsqrtexpcalc.sv | 2 +- src/fpu/fdivsqrt/fdivsqrtiter.sv | 38 ++++++++++++++-------------- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 30 +++++++++++----------- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 18 ++++++------- src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv | 6 ++--- src/fpu/fdivsqrt/fdivsqrtstage2.sv | 22 ++++++++-------- src/fpu/fdivsqrt/fdivsqrtstage4.sv | 30 +++++++++++----------- src/fpu/fdivsqrt/fdivsqrtuotfc2.sv | 10 ++++---- src/fpu/fdivsqrt/fdivsqrtuotfc4.sv | 8 +++--- testbench/testbench-fp.sv | 10 
++++---- 13 files changed, 103 insertions(+), 107 deletions(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index cc230ef3e..14de5187e 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -93,19 +93,20 @@ localparam NF2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NF : H_NF); localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2); localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS); +// divider r and rk (bits per digit, bits per cycle) +localparam LOGR = $clog2(RADIX); // r = log(R) bits per digit +localparam RK = LOGR*DIVCOPIES; // r*k bits per cycle generated -// intermediate division parameters not directly used in Divider -localparam FPDIVN = NF+3; // length of floating-point inputs: Ns + 2 = Nf + 3 for 1 integer bit, Nf fracitonal bits, 2 extra bits to shift sqrt into [1/4, 1)] -localparam DIVN = ((FPDIVN(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? (DIVb + 1 +NF+1) : (3*NF+6))); localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); -localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVN+1+NF) > (3*NF+4) ? (DIVN+1+NF) : (3*NF+4))); +localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVMINb+1+NF) > (3*NF+4) ? 
(DIVMINb+1+NF) : (3*NF+4))); // Disable spurious Verilator warnings diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index ac5c2c338..a4e20f229 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -67,7 +67,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( // Integer div/rem signals logic BZeroM; // Denominator is zero logic IntDivM; // Integer operation - logic [P.DIVBLEN:0] IntNormShiftM; // Integer normalizatoin shift amount + logic [P.DIVBLEN-1:0] IntNormShiftM; // Integer normalizatoin shift amount logic ALTBM, AsM, BsM, W64M; // Special handling for postprocessor logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor logic ISpecialCaseE; // Integer div/remainder special cases diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index 9d7f05fc8..20fb16f62 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -30,16 +30,11 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] FmtE, input logic SqrtE, input logic IntDivE, - input logic [P.DIVBLEN:0] IntResultBitsE, + input logic [P.DIVBLEN-1:0] IntResultBitsE, output logic [P.DURLEN-1:0] CyclesE ); - logic [P.DIVBLEN:0] Nf, FPResultBitsE; // number of fractional bits - logic [P.DIVBLEN:0] ResultBitsE; // number of result bits; - - // DIVN = P.NF+3 - // NS = NF + 1 - // N = NS or NS+2 for div/sqrt. + logic [P.DIVBLEN-1:0] Nf, FPResultBitsE, ResultBitsE; // number of fractional (result) bits /* verilator lint_off WIDTH */ if (P.FPSIZES == 1) @@ -75,7 +70,7 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( // The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk) always_comb begin - if (SqrtE) FPResultBitsE = Nf + 2 + 0; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1 and +0 rather than +2; is it related to DIVCOPIES logic below? 
+ if (SqrtE) FPResultBitsE = Nf + 2 + 0; // Nf + two fractional bits for round/guard; integer bit implicit else FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE; diff --git a/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv b/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv index 113f2b2dd..a1dd82e35 100644 --- a/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv @@ -31,7 +31,7 @@ module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) ( input logic [P.NE-1:0] Xe, Ye, input logic Sqrt, input logic XZero, - input logic [P.DIVBLEN:0] ell, m, + input logic [P.DIVBLEN-1:0] ell, m, output logic [P.NE+1:0] Ue ); diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv index 1d40e8d9a..0f66982ab 100644 --- a/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -31,31 +31,31 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) ( input logic IFDivStartE, input logic FDivBusyE, input logic SqrtE, - input logic [P.DIVb+3:0] X, D, - output logic [P.DIVb:0] FirstU, FirstUM, - output logic [P.DIVb+1:0] FirstC, + input logic [P.DIVb+3:0] X, D, // Q4.DIVb + output logic [P.DIVb:0] FirstU, FirstUM, // U1.DIVb + output logic [P.DIVb+1:0] FirstC, // Q2.DIVb output logic Firstun, - output logic [P.DIVb+3:0] FirstWS, FirstWC + output logic [P.DIVb+3:0] FirstWS, FirstWC // Q4.DIVb ); /* verilator lint_off UNOPTFLAT */ - logic [P.DIVb+3:0] WSNext[P.DIVCOPIES-1:0]; // Q4.b - logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.b - logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.b - logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.b - logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.b - logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.b - logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.b - logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.b - logic [P.DIVb+1:0] C[P.DIVCOPIES:0]; // Q2.b - logic [P.DIVb+1:0] 
initC; // Q2.b + logic [P.DIVb+3:0] WSNext[P.DIVCOPIES-1:0]; // Q4.DIVb + logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.DIVb + logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.DIVb + logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.DIVb + logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.DIVb + logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.DIVb + logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.DIVb + logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.DIVb + logic [P.DIVb+1:0] C[P.DIVCOPIES:0]; // Q2.DIVb + logic [P.DIVb+1:0] initC; // Q2.DIVb logic [P.DIVCOPIES-1:0] un; - logic [P.DIVb+3:0] WSN, WCN; // Q4.b - logic [P.DIVb+3:0] DBar, D2, DBar2; // Q4.b - logic [P.DIVb+1:0] NextC; - logic [P.DIVb:0] UMux, UMMux; - logic [P.DIVb:0] initU, initUM; + logic [P.DIVb+3:0] WSN, WCN; // Q4.DIVb + logic [P.DIVb+3:0] DBar, D2, DBar2; // Q4.DIVb + logic [P.DIVb+1:0] NextC; // Q2.DIVb + logic [P.DIVb:0] UMux, UMMux; // U1.DIVb + logic [P.DIVb:0] initU, initUM; // U1.DIVb /* verilator lint_on UNOPTFLAT */ // Top Muxes and Registers diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index e9fd2fd2c..cb1f56db7 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -27,21 +27,21 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( - input logic clk, reset, - input logic StallM, - input logic [P.DIVb+3:0] WS, WC, - input logic [P.DIVb+3:0] D, - input logic [P.DIVb:0] FirstU, FirstUM, - input logic [P.DIVb+1:0] FirstC, - input logic SqrtE, - input logic Firstun, SqrtM, SpecialCaseM, - input logic [P.XLEN-1:0] AM, - input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M, - input logic [P.DIVBLEN:0] IntNormShiftM, - output logic [P.DIVb:0] UmM, // result significand - output logic WZeroE, - output logic DivStickyM, - output logic [P.XLEN-1:0] FIntDivResultM + input logic clk, reset, + input logic StallM, + input logic 
[P.DIVb+3:0] WS, WC, // Q4.DIVb + input logic [P.DIVb+3:0] D, // Q4.DIVb + input logic [P.DIVb:0] FirstU, FirstUM, // U1.DIVb + input logic [P.DIVb+1:0] FirstC, // Q2.DIVb + input logic SqrtE, + input logic Firstun, SqrtM, SpecialCaseM, + input logic [P.XLEN-1:0] AM, // U/Q(XLEN.0) + input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M, + input logic [P.DIVBLEN-1:0] IntNormShiftM, + output logic [P.DIVb:0] UmM, // U1.DIVb result significand + output logic WZeroE, + output logic DivStickyM, + output logic [P.XLEN-1:0] FIntDivResultM // U/Q(XLEN.0) ); logic [P.DIVb+3:0] W, Sum; diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 97ceeb085..8d6e565b1 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -42,7 +42,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( input logic IntDivE, W64E, output logic ISpecialCaseE, output logic [P.DURLEN-1:0] CyclesE, - output logic [P.DIVBLEN:0] IntNormShiftM, + output logic [P.DIVBLEN-1:0] IntNormShiftM, output logic ALTBM, IntDivM, W64M, output logic AsM, BsM, BZeroM, output logic [P.XLEN-1:0] AM @@ -53,8 +53,8 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [P.NE+1:0] UeE; // Result Exponent (FP only) logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input - logic [P.DIVBLEN:0] mE, ell; // Leading zeros of inputs - logic [P.DIVBLEN:0] IntResultBitsE; // bits in integer result + logic [P.DIVBLEN-1:0] mE, ell; // Leading zeros of inputs + logic [P.DIVBLEN-1:0] IntResultBitsE; // bits in integer result logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division logic SignedDivE; // signed division @@ -118,12 +118,12 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( ////////////////////////////////////////////////////// if 
(P.IDIV_ON_FPU) begin:intrightshift // Int Supported - logic [P.DIVBLEN:0] ZeroDiff, p; + logic [P.DIVBLEN-1:0] ZeroDiff, p; // calculate number of fractional bits p assign ZeroDiff = mE - ell; // Difference in number of leading zeros - assign ALTBE = ZeroDiff[P.DIVBLEN]; // A less than B (A has more leading zeros) - mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p); + assign ALTBE = ZeroDiff[P.DIVBLEN-1]; // A less than B (A has more leading zeros) + mux2 #(P.DIVBLEN) pmux(ZeroDiff, '0, ALTBE, p); /* verilator lint_off WIDTH */ assign IntResultBitsE = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) @@ -192,7 +192,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE); if (P.IDIV_ON_FPU) begin:intpipelineregs - logic [P.DIVBLEN:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE; + logic [P.DIVBLEN-1:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE; logic RemOpE; /* verilator lint_off WIDTH */ @@ -200,7 +200,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( assign IntRemNormShiftE = mE + (P.DIVb-(P.XLEN-1)); // m + b - (N-1) for remainder normalization shift /* verilator lint_on WIDTH */ assign RemOpE = Funct3E[1]; - mux2 #(P.DIVBLEN+1) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE); + mux2 #(P.DIVBLEN) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE); // pipeline registers flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); @@ -208,7 +208,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM); - flopen #(P.DIVBLEN+1) nsreg(clk, IFDivStartE, IntNormShiftE, IntNormShiftM); + flopen #(P.DIVBLEN) nsreg(clk, IFDivStartE, IntNormShiftE, IntNormShiftM); flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM); if 
(P.XLEN==64) flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); diff --git a/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv b/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv index 0eb3b71c0..fe436413e 100644 --- a/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv +++ b/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv @@ -27,9 +27,9 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module fdivsqrtqsel4cmp ( - input logic [2:0] Dmsbs, - input logic [4:0] Smsbs, - input logic [7:0] WSmsbs, WCmsbs, + input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1 + input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation + input logic [7:0] WSmsbs, WCmsbs, // Q4.4 input logic SqrtE, j1, output logic [3:0] udigit ); diff --git a/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/src/fpu/fdivsqrt/fdivsqrtstage2.sv index bb8d87234..5e319a7c1 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage2.sv @@ -29,23 +29,23 @@ /* verilator lint_off UNOPTFLAT */ module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) ( - input logic [P.DIVb+3:0] D, DBar, - input logic [P.DIVb:0] U, UM, - input logic [P.DIVb+3:0] WS, WC, - input logic [P.DIVb+1:0] C, + input logic [P.DIVb+3:0] D, DBar, // Q4.DIVb + input logic [P.DIVb:0] U, UM, // U1.DIVb + input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb + input logic [P.DIVb+1:0] C, // Q2.DIVb input logic SqrtE, output logic un, - output logic [P.DIVb+1:0] CNext, - output logic [P.DIVb:0] UNext, UMNext, - output logic [P.DIVb+3:0] WSNext, WCNext + output logic [P.DIVb+1:0] CNext, // Q2.DIVb + output logic [P.DIVb:0] UNext, UMNext, // U1.DIVb + output logic [P.DIVb+3:0] WSNext, WCNext // Q4.DIVb ); /* verilator lint_on UNOPTFLAT */ - logic [P.DIVb+3:0] Dsel; + logic [P.DIVb+3:0] Dsel; // Q4.DIVb logic up, uz; - logic [P.DIVb+3:0] F; - logic [P.DIVb+3:0] AddIn; - logic [P.DIVb+3:0] WSA, WCA; + logic [P.DIVb+3:0] F; // Q4.DIVb + logic [P.DIVb+3:0] AddIn; // Q4.DIVb + logic [P.DIVb+3:0] WSA, WCA; // Q4.DIVb 
// Qmient Selection logic // Given partial remainder, select digit of +1, 0, or -1 (up, uz, un) diff --git a/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/src/fpu/fdivsqrt/fdivsqrtstage4.sv index c6477ec68..fea2851b6 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -27,26 +27,26 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) ( - input logic [P.DIVb+3:0] D, DBar, D2, DBar2, - input logic [P.DIVb:0] U,UM, - input logic [P.DIVb+3:0] WS, WC, - input logic [P.DIVb+1:0] C, + input logic [P.DIVb+3:0] D, DBar, D2, DBar2, // Q4.DIVb + input logic [P.DIVb:0] U,UM, // U1.DIVb + input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb + input logic [P.DIVb+1:0] C, // Q2.DIVb input logic SqrtE, j1, - output logic [P.DIVb+1:0] CNext, + output logic [P.DIVb+1:0] CNext, // Q2.DIVb output logic un, - output logic [P.DIVb:0] UNext, UMNext, - output logic [P.DIVb+3:0] WSNext, WCNext + output logic [P.DIVb:0] UNext, UMNext, // U1.DIVb + output logic [P.DIVb+3:0] WSNext, WCNext // Q4.DIVb ); - logic [P.DIVb+3:0] Dsel; + logic [P.DIVb+3:0] Dsel; // Q4.DIVb logic [3:0] udigit; - logic [P.DIVb+3:0] F; - logic [P.DIVb+3:0] AddIn; + logic [P.DIVb+3:0] F; // Q4.DIVb + logic [P.DIVb+3:0] AddIn; // Q4.DIVb logic [4:0] Smsbs; logic [2:0] Dmsbs; logic [7:0] WCmsbs, WSmsbs; logic CarryIn; - logic [P.DIVb+3:0] WSA, WCA; + logic [P.DIVb+3:0] WSA, WCA; // Q4.DIVb // Digit Selection logic // u encoding: @@ -55,10 +55,10 @@ module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) ( // 0000 = 0 // 0010 = -1 // 0001 = -2 - assign Smsbs = U[P.DIVb:P.DIVb-4]; - assign Dmsbs = D[P.DIVb-1:P.DIVb-3]; - assign WCmsbs = WC[P.DIVb+3:P.DIVb-4]; - assign WSmsbs = WS[P.DIVb+3:P.DIVb-4]; + assign Smsbs = U[P.DIVb:P.DIVb-4]; // U1.4 most significant bits of square root + assign Dmsbs = D[P.DIVb-1:P.DIVb-3]; // U0.3 most significant fractional bits of divisor after leading 1 + assign 
WCmsbs = WC[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual + assign WSmsbs = WS[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit); assign un = 1'b0; // unused for radix 4 diff --git a/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv b/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv index bde28cfba..c895fa2ce 100644 --- a/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv @@ -31,15 +31,15 @@ /////////////////////////////// module fdivsqrtuotfc2 import cvw::*; #(parameter cvw_t P) ( input logic up, un, - input logic [P.DIVb+1:0] C, - input logic [P.DIVb:0] U, UM, - output logic [P.DIVb:0] UNext, UMNext + input logic [P.DIVb+1:0] C, // Q2.DIVb + input logic [P.DIVb:0] U, UM, // U1.DIVb + output logic [P.DIVb:0] UNext, UMNext // U1.DIVb ); // The on-the-fly converter transfers the divsqrt // bits to the quotient as they come. - logic [P.DIVb:0] K; + logic [P.DIVb:0] K; // U1.DIVb one-hot - assign K = (C[P.DIVb:0] & ~(C[P.DIVb:0] << 1)); // Thermometer to one hot encoding + assign K = (C[P.DIVb:0] & ~(C[P.DIVb:0] << 1)); // Thermometer to one hot encoding always_comb begin if (up) begin diff --git a/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv b/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv index 403ccf051..b12b9174b 100644 --- a/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv @@ -28,15 +28,15 @@ module fdivsqrtuotfc4 import cvw::*; #(parameter cvw_t P) ( input logic [3:0] udigit, - input logic [P.DIVb:0] U, UM, - input logic [P.DIVb:0] C, - output logic [P.DIVb:0] UNext, UMNext + input logic [P.DIVb:0] U, UM, // U1.DIVb + input logic [P.DIVb:0] C, // Q1.DIVb + output logic [P.DIVb:0] UNext, UMNext // U1.DIVb ); // The on-the-fly converter transfers the square root // bits to the quotient as they come. // Use this otfc for division and square root. 
- logic [P.DIVb:0] K1, K2, K3; + logic [P.DIVb:0] K1, K2, K3; // U1.DIVb assign K1 = (C&~(C << 1)); // K assign K2 = ((C << 1)&~(C << 2)); // 2K assign K3 = (C & ~(C << 2)); // 3K diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv index 9e602cab0..662036439 100644 --- a/testbench/testbench-fp.sv +++ b/testbench/testbench-fp.sv @@ -145,11 +145,11 @@ module testbenchfp; initial begin // Information displayed for user on what is simulating - $display("\nThe start of simulation..."); - $display("This simulation for TEST is %s", TEST); - $display("This simulation for TEST is of the operand size of %s", TEST_SIZE); + //$display("\nThe start of simulation..."); + //$display("This simulation for TEST is %s", TEST); + //$display("This simulation for TEST is of the operand size of %s", TEST_SIZE); - $display("FPDUR %d %d DIVN %d LOGR %d RK %d RADIX %d DURLEN %d", FPDUR, DIVN, LOGR, RK, RADIX, DURLEN); + // $display("FPDUR %d %d DIVN %d LOGR %d RK %d RADIX %d DURLEN %d", FPDUR, DIVN, LOGR, RK, RADIX, DURLEN); if (P.Q_SUPPORTED & (TEST_SIZE == "QP" | TEST_SIZE == "all")) begin // if Quad percision is supported if (TEST === "cvtint" | TEST === "all") begin // if testing integer conversion @@ -652,7 +652,7 @@ module testbenchfp; string tt0; tt0 = $psprintf("%s", Tests[TestNum]); testname = {pp, tt0}; - $display("Here you are %s", testname); + //$display("Here you are %s", testname); $display("\n\nRunning %s vectors ", Tests[TestNum]); $readmemh(testname, TestVectors); // set the test index to 0 From 002034845a685c1dcb3668538658c5ed52978597 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 12 Nov 2023 06:15:47 -0800 Subject: [PATCH 21/62] fdivsqrt comment improvements --- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 2 +- src/fpu/fdivsqrt/fdivsqrtexpcalc.sv | 8 +++++--- src/fpu/fdivsqrt/fdivsqrtfgen2.sv | 8 ++++---- src/fpu/fdivsqrt/fdivsqrtfgen4.sv | 12 ++++++------ src/fpu/fdivsqrt/fdivsqrtfsm.sv | 2 +- src/fpu/fdivsqrt/fdivsqrtiter.sv | 8 ++++---- 
src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 11 ++++++----- src/fpu/fdivsqrt/fdivsqrtqsel2.sv | 25 ++++++++----------------- src/fpu/fdivsqrt/fdivsqrtstage2.sv | 16 +++++----------- 9 files changed, 40 insertions(+), 52 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index 20fb16f62..6043ebb4a 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -30,7 +30,7 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] FmtE, input logic SqrtE, input logic IntDivE, - input logic [P.DIVBLEN-1:0] IntResultBitsE, + input logic [P.DIVBLEN-1:0] IntResultBitsE, output logic [P.DURLEN-1:0] CyclesE ); diff --git a/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv b/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv index a1dd82e35..cf243a84b 100644 --- a/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv @@ -28,17 +28,19 @@ module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] Fmt, - input logic [P.NE-1:0] Xe, Ye, + input logic [P.NE-1:0] Xe, Ye, // input exponents input logic Sqrt, input logic XZero, - input logic [P.DIVBLEN-1:0] ell, m, - output logic [P.NE+1:0] Ue + input logic [P.DIVBLEN-1:0] ell, m, // number of leading 0s in Xe and Ye + output logic [P.NE+1:0] Ue // result exponent ); logic [P.NE-2:0] Bias; logic [P.NE+1:0] SXExp; logic [P.NE+1:0] SExp; logic [P.NE+1:0] DExp; + + // Determine exponent bias according to the format if (P.FPSIZES == 1) begin assign Bias = (P.NE-1)'(P.BIAS); diff --git a/src/fpu/fdivsqrt/fdivsqrtfgen2.sv b/src/fpu/fdivsqrt/fdivsqrtfgen2.sv index 990e3f19f..cf398f570 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfgen2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfgen2.sv @@ -28,12 +28,12 @@ module fdivsqrtfgen2 import cvw::*; #(parameter cvw_t P) ( input logic up, uz, - input logic [P.DIVb+3:0] C, U, UM, - output logic [P.DIVb+3:0] F + input logic [P.DIVb+3:0] C, U, UM, // Q4.DIVb (extended from shorter forms) + 
output logic [P.DIVb+3:0] F // Q4.DIVb ); - logic [P.DIVb+3:0] FP, FN, FZ; + logic [P.DIVb+3:0] FP, FN, FZ; // Q4.DIVb - // Generate for both positive and negative bits + // Generate for both positive and negative quotient digits assign FP = ~(U << 1) & C; assign FN = (UM << 1) | (C & ~(C << 2)); assign FZ = '0; diff --git a/src/fpu/fdivsqrt/fdivsqrtfgen4.sv b/src/fpu/fdivsqrt/fdivsqrtfgen4.sv index fc648f5bd..e2cec1ab4 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfgen4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfgen4.sv @@ -27,14 +27,14 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module fdivsqrtfgen4 import cvw::*; #(parameter cvw_t P) ( - input logic [3:0] udigit, - input logic [P.DIVb+3:0] C, U, UM, - output logic [P.DIVb+3:0] F + input logic [3:0] udigit, // {2, 1, -1, -2}; all cold for zero + input logic [P.DIVb+3:0] C, U, UM, // Q4.DIVb (extended from shorter forms) + output logic [P.DIVb+3:0] F // Q4.DIVb ); - logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2; + logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2; // Q4.DIVb - // Generate for both positive and negative bits - assign F2 = (~U << 2) & (C << 2); + // Generate for both positive and negative digits + assign F2 = (~U << 2) & (C << 2); // assign F1 = ~(U << 1) & C; assign F0 = '0; assign FN1 = (UM << 1) | (C & ~(C << 3)); diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv index 0e2cba90e..862d53b25 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -57,7 +57,7 @@ module fdivsqrtfsm import cvw::*; #(parameter cvw_t P) ( // terminate immediately on special cases assign FSpecialCaseE = XZeroE | XInfE | XNaNE | (XsE&SqrtE) | (YZeroE | YInfE | YNaNE)&~SqrtE; if (P.IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? 
ISpecialCaseE : FSpecialCaseE; - else assign SpecialCaseE = FSpecialCaseE; + else assign SpecialCaseE = FSpecialCaseE; flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc always_ff @(posedge clk) begin diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv index 0f66982ab..863d94837 100644 --- a/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -104,14 +104,14 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) ( for(i=0; $unsigned(i) -1 to choose 0, -1, 1 respectively - assign magnitude = ~((WS[2]^WC[2]) & (WS[1]^WC[1]) & + //if p2 * p1 * p0, W = -1 and choose digit of 0 + assign uz = ((WS[2]^WC[2]) & (WS[1]^WC[1]) & (WS[0]^WC[0])); + + // Otherwise determine sign using carry chain: sign = p3 ^ g_2:0 assign sign = (WS[3]^WC[3])^ (WS[2] & WC[2] | ((WS[2]^WC[2]) & (WS[1]&WC[1] | ((WS[1]^WC[1]) & (WS[0]&WC[0]))))); // Produce digit = +1, 0, or -1 - assign up = magnitude & ~sign; - assign uz = ~magnitude; - assign un = magnitude & sign; + assign up = ~uz & ~sign; + assign un = ~uz & sign; endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtqsel4.sv b/src/fpu/fdivsqrt/fdivsqrtuslc4.sv similarity index 72% rename from src/fpu/fdivsqrt/fdivsqrtqsel4.sv rename to src/fpu/fdivsqrt/fdivsqrtuslc4.sv index de520bef2..268ca9ea2 100644 --- a/src/fpu/fdivsqrt/fdivsqrtqsel4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuslc4.sv @@ -1,10 +1,10 @@ /////////////////////////////////////////// -// fdivsqrtqsel4.sv +// fdivsqrtuslc4.sv // // Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu // Modified:13 January 2022 // -// Purpose: Radix 4 Quotient Digit Selection +// Purpose: Table-based Radix 4 Unified Quotient/Square Root Digit Selection // // Documentation: RISC-V System on Chip Design Chapter 13 // @@ -26,25 +26,25 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -module fdivsqrtqsel4 ( - input logic [2:0] Dmsbs, - input logic [4:0] Smsbs, - input logic [7:0] WSmsbs, WCmsbs, +module fdivsqrtuslc4 ( + input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1 + input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation + input logic [7:0] WSmsbs, WCmsbs, // Q4.4 redundant residual most significant bits input logic Sqrt, j1, - output logic [3:0] udigit + output logic [3:0] udigit // {2, 1, -1, -2} digit is 0 if none are hot ); - logic [6:0] Wmsbs; - logic [7:0] PreWmsbs; - logic [2:0] A; + logic [7:0] PreWmsbs; // Q4.4 nonredundant residual msbs + logic [6:0] Wmsbs; // Q4.3 truncated nonredundant residual + logic [2:0] A; // U0.3 upper bits of D or Smsbs, discarding integer bit - assign PreWmsbs = WCmsbs + WSmsbs; - assign Wmsbs = PreWmsbs[7:1]; + assign PreWmsbs = WCmsbs + WSmsbs; // add redundant residual to find msbs + assign Wmsbs = PreWmsbs[7:1]; // truncate least significant bit to Q4.3 to index table // D = 0001.xxx... // Dmsbs = | | // W = xxxx.xxx... 
// Wmsbs = | | - logic [3:0] USel4[1023:0]; + logic [3:0] USel4[1023:0]; // 1024-entry table indexed with 3 bits of A and 7 bits of Wmsbs // Prepopulate selection table; this is constant at compile time always_comb begin @@ -101,10 +101,10 @@ module fdivsqrtqsel4 ( // Select A always_comb if (Sqrt) begin - if (j1) A = 3'b101; - else if (Smsbs == 5'b10000) A = 3'b111; - else A = Smsbs[2:0]; - end else A = Dmsbs; + if (j1) A = 3'b101; // on first sqrt iteration A = .101 + else if (Smsbs == 5'b10000) A = 3'b111; // if S = 1.0, use A = .111 + else A = Smsbs[2:0]; // otherwise use A = S (in U0.3 format) + end else A = Dmsbs; // for division use A = D (in U0.3 format, dropping leading 1) // Select quotient digit from lookup table based on A and W assign udigit = USel4[{A,Wmsbs}]; diff --git a/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv similarity index 90% rename from src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv rename to src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv index fe436413e..ccb5e618a 100644 --- a/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv @@ -1,10 +1,10 @@ /////////////////////////////////////////// -// fdivsqrtqsel4cmp.sv +// fdivsqrtuslc4cmp.sv // // Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu // Modified:13 January 2022 // -// Purpose: Comparator-based Radix 4 Quotient Digit Selection +// Purpose: Comparator-based Radix 4 Unified Quotient/Square Root Digit Selection // // Documentation: RISC-V System on Chip Design Chapter 13 // @@ -26,12 +26,12 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -module fdivsqrtqsel4cmp ( +module fdivsqrtuslc4cmp ( input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1 input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation - input logic [7:0] WSmsbs, WCmsbs, // Q4.4 + input logic [7:0] WSmsbs, WCmsbs, // Q4.4 residual most significant bits input logic SqrtE, j1, - output logic [3:0] udigit + output logic [3:0] udigit // {2, 1, -1, -2} digit is 0 if none are hot ); logic [6:0] Wmsbs; logic [7:0] PreWmsbs; From f437336540ddcd084fd40c395eb6c1f51af130c6 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 12 Nov 2023 10:05:54 -0800 Subject: [PATCH 23/62] Explained sqrt preshifting --- config/shared/config-shared.vh | 1 + src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 41 ++++++++++++++++++++--------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 14de5187e..9635d706b 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -99,6 +99,7 @@ localparam RK = LOGR*DIVCOPIES; // r*k bits // intermediate division parameters not directly used in fdivsqrt hardware localparam FPDIVMINb = NF + 3; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit because square root could be shifted right *** explain better +//localparam FPDIVMINb = NF + 2 + (RADIX == 2); // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit for preshifting radix2 square root right localparam DIVMINb = ((FPDIVMINb Date: Sun, 12 Nov 2023 19:41:12 -0800 Subject: [PATCH 24/62] Divider cleanup --- config/shared/config-shared.vh | 4 ++-- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 6 +++--- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 13 ++++++++++--- src/fpu/fdivsqrt/fdivsqrtuslc4.sv | 2 +- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git 
a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 9635d706b..55bca569f 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -98,8 +98,8 @@ localparam LOGR = $clog2(RADIX); // r = log(R localparam RK = LOGR*DIVCOPIES; // r*k bits per cycle generated // intermediate division parameters not directly used in fdivsqrt hardware -localparam FPDIVMINb = NF + 3; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit because square root could be shifted right *** explain better -//localparam FPDIVMINb = NF + 2 + (RADIX == 2); // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit for preshifting radix2 square root right +localparam FPDIVMINb = NF + 3; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit to allow sqrt being shifted right +//localparam FPDIVMINb = NF + 2 + (RADIX == 2); // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit for preshifting radix2 square root right, if radix4 doesn't use a right shift. This version saves one cycle on double-precision with R=4,k=4. However, it doesn't work yet because C is too short, so k is incorrectly calculated as a 1 in the lsb after the last step. 
localparam DIVMINb = ((FPDIVMINb Date: Sun, 12 Nov 2023 20:23:14 -0800 Subject: [PATCH 25/62] DivStickyM no longer mysteriously needs to be gated with SqrtM after divider improvements --- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index cb1f56db7..0b358909a 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -86,9 +86,10 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( ////////////////////////// // If the result is not exact, the sticky should be set - assign DivStickyM = ~WZeroM & ~(SpecialCaseM & SqrtM); // ***unsure why SpecialCaseM has to be gated by SqrtM, but otherwise fails regression on divide +// assign DivStickyM = ~WZeroM & ~(SpecialCaseM & SqrtM); // ***unsure why SpecialCaseM has to be gated by SqrtM, but otherwise fails regression on divide + assign DivStickyM = ~WZeroM & ~(SpecialCaseM); - // Determine if sticky bit is negative // *** look for ways to optimize this. Shift shouldn't be needed. 
+ // Determine if sticky bit is negative assign Sum = WC + WS; assign NegStickyM = Sum[P.DIVb+3]; mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit From c44ae93e22aeb0842ed21e18208d52a43c04bdab Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 12 Nov 2023 20:23:27 -0800 Subject: [PATCH 26/62] DivStickyM no longer mysteriously needs to be gated with SqrtM after divider improvements --- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 0b358909a..5a40a3bdc 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -86,8 +86,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( ////////////////////////// // If the result is not exact, the sticky should be set -// assign DivStickyM = ~WZeroM & ~(SpecialCaseM & SqrtM); // ***unsure why SpecialCaseM has to be gated by SqrtM, but otherwise fails regression on divide - assign DivStickyM = ~WZeroM & ~(SpecialCaseM); + assign DivStickyM = ~WZeroM & ~SpecialCaseM; // Determine if sticky bit is negative assign Sum = WC + WS; From 46bfdf5df9d0553daa01cf0e6457a17f84e42196 Mon Sep 17 00:00:00 2001 From: "James E. 
Stine" Date: Mon, 13 Nov 2023 02:39:25 -0600 Subject: [PATCH 27/62] update ppaAnalyze to analyze correctionly freqSweep --- synthDC/Makefile | 4 +-- synthDC/ppa/bestSynths.csv | 16 +++++------ synthDC/ppa/ppaAnalyze.py | 57 ++++++++++++++++++++------------------ synthDC/ppa/ppaSynth.py | 24 ++++++++++++++-- 4 files changed, 62 insertions(+), 39 deletions(-) diff --git a/synthDC/Makefile b/synthDC/Makefile index 8e1b09d01..e6332e60f 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -11,7 +11,7 @@ export MOD ?= orig # title to add a note in the synth's directory name TITLE = # tsmc28, sky130, and sky90 presently supported -export TECH ?= sky90 +export TECH ?= sky130 # MAXCORES allows parallel compilation, which is faster but less CPU-efficient # Avoid when doing sweeps of many optimization points in parallel export MAXCORES ?= 1 @@ -24,7 +24,7 @@ export WIDTH ?= 32 time := $(shell date +%F-%H-%M) hash := $(shell git rev-parse --short HEAD) -export OUTPUTDIR := runs/$(DESIGN)_$(WIDTH)_$(CONFIG)_$(MOD)_$(TECH)_$(FREQ)_MHz_$(time)_$(TITLE)_$(hash) +export OUTPUTDIR := runs/ppa_$(DESIGN)_$(WIDTH)_$(CONFIG)_$(TECH)_$(FREQ)_MHz_$(time)_$(TITLE)_$(hash) export SAIFPOWER ?= 0 OLDCONFIGDIR ?= ${WALLY}/config diff --git a/synthDC/ppa/bestSynths.csv b/synthDC/ppa/bestSynths.csv index d57ac6924..885eeb962 100644 --- a/synthDC/ppa/bestSynths.csv +++ b/synthDC/ppa/bestSynths.csv @@ -3,12 +3,12 @@ binencoder,sky130,8,1000,1.0000,50.960001,24.761,0.010685929975270078 binencoder,sky130,16,1000,1.0000,136.220003,77.243,0.021773774467348 binencoder,sky130,32,1000,1.0000,372.400007,189.626,0.04371111111111111 binencoder,sky130,64,1000,1.0000,797.720015,382.205,0.07393850658857981 -binencoder,sky130,128,1000,1.0000,1602.300031,610.009,0.1261366969785861 +binencoder,sky130,128,900,1.1111,1602.300031,610.009,0.1261366969785861 adder,sky130,8,1000,1.0000,253.820005,154.438,0.10825587752870422 adder,sky130,16,1000,1.0000,722.260013,485.109,0.32460910944935417 
adder,sky130,32,1000,1.0000,1440.600027,714.057,0.6580226904376014 adder,sky130,64,1000,1.0000,2781.240054,1050.0,0.9392239364188874 -adder,sky130,128,1000,1.0000,6186.740118,2230.0,2.1480106100795755 +adder,sky130,128,900,1.1111,6186.740118,2230.0,2.1480106100795755 csa,sky130,8,1000,1.0000,266.560005,154.202,0.13650573115665163 csa,sky130,16,1000,1.0000,533.12001,308.404,0.27263530601922104 csa,sky130,32,1000,1.0000,1066.240021,616.808,0.5448072247308093 @@ -18,12 +18,12 @@ shifter,sky130,8,1000,1.0000,259.700005,196.451,0.07534088282874972 shifter,sky130,16,1000,1.0000,666.400006,558.433,0.19552906110283155 shifter,sky130,32,1000,1.0000,1475.880027,768.262,0.3807431082700759 shifter,sky130,64,1000,1.0000,3914.120062,2680.0,1.144802541988198 -shifter,sky130,128,1000,1.0000,9192.400136,6080.0,2.9008914525432616 +shifter,sky130,128,900,1.1111,9192.400136,6080.0,2.9008914525432616 comparator,sky130,8,1000,1.0000,200.900004,136.6,0.05001033271337053 comparator,sky130,16,1000,1.0000,358.680007,189.253,0.06321553011448482 -comparator,sky130,32,1000,1.0000,690.900013,315.709,0.10771793448084398 -comparator,sky130,64,1000,1.0000,1372.980026,508.393,0.2048577820389901 -comparator,sky130,128,1000,1.0000,2744.980052,796.047,0.34396273737011823 +comparator,sky130,32,1500,0.666666,690.900013,315.709,0.10771793448084398 +comparator,sky130,64,1300,0.7692307,1372.980026,508.393,0.2048577820389901 +comparator,sky130,128,1100,0.909090,2744.980052,796.047,0.34396273737011823 flop,sky130,8,1000,1.0000,133.279999,64.8145,0.193835 flop,sky130,16,1000,1.0000,266.5599975,129.629,0.38715000000000005 flop,sky130,32,1000,1.0000,533.119995,259.258,0.7723000000000001 @@ -48,7 +48,7 @@ mul,sky130,8,1000,1.0000,2194.220041,1440.0,1.421374045801527 mul,sky130,16,1000,1.0000,7519.540137,4940.0,6.376128385155466 mul,sky130,32,1000,1.0000,25200.700446,14900.0,24.931847968545217 mul,sky130,64,1000,1.0000,86011.661365,42600.0,88.84651898734177 
-mul,sky130,128,1000,1.0000,296198.144128,114000.0,273.3148854961832 +mul,sky130,128,800,1.2500,296198.144128,114000.0,273.3148854961832 binencoder,sky90,8,7683,0.12508649056358195,50.960001,24.761,0.010685929975270078 binencoder,sky90,16,5773,0.16977016282695304,136.220003,77.243,0.021773774467348 binencoder,sky90,32,4500,0.2218912222222222,372.400007,189.626,0.04371111111111111 @@ -158,7 +158,7 @@ adder,tsmc28psyn,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546 adder,tsmc28psyn,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698 adder,tsmc28psyn,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594 adder,tsmc28psyn,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008 -adder,tsmc28psyn,128,7354,0.13597341881968997,907.452008,4360.0,0.3451183029643731 +adder,tsmc28psyn,128,7000,0.142857142857,907.452008,4360.0,0.3451183029643731 csa,tsmc28psyn,8,24524,0.040663382319360626,52.416,482.462,0.02173381177621921 csa,tsmc28psyn,16,24524,0.040663382319360626,104.832,964.99,0.04346762355243842 csa,tsmc28psyn,32,24524,0.040663382319360626,209.664,1930.0,0.08677214157559941 diff --git a/synthDC/ppa/ppaAnalyze.py b/synthDC/ppa/ppaAnalyze.py index 459a8520d..80cd57604 100755 --- a/synthDC/ppa/ppaAnalyze.py +++ b/synthDC/ppa/ppaAnalyze.py @@ -38,7 +38,7 @@ def synthsintocsv(): each line contains the module, tech, width, target freq, and resulting metrics ''' print("This takes a moment...") - bashCommand = "find . -path '*runs/ppa*rv32e*' -prune" + bashCommand = "find . 
-path '*runs/ppa*' -prune" output = subprocess.check_output(['bash','-c', bashCommand]) allSynths = output.decode("utf-8").split('\n')[:-1] @@ -51,7 +51,6 @@ def synthsintocsv(): for oneSynth in allSynths: module, width, risc, tech, freq = specReg.findall(oneSynth)[2:7] - tech = tech[:-2] metrics = [] for phrase in [['Path Slack', 'qor'], ['Design Area', 'qor'], ['100', 'power']]: bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*{}*' @@ -87,7 +86,7 @@ def cleanup(): output = subprocess.check_output(['bash','-c', bc]) except: pass - bashCommand = "find . -path '*runs/ppa*rv32e*' -prune" + bashCommand = "find . -path '*runs/ppa*' -prune" output = subprocess.check_output(['bash','-c', bashCommand]) allSynths = output.decode("utf-8").split('\n')[:-1] for oneSynth in allSynths: @@ -186,7 +185,7 @@ def genLegend(fits, coefs, r2=None, spec=None, ale=False): legend_elements += [lines.Line2D([0], [0], color=spec.color, ls='', marker=spec.shape, label='$R^2$='+ str(round(r2, 4)))] return legend_elements -def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, color=None): +def oneMetricPlot(module, widths, var, freq=None, ax=None, fits='clsgn', norm=True, color=None): ''' module: string module name freq: int freq (MHz) var: string delay, area, lpower, or denergy @@ -519,7 +518,7 @@ def squarify(fig): l = (1.-axs/h)/2 fig.subplots_adjust(bottom=l, top=1-l) -def plotPPA(mod, freq=None, norm=True, aleOpt=False): +def plotPPA(mod, widths, freq=None, norm=True, aleOpt=False): ''' for the module specified, plots width vs delay, area, leakage power, and dynamic energy with fits if no freq specified, uses the synthesis with best achievable delay for each width overlays data from both techs @@ -539,7 +538,7 @@ def plotPPA(mod, freq=None, norm=True, aleOpt=False): if (arr[i][j]=='delay') and (f==10): pass else: - r2 = oneMetricPlot(mod, arr[i][j], ax=axs[i, j], freq=f, norm=norm) + r2 = oneMetricPlot(mod, widths, arr[i][j], ax=axs[i, j], freq=f, norm=norm) ls = 
'--' if f else '-' leg += [lines.Line2D([0], [0], color='red', label='$R^2$='+str(round(r2, 4)), linestyle=ls)] @@ -568,6 +567,7 @@ def makeLineLegend(): fullLeg = [lines.Line2D([0], [0], color='black', label='fastest', linestyle='-')] fullLeg += [lines.Line2D([0], [0], color='black', label='smallest', linestyle='--')] fullLeg += [lines.Line2D([0], [0], color='blue', label='tsmc28', marker='^')] + fullLeg += [lines.Line2D([0], [0], color='blue', label='tsmc28psyn', marker='x')] fullLeg += [lines.Line2D([0], [0], color='green', label='sky90', marker='o')] fullLeg += [lines.Line2D([0], [0], color='green', label='sky130', marker='+')] fullLeg += [lines.Line2D([0], [0], color='red', label='combined', marker='_')] @@ -694,7 +694,7 @@ def makePlotDirectory(): os.makedirs(new_directory) os.chdir(new_directory) if 'freq' in folder: - for tech in ['sky90', 'sky130', 'tsmc28']: + for tech in ['sky90', 'sky130', 'tsmc28', 'tsmc28psyn']: for mod in modules: tech_directory = os.path.join(new_directory, tech) mod_directory = os.path.join(tech_directory, mod) @@ -707,24 +707,26 @@ def makePlotDirectory(): if __name__ == '__main__': ############################## # set up stuff, global variables - widths = [8, 16, 32, 64, 128] - modules = ['priorityencoder', 'add', 'csa', 'shiftleft', 'comparator', 'flop', 'mux2', 'mux4', 'mux8', 'mult'] - normAddWidth = 32 # divisor to use with N since normalizing to add_32 + widths = [64, 128] + modules = ['adder', 'comparator'] - fitDict = {'add': ['cg', 'l', 'l'], 'mult': ['cg', 's', 's'], 'comparator': ['cg', 'l', 'l'], 'csa': ['c', 'l', 'l'], 'shiftleft': ['cg', 'l', 'ln'], 'flop': ['c', 'l', 'l'], 'priorityencoder': ['cg', 'l', 'l']} fitDict.update(dict.fromkeys(['mux2', 'mux4', 'mux8'], ['cg', 'l', 'l'])) + normAddWidth = 32 # divisor to use with N since normalizing to add_32 - TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy") - techSpecs = [['sky90', 'green', 'o', 43.2e-3, 1440.600027, 714.057, 
0.658022690438], ['sky130', 'red', 'o', 43.2e-3, 1440.600027, 714.057, 0.658022690438], ['tsmc28', 'blue', '^', 12.2e-3, 209.286002, 1060.0, .08153281695882594]] - techSpecs = [TechSpec(*t) for t in techSpecs] - combined = TechSpec('combined fit', 'red', '_', 0, 0, 0, 0) + fitDict = {'adder': ['cg', 'l', 'l'], 'mul': ['cg', 's', 's'], 'comparator': ['cg', 'l', 'l'], 'csa': ['c', 'l', 'l'], 'shifter': ['cg', 'l', 'ln'], 'flop': ['c', 'l', 'l'], 'binencoder': ['cg', 'l', 'l']} + fitDict.update(dict.fromkeys(['mux2', 'mux4', 'mux8'], ['cg', 'l', 'l'])) + + TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy") + techSpecs = [['sky90', 'green', 'o', 43.2e-3, 1440.600027, 714.057, 0.658022690438], ['sky130', 'red', 'o', 43.2e-3, 1440.600027, 714.057, 0.658022690438], ['tsmc28', 'blue', '^', 12.2e-3, 209.286002, 1060.0, .08153281695882594], ['tsmc28psyn', 'blue', '^', 12.2e-3, 209.286002, 1060.0, .08153281695882594]] + techSpecs = [TechSpec(*t) for t in techSpecs] + combined = TechSpec('combined fit', 'red', '_', 0, 0, 0, 0) ############################## # cleanup() # run to remove garbage synth runs - synthsintocsv() # slow, run only when new synth runs to add to csv + synthsintocsv() # slow, run only when new synth runs to add to csv - allSynths = synthsfromcsv('ppaData.csv') # your csv here! - bestSynths = csvOfBest('bestSynths.csv') - makePlotDirectory() + allSynths = synthsfromcsv('ppaData.csv') # your csv here! 
+ bestSynths = csvOfBest('bestSynths.csv') + makePlotDirectory() # ### other functions # makeCoefTable() @@ -732,11 +734,12 @@ if __name__ == '__main__': # muxPlot() # stdDevError() - for mod in modules: - for w in widths: - freqPlot('sky90', mod, w) - #freqPlot('sky130', mod, w) - #freqPlot('tsmc28', mod, w) - #plotPPA(mod, norm=False) - #plotPPA(mod, aleOpt=True) - plt.close('all') + for mod in modules: + for w in widths: + #freqPlot('sky90', mod, w) + freqPlot('sky130', mod, w) + #freqPlot('tsmc28', mod, w) + #freqPlot('tsmc28psyn', mod, w) + #plotPPA(mod, widths, norm=False) + #plotPPA(mod, aleOpt=True) + plt.close('all') diff --git a/synthDC/ppa/ppaSynth.py b/synthDC/ppa/ppaSynth.py index 528c851a0..ceb6edbd2 100755 --- a/synthDC/ppa/ppaSynth.py +++ b/synthDC/ppa/ppaSynth.py @@ -32,6 +32,19 @@ def freqSweep(module, width, tech): synthsToRun += [[synth.module, str(synth.width), synth.tech, str(freq)]] return synthsToRun +def freqModuleSweep(widths, modules, tech): + synthsToRun = [] + arr = [-8, -6, -4, -2, 0, 2, 4, 6, 8] + allSynths = synthsfromcsv('ppa/bestSynths.csv') + for w in widths: + for module in modules: + for synth in allSynths: + if (synth.module == str(module)) & (synth.tech == tech) & (synth.width == w): + f = 1000/synth.delay + for freq in [round(f+f*x/100) for x in arr]: + synthsToRun += [[synth.module, str(synth.width), synth.tech, str(freq)]] + return synthsToRun + def filterRedundant(synthsToRun): bashCommand = "find . 
-path '*runs/ppa*rv32e*' -prune" output = subprocess.check_output(['bash','-c', bashCommand]) @@ -57,7 +70,7 @@ def allCombos(widths, modules, techs, freqs): if __name__ == '__main__': - ##### Run specific syntheses + ##### Run specific syntheses for a specific frequency widths = [8, 16, 32, 64, 128] modules = ['mul', 'adder', 'shifter', 'flop', 'comparator', 'binencoder', 'csa', 'mux2', 'mux4', 'mux8'] techs = ['sky90', 'sky130', 'tsmc28', 'tsmc28psyn'] @@ -69,9 +82,16 @@ if __name__ == '__main__': width = 32 tech = 'tsmc28psyn' synthsToRun = freqSweep(module, width, tech) + + ##### Run a sweep for multiple modules/widths based on best delay found in existing syntheses + modules = ['adder', 'comparator'] + widths = [64, 128] + tech = 'sky130' + synthsToRun = freqModuleSweep(widths, modules, tech) ##### Only do syntheses for which a run doesn't already exist synthsToRun = filterRedundant(synthsToRun) pool = Pool(processes=25) - pool.starmap(runCommand, synthsToRun) + +pool.starmap(runCommand, synthsToRun) \ No newline at end of file From 121f685fa27c451e535d8d25c65b23260470649c Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 13 Nov 2023 07:23:15 -0800 Subject: [PATCH 28/62] Removed assign statement inside always block --- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index e8a430a91..1e6eda56c 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -76,7 +76,7 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE; else ResultBitsE = FPResultBitsE; - assign CyclesE = (ResultBitsE-1)/(P.RK) + 1; // ceil (ResultBitsE/rk) + CyclesE = (ResultBitsE-1)/(P.RK) + 1; // ceil (ResultBitsE/rk) end /* verilator lint_on WIDTH */ From 74056246d4a57a5fd1067b8edc8448f2d6befdaf Mon Sep 17 00:00:00 2001 From: "James E. 
Stine" Date: Mon, 13 Nov 2023 10:02:10 -0600 Subject: [PATCH 29/62] Remove ppa_ prefix and modify ppaAnalyze.py to handle correct vector --- synthDC/Makefile | 2 +- synthDC/ppa/ppaAnalyze.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/synthDC/Makefile b/synthDC/Makefile index e6332e60f..d43a36b50 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -24,7 +24,7 @@ export WIDTH ?= 32 time := $(shell date +%F-%H-%M) hash := $(shell git rev-parse --short HEAD) -export OUTPUTDIR := runs/ppa_$(DESIGN)_$(WIDTH)_$(CONFIG)_$(TECH)_$(FREQ)_MHz_$(time)_$(TITLE)_$(hash) +export OUTPUTDIR := runs/$(DESIGN)_$(WIDTH)_$(CONFIG)_$(TECH)_$(FREQ)_MHz_$(time)_$(TITLE)_$(hash) export SAIFPOWER ?= 0 OLDCONFIGDIR ?= ${WALLY}/config diff --git a/synthDC/ppa/ppaAnalyze.py b/synthDC/ppa/ppaAnalyze.py index 80cd57604..73cd353c4 100755 --- a/synthDC/ppa/ppaAnalyze.py +++ b/synthDC/ppa/ppaAnalyze.py @@ -38,7 +38,7 @@ def synthsintocsv(): each line contains the module, tech, width, target freq, and resulting metrics ''' print("This takes a moment...") - bashCommand = "find . -path '*runs/ppa*' -prune" + bashCommand = "find . -path '*runs/*' -prune" output = subprocess.check_output(['bash','-c', bashCommand]) allSynths = output.decode("utf-8").split('\n')[:-1] @@ -50,7 +50,7 @@ def synthsintocsv(): writer.writerow(['Module', 'Tech', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (nJ)']) for oneSynth in allSynths: - module, width, risc, tech, freq = specReg.findall(oneSynth)[2:7] + module, width, risc, tech, freq = specReg.findall(oneSynth)[1:6] metrics = [] for phrase in [['Path Slack', 'qor'], ['Design Area', 'qor'], ['100', 'power']]: bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*{}*' @@ -86,7 +86,7 @@ def cleanup(): output = subprocess.check_output(['bash','-c', bc]) except: pass - bashCommand = "find . -path '*runs/ppa*' -prune" + bashCommand = "find . 
-path '*runs/*' -prune" output = subprocess.check_output(['bash','-c', bashCommand]) allSynths = output.decode("utf-8").split('\n')[:-1] for oneSynth in allSynths: From 6374d1a200329fcd4dd758833f75a7a13a155a28 Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Tue, 14 Nov 2023 01:04:37 -0600 Subject: [PATCH 30/62] Modify ppaSynth.py to be able to not issue excess number of operations with Pool command. This is due to the original command using the Popen command, whereas, using the subprocess.call command solves this issue. The relieves the python script from issuing a ton of synthesis commands and using up all the licenses --- synthDC/ppa/ppaSynth.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/synthDC/ppa/ppaSynth.py b/synthDC/ppa/ppaSynth.py index ceb6edbd2..07a342e26 100755 --- a/synthDC/ppa/ppaSynth.py +++ b/synthDC/ppa/ppaSynth.py @@ -12,11 +12,11 @@ from ppaAnalyze import synthsfromcsv def runCommand(module, width, tech, freq): command = "make synth DESIGN={} WIDTH={} TECH={} DRIVE=INV FREQ={} MAXOPT=1 MAXCORES=1".format(module, width, tech, freq) - subprocess.Popen(command, shell=True) + subprocess.call(command, shell=True) def deleteRedundant(synthsToRun): '''removes any previous runs for the current synthesis specifications''' - synthStr = "rm -rf runs/ppa_{}_{}_rv32e_{}nm_{}_*" + synthStr = "rm -rf runs/{}_{}_rv32e_{}_{}_*" for synth in synthsToRun: bashCommand = synthStr.format(*synth) outputCPL = subprocess.check_output(['bash','-c', bashCommand]) @@ -46,7 +46,7 @@ def freqModuleSweep(widths, modules, tech): return synthsToRun def filterRedundant(synthsToRun): - bashCommand = "find . -path '*runs/ppa*rv32e*' -prune" + bashCommand = "find . 
-path '*runs/*' -prune" output = subprocess.check_output(['bash','-c', bashCommand]) specReg = re.compile('[a-zA-Z0-9]+') allSynths = output.decode("utf-8").split('\n')[:-1] @@ -84,14 +84,15 @@ if __name__ == '__main__': synthsToRun = freqSweep(module, width, tech) ##### Run a sweep for multiple modules/widths based on best delay found in existing syntheses - modules = ['adder', 'comparator'] - widths = [64, 128] + modules = ['adder', "comparator"] + widths = [8, 16, 32, 64, 128] tech = 'sky130' synthsToRun = freqModuleSweep(widths, modules, tech) ##### Only do syntheses for which a run doesn't already exist - synthsToRun = filterRedundant(synthsToRun) - + synthsToRun = filterRedundant(synthsToRun) pool = Pool(processes=25) -pool.starmap(runCommand, synthsToRun) \ No newline at end of file +pool.starmap(runCommand, synthsToRun) +pool.close() +pool.join() \ No newline at end of file From c722e2c59da4e9473194d4abd4eda8b36277416c Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Tue, 14 Nov 2023 01:06:14 -0600 Subject: [PATCH 31/62] fix plotPPA and other excruciatingly painful problems related to using allWidths and causing empty arrays to be used. 
This generates the normalized/unnormalized plots --- synthDC/ppa/bestSynths.csv | 18 +- synthDC/ppa/ppaAnalyze.py | 798 +++++++++++++++++++++++-------------- 2 files changed, 504 insertions(+), 312 deletions(-) diff --git a/synthDC/ppa/bestSynths.csv b/synthDC/ppa/bestSynths.csv index 885eeb962..655f171a1 100644 --- a/synthDC/ppa/bestSynths.csv +++ b/synthDC/ppa/bestSynths.csv @@ -4,10 +4,10 @@ binencoder,sky130,16,1000,1.0000,136.220003,77.243,0.021773774467348 binencoder,sky130,32,1000,1.0000,372.400007,189.626,0.04371111111111111 binencoder,sky130,64,1000,1.0000,797.720015,382.205,0.07393850658857981 binencoder,sky130,128,900,1.1111,1602.300031,610.009,0.1261366969785861 -adder,sky130,8,1000,1.0000,253.820005,154.438,0.10825587752870422 -adder,sky130,16,1000,1.0000,722.260013,485.109,0.32460910944935417 -adder,sky130,32,1000,1.0000,1440.600027,714.057,0.6580226904376014 -adder,sky130,64,1000,1.0000,2781.240054,1050.0,0.9392239364188874 +adder,sky130,8,1700,0.588235,253.820005,154.438,0.10825587752870422 +adder,sky130,16,1300,0.7692307,722.260013,485.109,0.32460910944935417 +adder,sky130,32,1100,0.90909,1440.600027,714.057,0.6580226904376014 +adder,sky130,64,950,1.0526315,2781.240054,1050.0,0.9392239364188874 adder,sky130,128,900,1.1111,6186.740118,2230.0,2.1480106100795755 csa,sky130,8,1000,1.0000,266.560005,154.202,0.13650573115665163 csa,sky130,16,1000,1.0000,533.12001,308.404,0.27263530601922104 @@ -19,11 +19,11 @@ shifter,sky130,16,1000,1.0000,666.400006,558.433,0.19552906110283155 shifter,sky130,32,1000,1.0000,1475.880027,768.262,0.3807431082700759 shifter,sky130,64,1000,1.0000,3914.120062,2680.0,1.144802541988198 shifter,sky130,128,900,1.1111,9192.400136,6080.0,2.9008914525432616 -comparator,sky130,8,1000,1.0000,200.900004,136.6,0.05001033271337053 -comparator,sky130,16,1000,1.0000,358.680007,189.253,0.06321553011448482 -comparator,sky130,32,1500,0.666666,690.900013,315.709,0.10771793448084398 
-comparator,sky130,64,1300,0.7692307,1372.980026,508.393,0.2048577820389901 -comparator,sky130,128,1100,0.909090,2744.980052,796.047,0.34396273737011823 +comparator,sky130,8,1700,0.588235,200.900004,136.6,0.05001033271337053 +comparator,sky130,16,1500,0.6666667,358.680007,189.253,0.06321553011448482 +comparator,sky130,32,1300,0.7692307,690.900013,315.709,0.10771793448084398 +comparator,sky130,64,1200,0.8333333,1372.980026,508.393,0.2048577820389901 +comparator,sky130,128,1150,0.869565,2744.980052,796.047,0.34396273737011823 flop,sky130,8,1000,1.0000,133.279999,64.8145,0.193835 flop,sky130,16,1000,1.0000,266.5599975,129.629,0.38715000000000005 flop,sky130,32,1000,1.0000,533.119995,259.258,0.7723000000000001 diff --git a/synthDC/ppa/ppaAnalyze.py b/synthDC/ppa/ppaAnalyze.py index 73cd353c4..9af15fd80 100755 --- a/synthDC/ppa/ppaAnalyze.py +++ b/synthDC/ppa/ppaAnalyze.py @@ -18,92 +18,117 @@ from collections import namedtuple import sklearn.metrics as skm # depricated, will need to replace with scikit-learn import os + def synthsfromcsv(filename): Synth = namedtuple("Synth", "module tech width freq delay area lpower denergy") - with open(filename, newline='') as csvfile: + with open(filename, newline="") as csvfile: csvreader = csv.reader(csvfile) global allSynths allSynths = list(csvreader)[1:] for i in range(len(allSynths)): for j in range(len(allSynths[0])): - try: allSynths[i][j] = int(allSynths[i][j]) - except: - try: allSynths[i][j] = float(allSynths[i][j]) - except: pass + try: + allSynths[i][j] = int(allSynths[i][j]) + except: + try: + allSynths[i][j] = float(allSynths[i][j]) + except: + pass allSynths[i] = Synth(*allSynths[i]) return allSynths - + + def synthsintocsv(): - ''' writes a CSV with one line for every available synthesis - each line contains the module, tech, width, target freq, and resulting metrics - ''' + """writes a CSV with one line for every available synthesis + each line contains the module, tech, width, target freq, and resulting metrics + 
""" print("This takes a moment...") bashCommand = "find . -path '*runs/*' -prune" - output = subprocess.check_output(['bash','-c', bashCommand]) - allSynths = output.decode("utf-8").split('\n')[:-1] + output = subprocess.check_output(["bash", "-c", bashCommand]) + allSynths = output.decode("utf-8").split("\n")[:-1] - specReg = re.compile('[a-zA-Z0-9]+') - metricReg = re.compile('-?\d+\.\d+[e]?[-+]?\d*') + specReg = re.compile("[a-zA-Z0-9]+") + metricReg = re.compile("-?\d+\.\d+[e]?[-+]?\d*") file = open("ppaData.csv", "w") writer = csv.writer(file) - writer.writerow(['Module', 'Tech', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (nJ)']) + writer.writerow( + [ + "Module", + "Tech", + "Width", + "Target Freq", + "Delay", + "Area", + "L Power (nW)", + "D energy (nJ)", + ] + ) for oneSynth in allSynths: module, width, risc, tech, freq = specReg.findall(oneSynth)[1:6] metrics = [] - for phrase in [['Path Slack', 'qor'], ['Design Area', 'qor'], ['100', 'power']]: - bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*{}*' + for phrase in [["Path Slack", "qor"], ["Design Area", "qor"], ["100", "power"]]: + bashCommand = 'grep "{}" ' + oneSynth[2:] + "/reports/*{}*" bashCommand = bashCommand.format(*phrase) - try: output = subprocess.check_output(['bash','-c', bashCommand]) - except: + try: + output = subprocess.check_output(["bash", "-c", bashCommand]) + except: print(module + width + tech + freq + " doesn't have reports") print("Consider running cleanup() first") nums = metricReg.findall(str(output)) nums = [float(m) for m in nums] metrics += nums - delay = 1000/int(freq) - metrics[0] + delay = 1000 / int(freq) - metrics[0] area = metrics[1] lpower = metrics[4] - denergy = (metrics[2] + metrics[3])/int(freq)*1000 # (switching + internal powers)*delay, more practical units for regression coefs + # switching, internal power in mW and leakage in nW + tpower = metrics[2] + metrics[3] + metrics[4]*0.000001 + # EDP (fJ/GHz) + denergy = ( + (metrics[2] + 
metrics[3] + metrics[4]*0.000001) / int(freq) + ) # (switching + internal powers)*delay, more practical units for regression coefs - if ('flop' in module): # since two flops in each module - [area, lpower, denergy] = [n/2 for n in [area, lpower, denergy]] + if "flop" in module: # since two flops in each module + [area, lpower, denergy] = [n / 2 for n in [area, lpower, denergy]] writer.writerow([module, tech, width, freq, delay, area, lpower, denergy]) file.close() + def cleanup(): - ''' removes runs that didn't work - ''' + """removes runs that didn't work""" bashCommand = 'grep -r "Error" runs/ppa*/reports/*qor*' - try: - output = subprocess.check_output(['bash','-c', bashCommand]) - allSynths = output.decode("utf-8").split('\n')[:-1] + try: + output = subprocess.check_output(["bash", "-c", bashCommand]) + allSynths = output.decode("utf-8").split("\n")[:-1] for run in allSynths: - run = run.split('MHz')[0] - bc = 'rm -r '+ run + '*' - output = subprocess.check_output(['bash','-c', bc]) - except: pass + run = run.split("MHz")[0] + bc = "rm -r " + run + "*" + output = subprocess.check_output(["bash", "-c", bc]) + except: + pass bashCommand = "find . 
-path '*runs/*' -prune" - output = subprocess.check_output(['bash','-c', bashCommand]) - allSynths = output.decode("utf-8").split('\n')[:-1] + output = subprocess.check_output(["bash", "-c", bashCommand]) + allSynths = output.decode("utf-8").split("\n")[:-1] for oneSynth in allSynths: - for phrase in [['Path Length', 'qor']]: - bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*{}*' + for phrase in [["Path Length", "qor"]]: + bashCommand = 'grep "{}" ' + oneSynth[2:] + "/reports/*{}*" bashCommand = bashCommand.format(*phrase) - try: output = subprocess.check_output(['bash','-c', bashCommand]) - except: - bc = 'rm -r '+ oneSynth[2:] - output = subprocess.check_output(['bash','-c', bc]) + try: + output = subprocess.check_output(["bash", "-c", bashCommand]) + except: + bc = "rm -r " + oneSynth[2:] + output = subprocess.check_output(["bash", "-c", bc]) print("All cleaned up!") + def getVals(tech, module, var, freq=None, width=None): - ''' for a specified tech, module, and variable/metric - returns a list of values for that metric in ascending width order - works at a specified target frequency or if none is given, uses the synthesis with the best achievable delay for each width - ''' + """for a specified tech, module, and variable/metric + returns a list of values for that metric in ascending width order + works at a specified target frequency or if none is given, uses the synthesis with the best achievable delay for each width + """ if width != None: widthsToGet = width @@ -113,85 +138,132 @@ def getVals(tech, module, var, freq=None, width=None): metric = [] widthL = [] - if (freq != None): + if freq != None: for oneSynth in allSynths: - if (oneSynth.freq == freq) & (oneSynth.tech == tech) & (oneSynth.module == module) & (oneSynth.width != 1): + if ( + (oneSynth.freq == freq) + & (oneSynth.tech == tech) + & (oneSynth.module == module) + & (oneSynth.width != 1) + ): widthL += [oneSynth.width] osdict = oneSynth._asdict() metric += [osdict[var]] - metric = [x for _, x 
in sorted(zip(widthL, metric))] # ordering + metric = [x for _, x in sorted(zip(widthL, metric))] # ordering else: for w in widthsToGet: for oneSynth in bestSynths: - if (oneSynth.width == w) & (oneSynth.tech == tech) & (oneSynth.module == module): + if ( + (oneSynth.width == w) + & (oneSynth.tech == tech) + & (oneSynth.module == module) + ): osdict = oneSynth._asdict() met = osdict[var] metric += [met] return metric + def csvOfBest(filename): bestSynths = [] for tech in [x.tech for x in techSpecs]: for mod in modules: for w in widths: - m = np.Inf # large number to start + m = np.Inf # large number to start best = None - for oneSynth in allSynths: # best achievable, rightmost green - if (oneSynth.width == w) & (oneSynth.tech == tech) & (oneSynth.module == mod): - if (oneSynth.delay < m) & (1000/oneSynth.delay > oneSynth.freq): + for oneSynth in allSynths: # best achievable, rightmost green + if ( + (oneSynth.width == w) + & (oneSynth.tech == tech) + & (oneSynth.module == mod) + ): + if (oneSynth.delay < m) & ( + 1000 / oneSynth.delay > oneSynth.freq + ): m = oneSynth.delay best = oneSynth if (best != None) & (best not in bestSynths): bestSynths += [best] - + file = open(filename, "w") writer = csv.writer(file) - writer.writerow(['Module', 'Tech', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (nJ)']) + writer.writerow( + [ + "Module", + "Tech", + "Width", + "Target Freq", + "Delay", + "Area", + "L Power (nW)", + "D energy (nJ)", + ] + ) for synth in bestSynths: writer.writerow(list(synth)) file.close() return bestSynths - + + def genLegend(fits, coefs, r2=None, spec=None, ale=False): - ''' generates a list of two legend elements (or just an equation if no r2 or spec) - labels line with fit equation and dots with r squared of the fit - ''' + """generates a list of two legend elements (or just an equation if no r2 or spec) + labels line with fit equation and dots with r squared of the fit + """ coefsr = [str(sigfig(c, 2)) for c in coefs] if ale: 
- if (normAddWidth == 32): - sub = 'S' + if normAddWidth == 32: + sub = "S" elif normAddWidth != 1: - print('Equations are wrong, check normAddWidth') + print("Equations are wrong, check normAddWidth") else: - sub = 'N' + sub = "N" - eqDict = {'c': '', 'l': sub, 's': '$'+sub+'^2$', 'g': '$log_2$('+sub+')', 'n': ''+sub+'$log_2$('+sub+')'} - eq = '' - ind = 0 + eqDict = { + "c": "", + "l": sub, + "s": "$" + sub + "^2$", + "g": "$log_2$(" + sub + ")", + "n": "" + sub + "$log_2$(" + sub + ")", + } + eq = "" + ind = 0 for k in eqDict.keys(): if k in fits: - if str(coefsr[ind]) != '0': eq += " + " + coefsr[ind] + eqDict[k] + if str(coefsr[ind]) != "0": + eq += " + " + coefsr[ind] + eqDict[k] ind += 1 - eq = eq[3:] # chop off leading ' + ' + eq = eq[3:] # chop off leading ' + ' - if (r2==None) or (spec==None): + if (r2 == None) or (spec == None): return eq else: legend_elements = [lines.Line2D([0], [0], color=spec.color, label=eq)] - legend_elements += [lines.Line2D([0], [0], color=spec.color, ls='', marker=spec.shape, label='$R^2$='+ str(round(r2, 4)))] + legend_elements += [ + lines.Line2D( + [0], + [0], + color=spec.color, + ls="", + marker=spec.shape, + label="$R^2$=" + str(round(r2, 4)), + ) + ] return legend_elements -def oneMetricPlot(module, widths, var, freq=None, ax=None, fits='clsgn', norm=True, color=None): - ''' module: string module name - freq: int freq (MHz) - var: string delay, area, lpower, or denergy - fits: constant, linear, square, log2, Nlog2 - plots given variable vs width for all matching syntheses with regression - ''' + +def oneMetricPlot( + module, widths, var, freq=None, ax=None, fits="clsgn", norm=True, color=None +): + """module: string module name + freq: int freq (MHz) + var: string delay, area, lpower, or denergy + fits: constant, linear, square, log2, Nlog2 + plots given variable vs width for all matching syntheses with regression + """ singlePlot = True if ax or (freq == 10): singlePlot = False @@ -202,24 +274,27 @@ def 
oneMetricPlot(module, widths, var, freq=None, ax=None, fits='clsgn', norm=Tr allWidths = [] allMetrics = [] - ale = (var != 'delay') # if not delay, must be area, leakage, or energy + ale = var != "delay" # if not delay, must be area, leakage, or energy modFit = fitDict[module] fits = modFit[ale] if freq: - ls = '--' + ls = "--" else: - ls = '-' + ls = "-" for spec in techSpecs: + # print(f"Searching for module of spec {spec} and module {module} and var {var}") metric = getVals(spec.tech, module, var, freq=freq) - + # print(f"Found metric : {metric}") if norm: techdict = spec._asdict() norm = techdict[var] - metric = [m/norm for m in metric] + metric = [m / norm for m in metric] - if len(metric) == 5: # don't include the spec if we don't have points for all widths + if len(widths) == len(metric): + # don't include the spec if we don't have points for all widths + # print(f"Width \neq Metric") xp, pred, coefs, r2 = regress(widths, metric, fits, ale) fullLeg += genLegend(fits, coefs, r2, spec, ale=ale) c = color if color else spec.color @@ -228,44 +303,78 @@ def oneMetricPlot(module, widths, var, freq=None, ax=None, fits='clsgn', norm=Tr allWidths += widths allMetrics += metric - xp, pred, coefs, r2 = regress(allWidths, allMetrics, fits) - ax.plot(xp, pred, color='red', linestyle=ls) + # print(f"Widths passed into regress : {allWidths}") + # Not sure why this works (jes) - if allWidths doesn't have data widths does + if len(allWidths) > 0: + xp, pred, coefs, r2 = regress(allWidths, allMetrics, fits) + ax.plot(xp, pred, color="orange", linestyle=ls) + else: + xp, pred, coefs, r2 = regress(widths, metric, fits) + ax.plot(xp, pred, color="orange", linestyle=ls) if norm: - ylabeldic = {"lpower": "Leakage Power (add32)", "denergy": "Energy/Op (add32)", "area": "Area (add32)", "delay": "Delay (FO4)"} + ylabeldic = { + "lpower": "Leakage Power (add32)", + "denergy": "Energy/Op (add32)", + "area": "Area (add32)", + "delay": "Delay (FO4)", + } else: - ylabeldic = {"lpower": 
"Leakage Power (nW)", "denergy": "Dynamic Energy (nJ)", "area": "Area (sq microns)", "delay": "Delay (ns)"} + ylabeldic = { + "lpower": "Leakage Power (nW)", + "denergy": "EDP (fJ/GHz)", + "area": "Area (sq microns)", + "delay": "Delay (ns)", + } ax.set_ylabel(ylabeldic[var]) ax.set_xticks(widths) - if singlePlot or (var == 'lpower') or (var == 'denergy'): + if singlePlot or (var == "lpower") or (var == "denergy"): ax.set_xlabel("Width (bits)") - if not singlePlot and ((var == 'delay') or (var == 'area')): - ax.tick_params(labelbottom=False) + if not singlePlot and ((var == "delay") or (var == "area")): + ax.tick_params(labelbottom=False) if singlePlot: fullLeg += genLegend(fits, coefs, r2, combined, ale=ale) - legLoc = 'upper left' if ale else 'center right' + legLoc = "upper left" if ale else "center right" ax.add_artist(ax.legend(handles=fullLeg, loc=legLoc)) - titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)" + titleStr = ( + " (target " + str(freq) + "MHz)" + if freq != None + else " (best achievable delay)" + ) ax.set_title(module + titleStr) - plt.savefig('.plots/'+ module + '_' + var + '.png') + plt.savefig(".plots/" + module + "_" + var + ".png") # plt.show() return r2 -def regress(widths, var, fits='clsgn', ale=False): - ''' fits a curve to the given points - returns lists of x and y values to plot that curve and coefs for the eq with r2 - ''' +def regress(widths, var, fits="clsgn", ale=False): + """fits a curve to the given points + returns lists of x and y values to plot that curve and coefs for the eq with r2 + """ + if len(var) != len(widths): + print( + f"There are not enough variables to match widths. 
Widths : {widths} Variables Found : {var}, padding to match may affect correctness (doing it anyways)\n" + ) + if len(widths) > len(var): + while len(widths) > len(var): + var.append(0.0) + if len(var) > len(widths): + while len(var) > len(widths): + widths.append(0) + + # widths = [8, 16, 32, 64, 128] + # print(f"Regress var : {var}") + # print(f"Regress widths : {widths}") funcArr = genFuncs(fits) - xp = np.linspace(min(widths)/2, max(widths)*1.1, 200) + xp = np.linspace(min(widths) / 2, max(widths) * 1.1, 200) xpToCalc = xp if ale: - widths = [w/normAddWidth for w in widths] - xpToCalc = [x/normAddWidth for x in xp] + widths = [w / normAddWidth for w in widths] + xpToCalc = [x / normAddWidth for x in xp] mat = [] for w in widths: @@ -273,8 +382,9 @@ def regress(widths, var, fits='clsgn', ale=False): for func in funcArr: row += [func(w)] mat += [row] - - y = np.array(var, dtype=np.float) + + # var = [0, 1, 2, 3, 4] + y = np.array(var, dtype=np.float64) coefs = opt.nnls(mat, y)[0] yp = [] @@ -290,19 +400,22 @@ def regress(widths, var, fits='clsgn', ale=False): return xp, pred, coefs, r2 + def makeCoefTable(): - ''' writes CSV with each line containing the coefficients for a regression fit - to a particular combination of module, metric (including both techs, normalized) - ''' + """writes CSV with each line containing the coefficients for a regression fit + to a particular combination of module, metric (including both techs, normalized) + """ file = open("ppaFitting.csv", "w") writer = csv.writer(file) - writer.writerow(['Module', 'Metric', 'Target', '1', 'N', 'N^2', 'log2(N)', 'Nlog2(N)', 'R^2']) + writer.writerow( + ["Module", "Metric", "Target", "1", "N", "N^2", "log2(N)", "Nlog2(N)", "R^2"] + ) for module in modules: for freq in [10, None]: - target = 'easy' if freq else 'hard' - for var in ['delay', 'area', 'lpower', 'denergy']: - ale = (var != 'delay') + target = "easy" if freq else "hard" + for var in ["delay", "area", "lpower", "denergy"]: + ale = var != 
"delay" metL = [] modFit = fitDict[module] fits = modFit[ale] @@ -311,12 +424,12 @@ def makeCoefTable(): metric = getVals(spec.tech, module, var, freq=freq) techdict = spec._asdict() norm = techdict[var] - metL += [m/norm for m in metric] + metL += [m / norm for m in metric] - xp, pred, coefs, r2 = regress(widths*2, metL, fits, ale) + xp, pred, coefs, r2 = regress(widths * 2, metL, fits, ale) coefs = np.ndarray.tolist(coefs) - coefsToWrite = [None]*5 - fitTerms = 'clsgn' + coefsToWrite = [None] * 5 + fitTerms = "clsgn" ind = 0 for i in range(len(fitTerms)): if fitTerms[i] in fits: @@ -327,25 +440,38 @@ def makeCoefTable(): file.close() + def sigfig(num, figs): - return '{:g}'.format(float('{:.{p}g}'.format(num, p=figs))) + return "{:g}".format(float("{:.{p}g}".format(num, p=figs))) + def makeEqTable(): - ''' writes CSV with each line containing the equations for fits for each metric - to a particular module (including both techs, normalized) - ''' + """writes CSV with each line containing the equations for fits for each metric + to a particular module (including both techs, normalized) + """ file = open("ppaEquations.csv", "w") writer = csv.writer(file) - writer.writerow(['Element', 'Best delay', 'Fast area', 'Fast leakage', 'Fast energy', 'Small area', 'Small leakage', 'Small energy']) + writer.writerow( + [ + "Element", + "Best delay", + "Fast area", + "Fast leakage", + "Fast energy", + "Small area", + "Small leakage", + "Small energy", + ] + ) for module in modules: eqs = [] for freq in [None, 10]: - for var in ['delay', 'area', 'lpower', 'denergy']: - if (var == 'delay') and (freq == 10): + for var in ["delay", "area", "lpower", "denergy"]: + if (var == "delay") and (freq == 10): pass else: - ale = (var != 'delay') + ale = var != "delay" metL = [] modFit = fitDict[module] fits = modFit[ale] @@ -354,9 +480,9 @@ def makeEqTable(): metric = getVals(spec.tech, module, var, freq=freq) techdict = spec._asdict() norm = techdict[var] - metL += [m/norm for m in metric] 
+ metL += [m / norm for m in metric] - xp, pred, coefs, r2 = regress(widths*2, metL, fits, ale) + xp, pred, coefs, r2 = regress(widths * 2, metL, fits, ale) coefs = np.ndarray.tolist(coefs) eqs += [genLegend(fits, coefs, ale=ale)] row = [module] + eqs @@ -364,93 +490,113 @@ def makeEqTable(): file.close() -def genFuncs(fits='clsgn'): - ''' helper function for regress() - returns array of functions with one for each term desired in the regression fit - ''' + +def genFuncs(fits="clsgn"): + """helper function for regress() + returns array of functions with one for each term desired in the regression fit + """ funcArr = [] - if 'c' in fits: + if "c" in fits: funcArr += [lambda x: 1] - if 'l' in fits: + if "l" in fits: funcArr += [lambda x: x] - if 's' in fits: + if "s" in fits: funcArr += [lambda x: x**2] - if 'g' in fits: + if "g" in fits: funcArr += [lambda x: np.log2(x)] - if 'n' in fits: - funcArr += [lambda x: x*np.log2(x)] + if "n" in fits: + funcArr += [lambda x: x * np.log2(x)] return funcArr + def noOutliers(median, freqs, delays, areas): - ''' returns a pared down list of freqs, delays, and areas - cuts out any syntheses in which target freq isn't within 75% of the min delay target to focus on interesting area - helper function to freqPlot() - ''' - f=[] - d=[] - a=[] + """returns a pared down list of freqs, delays, and areas + cuts out any syntheses in which target freq isn't within 75% of the min delay target to focus on interesting area + helper function to freqPlot() + """ + f = [] + d = [] + a = [] for i in range(len(freqs)): - norm = freqs[i]/median - if (norm > 0.4) & (norm<1.4): + norm = freqs[i] / median + if (norm > 0.4) & (norm < 1.4): f += [freqs[i]] d += [delays[i]] a += [areas[i]] - + return f, d, a + def freqPlot(tech, mod, width): - ''' plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width - ''' + """plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width""" 
freqsL, delaysL, areasL = ([[], []] for i in range(3)) for oneSynth in allSynths: - if (mod == oneSynth.module) & (width == oneSynth.width) & (tech == oneSynth.tech): - ind = (1000/oneSynth.delay < oneSynth.freq) # when delay is within target clock period + if ( + (mod == oneSynth.module) + & (width == oneSynth.width) + & (tech == oneSynth.tech) + ): + ind = ( + 1000 / oneSynth.delay < oneSynth.freq + ) # when delay is within target clock period freqsL[ind] += [oneSynth.freq] delaysL[ind] += [oneSynth.delay] areasL[ind] += [oneSynth.area] median = np.median(list(flatten(freqsL))) - + f, (ax1, ax2) = plt.subplots(2, 1, sharex=True) for ax in (ax1, ax2): - ax.ticklabel_format(useOffset=False, style='plain') + ax.ticklabel_format(useOffset=False, style="plain") - for ind in [0,1]: + for ind in [0, 1]: areas = areasL[ind] delays = delaysL[ind] freqs = freqsL[ind] - freqs, delays, areas = noOutliers(median, freqs, delays, areas) # comment out to see all syntheses + freqs, delays, areas = noOutliers( + median, freqs, delays, areas + ) # comment out to see all syntheses - c = 'blue' if ind else 'green' + c = "blue" if ind else "green" ax1.scatter(freqs, delays, color=c) ax2.scatter(freqs, areas, color=c) - legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='o', label='timing achieved'), - lines.Line2D([0], [0], color='blue', ls='', marker='o', label='slack violated')] + legend_elements = [ + lines.Line2D( + [0], [0], color="green", ls="", marker="o", label="timing achieved" + ), + lines.Line2D([0], [0], color="blue", ls="", marker="o", label="slack violated"), + ] ax1.legend(handles=legend_elements) width = str(width) - + ax2.set_xlabel("Target Freq (MHz)") - ax1.set_ylabel('Delay (ns)') - ax2.set_ylabel('Area (sq microns)') - ax1.set_title(mod + '_' + width) - if ('mux' in mod) & ('d' in mod): + ax1.set_ylabel("Delay (ns)") + ax2.set_ylabel("Area (sq microns)") + ax1.set_title(mod + "_" + width) + if ("mux" in mod) & ("d" in mod): width = mod - mod = 
'muxd' - plt.savefig('./plots/freqBuckshot/' + tech + '/' + mod + '/' + width + '.png') + mod = "muxd" + plt.savefig("./plots/freqBuckshot/" + tech + "/" + mod + "/" + width + ".png") # plt.show() + def squareAreaDelay(tech, mod, width): - ''' plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width - ''' + """plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width""" global allSynths freqsL, delaysL, areasL = ([[], []] for i in range(3)) for oneSynth in allSynths: - if (mod == oneSynth.module) & (width == oneSynth.width) & (tech == oneSynth.tech): - ind = (1000/oneSynth.delay < oneSynth.freq) # when delay is within target clock period + if ( + (mod == oneSynth.module) + & (width == oneSynth.width) + & (tech == oneSynth.tech) + ): + ind = ( + 1000 / oneSynth.delay < oneSynth.freq + ) # when delay is within target clock period freqsL[ind] += [oneSynth.freq] delaysL[ind] += [oneSynth.delay] areasL[ind] += [oneSynth.area] @@ -458,182 +604,212 @@ def squareAreaDelay(tech, mod, width): f, (ax1) = plt.subplots(1, 1) ax2 = ax1.twinx() - for ind in [0,1]: + for ind in [0, 1]: areas = areasL[ind] delays = delaysL[ind] targets = freqsL[ind] - targets = [1000/f for f in targets] - - targets, delays, areas = noOutliers(targets, delays, areas) # comment out to see all - + targets = [1000 / f for f in targets] + + targets, delays, areas = noOutliers( + targets, delays, areas + ) # comment out to see all + if not ind: achievedDelays = delays - c = 'blue' if ind else 'green' - ax1.scatter(targets, delays, marker='^', color=c) - ax2.scatter(targets, areas, marker='s', color=c) - - bestAchieved = min(achievedDelays) - - legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='^', label='delay (timing achieved)'), - lines.Line2D([0], [0], color='green', ls='', marker='s', label='area (timing achieved)'), - lines.Line2D([0], [0], color='blue', ls='', marker='^', label='delay (timing 
violated)'), - lines.Line2D([0], [0], color='blue', ls='', marker='s', label='area (timing violated)')] + c = "blue" if ind else "green" + ax1.scatter(targets, delays, marker="^", color=c) + ax2.scatter(targets, areas, marker="s", color=c) + + bestAchieved = min(achievedDelays) + + legend_elements = [ + lines.Line2D( + [0], [0], color="green", ls="", marker="^", label="delay (timing achieved)" + ), + lines.Line2D( + [0], [0], color="green", ls="", marker="s", label="area (timing achieved)" + ), + lines.Line2D( + [0], [0], color="blue", ls="", marker="^", label="delay (timing violated)" + ), + lines.Line2D( + [0], [0], color="blue", ls="", marker="s", label="area (timing violated)" + ), + ] + + ax2.legend(handles=legend_elements, loc="upper left") - ax2.legend(handles=legend_elements, loc='upper left') - ax1.set_xlabel("Delay Targeted (ns)") ax1.set_ylabel("Delay Achieved (ns)") - ax2.set_ylabel('Area (sq microns)') - ax1.set_title(mod + '_' + str(width)) + ax2.set_ylabel("Area (sq microns)") + ax1.set_title(mod + "_" + str(width)) squarify(f) xvals = np.array(ax1.get_xlim()) - frac = (min(flatten(delaysL))-xvals[0])/(xvals[1]-xvals[0]) - areaLowerLim = min(flatten(areasL))-100 - areaUpperLim = max(flatten(areasL))/frac + areaLowerLim + frac = (min(flatten(delaysL)) - xvals[0]) / (xvals[1] - xvals[0]) + areaLowerLim = min(flatten(areasL)) - 100 + areaUpperLim = max(flatten(areasL)) / frac + areaLowerLim ax2.set_ylim([areaLowerLim, areaUpperLim]) ax1.plot(xvals, xvals, ls="--", c=".3") - ax1.hlines(y=bestAchieved, xmin=xvals[0], xmax=xvals[1], color="black", ls='--') + ax1.hlines(y=bestAchieved, xmin=xvals[0], xmax=xvals[1], color="black", ls="--") - plt.savefig('./plots/squareareadelay_' + mod + '_' + str(width) + '.png') + plt.savefig("./plots/squareareadelay_" + mod + "_" + str(width) + ".png") # plt.show() + def squarify(fig): - ''' helper function for squareAreaDelay() - forces matplotlib figure to be a square - ''' + """helper function for squareAreaDelay() + 
forces matplotlib figure to be a square + """ w, h = fig.get_size_inches() if w > h: t = fig.subplotpars.top b = fig.subplotpars.bottom - axs = h*(t-b) - l = (1.-axs/w)/2 - fig.subplots_adjust(left=l, right=1-l) + axs = h * (t - b) + l = (1.0 - axs / w) / 2 + fig.subplots_adjust(left=l, right=1 - l) else: t = fig.subplotpars.right b = fig.subplotpars.left - axs = w*(t-b) - l = (1.-axs/h)/2 - fig.subplots_adjust(bottom=l, top=1-l) + axs = w * (t - b) + l = (1.0 - axs / h) / 2 + fig.subplots_adjust(bottom=l, top=1 - l) -def plotPPA(mod, widths, freq=None, norm=True, aleOpt=False): - ''' for the module specified, plots width vs delay, area, leakage power, and dynamic energy with fits - if no freq specified, uses the synthesis with best achievable delay for each width - overlays data from both techs - ''' - with mpl.rc_context({"figure.figsize": (7,3.46)}): + +def plotPPA(mod, freq=None, norm=True, aleOpt=False): + """for the module specified, plots width vs delay, area, leakage power, and dynamic energy with fits + if no freq specified, uses the synthesis with best achievable delay for each width + overlays data from both techs + """ + with mpl.rc_context({"figure.figsize": (7, 3.46)}): fig, axs = plt.subplots(2, 2) - arr = [['delay', 'area'], ['lpower', 'denergy']] + arr = [["delay", "area"], ["lpower", "denergy"]] freqs = [freq] - if aleOpt: freqs += [10] + if aleOpt: + freqs += [10] for i in [0, 1]: for j in [0, 1]: leg = [] for f in freqs: - if (arr[i][j]=='delay') and (f==10): + if (arr[i][j] == "delay") and (f == 10): pass else: - r2 = oneMetricPlot(mod, widths, arr[i][j], ax=axs[i, j], freq=f, norm=norm) - ls = '--' if f else '-' - leg += [lines.Line2D([0], [0], color='red', label='$R^2$='+str(round(r2, 4)), linestyle=ls)] + # print(f"Pasing in widths {widths}") + r2 = oneMetricPlot( + mod, widths, arr[i][j], ax=axs[i, j], freq=f, norm=norm + ) + ls = "--" if f else "-" + leg += [ + lines.Line2D( + [0], + [0], + color="orange", + label="$R^2$=" + str(round(r2, 
4)), + linestyle=ls, + ) + ] - if (mod in ['flop', 'csa']) & (arr[i][j] == 'delay'): + if (mod in ["flop", "csa"]) & (arr[i][j] == "delay"): axs[i, j].set_ylim(ymin=0) ytop = axs[i, j].get_ylim()[1] - axs[i, j].set_ylim(ymax=1.1*ytop) + axs[i, j].set_ylim(ymax=1.1 * ytop) else: axs[i, j].legend(handles=leg, handlelength=1.5) - - titleStr = " (target " + str(freq)+ "MHz)" if freq != None else "" - plt.suptitle(mod + titleStr) - plt.tight_layout(pad=0.05, w_pad=1, h_pad=0.5, rect=(0,0,1,0.97)) - if freq != 10: - n = 'normalized' if norm else 'unnormalized' - saveStr = './plots/'+ n + '/' + mod + '.png' + titleStr = " (target " + str(freq) + "MHz)" if freq != None else "" + plt.suptitle(mod + titleStr) + plt.tight_layout(pad=0.05, w_pad=1, h_pad=0.5, rect=(0, 0, 1, 0.97)) + + if freq != 10: + n = "normalized" if norm else "unnormalized" + saveStr = "./plots/" + n + "/" + mod + "_" + ".png" + print(f"Saving to {saveStr}") plt.savefig(saveStr) # plt.show() + def makeLineLegend(): - ''' generates legend to accompany normalized plots - ''' - plt.rcParams["figure.figsize"] = (5.5,0.3) + """generates legend to accompany normalized plots""" + plt.rcParams["figure.figsize"] = (5.5, 0.3) fig = plt.figure() - fullLeg = [lines.Line2D([0], [0], color='black', label='fastest', linestyle='-')] - fullLeg += [lines.Line2D([0], [0], color='black', label='smallest', linestyle='--')] - fullLeg += [lines.Line2D([0], [0], color='blue', label='tsmc28', marker='^')] - fullLeg += [lines.Line2D([0], [0], color='blue', label='tsmc28psyn', marker='x')] - fullLeg += [lines.Line2D([0], [0], color='green', label='sky90', marker='o')] - fullLeg += [lines.Line2D([0], [0], color='green', label='sky130', marker='+')] - fullLeg += [lines.Line2D([0], [0], color='red', label='combined', marker='_')] - fig.legend(handles=fullLeg, ncol=5, handlelength=1.4, loc='center') - saveStr = './plots/legend.png' + fullLeg = [lines.Line2D([0], [0], color="black", label="fastest", linestyle="-")] + fullLeg += 
[lines.Line2D([0], [0], color="black", label="smallest", linestyle="--")] + fullLeg += [lines.Line2D([0], [0], color="blue", label="tsmc28", marker="^")] + fullLeg += [lines.Line2D([0], [0], color="blue", label="tsmc28psyn", marker="x")] + fullLeg += [lines.Line2D([0], [0], color="green", label="sky90", marker="o")] + fullLeg += [lines.Line2D([0], [0], color="purple", label="sky130", marker="+")] + fullLeg += [lines.Line2D([0], [0], color="orange", label="combined", marker="_")] + fig.legend(handles=fullLeg, ncol=5, handlelength=1.4, loc="center") + saveStr = "./plots/legend.png" plt.savefig(saveStr) -def muxPlot(fits='clsgn', norm=True): - ''' module: string module name - freq: int freq (MHz) - var: string delay, area, lpower, or denergy - fits: constant, linear, square, log2, Nlog2 - plots given variable vs width for all matching syntheses with regression - ''' + +def muxPlot(fits="clsgn", norm=True): + """module: string module name + freq: int freq (MHz) + var: string delay, area, lpower, or denergy + fits: constant, linear, square, log2, Nlog2 + plots given variable vs width for all matching syntheses with regression + """ ax = plt.gca() inputs = [2, 4, 8] - allInputs = inputs*2 + allInputs = inputs * 2 fullLeg = [] - for crit in ['data', 'control']: + for crit in ["data", "control"]: allMetrics = [] - muxes = ['mux2', 'mux4', 'mux8'] + muxes = ["mux2", "mux4", "mux8"] - if crit == 'data': - ls = '--' - muxes = [m + 'd' for m in muxes] - elif crit == 'control': - ls = '-' + if crit == "data": + ls = "--" + muxes = [m + "d" for m in muxes] + elif crit == "control": + ls = "-" for spec in techSpecs: metric = [] for module in muxes: - metric += getVals(spec.tech, module, 'delay', width=[1]) - + metric += getVals(spec.tech, module, "delay", width=[1]) + if norm: techdict = spec._asdict() - norm = techdict['delay'] - metric = [m/norm for m in metric] + norm = techdict["delay"] + metric = [m / norm for m in metric] # print(spec.tech, ' ', metric) - if len(metric) == 
3: # don't include the spec if we don't have points for all + if ( + len(metric) == 3 + ): # don't include the spec if we don't have points for all xp, pred, coefs, r2 = regress(inputs, metric, fits, ale=False) ax.scatter(inputs, metric, color=spec.color, marker=spec.shape) ax.plot(xp, pred, color=spec.color, linestyle=ls) allMetrics += metric xp, pred, coefs, r2 = regress(allInputs, allMetrics, fits) - ax.plot(xp, pred, color='red', linestyle=ls) - fullLeg += [lines.Line2D([0], [0], color='red', label=crit, linestyle=ls)] - - ax.set_ylabel('Delay (FO4)') + ax.plot(xp, pred, color="orange", linestyle=ls) + fullLeg += [lines.Line2D([0], [0], color="orange", label=crit, linestyle=ls)] + + ax.set_ylabel("Delay (FO4)") ax.set_xticks(inputs) ax.set_xlabel("Number of inputs") - ax.set_title('mux timing') - - ax.legend(handles = fullLeg) - plt.savefig('./plots/mux.png') + ax.set_title("mux timing") + + ax.legend(handles=fullLeg) + plt.savefig("./plots/mux.png") + def stdDevError(): - ''' calculates std deviation and error for paper-writing purposes - ''' - for var in ['delay', 'area', 'lpower', 'denergy']: + """calculates std deviation and error for paper-writing purposes""" + for var in ["delay", "area", "lpower", "denergy"]: errlist = [] for module in modules: - ale = (var != 'delay') + ale = var != "delay" metL = [] modFit = fitDict[module] fits = modFit[ale] @@ -643,20 +819,20 @@ def stdDevError(): metric = getVals(spec.tech, module, var) techdict = spec._asdict() norm = techdict[var] - metL += [m/norm for m in metric] + metL += [m / norm for m in metric] if ale: - ws = [w/normAddWidth for w in widths] + ws = [w / normAddWidth for w in widths] else: ws = widths - ws = ws*2 + ws = ws * 2 mat = [] for w in ws: row = [] for func in funcArr: row += [func(w)] mat += [row] - + y = np.array(metL, dtype=np.float) coefs = opt.nnls(mat, y)[0] @@ -665,68 +841,84 @@ def stdDevError(): n = [func(w) for func in funcArr] yp += [sum(np.multiply(coefs, n))] - if (var == 'delay') & 
(module == 'flop'): + if (var == "delay") & (module == "flop"): pass - elif (module == 'mult') & ale: + elif (module == "mult") & ale: pass else: for i in range(len(y)): - errlist += [abs(y[i]/yp[i]-1)] + errlist += [abs(y[i] / yp[i] - 1)] # print(module, ' ', var, ' ', np.mean(errlist[-10:])) - + avgErr = np.mean(errlist) stdv = np.std(errlist) - print(var, ' ', avgErr, ' ', stdv) + print(var, " ", avgErr, " ", stdv) + def makePlotDirectory(): - ''' creates plots directory in same level as this script to store plots in - ''' + """creates plots directory in same level as this script to store plots in""" current_directory = os.getcwd() - final_directory = os.path.join(current_directory, 'plots') + final_directory = os.path.join(current_directory, "plots") if not os.path.exists(final_directory): os.makedirs(final_directory) os.chdir(final_directory) - for folder in ['freqBuckshot', 'normalized', 'unnormalized']: + for folder in ["freqBuckshot", "normalized", "unnormalized"]: new_directory = os.path.join(final_directory, folder) if not os.path.exists(new_directory): os.makedirs(new_directory) os.chdir(new_directory) - if 'freq' in folder: - for tech in ['sky90', 'sky130', 'tsmc28', 'tsmc28psyn']: + if "freq" in folder: + for tech in ["sky90", "sky130", "tsmc28", "tsmc28psyn"]: for mod in modules: tech_directory = os.path.join(new_directory, tech) mod_directory = os.path.join(tech_directory, mod) if not os.path.exists(mod_directory): os.makedirs(mod_directory) - os.chdir('..') - + os.chdir("..") + os.chdir(current_directory) - -if __name__ == '__main__': + + +if __name__ == "__main__": ############################## # set up stuff, global variables - widths = [64, 128] - modules = ['adder', 'comparator'] + widths = [8, 16, 32, 64, 128] + modules = ["adder", "comparator"] - normAddWidth = 32 # divisor to use with N since normalizing to add_32 + normAddWidth = 32 # divisor to use with N since normalizing to add_32 - fitDict = {'adder': ['cg', 'l', 'l'], 'mul': ['cg', 
's', 's'], 'comparator': ['cg', 'l', 'l'], 'csa': ['c', 'l', 'l'], 'shifter': ['cg', 'l', 'ln'], 'flop': ['c', 'l', 'l'], 'binencoder': ['cg', 'l', 'l']} - fitDict.update(dict.fromkeys(['mux2', 'mux4', 'mux8'], ['cg', 'l', 'l'])) + fitDict = { + "adder": ["cg", "l", "l"], + "mul": ["cg", "s", "s"], + "comparator": ["cg", "l", "l"], + "csa": ["c", "l", "l"], + "shifter": ["cg", "l", "ln"], + "flop": ["c", "l", "l"], + "binencoder": ["cg", "l", "l"], + } + fitDict.update(dict.fromkeys(["mux2", "mux4", "mux8"], ["cg", "l", "l"])) - TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy") - techSpecs = [['sky90', 'green', 'o', 43.2e-3, 1440.600027, 714.057, 0.658022690438], ['sky130', 'red', 'o', 43.2e-3, 1440.600027, 714.057, 0.658022690438], ['tsmc28', 'blue', '^', 12.2e-3, 209.286002, 1060.0, .08153281695882594], ['tsmc28psyn', 'blue', '^', 12.2e-3, 209.286002, 1060.0, .08153281695882594]] - techSpecs = [TechSpec(*t) for t in techSpecs] - combined = TechSpec('combined fit', 'red', '_', 0, 0, 0, 0) + TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy") + # FO4 delay information information + techSpecs = [ + # ["sky90", "green", "o", 43.2e-3, 1440.600027, 714.057, 0.658022690438], + # Area/Lpower/Denergy needs to be corrected here (jes) + ["sky130", "orange", "o", 99.5e-3, 1440.600027, 714.057, 0.658022690438], + # ["tsmc28", "blue", "^", 12.2e-3, 209.286002, 1060.0, 0.08153281695882594], + # ["tsmc28psyn", "blue", "^", 12.2e-3, 209.286002, 1060.0, 0.08153281695882594], + ] + techSpecs = [TechSpec(*t) for t in techSpecs] + combined = TechSpec("combined fit", "orange", "_", 0, 0, 0, 0) ############################## # cleanup() # run to remove garbage synth runs - synthsintocsv() # slow, run only when new synth runs to add to csv - - allSynths = synthsfromcsv('ppaData.csv') # your csv here! 
- bestSynths = csvOfBest('bestSynths.csv') - makePlotDirectory() + synthsintocsv() # slow, run only when new synth runs to add to csv + + allSynths = synthsfromcsv("ppaData.csv") # your csv here! + bestSynths = csvOfBest("bestSynths.csv") + makePlotDirectory() # ### other functions # makeCoefTable() @@ -734,12 +926,12 @@ if __name__ == '__main__': # muxPlot() # stdDevError() - for mod in modules: - for w in widths: - #freqPlot('sky90', mod, w) - freqPlot('sky130', mod, w) - #freqPlot('tsmc28', mod, w) - #freqPlot('tsmc28psyn', mod, w) - #plotPPA(mod, widths, norm=False) - #plotPPA(mod, aleOpt=True) - plt.close('all') + for mod in modules: + for w in widths: + # freqPlot('sky90', mod, w) + # freqPlot("sky130", mod, w) + # freqPlot('tsmc28', mod, w) + # freqPlot('tsmc28psyn', mod, w) + plotPPA(mod, norm=False) + # plotPPA(mod, aleOpt=True) + plt.close("all") From 9dce08a743060ceae695a544f9e7b038041a33e5 Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Tue, 14 Nov 2023 02:41:44 -0600 Subject: [PATCH 32/62] minor typo on ppaSynth and ppaAnalyze --- synthDC/ppa/ppaAnalyze.py | 31 ++++++++++++++----------------- synthDC/ppa/ppaSynth.py | 2 +- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/synthDC/ppa/ppaAnalyze.py b/synthDC/ppa/ppaAnalyze.py index 9af15fd80..bd98e79be 100755 --- a/synthDC/ppa/ppaAnalyze.py +++ b/synthDC/ppa/ppaAnalyze.py @@ -82,11 +82,9 @@ def synthsintocsv(): delay = 1000 / int(freq) - metrics[0] area = metrics[1] lpower = metrics[4] - # switching, internal power in mW and leakage in nW - tpower = metrics[2] + metrics[3] + metrics[4]*0.000001 - # EDP (fJ/GHz) + tpower = (metrics[2] + metrics[3] + metrics[4]*.000001) denergy = ( - (metrics[2] + metrics[3] + metrics[4]*0.000001) / int(freq) + (tpower) / int(freq) * 1000 ) # (switching + internal powers)*delay, more practical units for regression coefs if "flop" in module: # since two flops in each module @@ -304,7 +302,6 @@ def oneMetricPlot( allMetrics += metric # print(f"Widths 
passed into regress : {allWidths}") - # Not sure why this works (jes) - if allWidths doesn't have data widths does if len(allWidths) > 0: xp, pred, coefs, r2 = regress(allWidths, allMetrics, fits) ax.plot(xp, pred, color="orange", linestyle=ls) @@ -322,7 +319,7 @@ def oneMetricPlot( else: ylabeldic = { "lpower": "Leakage Power (nW)", - "denergy": "EDP (fJ/GHz)", + "denergy": "Dynamic Energy (nJ)", "area": "Area (sq microns)", "delay": "Delay (ns)", } @@ -355,9 +352,9 @@ def regress(widths, var, fits="clsgn", ale=False): returns lists of x and y values to plot that curve and coefs for the eq with r2 """ if len(var) != len(widths): - print( - f"There are not enough variables to match widths. Widths : {widths} Variables Found : {var}, padding to match may affect correctness (doing it anyways)\n" - ) + # print( + # f"There are not enough variables to match widths. Widths : {widths} Variables Found : {var}, padding to match may affect correctness (doing it anyways)\n" + # ) if len(widths) > len(var): while len(widths) > len(var): var.append(0.0) @@ -792,8 +789,8 @@ def muxPlot(fits="clsgn", norm=True): allMetrics += metric xp, pred, coefs, r2 = regress(allInputs, allMetrics, fits) - ax.plot(xp, pred, color="orange", linestyle=ls) - fullLeg += [lines.Line2D([0], [0], color="orange", label=crit, linestyle=ls)] + ax.plot(xp, pred, color="red", linestyle=ls) + fullLeg += [lines.Line2D([0], [0], color="red", label=crit, linestyle=ls)] ax.set_ylabel("Delay (FO4)") ax.set_xticks(inputs) @@ -885,7 +882,7 @@ if __name__ == "__main__": ############################## # set up stuff, global variables widths = [8, 16, 32, 64, 128] - modules = ["adder", "comparator"] + modules = ["adder"] normAddWidth = 32 # divisor to use with N since normalizing to add_32 @@ -903,14 +900,14 @@ if __name__ == "__main__": TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy") # FO4 delay information information techSpecs = [ - # ["sky90", "green", "o", 43.2e-3, 1440.600027, 
714.057, 0.658022690438], + #["sky90", "green", "o", 43.2e-3, 1440.600027, 714.057, 0.658022690438], # Area/Lpower/Denergy needs to be corrected here (jes) ["sky130", "orange", "o", 99.5e-3, 1440.600027, 714.057, 0.658022690438], # ["tsmc28", "blue", "^", 12.2e-3, 209.286002, 1060.0, 0.08153281695882594], # ["tsmc28psyn", "blue", "^", 12.2e-3, 209.286002, 1060.0, 0.08153281695882594], ] techSpecs = [TechSpec(*t) for t in techSpecs] - combined = TechSpec("combined fit", "orange", "_", 0, 0, 0, 0) + combined = TechSpec("combined fit", "red", "_", 0, 0, 0, 0) ############################## # cleanup() # run to remove garbage synth runs @@ -928,10 +925,10 @@ if __name__ == "__main__": for mod in modules: for w in widths: - # freqPlot('sky90', mod, w) - # freqPlot("sky130", mod, w) + #freqPlot('sky90', mod, w) + freqPlot("sky130", mod, w) # freqPlot('tsmc28', mod, w) # freqPlot('tsmc28psyn', mod, w) plotPPA(mod, norm=False) - # plotPPA(mod, aleOpt=True) + plotPPA(mod, aleOpt=True) plt.close("all") diff --git a/synthDC/ppa/ppaSynth.py b/synthDC/ppa/ppaSynth.py index 07a342e26..30fe1254f 100755 --- a/synthDC/ppa/ppaSynth.py +++ b/synthDC/ppa/ppaSynth.py @@ -84,7 +84,7 @@ if __name__ == '__main__': synthsToRun = freqSweep(module, width, tech) ##### Run a sweep for multiple modules/widths based on best delay found in existing syntheses - modules = ['adder', "comparator"] + modules = ['adder'] widths = [8, 16, 32, 64, 128] tech = 'sky130' synthsToRun = freqModuleSweep(widths, modules, tech) From 8ba0336c6f231eca478244f51121678786c0803d Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 14 Nov 2023 11:01:58 -0800 Subject: [PATCH 33/62] Removed unused addins, cleaned up configuration to support half precision on RV64gc, gate unused hazard inputs to reduce critical path in rv32e --- .gitmodules | 3 --- addins/embench-iot | 2 +- addins/riscv-arch-test | 2 +- addins/riscv-tests | 1 - config/rv64gc/config.vh | 2 +- src/hazard/hazard.sv | 37 +++++++++++++++++++++++++-------- 
src/wally/wallypipelinedcore.sv | 2 +- 7 files changed, 32 insertions(+), 17 deletions(-) delete mode 160000 addins/riscv-tests diff --git a/.gitmodules b/.gitmodules index 9a4c7fbb8..1e56898c8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -8,9 +8,6 @@ [submodule "addins/imperas-riscv-tests"] path = addins/imperas-riscv-tests url = https://github.com/riscv-ovpsim/imperas-riscv-tests -[submodule "addins/riscv-tests"] - path = addins/riscv-tests - url = https://github.com/riscv-software-src/riscv-tests [submodule "addins/riscv-dv"] path = addins/riscv-dv url = https://github.com/google/riscv-dv diff --git a/addins/embench-iot b/addins/embench-iot index 1480febc3..4c5eb8798 160000 --- a/addins/embench-iot +++ b/addins/embench-iot @@ -1 +1 @@ -Subproject commit 1480febc3ace5f471baeee4b1ae0d8fea16e4762 +Subproject commit 4c5eb87983f51ca7fcf7855306877b3d1c3aabf1 diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 197179fdc..2c5675d7a 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 197179fdc9dfeeca821e848f373c897a3fdae86c +Subproject commit 2c5675d7a58e98d47bef3a6cf5a8373397b0d0be diff --git a/addins/riscv-tests b/addins/riscv-tests deleted file mode 160000 index cf04274f5..000000000 --- a/addins/riscv-tests +++ /dev/null @@ -1 +0,0 @@ -Subproject commit cf04274f50621fd9ef9147793cca6dd1657985c7 diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index 8decf60d5..564b32f5d 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -42,7 +42,7 @@ localparam ZIFENCEI_SUPPORTED = 1; localparam COUNTERS = 12'd32; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; -localparam ZFH_SUPPORTED = 0; +localparam ZFH_SUPPORTED = 1; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; diff --git a/src/hazard/hazard.sv b/src/hazard/hazard.sv index cb70605c0..028dbf61d 100644 --- a/src/hazard/hazard.sv +++ b/src/hazard/hazard.sv @@ -26,7 +26,7 @@ 
// and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -module hazard ( +module hazard import cvw::*; #(parameter cvw_t P) ( // Detect hazards input logic BPWrongE, CSRWriteFenceM, RetM, TrapM, input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD, @@ -46,9 +46,28 @@ module hazard ( logic WFIStallM, WFIInterruptedM; + logic ValidWfiM, ValidTrapM, ValidRetM, ValidCSRWriteFenceM, ValidCSRRdStallD; + logic ValidFPUStallD, ValidFCvtIntStallD, ValidFDivBusyE, ValidMDUStallD, ValidDivBusyE; + + // Gate Stall/Flush sources with supported features + // This is not logically necessary because the original signals are already 0 when the feature is unsupported + // However, synthesis does not propagate the constant 0 across modules + // By gating these signals, synthesis eliminates unnecessary stall/flush logic, saving about 10% cycle time for rv32e + // These lines of code gating with a compile-time constant generate no hardware. + assign ValidWfiM = wfiM & P.ZICSR_SUPPORTED; + assign ValidTrapM = TrapM & P.ZICSR_SUPPORTED; + assign ValidRetM = RetM & P.ZICSR_SUPPORTED; + assign ValidCSRWriteFenceM = CSRWriteFenceM & P.ZICSR_SUPPORTED; + assign ValidCSRRdStallD = CSRRdStallD & P.ZICSR_SUPPORTED; + assign ValidFPUStallD = RetM & P.F_SUPPORTED; + assign ValidFCvtIntStallD = RetM & P.F_SUPPORTED; + assign ValidFDivBusyE = FDivBusyE & P.F_SUPPORTED; + assign ValidMDUStallD = MDUStallD & P.M_SUPPORTED; + assign ValidDivBusyE = DivBusyE & P.M_SUPPORTED; + // WFI logic - assign WFIStallM = wfiM & ~IntPendingM; // WFI waiting for an interrupt or timeout - assign WFIInterruptedM = wfiM & IntPendingM; // WFI detects a pending interrupt. Retire WFI; trap if interrupt is enabled. + assign WFIStallM = ValidWfiM & ~IntPendingM; // WFI waiting for an interrupt or timeout + assign WFIInterruptedM = ValidWfiM & IntPendingM; // WFI detects a pending interrupt. Retire WFI; trap if interrupt is enabled. 
// stalls and flushes // loads: stall for one cycle if the subsequent instruction depends on the load @@ -70,10 +89,10 @@ module hazard ( // Branch misprediction is found in the Execute stage and must flush the next two instructions. // However, an active division operation resides in the Execute stage, and when the BP incorrectly mispredicts the divide as a taken branch, the divde must still complete // When a WFI is interrupted and causes a trap, it flushes the rest of the pipeline but not the W stage, because the WFI needs to commit - assign FlushDCause = TrapM | RetM | CSRWriteFenceM | BPWrongE; - assign FlushECause = TrapM | RetM | CSRWriteFenceM |(BPWrongE & ~(DivBusyE | FDivBusyE)); - assign FlushMCause = TrapM | RetM | CSRWriteFenceM; - assign FlushWCause = TrapM & ~WFIInterruptedM; + assign FlushDCause = ValidTrapM | ValidRetM | ValidCSRWriteFenceM | BPWrongE; + assign FlushECause = ValidTrapM | ValidRetM | ValidCSRWriteFenceM |(BPWrongE & ~(ValidDivBusyE | ValidFDivBusyE)); + assign FlushMCause = ValidTrapM | ValidRetM | ValidCSRWriteFenceM; + assign FlushWCause = ValidTrapM & ~WFIInterruptedM; // Stall causes // Most data depenency stalls are identified in the decode stage @@ -84,8 +103,8 @@ module hazard ( // The IFU stalls the entire pipeline rather than just Fetch to avoid complications with instructions later in the pipeline causing Exceptions // A trap could be asserted at the start of a IFU/LSU stall, and should flush the memory operation assign StallFCause = '0; - assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FCvtIntStallD | FPUStallD) & ~FlushDCause; - assign StallECause = (DivBusyE | FDivBusyE) & ~FlushECause; + assign StallDCause = (LoadStallD | StoreStallD | ValidMDUStallD | ValidCSRRdStallD | ValidFCvtIntStallD | ValidFPUStallD) & ~FlushDCause; + assign StallECause = (ValidDivBusyE | ValidFDivBusyE) & ~FlushECause; assign StallMCause = WFIStallM & ~FlushMCause; // Need to gate IFUStallF when the equivalent 
FlushFCause = FlushDCause = 1. // assign StallWCause = ((IFUStallF & ~FlushDCause) | LSUStallM) & ~FlushWCause; diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 00b348660..46ffcac09 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -264,7 +264,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( end // global stall and flush control - hazard hzu( + hazard #(P) hzu( .BPWrongE, .CSRWriteFenceM, .RetM, .TrapM, .LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD, .LSUStallM, .IFUStallF, From 1ab7c926ea4ec7ac7a1ca9f96c1f60a3ac722b3a Mon Sep 17 00:00:00 2001 From: naichewa Date: Tue, 14 Nov 2023 13:44:59 -0800 Subject: [PATCH 34/62] Final Code Review --- src/uncore/spi_apb.sv | 321 +++++++++++++++++++----------------------- 1 file changed, 143 insertions(+), 178 deletions(-) diff --git a/src/uncore/spi_apb.sv b/src/uncore/spi_apb.sv index 4db435be6..b0649bf93 100644 --- a/src/uncore/spi_apb.sv +++ b/src/uncore/spi_apb.sv @@ -2,10 +2,14 @@ // spi_apb.sv // // Written: Naiche Whyte-Aguayo nwhyteaguayo@g.hmc.edu 11/16/2022 - // // Purpose: SPI peripheral -// See FU540-C000-v1.0 for specifications +// +// SPI module is written to the specifications described in FU540-C000-v1.0. At the top level, it is consists of synchronous 8 byte transmit and recieve FIFOs connected to shift registers. +// The FIFOs are connected to WALLY by an apb control register interface, which includes various control registers for modifying the SPI transmission along with registers for writing +// to the transmit FIFO and reading from the receive FIFO. The transmissions themselves are then controlled by a finite state machine. The SPI module uses 4 tristate pins for SPI input/output, +// along with a 4 bit Chip Select signal, a clock signal, and an interrupt signal to WALLY. 
+// Current limitations: Flash read sequencer mode not implemented, dual and quad mode not supported // // A component of the Wally configurable RISC-V project. // @@ -25,19 +29,6 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -// Current limitations: Flash read sequencer mode not implemented, dual and quad modes untestable with current test plan. - -// Attempt to move from >= comparisons by initializing in FSM differently -// Parameterize SynchFIFO -// look at ReadIncrement/WriteIncrement delay necessity - -/* -SPI module is written to the specifications described in FU540-C000-v1.0. At the top level, it is consists of synchronous 8 byte transmit and recieve FIFOs connected to shift registers. -The FIFOs are connected to WALLY by an apb control register interface, which includes various control registers for modifying the SPI transmission along with registers for writing -to the transmit FIFO and reading from the receive FIFO. The transmissions themselves are then controlled by a finite state machine. The SPI module uses 4 tristate pins for SPI input/output, -along with a 4 bit Chip Select signal, a clock signal, and an interrupt signal to WALLY. -*/ - module spi_apb import cvw::*; #(parameter cvw_t P) ( input logic PCLK, PRESETn, input logic PSEL, @@ -54,27 +45,27 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( output logic SPIIntr ); - //SPI control registers. Refer to SiFive FU540-C000 manual + // SPI control registers. 
Refer to SiFive FU540-C000 manual logic [11:0] SckDiv; - logic [1:0] SckMode; - logic [1:0] ChipSelectID; - logic [3:0] ChipSelectDef; - logic [1:0] ChipSelectMode; + logic [1:0] SckMode; + logic [1:0] ChipSelectID; + logic [3:0] ChipSelectDef; + logic [1:0] ChipSelectMode; logic [15:0] Delay0, Delay1; - logic [4:0] Format; - logic [7:0] ReceiveData; - logic [2:0] TransmitWatermark, ReceiveWatermark; - logic [8:0] TransmitData; - logic [1:0] InterruptEnable, InterruptPending; + logic [4:0] Format; + logic [7:0] ReceiveData; + logic [2:0] TransmitWatermark, ReceiveWatermark; + logic [8:0] TransmitData; + logic [1:0] InterruptEnable, InterruptPending; - //Bus interface signals + // Bus interface signals logic [7:0] Entry; logic Memwrite; logic [31:0] Din, Dout; - logic TransmitInactive; //High when there is no transmission, used as hardware interlock signal + logic TransmitInactive; // High when there is no transmission, used as hardware interlock signal - //FIFO FSM signals - //Watermark signals - TransmitReadMark = ip[0], ReceiveWriteMark = ip[1] + // FIFO FSM signals + // Watermark signals - TransmitReadMark = ip[0], ReceiveWriteMark = ip[1] logic TransmitWriteMark, TransmitReadMark, RecieveWriteMark, RecieveReadMark; logic TransmitFIFOWriteFull, TransmitFIFOReadEmpty; logic TransmitFIFOReadIncrement; @@ -83,75 +74,68 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( logic ReceiveFIFOWriteFull, ReceiveFIFOReadEmpty; logic [7:0] TransmitFIFOReadData, ReceiveFIFOWriteData; logic [2:0] TransmitWriteWatermarkLevel, ReceiveReadWatermarkLevel; - logic [7:0] ReceiveShiftRegEndian; //reverses ReceiveShiftReg if Format[2] set (little endian transmission) + logic [7:0] ReceiveShiftRegEndian; // Reverses ReceiveShiftReg if Format[2] set (little endian transmission) - //Transmission signals + // Transmission signals logic sck; - logic [11:0] DivCounter; //counter for sck - logic SCLKenable; //flip flop enable high every sclk edge + logic [11:0] DivCounter; // Counter 
for sck + logic SCLKenable; // Flip flop enable high every sclk edge - //Delay signals - logic [8:0] ImplicitDelay1; //Adds implicit delay to cs-sck delay counter based on phase - logic [8:0] ImplicitDelay2; //Adds implicit delay to sck-cs delay counter based on phase - logic [8:0] CS_SCKCount; //Counter for cs-sck delay - logic [8:0] SCK_CSCount; //Counter for sck-cs delay - logic [8:0] InterCSCount; //Counter for inter cs delay - logic [8:0] InterXFRCount; //Counter for inter xfr delay - logic CS_SCKCompare; //Boolean comparison signal, high when CS_SCKCount >= cs-sck delay - logic SCK_CSCompare; //Boolean comparison signal, high when SCK_CSCount >= sck-cs delay - logic InterCSCompare; //Boolean comparison signal, high when InterCSCount >= inter cs delay - logic InterXFRCompare; //Boolean comparison signal, high when InterXFRCount >= inter xfr delay - logic ZeroDelayHoldMode; //High when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0 + // Delay signals + logic [8:0] ImplicitDelay1; // Adds implicit delay to cs-sck delay counter based on phase + logic [8:0] ImplicitDelay2; // Adds implicit delay to sck-cs delay counter based on phase + logic [8:0] CS_SCKCount; // Counter for cs-sck delay + logic [8:0] SCK_CSCount; // Counter for sck-cs delay + logic [8:0] InterCSCount; // Counter for inter cs delay + logic [8:0] InterXFRCount; // Counter for inter xfr delay + logic ZeroDelayHoldMode; // High when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0 - //Frame counting signals - logic [3:0] FrameCount; //Counter for number of frames in transmission - logic FrameCompare; //Boolean comparison signal, high when FrameCount = Format[7:4] - logic [3:0] ReceivePenultimateFrame; //Frame number - 1 - logic [3:0] ReceivePenultimateFrameCount; //Counter - logic ReceivePenultimateFrameBoolean; //High when penultimate frame in transmission has been reached + // Frame counting signals + logic [3:0] FrameCount; // Counter for number of frames in 
transmission + logic [3:0] ReceivePenultimateFrameCount; // Counter + logic ReceivePenultimateFrame; // High when penultimate frame in transmission has been reached - //State fsm signals - logic Active; //High when state is either Active1 or Active0 (during transmission) - logic Active0; //High when state is Active0 + // State fsm signals + logic Active; // High when state is either Active1 or Active0 (during transmission) + logic Active0; // High when state is Active0 - //Shift reg signals - logic ShiftEdge; //Determines which edge of sck to shift from TransmitShiftReg - logic [7:0] TransmitShiftReg; //Transmit shift register - logic [7:0] ReceiveShiftReg; //Receive shift register - logic SampleEdge; //Determines which edge of sck to sample from ReceiveShiftReg - logic [7:0] TransmitDataEndian; //Reverses TransmitData from txFIFO if littleendian, since TransmitReg always shifts MSB - logic TransmitShiftRegLoad; //Determines when to load TransmitShiftReg - logic ReceiveShiftFull; //High when receive shift register is full - logic TransmitShiftEmpty; //High when transmit shift register is empty - logic ShiftIn; //Determines whether to shift from SPIIn or SPIOut (if SPI_LOOPBACK_TEST) - logic [3:0] LeftShiftAmount; //Determines left shift amount to left-align data when little endian - logic [7:0] ASR; //AlignedReceiveShiftReg + // Shift reg signals + logic ShiftEdge; // Determines which edge of sck to shift from TransmitShiftReg + logic [7:0] TransmitShiftReg; // Transmit shift register + logic [7:0] ReceiveShiftReg; // Receive shift register + logic SampleEdge; // Determines which edge of sck to sample from ReceiveShiftReg + logic [7:0] TransmitDataEndian; // Reverses TransmitData from txFIFO if littleendian, since TransmitReg always shifts MSB + logic TransmitShiftRegLoad; // Determines when to load TransmitShiftReg + logic ReceiveShiftFull; // High when receive shift register is full + logic TransmitShiftEmpty; // High when transmit shift register is empty + logic 
ShiftIn; // Determines whether to shift from SPIIn or SPIOut (if SPI_LOOPBACK_TEST) + logic [3:0] LeftShiftAmount; // Determines left shift amount to left-align data when little endian + logic [7:0] ASR; // AlignedReceiveShiftReg - //CS signals - logic [3:0] ChipSelectAuto; //Assigns ChipSelect value to selected CS signal based on CS ID - logic [3:0] ChipSelectInternal; //Defines what each ChipSelect signal should be based on transmission status and ChipSelectDef - logic DelayMode; //Determines where to place implicit half cycle delay based on sck phase for CS assertion + // CS signals + logic [3:0] ChipSelectAuto; // Assigns ChipSelect value to selected CS signal based on CS ID + logic [3:0] ChipSelectInternal; // Defines what each ChipSelect signal should be based on transmission status and ChipSelectDef + logic DelayMode; // Determines where to place implicit half cycle delay based on sck phase for CS assertion - //Miscellaneous signals delayed/early by 1 PCLK cycle - logic ReceiveShiftFullDelay; //Delays ReceiveShiftFull signal by 1 PCLK cycle - logic TransmitFIFOWriteIncrementDelay; //TransmitFIFOWriteIncrement delayed by 1 PCLK cycle - logic ReceiveShiftFullDelayPCLK; //ReceiveShiftFull delayed by 1 PCLK cycle + // Miscellaneous signals delayed/early by 1 PCLK cycle + logic ReceiveShiftFullDelay; // Delays ReceiveShiftFull signal by 1 PCLK cycle + logic ReceiveShiftFullDelayPCLK; // ReceiveShiftFull delayed by 1 PCLK cycle logic TransmitFIFOReadEmptyDelay; - logic SCLKenableEarly; //SCLKenable 1 PCLK cycle early, needed for on time register changes when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0 + logic SCLKenableEarly; // SCLKenable 1 PCLK cycle early, needed for on time register changes when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0 - //APB access - assign Entry = {PADDR[7:2],2'b00}; // 32-bit word-aligned accesses - assign Memwrite = PWRITE & PENABLE & PSEL; // only write in access phase - assign PREADY = 
TransmitInactive; // tie PREADY to transmission for hardware interlock + // APB access + assign Entry = {PADDR[7:2],2'b00}; // 32-bit word-aligned accesses + assign Memwrite = PWRITE & PENABLE & PSEL; // Only write in access phase + assign PREADY = TransmitInactive; // Tie PREADY to transmission for hardware interlock - //Account for subword read/write circuitry + // Account for subword read/write circuitry // -- Note SPI registers are 32 bits no matter what; access them with LW SW. assign Din = PWDATA[31:0]; if (P.XLEN == 64) assign PRDATA = {Dout, Dout}; else assign PRDATA = Dout; - //Register access + // Register access always_ff@(posedge PCLK, negedge PRESETn) if (~PRESETn) begin SckDiv <= #1 12'd3; @@ -167,13 +151,12 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( ReceiveWatermark <= #1 3'b0; InterruptEnable <= #1 2'b0; InterruptPending <= #1 2'b0; - end else begin //writes - //According to FU540 spec: Once interrupt is pending, it will remain set until number - //of entries in tx/rx fifo is strictly more/less than tx/rxmark + end else begin // writes + /* verilator lint_off CASEINCOMPLETE */ if (Memwrite & TransmitInactive) - case(Entry) //flop to sample inputs + case(Entry) // flop to sample inputs 8'h00: SckDiv <= Din[11:0]; 8'h04: SckMode <= Din[1:0]; 8'h10: ChipSelectID <= Din[1:0]; @@ -188,18 +171,21 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( 8'h70: InterruptEnable <= Din[1:0]; endcase /* verilator lint_off CASEINCOMPLETE */ - //interrupt clearance + + // According to FU540 spec: Once interrupt is pending, it will remain set until number + // of entries in tx/rx fifo is strictly more/less than tx/rxmark InterruptPending[0] <= TransmitReadMark; InterruptPending[1] <= RecieveWriteMark; - case(Entry) // flop to sample inputs + + case(Entry) // Flop to sample inputs 8'h00: Dout <= #1 {20'b0, SckDiv}; 8'h04: Dout <= #1 {30'b0, SckMode}; 8'h10: Dout <= #1 {30'b0, ChipSelectID}; 8'h14: Dout <= #1 {28'b0, ChipSelectDef}; 8'h18: Dout <= #1 
{30'b0, ChipSelectMode}; - 8'h28: Dout <= {8'b0, Delay0[15:8], 8'b0, Delay0[7:0]}; - 8'h2C: Dout <= {8'b0, Delay1[15:8], 8'b0, Delay1[7:0]}; - 8'h40: Dout <= {12'b0, Format[4:1], 13'b0, Format[0], 2'b0}; + 8'h28: Dout <= #1 {8'b0, Delay0[15:8], 8'b0, Delay0[7:0]}; + 8'h2C: Dout <= #1 {8'b0, Delay1[15:8], 8'b0, Delay1[7:0]}; + 8'h40: Dout <= #1 {12'b0, Format[4:1], 13'b0, Format[0], 2'b0}; 8'h48: Dout <= #1 {23'b0, TransmitFIFOWriteFull, 8'b0}; 8'h4C: Dout <= #1 {23'b0, ReceiveFIFOReadEmpty, ReceiveData[7:0]}; 8'h50: Dout <= #1 {29'b0, TransmitWatermark}; @@ -210,8 +196,9 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( endcase end - //SPI enable generation, where SCLK = PCLK/(2*(SckDiv + 1)) - //Generates a high signal at the rising and falling edge of SCLK by counting from 0 to SckDiv + // SPI enable generation, where SCLK = PCLK/(2*(SckDiv + 1)) + // Asserts SCLKenable at the rising and falling edge of SCLK by counting from 0 to SckDiv + // Active at 2x SCLK frequency to account for implicit half cycle delays and actions on both clock edges depending on phase assign SCLKenable = (DivCounter == SckDiv); assign SCLKenableEarly = ((DivCounter + 12'b1) == SckDiv); always_ff @(posedge PCLK, negedge PRESETn) @@ -219,44 +206,38 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( else if (SCLKenable) DivCounter <= 0; else DivCounter <= DivCounter + 12'b1; - //Boolean logic that tracks frame progression - assign FrameCompare = (FrameCount < Format[4:1]); - assign ReceivePenultimateFrameBoolean = ((FrameCount + 4'b0001) == Format[4:1]); + // Asserts when transmission is one frame before complete + assign ReceivePenultimateFrame = ((FrameCount + 4'b0001) == Format[4:1]); - //Computing delays + // Computing delays // When sckmode.pha = 0, an extra half-period delay is implicit in the cs-sck delay, and vice-versa for sck-cs assign ImplicitDelay1 = SckMode[0] ? 9'b0 : 9'b1; assign ImplicitDelay2 = SckMode[0] ? 
9'b1 : 9'b0; - assign CS_SCKCompare = CS_SCKCount >= (({Delay0[7:0], 1'b0}) + ImplicitDelay1); - assign SCK_CSCompare = SCK_CSCount >= (({Delay0[15:8], 1'b0}) + ImplicitDelay2); - assign InterCSCompare = (InterCSCount >= ({Delay1[7:0],1'b0})); - assign InterXFRCompare = (InterXFRCount >= ({Delay1[15:8], 1'b0})); + // Calculate when tx/rx shift registers are full/empty + TransmitShiftFSM TransmitShiftFSM(PCLK, PRESETn, TransmitFIFOReadEmpty, ReceivePenultimateFrame, Active0, TransmitShiftEmpty); + ReceiveShiftFSM ReceiveShiftFSM(PCLK, PRESETn, SCLKenable, ReceivePenultimateFrame, SampleEdge, SckMode[0], ReceiveShiftFull); - //Calculate when tx/rx shift registers are full/empty - TransmitShiftFSM TransmitShiftFSM_1 (PCLK, PRESETn, TransmitFIFOReadEmpty, ReceivePenultimateFrameBoolean, Active0, TransmitShiftEmpty); - ReceiveShiftFSM ReceiveShiftFSM_1 (PCLK, PRESETn, SCLKenable, ReceivePenultimateFrameBoolean, SampleEdge, SckMode[0], ReceiveShiftFull); - - //Calculate tx/rx fifo write and recieve increment signals - assign TransmitFIFOWriteIncrement = (Memwrite & (Entry == 8'h48) & ~TransmitFIFOWriteFull & TransmitInactive); + // Calculate tx/rx fifo write and recieve increment signals always_ff @(posedge PCLK, negedge PRESETn) - if (~PRESETn) TransmitFIFOWriteIncrementDelay <= 0; - else TransmitFIFOWriteIncrementDelay <= TransmitFIFOWriteIncrement; + if (~PRESETn) TransmitFIFOWriteIncrement <= 0; + else TransmitFIFOWriteIncrement <= (Memwrite & (Entry == 8'h48) & ~TransmitFIFOWriteFull & TransmitInactive); always_ff @(posedge PCLK, negedge PRESETn) if (~PRESETn) ReceiveFIFOReadIncrement <= 0; else ReceiveFIFOReadIncrement <= ((Entry == 8'h4C) & ~ReceiveFIFOReadEmpty & PSEL & ~ReceiveFIFOReadIncrement); - //Tx/Rx FIFOs - SynchFIFO #(3,8) txFIFO(PCLK, 1'b1, SCLKenable, PRESETn, TransmitFIFOWriteIncrementDelay, TransmitShiftEmpty, TransmitData[7:0], TransmitWriteWatermarkLevel, TransmitWatermark[2:0], TransmitFIFOReadData[7:0], TransmitFIFOWriteFull, 
TransmitFIFOReadEmpty, TransmitWriteMark, TransmitReadMark); - SynchFIFO #(3,8) rxFIFO(PCLK, SCLKenable, 1'b1, PRESETn, ReceiveShiftFullDelay, ReceiveFIFOReadIncrement, ReceiveShiftRegEndian, ReceiveWatermark[2:0], ReceiveReadWatermarkLevel, ReceiveData[7:0], ReceiveFIFOWriteFull, ReceiveFIFOReadEmpty, RecieveWriteMark, RecieveReadMark); + // Tx/Rx FIFOs + SynchFIFO #(3,8) txFIFO(PCLK, 1'b1, SCLKenable, PRESETn, TransmitFIFOWriteIncrement, TransmitShiftEmpty, TransmitData[7:0], TransmitWriteWatermarkLevel, TransmitWatermark[2:0], + TransmitFIFOReadData[7:0], TransmitFIFOWriteFull, TransmitFIFOReadEmpty, TransmitWriteMark, TransmitReadMark); + SynchFIFO #(3,8) rxFIFO(PCLK, SCLKenable, 1'b1, PRESETn, ReceiveShiftFullDelay, ReceiveFIFOReadIncrement, ReceiveShiftRegEndian, ReceiveWatermark[2:0], ReceiveReadWatermarkLevel, + ReceiveData[7:0], ReceiveFIFOWriteFull, ReceiveFIFOReadEmpty, RecieveWriteMark, RecieveReadMark); always_ff @(posedge PCLK, negedge PRESETn) if (~PRESETn) TransmitFIFOReadEmptyDelay <= 1; else if (SCLKenable) TransmitFIFOReadEmptyDelay <= TransmitFIFOReadEmpty; - always_ff @(posedge PCLK, negedge PRESETn) if (~PRESETn) ReceiveShiftFullDelay <= 0; else if (SCLKenable) ReceiveShiftFullDelay <= ReceiveShiftFull; @@ -266,16 +247,16 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( assign TransmitShiftRegLoad = ~TransmitShiftEmpty & ~Active | (((ChipSelectMode == 2'b10) & ~|(Delay1[15:8])) & ((ReceiveShiftFullDelay | ReceiveShiftFull) & ~SampleEdge & ~TransmitFIFOReadEmpty)); - //Main FSM which controls SPI transmission + // Main FSM which controls SPI transmission typedef enum logic [2:0] {CS_INACTIVE, DELAY_0, ACTIVE_0, ACTIVE_1, DELAY_1,INTER_CS, INTER_XFR} statetype; statetype state; always_ff @(posedge PCLK, negedge PRESETn) - if (~PRESETn) begin state <= CS_INACTIVE; + if (~PRESETn) begin + state <= CS_INACTIVE; FrameCount <= 4'b0; - - /* verilator lint_off CASEINCOMPLETE */ end else if (SCLKenable) begin + /* verilator lint_off 
CASEINCOMPLETE */ case (state) CS_INACTIVE: begin CS_SCKCount <= 9'b1; @@ -288,7 +269,7 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( end DELAY_0: begin CS_SCKCount <= CS_SCKCount + 9'b1; - if (CS_SCKCompare) state <= ACTIVE_0; + if (CS_SCKCount >= (({Delay0[7:0], 1'b0}) + ImplicitDelay1)) state <= ACTIVE_0; end ACTIVE_0: begin FrameCount <= FrameCount + 4'b1; @@ -296,7 +277,7 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( end ACTIVE_1: begin InterXFRCount <= 9'b1; - if (FrameCompare) state <= ACTIVE_0; + if (FrameCount < Format[4:1]) state <= ACTIVE_0; else if ((ChipSelectMode[1:0] == 2'b10) & ~|(Delay1[15:8]) & (~TransmitFIFOReadEmpty)) begin state <= ACTIVE_0; CS_SCKCount <= 9'b1; @@ -310,11 +291,11 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( end DELAY_1: begin SCK_CSCount <= SCK_CSCount + 9'b1; - if (SCK_CSCompare) state <= INTER_CS; + if (SCK_CSCount >= (({Delay0[15:8], 1'b0}) + ImplicitDelay2)) state <= INTER_CS; end INTER_CS: begin InterCSCount <= InterCSCount + 9'b1; - if (InterCSCompare ) state <= CS_INACTIVE; + if (InterCSCount >= ({Delay1[7:0],1'b0})) state <= CS_INACTIVE; end INTER_XFR: begin CS_SCKCount <= 9'b1; @@ -322,13 +303,14 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( FrameCount <= 4'b0; InterCSCount <= 9'b10; InterXFRCount <= InterXFRCount + 9'b1; - if (InterXFRCompare & ~TransmitFIFOReadEmptyDelay) state <= ACTIVE_0; + if ((InterXFRCount >= ({Delay1[15:8], 1'b0})) & ~TransmitFIFOReadEmptyDelay) state <= ACTIVE_0; else if (~|ChipSelectMode[1:0]) state <= CS_INACTIVE; end endcase + /* verilator lint_off CASEINCOMPLETE */ end - /* verilator lint_off CASEINCOMPLETE */ + assign DelayMode = SckMode[0] ? (state == DELAY_1) : (state == ACTIVE_1 & ReceiveShiftFull); assign ChipSelectInternal = (state == CS_INACTIVE | state == INTER_CS | DelayMode & ~|(Delay0[15:8])) ? 
ChipSelectDef : ~ChipSelectDef; @@ -339,7 +321,7 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( assign TransmitInactive = ((state == INTER_CS) | (state == CS_INACTIVE) | (state == INTER_XFR) | (ReceiveShiftFullDelayPCLK & ZeroDelayHoldMode)); assign Active0 = (state == ACTIVE_0); - //Signal tracks which edge of sck to shift data + // Signal tracks which edge of sck to shift data always_comb case(SckMode[1:0]) 2'b00: ShiftEdge = ~sck & SCLKenable; @@ -349,36 +331,36 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( default: ShiftEdge = sck & SCLKenable; endcase - //Transmit shift register - assign TransmitDataEndian = Format[0] ? {TransmitFIFOReadData[0], TransmitFIFOReadData[1], TransmitFIFOReadData[2], TransmitFIFOReadData[3], TransmitFIFOReadData[4], TransmitFIFOReadData[5], TransmitFIFOReadData[6], TransmitFIFOReadData[7]} : TransmitFIFOReadData[7:0]; + // Transmit shift register + assign TransmitDataEndian = Format[0] ? {TransmitFIFOReadData[0], TransmitFIFOReadData[1], TransmitFIFOReadData[2], TransmitFIFOReadData[3], TransmitFIFOReadData[4], TransmitFIFOReadData[5], TransmitFIFOReadData[6], TransmitFIFOReadData[7]} : TransmitFIFOReadData[7:0]; always_ff @(posedge PCLK, negedge PRESETn) if(~PRESETn) TransmitShiftReg <= 8'b0; else if (TransmitShiftRegLoad) TransmitShiftReg <= TransmitDataEndian; - else if (ShiftEdge & Active) TransmitShiftReg <= {TransmitShiftReg[6:0], 1'b0}; + else if (ShiftEdge & Active) TransmitShiftReg <= {TransmitShiftReg[6:0], 1'b0}; assign SPIOut = TransmitShiftReg[7]; - //If in loopback mode, receive shift register is connected directly to module's output pins. Else, connected to SPIIn - //There are no setup/hold time issues because transmit shift register and receive shift register always shift/sample on opposite edges + // If in loopback mode, receive shift register is connected directly to module's output pins. 
Else, connected to SPIIn + // There are no setup/hold time issues because transmit shift register and receive shift register always shift/sample on opposite edges assign ShiftIn = P.SPI_LOOPBACK_TEST ? SPIOut : SPIIn; - //Receive shift register + // Receive shift register always_ff @(posedge PCLK, negedge PRESETn) if(~PRESETn) ReceiveShiftReg <= 8'b0; else if (SampleEdge & SCLKenable) begin - if (~Active) ReceiveShiftReg <= 8'b0; - else ReceiveShiftReg <= {ReceiveShiftReg[6:0], ShiftIn}; + if (~Active) ReceiveShiftReg <= 8'b0; + else ReceiveShiftReg <= {ReceiveShiftReg[6:0], ShiftIn}; end - //Aligns received data and reverses if little-endian + // Aligns received data and reverses if little-endian assign LeftShiftAmount = 4'h8 - Format[4:1]; assign ASR = ReceiveShiftReg << LeftShiftAmount[2:0]; assign ReceiveShiftRegEndian = Format[0] ? {ASR[0], ASR[1], ASR[2], ASR[3], ASR[4], ASR[5], ASR[6], ASR[7]} : ASR[7:0]; - //Interrupt logic: raise interrupt if any enabled interrupts are pending + // Interrupt logic: raise interrupt if any enabled interrupts are pending assign SPIIntr = |(InterruptPending & InterruptEnable); - //Chip select logic + // Chip select logic always_comb case(ChipSelectID[1:0]) 2'b00: ChipSelectAuto = {ChipSelectDef[3], ChipSelectDef[2], ChipSelectDef[1], ChipSelectInternal[0]}; @@ -390,14 +372,14 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( assign SPICS = ChipSelectMode[0] ? 
ChipSelectDef : ChipSelectAuto; endmodule -module SynchFIFO #(parameter M =3 , N= 8)( - input logic PCLK, wen, ren, PRESETn, - input logic winc,rinc, - input logic [N-1:0] wdata, - input logic [M-1:0] wwatermarklevel, rwatermarklevel, +module SynchFIFO #(parameter M=3, N=8)( // 2^M entries of N bits each + input logic PCLK, wen, ren, PRESETn, + input logic winc, rinc, + input logic [N-1:0] wdata, + input logic [M-1:0] wwatermarklevel, rwatermarklevel, output logic [N-1:0] rdata, - output logic wfull, rempty, - output logic wwatermark, rwatermark); + output logic wfull, rempty, + output logic wwatermark, rwatermark); /* Pointer FIFO using design elements from "Simulation and Synthesis Techniques for Asynchronous FIFO Design" by Clifford E. Cummings. Namely, M bit read and write pointers @@ -409,8 +391,6 @@ module SynchFIFO #(parameter M =3 , N= 8)( logic [N-1:0] mem[2**M]; logic [M:0] rptr, wptr; logic [M:0] rptrnext, wptrnext; - logic rempty_val; - logic wfull_val; logic [M-1:0] raddr; logic [M-1:0] waddr; @@ -428,53 +408,43 @@ module SynchFIFO #(parameter M =3 , N= 8)( end else begin if (wen) begin - wfull <= wfull_val; + wfull <= ({~wptrnext[M], wptrnext[M-1:0]} == rptr); wptr <= wptrnext; end if (ren) begin rptr <= rptrnext; - rempty <= rempty_val; + rempty <= (wptr == rptrnext); end end - + assign raddr = rptr[M-1:0]; - assign rptrnext = rptr + {3'b0, (rinc & ~rempty)}; - assign rempty_val = (wptr == rptrnext); + assign rptrnext = rptr + {{(M){1'b0}}, (rinc & ~rempty)}; assign rwatermark = ((waddr - raddr) < rwatermarklevel) & ~wfull; assign waddr = wptr[M-1:0]; assign wwatermark = ((waddr - raddr) > wwatermarklevel) | wfull; - assign wptrnext = wptr + {3'b0, (winc & ~wfull)}; - assign wfull_val = ({~wptrnext[M], wptrnext[M-1:0]} == rptr); + assign wptrnext = wptr + {{(M){1'b0}}, (winc & ~wfull)}; endmodule module TransmitShiftFSM( - input logic PCLK, PRESETn, - input logic TransmitFIFOReadEmpty, ReceivePenultimateFrameBoolean, Active0, + input logic PCLK, 
PRESETn, + input logic TransmitFIFOReadEmpty, ReceivePenultimateFrame, Active0, output logic TransmitShiftEmpty); - typedef enum logic [1:0] {TransmitShiftEmptyState, TransmitShiftHoldState, TransmitShiftNotEmptyState} statetype; - statetype TransmitState, TransmitNextState; always_ff @(posedge PCLK, negedge PRESETn) - if (~PRESETn) TransmitState <= TransmitShiftEmptyState; - else TransmitState <= TransmitNextState; + if (~PRESETn) TransmitShiftEmpty <= 1; + else if (TransmitShiftEmpty) begin + if (TransmitFIFOReadEmpty | (~TransmitFIFOReadEmpty & (ReceivePenultimateFrame & Active0))) TransmitShiftEmpty <= 1; + else if (~TransmitFIFOReadEmpty) TransmitShiftEmpty <= 0; + end else begin + if (ReceivePenultimateFrame & Active0) TransmitShiftEmpty <= 1; + else TransmitShiftEmpty <= 0; + end - always_comb - case(TransmitState) - TransmitShiftEmptyState: begin - if (TransmitFIFOReadEmpty | (~TransmitFIFOReadEmpty & (ReceivePenultimateFrameBoolean & Active0))) TransmitNextState = TransmitShiftEmptyState; - else if (~TransmitFIFOReadEmpty) TransmitNextState = TransmitShiftNotEmptyState; - end - TransmitShiftNotEmptyState: begin - if (ReceivePenultimateFrameBoolean & Active0) TransmitNextState = TransmitShiftEmptyState; - else TransmitNextState = TransmitShiftNotEmptyState; - end - endcase - assign TransmitShiftEmpty = (TransmitNextState == TransmitShiftEmptyState); endmodule module ReceiveShiftFSM( - input logic PCLK, PRESETn, SCLKenable, - input logic ReceivePenultimateFrameBoolean, SampleEdge, SckMode, + input logic PCLK, PRESETn, SCLKenable, + input logic ReceivePenultimateFrame, SampleEdge, SckMode, output logic ReceiveShiftFull ); typedef enum logic [1:0] {ReceiveShiftFullState, ReceiveShiftNotFullState, ReceiveShiftDelayState} statetype; @@ -484,17 +454,12 @@ module ReceiveShiftFSM( else if (SCLKenable) begin case (ReceiveState) ReceiveShiftFullState: ReceiveState <= ReceiveShiftNotFullState; - ReceiveShiftNotFullState: if (ReceivePenultimateFrameBoolean & 
(SampleEdge)) ReceiveState <= ReceiveShiftDelayState; + ReceiveShiftNotFullState: if (ReceivePenultimateFrame & (SampleEdge)) ReceiveState <= ReceiveShiftDelayState; else ReceiveState <= ReceiveShiftNotFullState; - ReceiveShiftDelayState: ReceiveState <= ReceiveShiftFullState; + ReceiveShiftDelayState: ReceiveState <= ReceiveShiftFullState; endcase end - assign ReceiveShiftFull = SckMode ? (ReceiveState == ReceiveShiftFullState) : (ReceiveState == ReceiveShiftDelayState); + assign ReceiveShiftFull = SckMode ? (ReceiveState == ReceiveShiftFullState) : (ReceiveState == ReceiveShiftDelayState); endmodule - - - - - From 5e9157244b841b912fd01999b350a32b3acd29a9 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 14 Nov 2023 15:18:16 -0800 Subject: [PATCH 35/62] Restored Zfh to 0 for rv64gc because it breaks floating-point tests --- config/rv64gc/config.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index 564b32f5d..8decf60d5 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -42,7 +42,7 @@ localparam ZIFENCEI_SUPPORTED = 1; localparam COUNTERS = 12'd32; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; -localparam ZFH_SUPPORTED = 1; +localparam ZFH_SUPPORTED = 0; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; From 18c29dd7d0d309b56b72fd083f2077fb0de61f89 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 15 Nov 2023 05:46:38 -0800 Subject: [PATCH 36/62] Removed riscv-arch-test submodule that appears corrupted --- .gitmodules | 4 ---- addins/riscv-arch-test | 1 - 2 files changed, 5 deletions(-) delete mode 160000 addins/riscv-arch-test diff --git a/.gitmodules b/.gitmodules index 1e56898c8..dfb5fcf20 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,10 +1,6 @@ [submodule "sky130/sky130_osu_sc_t12"] path = sky130/sky130_osu_sc_t12 url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12/ 
-[submodule "addins/riscv-arch-test"] - path = addins/riscv-arch-test - url = https://github.com/riscv-non-isa/riscv-arch-test - ignore = dirty [submodule "addins/imperas-riscv-tests"] path = addins/imperas-riscv-tests url = https://github.com/riscv-ovpsim/imperas-riscv-tests diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test deleted file mode 160000 index 2c5675d7a..000000000 --- a/addins/riscv-arch-test +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 2c5675d7a58e98d47bef3a6cf5a8373397b0d0be From 90cf128349408643056907f63cfc5f29a8089784 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 15 Nov 2023 05:48:33 -0800 Subject: [PATCH 37/62] Added back riscv-arch-test fresh --- .gitmodules | 3 +++ addins/riscv-arch-test | 1 + 2 files changed, 4 insertions(+) create mode 160000 addins/riscv-arch-test diff --git a/.gitmodules b/.gitmodules index dfb5fcf20..361441a18 100644 --- a/.gitmodules +++ b/.gitmodules @@ -26,3 +26,6 @@ [submodule "addins/vivado-risc-v"] path = addins/vivado-risc-v url = https://github.com/eugene-tarassov/vivado-risc-v.git +[submodule "addins/riscv-arch-test"] + path = addins/riscv-arch-test + url = https://github.com/riscv-non-isa/riscv-arch-test diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test new file mode 160000 index 000000000..4eea0a0f0 --- /dev/null +++ b/addins/riscv-arch-test @@ -0,0 +1 @@ +Subproject commit 4eea0a0f0e21f2613a114e45a5ad738e721c4044 From 1c4b3e37b1e16855568f6873266980953057fbd7 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 15 Nov 2023 06:05:55 -0800 Subject: [PATCH 38/62] Removed riscv-arch-test submodule that was corrupted --- .gitmodules | 3 --- addins/riscv-arch-test | 1 - 2 files changed, 4 deletions(-) delete mode 160000 addins/riscv-arch-test diff --git a/.gitmodules b/.gitmodules index 361441a18..dfb5fcf20 100644 --- a/.gitmodules +++ b/.gitmodules @@ -26,6 +26,3 @@ [submodule "addins/vivado-risc-v"] path = addins/vivado-risc-v url = 
https://github.com/eugene-tarassov/vivado-risc-v.git -[submodule "addins/riscv-arch-test"] - path = addins/riscv-arch-test - url = https://github.com/riscv-non-isa/riscv-arch-test diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test deleted file mode 160000 index 4eea0a0f0..000000000 --- a/addins/riscv-arch-test +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4eea0a0f0e21f2613a114e45a5ad738e721c4044 From 20afaa558a2630042401250e6a5f8dc72b5c4259 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 15 Nov 2023 06:07:57 -0800 Subject: [PATCH 39/62] Added back in riscv-arch-test --- .gitmodules | 3 +++ addins/riscv-arch-test | 1 + 2 files changed, 4 insertions(+) create mode 160000 addins/riscv-arch-test diff --git a/.gitmodules b/.gitmodules index dfb5fcf20..361441a18 100644 --- a/.gitmodules +++ b/.gitmodules @@ -26,3 +26,6 @@ [submodule "addins/vivado-risc-v"] path = addins/vivado-risc-v url = https://github.com/eugene-tarassov/vivado-risc-v.git +[submodule "addins/riscv-arch-test"] + path = addins/riscv-arch-test + url = https://github.com/riscv-non-isa/riscv-arch-test diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test new file mode 160000 index 000000000..4eea0a0f0 --- /dev/null +++ b/addins/riscv-arch-test @@ -0,0 +1 @@ +Subproject commit 4eea0a0f0e21f2613a114e45a5ad738e721c4044 From 79d6fe8c936466d8c1b684c25c670c35b4d6ef15 Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Wed, 15 Nov 2023 08:45:25 -0600 Subject: [PATCH 40/62] Add wrapper passing automatically for individual designs vs. 
Wally --- synthDC/Makefile | 40 ++++++++++++++++++++++++++++++++++------ synthDC/ppa/ppaSynth.py | 4 ++-- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/synthDC/Makefile b/synthDC/Makefile index d43a36b50..3e344e8d2 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -1,7 +1,28 @@ -# -# Makefile for synthesis -# Shreya Sanghai (ssanghai@hmc.edu) 2/28/2022 -# Madeleine Masser-Frye (mmasserfrye@hmc.edu) 1/27/2023 +##################### +# Makefile +# +# Written: ssanghai@hmc.edu, mmasserfrye@hmc.edu, james.stine@okstate.edu 15 November 2023 +# +# Purpose: Makefile to be used for synthesis using DC +# +# A component of the Wally configurable RISC-V project. +# +# Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +# +# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +# +# Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +# except in compliance with the License, or, at your option, the Apache License version 2.0. You +# may obtain a copy of the License at +# +# https:#solderpad.org/licenses/SHL-2.1/ +# +# Unless required by applicable law or agreed to in writing, any work distributed under the +# License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied. See the License for the specific language governing permissions +# and limitations under the License. 
+################################################ + NAME := synth # defaults export DESIGN ?= wallypipelinedcore @@ -21,11 +42,18 @@ export MAXOPT ?= 0 export DRIVE ?= FLOP export USESRAM ?= 0 export WIDTH ?= 32 +export WRAPPER ?= 1 +export SAIFPOWER ?= 0 time := $(shell date +%F-%H-%M) hash := $(shell git rev-parse --short HEAD) -export OUTPUTDIR := runs/$(DESIGN)_$(WIDTH)_$(CONFIG)_$(TECH)_$(FREQ)_MHz_$(time)_$(TITLE)_$(hash) -export SAIFPOWER ?= 0 +# This is done to create different naming conventions to help the PPA python +# TODO: cleanup later to utilize better parsing/lexing +ifeq ($(WRAPPER), 0) + export OUTPUTDIR := runs/$(DESIGN)_$(WIDTH)_$(CONFIG)_$(TECH)_$(FREQ)_MHz_$(time)_$(TITLE)_$(hash) +else + export OUTPUTDIR := runs/$(DESIGN)_$(CONFIG)_$(MOD)_$(TECH)_$(FREQ)_MHz_$(time)_$(TITLE)_$(hash) +endif OLDCONFIGDIR ?= ${WALLY}/config export CONFIGDIR ?= $(OUTPUTDIR)/config diff --git a/synthDC/ppa/ppaSynth.py b/synthDC/ppa/ppaSynth.py index 30fe1254f..0c4744c26 100755 --- a/synthDC/ppa/ppaSynth.py +++ b/synthDC/ppa/ppaSynth.py @@ -11,7 +11,7 @@ from multiprocessing import Pool from ppaAnalyze import synthsfromcsv def runCommand(module, width, tech, freq): - command = "make synth DESIGN={} WIDTH={} TECH={} DRIVE=INV FREQ={} MAXOPT=1 MAXCORES=1".format(module, width, tech, freq) + command = "make synth DESIGN={} WIDTH={} TECH={} DRIVE=INV FREQ={} MAXOPT=1 MAXCORES=1 WRAPPER=0".format(module, width, tech, freq) subprocess.call(command, shell=True) def deleteRedundant(synthsToRun): @@ -95,4 +95,4 @@ if __name__ == '__main__': pool.starmap(runCommand, synthsToRun) pool.close() -pool.join() \ No newline at end of file +pool.join() From 8ca1e3ba374aa50e8664473e3a2d1712ab519518 Mon Sep 17 00:00:00 2001 From: "James E. 
Stine" Date: Wed, 15 Nov 2023 08:48:07 -0600 Subject: [PATCH 41/62] missing synth.tcl added for use with wrapper --- synthDC/scripts/synth.tcl | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl index cd4d6ff27..668b1c215 100755 --- a/synthDC/scripts/synth.tcl +++ b/synthDC/scripts/synth.tcl @@ -1,7 +1,27 @@ +##################### +# synth.tcl # -# Synthesis Synopsys Flow -# james.stine@okstate.edu 27 Sep 2015 +# Written: james.stine@okstate.edu 15 November 2023 # +# Purpose: Baseline DC Tcl file +# +# A component of the Wally configurable RISC-V project. +# +# Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +# +# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +# +# Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +# except in compliance with the License, or, at your option, the Apache License version 2.0. You +# may obtain a copy of the License at +# +# https:#solderpad.org/licenses/SHL-2.1/ +# +# Unless required by applicable law or agreed to in writing, any work distributed under the +# License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied. See the License for the specific language governing permissions +# and limitations under the License. 
+################################################ # start run clock set t1 [clock seconds] @@ -26,6 +46,7 @@ set saifpower $::env(SAIFPOWER) set maxopt $::env(MAXOPT) set drive $::env(DRIVE) set width $::env(WIDTH) +set wrapper $::env(WRAPPER) eval file copy -force [glob ${cfg}/*.vh] {$outputDir/hdl/} eval file copy -force [glob ${hdl_src}/cvw.sv] {$outputDir/hdl/} @@ -33,7 +54,6 @@ eval file copy -force [glob ${hdl_src}/*/*.sv] {$outputDir/hdl/} eval file copy -force [glob ${hdl_src}/*/*/*.sv] {$outputDir/hdl/} # Check if a wrapper is needed and create it (to pass parameters when cvw_t parameters are used) -set wrapper 0 if {[catch {eval exec grep "cvw_t" $outputDir/hdl/$::env(DESIGN).sv}] == 0} { echo "Creating wrapper" set wrapper 1 @@ -440,7 +460,7 @@ set filename [format "%s%s" $outputDir "/reports/cell.rep"] #redirect $filename { report_cell [get_cells -hier *] } # not too useful set filename [format "%s%s" $outputDir "/reports/power.rep"] -redirect $filename { report_power -hierarchy -levels 1 } +redirect $filename { report_power -analysis_effort high -hierarchy -levels 1 } set filename [format "%s%s" $outputDir "/reports/constraint.rep"] redirect $filename { report_constraint } From 98176665de32e50407470d1ef5e8944781270e9c Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 15 Nov 2023 08:05:41 -0800 Subject: [PATCH 42/62] Fixed messed-up hazard.sv --- src/hazard/hazard.sv | 38 +++++++++----------------------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/src/hazard/hazard.sv b/src/hazard/hazard.sv index 028dbf61d..12bd83bc5 100644 --- a/src/hazard/hazard.sv +++ b/src/hazard/hazard.sv @@ -26,8 +26,7 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -module hazard import cvw::*; #(parameter cvw_t P) ( - // Detect hazards +module hazard import cvw::*; #(parameter cvw_t P) ( input logic BPWrongE, CSRWriteFenceM, RetM, TrapM, input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD, input logic LSUStallM, IFUStallF, @@ -46,28 +45,9 @@ module hazard import cvw::*; #(parameter cvw_t P) ( logic WFIStallM, WFIInterruptedM; - logic ValidWfiM, ValidTrapM, ValidRetM, ValidCSRWriteFenceM, ValidCSRRdStallD; - logic ValidFPUStallD, ValidFCvtIntStallD, ValidFDivBusyE, ValidMDUStallD, ValidDivBusyE; - - // Gate Stall/Flush sources with supported features - // This is not logically necessary because the original signals are already 0 when the feature is unsupported - // However, synthesis does not propagate the constant 0 across modules - // By gating these signals, synthesis eliminates unnecessary stall/flush logic, saving about 10% cycle time for rv32e - // These lines of code gating with a compile-time constant generate no hardware. - assign ValidWfiM = wfiM & P.ZICSR_SUPPORTED; - assign ValidTrapM = TrapM & P.ZICSR_SUPPORTED; - assign ValidRetM = RetM & P.ZICSR_SUPPORTED; - assign ValidCSRWriteFenceM = CSRWriteFenceM & P.ZICSR_SUPPORTED; - assign ValidCSRRdStallD = CSRRdStallD & P.ZICSR_SUPPORTED; - assign ValidFPUStallD = RetM & P.F_SUPPORTED; - assign ValidFCvtIntStallD = RetM & P.F_SUPPORTED; - assign ValidFDivBusyE = FDivBusyE & P.F_SUPPORTED; - assign ValidMDUStallD = MDUStallD & P.M_SUPPORTED; - assign ValidDivBusyE = DivBusyE & P.M_SUPPORTED; - // WFI logic - assign WFIStallM = ValidWfiM & ~IntPendingM; // WFI waiting for an interrupt or timeout - assign WFIInterruptedM = ValidWfiM & IntPendingM; // WFI detects a pending interrupt. Retire WFI; trap if interrupt is enabled. 
+ assign WFIStallM = wfiM & ~IntPendingM; // WFI waiting for an interrupt or timeout + assign WFIInterruptedM = wfiM & IntPendingM; // WFI detects a pending interrupt. Retire WFI; trap if interrupt is enabled. // stalls and flushes // loads: stall for one cycle if the subsequent instruction depends on the load @@ -89,10 +69,10 @@ module hazard import cvw::*; #(parameter cvw_t P) ( // Branch misprediction is found in the Execute stage and must flush the next two instructions. // However, an active division operation resides in the Execute stage, and when the BP incorrectly mispredicts the divide as a taken branch, the divde must still complete // When a WFI is interrupted and causes a trap, it flushes the rest of the pipeline but not the W stage, because the WFI needs to commit - assign FlushDCause = ValidTrapM | ValidRetM | ValidCSRWriteFenceM | BPWrongE; - assign FlushECause = ValidTrapM | ValidRetM | ValidCSRWriteFenceM |(BPWrongE & ~(ValidDivBusyE | ValidFDivBusyE)); - assign FlushMCause = ValidTrapM | ValidRetM | ValidCSRWriteFenceM; - assign FlushWCause = ValidTrapM & ~WFIInterruptedM; + assign FlushDCause = TrapM | RetM | CSRWriteFenceM | BPWrongE; + assign FlushECause = TrapM | RetM | CSRWriteFenceM |(BPWrongE & ~(DivBusyE | FDivBusyE)); + assign FlushMCause = TrapM | RetM | CSRWriteFenceM; + assign FlushWCause = TrapM & ~WFIInterruptedM; // Stall causes // Most data depenency stalls are identified in the decode stage @@ -103,8 +83,8 @@ module hazard import cvw::*; #(parameter cvw_t P) ( // The IFU stalls the entire pipeline rather than just Fetch to avoid complications with instructions later in the pipeline causing Exceptions // A trap could be asserted at the start of a IFU/LSU stall, and should flush the memory operation assign StallFCause = '0; - assign StallDCause = (LoadStallD | StoreStallD | ValidMDUStallD | ValidCSRRdStallD | ValidFCvtIntStallD | ValidFPUStallD) & ~FlushDCause; - assign StallECause = (ValidDivBusyE | ValidFDivBusyE) & ~FlushECause; 
+ assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FCvtIntStallD | FPUStallD) & ~FlushDCause; + assign StallECause = (DivBusyE | FDivBusyE) & ~FlushECause; assign StallMCause = WFIStallM & ~FlushMCause; // Need to gate IFUStallF when the equivalent FlushFCause = FlushDCause = 1. // assign StallWCause = ((IFUStallF & ~FlushDCause) | LSUStallM) & ~FlushWCause; From cfaeeae25a44dfd2c95f4fbdb0b06abb1622c5ba Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 15 Nov 2023 08:15:01 -0800 Subject: [PATCH 43/62] Added cmoz support to imperas.ic and adjusted imperas testbench to no longer need FPGA parameter --- sim/imperas.ic | 5 ++++- testbench/testbench-imperas.sv | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/sim/imperas.ic b/sim/imperas.ic index adb10dcad..8d20cdd8f 100644 --- a/sim/imperas.ic +++ b/sim/imperas.ic @@ -22,6 +22,9 @@ --override cpu/Zicbom=T --override cpu/Zicbop=T --override cpu/Zicboz=T +--override cmomp_bytes=64 # Zic64b +--override cmoz_bytes=64 # Zic64b +--override lr_sc_grain=64 # Za64rs # 64 KiB continuous huge pages supported --override cpu/Svpbmt=T @@ -40,7 +43,7 @@ --override cpu/reset_address=0x80000000 ---override cpu/unaligned=F +--override cpu/unaligned=T # Zicclsm (should be true) --override cpu/ignore_non_leaf_DAU=1 --override cpu/wfi_is_nop=T --override cpu/misa_Extensions_mask=0x0 diff --git a/testbench/testbench-imperas.sv b/testbench/testbench-imperas.sv index b503372d4..c27722f9f 100644 --- a/testbench/testbench-imperas.sv +++ b/testbench/testbench-imperas.sv @@ -237,7 +237,7 @@ module testbench; assign HRDATAEXT = 0; end - if(P.FPGA) begin : sdcard + if(P.SDC_SUPPORTED) begin : sdcard // *** fix later /* -----\/----- EXCLUDED -----\/----- sdModel sdcard From 817ddbc7c5cef82f1987eac1c9e3847d47e205cb Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 15 Nov 2023 08:19:50 -0800 Subject: [PATCH 44/62] Adjusted LSU misaligned buffer to fix synthesis warning --- src/lsu/lsu.sv | 9 
+++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index ba7d8e119..d872e0114 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -92,7 +92,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], // PMP configuration from privileged unit input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0] // PMP address from privileged unit ); - localparam MISALIGN_SUPPORT = P.ZICCLSM_SUPPORTED & P.DCACHE_SUPPORTED; + localparam logic MISALIGN_SUPPORT = P.ZICCLSM_SUPPORTED & P.DCACHE_SUPPORTED; + localparam MLEN = MISALIGN_SUPPROT ? 2*P.LLEN : P.LLEN; // widen buffer for misaligned accessess logic [P.XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer logic [P.XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer @@ -118,9 +119,9 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data /* verilator lint_off WIDTHEXPAND */ - logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] DCacheReadDataWordM; // D$ read data - logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] LSUWriteDataSpillM; // Final write data - logic [((MISALIGN_SUPPORT+1)*P.LLEN-1)/8:0] ByteMaskSpillM; // Selects which bytes within a word to write + logic [MLEN-1:0] DCacheReadDataWordM; // D$ read data + logic [MLEN-1:0] LSUWriteDataSpillM; // Final write data + logic [MLEN/8-1:0] ByteMaskSpillM; // Selects which bytes within a word to write /* verilator lint_on WIDTHEXPAND */ logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data From eef39bd49546ed66de44cfec32acc1ea18264463 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 15 Nov 2023 08:30:48 -0800 Subject: [PATCH 45/62] Fixed typo in lsu parameter --- src/lsu/lsu.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lsu/lsu.sv 
b/src/lsu/lsu.sv index d872e0114..f01dc609b 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -93,7 +93,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0] // PMP address from privileged unit ); localparam logic MISALIGN_SUPPORT = P.ZICCLSM_SUPPORTED & P.DCACHE_SUPPORTED; - localparam MLEN = MISALIGN_SUPPROT ? 2*P.LLEN : P.LLEN; // widen buffer for misaligned accessess + localparam MLEN = MISALIGN_SUPPORT ? 2*P.LLEN : P.LLEN; // widen buffer for misaligned accessess logic [P.XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer logic [P.XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer From 7b2bb86ced1d59f0639f1f7f589d7b09e21d72de Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 15 Nov 2023 09:48:13 -0800 Subject: [PATCH 46/62] changed to head of riscv-arch-test --- addins/riscv-arch-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 4eea0a0f0..9f9bdd62d 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 4eea0a0f0e21f2613a114e45a5ad738e721c4044 +Subproject commit 9f9bdd62d3e37fcd8ad1b1a39d71694ccf1d74f3 From ff73f798edf00a4a8f9a1e2730d6af51b6c4d021 Mon Sep 17 00:00:00 2001 From: Jacob Pease Date: Thu, 16 Nov 2023 13:59:12 -0600 Subject: [PATCH 47/62] Replaced vivado-risc-v addins directory with new SDC repo. 
--- .gitmodules | 6 +++--- fpga/generator/wally.tcl | 8 ++------ 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/.gitmodules b/.gitmodules index 361441a18..54d9dd970 100644 --- a/.gitmodules +++ b/.gitmodules @@ -23,9 +23,9 @@ [submodule "addins/vivado-boards"] path = addins/vivado-boards url = https://github.com/Digilent/vivado-boards/ -[submodule "addins/vivado-risc-v"] - path = addins/vivado-risc-v - url = https://github.com/eugene-tarassov/vivado-risc-v.git +[submodule "addins/ahbsdc"] + path = addins/ahbsdc + url = https://github.com/JacobPease/ahbsdc.git [submodule "addins/riscv-arch-test"] path = addins/riscv-arch-test url = https://github.com/riscv-non-isa/riscv-arch-test diff --git a/fpga/generator/wally.tcl b/fpga/generator/wally.tcl index d699c3d21..bad9981df 100644 --- a/fpga/generator/wally.tcl +++ b/fpga/generator/wally.tcl @@ -42,13 +42,9 @@ if {$board=="ArtyA7"} { # read in all other rtl read_verilog -sv [glob -type f ../src/CopiedFiles_do_not_add_to_repo/*/*.sv ../src/CopiedFiles_do_not_add_to_repo/*/*/*.sv] # *** Once the sdc is updated to use ahb changes these to system verilog. 
-read_verilog [glob -type f ../src/axi_sdc_controller.v] -read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_cmd_master.v] -read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_cmd_serial_host.v] -read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_data_master.v] -read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_data_serial_host.v] +read_verilog [glob -type f ../../addins/ahbsdc/sdc/*.v] -set_property include_dirs {../src/CopiedFiles_do_not_add_to_repo/config ../../config/shared ../../addins/vivado-risc-v/sdc} [current_fileset] +set_property include_dirs {../src/CopiedFiles_do_not_add_to_repo/config ../../config/shared ../../addins/ahbsdc/sdc} [current_fileset] if {$board=="ArtyA7"} { add_files -fileset constrs_1 -norecurse ../constraints/constraints-$board.xdc From 9df87872ef73d8ec4aaa7db36f39424f3e6b6e6c Mon Sep 17 00:00:00 2001 From: Jacob Pease Date: Thu, 16 Nov 2023 15:13:12 -0600 Subject: [PATCH 48/62] Deleted vivado-risc-v directory and added ahbsdc. --- addins/vivado-risc-v | 1 - 1 file changed, 1 deletion(-) delete mode 160000 addins/vivado-risc-v diff --git a/addins/vivado-risc-v b/addins/vivado-risc-v deleted file mode 160000 index c76a8613a..000000000 --- a/addins/vivado-risc-v +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c76a8613a177b3a04face2cb8e15dd07a8d2fc40 From 38cf7f0fb74553f19898a267753258d49574d4e8 Mon Sep 17 00:00:00 2001 From: Jacob Pease Date: Thu, 16 Nov 2023 17:46:48 -0600 Subject: [PATCH 49/62] ahbsdc submodule actually added this time. 
--- .gitmodules | 2 +- addins/ahbsdc | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 160000 addins/ahbsdc diff --git a/.gitmodules b/.gitmodules index 54d9dd970..054afa6fb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -25,7 +25,7 @@ url = https://github.com/Digilent/vivado-boards/ [submodule "addins/ahbsdc"] path = addins/ahbsdc - url = https://github.com/JacobPease/ahbsdc.git + url = git@github.com:jacobpease/ahbsdc.git [submodule "addins/riscv-arch-test"] path = addins/riscv-arch-test url = https://github.com/riscv-non-isa/riscv-arch-test diff --git a/addins/ahbsdc b/addins/ahbsdc new file mode 160000 index 000000000..5df21aa66 --- /dev/null +++ b/addins/ahbsdc @@ -0,0 +1 @@ +Subproject commit 5df21aa6625eca120e64ea353ca641aff37d90b2 From f4f389f3736e9a51d08e6e206c6e987bbd214fe8 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 17 Nov 2023 13:27:57 -0800 Subject: [PATCH 50/62] Initial version of embench_arch_sweep.py --- benchmarks/embench/embench_arch_sweep.py | 86 ++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100755 benchmarks/embench/embench_arch_sweep.py diff --git a/benchmarks/embench/embench_arch_sweep.py b/benchmarks/embench/embench_arch_sweep.py new file mode 100755 index 000000000..ad629320a --- /dev/null +++ b/benchmarks/embench/embench_arch_sweep.py @@ -0,0 +1,86 @@ +#!/usr/bin/python3 +# embench_arch_sweep.py +# David_Harris@hmc.edu 16 November 2023 +# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +# Run embench on a variety of architectures and collate results + +import os +from datetime import datetime +import re +import collections + +archs = ["rv32i_zicsr", "rv32im_zicsr", "rv32imc_zicsr", "rv32imc_zba_zbb_zbc_zbs_zicsr", "rv32imafdc_zba_zbb_zbc_zbs_zicsr"] + +def calcgeomean(d, arch): + progs = ["aha-mont64", "crc32", "cubic", "edn", "huffbench", "matmult-int", "minver", "nbody", "nettle-aes", "nettle-sha256", "nsichneu", "picojpeg", "qrduino", "sglib-combined", "slre", "st", "statemate", "ud", 
"wikisort"] + result = 1.0 + for p in progs: + #val = d[arch][p] + val = d[arch].get(p, 1.0) + result = result *float(val) + result = pow(result, (1.0/float(len(progs)))) + return result + +def tabulate_arch_sweep(directory): + for case in ["wallySizeOpt_size", "wallySpeedOpt_speed"]: + d = collections.defaultdict(dict) + for arch in archs: + file = case+"_"+arch+".json" + file_path = os.path.join(directory, file) + lines = [] + try: + f = open(file_path, "r") + lines = f.readlines() + except: + f.close() + #print(file_path+" does not exist") + for line in lines: + #print("File: "+file+" Line: "+line) + #p = re.compile('".*" : .*,') + p = r'"([^"]*)" : ([^,\n]+)' + match = re.search(p, line) + if match: + prog = match.group(1) + result = match.group(2); + d[arch][prog] = result; + #print(match.group(1)+" " + match.group(2)) + f.close() + for arch in [""] + archs: + print (arch, end="\t") + print("") + for prog in d[archs[0]]: + print(prog, end="\t") + for arch in archs: + entry = d[arch].get(prog, "n/a"); + print (entry, end="\t") + print("") + print("New geo mean", end="\t") + for arch in archs: + geomean = calcgeomean(d, arch) + print(geomean, end="\t") + print("") + +def run_arch_sweep(): + # make a folder whose name depends on the date + # Get current date + current_date = datetime.now() + # Format date as a string in the format YYYYMMDD + date_string = current_date.strftime('%Y%m%d_%H%M%S') + dir = "run_"+date_string + # Create a directory with the date string as its name + os.mkdir(dir) + + # make a directory with the current date as its name + + # sweep the runs and save the results in the run directory + for arch in archs: + os.system("make clean") + os.system("make run ARCH="+arch) + for res in ["SizeOpt_size", "SizeOpt_speed", "SpeedOpt_size", "SpeedOpt_speed"]: + os.system("mv -f wally"+res+".json "+dir+"/wally"+res+"_"+arch+".json") + return dir + +#directory = run_arch_sweep() +directory = "run_20231116_071322" +tabulate_arch_sweep(directory) \ No 
newline at end of file From 3dc7b93f57178c7d03a65347ccc5a7ece7df7d9d Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Fri, 17 Nov 2023 16:25:35 -0600 Subject: [PATCH 51/62] Revert removal of WRAPPER option that is not prudent --- synthDC/Makefile | 40 ++++++--------------------------------- synthDC/ppa/ppaSynth.py | 4 ++-- synthDC/scripts/synth.tcl | 28 ++++----------------------- 3 files changed, 12 insertions(+), 60 deletions(-) diff --git a/synthDC/Makefile b/synthDC/Makefile index 3e344e8d2..470ec8f47 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -1,28 +1,7 @@ -##################### -# Makefile -# -# Written: ssanghai@hmc.edu, mmasserfrye@hmc.edu, james.stine@okstate.edu 15 November 2023 -# -# Purpose: Makefile to be used for synthesis using DC -# -# A component of the Wally configurable RISC-V project. -# -# Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -# -# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -# -# Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -# except in compliance with the License, or, at your option, the Apache License version 2.0. You -# may obtain a copy of the License at -# -# https:#solderpad.org/licenses/SHL-2.1/ -# -# Unless required by applicable law or agreed to in writing, any work distributed under the -# License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -# either express or implied. See the License for the specific language governing permissions -# and limitations under the License. 
-################################################ - +# +# Makefile for synthesis +# Shreya Sanghai (ssanghai@hmc.edu) 2/28/2022 +# Madeleine Masser-Frye (mmasserfrye@hmc.edu) 1/27/2023 NAME := synth # defaults export DESIGN ?= wallypipelinedcore @@ -42,18 +21,11 @@ export MAXOPT ?= 0 export DRIVE ?= FLOP export USESRAM ?= 0 export WIDTH ?= 32 -export WRAPPER ?= 1 -export SAIFPOWER ?= 0 time := $(shell date +%F-%H-%M) hash := $(shell git rev-parse --short HEAD) -# This is done to create different naming conventions to help the PPA python -# TODO: cleanup later to utilize better parsing/lexing -ifeq ($(WRAPPER), 0) - export OUTPUTDIR := runs/$(DESIGN)_$(WIDTH)_$(CONFIG)_$(TECH)_$(FREQ)_MHz_$(time)_$(TITLE)_$(hash) -else - export OUTPUTDIR := runs/$(DESIGN)_$(CONFIG)_$(MOD)_$(TECH)_$(FREQ)_MHz_$(time)_$(TITLE)_$(hash) -endif +export OUTPUTDIR := runs/$(DESIGN)_$(CONFIG)_$(TECH)_$(FREQ)_MHz_$(time)_$(TITLE)_$(hash) +export SAIFPOWER ?= 0 OLDCONFIGDIR ?= ${WALLY}/config export CONFIGDIR ?= $(OUTPUTDIR)/config diff --git a/synthDC/ppa/ppaSynth.py b/synthDC/ppa/ppaSynth.py index 0c4744c26..30fe1254f 100755 --- a/synthDC/ppa/ppaSynth.py +++ b/synthDC/ppa/ppaSynth.py @@ -11,7 +11,7 @@ from multiprocessing import Pool from ppaAnalyze import synthsfromcsv def runCommand(module, width, tech, freq): - command = "make synth DESIGN={} WIDTH={} TECH={} DRIVE=INV FREQ={} MAXOPT=1 MAXCORES=1 WRAPPER=0".format(module, width, tech, freq) + command = "make synth DESIGN={} WIDTH={} TECH={} DRIVE=INV FREQ={} MAXOPT=1 MAXCORES=1".format(module, width, tech, freq) subprocess.call(command, shell=True) def deleteRedundant(synthsToRun): @@ -95,4 +95,4 @@ if __name__ == '__main__': pool.starmap(runCommand, synthsToRun) pool.close() -pool.join() +pool.join() \ No newline at end of file diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl index 668b1c215..cd4d6ff27 100755 --- a/synthDC/scripts/synth.tcl +++ b/synthDC/scripts/synth.tcl @@ -1,27 +1,7 @@ -##################### -# 
synth.tcl # -# Written: james.stine@okstate.edu 15 November 2023 +# Synthesis Synopsys Flow +# james.stine@okstate.edu 27 Sep 2015 # -# Purpose: Baseline DC Tcl file -# -# A component of the Wally configurable RISC-V project. -# -# Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -# -# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -# -# Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -# except in compliance with the License, or, at your option, the Apache License version 2.0. You -# may obtain a copy of the License at -# -# https:#solderpad.org/licenses/SHL-2.1/ -# -# Unless required by applicable law or agreed to in writing, any work distributed under the -# License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -# either express or implied. See the License for the specific language governing permissions -# and limitations under the License. -################################################ # start run clock set t1 [clock seconds] @@ -46,7 +26,6 @@ set saifpower $::env(SAIFPOWER) set maxopt $::env(MAXOPT) set drive $::env(DRIVE) set width $::env(WIDTH) -set wrapper $::env(WRAPPER) eval file copy -force [glob ${cfg}/*.vh] {$outputDir/hdl/} eval file copy -force [glob ${hdl_src}/cvw.sv] {$outputDir/hdl/} @@ -54,6 +33,7 @@ eval file copy -force [glob ${hdl_src}/*/*.sv] {$outputDir/hdl/} eval file copy -force [glob ${hdl_src}/*/*/*.sv] {$outputDir/hdl/} # Check if a wrapper is needed and create it (to pass parameters when cvw_t parameters are used) +set wrapper 0 if {[catch {eval exec grep "cvw_t" $outputDir/hdl/$::env(DESIGN).sv}] == 0} { echo "Creating wrapper" set wrapper 1 @@ -460,7 +440,7 @@ set filename [format "%s%s" $outputDir "/reports/cell.rep"] #redirect $filename { report_cell [get_cells -hier *] } # not too useful set filename [format "%s%s" $outputDir "/reports/power.rep"] -redirect $filename { report_power -analysis_effort high -hierarchy -levels 1 } 
+redirect $filename { report_power -hierarchy -levels 1 } set filename [format "%s%s" $outputDir "/reports/constraint.rep"] redirect $filename { report_constraint } From 7b33331cf73ed0daa7dcf21647dc8323592f92f3 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 17 Nov 2023 15:10:57 -0800 Subject: [PATCH 52/62] Got Wally sweep running again --- synthDC/Makefile | 2 +- synthDC/ppa/ppaSynth.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/synthDC/Makefile b/synthDC/Makefile index 470ec8f47..2183cab83 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -24,7 +24,7 @@ export WIDTH ?= 32 time := $(shell date +%F-%H-%M) hash := $(shell git rev-parse --short HEAD) -export OUTPUTDIR := runs/$(DESIGN)_$(CONFIG)_$(TECH)_$(FREQ)_MHz_$(time)_$(TITLE)_$(hash) +export OUTPUTDIR := runs/$(DESIGN)_$(CONFIG)_$(MOD)_$(TECH)nm_$(FREQ)_MHz_$(time)_$(TITLE)_$(hash) export SAIFPOWER ?= 0 OLDCONFIGDIR ?= ${WALLY}/config diff --git a/synthDC/ppa/ppaSynth.py b/synthDC/ppa/ppaSynth.py index 30fe1254f..315fa554a 100755 --- a/synthDC/ppa/ppaSynth.py +++ b/synthDC/ppa/ppaSynth.py @@ -85,7 +85,8 @@ if __name__ == '__main__': ##### Run a sweep for multiple modules/widths based on best delay found in existing syntheses modules = ['adder'] - widths = [8, 16, 32, 64, 128] +# widths = [8, 16, 32, 64, 128] + widths = [32] tech = 'sky130' synthsToRun = freqModuleSweep(widths, modules, tech) From 96f9409da4d0153459067f331dd75591feb94b89 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 17 Nov 2023 15:11:52 -0800 Subject: [PATCH 53/62] Embench Makefile to sweep experiments across configs --- benchmarks/embench/Makefile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/benchmarks/embench/Makefile b/benchmarks/embench/Makefile index 97c7660c5..d7a18b7e2 100644 --- a/benchmarks/embench/Makefile +++ b/benchmarks/embench/Makefile @@ -3,6 +3,7 @@ # Compile Embench for Wally embench_dir = ../../addins/embench-iot +ARCH=rv32imac_zicsr all: build run: 
build size sim @@ -15,7 +16,7 @@ buildsize: build_speedopt_size build_sizeopt_size # uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed and size build_speedopt_speed: - $(embench_dir)/build_all.py --builddir=bd_speedopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles" + $(embench_dir)/build_all.py --builddir=bd_speedopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S -march=$(ARCH)" --cflags="-O2 -nostartfiles -march=$(ARCH)" # remove files not used in embench1.0 When changing to 2.0, restore these files #rm -rf $(embench_dir)/bd_speedopt_speed/src/md5sum #rm -rf $(embench_dir)/bd_speedopt_speed/src/tarfind @@ -23,7 +24,7 @@ build_speedopt_speed: find $(embench_dir)/bd_speedopt_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done build_sizeopt_speed: - $(embench_dir)/build_all.py --builddir=bd_sizeopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-Os -nostartfiles" + $(embench_dir)/build_all.py --builddir=bd_sizeopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S -march=$(ARCH)" --cflags="-Os -nostartfiles -march=$(ARCH)" # remove files not used in embench1.0 When changing to 2.0, restore these files #rm -rf $(embench_dir)/bd_sizeopt_speed/src/md5sum #rm -rf $(embench_dir)/bd_sizeopt_speed/src/tarfind @@ -32,10 +33,10 @@ build_sizeopt_speed: # uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed and size build_speedopt_size: - $(embench_dir)/build_all.py --builddir=bd_speedopt_size --arch riscv32 --chip 
generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-O2 -msave-restore" --dummy-libs="libgcc libm libc crt0" + $(embench_dir)/build_all.py --builddir=bd_speedopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S -march=$(ARCH)" --cflags="-O2 -msave-restore -march=$(ARCH)" --dummy-libs="libgcc libm libc crt0" build_sizeopt_size: - $(embench_dir)/build_all.py --builddir=bd_sizeopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0" + $(embench_dir)/build_all.py --builddir=bd_sizeopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S -march=$(ARCH)" --cflags="-Os -msave-restore -march=$(ARCH)" --dummy-libs="libgcc libm libc crt0" # builds dependencies, then launches modelsim and finally runs python wrapper script to present results sim: modelsim_build_memfile modelsim_run speed From 423ae2bb761b4e37ebdfe5cad45a4b2735135f25 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 17 Nov 2023 17:02:32 -0800 Subject: [PATCH 54/62] Ignore benchmark results --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index bd7e800df..3990c3823 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,7 @@ __pycache__/ addins/riscv-arch-test/Makefile.include addins/riscv-tests/target addins/TestFloat-3e/build/Linux-x86_64-GCC/* -benchmarks/embench/wally*.json + #vsim work files to ignore transcript @@ -175,3 +175,6 @@ tests/fp/combined_IF_vectors/IF_vectors/*.tv sim/bp-results/*.log sim/branch*.log /tests/custom/fpga-test-sdc/bin/fpga-test-sdc 
+benchmarks/embench/wally*.json +benchmarks/embench/run* +sim/cfi.log From 96556064a4db013548a84d90ade807af9db31820 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 17 Nov 2023 18:31:44 -0800 Subject: [PATCH 55/62] Restored RV64GC BPRED_SIZE=10 for consistent synthesis results --- config/rv64gc/config.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index e00c9153d..84f4de599 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -150,7 +150,7 @@ localparam PLIC_SDC_ID = 32'd9; localparam BPRED_SUPPORTED = 1; localparam BPRED_TYPE = `BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT localparam BPRED_NUM_LHR = 32'd6; -localparam BPRED_SIZE = 32'd6; +localparam BPRED_SIZE = 32'd10; localparam BTB_SIZE = 32'd10; localparam RAS_SIZE = 32'd16; From acc2db256f606e253253867d9af353762b01583d Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 17 Nov 2023 20:25:24 -0800 Subject: [PATCH 56/62] turn off IDIVONFPU when FSUPPORTED=0. 
Already checked in sim, but need it in synth too for feature sweep --- config/rv32gc/config.vh | 4 ++-- src/ieu/datapath.sv | 2 +- src/mdu/mdu.sv | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh index 3b306a005..e095e6252 100644 --- a/config/rv32gc/config.vh +++ b/config/rv32gc/config.vh @@ -74,8 +74,8 @@ localparam ICACHE_LINELENINBITS = 32'd512; // Integer Divider Configuration // IDIV_BITSPERCYCLE must be 1, 2, or 4 -localparam IDIV_BITSPERCYCLE = 32'd4; -localparam IDIV_ON_FPU = 1; +localparam IDIV_BITSPERCYCLE = 32'd2; +localparam IDIV_ON_FPU = 0; // Legal number of PMP entries are 0, 16, or 64 localparam PMP_ENTRIES = 32'd16; diff --git a/src/ieu/datapath.sv b/src/ieu/datapath.sv index bb7638514..8c366a2ef 100644 --- a/src/ieu/datapath.sv +++ b/src/ieu/datapath.sv @@ -131,7 +131,7 @@ module datapath import cvw::*; #(parameter cvw_t P) ( if (P.F_SUPPORTED) begin:fpmux mux2 #(P.XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM); mux2 #(P.XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW); - if (P.IDIV_ON_FPU) begin + if (P.IDIV_ON_FPU & P.F_SUPPORTED) begin mux2 #(P.XLEN) divresultmuxW(MDUResultW, FIntDivResultW, IntDivW, MulDivResultW); end else begin assign MulDivResultW = MDUResultW; diff --git a/src/mdu/mdu.sv b/src/mdu/mdu.sv index 83327a460..e152fc6de 100644 --- a/src/mdu/mdu.sv +++ b/src/mdu/mdu.sv @@ -57,7 +57,7 @@ module mdu import cvw::*; #(parameter cvw_t P) ( // Start a divide when a new division instruction is received and the divider isn't already busy or finishing // When IDIV_ON_FPU is set, use the FPU divider instead // In ZMMUL, with M_SUPPORTED = 0, omit the divider - if ((P.IDIV_ON_FPU) || (!P.M_SUPPORTED)) begin:nodiv + if ((P.IDIV_ON_FPU & P.F_SUPPORTED) || (!P.M_SUPPORTED)) begin:nodiv assign QuotM = 0; assign RemM = 0; assign DivBusyE = 0; From 87e6a5ccf2b16c4fa956a506f252c87cc477b1bb Mon Sep 17 00:00:00 2001 From: Jacob Pease Date: Sat, 18 
Nov 2023 19:15:39 -0600 Subject: [PATCH 57/62] Updated ROM to preload bootloader from file and infer a block ram when building for FPGA. --- fpga/constraints/marked_debug.txt | 1 + fpga/src/boot.mem | 513 ++++++++++++++++++++++++++++++ src/generic/mem/rom1p1r.sv | 22 +- 3 files changed, 529 insertions(+), 7 deletions(-) create mode 100644 fpga/src/boot.mem diff --git a/fpga/constraints/marked_debug.txt b/fpga/constraints/marked_debug.txt index 3973fc451..7d5636f31 100644 --- a/fpga/constraints/marked_debug.txt +++ b/fpga/constraints/marked_debug.txt @@ -45,6 +45,7 @@ ifu/ifu.sv: logic PCPF ifu/ifu.sv: logic PostSpillInstrRawF mmu/hptw.sv: logic ITLBWriteF mmu/hptw.sv: statetype WalkerState +mmu/hptw.sv: logic ValidPTE privileged/csrs.sv: logic CSRSReadValM privileged/csrs.sv: logic SEPC_REGW privileged/csrs.sv: logic MIP_REGW diff --git a/fpga/src/boot.mem b/fpga/src/boot.mem new file mode 100644 index 000000000..4ad2f0657 --- /dev/null +++ b/fpga/src/boot.mem @@ -0,0 +1,513 @@ +8001819300002197 +4281420141014081 +4481440143814301 +4681460145814501 +4881480147814701 +4a814a0149814901 +4c814c014b814b01 +4e814e014d814d01 +0110011b4f814f01 +059b45011161016e +0004063705fe0010 +1f6000ef8006061b +0ff003930000100f +4e952e3110060e37 +c602829b0053f2b7 +2023fe02dfe312fd +829b0053f2b7007e +fe02dfe312fdc602 +4de31efd000e2023 +059bf1402573fdd0 +0000061705e20870 +0010029b01260613 +68110002806702fe +0085179bf0080813 +038008130107f7b3 +480508a86c632781 +1533357902a87963 +38030000181700a8 +1c6301057833f268 +081a403018370808 +0105783342280813 +1815751308081063 +00367513c295e14d +654ded510207e793 +c1701ff00613f130 +0637c530fff6861b +664dcd10167d0200 +17fd001007b7c25c +859b5a5cc20cd21c +02062a23dfed0007 +4785fffd561c664d +4501461c06f59063 +4a1cc35c465cc31c +e29dc75c4a5cc71c +0c63086008138082 +1ae30a9008130105 +b7710017e793f905 +e793b75901d7e793 +5f5c674db7410197 +66cd02072e23dffd +fff78513ff7d5698 +40a0053300a03533 +bfb100a7e7938082 +e0a2715d8082557d +e486f052f44ef84a 
+fa13e85aec56fc26 +843289ae892a0086 +00959993000a1463 +864ac4396b054a85 +0009859b4549870a +0004049b05540363 +86a66485008b7363 +870a87aaec7ff0ef +4531458146014681 +f0ef0207c9639c05 +17820094979beb1f +873e020541639381 +993e99ba020a1963 +870aa8094501f85d +e8bff0ef45454685 +60a64505fe0559e3 +79a2794274e26406 +61616b426ae27a02 +9301020497138082 +f40647057179b7f1 +d79867cdec26f022 +dff58b85571c674d +2423d35c03600793 +fffd571c674d0207 +0007a737b00026f3 +b00027f311f70713 +674dfef77de38f95 +4f5ccf9d8b895b1c +26f3cf5c0027e793 +071305f5e737b000 +8f95b00027f30ff7 +4f5c674dfef77de3 +b00026f3cf5c9bf5 +67f7071300989737 +7de38f95b00027f3 +458146014681fef7 +ddbff0ef4501870a +059346014681870a +dcbff0ef45211aa0 +1aa007134782e939 +816393d117d24411 +85220ff0041302e7 +614564e270a27402 +46e3da5ff0efa0cd +0207c7634782fe05 +458146014681870a +d8bff0ef03700513 +46014681870a87aa +0a900513403005b7 +4409bf7dfc07d9e3 +c3998b8583f9bfe1 +4681870a00846413 +f0ef450945814601 +870afa0540e3d59f +123405b746014681 +46e3d45ff0ef450d +870a77c14482f805 +85a6460146818cfd +4ae3d2dff0ef451d +d3d8470567cdf605 +000f4737b00026f3 +b00027f323f70713 +67cdfef77de38f95 +4681870a0007ae23 +0370051385a64601 +f2054fe3cf7ff0ef +458146014681870a +ce3ff0ef08600513 +4681870af20545e3 +4541200005934601 +f0055de3ccfff0ef +3023bf010113bf09 +4605842a86aa4081 +40113423850a4585 +86a265a6da5ff0ef +d99ff0ef04084605 +2201358322813603 +86a2260508700513 +d81ff0ef05629e0d +2a0135832a813603 +9e0d86a226054505 +3603d6bff0ef057e +0513320135833281 +9e0d86a226054010 +3083d53ff0ef0556 +4501400134034081 +0000808241010113 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 
+0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 
+0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 
+0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 
+0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +00600100d2e3ca40 diff --git a/src/generic/mem/rom1p1r.sv b/src/generic/mem/rom1p1r.sv index 93f8c82df..5a45e354a 100644 --- a/src/generic/mem/rom1p1r.sv +++ b/src/generic/mem/rom1p1r.sv @@ -33,7 +33,7 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0) ); // Core Memory - logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0]; + (*rom_style="block" *) logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0]; // dh 10/30/23 ROM macros are presently commented out // because they don't point to a generated ROM @@ -41,15 +41,23 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0) rom1p1r_128x64 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout)); end if ((`USE_SRAM == 1) & (ADDR_WDITH == 7) & (DATA_WIDTH == 32)) begin - rom1p1r_128x32 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout)); + rom1p1r_128x32 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout)); - end else begin */ - always @ (posedge clk) - if(ce) dout <= ROM[addr]; + end else begin */ + + initial begin + if (PRELOAD_ENABLED) begin + $readmemh("../../../fpga/src/boot.mem", ROM, 0); + end + end + + always @ (posedge clk) begin + if(ce) dout <= ROM[addr]; + end // for FPGA, initialize with zero-stage bootloader - if(PRELOAD_ENABLED) begin + /*if(PRELOAD_ENABLED) begin initial begin ROM[0]=64'h8001819300002197; ROM[1]=64'h4281420141014081; @@ -195,6 +203,6 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0) ROM[141]=64'h0000808241010113; end // if (PRELOAD_ENABLED) - end + end*/ endmodule From b692c913c4908bbaf7e3407727681eb216c69f0b Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 18 Nov 2023 20:56:50 -0800 Subject: [PATCH 58/62] Changed rv32gc to do IDIV in MDU and have k=2 copies of FDIV 
stages; added correct sky130 adder data; fixed feature substitution in synthesis makefile --- config/rv32gc/config.vh | 2 +- synthDC/Makefile | 4 ++-- synthDC/extractSummary.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh index e095e6252..52baad796 100644 --- a/config/rv32gc/config.vh +++ b/config/rv32gc/config.vh @@ -169,7 +169,7 @@ localparam ZMMUL_SUPPORTED = 0; // FPU division architecture localparam RADIX = 32'd4; -localparam DIVCOPIES = 32'd4; +localparam DIVCOPIES = 32'd2; // bit manipulation localparam ZBA_SUPPORTED = 1; diff --git a/synthDC/Makefile b/synthDC/Makefile index 2183cab83..7968a7b52 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -94,10 +94,10 @@ endif ifneq ($(MOD), orig) # PMP 0 - sed -i 's/PMP_ENTRIES \(64\|16\|0\)/PMP_ENTRIES = 0;/' $(CONFIGDIR)/config.vh + sed -i 's/PMP_ENTRIES.*\(64\|16\)/PMP_ENTRIES = 0;/' $(CONFIGDIR)/config.vh ifneq ($(MOD), PMP0) # no priv - sed -i 's/ZICSR_SUPPORTED *1/ZICSR_SUPPORTED = 0;/' $(CONFIGDIR)/config.vh + sed -i 's/ZICSR_SUPPORTED.*1/ZICSR_SUPPORTED = 0;/' $(CONFIGDIR)/config.vh ifneq ($(MOD), noPriv) # turn off FPU sed -i 's/1 *<< *3/0 << 3/' $(CONFIGDIR)/config.vh diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py index 7a3f45ddd..d6f5933a9 100755 --- a/synthDC/extractSummary.py +++ b/synthDC/extractSummary.py @@ -252,7 +252,7 @@ if __name__ == '__main__': TechSpec = namedtuple("TechSpec", "color shape targfreq fo4 add32area add32lpower add32denergy") techdict = {} - techdict['sky130'] = TechSpec('green', 'o', args.sky130freq, 99.5e-3, 1440.600027, 714.057, 0.658023) + techdict['sky130'] = TechSpec('green', 'o', args.sky130freq, 99.5e-3, 2581, 18, 0.685) techdict['sky90'] = TechSpec('gray', 'o', args.sky90freq, 43.2e-3, 1440.600027, 714.057, 0.658023) techdict['tsmc28psyn'] = TechSpec('blue', 's', args.tsmcfreq, 12.2e-3, 209.286002, 1060.0, .081533) From 887cf935dce54c77af64f71ca6d49cbb6e012f6c Mon Sep 17 
00:00:00 2001 From: David Harris Date: Sun, 19 Nov 2023 06:49:07 -0800 Subject: [PATCH 59/62] wallySynthAll.sh automates running all synthesis experiments without maxopt --- benchmarks/embench/embench_arch_sweep.py | 7 ++++--- synthDC/wallySynthAll.sh | 13 +++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) create mode 100755 synthDC/wallySynthAll.sh diff --git a/benchmarks/embench/embench_arch_sweep.py b/benchmarks/embench/embench_arch_sweep.py index ad629320a..130a70581 100755 --- a/benchmarks/embench/embench_arch_sweep.py +++ b/benchmarks/embench/embench_arch_sweep.py @@ -10,7 +10,8 @@ from datetime import datetime import re import collections -archs = ["rv32i_zicsr", "rv32im_zicsr", "rv32imc_zicsr", "rv32imc_zba_zbb_zbc_zbs_zicsr", "rv32imafdc_zba_zbb_zbc_zbs_zicsr"] +#archs = ["rv32i_zicsr", "rv32im_zicsr", "rv32imc_zicsr", "rv32imc_zba_zbb_zbc_zbs_zicsr", "rv32imafdc_zba_zbb_zbc_zbs_zicsr"] +archs = ["rv32imafdc_zba_zbb_zbc_zbs_zicsr", "rv32i_zicsr", "rv32im_zicsr", "rv32imc_zicsr", "rv32imc_zba_zbb_zbc_zbs_zicsr"] def calcgeomean(d, arch): progs = ["aha-mont64", "crc32", "cubic", "edn", "huffbench", "matmult-int", "minver", "nbody", "nettle-aes", "nettle-sha256", "nsichneu", "picojpeg", "qrduino", "sglib-combined", "slre", "st", "statemate", "ud", "wikisort"] @@ -81,6 +82,6 @@ def run_arch_sweep(): os.system("mv -f wally"+res+".json "+dir+"/wally"+res+"_"+arch+".json") return dir -#directory = run_arch_sweep() -directory = "run_20231116_071322" +directory = run_arch_sweep() +#directory = "run_20231117_082325" tabulate_arch_sweep(directory) \ No newline at end of file diff --git a/synthDC/wallySynthAll.sh b/synthDC/wallySynthAll.sh new file mode 100755 index 000000000..9c085601c --- /dev/null +++ b/synthDC/wallySynthAll.sh @@ -0,0 +1,13 @@ +# Run all Wally synthesis experiments from chapter 8 +./wallySynth.py --freqsweep 330 --tech sky130 +./wallySynth.py --freqsweep 870 --tech sky90 +./wallySynth.py --freqsweep 2800 --tech tsmc28psyn --usesram 
+./wallySynth.py --configsweep --tech sky130 --targetfreq 330 +./wallySynth.py --configsweep --tech sky90 --targetfreq 870 +./wallySynth.py --configsweep --tech tsmc28psyn --targetfreq 2800 --usesram +./wallySynth.py --featuresweep --tech sky130 --targetfreq 330 +./wallySynth.py --featuresweep --tech sky90 --targetfreq 870 +./wallySynth.py --featuresweep --tech tsmc28psyn --targetfreq 2800 --usesram +# Extract summary data (run this by hand after all experiments finish) +#./extractSummary.py --sky130freq 330 --sky90freq 870 --tsmcfreq 2800 + From cdd21d6635d19c264868f4e29f6848b30267a3a5 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Sun, 19 Nov 2023 13:44:22 -0600 Subject: [PATCH 60/62] Added menvcfg to debugger for checking what linux has configured. --- fpga/constraints/marked_debug.txt | 139 +++--------------------------- fpga/constraints/small-debug.xdc | 20 +++++ linux/devicetree/wally-vcu118.dts | 1 + 3 files changed, 31 insertions(+), 129 deletions(-) diff --git a/fpga/constraints/marked_debug.txt b/fpga/constraints/marked_debug.txt index 3973fc451..582af32a8 100644 --- a/fpga/constraints/marked_debug.txt +++ b/fpga/constraints/marked_debug.txt @@ -1,131 +1,12 @@ -lsu/lsu.sv: logic IEUAdrM -lsu/lsu.sv: logic WriteDataM -lsu/lsu.sv: logic LSUHADDR -lsu/lsu.sv: logic HRDATA -lsu/lsu.sv: logic LSUHWDATA -lsu/lsu.sv: logic LSUHREADY -lsu/lsu.sv: logic LSUHWRITE -lsu/lsu.sv: logic LSUHSIZE -lsu/lsu.sv: logic LSUHBURST -lsu/lsu.sv: logic LSUHTRANS -lsu/lsu.sv: logic LSUHWSTRB -lsu/lsu.sv: logic IHAdrM -ieu/regfile.sv: logic rf -ieu/datapath.sv: logic RegWriteW -hazard/hazard.sv: logic BPPredWrongE -hazard/hazard.sv: logic LoadStallD -hazard/hazard.sv: logic FCvtIntStallD -hazard/hazard.sv: logic DivBusyE -hazard/hazard.sv: logic EcallFaultM -hazard/hazard.sv: logic WFIStallM -hazard/hazard.sv: logic StallF -hazard/hazard.sv: logic FlushD -cache/cachefsm.sv: statetype CurrState -wally/wallypipelinedcore.sv: logic TrapM -wally/wallypipelinedcore.sv: logic SrcAM 
-wally/wallypipelinedcore.sv: logic InstrM wally/wallypipelinedcore.sv: logic PCM -wally/wallypipelinedcore.sv: logic MemRWM +wally/wallypipelinedcore.sv: logic TrapM wally/wallypipelinedcore.sv: logic InstrValidM -wally/wallypipelinedcore.sv: logic WriteDataM -wally/wallypipelinedcore.sv: logic IEUAdrM -wally/wallypipelinedcore.sv: logic HRDATA -ifu/spill.sv: statetype CurrState -ifu/ifu.sv: logic IFUStallF -ifu/ifu.sv: logic IFUHADDR -ifu/ifu.sv: logic HRDATA -ifu/ifu.sv: logic IFUHREADY -ifu/ifu.sv: logic IFUHWRITE -ifu/ifu.sv: logic IFUHSIZE -ifu/ifu.sv: logic IFUHBURST -ifu/ifu.sv: logic IFUHTRANS -ifu/ifu.sv: logic PCF -ifu/ifu.sv: logic PCNextF -ifu/ifu.sv: logic PCPF -ifu/ifu.sv: logic PostSpillInstrRawF -mmu/hptw.sv: logic ITLBWriteF -mmu/hptw.sv: statetype WalkerState -privileged/csrs.sv: logic CSRSReadValM -privileged/csrs.sv: logic SEPC_REGW -privileged/csrs.sv: logic MIP_REGW -privileged/csrs.sv: logic SSCRATCH_REGW -privileged/csrs.sv: logic SCAUSE_REGW -privileged/csr.sv: logic CSRReadValM -privileged/csr.sv: logic CSRSrcM -privileged/csr.sv: logic CSRWriteValM -privileged/csr.sv: logic MSTATUS_REGW -privileged/trap.sv: logic InstrMisalignedFaultM -privileged/trap.sv: logic BreakpointFaultM -privileged/trap.sv: logic LoadAccessFaultM -privileged/trap.sv: logic LoadPageFaultM -privileged/trap.sv: logic mretM -privileged/trap.sv: logic MIP_REGW -privileged/trap.sv: logic PendingIntsM -privileged/privileged.sv: logic CSRReadM -privileged/privileged.sv: logic InterruptM -privileged/csrc.sv: logic HPMCOUNTER_REGW -privileged/csri.sv: logic MExtInt -privileged/csri.sv: logic MIP_REGW_writeabl -privileged/csrm.sv: logic MIP_REGW -privileged/csrm.sv: logic MEPC_REGW -privileged/csrm.sv: logic MEDELEG_REGW -privileged/csrm.sv: logic MIDELEG_REGW -privileged/csrm.sv: logic MSCRATCH_REGW -privileged/csrm.sv: logic MCAUSE_REGW -uncore/uart_apb.sv: logic SIN -uncore/uart_apb.sv: logic SOUT -uncore/uart_apb.sv: logic OUT1b -uncore/uartPC16550D.sv: logic RBR 
-uncore/uartPC16550D.sv: logic FCR -uncore/uartPC16550D.sv: logic IER -uncore/uartPC16550D.sv: logic MCR -uncore/uartPC16550D.sv: logic baudpulse -uncore/uartPC16550D.sv: statetype rxstate -uncore/uartPC16550D.sv: logic rxfifo -uncore/uartPC16550D.sv: logic txfifo -uncore/uartPC16550D.sv: logic rxfifohead -uncore/uartPC16550D.sv: logic rxfifoentries -uncore/uartPC16550D.sv: logic RXBR -uncore/uartPC16550D.sv: logic rxtimeoutcnt -uncore/uartPC16550D.sv: logic rxparityerr -uncore/uartPC16550D.sv: logic rxdataready -uncore/uartPC16550D.sv: logic rxfifoempty -uncore/uartPC16550D.sv: logic rxdata -uncore/uartPC16550D.sv: logic RXerrbit -uncore/uartPC16550D.sv: logic rxfullbitunwrapped -uncore/uartPC16550D.sv: logic txdata -uncore/uartPC16550D.sv: logic txnextbit -uncore/uartPC16550D.sv: logic txfifoempty -uncore/uartPC16550D.sv: logic fifoenabled -uncore/uartPC16550D.sv: logic RXerr -uncore/uartPC16550D.sv: logic THRE -uncore/uartPC16550D.sv: logic rxdataavailintr -uncore/uartPC16550D.sv: logic intrID -uncore/uncore.sv: logic HSELEXTSDCD -uncore/plic_apb.sv: logic MExtInt -uncore/plic_apb.sv: logic Din -uncore/plic_apb.sv: logic requests -uncore/plic_apb.sv: logic intPriority -uncore/plic_apb.sv: logic intInProgress -uncore/plic_apb.sv: logic intThreshold -uncore/plic_apb.sv: logic intEn -uncore/plic_apb.sv: logic intClaim -uncore/plic_apb.sv: logic irqMatrix -uncore/plic_apb.sv: logic priorities_with_irqs -uncore/plic_apb.sv: logic max_priority_with_irqs -uncore/plic_apb.sv: logic irqs_at_max_priority -uncore/plic_apb.sv: logic threshMask -uncore/clint_apb.sv: logic MTIME -uncore/clint_apb.sv: logic MTIMECMP -ebu/ebu.sv: logic HCLK -ebu/ebu.sv: logic HREADY -ebu/ebu.sv: logic HRESP -ebu/ebu.sv: logic HADDR -ebu/ebu.sv: logic HWRITE -ebu/ebu.sv: logic HSIZE -ebu/ebu.sv: logic HBURST -ebu/ebu.sv: logic HPROT -ebu/ebu.sv: logic HTRANS -ebu/ebu.sv: logic HMASTLOC -ebu/buscachefsm.sv: busstatetype CurrState -ebu/busfsm.sv: busstatetype CurrState 
+wally/wallypipelinedcore.sv: logic InstrM +lsu/lsu.sv: logic IEUAdrM +lsu/lsu.sv: logic PAdrM +lsu/lsu.sv: logic ReadDataM +lsu/lsu.sv: logic WriteDataM +lsu/lsu.sv: logic MemRWM +mmu/hptw.sv: logic SATP_REGW +privileged/csr.sv: logic MENVCFG_REGW +privileged/csr.sv: logic SENVCFG_REGW diff --git a/fpga/constraints/small-debug.xdc b/fpga/constraints/small-debug.xdc index 7bf498a79..8400b7281 100644 --- a/fpga/constraints/small-debug.xdc +++ b/fpga/constraints/small-debug.xdc @@ -53,6 +53,26 @@ set_property port_width 48 [get_debug_ports u_ila_0/probe6] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe6] connect_debug_port u_ila_0/probe6 [get_nets [list {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[0]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[1]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[2]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[3]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[4]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[5]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[6]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[7]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[8]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[9]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[10]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[11]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[12]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[13]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[14]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[15]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[16]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[17]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[18]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[19]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[20]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[21]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[22]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[23]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[24]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[25]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[26]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[27]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[28]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[29]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[30]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[31]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[32]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[33]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[34]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[35]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[36]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[37]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[38]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[39]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[40]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[41]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[42]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[43]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[60]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[61]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[62]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[63]}]] +create_debug_port u_ila_0 probe +set_property port_width 64 [get_debug_ports u_ila_0/probe7] +set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe7] +connect_debug_port u_ila_0/probe7 [get_nets [list {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[0]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[1]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[2]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[3]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[4]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[5]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[6]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[7]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[8]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[9]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[10]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[11]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[12]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[13]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[14]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[15]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[16]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[17]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[18]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[19]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[20]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[21]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[22]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[23]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[24]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[25]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[26]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[27]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[28]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[29]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[30]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[31]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[32]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[33]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[34]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[35]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[36]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[37]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[38]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[39]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[40]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[41]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[42]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[43]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[44]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[45]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[46]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[47]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[48]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[49]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[50]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[51]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[52]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[53]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[54]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[55]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[56]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[57]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[58]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[59]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[60]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[61]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[62]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[63]} ]] + +create_debug_port u_ila_0 probe +set_property port_width 64 [get_debug_ports u_ila_0/probe8] +set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe8] +connect_debug_port u_ila_0/probe8 [get_nets [list {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[0]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[1]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[2]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[3]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[4]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[5]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[6]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[7]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[8]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[9]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[10]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[11]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[12]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[13]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[14]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[15]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[16]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[17]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[18]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[19]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[20]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[21]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[22]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[23]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[24]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[25]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[26]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[27]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[28]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[29]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[30]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[31]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[32]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[33]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[34]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[35]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[36]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[37]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[38]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[39]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[40]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[41]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[42]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[43]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[44]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[45]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[46]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[47]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[48]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[49]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[50]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[51]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[52]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[53]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[54]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[55]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[56]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[57]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[58]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[59]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[60]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[61]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[62]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[63]} ]] + +create_debug_port u_ila_0 probe +set_property port_width 64 [get_debug_ports u_ila_0/probe9] +set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe9] +connect_debug_port 
u_ila_0/probe9 [get_nets [list {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[0]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[1]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[2]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[3]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[4]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[5]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[6]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[7]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[8]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[9]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[10]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[11]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[12]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[13]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[14]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[15]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[16]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[17]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[18]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[19]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[20]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[21]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[22]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[23]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[24]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[25]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[26]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[27]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[28]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[29]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[30]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[31]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[32]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[33]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[34]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[35]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[36]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[37]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[38]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[39]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[40]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[41]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[42]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[43]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[44]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[45]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[46]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[47]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[48]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[49]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[50]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[51]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[52]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[53]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[54]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[55]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[56]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[57]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[58]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[59]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[60]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[61]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[62]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[63]} ]] + +create_debug_port u_ila_0 probe +set_property port_width 64 [get_debug_ports u_ila_0/probe10] +set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe10] +connect_debug_port u_ila_0/probe10 [get_nets [list {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[0]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[1]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[2]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[3]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[4]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[5]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[6]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[7]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[8]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[9]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[10]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[11]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[12]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[13]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[14]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[15]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[16]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[17]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[18]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[19]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[20]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[21]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[22]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[23]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[24]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[25]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[26]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[27]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[28]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[29]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[30]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[31]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[32]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[33]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[34]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[35]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[36]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[37]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[38]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[39]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[40]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[41]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[42]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[43]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[44]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[45]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[46]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[47]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[48]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[49]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[50]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[51]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[52]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[53]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[54]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[55]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[56]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[57]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[58]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[59]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[60]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[61]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[62]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[63]} ]] + # the debug hub has issues with the clocks from the mmcm so lets give up an connect to the 100Mhz input clock. 
#connect_debug_port dbg_hub/clk [get_nets default_100mhz_clk] connect_debug_port dbg_hub/clk [get_nets CPUCLK] diff --git a/linux/devicetree/wally-vcu118.dts b/linux/devicetree/wally-vcu118.dts index e0257c9a6..20448609b 100644 --- a/linux/devicetree/wally-vcu118.dts +++ b/linux/devicetree/wally-vcu118.dts @@ -31,6 +31,7 @@ status = "okay"; compatible = "riscv"; riscv,isa = "rv64imafdcsu"; + riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "sstc", "svinval", "svnapot", "svpbmt", "zba", "zbb", "zbc", "zbs", "zicbom", "zicbop", "zicboz", "zicntr", "zicsr", "zifencei", "zihpm"; mmu-type = "riscv,sv48"; interrupt-controller { From 8cb433cb66045512b885d1f08335c8c4d39bc1b9 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 19 Nov 2023 19:33:57 -0800 Subject: [PATCH 61/62] Commented IROM preloading --- addins/riscv-arch-test | 2 +- src/generic/mem/rom1p1r.sv | 2 +- src/ifu/irom.sv | 4 +++- synthDC/wallySynthAll.sh | 7 ++++--- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 9f9bdd62d..eb0a38922 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 9f9bdd62d3e37fcd8ad1b1a39d71694ccf1d74f3 +Subproject commit eb0a3892215ad2384702db02da1551a59701ec67 diff --git a/src/generic/mem/rom1p1r.sv b/src/generic/mem/rom1p1r.sv index 5a45e354a..617a779ff 100644 --- a/src/generic/mem/rom1p1r.sv +++ b/src/generic/mem/rom1p1r.sv @@ -47,7 +47,7 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0) initial begin if (PRELOAD_ENABLED) begin - $readmemh("../../../fpga/src/boot.mem", ROM, 0); + $readmemh("$WALLY/fpga/src/boot.mem", ROM, 0); end end diff --git a/src/ifu/irom.sv b/src/ifu/irom.sv index 0d4286e43..0b29c72cf 100644 --- a/src/ifu/irom.sv +++ b/src/ifu/irom.sv @@ -39,7 +39,9 @@ module irom import cvw::*; #(parameter cvw_t P) ( logic [31:0] RawIROMInstrF; logic [2:1] AdrD; - rom1p1r #(ADDR_WDITH, P.XLEN) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), 
.dout(IROMInstrFFull)); + // preload IROM with the FPGA bootloader by default so that it synthesizes to something, avoiding having the IEU optimized away because instructions are all 0 + // the testbench replaces these dummy contents with the actual program of interest during simulation + rom1p1r #(ADDR_WDITH, P.XLEN, 1) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(IROMInstrFFull)); if (P.XLEN == 32) assign RawIROMInstrF = IROMInstrFFull; else begin // IROM is aligned to XLEN words, but instructions are 32 bits. Select between the two diff --git a/synthDC/wallySynthAll.sh b/synthDC/wallySynthAll.sh index 9c085601c..9af40a379 100755 --- a/synthDC/wallySynthAll.sh +++ b/synthDC/wallySynthAll.sh @@ -1,7 +1,8 @@ # Run all Wally synthesis experiments from chapter 8 -./wallySynth.py --freqsweep 330 --tech sky130 -./wallySynth.py --freqsweep 870 --tech sky90 -./wallySynth.py --freqsweep 2800 --tech tsmc28psyn --usesram +# However, trying to run the freqsweeps at the same time maxes out licenses and some runs fail +#./wallySynth.py --freqsweep 330 --tech sky130 +#./wallySynth.py --freqsweep 870 --tech sky90 +#./wallySynth.py --freqsweep 2800 --tech tsmc28psyn --usesram ./wallySynth.py --configsweep --tech sky130 --targetfreq 330 ./wallySynth.py --configsweep --tech sky90 --targetfreq 870 ./wallySynth.py --configsweep --tech tsmc28psyn --targetfreq 2800 --usesram From 3594c08d4b70fa6b97b1d0063abdbd1eaf145974 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 20 Nov 2023 10:30:35 -0600 Subject: [PATCH 62/62] Modified linux imperas tests to 1. enable zicclsm 2. 
enable logging at 7000 ms --- config/buildroot/config.vh | 2 +- sim/imperas.ic | 5 +++-- sim/run-imperas-linux.sh | 2 +- sim/wally-linux-imperas.do | 6 ++++-- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/config/buildroot/config.vh b/config/buildroot/config.vh index 0015e2bba..35be93fd6 100644 --- a/config/buildroot/config.vh +++ b/config/buildroot/config.vh @@ -45,7 +45,7 @@ localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 1; -localparam ZICCLSM_SUPPORTED = 0; +localparam ZICCLSM_SUPPORTED = 1; localparam SVPBMT_SUPPORTED = 1; localparam SVNAPOT_SUPPORTED = 1; localparam SVINVAL_SUPPORTED = 1; diff --git a/sim/imperas.ic b/sim/imperas.ic index adb10dcad..d9226c4bb 100644 --- a/sim/imperas.ic +++ b/sim/imperas.ic @@ -18,6 +18,8 @@ # More extensions --override cpu/Zcb=T +--override cpu/unaligned=T + # Cache block operations --override cpu/Zicbom=T --override cpu/Zicbop=T @@ -40,7 +42,6 @@ --override cpu/reset_address=0x80000000 ---override cpu/unaligned=F --override cpu/ignore_non_leaf_DAU=1 --override cpu/wfi_is_nop=T --override cpu/misa_Extensions_mask=0x0 @@ -88,7 +89,7 @@ # Add Imperas simulator application instruction tracing --verbose ---trace --tracechange --traceshowicount --tracemode -tracemem ASX --monitornetschange --traceafter 0 +#--trace --tracechange --traceshowicount --tracemode -tracemem ASX --monitornetschange --traceafter 300000000 --override cpu/debugflags=6 --override cpu/verbose=1 --override cpu/show_c_prefix=T diff --git a/sim/run-imperas-linux.sh b/sim/run-imperas-linux.sh index fd265cb9e..6a49f46e9 100755 --- a/sim/run-imperas-linux.sh +++ b/sim/run-imperas-linux.sh @@ -7,4 +7,4 @@ export OTHERFLAGS="+TRACE2LOG_ENABLE=1" #export OTHERFLAGS="+TRACE2LOG_ENABLE=1 +TRACE2LOG_AFTER=10500000" export OTHERFLAGS="" -vsim -c -do "do wally-linux-imperas.do buildroot buildroot-no-trace $::env(RISCV) 0 0 0" +vsim -c -do "do wally-linux-imperas.do buildroot 
buildroot-no-trace $::env(RISCV) 0 0 0" diff --git a/sim/wally-linux-imperas.do b/sim/wally-linux-imperas.do index f173f67c9..196c780be 100644 --- a/sim/wally-linux-imperas.do +++ b/sim/wally-linux-imperas.do @@ -40,6 +40,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { #-- Run the Simulation #run -all + run 7000 ms add log -recursive /* do linux-wave.do run -all @@ -87,9 +88,10 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { #run 100 ns #force -deposit testbench/dut/core/priv/priv/csr/csri/IE_REGW 16'h2aa #force -deposit testbench/dut/uncore/uncore/clint/clint/MTIMECMP 64'h1000 + run 7000 ms + add log -recursive /testbench/dut/* + do wave.do run 14000 ms - #add log -recursive /* - #do linux-wave.do #run -all exec ./slack-notifier/slack-notifier.py