Merge pull request #644 from davidharrishmc/dev

Synthesis fixes
2025-02-11 06:05:49 +00:00 · 2024-03-05 10:39:40 -06:00 · 2024-03-05 10:39:40 -06:00 · d1a1345e4d
commit d1a1345e4d
parent 2e31bf021c e8e0538f6c
9 changed files with 35 additions and 78 deletions
--- a/bin/wally-tool-chain-install.sh
+++ b/bin/wally-tool-chain-install.sh
@ -69,9 +69,6 @@ fi
 cd $RISCV
 git clone https://github.com/riscv/riscv-gnu-toolchain
 cd riscv-gnu-toolchain
-# Temporarily use the following commands until gcc-13 is part of riscv-gnu-toolchain (issue #1249)
-#git clone https://github.com/gcc-mirror/gcc -b releases/gcc-13 gcc-13
-#./configure --prefix=/opt/riscv --with-multilib-generator="rv32e-ilp32e--;rv32i-ilp32--;rv32im-ilp32--;rv32iac-ilp32--;rv32imac-ilp32--;rv32imafc-ilp32f--;rv32imafdc-ilp32d--;rv64i-lp64--;rv64ic-lp64--;rv64iac-lp64--;rv64imac-lp64--;rv64imafdc-lp64d--;rv64im-lp64--;" --with-gcc-src=`pwd`/gcc-13
 ./configure --prefix=${RISCV} --with-multilib-generator="rv32e-ilp32e--;rv32i-ilp32--;rv32im-ilp32--;rv32iac-ilp32--;rv32imac-ilp32--;rv32imafc-ilp32f--;rv32imafdc-ilp32d--;rv64i-lp64--;rv64ic-lp64--;rv64iac-lp64--;rv64imac-lp64--;rv64imafdc-lp64d--;rv64im-lp64--;"
 make -j ${NUM_THREADS}

@ -111,14 +108,15 @@ cd riscv-isa-sim/build
 make -j ${NUM_THREADS}
 make install 
 cd ../arch_test_target/spike/device
-sed -i 's/--isa=rv32ic/--isa=rv32iac/' rv32i_m/privilege/Makefile.include
-sed -i 's/--isa=rv64ic/--isa=rv64iac/' rv64i_m/privilege/Makefile.include
+# dh 2/5/24: these should be obsolete
+#sed -i 's/--isa=rv32ic/--isa=rv32iac/' rv32i_m/privilege/Makefile.include
+#sed -i 's/--isa=rv64ic/--isa=rv64iac/' rv64i_m/privilege/Makefile.include

 # Wally needs Verilator 5.021 or later.
 # Verilator needs to be built from scratch to get the latest version
 # apt-get install verilator installs version 4.028 as of 6/8/23
 sudo apt-get install -y perl g++ ccache help2man libgoogle-perftools-dev numactl perl-doc zlib1g 
-sudo apt-get install -y libfl2  libfl-dev  # Ubuntu only (ignore if gives error)
+sudo apt-get install -y perl g++ ccache help2man libgoogle-perftools-dev numactl perl-doc zlib1g 
 cd $RISCV
 git clone https://github.com/verilator/verilator   # Only first time
 # unsetenv VERILATOR_ROOT  # For csh; ignore error if on bash
@ -173,6 +171,8 @@ sudo make install

 cd $RISCV
 opam init -y --disable-sandboxing
+opam update
+opam upgrade
 opam switch create 5.1.0
 opam install sail -y 

--- a/src/cache/cacheLRU.sv
+++ b/src/cache/cacheLRU.sv
@ -143,16 +143,14 @@ module cacheLRU
  // This is a two port memory.
  // Every cycle must read from CacheSetData and each load/store must write the new LRU.
  always_ff @(posedge clk) begin
-    if (reset) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] = '0; // exclusion-tag: initialize
+    if (reset | (InvalidateCache & ~FlushStage)) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0;
    if(CacheEn) begin
-      if(ClearValid & ~FlushStage)
-        LRUMemory[PAdr] <= '0;
-      else if(LRUWriteEn)
+      if(LRUWriteEn)
        LRUMemory[PAdr] <= NextLRU;
      if(LRUWriteEn & (PAdr == CacheSetTag))
-        CurrLRU <= #1 NextLRU;
+        CurrLRU <= NextLRU;
      else 
-        CurrLRU <= #1 LRUMemory[CacheSetTag];
+        CurrLRU <= LRUMemory[CacheSetTag];
    end
  end

--- a/src/fpu/fdivsqrt/fdivsqrtiter.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv
@ -44,7 +44,7 @@ module fdivsqrtiter import cvw::*;  #(parameter cvw_t P) (
  logic [P.DIVb+3:0]      WCNext[P.DIVCOPIES-1:0]; // Q4.DIVb
  logic [P.DIVb+3:0]      WS[P.DIVCOPIES:0];       // Q4.DIVb
  logic [P.DIVb+3:0]      WC[P.DIVCOPIES:0];       // Q4.DIVb
-  logic [P.DIVb:0]        U[P.DIVCOPIES:0];        // U1.DIVb
+  logic [P.DIVb:0]        U[P.DIVCOPIES:0];        // U1.DIVb // *** probably Q not U.  See Table 16.26 notes
  logic [P.DIVb:0]        UM[P.DIVCOPIES:0];       // U1.DIVb
  logic [P.DIVb:0]        UNext[P.DIVCOPIES-1:0];  // U1.DIVb
  logic [P.DIVb:0]        UMNext[P.DIVCOPIES-1:0]; // U1.DIVb
@ -71,7 +71,7 @@ module fdivsqrtiter import cvw::*;  #(parameter cvw_t P) (
  flopen #(P.DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]);

  // UOTFC Result U and UM registers/initialization mux
-  // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 otherwise
+  // Initialize U to 0 = 0.0000... and UM to -1 = 1.00000... (in Q1.Divb)
  assign initU  ={(P.DIVb+1){1'b0}};
  assign initUM = {{1'b1}, {(P.DIVb){1'b0}}};
  mux2   #(P.DIVb+1)  Umux(UNext[P.DIVCOPIES-1],  initU,  IFDivStartE, UMux);
@ -79,15 +79,10 @@ module fdivsqrtiter import cvw::*;  #(parameter cvw_t P) (
  flopen #(P.DIVb+1)  UReg(clk, FDivBusyE, UMux,  U[0]);
  flopen #(P.DIVb+1) UMReg(clk, FDivBusyE, UMMux, UM[0]);

-  // C register/initialization mux
-  logic [1:0] initCUpper;
-  if(P.RADIX == 4) begin
-    assign initCUpper = 2'b00;
-  end else begin
-    assign initCUpper = 2'b10;
-  end
-  
-  assign initC = {initCUpper, {P.DIVb{1'b0}}};
+  // C register/initialization mux: C = -R:
+  // C = -4 = 00.000000... (in Q2.DIVb) for radix 4, C = -2 = 10.000000... for radix2
+  if(P.RADIX == 4) assign initC = '0;
+  else             assign initC = {2'b10, {{P.DIVb{1'b0}}}};
  mux2   #(P.DIVb+2) cmux(C[P.DIVCOPIES], initC, IFDivStartE, NextC); 
  flopen #(P.DIVb+2) creg(clk, FDivBusyE, NextC, C[0]);

--- a/src/fpu/fdivsqrt/fdivsqrtstage4.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtstage4.sv
@ -48,16 +48,16 @@ module fdivsqrtstage4 import cvw::*;  #(parameter cvw_t P) (
  logic [7:0]               WCmsbs, WSmsbs;     // U4.4
  logic                     CarryIn;
  logic [P.DIVb+3:0]        WSA, WCA;           // Q4.DIVb
-  logic j0,j1;
+  logic j0, j1;                                 // step j = 0 or step j = 1

  // Digit Selection logic
  assign j0     = ~C[P.DIVb+1];             // first step of R digit selection: C = 00...0
-  assign j1     = C[P.DIVb] ^ C[P.DIVb-1];  // second step of R digit selection: C = 1100...0
+  assign j1     = C[P.DIVb] & ~C[P.DIVb-1]; // second step of R digit selection: C = 1100...0; *** could simplify to ~C[P.DIVb-1] because j=0 case takes priority
  assign Smsbs  = U[P.DIVb:P.DIVb-4];       // U1.4 most significant bits of square root
  assign Dmsbs  = D[P.DIVb-1:P.DIVb-3];     // U0.3 most significant fractional bits of divisor after leading 1
  assign WCmsbs = WC[P.DIVb+3:P.DIVb-4];    // Q4.4 most significant bits of residual
  assign WSmsbs = WS[P.DIVb+3:P.DIVb-4];    // Q4.4 most significant bits of residual
-  fdivsqrtuslc4cmp uslc4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .j0, .udigit);
+  fdivsqrtuslc4cmp uslc4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j0, .j1, .udigit);
  assign un = 1'b0; // unused for radix 4

  // F generation logic
--- a/src/fpu/fdivsqrt/fdivsqrtuslc4.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtuslc4.sv
@ -31,7 +31,7 @@ module fdivsqrtuslc4 (
  input  logic [2:0] Dmsbs,             // U0.3 fractional bits after implicit leading 1
  input  logic [4:0] Smsbs,             // U1.4 leading bits of square root approximation
  input  logic [7:0] WSmsbs, WCmsbs,    // Q4.4 redundant residual most significant bits
-  input  logic       Sqrt, j1,
+  input  logic       Sqrt, j0, j1,
  output logic [3:0] udigit             // {2, 1, -1, -2} digit is 0 if none are hot
 );
  logic [7:0] PreWmsbs;                 // Q4.4 nonredundant residual msbs
@ -102,11 +102,12 @@ module fdivsqrtuslc4 (
  // Select A
  always_comb
    if (Sqrt) begin 
-      if (j1) A = 3'b101;                       // on first sqrt iteration        A = .101
-      else if (Smsbs == 5'b10000) A = 3'b111;   // if S = 1.0, use                A = .111
-      else A = Smsbs[2:0];                      // otherwise use                  A = 2S (in U0.3 format)
-    end else A = Dmsbs;                         // division Unless                A = D (IN U0.3 format, dropping leading 1)
+      if (j1)                 A = 3'b101;     // on first sqrt iteration        A = .101
+      else if (Smsbs[4] == 1) A = 3'b111;     // if S = 1.0000, use             A = .111
+      else                    A = Smsbs[2:0]; // otherwise use                  A = 2S (in U0.3 format)
+    end else                  A = Dmsbs;      // division                       A = D (IN U0.3 format, dropping leading 1)

  // Select quotient digit from lookup table based on A and W
-  assign udigit = USel4[{A,Wmsbs}];
+  // On step j = 0 for square root, always select u_0 = 1
+  assign udigit = (Sqrt & j0) ? 4'b0100 : USel4[{A,Wmsbs}];
 endmodule
--- a/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv
@ -32,7 +32,7 @@ module fdivsqrtuslc4cmp (
  input  logic [4:0] Smsbs,             // U1.4 leading bits of square root approximation
  input  logic [7:0] WSmsbs, WCmsbs,    // Q4.4 residual most significant bits
  input  logic       SqrtE, 
-  input  logic       j0,j1,             // are we on first (j0) or second step (j1) of digit selection
+  input  logic       j0, j1,            // are we on first (j0) or second step (j1) of digit selection
  output logic [3:0] udigit             // {2, 1, -1, -2} digit is 0 if none are hot
 );
  logic [6:0] Wmsbs;
@ -71,23 +71,22 @@ module fdivsqrtuslc4cmp (
  
  // handles special case when j = 0 or j = 1 for sqrt
  assign mkj2 = 20; // when j = 1 use mk2[101] when j = 0 use anything bigger than 7.
-  assign mkj1 = j1 ? 8 : 0; // when j = 1 use mk1[101] = 8 and when j = 0 use 0 so we choose u_0 = 1
+  assign mkj1 = j0 ? 0 : 8; // when j = 1 use mk1[101] = 8 and when j = 0 use 0 so we choose u_0 = 1
  assign sqrtspecial = SqrtE & (j1 | j0);

  // Choose A for current operation 
 always_comb
    if (SqrtE) begin 
-      if (Smsbs[4]) A = 3'b111; // *** can we get rid of SMSBs case?
+      if (Smsbs[4]) A = 3'b111; // for S = 1.0000  *** can we optimize away this case?
      else A = Smsbs[2:0];
    end else A = Dmsbs;
-
    
  // Choose selection constants based on a
  
  assign mk2 = sqrtspecial ? mkj2 : mks2[A];
  assign mk1 = sqrtspecial ? mkj1 : mks1[A];
  assign mk0 = -mk1;
-  assign mkm1 = (A == 3'b000) ? -13 : -mk2; // asymmetry in table *** can we hide?
+  assign mkm1 = (A == 3'b000) ? -13 : -mk2; // asymmetry in table *** can we hide from critical path
 
  // Compare residual W to selection constants to choose digit
  always_comb 
--- a/src/generic/flop/floprc.sv
+++ b/src/generic/flop/floprc.sv
@ -1,38 +0,0 @@
-///////////////////////////////////////////
-// floprc.sv
-//
-// Written: David_Harris@hmc.edu 9 January 2021
-// Modified: 
-//
-// Purpose: D flip-flop with synchronous reset and clear
-//
-// A component of the CORE-V-WALLY configurable RISC-V project.
-// https://github.com/openhwgroup/cvw
-// 
-// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
-//
-// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
-//
-// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
-// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
-// may obtain a copy of the License at
-//
-// https://solderpad.org/licenses/SHL-2.1/
-//
-// Unless required by applicable law or agreed to in writing, any work distributed under the 
-// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
-// either express or implied. See the License for the specific language governing permissions 
-// and limitations under the License.
-////////////////////////////////////////////////////////////////////////////////////////////////
-
-module floprc #(parameter WIDTH = 8) (
-  input  logic clk,
-  input  logic reset,
-  input  logic clear,
-  input  logic [WIDTH-1:0] d, 
-  output logic [WIDTH-1:0] q);
-
-  always_ff @(posedge clk)
-    if (reset | clear ) q <= #1 0;
-    else                q <= #1 d;
-endmodule
--- a/src/mmu/hptw.sv
+++ b/src/mmu/hptw.sv
@ -148,6 +148,7 @@ module hptw import cvw::*;  #(parameter cvw_t P) (
  flopenr #(1) TLBMissMReg(clk, reset, StartWalk, DTLBMissOrUpdateDAM, DTLBWalk); // when walk begins, record whether it was for DTLB (or record 0 for ITLB)
  assign PRegEn = HPTWRW[1] & ~DCacheBusStallM | UpdatePTE;
  flopenr #(P.XLEN) PTEReg(clk, reset, PRegEn, NextPTE, PTE); // Capture page table entry from data cache
+  assert property(@(posedge clk) ~PRegEn | reset | NextPTE[0] !== 1'bx); // report writing an x PTE from an uninitialized page table

  // Assign PTE descriptors common across all XLEN values
  // For non-leaf PTEs, D, A, U bits are reserved and ignored.  They do not cause faults while walking the page table
@ -173,7 +174,8 @@ module hptw import cvw::*;  #(parameter cvw_t P) (
    logic [P.XLEN-1:0]    AccessedPTE;

    assign AccessedPTE = {PTE[P.XLEN-1:8], (SetDirty | PTE[7]), 1'b1, PTE[5:0]}; // set accessed bit, conditionally set dirty bit
-    assign ReadDataNoXM = (ReadDataM[0] === 'x) ? '0 : ReadDataM; // If the PTE.V bit is x because it was read from uninitialized memory set to 0 to avoid x propagation and hanging the simulation.
+    //assign ReadDataNoXM = (ReadDataM[0] === 'x) ? '0 : ReadDataM; // If the PTE.V bit is x because it was read from uninitialized memory set to 0 to avoid x propagation and hanging the simulation.
+    assign ReadDataNoXM = ReadDataM; // *** temporary fix for synthesis; === and x in line above are not synthesizable.
    mux2 #(P.XLEN) NextPTEMux(ReadDataNoXM, AccessedPTE, UpdatePTE, NextPTE); // NextPTE = ReadDataNoXM when ADUE = 0 because UpdatePTE = 0
    flopenr #(P.PA_BITS) HPTWAdrWriteReg(clk, reset, SaveHPTWAdr, HPTWReadAdr, HPTWWriteAdr);
    
--- a/src/privileged/privdec.sv
+++ b/src/privileged/privdec.sv
@ -80,8 +80,8 @@ module privdec import cvw::*;  #(parameter cvw_t P) (

  if (P.U_SUPPORTED) begin:wfi
    logic [P.WFI_TIMEOUT_BIT:0] WFICount, WFICountPlus1;
-    assign WFICountPlus1 = WFICount + 1;
-    floprc #(P.WFI_TIMEOUT_BIT+1) wficountreg(clk, reset, ~wfiM, WFICountPlus1, WFICount);  // count while in WFI
+    assign WFICountPlus1 = wfiM ? '0 : WFICount + 1; // restart counting on WFI
+    flopr #(P.WFI_TIMEOUT_BIT+1) wficountreg(clk, reset, WFICountPlus1, WFICount);  // count while in WFI
  // coverage off -item e 1 -fecexprrow 1
  // WFI Timout trap will not occur when STATUS_TW is low while in supervisor mode, so the system gets stuck waiting for an interrupt and triggers a watchdog timeout.
    assign WFITimeoutM = ((STATUS_TW & PrivilegeModeW != P.M_MODE) | (P.S_SUPPORTED & PrivilegeModeW == P.U_MODE)) & WFICount[P.WFI_TIMEOUT_BIT];