From 5a2bcb917fe4e56cab2ad21f3548704b135aea70 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 14 Jun 2023 08:39:01 -0700 Subject: [PATCH 1/9] Removed QEMU from UART --- src/uncore/uartPC16550D.sv | 13 ++++--------- src/uncore/uart_apb.sv | 3 +-- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/uncore/uartPC16550D.sv b/src/uncore/uartPC16550D.sv index fc01f4024..63a7e8d73 100644 --- a/src/uncore/uartPC16550D.sv +++ b/src/uncore/uartPC16550D.sv @@ -35,7 +35,7 @@ /* verilator lint_off UNOPTFLAT */ -module uartPC16550D #(parameter UART_PRESCALE, QEMU) ( +module uartPC16550D #(parameter UART_PRESCALE) ( // Processor Interface input logic PCLK, PRESETn, // UART clock and active low reset input logic [2:0] A, // address input (8 registers) @@ -136,7 +136,7 @@ module uartPC16550D #(parameter UART_PRESCALE, QEMU) ( if (~PRESETn) begin // Table 3 Reset Configuration IER <= #1 4'b0; FCR <= #1 8'b0; - if (QEMU) LCR <= #1 8'b0; else LCR <= #1 8'b11; // fpga only **** BUG + LCR <= #1 8'b11; // spec says to reset to 0, but FPGA needs to reset to 8 data bits MCR <= #1 5'b0; LSR <= #1 8'b01100000; MSR <= #1 4'b0; @@ -258,9 +258,7 @@ module uartPC16550D #(parameter UART_PRESCALE, QEMU) ( else if (fifoenabled & ~rxfifoempty & rxbaudpulse & ~rxfifotimeout) rxtimeoutcnt <= #1 rxtimeoutcnt+1; // *** not right end - // ***explain why - if(QEMU) assign rxcentered = rxbaudpulse & (rxoversampledcnt[1:0] == 2'b10); // implies rxstate = UART_ACTIVE - else assign rxcentered = rxbaudpulse & (rxoversampledcnt == 4'b1000); // implies rxstate = UART_ACTIVE + assign rxcentered = rxbaudpulse & (rxoversampledcnt == 4'b1000); // implies rxstate = UART_ACTIVE assign rxbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1; // start bit + data bits + (parity bit) + stop bit @@ -388,9 +386,7 @@ module uartPC16550D #(parameter UART_PRESCALE, QEMU) ( end assign txbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1 + {3'b000, LCR[2]} - 
4'd1; // start bit + data bits + (parity bit) + stop bit(s) - 1 - // *** explain; is this necessary? - if (QEMU) assign txnextbit = txbaudpulse & (txoversampledcnt[1:0] == 2'b00); // implies txstate = UART_ACTIVE - else assign txnextbit = txbaudpulse & (txoversampledcnt == 4'b0000); // implies txstate = UART_ACTIVE + assign txnextbit = txbaudpulse & (txoversampledcnt == 4'b0000); // implies txstate = UART_ACTIVE /////////////////////////////////////////// // transmit holding register, shift register, FIFO @@ -475,7 +471,6 @@ module uartPC16550D #(parameter UART_PRESCALE, QEMU) ( assign txfifoentries = (txfifohead >= txfifotail) ? (txfifohead-txfifotail) : (txfifohead + 16 - txfifotail); // verilator lint_on WIDTH - //assign txfifofull = (txfifoentries == 4'b1111); assign txfifofull = (txfifohead == txfifotail) & HeadPointerLastMove; // transmit buffer ready bit diff --git a/src/uncore/uart_apb.sv b/src/uncore/uart_apb.sv index b6a0321a9..b7601b24f 100644 --- a/src/uncore/uart_apb.sv +++ b/src/uncore/uart_apb.sv @@ -81,8 +81,7 @@ module uart_apb import cvw::*; #(parameter cvw_t P) ( end logic BAUDOUTb; // loop tx clock BAUDOUTb back to rx clock RCLK - // *** make sure reads don't occur on UART unless fully selected because they could change state. 
This applies to all peripherals - uartPC16550D #(P.UART_PRESCALE, P.QEMU) u( + uartPC16550D #(P.UART_PRESCALE) u( // Processor Interface .PCLK, .PRESETn, .A(entry), .Din, From 9da4005a1ebd5f63ef9ba839a891446590a10e65 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 14 Jun 2023 08:47:01 -0700 Subject: [PATCH 2/9] Removed *** from UART code --- src/uncore/uartPC16550D.sv | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/uncore/uartPC16550D.sv b/src/uncore/uartPC16550D.sv index 63a7e8d73..8e0709f48 100644 --- a/src/uncore/uartPC16550D.sv +++ b/src/uncore/uartPC16550D.sv @@ -7,11 +7,11 @@ // Purpose: Universial Asynchronous Receiver/ Transmitter with FIFOs // Emulates interface of Texas Instruments PC16550D // https://media.digikey.com/pdf/Data%20Sheets/Texas%20Instruments%20PDFs/PC16550D.pdf -// Compatible with UART in Imperas Virtio model *** +// Compatible with UART in Imperas Virtio model // // Compatible with most of PC16550D with the following known exceptions: // Generates 2 rather than 1.5 stop bits when 5-bit word length is slected and LCR[2] = 1 -// Timeout not yet implemented*** +// Timeout not yet implemented // // Documentation: RISC-V System on Chip Design Chapter 15 // @@ -204,10 +204,11 @@ module uartPC16550D #(parameter UART_PRESCALE) ( // consider switching to same fixed-frequency reference clock used for TIME register // prescale by factor of 2^UART_PRESCALE to allow for high-frequency reference clock // Unlike PC16550D, this unit is hardwired with same rx and tx baud clock - // *** add table of scale factors to get 16x uart clk + // For example, with PCLK = 320 MHz, UART_PRESCALE = 5, DLM = 0, DLL = 65, + // 320 MHz system clock is divided by 65 x 2^5. The UART clock 16x oversamples + // the data, so the baud rate is 320x10^6 / (65 x 2^5 x 16) = 9615 Hz, which is + // close enough to 9600 baud to stay synchronized over the duration of one character. 
/////////////////////////////////////////// - // Ross Thompson: Found a bug. If the baud rate dividers DLM, and DLL are reloaded - // the baudcount is not reset to {DLM, DLL, UART_PRESCALE} always_ff @(posedge PCLK, negedge PRESETn) if (~PRESETn) begin baudcount <= #1 1; @@ -255,7 +256,7 @@ module uartPC16550D #(parameter UART_PRESCALE) ( end // timeout counting if (~MEMRb & A == 3'b000 & ~DLAB) rxtimeoutcnt <= #1 0; // reset timeout on read - else if (fifoenabled & ~rxfifoempty & rxbaudpulse & ~rxfifotimeout) rxtimeoutcnt <= #1 rxtimeoutcnt+1; // *** not right + else if (fifoenabled & ~rxfifoempty & rxbaudpulse & ~rxfifotimeout) rxtimeoutcnt <= #1 rxtimeoutcnt+1; // may not be right end assign rxcentered = rxbaudpulse & (rxoversampledcnt == 4'b1000); // implies rxstate = UART_ACTIVE @@ -281,7 +282,7 @@ module uartPC16550D #(parameter UART_PRESCALE) ( // ERROR CONDITIONS assign rxparity = ^rxdata; - assign rxparityerr = (rxparity ^ rxparitybit ^ ~evenparitysel) & LCR[3]; // Check even/odd parity (*** check if LCR needs to be inverted) + assign rxparityerr = (rxparity ^ rxparitybit ^ ~evenparitysel) & LCR[3]; // Check even/odd parity assign rxoverrunerr = fifoenabled ? 
(rxfifoentries == 15) : rxdataready; // overrun if FIFO or receive buffer register full assign rxframingerr = ~rxstopbit; // framing error if no stop bit assign rxbreak = rxframingerr & (rxdata9 == 9'b0); // break when 0 for start + data + parity + stop time @@ -324,7 +325,7 @@ module uartPC16550D #(parameter UART_PRESCALE) ( (rxfifohead + 16 - rxfifotail); // verilator lint_on WIDTH assign rxfifotriggered = rxfifoentries >= rxfifotriggerlevel; - assign rxfifotimeout = rxtimeoutcnt == {rxbitsexpected, 6'b0}; // time out after 4 character periods; *** probably not right yet + assign rxfifotimeout = rxtimeoutcnt == {rxbitsexpected, 6'b0}; // time out after 4 character periods; probably not right yet //assign rxfifotimeout = 0; // disabled pending fix // detect any errors in rx fifo @@ -394,7 +395,7 @@ module uartPC16550D #(parameter UART_PRESCALE) ( always_comb begin // compute value for parity and tx holding register nexttxdata = fifoenabled ? txfifo[txfifotail] : TXHR; // pick from FIFO or holding register case (LCR[1:0]) // compute parity from appropriate number of bits - 2'b00: txparity = ^nexttxdata[4:0] ^ ~evenparitysel; // *** check polarity + 2'b00: txparity = ^nexttxdata[4:0] ^ ~evenparitysel; 2'b01: txparity = ^nexttxdata[5:0] ^ ~evenparitysel; 2'b10: txparity = ^nexttxdata[6:0] ^ ~evenparitysel; 2'b11: txparity = ^nexttxdata[7:0] ^ ~evenparitysel; From 430537a0521a4812a49e58c8971e36027d480add Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 14 Jun 2023 09:44:52 -0700 Subject: [PATCH 3/9] Moved cvw.sv to src root directory to avoid double-compiling and producing a warning. Adjusted to files to reflect this. 
--- sim/wally-batch.do | 8 ++++---- sim/wally-imperas-cov.do | 2 +- sim/wally-imperas-no-idv.do | 2 +- sim/wally-imperas.do | 2 +- sim/wally-linux-imperas.do | 10 +++++----- sim/wally.do | 10 +++++----- src/{wally => }/cvw.sv | 0 7 files changed, 17 insertions(+), 17 deletions(-) rename src/{wally => }/cvw.sv (100%) diff --git a/sim/wally-batch.do b/sim/wally-batch.do index 6b5acbb92..29d31bd25 100644 --- a/sim/wally-batch.do +++ b/sim/wally-batch.do @@ -59,7 +59,7 @@ if {$argc >= 3} { # default to config/rv64ic, but allow this to be overridden at the command line. For example: # do wally-pipelined-batch.do ../config/rv32imc rv32imc if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { - vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 + vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/cvw.sv ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 # start and run simulation if { $coverage } { echo "wally-batch buildroot coverage" @@ -88,7 +88,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { exec ./slack-notifier/slack-notifier.py } elseif {$2 eq "ahb"} { - vlog -lint -work wkdir/work_${1}_${2}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 +define+RAM_LATENCY=$3 +define+BURST_EN=$4 + vlog -lint -work wkdir/work_${1}_${2}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../src/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 +define+RAM_LATENCY=$3 +define+BURST_EN=$4 # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access 
internal signals vopt wkdir/work_${1}_${2}_${3}_${4}.testbench -work wkdir/work_${1}_${2}_${3}_${4} -G TEST=$2 -o testbenchopt @@ -112,7 +112,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { # **** fix this so we can pass any number of +defines. # only allows 3 right now - vlog -lint -work wkdir/work_${1}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 $5 $6 $7 + vlog -lint -work wkdir/work_${1}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../src/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 $5 $6 $7 # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access internal signals vopt wkdir/work_${1}_${3}_${4}.testbench -work wkdir/work_${1}_${3}_${4} -G TEST=$4 -o testbenchopt @@ -126,7 +126,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { # power off -r /dut/core/* } else { - vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 + vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access internal signals if {$coverage} { diff --git a/sim/wally-imperas-cov.do b/sim/wally-imperas-cov.do index a8beb12f1..e39d9ef38 100644 --- a/sim/wally-imperas-cov.do +++ b/sim/wally-imperas-cov.do @@ -43,7 +43,7 @@ vlog +incdir+../config/$1 \ +incdir+$env(IMPERAS_HOME)/ImpProprietary/source/host/riscvISACOV/source \ 
$env(IMPERAS_HOME)/ImpProprietary/source/host/rvvi/trace2cov.sv \ \ - ../src/wally/cvw.sv \ + ../src/cvw.sv \ ../testbench/testbench_imperas.sv \ ../testbench/common/*.sv \ ../src/*/*.sv \ diff --git a/sim/wally-imperas-no-idv.do b/sim/wally-imperas-no-idv.do index b9068da36..bd07daee1 100644 --- a/sim/wally-imperas-no-idv.do +++ b/sim/wally-imperas-no-idv.do @@ -27,7 +27,7 @@ vlib work vlog +incdir+../config/$1 \ +incdir+../config/shared \ ../../external/ImperasDV-HMC/Imperas/ImpPublic/source/host/rvvi/rvvi-trace.sv \ - ../src/wally/cvw.sv \ + ../src/cvw.sv \ ../testbench/testbench_imperas.sv \ ../testbench/common/*.sv \ ../src/*/*.sv \ diff --git a/sim/wally-imperas.do b/sim/wally-imperas.do index 640bae491..1120b8099 100644 --- a/sim/wally-imperas.do +++ b/sim/wally-imperas.do @@ -37,7 +37,7 @@ vlog +incdir+../config/$1 \ $env(IMPERAS_HOME)/ImpProprietary/source/host/rvvi/trace2api.sv \ $env(IMPERAS_HOME)/ImpProprietary/source/host/rvvi/trace2log.sv \ $env(IMPERAS_HOME)/ImpProprietary/source/host/rvvi/trace2cov.sv \ - ../src/wally/cvw.sv \ + ../src/cvw.sv \ ../testbench/testbench_imperas.sv \ ../testbench/common/*.sv \ ../src/*/*.sv \ diff --git a/sim/wally-linux-imperas.do b/sim/wally-linux-imperas.do index cd13c4193..523d31a64 100644 --- a/sim/wally-linux-imperas.do +++ b/sim/wally-linux-imperas.do @@ -33,7 +33,7 @@ vlib work # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access internal signals if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { - vlog -lint -work work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 + vlog -lint -work work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/cvw.sv ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 # start and run simulation vopt work_${1}_${2}.testbench -work 
work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=$4 -G INSTR_WAVEON=$5 -G CHECKPOINT=$6 -G NO_SPOOFING=0 -o testbenchopt vsim -lib work_${1}_${2} testbenchopt -suppress 8852,12070,3084,3829,13286 -fatal 7 @@ -60,7 +60,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { $env(IMPERAS_HOME)/ImpProprietary/source/host/rvvi/trace2api.sv \ $env(IMPERAS_HOME)/ImpProprietary/source/host/rvvi/trace2log.sv \ $env(IMPERAS_HOME)/ImpProprietary/source/host/rvvi/trace2cov.sv \ - ../src/wally/cvw.sv \ + ../src/cvw.sv \ ../testbench/testbench-linux-imperas.sv \ ../testbench/common/*.sv ../src/*/*.sv \ ../src/*/*/*.sv -suppress 2583 @@ -94,7 +94,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { } elseif {$2 eq "fpga"} { echo "hello" - vlog -work work +incdir+../config/fpga +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/sdc/*.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv ../../fpga/sim/*.sv -suppress 8852,12070,3084,3829,2583,7063,13286 + vlog -work work +incdir+../config/fpga +incdir+../config/shared ../src/cvw.sv ../testbench/testbench.sv ../testbench/sdc/*.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv ../../fpga/sim/*.sv -suppress 8852,12070,3084,3829,2583,7063,13286 vopt +acc work.testbench -G TEST=$2 -G DEBUG=0 -o workopt vsim workopt +nowarn3829 -fatal 7 @@ -104,10 +104,10 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { } else { if {$2 eq "ahb"} { - vlog +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583,13286 -suppress 7063 +define+RAM_LATENCY=$3 +define+BURST_EN=$4 + vlog +incdir+../config/$1 +incdir+../config/shared ../src/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583,13286 -suppress 7063 +define+RAM_LATENCY=$3 +define+BURST_EN=$4 } else { # *** modelsim won't take `PA_BITS, but will take other defines for the lengths 
of DTIM_RANGE and IROM_LEN. For now just live with the warnings. - vlog +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583,13286 -suppress 7063 + vlog +incdir+../config/$1 +incdir+../config/shared ../src/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583,13286 -suppress 7063 } vopt +acc work.testbench -G TEST=$2 -G DEBUG=1 -o workopt diff --git a/sim/wally.do b/sim/wally.do index efe82742e..bc987e3d3 100644 --- a/sim/wally.do +++ b/sim/wally.do @@ -33,7 +33,7 @@ vlib work # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access internal signals if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { - vlog -lint -work work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 + vlog -lint -work work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/cvw.sv ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 # start and run simulation vopt +acc work_${1}_${2}.testbench -work work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=$4 -G INSTR_WAVEON=$5 -G CHECKPOINT=$6 -G NO_SPOOFING=0 -o testbenchopt vsim -lib work_${1}_${2} testbenchopt -suppress 8852,12070,3084,3829,13286 -fatal 7 @@ -47,7 +47,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { exec ./slack-notifier/slack-notifier.py } elseif {$2 eq "buildroot-no-trace"} { - vlog -lint -work work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 + vlog -lint -work work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/cvw.sv ../testbench/testbench-linux.sv ../testbench/common/*.sv 
../src/*/*.sv ../src/*/*/*.sv -suppress 2583 # start and run simulation vopt +acc work_${1}_${2}.testbench -work work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=0 -G INSTR_WAVEON=0 -G CHECKPOINT=0 -G NO_SPOOFING=1 -o testbenchopt vsim -lib work_${1}_${2} testbenchopt -suppress 8852,12070,3084,3829,13286 -fatal 7 @@ -68,7 +68,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { } elseif {$2 eq "fpga"} { echo "hello" - vlog -work work +incdir+../config/fpga +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/sdc/*.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv ../../fpga/sim/*.sv -suppress 8852,12070,3084,3829,2583,7063,13286 + vlog -work work +incdir+../config/fpga +incdir+../config/shared ../src/cvw.sv ../testbench/testbench.sv ../testbench/sdc/*.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv ../../fpga/sim/*.sv -suppress 8852,12070,3084,3829,2583,7063,13286 vopt +acc work.testbench -G TEST=$2 -G DEBUG=0 -o workopt vsim workopt +nowarn3829 -fatal 7 @@ -78,10 +78,10 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { } else { if {$2 eq "ahb"} { - vlog +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583,13286 -suppress 7063 +define+RAM_LATENCY=$3 +define+BURST_EN=$4 + vlog +incdir+../config/$1 +incdir+../config/shared ../src/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583,13286 -suppress 7063 +define+RAM_LATENCY=$3 +define+BURST_EN=$4 } else { # *** modelsim won't take `PA_BITS, but will take other defines for the lengths of DTIM_RANGE and IROM_LEN. For now just live with the warnings. 
- vlog +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583,13286 -suppress 7063 + vlog +incdir+../config/$1 +incdir+../config/shared ../src/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583,13286 -suppress 7063 } vopt +acc work.testbench -G TEST=$2 -G DEBUG=1 -o workopt diff --git a/src/wally/cvw.sv b/src/cvw.sv similarity index 100% rename from src/wally/cvw.sv rename to src/cvw.sv From 72002625eb610f0a9c1315ac86385fe434960eeb Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 15 Jun 2023 07:02:59 -0700 Subject: [PATCH 4/9] Fixed cvw path in lint-wally --- sim/lint-wally | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sim/lint-wally b/sim/lint-wally index acad750b6..7dd635328 100755 --- a/sim/lint-wally +++ b/sim/lint-wally @@ -8,7 +8,7 @@ basepath=$(dirname $0)/.. for config in rv32e rv64gc rv32gc rv32imc rv32i rv64i rv64fpquad; do #for config in rv64gc; do echo "$config linting..." 
- if !($verilator --no-timing --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/wally/cvw.sv $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes ); then + if !($verilator --no-timing --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/cvw.sv $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes ); then echo "Exiting after $config lint due to errors or warnings" exit 1 fi From 45ee4c2f9fe8c7184e8be2a91ef69ed87a8f3b56 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 15 Jun 2023 09:26:09 -0700 Subject: [PATCH 5/9] Added BMU instructions to instruction name decoder --- testbench/common/instrNameDecTB.sv | 59 +++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/testbench/common/instrNameDecTB.sv b/testbench/common/instrNameDecTB.sv index dd993e715..6834e1b17 100644 --- a/testbench/common/instrNameDecTB.sv +++ b/testbench/common/instrNameDecTB.sv @@ -54,12 +54,28 @@ module instrNameDecTB( 10'b0010011_000: if (instr[31:15] == 0 & instr[11:7] ==0) name = "NOP/FLUSH"; else name = "ADDI"; 10'b0010011_001: if (funct7[6:1] == 6'b000000) name = "SLLI"; - else name = "ILLEGAL"; + else if (funct7[6:1] == 6'b010010) name = "BCLRI"; + else if (funct7[6:1] == 6'b011010) name = "BINVI"; + else if (funct7[6:1] == 6'b001010) name = "BSETI"; + else if (funct7 == 7'b0110000) begin + case (rs2) + 5'b00000: name = "CLZ"; + 5'b00010: name = "CPOP"; + 5'b00001: name = "CTZ"; + 5'b00100: name = "SEXT.B"; + 5'b00101: name = "SEXT.H"; + default: name = "ILLEGAL"; + endcase + end else name = "ILLEGAL"; 10'b0010011_010: name = "SLTI"; 10'b0010011_011: name = "SLTIU"; 10'b0010011_100: name = "XORI"; 10'b0010011_101: if (funct7[6:1] == 6'b000000) name = "SRLI"; else if (funct7[6:1] == 6'b010000) name = "SRAI"; + else if (funct7[6:1] == 6'b011010 & rs2 == 5'b11000) name = "REV8"; + else if 
(funct7[6:1] == 6'b011000) name = "RORI"; + else if (funct7[6:1] == 6'b010010) name = "BEXTI"; + else if (funct7 == 7'b0010100 & rs2 == 5'b00111) name = "ORC.B"; else name = "ILLEGAL"; 10'b0010011_110: name = "ORI"; 10'b0010011_111: name = "ANDI"; @@ -69,22 +85,41 @@ module instrNameDecTB( 10'b0100011_010: name = "SW"; 10'b0100011_011: name = "SD"; 10'b0011011_000: name = "ADDIW"; - 10'b0011011_001: name = "SLLIW"; + 10'b0011011_001: if (funct7 == 7'b0000000 )name = "SLLIW"; + else if (funct7[6:1] == 6'b000010) name = "SLLI.UW"; + else if (funct7 == 7'b0110000) begin + case (rs2) + 5'b00000: name = "CLZW"; + 5'b00010: name = "CPOPW"; + 5'b00001: name = "CTZW"; + default: name = "ILLEGAL"; + endcase + end else name = "ILLEGAL"; 10'b0011011_101: if (funct7 == 7'b0000000) name = "SRLIW"; else if (funct7 == 7'b0100000) name = "SRAIW"; + else if (funct7 == 7'b0110000) name = "RORIW"; else name = "ILLEGAL"; 10'b0111011_000: if (funct7 == 7'b0000000) name = "ADDW"; else if (funct7 == 7'b0100000) name = "SUBW"; else if (funct7 == 7'b0000001) name = "MULW"; + else if (funct7 == 7'b0000100) name = "ADD.UW"; else name = "ILLEGAL"; 10'b0111011_001: if (funct7 == 7'b0000000) name = "SLLW"; else if (funct7 == 7'b0000001) name = "DIVW"; + else if (funct7 == 7'b0110000) name = "ROLW"; + else name = "ILLEGAL"; + 10'b0111011_010: if (funct7 == 7'b0010000) name = "SH1ADD.UW"; + else name = "ILLEGAL"; + 10'b0111011_100: if (funct7 == 7'b0010000) name = "SH2ADD.UW"; + else if (funct7 == 7'b0000100) name = "ZEXT.H"; else name = "ILLEGAL"; 10'b0111011_101: if (funct7 == 7'b0000000) name = "SRLW"; else if (funct7 == 7'b0100000) name = "SRAW"; else if (funct7 == 7'b0000001) name = "DIVUW"; + else if (funct7 == 7'b0110000) name = "RORW"; else name = "ILLEGAL"; 10'b0111011_110: if (funct7 == 7'b0000001) name = "REMW"; + else if (funct7 == 7'b0010000) name = "SH3ADD.UW"; else name = "ILLEGAL"; 10'b0111011_111: if (funct7 == 7'b0000001) name = "REMUW"; else name = "ILLEGAL"; @@ -94,25 +129,45 
@@ module instrNameDecTB( else name = "ILLEGAL"; 10'b0110011_001: if (funct7 == 7'b0000000) name = "SLL"; else if (funct7 == 7'b0000001) name = "MULH"; + else if (funct7 == 7'b0110000) name = "ROL"; + else if (funct7 == 7'b0000101) name = "CLMUL"; + else if (funct7 == 7'b0100100) name = "BCLR"; + else if (funct7 == 7'b0110100) name = "BINV"; + else if (funct7 == 7'b0010100) name = "BSET"; else name = "ILLEGAL"; 10'b0110011_010: if (funct7 == 7'b0000000) name = "SLT"; else if (funct7 == 7'b0000001) name = "MULHSU"; + else if (funct7 == 7'b0010000) name = "SH1ADD"; + else if (funct7 == 7'b0000101) name = "CLMULR"; else name = "ILLEGAL"; 10'b0110011_011: if (funct7 == 7'b0000000) name = "SLTU"; else if (funct7 == 7'b0000001) name = "MULHU"; + else if (funct7 == 7'b0000101) name = "CLMULH"; else name = "ILLEGAL"; 10'b0110011_100: if (funct7 == 7'b0000000) name = "XOR"; else if (funct7 == 7'b0000001) name = "DIV"; + else if (funct7 == 7'b0010000) name = "SH2ADD"; + else if (funct7 == 7'b0000101) name = "MIN"; + else if (funct7 == 7'b0100000) name = "XNOR"; + else if (funct7 == 7'b0000100) name = "ZEXT.H"; else name = "ILLEGAL"; 10'b0110011_101: if (funct7 == 7'b0000000) name = "SRL"; else if (funct7 == 7'b0000001) name = "DIVU"; else if (funct7 == 7'b0100000) name = "SRA"; + else if (funct7 == 7'b0000101) name = "MINU"; + else if (funct7 == 7'b0110000) name = "ROR"; + else if (funct7 == 7'b0100100) name = "BEXT"; else name = "ILLEGAL"; 10'b0110011_110: if (funct7 == 7'b0000000) name = "OR"; else if (funct7 == 7'b0000001) name = "REM"; + else if (funct7 == 7'b0010000) name = "SH3ADD"; + else if (funct7 == 7'b0000101) name = "MAX"; + else if (funct7 == 7'b0100000) name = "ORN"; else name = "ILLEGAL"; 10'b0110011_111: if (funct7 == 7'b0000000) name = "AND"; else if (funct7 == 7'b0000001) name = "REMU"; + else if (funct7 == 7'b0000101) name = "MAXU"; + else if (funct7 == 7'b0100000) name = "ANDN"; else name = "ILLEGAL"; 10'b0110111_???: name = "LUI"; 10'b1100011_000: name =
"BEQ"; From a62211bad1b121a1e4ca34ca8a64772ccab896fd Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 15 Jun 2023 11:56:59 -0700 Subject: [PATCH 6/9] Gated inputs to BMU when inactive to save power and simulation time --- src/ieu/alu.sv | 3 ++- src/ieu/bmu/bitmanipalu.sv | 14 ++++++++++---- src/ieu/bmu/bmuctrl.sv | 5 +++-- src/ieu/controller.sv | 4 +++- src/ieu/datapath.sv | 3 ++- src/ieu/ieu.sv | 6 ++++-- 6 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/ieu/alu.sv b/src/ieu/alu.sv index 43df83b60..1114245e9 100644 --- a/src/ieu/alu.sv +++ b/src/ieu/alu.sv @@ -38,6 +38,7 @@ module alu #(parameter WIDTH=32) ( input logic [2:0] ZBBSelect, // ZBB mux select signal input logic [2:0] Funct3, // For BMU decoding input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage + input logic BMUActiveE, // Bit manipulation instruction being executed output logic [WIDTH-1:0] ALUResult, // ALU result output logic [WIDTH-1:0] Sum); // Sum of operands @@ -88,7 +89,7 @@ module alu #(parameter WIDTH=32) ( // Final Result B instruction select mux if (`ZBC_SUPPORTED | `ZBS_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED) begin : bitmanipalu - bitmanipalu #(WIDTH) balu(.A, .B, .W64, .BSelect, .ZBBSelect, + bitmanipalu #(WIDTH) balu(.A, .B, .W64, .BSelect, .ZBBSelect, .BMUActiveE, .Funct3, .LT,.LTU, .BALUControl, .PreALUResult, .FullResult, .CondMaskB, .CondShiftA, .ALUResult); end else begin diff --git a/src/ieu/bmu/bitmanipalu.sv b/src/ieu/bmu/bitmanipalu.sv index 228b23132..96076cc84 100644 --- a/src/ieu/bmu/bitmanipalu.sv +++ b/src/ieu/bmu/bitmanipalu.sv @@ -38,6 +38,7 @@ module bitmanipalu #(parameter WIDTH=32) ( input logic LT, // less than flag input logic LTU, // less than unsigned flag input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage + input logic BMUActiveE, // Bit manipulation instruction being executed input logic [WIDTH-1:0] PreALUResult, FullResult,// PreALUResult, FullResult signals 
output logic [WIDTH-1:0] CondMaskB, // B is conditionally masked for ZBS instructions output logic [WIDTH-1:0] CondShiftA, // A is conditionally shifted for ShAdd instructions @@ -51,13 +52,18 @@ module bitmanipalu #(parameter WIDTH=32) ( logic PreShift; // Inidicates if it is sh1add, sh2add, sh3add instruction logic [1:0] PreShiftAmt; // Amount to Pre-Shift A logic [WIDTH-1:0] CondZextA; // A Conditional Extend Intermediary Signal + logic [WIDTH-1:0] ABMU, BBMU; // Gated data inputs to reduce BMU activity + + // gate data inputs to BMU to only operate when BMUActiveE indicates BMU is in use + assign ABMU = A & {WIDTH{BMUActiveE}}; + assign BBMU = B & {WIDTH{BMUActiveE}}; // Extract control signals from bitmanip ALUControl. assign {Mask, PreShift} = BALUControl[1:0]; // Mask Generation Mux if (`ZBS_SUPPORTED) begin: zbsdec - decoder #($clog2(WIDTH)) maskgen(B[$clog2(WIDTH)-1:0], MaskB); + decoder #($clog2(WIDTH)) maskgen(BBMU[$clog2(WIDTH)-1:0], MaskB); mux2 #(WIDTH) maskmux(B, MaskB, Mask, CondMaskB); end else assign CondMaskB = B; @@ -75,17 +81,17 @@ module bitmanipalu #(parameter WIDTH=32) ( // Bit reverse needed for some ZBB, ZBC instructions if (`ZBC_SUPPORTED | `ZBB_SUPPORTED) begin: bitreverse - bitreverse #(WIDTH) brA(.A, .RevA); + bitreverse #(WIDTH) brA(.A(ABMU), .RevA); end // ZBC Unit if (`ZBC_SUPPORTED) begin: zbc - zbc #(WIDTH) ZBC(.A, .RevA, .B, .Funct3, .ZBCResult); + zbc #(WIDTH) ZBC(.A(ABMU), .RevA, .B(BBMU), .Funct3, .ZBCResult); end else assign ZBCResult = 0; // ZBB Unit if (`ZBB_SUPPORTED) begin: zbb - zbb #(WIDTH) ZBB(.A, .RevA, .B, .W64, .LT, .LTU, .BUnsigned(Funct3[0]), .ZBBSelect, .ZBBResult); + zbb #(WIDTH) ZBB(.A(ABMU), .RevA, .B(BBMU), .W64, .LT, .LTU, .BUnsigned(Funct3[0]), .ZBBSelect, .ZBBResult); end else assign ZBBResult = 0; // Result Select Mux diff --git a/src/ieu/bmu/bmuctrl.sv b/src/ieu/bmu/bmuctrl.sv index ad46ab728..59a8e4a16 100644 --- a/src/ieu/bmu/bmuctrl.sv +++ b/src/ieu/bmu/bmuctrl.sv @@ -46,7 +46,8 @@ module bmuctrl
import cvw::*; #(parameter cvw_t P) ( output logic [1:0] BSelectE, // Indicates if ZBA_ZBB_ZBC_ZBS instruction in one-hot encoding output logic [2:0] ZBBSelectE, // ZBB mux select signal output logic BRegWriteE, // Indicates if it is a R type B instruction in Execute - output logic [2:0] BALUControlE // ALU Control signals for B instructions in Execute Stage + output logic [2:0] BALUControlE, // ALU Control signals for B instructions in Execute Stage + output logic BMUActiveE // Bit manipulation instruction being executed ); logic [6:0] OpD; // Opcode in Decode stage @@ -174,5 +175,5 @@ module bmuctrl import cvw::*; #(parameter cvw_t P) ( assign ALUSelectD = BALUOpD ? BALUSelectD : (ALUOpD ? Funct3D : 3'b000); // BMU Execute stage pipieline control register - flopenrc #(9) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE}); + flopenrc #(10) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD, ~IllegalBitmanipInstrD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE, BMUActiveE}); endmodule diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index 8839b9cad..c47eb6799 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -58,6 +58,7 @@ module controller import cvw::*; #(parameter cvw_t P) ( output logic [1:0] BSelectE, // One-Hot encoding of if it's ZBA_ZBB_ZBC_ZBS instruction output logic [2:0] ZBBSelectE, // ZBB mux select signal in Execute stage output logic [2:0] BALUControlE, // ALU Control signals for B instructions in Execute Stage + output logic BMUActiveE, // Bit manipulation instruction being executed // Memory stage control signals input logic StallM, FlushM, // Stall, flush Memory stage @@ -253,7 +254,7 @@ module controller import cvw::*; #(parameter cvw_t P) ( bmuctrl #(P) bmuctrl(.clk, .reset, .StallD, .FlushD, .InstrD, .ALUOpD, .BSelectD, .ZBBSelectD, .BRegWriteD, .BALUSrcBD, .BW64D, .BSubArithD, 
.IllegalBitmanipInstrD, .StallE, .FlushE, - .ALUSelectD, .BSelectE, .ZBBSelectE, .BRegWriteE, .BALUControlE); + .ALUSelectD, .BSelectE, .ZBBSelectE, .BRegWriteE, .BALUControlE, .BMUActiveE); if (P.ZBA_SUPPORTED) begin // ALU Decoding is more comprehensive when ZBA is supported. slt and slti conflicts with sh1add, sh1add.uw assign sltD = (Funct3D == 3'b010 & (~(Funct7D[4]) | ~OpD[5])) ; @@ -282,6 +283,7 @@ module controller import cvw::*; #(parameter cvw_t P) ( assign BSelectD = 2'b00; assign ZBBSelectE = 3'b000; assign BALUControlE = 3'b0; + assign BMUActiveE = 1'b0; end // Fences diff --git a/src/ieu/datapath.sv b/src/ieu/datapath.sv index 40a72926e..e48bd2c38 100644 --- a/src/ieu/datapath.sv +++ b/src/ieu/datapath.sv @@ -48,6 +48,7 @@ module datapath import cvw::*; #(parameter cvw_t P) ( input logic [1:0] BSelectE, // One hot encoding of ZBA_ZBB_ZBC_ZBS instruction input logic [2:0] ZBBSelectE, // ZBB mux select signal input logic [2:0] BALUControlE, // ALU Control signals for B instructions in Execute Stage + input logic BMUActiveE, // Bit manipulation instruction being executed output logic [1:0] FlagsE, // Comparison flags ({eq, lt}) output logic [P.XLEN-1:0] IEUAdrE, // Address computed by ALU output logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU sources before the mux chooses between them and PCE to put in srcA/B @@ -112,7 +113,7 @@ module datapath import cvw::*; #(parameter cvw_t P) ( comparator #(P.XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE); mux2 #(P.XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(P.XLEN) srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE); - alu #(P.XLEN) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, ALUResultE, IEUAdrE); + alu #(P.XLEN) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, BMUActiveE, ALUResultE, IEUAdrE); mux2 #(P.XLEN) altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE); mux2 #(P.XLEN) 
ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE); diff --git a/src/ieu/ieu.sv b/src/ieu/ieu.sv index 8e22fd864..7dc7c5c97 100644 --- a/src/ieu/ieu.sv +++ b/src/ieu/ieu.sv @@ -93,12 +93,14 @@ module ieu import cvw::*; #(parameter cvw_t P) ( logic MemReadE, CSRReadE; // Load, CSRRead instruction logic BranchSignedE; // Branch does signed comparison on operands logic MDUE; // Multiply/divide instruction + logic BMUActiveE; // Bit manipulation instruction being executed controller #(P) c( .clk, .reset, .StallD, .FlushD, .InstrD, .ImmSrcD, .IllegalIEUFPUInstrD, .IllegalBaseInstrD, .StallE, .FlushE, .FlagsE, .FWriteIntE, .PCSrcE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .MemReadE, .CSRReadE, - .Funct3E, .IntDivE, .MDUE, .W64E, .SubArithE, .BranchD, .BranchE, .JumpD, .JumpE, .SCE, .BranchSignedE, .BSelectE, .ZBBSelectE, .BALUControlE, .StallM, .FlushM, .MemRWM, + .Funct3E, .IntDivE, .MDUE, .W64E, .SubArithE, .BranchD, .BranchE, .JumpD, .JumpE, .SCE, + .BranchSignedE, .BSelectE, .ZBBSelectE, .BALUControlE, .BMUActiveE, .StallM, .FlushM, .MemRWM, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M, .RegWriteM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM, .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .InvalidateICacheM, .StoreStallD); @@ -106,7 +108,7 @@ module ieu import cvw::*; #(parameter cvw_t P) ( datapath #(P) dp( .clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE, .W64E, .SubArithE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .JumpE, .BranchSignedE, - .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, .BSelectE, .ZBBSelectE, .BALUControlE, + .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, .BSelectE, .ZBBSelectE, .BALUControlE, .BMUActiveE, .StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, .FCvtIntW, .StallW, .FlushW, .RegWriteW, .IntDivW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW, 
.CSRReadValW, .MDUResultW, .FIntDivResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW); From 9f888488328de59f63814cb153892bbede04279e Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 15 Jun 2023 12:16:46 -0700 Subject: [PATCH 7/9] Bit manipulation comment cleanup --- src/ieu/bmu/bitmanipalu.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ieu/bmu/bitmanipalu.sv b/src/ieu/bmu/bitmanipalu.sv index 96076cc84..c8757e65a 100644 --- a/src/ieu/bmu/bitmanipalu.sv +++ b/src/ieu/bmu/bitmanipalu.sv @@ -54,7 +54,7 @@ module bitmanipalu #(parameter WIDTH=32) ( logic [WIDTH-1:0] CondZextA; // A Conditional Extend Intermediary Signal logic [WIDTH-1:0] ABMU, BBMU; // Gated data inputs to reduce BMU activity - // gate data inputs to BMU to only operate when BSelect[1] indicates BMU is in use + // gate data inputs to BMU to only operate when BMU is active assign ABMU = A & {WIDTH{BMUActiveE}}; assign BBMU = B & {WIDTH{BMUActiveE}}; From 9e839988dc89f644aaec864bb605a624cf9d4be0 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 15 Jun 2023 12:17:23 -0700 Subject: [PATCH 8/9] Gated MDU to save power; doesn't seem to have affected simulation time --- src/ieu/controller.sv | 2 ++ src/ieu/ieu.sv | 5 +++-- src/mdu/mdu.sv | 7 +++++++ src/wally/wallypipelinedcore.sv | 5 +++-- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index c47eb6799..2314cb81e 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -59,6 +59,7 @@ module controller import cvw::*; #(parameter cvw_t P) ( output logic [2:0] ZBBSelectE, // ZBB mux select signal in Execute stage output logic [2:0] BALUControlE, // ALU Control signals for B instructions in Execute Stage output logic BMUActiveE, // Bit manipulation instruction being executed + output logic MDUActiveE, // Mul/Div instruction being executed // Memory stage control signals input logic StallM, FlushM, // Stall, flush Memory stage @@ -319,6 +320,7 @@ module 
controller import cvw::*; #(parameter cvw_t P) ( // Other execute stage controller signals assign MemReadE = MemRWE[1]; assign SCE = (ResultSrcE == 3'b100); + assign MDUActiveE = (ResultSrcE == 3'b011); assign RegWriteE = IEURegWriteE | FWriteIntE; // IRF register writes could come from IEU or FPU controllers assign IntDivE = MDUE & Funct3E[2]; // Integer division operation diff --git a/src/ieu/ieu.sv b/src/ieu/ieu.sv index 7dc7c5c97..c4e60aca9 100644 --- a/src/ieu/ieu.sv +++ b/src/ieu/ieu.sv @@ -42,6 +42,7 @@ module ieu import cvw::*; #(parameter cvw_t P) ( output logic [2:0] Funct3E, // Funct3 instruction field output logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU src inputs before the mux choosing between them and PCE to put in srcA/B output logic [4:0] RdE, // Destination register + output logic MDUActiveE, // Mul/Div instruction being executed // Memory stage signals input logic SquashSCW, // Squash store conditional, from LSU output logic [1:0] MemRWM, // Read/write control goes to LSU @@ -100,8 +101,8 @@ module ieu import cvw::*; #(parameter cvw_t P) ( .IllegalIEUFPUInstrD, .IllegalBaseInstrD, .StallE, .FlushE, .FlagsE, .FWriteIntE, .PCSrcE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .MemReadE, .CSRReadE, .Funct3E, .IntDivE, .MDUE, .W64E, .SubArithE, .BranchD, .BranchE, .JumpD, .JumpE, .SCE, - .BranchSignedE, .BSelectE, .ZBBSelectE, .BALUControlE, .BMUActiveE, .StallM, .FlushM, .MemRWM, - .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M, + .BranchSignedE, .BSelectE, .ZBBSelectE, .BALUControlE, .BMUActiveE, .MDUActiveE, + .StallM, .FlushM, .MemRWM, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M, .RegWriteM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM, .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .InvalidateICacheM, .StoreStallD); diff --git a/src/mdu/mdu.sv b/src/mdu/mdu.sv index 72a908698..83327a460 100644 --- a/src/mdu/mdu.sv +++ b/src/mdu/mdu.sv @@ -33,6 +33,7 @@ module 
mdu import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // inputs A and B from IEU forwarding mux output input logic [2:0] Funct3E, Funct3M, // type of MDU operation input logic IntDivE, W64E, // Integer division/remainder, and W-type instrutions + input logic MDUActiveE, // Mul/Div instruction being executed output logic [P.XLEN-1:0] MDUResultW, // multiply/divide result output logic DivBusyE // busy signal to stall pipeline in Execute stage ); @@ -43,6 +44,12 @@ module mdu import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] MDUResultM; // result after W truncation logic W64M; // W-type instruction + logic [P.XLEN-1:0] AMDU, BMDU; // Gated inputs to MDU + + // gate data inputs to MDU to only operate when MDU is active. + assign AMDU = ForwardedSrcAE & {P.XLEN{MDUActiveE}}; + assign BMDU = ForwardedSrcBE & {P.XLEN{MDUActiveE}}; + // Multiplier mul #(P.XLEN) mul(.clk, .reset, .StallM, .FlushM, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .ProdM); diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 5a46cd84c..a01bba009 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -77,6 +77,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( logic DivBusyE; logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD; logic SquashSCW; + logic MDUActiveE; // Mul/Div instruction being executed // floating point unit signals logic [2:0] FRM_REGW; @@ -190,7 +191,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( .InstrD, .IllegalIEUFPUInstrD, .IllegalBaseInstrD, // Execute Stage interface .PCE, .PCLinkE, .FWriteIntE, .FCvtIntE, .IEUAdrE, .IntDivE, .W64E, - .Funct3E, .ForwardedSrcAE, .ForwardedSrcBE, + .Funct3E, .ForwardedSrcAE, .ForwardedSrcBE, .MDUActiveE, // Memory stage interface .SquashSCW, // from LSU .MemRWM, // read/write control goes to LSU @@ -306,7 +307,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( if 
(P.M_SUPPORTED | P.ZMMUL_SUPPORTED) begin:mdu mdu #(P) mdu(.clk, .reset, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .ForwardedSrcAE, .ForwardedSrcBE, - .Funct3E, .Funct3M, .IntDivE, .W64E, + .Funct3E, .Funct3M, .IntDivE, .W64E, .MDUActiveE, .MDUResultW, .DivBusyE); end else begin // no M instructions supported assign MDUResultW = 0; From 3ca271b6a7b9b216220760daf90298677a93a26f Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 15 Jun 2023 12:38:33 -0700 Subject: [PATCH 9/9] Added input gating on FPU --- src/fpu/fctrl.sv | 9 +++++---- src/fpu/fpu.sv | 5 +++-- src/fpu/unpack.sv | 7 ++++--- src/fpu/unpackinput.sv | 7 ++++++- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index e10ba99c2..76855bf81 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -36,7 +36,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( input logic [2:0] FRM_REGW, // rounding mode from CSR input logic [1:0] STATUS_FS, // is FPU enabled? input logic FDivBusyE, // is the divider busy - // intruction + // instruction input logic [31:0] InstrD, // the full instruction input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision input logic [6:0] OpD, // bits 6:0 of instruction @@ -53,6 +53,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( output logic FpLoadStoreM, // FP load or store instruction output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage + output logic FPUActiveE, // FP instruction being executed // register control signals output logic FRegWriteE, FRegWriteM, FRegWriteW, // FP register write enable output logic FWriteIntE, FWriteIntM, // Write to integer register @@ -308,9 +309,9 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( assign Adr3D = InstrD[31:27]; // D/E pipleine register - flopenrc #(13+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, 
~StallE, - {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD}, - {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE}); + flopenrc #(14+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD, ~IllegalFPUInstrD}, + {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, FPUActiveE}); flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {Adr1D, Adr2D, Adr3D}, {Adr1E, Adr2E, Adr3E}); flopenrc #(1) DEFDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE); flopenrc #(3) DEEnReg(clk, reset, FlushE, ~StallE, {XEnD, YEnD, ZEnD}, {XEnE, YEnE, ZEnE}); diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 3d4981384..f71999471 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -82,6 +82,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic XEnD, YEnD, ZEnD; // X, Y, Z inputs used for current operation logic XEnE, YEnE, ZEnE; // X, Y, Z inputs used for current operation logic FRegWriteE; // Write floating-point register + logic FPUActiveE; // FP instruction being executed // regfile signals logic [P.FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage @@ -171,7 +172,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, .FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .FpLoadStoreM, .IllegalFPUInstrD, .XEnD, .YEnD, .ZEnD, .XEnE, .YEnE, .ZEnE, - .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .FCvtIntW, + .FResSelE, .FResSelM, .FResSelW, .FPUActiveE, .PostProcSelE, .PostProcSelM, .FCvtIntW, .Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E); // FP register file @@ -226,7 +227,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( // unpack unit: splits FP inputs into their parts and classifies SNaN, NaN, Subnorm, Norm, Zero, Infifnity unpack #(P) unpack (.X(XE), 
.Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), - .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE), + .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE), .FPUActive(FPUActiveE), .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE), .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XSubnorm(XSubnormE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), diff --git a/src/fpu/unpack.sv b/src/fpu/unpack.sv index 14e9a6f66..145d6a701 100644 --- a/src/fpu/unpack.sv +++ b/src/fpu/unpack.sv @@ -30,6 +30,7 @@ module unpack import cvw::*; #(parameter cvw_t P) ( input logic [P.FLEN-1:0] X, Y, Z, // inputs from register file input logic [P.FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half input logic XEn, YEn, ZEn, // input enables + input logic FPUActive, // Kill inputs when FPU is not active output logic Xs, Ys, Zs, // sign bits of XYZ output logic [P.NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision) output logic [P.NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision) @@ -46,17 +47,17 @@ module unpack import cvw::*; #(parameter cvw_t P) ( logic XFracZero, YFracZero, ZFracZero; // is the fraction zero logic YExpMax, ZExpMax; // is the exponent all 1s - unpackinput #(P) unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn), + unpackinput #(P) unpackinputX (.A(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn), .FPUActive, .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero), .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero), .Subnorm(XSubnorm), .PostBox(XPostBox)); - unpackinput #(P) unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn), + unpackinput #(P) unpackinputY (.A(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn), .FPUActive, .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero), .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero), .Subnorm(), .PostBox()); - 
unpackinput #(P) unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn), + unpackinput #(P) unpackinputZ (.A(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn), .FPUActive, .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero), .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero), .Subnorm(), .PostBox()); diff --git a/src/fpu/unpackinput.sv b/src/fpu/unpackinput.sv index 1d429ed4a..c551e8173 100644 --- a/src/fpu/unpackinput.sv +++ b/src/fpu/unpackinput.sv @@ -27,9 +27,10 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module unpackinput import cvw::*; #(parameter cvw_t P) ( - input logic [P.FLEN-1:0] In, // inputs from register file + input logic [P.FLEN-1:0] A, // inputs from register file input logic En, // enable the input input logic [P.FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half + input logic FPUActive, // Kill inputs when FPU is not active output logic Sgn, // sign bits of the number output logic [P.NE-1:0] Exp, // exponent of the number (converted to largest supported precision) output logic [P.NF:0] Man, // mantissa of the number (converted to largest supported precision) @@ -46,6 +47,10 @@ module unpackinput import cvw::*; #(parameter cvw_t P) ( logic [P.NF-1:0] Frac; // Fraction of XYZ logic BadNaNBox; // incorrectly NaN Boxed + logic [P.FLEN-1:0] In; + + // Gate input when FPU is not active to save power and simulation + assign In = A & {P.FLEN{FPUActive}}; if (P.FPSIZES == 1) begin // if there is only one floating point format supported assign BadNaNBox = 0;