From 6b9c6223bec95adaebf3b6d4ce4d09fcc01f25c9 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Sat, 23 Jan 2021 10:19:09 -0500
Subject: [PATCH] Initial checkin of UART

---
 wally-pipelined/src/dmem.sv           |  15 +-
 wally-pipelined/src/gpio.sv           |   7 +-
 wally-pipelined/src/testbench.sv      |   3 +-
 wally-pipelined/src/uart.sv           |  84 +++++
 wally-pipelined/src/uartPC16550D.sv   | 476 ++++++++++++++++++++++++++
 wally-pipelined/src/wally-macros.sv   |   9 +
 wally-pipelined/src/wallypipelined.sv |   4 +-
 7 files changed, 591 insertions(+), 7 deletions(-)
 create mode 100644 wally-pipelined/src/uart.sv
 create mode 100644 wally-pipelined/src/uartPC16550D.sv

diff --git a/wally-pipelined/src/dmem.sv b/wally-pipelined/src/dmem.sv
index 1cab372b..c6f02c9a 100644
--- a/wally-pipelined/src/dmem.sv
+++ b/wally-pipelined/src/dmem.sv
@@ -36,12 +36,15 @@ module dmem #(parameter XLEN=32) (
   output logic            DataAccessFaultM,
   output logic            TimerIntM, SwIntM,
   input  logic [31:0]     GPIOPinsIn,
-  output logic [31:0]     GPIOPinsOut, GPIOPinsEn);
+  output logic [31:0]     GPIOPinsOut, GPIOPinsEn, 
+  input  logic            UARTSin,
+  output logic            UARTSout);
   
   logic [XLEN-1:0] MaskedWriteDataM;
-  logic [XLEN-1:0] RdTimM, RdCLINTM, RdGPIOM;
-  logic            TimEnM, CLINTEnM, GPIOEnM;
+  logic [XLEN-1:0] RdTimM, RdCLINTM, RdGPIOM, RdUARTM;
+  logic            TimEnM, CLINTEnM, GPIOEnM, UARTEnM;
   logic [1:0]      MemRWdtimM, MemRWclintM, MemRWgpioM;
+  logic            UARTIntr;// *** will need to tie INTR to an interrupt handler
 
   // Address decoding
   generate
@@ -52,6 +55,7 @@ module dmem #(parameter XLEN=32) (
   endgenerate
   assign CLINTEnM = ~(|AdrM[XLEN-1:26]) & AdrM[25] & ~(|AdrM[24:16]); // 0x02000000-0x0200FFFF
   assign GPIOEnM = (AdrM[31:8] == 24'h10012); // 0x10012000-0x100120FF
+  assign UARTEnM = ~(|AdrM[XLEN-1:29]) & AdrM[28] & ~(|AdrM[27:3]); // 0x10000000-0x10000007
 
   assign MemRWdtimM  = MemRWM & {2{TimEnM}};
   assign MemRWclintM = MemRWM & {2{CLINTEnM}};
@@ -62,7 +66,10 @@ module dmem #(parameter XLEN=32) (
 
   // memory-mapped I/O peripherals
   clint #(XLEN) clint(.AdrM(AdrM[15:0]), .*);
-  gpio #(XLEN) gpio(.AdrM(AdrM[7:0]), .*);
+  gpio #(XLEN) gpio(.AdrM(AdrM[7:0]), .*); // *** may want to add GPIO interrupts
+  uart #(XLEN) uart(.TXRDYb(), .RXRDYb(), .INTR(UARTIntr), .SIN(UARTSin), .SOUT(UARTSout),
+                    .DSRb(1'b1), .DCDb(1'b1), .CTSb(1'b0), .RIb(1'b1), 
+                    .RTSb(), .DTRb(), .OUT1b(), .OUT2b(), .*); 
 
   // *** add cache and interface to external memory & other peripherals
   
diff --git a/wally-pipelined/src/gpio.sv b/wally-pipelined/src/gpio.sv
index 0952fe21..8723cb98 100644
--- a/wally-pipelined/src/gpio.sv
+++ b/wally-pipelined/src/gpio.sv
@@ -53,7 +53,12 @@ module gpio #(parameter XLEN=32) (
       assign #2 entry = {AdrM[7:2], 2'b00}; 
   endgenerate
   
-  assign INPUT_VAL = GPIOPinsIn & INPUT_EN;
+  generate 
+    if (`GPIO_LOOPBACK_TEST) // connect OUT to IN for loopback testing
+      assign INPUT_VAL = GPIOPinsOut & INPUT_EN & OUTPUT_EN;
+    else
+      assign INPUT_VAL = GPIOPinsIn & INPUT_EN;
+  endgenerate
   assign GPIOPinsOut = OUTPUT_VAL;
   assign GPIOPinsEn = OUTPUT_EN;
 
diff --git a/wally-pipelined/src/testbench.sv b/wally-pipelined/src/testbench.sv
index 190a631e..2fbc678a 100644
--- a/wally-pipelined/src/testbench.sv
+++ b/wally-pipelined/src/testbench.sv
@@ -236,12 +236,13 @@ string tests32i[] = {
   string signame, memfilename;
 
   logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
+  logic UARTSin, UARTSout;
 
   // instantiate device to be tested
   assign GPIOPinsIn = 0;
   wallypipelined #(XLEN, MISA, ZCSR, ZCOUNTERS) dut(
     clk, reset, WriteData, DataAdr, MemRW, 
-    GPIOPinsIn, GPIOPinsOut, GPIOPinsEn
+    GPIOPinsIn, GPIOPinsOut, GPIOPinsEn, UARTSin, UARTSout
   ); 
 
   // Track names of instructions
diff --git a/wally-pipelined/src/uart.sv b/wally-pipelined/src/uart.sv
new file mode 100644
index 00000000..31e001db
--- /dev/null
+++ b/wally-pipelined/src/uart.sv
@@ -0,0 +1,84 @@
+///////////////////////////////////////////
+// uart.sv
+//
+// Written: David_Harris@hmc.edu 21 January 2021
+// Modified: 
+//
+// Purpose: Interface to Universal Asynchronous Receiver/Transmitter with FIFOs
+//          Emulates interface of Texas Instruments PC16550D
+//          Compatible with UART in Imperas Virtio model ***
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-macros.sv"
+
+module uart #(parameter XLEN=32) (
+  input  logic            clk, reset, 
+  input  logic [1:0]      MemRWgpioM,       // bit 1 = read, bit 0 = write; NOTE(review): GPIO-named strobe -- confirm the parent qualifies it with the UART address decode
+  input  logic [7:0]      ByteMaskM,        // not referenced inside this module
+  input  logic [XLEN-1:0] AdrM, 
+  input  logic [XLEN-1:0] MaskedWriteDataM,
+  output logic [XLEN-1:0] RdUARTM,
+  input  logic            SIN, DSRb, DCDb, CTSb, RIb,    // from E1A driver from RS232 interface
+  output logic            SOUT, RTSb, DTRb, // to E1A driver to RS232 interface
+  output logic            OUT1b, OUT2b, INTR, TXRDYb, RXRDYb);         // to CPU
+
+  // UART interface signals
+  logic [2:0]      A;
+  logic            MEMRb, MEMWb;
+  logic [7:0]      Din, Dout;
+  logic            BAUDOUTb; // tx baud clock from the core, looped back to its rx clock input RCLK
+
+  // rename processor interface signals to match PC16550D and provide one-byte interface
+  assign MEMRb = ~MemRWgpioM[1];
+  assign MEMWb = ~MemRWgpioM[0];
+  assign A = AdrM[2:0];
+
+  // Byte-lane steering between the XLEN-bit bus and the 8-bit UART data port.
+  // The same logic serves XLEN = 32 and XLEN = 64, so neither width leaves
+  // RdUARTM or Din undriven:
+  //
+  //   reads:  Dout is replicated into every byte lane of RdUARTM
+  //   writes: Din is the byte of MaskedWriteDataM selected by the low
+  //           address bits, i.e. bits [8*lane+7 : 8*lane]
+  //
+  //   XLEN   lane select   lanes
+  //   32     AdrM[1:0]     4
+  //   64     AdrM[2:0]     8
+  //
+  // The indexed part-select (+:) uses a variable base, so no constant bit
+  // range ever exceeds XLEN-1 for either configuration.
+  localparam NUMLANES = XLEN/8;            // bytes per bus word
+  localparam LANEBITS = $clog2(NUMLANES);  // address bits that pick a lane
+
+  logic [LANEBITS-1:0] lane;               // byte lane addressed by this access
+  logic [LANEBITS+2:0] lanelsb;            // bit index of that lane's LSB
+
+  assign lane    = AdrM[LANEBITS-1:0];
+  assign lanelsb = {lane, 3'b000};         // lane * 8
+
+  always_comb begin
+    RdUARTM = {NUMLANES{Dout}};
+    Din     = MaskedWriteDataM[lanelsb +: 8];
+  end
+
+  // UART core; every port not listed explicitly connects by name through .*
+  uartPC16550D u(.RCLK(BAUDOUTb), .*);
+
+endmodule
+
diff --git a/wally-pipelined/src/uartPC16550D.sv b/wally-pipelined/src/uartPC16550D.sv
new file mode 100644
index 00000000..62037cc6
--- /dev/null
+++ b/wally-pipelined/src/uartPC16550D.sv
@@ -0,0 +1,476 @@
+///////////////////////////////////////////
+// uartPC16550D.sv
+//
+// Written: David_Harris@hmc.edu 21 January 2021
+// Modified: 
+//
+// Purpose: Universal Asynchronous Receiver/Transmitter with FIFOs
+//          Emulates interface of Texas Instruments PC16550D
+//          Compatible with UART in Imperas Virtio model ***
+//
+//  Compatible with most of PC16550D with the following known exceptions:
+//   Generates 2 rather than 1.5 stop bits when 5-bit word length is selected and LCR[2] = 1
+//   Timeout not yet implemented***
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-macros.sv"
+
+module uartPC16550D(  // UART core: byte-wide register file, baud generator, rx/tx datapaths with optional 16-entry FIFOs
+  // Processor Interface
+  input  logic       clk, reset,
+  input  logic [2:0] A,                     // register select, decoded by the case statements below
+  input  logic [7:0] Din,
+  output logic [7:0] Dout,                  // driven to 0 whenever MEMRb is high
+  input  logic       MEMRb, MEMWb,          // active-low read / write strobes
+  output logic       INTR, TXRDYb, RXRDYb,
+  // Clocks
+  output  logic      BAUDOUTb,              // inverted baudpulse
+  input   logic      RCLK,                  // inverted to form rxbaudpulse
+  // E1A Driver
+  input  logic       SIN, DSRb, DCDb, CTSb, RIb,
+  output logic       SOUT, RTSb, DTRb, OUT1b, OUT2b
+);
+
+  // transmit and receive states
+  typedef enum {UART_IDLE, UART_ACTIVE, UART_DONE, UART_BREAK} statetype;
+
+  // Registers
+  logic [10:0] RBR; // {overrun, parity, framing error, data}; only bits [7:0] are returned on a read
+  logic [7:0] IIR, FCR, LCR, LSR, SCR, DLL, DLM; // IIR is only reset here; IIR reads are built from intrid/intrpending
+  logic [3:0] IER, MSR;
+  logic [4:0] MCR;
+
+  // Synchronized and delayed UART signals
+  logic SINd, DSRbd, DCDbd, CTSbd, RIbd;
+  logic SINsync, DSRbsync, DCDbsync, CTSbsync, RIbsync;
+  logic DSRb2, DCDb2, CTSb2, RIb2;
+  logic SOUTbit;
+
+  // Control signals
+  logic loop; // loopback mode
+  logic DLAB; // Divisor Latch Access Bit (LCR bit 7)
+
+  // Baud and rx/tx timing
+  logic baudpulse, txbaudpulse, rxbaudpulse; // high one system clk cycle each baud/16 period
+  logic [23:0] baudcount;
+  logic [3:0] rxoversampledcnt, txoversampledcnt; // count oversampled-by-16
+  logic [3:0] rxbitsreceived, txbitssent;
+  statetype rxstate, txstate;
+
+  // shift registers and FIFOs
+  logic [9:0] rxshiftreg;
+  logic [11:0] txshiftreg;
+  logic [10:0] rxfifo[15:0];
+  logic [7:0] txfifo[15:0];
+  logic [3:0] rxfifohead, rxfifotail, txfifohead, txfifotail, rxfifotriggerlevel;
+  logic [3:0] rxfifoentries, txfifoentries;
+  logic [3:0] rxbitsexpected, txbitsexpected;
+
+  // receive data
+  logic [10:0] RXBR;
+  logic [6:0] rxtimeoutcnt;
+  logic rxcentered;
+  logic rxparity, rxparitybit, rxstopbit;
+  logic rxparityerr, rxoverrunerr, rxframingerr, rxbreak, rxfifohaserr;
+  logic rxdataready;
+  logic rxfifoempty, rxfifotriggered, rxfifotimeout;
+  logic rxfifodmaready;
+  logic [8:0] rxdata9;
+  logic [7:0] rxdata;
+  logic [15:0] rxerrbit, rxfullbit;
+
+  // transmit data
+  logic [11:0] TXHR, txdata, nexttxdata, txsr;
+  logic txnextbit, txhrfull, txsrfull;
+  logic txparity;
+  logic txfifoempty, txfifofull, txfifodmaready;
+
+  // control signals
+  logic fifoenabled, fifodmamodesel, evenparitysel;
+
+  // interrupts
+  logic rxlinestatusintr, rxdataavailintr, txhremptyintr, modemstatusintr, intrpending;
+  logic [2:0] intrid;
+
+  ///////////////////////////////////////////
+  // Input synchronization: 2-stage synchronizer
+  ///////////////////////////////////////////
+  always_ff @(posedge clk) begin
+    {SINd, DSRbd, DCDbd, CTSbd, RIbd} <= {SIN, DSRb, DCDb, CTSb, RIb};
+    {SINsync, DSRbsync, DCDbsync, CTSbsync, RIbsync} <= loop ? {SOUTbit, ~MCR[0], ~MCR[3], ~MCR[1], ~MCR[2]} : 
+        {SINd, DSRbd, DCDbd, CTSbd, RIbd}; // synchronized signals, handle loopback testing
+    {DSRb2, DCDb2, CTSb2, RIb2} <= {DSRbsync, DCDbsync, CTSbsync, RIbsync}; // for detecting state changes
+  end
+
+  ///////////////////////////////////////////
+  // Register interface (Table 1, note some are read only and some write only)
+  ///////////////////////////////////////////
+  always_ff @(posedge clk, posedge reset) 
+    if (reset) begin // Table 3 Reset Configuration
+      IER <= 4'b0;
+      IIR <= 8'b1; 
+      FCR <= 8'b0;
+      LCR <= 8'b0;
+      MCR <= 5'b0;
+      LSR <= 8'b01100000;
+      MSR <= 4'b0;
+      DLL <= 8'b0;
+      DLM <= 8'b0;
+      SCR <= 8'b0; // not strictly necessary to reset
+    end else begin
+      // Line Status Register (8.6.3); all non-blocking, status first so the bus access further down wins a same-cycle conflict
+      LSR[0] <= rxdataready; // Data ready
+      if (RXBR[10]) LSR[1] <= 1; // overrun error
+      if (RXBR[9])  LSR[2] <= 1; // parity error
+      if (RXBR[8])  LSR[3] <= 1; // framing error
+      if (rxbreak)  LSR[4] <= 1; // break indicator
+      LSR[5] <= txhremptyintr ; //  THRE
+      LSR[6] <= ~txsrfull & txhremptyintr; //  TEMT
+      if (rxfifohaserr) LSR[7] <= 1; // any bits in FIFO have error
+
+      // Modem Status Register (8.6.8): delta bits are sticky until written or cleared by a read
+      MSR[0] <= MSR[0] | (CTSb2 ^ CTSbsync); // Delta Clear to Send
+      MSR[1] <= MSR[1] | (DSRb2 ^ DSRbsync); // Delta Data Set Ready
+      MSR[2] <= MSR[2] | (~RIb2 & RIbsync); // Trailing Edge of Ring Indicator
+      MSR[3] <= MSR[3] | (DCDb2 ^ DCDbsync); // Delta Data Carrier Detect
+      if (~MEMWb) begin
+        case (A)
+          3'b000: if (DLAB) DLL <= Din; // else TXHR <= Din; // TX handled in TX register/FIFO section
+          3'b001: if (DLAB) DLM <= Din; else IER <= Din[3:0];
+          3'b010: FCR <= {Din[7:6], 2'b0, Din[3], 2'b0, Din[0]}; // Write only FIFO Control Register; 4:5 reserved and 2:1 self-clearing
+          3'b011: LCR <= Din;
+          3'b100: MCR <= Din[4:0];
+          3'b101: LSR[6:1] <= Din[6:1];  // recommended only for test, see 8.6.3
+          3'b110: MSR <= Din[3:0];
+          3'b111: SCR <= Din;
+        endcase
+      end else if (~MEMRb) begin
+        /* verilator lint_off CASEINCOMPLETE */
+        case (A)
+          3'b101: begin // clear some LSR bits on read
+            LSR[4:1] <= 0;
+            LSR[7] <= 0;
+          end
+          3'b110: MSR[3:0] <= 4'b0; // clear all four delta/status bits on read
+        endcase
+        /* verilator lint_on CASEINCOMPLETE */
+      end
+    end
+
+  always_comb
+    if (~MEMRb)
+      case (A)
+        3'b000: if (DLAB) Dout = DLL; else Dout = RBR[7:0]; // error flags in RBR[10:8] are not part of the data byte
+        3'b001: if (DLAB) Dout = DLM; else Dout = {4'b0, IER[3:0]};
+        3'b010: Dout = {{2{fifoenabled}}, 2'b00, intrid[2:0], ~intrpending}; // Read only Interrupt Ident Register
+        3'b011: Dout = LCR;
+        3'b100: Dout = {3'b000, MCR};
+        3'b101: Dout = LSR;
+        3'b110: Dout = {~DCDbsync, ~RIbsync, ~DSRbsync, ~CTSbsync, MSR[3:0]}; // bit 7 = DCD, 6 = RI, 5 = DSR, 4 = CTS
+        3'b111: Dout = SCR;      
+      endcase
+    else Dout = 8'b0;
+
+  ///////////////////////////////////////////
+  // Baud rate generator
+  // consider switching to same fixed-frequency reference clock used for TIME register
+  // prescale by factor of 2^UART_PRESCALE to allow for high-frequency reference clock
+  // Unlike PC16550D, this unit is hardwired with same rx and tx baud clock
+  // *** add table of scale factors to get 16x uart clk
+  ///////////////////////////////////////////
+  always_ff @(posedge clk, posedge reset) 
+    if (reset) begin
+      baudcount <= 0;
+      baudpulse <= 0;
+    end else begin
+      baudpulse <= (baudcount == {DLM, DLL, {(`UART_PRESCALE){1'b0}}});
+      baudcount <= baudpulse ? 0 :  baudcount +1;
+    end
+  assign txbaudpulse = baudpulse;
+  assign BAUDOUTb = ~baudpulse;
+  assign rxbaudpulse = ~RCLK; // usually BAUDOUTb tied to RCLK externally
+
+  ///////////////////////////////////////////
+  // receive timing and control
+  ///////////////////////////////////////////
+
+  always_ff @(posedge clk, posedge reset)
+    if (reset) begin
+      rxoversampledcnt <= 0;
+      rxstate <= UART_IDLE; // non-blocking, like every other rxstate update in this process
+      rxbitsreceived <= 0;
+      rxtimeoutcnt <= 0;
+    end else begin
+      if (rxstate == UART_IDLE & ~SINsync) begin // got start bit
+        rxstate <= UART_ACTIVE;
+        rxoversampledcnt <= 0;
+        rxbitsreceived <= 0;
+        rxtimeoutcnt <= 0; // reset timeout when new character is arriving
+      end else if (rxbaudpulse & (rxstate == UART_ACTIVE)) begin
+        rxoversampledcnt <= rxoversampledcnt + 1;  // 16x oversampled counter
+        if (rxcentered) rxbitsreceived <= rxbitsreceived + 1;
+        if (rxbitsreceived == rxbitsexpected) rxstate <= UART_DONE; // pulse rxdone for a cycle
+      end else if (rxstate == UART_DONE || rxstate == UART_BREAK) begin
+        if (rxbreak & ~SINsync) rxstate <= UART_BREAK;
+        else rxstate <= UART_IDLE;
+      end
+      // timeout counting
+      if (~MEMRb && A == 3'b000 && ~DLAB) rxtimeoutcnt <= 0; // reset timeout on read
+      else if (fifoenabled & ~rxfifoempty & rxbaudpulse & ~rxfifotimeout) rxtimeoutcnt <= rxtimeoutcnt+1; // *** not right
+    end
+
+  assign rxcentered = rxbaudpulse && (rxoversampledcnt == 4'b1000);  // implies rxstate = UART_ACTIVE
+  assign rxbitsexpected = 1 + (5 + LCR[1:0]) + LCR[3] + 1; // start bit + data bits + (parity bit) + stop bit 
+
+  ///////////////////////////////////////////
+  // receive shift register, buffer register, FIFO
+  ///////////////////////////////////////////
+  always_ff @(posedge clk, posedge reset)
+    if (reset) rxshiftreg <= 0;
+    else if (rxcentered) rxshiftreg <= {rxshiftreg[8:0], SINsync}; // capture bit
+  assign rxparitybit = rxshiftreg[1]; // parity, if it exists, in bit 1 when all done
+  assign rxstopbit = rxshiftreg[0];
+  always_comb
+    case(LCR[1:0]) // check how many bits used.  Grab all bits including possible parity
+      2'b00: rxdata9 = {3'b0, rxshiftreg[6:1]}; // 5-bit character
+      2'b01: rxdata9 = {2'b0, rxshiftreg[7:1]}; // 6-bit 
+      2'b10: rxdata9 = {1'b0, rxshiftreg[8:1]}; // 7-bit
+      2'b11: rxdata9 = rxshiftreg[9:1];
+    endcase
+  assign rxdata = LCR[3] ? rxdata9[8:1] : rxdata9[7:0]; // discard parity bit
+
+  // ERROR CONDITIONS
+  assign rxparity = ^rxdata;
+  assign rxparityerr = LCR[3] & (rxparity ^ rxparitybit ^ ~evenparitysel); // even/odd parity check, reported only when parity is enabled (LCR[3])
+  assign rxoverrunerr = fifoenabled ? (rxfifoentries == 15) : rxdataready; // overrun if FIFO or receive buffer register full 
+  assign rxframingerr = ~rxstopbit; // framing error if no stop bit
+  assign rxbreak = rxframingerr & (rxdata9 == 9'b0); // break when 0 for start + data + parity + stop time
+
+  // receive FIFO and register
+  always_ff @(posedge clk, posedge reset)
+    if (reset) begin
+      rxfifohead <= 0; rxfifotail <= 0; rxdataready <= 0; RXBR <= 0;
+    end else begin
+      if (rxstate == UART_DONE) begin
+        RXBR <= {rxoverrunerr, rxparityerr, rxframingerr, rxdata}; // load receive buffer register
+        if (fifoenabled) begin
+          rxfifo[rxfifohead] <= {rxoverrunerr, rxparityerr, rxframingerr, rxdata}; // push the same word; RXBR itself only updates after this edge
+          rxfifohead <= rxfifohead + 1;
+        end
+        rxdataready <= 1;
+      end else if (~MEMRb && A == 3'b000 && ~DLAB) begin // reading RBR updates ready / pops fifo 
+        if (fifoenabled) begin
+          if (~rxfifoempty) rxfifotail <= rxfifotail + 1; // pop only when an entry exists, so tail never passes head
+          if (rxfifoempty | (rxfifohead == rxfifotail + 4'd1)) rxdataready <= 0; // nothing left after this read (4-bit add wraps)
+        end else rxdataready <= 0;
+      end else if (~MEMWb && A == 3'b010)  // writes to FIFO Control Register
+        if (Din[1] | ~Din[0]) begin // rx FIFO reset or FIFO disable clears FIFO contents
+          rxfifohead <= 0; rxfifotail <= 0;
+        end
+    end
+
+  assign rxfifoempty = (rxfifohead == rxfifotail);
+  assign rxfifoentries = (rxfifohead >= rxfifotail) ? (rxfifohead-rxfifotail) : 
+                                                      (rxfifohead + 16 - rxfifotail);
+  assign rxfifotriggered = rxfifoentries >= rxfifotriggerlevel;
+  //assign rxfifotimeout = rxtimeoutcnt[6]; // time out after 4 character periods; *** probably not right yet
+  assign rxfifotimeout = 0; // disabled pending fix
+
+  // detect any errors in rx fifo
+  generate
+    genvar i;
+    for (i=0; i<16; i++) begin
+      assign rxerrbit[i] = |rxfifo[i][10:8]; // are any of the error conditions set?
+      if (i > 0)
+        assign rxfullbit[i] = ((rxfifohead==i) | rxfullbit[i-1]) & (rxfifotail != i);
+      else
+        assign rxfullbit[0] = ((rxfifohead==i) | rxfullbit[15]) & (rxfifotail != i); // NOTE(review): bit 0 depends on bit 15, so rxfullbit forms a combinational ring
+    end
+  endgenerate
+  assign rxfifohaserr = |(rxerrbit & rxfullbit);
+
+  // receive buffer register and ready bit
+  always_ff @(posedge clk, posedge reset) // track rxrdy for DMA mode (FCR3 = FCR0 = 1)
+    if (reset) rxfifodmaready <= 0;
+    else if (rxfifotriggered | rxfifotimeout) rxfifodmaready <= 1;
+    else if (rxfifoempty) rxfifodmaready <= 0;
+
+  always_comb
+    if (fifoenabled) begin
+      if (rxfifoempty) RBR = 11'b0;
+      else             RBR = rxfifo[rxfifotail];
+      if (fifodmamodesel) RXRDYb = ~rxfifodmaready;
+      else                RXRDYb = rxfifoempty;
+    end else begin
+      RBR = RXBR;
+      RXRDYb = ~rxdataready;
+    end
+
+  ///////////////////////////////////////////
+  // transmit timing and control
+  ///////////////////////////////////////////
+  always_ff @(posedge clk, posedge reset)
+    if (reset) begin
+      txoversampledcnt <= 0;
+      txstate <= UART_IDLE;
+      txbitssent <= 0;
+    end else if ((txstate == UART_IDLE) && txsrfull) begin // start transmitting
+      txstate <= UART_ACTIVE;
+      txoversampledcnt <= 4'd1; // start at 1 so the count first reads 0 on the 16th baud pulse: the start bit lasts a full bit time
+      txbitssent <= 0;
+    end else if (txbaudpulse & (txstate == UART_ACTIVE)) begin
+      txoversampledcnt <= txoversampledcnt + 1; 
+      if (txnextbit) begin // transmit at end of phase
+        txbitssent <= txbitssent+1;
+        if (txbitssent == txbitsexpected) txstate <= UART_DONE;
+      end
+    end else if (txstate == UART_DONE) begin
+      txstate <= UART_IDLE;
+    end
+
+  assign txbitsexpected = 1 + (5 + LCR[1:0]) + LCR[3] + 1 + LCR[2] - 1; // start bit + data bits + (parity bit) + stop bit(s)
+  assign txnextbit = txbaudpulse && (txoversampledcnt == 4'b0000);  // implies txstate = UART_ACTIVE
+
+  ///////////////////////////////////////////
+  // transmit holding register, shift register, FIFO
+  ///////////////////////////////////////////
+
+  always_comb begin // compute value for parity and tx holding register
+    nexttxdata = fifoenabled ? txfifo[txfifotail] : TXHR; // pick from FIFO or holding register
+    case (LCR[1:0]) // compute parity from appropriate number of bits
+      2'b00: txparity = ^nexttxdata[4:0] ^ ~evenparitysel; // *** check polarity
+      2'b01: txparity = ^nexttxdata[5:0] ^ ~evenparitysel; 
+      2'b10: txparity = ^nexttxdata[6:0] ^ ~evenparitysel; 
+      2'b11: txparity = ^nexttxdata[7:0] ^ ~evenparitysel; 
+    endcase
+    case({LCR[3], LCR[1:0]}) // parity, data bits
+      // load up start bit (0), 5-8 data bits, 0-1 parity bits, 2 stop bits (only one sometimes used), padding
+      3'b000: txdata = {1'b0, nexttxdata[4:0], 6'b111111};          // 5 data, no parity
+      3'b001: txdata = {1'b0, nexttxdata[5:0], 5'b11111};           // 6 data, no parity
+      3'b010: txdata = {1'b0, nexttxdata[6:0], 4'b1111};            // 7 data, no parity
+      3'b011: txdata = {1'b0, nexttxdata[7:0], 3'b111};             // 8 data, no parity
+      3'b100: txdata = {1'b0, nexttxdata[4:0], txparity, 5'b11111}; // 5 data, parity
+      3'b101: txdata = {1'b0, nexttxdata[5:0], txparity, 4'b1111};  // 6 data, parity
+      3'b110: txdata = {1'b0, nexttxdata[6:0], txparity, 3'b111};   // 7 data, parity
+      3'b111: txdata = {1'b0, nexttxdata[7:0], txparity, 2'b11};    // 8 data, parity
+    endcase
+  end
+
+  // registers & FIFO
+  always_ff @(posedge clk, posedge reset)
+    if (reset) begin
+      txfifohead <= 0; txfifotail <= 0; txhrfull <= 0; txsrfull <= 0; TXHR <= 0; txsr <= 12'hfff; // all ones so the serial output idles at mark
+    end else begin
+      if (~MEMWb && A == 3'b000 && ~DLAB) begin // writing transmit holding register or fifo
+        if (fifoenabled) begin
+          txfifo[txfifohead] <= Din;
+          txfifohead <= txfifohead + 1;          
+        end else begin 
+          TXHR <= Din;
+          txhrfull <= 1;
+        end
+        $display("UART transmits: %c",Din); // for testbench
+      end
+      if (txstate == UART_IDLE) begin // move data into tx shift register if available; explicit begin/end pins each else to its intended if
+        if (fifoenabled) begin
+          if (~txfifoempty & ~txsrfull) begin // txstate stays IDLE for one cycle after a load, so never reload a full shift register
+            txsr <= txdata;
+            txfifotail <= txfifotail+1;
+            txsrfull <= 1;
+          end
+        end else if (txhrfull & ~txsrfull) begin
+          txsr <= txdata;
+          txhrfull <= 0;
+          txsrfull <= 1;
+        end
+      end else if (txstate == UART_DONE) txsrfull <= 0; // done transmitting shift register
+      else if (txstate == UART_ACTIVE && txnextbit) txsr <= {txsr[10:0], 1'b1}; // shift the tx shift register, filling with mark bits
+      if (!MEMWb && A == 3'b010) // writes to FIFO control register
+        if (Din[2] | ~Din[0]) begin // tx FIFO reset or FIFO disable clears FIFO contents
+          txfifohead <= 0; txfifotail <= 0;
+        end
+    end
+
+  assign txfifoempty = (txfifohead == txfifotail);
+  assign txfifoentries = (txfifohead >= txfifotail) ? (txfifohead-txfifotail) : 
+                                                      (txfifohead + 16 - txfifotail);
+  assign txfifofull = (txfifoentries == 4'b1111);
+
+  // transmit buffer ready bit
+  always_ff @(posedge clk, posedge reset) // track txrdy for DMA mode (FCR3 = FCR0 = 1)
+    if (reset) txfifodmaready <= 0;
+    else if (txfifoempty) txfifodmaready <= 1;
+    else if (txfifofull)  txfifodmaready <= 0;
+
+  always_comb
+    if (fifoenabled & fifodmamodesel) TXRDYb = ~txfifodmaready;
+    else TXRDYb  = ~txhremptyintr;
+
+  // Transmitter pin 
+  assign SOUTbit = txsr[11]; // transmit most significant bit of the shift register
+  assign SOUT = loop ? 1 : (LCR[6] ? 0 : SOUTbit); // tied to 1 during loopback or 0 during break 
+
+  ///////////////////////////////////////////
+  // interrupts
+  ///////////////////////////////////////////
+
+  assign rxlinestatusintr = |LSR[4:1]; // LS interrupt if any of the flags are true
+  assign rxdataavailintr = fifoenabled ? rxfifotriggered : rxdataready; 
+  assign txhremptyintr = fifoenabled ? txfifoempty : ~txhrfull; 
+  assign modemstatusintr = |MSR[3:0]; // set interrupt when modem pins change
+
+  // interrupt priority (Table 5)
+  // set intrid based on highest priority pending interrupt source; otherwise, no interrupt is pending
+  always_comb begin
+    intrpending = 1;
+    if      (rxlinestatusintr & IER[2])            intrid = 3'b011;
+    else if (rxdataavailintr & IER[0])             intrid = 3'b010;
+    else if (rxfifotimeout & fifoenabled & IER[0]) intrid = 3'b110;
+    else if (txhremptyintr & IER[1])               intrid = 3'b001;
+    else if (modemstatusintr & IER[3])             intrid = 3'b000;
+    else begin
+      intrid = 3'b000;
+      intrpending = 0;
+    end
+  end
+  always_ff @(posedge clk) INTR <= intrpending; // prevent glitches on interrupt pin
+
+  ///////////////////////////////////////////
+  // modem control logic
+  ///////////////////////////////////////////
+
+  assign loop = MCR[4]; 
+  assign DTRb  = ~MCR[0] | loop; // disable modem signals in loopback mode
+  assign RTSb  = ~MCR[1] | loop;
+  assign OUT1b = ~MCR[2] | loop;
+  assign OUT2b = ~MCR[3] | loop;
+
+  assign DLAB = LCR[7];
+  assign evenparitysel = LCR[4];
+  assign fifoenabled = FCR[0];
+  assign fifodmamodesel = FCR[3];
+  always_comb
+    case (FCR[7:6]) 
+      2'b00: rxfifotriggerlevel = 1;
+      2'b01: rxfifotriggerlevel = 4;
+      2'b10: rxfifotriggerlevel = 8;
+      2'b11: rxfifotriggerlevel = 14;
+    endcase
+
+endmodule
diff --git a/wally-pipelined/src/wally-macros.sv b/wally-pipelined/src/wally-macros.sv
index 9ff61617..c52e206d 100644
--- a/wally-pipelined/src/wally-macros.sv
+++ b/wally-pipelined/src/wally-macros.sv
@@ -20,5 +20,14 @@
 `define S_MODE (2'b01)
 `define U_MODE (2'b00)
 
+// Test modes
+
+// Tie GPIO outputs back to inputs
+`define GPIO_LOOPBACK_TEST 0
+
+
+// Hardware configuration
+`define UART_PRESCALE 1
+
 /* verilator lint_off STMTDLY */
 /* verilator lint_off WIDTH */
diff --git a/wally-pipelined/src/wallypipelined.sv b/wally-pipelined/src/wallypipelined.sv
index 762e260a..bec09fd6 100644
--- a/wally-pipelined/src/wallypipelined.sv
+++ b/wally-pipelined/src/wallypipelined.sv
@@ -57,7 +57,9 @@ module wallypipelined #(parameter XLEN=32, MISA=0, ZCSR = 1, ZCOUNTERS = 1) (
   output logic [XLEN-1:0] WriteDataM, DataAdrM, 
   output logic [1:0]      MemRWM,
   input  logic [31:0]     GPIOPinsIn,
-  output logic [31:0]     GPIOPinsOut, GPIOPinsEn
+  output logic [31:0]     GPIOPinsOut, GPIOPinsEn,
+  input  logic            UARTSin,
+  output logic            UARTSout
 );
 
   logic [XLEN-1:0] PCF, ReadDataM;