diff --git a/wally-pipelined/src/fpu/fpudivsqrtrecur.sv b/wally-pipelined/src/fpu/fpudivsqrtrecur.sv new file mode 100644 index 000000000..0d1b89ff2 --- /dev/null +++ b/wally-pipelined/src/fpu/fpudivsqrtrecur.sv @@ -0,0 +1,69 @@ +/////////////////////////////////////////// +// +// Written: David Harris +// Modified: 11 September 2021 +// +// Purpose: Recurrence-based SRT Division and Square Root +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+// Special-case wrapper for FP divide/sqrt: picks a placeholder 0, signed infinity, signed zero, or the recurrence result
+module fpudivsqrtrecur (
+  input  logic clk,
+  input  logic reset,
+  input  logic FlushM, // flush the memory stage
+  input  logic StallM, // stall memory stage
+  input  logic FDivSqrtStart, // start a computation
+  input  logic FmtE, // precision 1 = double 0 = single
+  input  logic FDivE, FSqrtE,
+  input  logic [2:0] FrmM, // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
+  input  logic XSgnE, YSgnE, // input signs - execute stage
+  input  logic [`NE-1:0] XExpE, YExpE, // input exponents - execute stage
+  input  logic [`NF:0] XManE, YManE, // input mantissa - execute stage
+  input  logic XDenormE, YDenormE, // is denorm
+  input  logic XZeroE, YZeroE, // is zero - execute stage
+  input  logic XNaNE, YNaNE, // is NaN
+  input  logic XSNaNE, YSNaNE, // is signaling NaN
+  input  logic XInfE, YInfE, ZInfE, // is infinity
+  input  logic [10:0] BiasE, // bias (max exponent/2) ***parameterize in unpacking unit
+  output logic FDviSqrtBusy, FDivSqrtDone, // currently occupied, or done with operation (the FDviSqrtBusy spelling is part of the port interface)
+  output logic [`FLEN-1:0] FDivSqrtResM, // result
+  output logic [4:0] FDivSqrtFlgM // flags
+  );
+
+  logic FDivSqrtResSgn; // sign bit used for the infinity and zero special-case results
+  logic [`FLEN-1:0] FDivSqrtRecurRes; // *** recurrence result; nothing in this module drives it yet
+
+  // Radix-2 SRT Division and Square Root
+
+  // Special Cases
+  // *** shift to handle denorms in hardware
+
+  assign FDivSqrtResSgn = FDivE & (XSgnE ^ YSgnE); // Sign is negative for division if inputs have opposite signs
+
+  always_comb begin
+    if (FSqrtE & XSgnE | FDivE & XZeroE & YZeroE | XNaNE | FDivE & YNaNE) FDivSqrtResM = 0; // ***replace with NAN; // *** which one
+    else if (FDivE & YZeroE | XInfE) FDivSqrtResM = {FDivSqrtResSgn, {`NE{1'b1}}, {`NF{1'b0}}}; // infinity: exponent field all ones, fraction zero
+    else if (FDivE & YInfE) FDivSqrtResM = {FDivSqrtResSgn, {`NE{1'b0}}, {`NF{1'b0}}}; // zero
+    else FDivSqrtResM = FDivSqrtRecurRes;
+  end
+  // *** FDviSqrtBusy, FDivSqrtDone and FDivSqrtFlgM are not assigned anywhere in this module yet
+  // *** handle early termination in the special cases
+  // *** handle signaling NANs
+endmodule
\ No newline at end of file diff --git a/wally-pipelined/src/fpu/fpudivsqrtrecurcore.sv b/wally-pipelined/src/fpu/fpudivsqrtrecurcore.sv new file mode 100644 index 000000000..3f6fe67d9 --- /dev/null +++ b/wally-pipelined/src/fpu/fpudivsqrtrecurcore.sv @@ -0,0 +1,100 @@ +/////////////////////////////////////////// +// +// Written: David Harris +// Modified: 11 September 2021 +// +// Purpose: Recurrence-based SRT Division and Square Root +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+
+// Bit counts:
+// Inputs are originally normalized floating point numbers with NF fractional bits and a leading 1 integer bit
+// x is right shifted by up to 2 to be in the range of 1/4 <= x < 1/2 for divide, 1/4 <= x < 1 for sqrt
+// Hence, x now has NF+2 fractional bits and 0 integer bits
+// d is right shifted by 1 to be in the range of 1/2 <= d < 1. It thus has NF+1 fractional bits and 0 integer bits
+// q is eventually in the range of 1/4 < q < 1 and hence needs NF+2 bits to keep NF bits when normalized, plus some*** more bits for rounding
+// The partial *** (sentence left unfinished; the draft below carries the partial remainder as ws and wc with their width still marked ***)
+// Only the qsel stub at the end is live code; the fpudivsqrtrecurcore and csa modules below are commented-out drafts
+/*
+module fpudivsqrtrecurcore (
+  input logic clk,
+  input logic reset,
+  input logic start, // start a computation
+  input logic busy, // computation running
+  input logic fmt, // precision 1 = double 0 = single
+  input logic [`NF+1:0] x, // in range 1/4 <= x < 1/2 for divide, 1/4 <=x < 1 for sqrt
+  input logic [`NF+1:0] din, // in range 1/2 <= d < 1 for divide
+  input logic FDiv, FSqrt, // *** not yet used
+  output logic [`FLEN-1:0] FDivSqrtRecurRes // result
+  );
+
+  assign FDivSqrtRecurRes = 0;
+
+  logic [***] d, ws, wsout, wsnext, wc, wcout, wcnext;
+  logic [1:0] q; // 00 = 0, 01 = 1, 10 = -1
+  // NOTE(review): dq is assigned in the always_comb below but is missing from these declarations
+  // Radix-2 SRT Division
+
+  // registers for divisor and partial remainder
+  flopen #(NF+1) dreg(clk, start, din, d);
+  mux2 #(NF+1) wsmux(wsout, x, start, wsnext);
+  flopen #(NF+1) wsreg(clk, busy, wsnext, ws);
+  mux2 #(NF+1) wcmux(wcout, 0, start, wcnext);
+  flopen #(NF+1) wcreg(clk, busy, wcnext, wc);
+  // NOTE(review): the widths above use NF without a backtick while the ports use `NF -- confirm before enabling
+  // quotient selection
+  qsel qsel(ws[***4bits], wc[***], q);
+  // NOTE(review): arguments are passed as (ws, wc) but qsel declares its ports in the order (wc, ws)
+  // partial remainder update
+  always_comb begin // select -d * q to add to partial remainder
+    if (q[1]) dq = d;
+    else if (q[0]) dq = ~d;
+    else dq = 0;
+  end
+  csa #(***) csa(ws, wc, dq, q[1], wsout, wcout);
+  // NOTE(review): the carry-in is q[1], yet ~d (which needs +1 to form -d) is selected by q[0] -- confirm which is intended
+
+endmodule
+*/
+
+/*
+module csa #(parameter N=4) (
+  input logic [N-1:0] sin, cin, ain,
+  input logic carry,
+  output logic [N-1:0] sum, cout
+);
+
+  logic [N-1:0] c;
+
+  assign c = {cin[N-2:0], carry}; // shift carries left and inject optional 1 into lsb
+  assign sum = sin ^ ain ^ c;
+  assign cout = sin & ain | sin & c | ain & c;
+endmodule
+*/
+
+module qsel( // radix 2 SRT division quotient selection
+  input logic [3:0] wc, ws, // NOTE(review): presumably the leading bits of the carry and sum words of the partial remainder -- confirm
+  output logic [1:0] q // NOTE(review): presumably encoded as in the draft core (00 = 0, 01 = 1, 10 = -1) -- confirm
+);
+  // *** no selection logic yet: q has no driver in this stub
+endmodule
+
+
+
diff --git a/wally-pipelined/src/muldiv/intdiv_restoring.sv b/wally-pipelined/src/muldiv/intdiv_restoring.sv new file mode 100644 index 000000000..9571ba721 --- /dev/null +++ b/wally-pipelined/src/muldiv/intdiv_restoring.sv @@ -0,0 +1,76 @@ +/////////////////////////////////////////// +// intdiv_restoring.sv +// +// Written: David_Harris@hmc.edu 12 September 2021 +// Modified: +// +// Purpose: Restoring integer division using a shift register a subtractor +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module intdiv_restoring (
+  input  logic clk,
+  input  logic reset,              // synchronous: clears busy, done and the step counter
+  input  logic signedDivide,       // *** not used yet: operands are treated as unsigned
+  input  logic start,              // loads X into the shift register and begins a division
+  input  logic [`XLEN-1:0] X, D,   // dividend (sampled on start) and divisor (read on every step, so hold it while busy)
+  output logic busy, done,         // busy while iterating; done is high for one cycle after the last step
+  output logic [`XLEN-1:0] Q, REM  // quotient and remainder
+  );
+
+  logic [`XLEN-1:0] W, Wshift, Wprime, Wstep, Wnext, XQ, XQshift, XQnext;
+  logic qi; // current quotient bit: 1 when the trial subtraction Wshift - D does not borrow
+  localparam STEPBITS = $clog2(`XLEN);
+  logic [STEPBITS:0] step, stepnext; // iteration counter; bit STEPBITS becomes 1 after `XLEN steps
+  logic div0;
+
+  // restoring division: W holds the partial remainder, XQ shifts the dividend out and the quotient in
+  assign {Wshift, XQshift} = {W[`XLEN-2:0], XQ, qi}; // shift left one bit, quotient bit enters at the lsb
+  assign {qi, Wprime} = {1'b1, Wshift} - {1'b0, D}; // the msb survives exactly when Wshift >= D
+  mux2 #(`XLEN) wrestoremux(Wshift, Wprime, qi, Wstep); // keep the difference on success, otherwise restore
+  mux2 #(`XLEN) wmux(Wstep, {`XLEN{1'b0}}, start, Wnext); // start clears the partial remainder
+  mux2 #(`XLEN) xmux(XQshift, X, start, XQnext); // start loads the dividend, otherwise keep shifting XQ
+  flopen #(`XLEN) wreg(clk, start | busy, Wnext, W); // enabled on start too, because busy is still 0 in that cycle
+  flopen #(`XLEN) xreg(clk, start | busy, XQnext, XQ);
+
+  // outputs
+  // *** sign extension, handling W instructions
+  assign div0 = (D == 0);
+  mux2 #(`XLEN) qmux(XQ, {`XLEN{1'b1}}, div0, Q); // Q taken from XQ register, or all 1s when dividing by zero
+  mux2 #(`XLEN) remmux(W, X, div0, REM); // REM taken from W register, or from X when dividing by zero
+
+  // busy logic: nonblocking updates so that wreg/xreg see the pre-edge value of busy
+  assign stepnext = step + 1'b1;
+  always_ff @(posedge clk)
+    if (reset) begin
+      busy <= 1'b0; done <= 1'b0; step <= '0;
+    end else if (start) begin
+      busy <= 1'b1; done <= 1'b0; step <= '0;
+    end else if (busy) begin
+      if (stepnext[STEPBITS] | div0) begin // *** early terminate on division by 0
+        step <= '0; busy <= 1'b0; done <= 1'b1;
+      end else step <= stepnext;
+    end else if (done) begin
+      done <= 1'b0;
+    end
+
+endmodule // intdiv_restoring
+
+
diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 7288229c5..a42e9debc 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -51,7 +51,7 @@ module muldiv ( logic enable_q; logic [2:0] Funct3E_Q; - logic div0error; + logic div0error; // ***unused logic [`XLEN-1:0] N, D; logic [`XLEN-1:0] Num0, Den0; @@ -88,12 +88,14 @@ module muldiv ( .reset(reset), .clk(~gclk)); assign signedDivide = 
(Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); - intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); + //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); + intdiv_restoring div(.clk, .reset, .signedDivide, .start(startDivideE), .X(N), .D(D), .busy(DivBusyE), .done(DivDoneE), .Q(QuotE), .REM(RemE)); // Added for debugging of start signal for divide assign startDivideE = MulDivE&DivStartE&~DivBusyE; // capture the start control signals since they are not held constant. + // *** appears to be unused flopenrc #(3) funct3ereg (.d(Funct3E), .q(Funct3E_Q), .en(DivStartE), @@ -114,7 +116,7 @@ module muldiv ( 3'b111: PrelimResultE = RemE; endcase // case (Funct3E) - // Start Divide process + // Start Divide process. This simplifies to DivStartE = Funct3E[2]; always_comb case (Funct3E) 3'b000: DivStartE = 1'b0; diff --git a/wally-pipelined/src/uncore/uartPC16550D.sv b/wally-pipelined/src/uncore/uartPC16550D.sv index 23d580880..bf688d1bb 100644 --- a/wally-pipelined/src/uncore/uartPC16550D.sv +++ b/wally-pipelined/src/uncore/uartPC16550D.sv @@ -79,6 +79,7 @@ module uartPC16550D( logic [9:0] rxshiftreg; logic [10:0] rxfifo[15:0]; logic [7:0] txfifo[15:0]; + logic [4:0] rxfifotailunwrapped; logic [3:0] rxfifohead, rxfifotail, txfifohead, txfifotail, rxfifotriggerlevel; logic [3:0] rxfifoentries, txfifoentries; logic [3:0] rxbitsexpected, txbitsexpected; @@ -95,6 +96,7 @@ module uartPC16550D( logic [8:0] rxdata9; logic [7:0] rxdata; logic [15:0] RXerrbit, rxfullbit; + logic [31:0] rxfullbitunwrapped; // transmit data logic [7:0] TXHR, nexttxdata; @@ -289,14 +291,21 @@ module uartPC16550D( // detect any errors in rx fifo // although rxfullbit looks like a combinational loop, in one bit rxfifotail == i and breaks the loop + // tail is normally higher than head, but might wrap around. 
unwrapped variable adds 16 to eliminate wrapping generate + assign rxfifotailunwrapped = rxfifotail < rxfifohead ? {1'b1, rxfifotail} : {1'b0, rxfifotail}; genvar i; + for (i=0; i<32; i++) begin:rxfull + if (i == 0) assign rxfullbitunwrapped[i] = (rxfifohead==0) & (rxfifotail != 0); + else assign rxfullbitunwrapped[i] = ({1'b0,rxfifohead}==i | rxfullbitunwrapped[i-1]) & (rxfifotailunwrapped != i); + end for (i=0; i<16; i++) begin:rx assign RXerrbit[i] = |rxfifo[i][10:8]; // are any of the error conditions set? - if (i > 0) + assign rxfullbit[i] = rxfullbitunwrapped[i] | rxfullbitunwrapped[i+16]; +/* if (i > 0) assign rxfullbit[i] = ((rxfifohead==i) | rxfullbit[i-1]) & (rxfifotail != i); else - assign rxfullbit[0] = ((rxfifohead==i) | rxfullbit[15]) & (rxfifotail != i); + assign rxfullbit[0] = ((rxfifohead==i) | rxfullbit[15]) & (rxfifotail != i);*/ end endgenerate assign rxfifohaserr = |(RXerrbit & rxfullbit);