diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index b4fc2cea..0b1952ed 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -26,7 +26,7 @@ `include "wally-constants.vh" // macros to define supported modes -// NOTE: No hardware support fo Q yet +// NOTE: No hardware support for Q yet `define A_SUPPORTED ((`MISA >> 0) % 2 == 1) `define C_SUPPORTED ((`MISA >> 2) % 2 == 1) @@ -111,15 +111,15 @@ // division constants `define RADIX 32'h4 `define DIVCOPIES 32'h3 -`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) +`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : `NF+3) // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input -`define DIVN (`NF < `XLEN ? `XLEN : `NF+3) // length of input -`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3) -`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3)) -`define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN) +`define DIVN (`NF<`XLEN ? `XLEN : (`NF + 3)) // length of input +`define EXTRAFRACBITS ((`NF < (`XLEN)) ? (`XLEN - `NF) : 3) +`define EXTRAINTBITS ((`NF < `XLEN) ? 0 : (`NF - `XLEN + 3)) +`define DIVRESLEN ((`NF>`XLEN) ? (`NF + 4) : `XLEN) `define LOGR ((`RADIX==2) ? 
32'h1 : 32'h2) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) -// one interation is required for the integer bit for minimally redundent radix-4 +// one iteration is required for the integer bit for minimally redundant radix-4 `define FPDUR ((`DIVN+2+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4)) `define DURLEN ($clog2(`FPDUR+1)) `define QLEN (`FPDUR*`LOGR*`DIVCOPIES) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 80735168..cde357bf 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -67,7 +67,7 @@ module fdivsqrt( fdivsqrtpreproc fdivsqrtpreproc( .clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), - .Sqrt(SqrtE), .Int(MDUE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, + .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E); fdivsqrtfsm fdivsqrtfsm( .clk, .reset, .FmtE, .XsE, .SqrtE, diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv index 5c067796..17cc3f5c 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -50,7 +50,7 @@ module fdivsqrtiter( //QLEN = 1.(number of bits created for division) // N is NF+1 or XLEN // WC/WS is dependent on D so 4.N-1 ie N+3 bits or N+2:0 + one more bit in fraction for possible sqrt right shift -// D is 1.N-1, but the msb is always 1 so 0.N-1 or N-1 bits or N-1:0 +// D is 1.N-1, but the msb is always 1 so 0.N-1 or N-1 bits or N-2:0 // Dsel should match WC/WS so 4.N-1 ie N+3 bits or N+2:0 // U/UM should be 1.b so b+1 bits or b:0 // C needs to be the lenght of the final fraction 0.b so b or b-1:0 diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index a601271e..e6fe1a79 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -37,7 +37,6 @@ 
module fdivsqrtpreproc ( input logic [`NE-1:0] Xe, Ye, input logic [`FMTBITS-1:0] Fmt, input logic Sqrt, - input logic Int, input logic XZero, input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic [2:0] Funct3E, Funct3M, @@ -46,15 +45,17 @@ module fdivsqrtpreproc ( output logic [`DIVb+3:0] X, output logic [`DIVN-2:0] Dpreproc ); - // logic [`XLEN-1:0] PosA, PosB; // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; logic [`NF-1:0] PreprocA, PreprocX; logic [`NF-1:0] PreprocB, PreprocY; - // logic [`DIVN-1:0] ZeroBufX, ZeroBufY; add after Cedar Commit logic [`NF+1:0] SqrtX; - logic [`DIVb+3:0] DivX; - logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; - logic [`NE+1:0] Qe; + logic [`DIVb+3:0] DivX; + logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; + logic [`NE+1:0] Qe; + // Intdiv signals + // logic [`DIVN-1:0] ZeroBufX, ZeroBufY; add after Cedar Commit + logic [`XLEN-1:0] PosA, PosB; + logic Signed, Aneg, Bneg; // ***can probably merge X LZC with conversion // cout the number of leading zeros @@ -64,6 +65,12 @@ module fdivsqrtpreproc ( lzc #(`NF+1) lzcX (Xm, XZeroCnt); lzc #(`NF+1) lzcY (Ym, YZeroCnt); + assign Signed = Funct3E[0]; + assign Aneg = ForwardedSrcAE[`XLEN-1] & Signed; + assign Bneg = ForwardedSrcBE[`XLEN-1] & Signed; + assign PosA = Aneg ? -ForwardedSrcAE : ForwardedSrcAE; + assign PosB = Bneg ? -ForwardedSrcBE : ForwardedSrcBE; + assign PreprocX = Xm[`NF-1:0]<