This commit is contained in:
David Harris 2022-06-23 23:16:45 +00:00
commit 178f4efbab
9 changed files with 128 additions and 49 deletions

View File

@ -32,7 +32,7 @@
`define DESIGN_COMPILER 0
// RV32 or RV64: XLEN = 32 or 64
`define XLEN 64
`define XLEN 32
// IEEE 754 compliance
`define IEEE754 0

View File

@ -22,3 +22,7 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
add wave -group {Testbench} -noupdate /testbenchfp/*
add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*

View File

@ -0,0 +1,15 @@
`include "wally-config.vh"
module divshiftcalc(
input logic [`DIVLEN+2:0] Quot,
input logic [`NE:0] DivCalcExpM,
output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
output logic [`NE:0] CorrDivExp
);
assign DivShiftAmt = {{$clog2(`NORMSHIFTSZ)-1{1'b0}}, ~Quot[`DIVLEN+2]};
// the quotent is in the range [.5,2)
// if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
assign CorrDivExp = DivCalcExpM - {(`NE)'(0), ~Quot[`DIVLEN+2]};
endmodule

View File

@ -123,7 +123,7 @@ module fpu (
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
//divide signals
logic [`DIVLEN-1:0] Quot;
logic [`DIVLEN+2:0] Quot;
logic [`NE:0] DivCalcExpM;
// result and flag signals

View File

@ -59,7 +59,7 @@ module postprocess(
input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
input logic IntZeroM, // is the input zero
input logic [1:0] PostProcSelM, // select result to be written to fp register
input logic [`DIVLEN-1:0] Quot,
input logic [`DIVLEN+2:0] Quot,
output logic [`FLEN-1:0] PostProcResM, // FMA final result
output logic [4:0] PostProcFlgM,
output logic [`XLEN-1:0] FCvtIntResM // the int conversion result
@ -84,6 +84,7 @@ module postprocess(
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count
logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt;
logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
logic Plus1; // add one to the final result?
@ -93,6 +94,7 @@ module postprocess(
logic IntToFp; // is the opperation an int->fp conversion?
logic ToInt; // is the opperation an fp->int conversion?
logic [`NE+1:0] RoundExp;
logic [`NE:0] CorrDivExp;
logic [1:0] NegResMSBS;
logic CvtOp;
logic FmaOp;
@ -137,6 +139,7 @@ module postprocess(
.XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
.ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt);
always_comb
case(PostProcSelM)
@ -149,8 +152,8 @@ module postprocess(
ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
end
2'b01: begin //div ***prob can take out
ShiftAmt = {$clog2(`NORMSHIFTSZ){1'b0}};//{DivShiftAmt};
ShiftIn = {Quot, {`NORMSHIFTSZ-`DIVLEN{1'b0}}};
ShiftAmt = DivShiftAmt;
ShiftIn = {Quot[`DIVLEN+1:0], {`NORMSHIFTSZ-`DIVLEN-2{1'b0}}};
end
default: begin
ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}};
@ -173,9 +176,9 @@ module postprocess(
// round to infinity
// round to nearest max magnitude
round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .DivCalcExpM,
round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp,
.InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf,
.UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
.DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
///////////////////////////////////////////////////////////////////////////////
// Sign calculation

View File

@ -11,6 +11,7 @@ module round(
input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single
input logic [2:0] FrmM, // rounding mode
input logic FmaOp,
input logic DivOp,
input logic [1:0] PostProcSelM,
input logic CvtResDenormUfM,
input logic ToInt,
@ -23,7 +24,7 @@ module round(
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
input logic RoundSgn, // the result's sign
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic [`NE:0] DivCalcExpM, // the calculated expoent
input logic [`NE:0] CorrDivExp, // the calculated expoent
output logic UfPlus1, // do you add or subtract on from the result
output logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow
output logic [`NF-1:0] ResFrac, // Result fraction
@ -304,7 +305,7 @@ module round(
case(PostProcSelM)
2'b10: RoundExp = SumExp; // fma
2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt
2'b01: RoundExp = {DivCalcExpM[`NE], DivCalcExpM[`NE:0]}; // divide
2'b01: RoundExp = {CorrDivExp[`NE], CorrDivExp[`NE:0]}; // divide
default: RoundExp = 0;
endcase

View File

@ -54,10 +54,6 @@ module bram1p1rw
logic [DATA_WIDTH-1:0] RAM [(2**ADDR_WIDTH)-1:0];
integer i;
initial begin
$readmemh("big64.txt", RAM);
end
always @ (posedge clk) begin
dout <= RAM[addr];
if(we) begin

View File

@ -34,14 +34,15 @@ module srtradix4 (
input logic clk,
input logic DivStart,
input logic [`NE-1:0] XExpE, YExpE,
input logic [`NF-1:0] XFrac, YFrac,
input logic [`NF:0] XManE, YManE,
input logic [`XLEN-1:0] SrcA, SrcB,
input logic XZeroE,
input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic DivDone,
output logic [`DIVLEN-1:0] Quot,
output logic [`DIVLEN+2:0] Quot,
output logic [`XLEN-1:0] Rem, // *** later handle integers
output logic [`NE:0] DivCalcExpE
);
@ -49,14 +50,15 @@ module srtradix4 (
// logic qp, qz, qm; // quotient is +1, 0, or -1
logic [3:0] q;
logic [`NE:0] DivCalcExp;
logic [`DIVLEN-1:0] X, Dpreproc;
logic [`DIVLEN:0] X;
logic [`DIVLEN-1:0] Dpreproc;
logic [`DIVLEN+3:0] WS, WSA, WSN;
logic [`DIVLEN+3:0] WC, WCA, WCN;
logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel;
logic [$clog2(`XLEN+1)-1:0] intExp;
logic intSign;
srtpreproc preproc(SrcA, SrcB, XFrac, YFrac, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
srtpreproc preproc(SrcA, SrcB, XManE, YManE, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
@ -68,7 +70,7 @@ module srtradix4 (
// - otherwise load WSA into the flipflop
// *** what does N and A stand for?
// *** change shift amount for radix4
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {4'b0001, X}, DivStart, WSN);
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, X}, DivStart, WSN);
flop #(`DIVLEN+4) wsflop(clk, WSN, WS);
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
flop #(`DIVLEN+4) wcflop(clk, WCN, WC);
@ -110,9 +112,9 @@ module srtradix4 (
csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
//*** change for radix 4
otfc4 otfc4(clk, DivStart, q, Quot);
otfc4 otfc4(.clk, .DivStart, .q, .Quot);
expcalc expcalc(.XExpE, .YExpE, .DivCalcExp);
expcalc expcalc(.XExpE, .YExpE, .XZeroE, .DivCalcExp);
divcounter divcounter(clk, DivStart, DivDone);
@ -164,7 +166,57 @@ module qsel4 (
// Wmsbs = | |
logic [3:0] QSel4[1023:0];
initial $readmemh("../srt/qsel4.dat", QSel4);
initial begin
integer d, w, i, w2;
for(d=0; d<8; d++)
for(w=0; w<128; w++)begin
i = d*128+w;
w2 = w-128*(w>=64); // convert to two's complement
case(d)
0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-4) QSel4[i] = 4'b0000;
else if(w2>=-13) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
1: if(w2>=14) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-15) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
2: if(w2>=15) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-16) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
3: if(w2>=16) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-18) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
4: if(w2>=18) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-20) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
5: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-20) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
6: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-22) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
7: if(w2>=24) QSel4[i] = 4'b1000;
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-24) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
endcase
end
end
assign q = QSel4[{Dmsbs,Wmsbs}];
endmodule
@ -174,39 +226,42 @@ endmodule
///////////////////
module srtpreproc (
input logic [`XLEN-1:0] SrcA, SrcB,
input logic [`NF-1:0] XFrac, YFrac,
input logic [`NF:0] XManE, YManE,
input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic [`DIVLEN-1:0] X, D,
output logic [`DIVLEN:0] X,
output logic [`DIVLEN-1:0] Dpreproc,
output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
output logic intSign // Quotient integer sign
);
logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
logic [`XLEN-1:0] PosA, PosB;
logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
// logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
// logic [`XLEN-1:0] PosA, PosB;
// logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
logic [`DIVLEN:0] PreprocA, PreprocX;
logic [`DIVLEN-1:0] PreprocB, PreprocY;
assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
// assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
// assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
lzc #(`XLEN) lzcA (PosA, zeroCntA);
lzc #(`XLEN) lzcB (PosB, zeroCntB);
// lzc #(`XLEN) lzcA (PosA, zeroCntA);
// lzc #(`XLEN) lzcB (PosB, zeroCntB);
assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
// assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
// assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
assign PreprocA = ExtraA << zeroCntA;
assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocX = {XFrac, {`DIVLEN-`NF{1'b0}}};
assign PreprocY = {YFrac, {`DIVLEN-`NF{1'b0}}};
// assign PreprocA = ExtraA << zeroCntA;
// assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocX = {XManE, {`DIVLEN-`NF{1'b0}}};
assign PreprocY = {YManE[`NF-1:0], {`DIVLEN-`NF{1'b0}}};
assign X = Int ? PreprocA : PreprocX;
assign D = Int ? PreprocB : PreprocY;
assign intExp = zeroCntB - zeroCntA + 1;
assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
assign Dpreproc = Int ? PreprocB : PreprocY;
// assign intExp = zeroCntB - zeroCntA + 1;
// assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
endmodule
///////////////////////////////////
@ -216,7 +271,7 @@ module otfc4 (
input logic clk,
input logic DivStart,
input logic [3:0] q,
output logic [`DIVLEN-1:0] Quot
output logic [`DIVLEN+2:0] Quot
);
// The on-the-fly converter transfers the quotient
@ -228,7 +283,7 @@ module otfc4 (
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
logic [`DIVLEN+2:0] Q, QM, QNext, QMNext, QMux, QMMux;
logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux;
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
@ -236,7 +291,7 @@ module otfc4 (
// if starting a new divison set Q to 0 and QM to -1
mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
flop #(`DIVLEN+3) Qreg(clk, QMux, Q);
flop #(`DIVLEN+3) Qreg(clk, QMux, Quot);
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
// shift Q (quotent) and QM (quotent-1)
@ -248,7 +303,7 @@ module otfc4 (
// *** how does the 0 concatination numbers work?
always_comb begin
QR = Q[`DIVLEN:0];
QR = Quot[`DIVLEN:0];
QMR = QM[`DIVLEN:0]; // Shift Q and QM
if (q[3]) begin // +2
QNext = {QR, 2'b10};
@ -268,7 +323,7 @@ module otfc4 (
end
end
// Quot is in the range [.5, 2) so normalize the result if nesissary
assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1];
// assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1];
endmodule
@ -302,9 +357,10 @@ endmodule
//////////////
module expcalc(
input logic [`NE-1:0] XExpE, YExpE,
input logic XZeroE,
output logic [`NE:0] DivCalcExp
);
assign DivCalcExp = XExpE - YExpE + (`NE)'(`BIAS);
assign DivCalcExp = (XExpE - YExpE + (`NE)'(`BIAS))&{`NE+1{~XZeroE}};
endmodule

View File

@ -53,6 +53,7 @@ module testbenchfp;
logic CvtResSgnE;
logic [`NE:0] CvtCalcExpE; // the calculated expoent
logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by
logic [`DIVLEN+2:0] Quot;
logic CvtResDenormUfE;
logic DivStart, DivDone;
@ -69,7 +70,6 @@ module testbenchfp;
logic ZSgnEffE;
logic PSgnE;
logic DivSgn;
logic [`DIVLEN-1:0] Quot;
logic [`NE:0] DivCalcExp;
@ -659,8 +659,8 @@ module testbenchfp;
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp),
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp),
.XFrac(XMan[`NF-1:0]), .YFrac(YMan[`NF-1:0]), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]),
srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero),
.XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]),
.DivDone, .Quot, .Rem());
assign CmpFlg[3:0] = 0;
@ -899,7 +899,7 @@ module readvectors (
// apply test vectors on rising edge of clk
// Format of vectors Inputs(1/2/3)_AnsFlg
always @(TestNum) begin
always @(VectorNum) begin
#1;
AnsFlg = TestVector[4:0];
DivStart = 1'b0;
@ -971,6 +971,7 @@ module readvectors (
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
Ans = TestVector[8+(`Q_LEN-1):8];
if (~clk) #5;
DivStart = 1'b1; #10 // one clk cycle
DivStart = 1'b0;
end
@ -978,6 +979,7 @@ module readvectors (
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
@ -985,6 +987,7 @@ module readvectors (
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]};
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
@ -992,6 +995,7 @@ module readvectors (
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end