merged floating-point radix-2 divider with radix-4

This commit is contained in:
Katherine Parry 2022-07-15 20:16:59 +00:00
parent cabd41a5a0
commit 5cb9c9f319
12 changed files with 52 additions and 393 deletions

View File

@ -97,19 +97,20 @@
`define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF)) `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF))
`define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN)) `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN))
`define LOGCVTLEN $unsigned($clog2(`CVTLEN+1)) `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
`define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+3) : (3*`NF+9)) `define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+1) : (3*`NF+9))
`define CORRSHIFTSZ ((`DIVRESLEN+`NF+3) > (3*`NF+8) ? (`DIVRESLEN+`NF+3) : (3*`NF+6)) `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6))
// division constants // division constants
`define RADIX 32'h4 `define RADIX 32'h2
`define DIVCOPIES 32'h4 `define DIVCOPIES 32'h1
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3))
`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3) `define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3)
`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3)) `define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3))
`define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN) `define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN)
`define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
`define FPDUR ((`DIVRESLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)) // one interation is required for the integer bit for minimally redundent radix-4
`define FPDUR ((`DIVLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4))
`define DURLEN ($clog2(`FPDUR+1)) `define DURLEN ($clog2(`FPDUR+1))
`define QLEN (`FPDUR*`LOGR*`DIVCOPIES) `define QLEN (`FPDUR*`LOGR*`DIVCOPIES)

View File

@ -1,2 +1,2 @@
vsim -do "do wally-pipelined.do rv32gc arch32i" vsim -do "do wally-pipelined.do rv64gc arch64d"

View File

@ -20,12 +20,20 @@ add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtradix4/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WC
add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WS
add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/qsel4/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WCA
add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/otfc4/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WSA
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/Q
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QM
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QNext
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QMNext
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/*
add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/*
# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/*
# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/*
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/*
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtradix4/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/*
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/*
add wave -group {Testbench} -noupdate /testbenchfp/* add wave -group {Testbench} -noupdate /testbenchfp/*
add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*

View File

@ -1,7 +1,7 @@
`include "wally-config.vh" `include "wally-config.vh"
module divshiftcalc( module divshiftcalc(
input logic [`QLEN-1:0] Quot, input logic [`QLEN-1-(`RADIX/4):0] Quot,
input logic [`FMTBITS-1:0] Fmt, input logic [`FMTBITS-1:0] Fmt,
input logic [`DURLEN-1:0] DivEarlyTermShift, input logic [`DURLEN-1:0] DivEarlyTermShift,
input logic [`NE+1:0] DivCalcExp, input logic [`NE+1:0] DivCalcExp,
@ -30,12 +30,12 @@ module divshiftcalc(
// 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards) // 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after) // 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after)
// inital Left shift amount = NF // inital Left shift amount = NF
// shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit
assign NormShift = (`NE+2)'(`NF); assign NormShift = (`NE+2)'(`NF);
// if the shift amount is negitive then dont shift (keep sticky bit) // if the shift amount is negitive then dont shift (keep sticky bit)
// need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES)
assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, ($clog2(`LOGR*`DIVCOPIES))'(0)}; assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}};
// *** QLEN can be changed to DIVLEN if we figure out what divLEN is - chenge normshiftsize definifion assign DivShiftIn = {{`NF{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}};
assign DivShiftIn = {{`NF-1{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+1-`NF{1'b0}}};
endmodule endmodule

View File

@ -47,22 +47,23 @@ module divsqrt(
output logic DivDone, output logic DivDone,
output logic [`NE+1:0] DivCalcExpM, output logic [`NE+1:0] DivCalcExpM,
output logic [`DURLEN-1:0] EarlyTermShiftM, output logic [`DURLEN-1:0] EarlyTermShiftM,
output logic [`QLEN-1:0] QuotM output logic [`QLEN-1-(`RADIX/4):0] QuotM
// output logic [`XLEN-1:0] RemM, // output logic [`XLEN-1:0] RemM,
); );
logic [`DIVLEN+3:0] NextWSN, NextWCN; logic [`DIVLEN+3:0] NextWSN, NextWCN;
logic [`DIVLEN+3:0] WS, WC; logic [`DIVLEN+3:0] WS, WC;
logic [`DIVLEN+3:0] StickyWSA;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
logic [`DIVLEN-1:0] X; logic [`DIVLEN-1:0] X;
logic [`DIVLEN-1:0] Dpreproc; logic [`DIVLEN-1:0] Dpreproc;
logic [`DURLEN-1:0] Dur; logic [`DURLEN-1:0] Dur;
logic NegSticky; logic NegSticky;
srtpreproc srtpreproc(.XManE, .Dur, .YManE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt); srtpreproc srtpreproc(.Xm(XManE), .Dur, .Ym(YManE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
.XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XExpE), .Ye(YExpE), .XZeroE, .YZeroE,
.DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); .StickyWSA, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
endmodule endmodule

View File

@ -125,7 +125,7 @@ module fpu (
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
//divide signals //divide signals
logic [`QLEN-1:0] QuotM; logic [`QLEN-1-(`RADIX/4):0] QuotM;
logic [`NE+1:0] DivCalcExpE, DivCalcExpM; logic [`NE+1:0] DivCalcExpE, DivCalcExpM;
logic DivStickyE, DivStickyM; logic DivStickyE, DivStickyM;
logic DivDoneM; logic DivDoneM;

View File

@ -58,7 +58,7 @@ module postprocess (
input logic DivSticky, input logic DivSticky,
input logic DivDone, input logic DivDone,
input logic [`NE+1:0] DivCalcExp, input logic [`NE+1:0] DivCalcExp,
input logic [`QLEN-1:0] Quot, input logic [`QLEN-1-(`RADIX/4):0] Quot,
// conversion signals // conversion signals
input logic CvtCs, // the result's sign input logic CvtCs, // the result's sign
input logic [`NE:0] CvtCe, // the calculated expoent input logic [`NE:0] CvtCe, // the calculated expoent

View File

@ -43,7 +43,7 @@ module shiftcorrection(
output logic [`NE+1:0] FmaSe // exponent of the normalized sum output logic [`NE+1:0] FmaSe // exponent of the normalized sum
); );
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
logic [`CORRSHIFTSZ:0] CorrQuotShifted; logic [`CORRSHIFTSZ-1:0] CorrQuotShifted;
logic ResDenorm; // is the result denormalized logic ResDenorm; // is the result denormalized
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
@ -53,9 +53,9 @@ module shiftcorrection(
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0]; assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
// if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm)
assign CorrQuotShifted = {LZAPlus2|(DivCalcExp==1&~LZAPlus2) ? Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ] : {Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ], 1'b0}, 1'b0}; assign CorrQuotShifted = (LZAPlus2|(DivCalcExp==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
// if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
// Determine sum's exponent // Determine sum's exponent
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};

View File

@ -1,359 +0,0 @@
///////////////////////////////////////////
// srt.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek
// Modified:13 January 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module srtradix4(
input logic clk,
input logic DivStart,
input logic DivBusy,
input logic [`FMTBITS-1:0] FmtE,
input logic [`NE-1:0] XExpE, YExpE,
input logic XZeroE, YZeroE,
input logic [`DIVLEN-1:0] X,
input logic [`DIVLEN-1:0] Dpreproc,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
input logic NegSticky,
output logic [`QLEN-1:0] Quot,
output logic [`DIVLEN+3:0] NextWSN, NextWCN,
output logic [`DIVLEN+3:0] FirstWS, FirstWC,
output logic [`NE+1:0] DivCalcExpM,
output logic [`XLEN-1:0] Rem
);
/* verilator lint_off UNOPTFLAT */
logic [`DIVLEN+3:0] WSA[`DIVCOPIES-1:0];
logic [`DIVLEN+3:0] WCA[`DIVCOPIES-1:0];
logic [`DIVLEN+3:0] WS[`DIVCOPIES-1:0];
logic [`DIVLEN+3:0] WC[`DIVCOPIES-1:0];
logic [`QLEN-1:0] Q[`DIVCOPIES-1:0];
logic [`QLEN-1:0] QM[`DIVCOPIES-1:0];
logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0];
logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0];
/* verilator lint_on UNOPTFLAT */
logic [`DIVLEN+3:0] WSN, WCN;
logic [`DIVLEN+3:0] D, DBar, D2, DBar2;
logic [`NE+1:0] DivCalcExp;
logic [$clog2(`XLEN+1)-1:0] intExp;
logic intSign;
logic [`QLEN-1:0] QMMux;
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
// Otherwise, the divisor is retained and the partial remainder
// is fed back for the next iteration.
// - when the start signal is asserted X and 0 are loaded into WS and WC
// - otherwise load WSA into the flipflop
// - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
// - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
mux2 #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN);
flop #(`DIVLEN+4) wsflop(clk, WSN, WS[0]);
mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN);
flop #(`DIVLEN+4) wcflop(clk, WCN, WC[0]);
flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
// Divisor Selections
// - choose the negitive version of what's being selected
assign DBar = ~D;
assign DBar2 = {~D[`DIVLEN+2:0], 1'b1};
assign D2 = {D[`DIVLEN+2:0], 1'b0};
genvar i;
generate
for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin
divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2,
.WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]));
if(i<(`DIVCOPIES-1)) begin
assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0};
assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0};
assign Q[i+1] = QNext[i];
assign QM[i+1] = QMNext[i];
end
end
endgenerate
// if starting a new divison set Q to 0 and QM to -1
mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux);
flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]);
assign Quot = NegSticky ? QM[0] : Q[0];
assign FirstWS = WS[0];
assign FirstWC = WC[0];
expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
endmodule
////////////////
// Submodules //
////////////////
/* verilator lint_off UNOPTFLAT */
module divinteration (
input logic clk,
input logic DivStart,
input logic DivBusy,
input logic [`DIVLEN+3:0] D,
input logic [`DIVLEN+3:0] DBar, D2, DBar2,
input logic [`QLEN-1:0] Q, QM,
input logic [`DIVLEN+3:0] WS, WC,
output logic [`QLEN-1:0] QNext, QMNext,
output logic [`DIVLEN+3:0] WSA, WCA
);
/* verilator lint_on UNOPTFLAT */
logic [`DIVLEN+3:0] Dsel;
logic [3:0] q;
// Quotient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
// q encoding:
// 1000 = +2
// 0100 = +1
// 0000 = 0
// 0010 = -1
// 0001 = -2
qsel4 qsel4(.D, .WS, .WC, .q);
always_comb
case (q)
4'b1000: Dsel = DBar2;
4'b0100: Dsel = DBar;
4'b0000: Dsel = {`DIVLEN+4{1'b0}};
4'b0010: Dsel = D;
4'b0001: Dsel = D2;
default: Dsel = {`DIVLEN+4{1'bx}};
endcase
// Partial Product Generation
// WSA, WCA = WS + WC - qD
csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Q, .QM, .QNext, .QMNext);
endmodule
module qsel4 (
input logic [`DIVLEN+3:0] D,
input logic [`DIVLEN+3:0] WS, WC,
output logic [3:0] q
);
logic [6:0] Wmsbs;
logic [7:0] PreWmsbs;
logic [2:0] Dmsbs;
assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
assign Wmsbs = PreWmsbs[7:1];
assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
// D = 0001.xxx...
// Dmsbs = | |
// W = xxxx.xxx...
// Wmsbs = | |
logic [3:0] QSel4[1023:0];
always_comb begin
integer d, w, i, w2;
for(d=0; d<8; d++)
for(w=0; w<128; w++)begin
i = d*128+w;
w2 = w-128*(w>=64); // convert to two's complement
case(d)
0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-4) QSel4[i] = 4'b0000;
else if(w2>=-13) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
1: if(w2>=14) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-15) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
2: if(w2>=15) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-16) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
3: if(w2>=16) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-18) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
4: if(w2>=18) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-20) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
5: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-20) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
6: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-22) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
7: if(w2>=24) QSel4[i] = 4'b1000;
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-24) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
endcase
end
end
assign q = QSel4[{Dmsbs,Wmsbs}];
endmodule
///////////////////////////////////
// On-The-Fly Converter, Radix 2 //
///////////////////////////////////
module otfc4 (
input logic clk,
input logic DivStart,
input logic DivBusy,
input logic [3:0] q,
input logic [`QLEN-1:0] Q, QM,
output logic [`QLEN-1:0] QNext, QMNext
);
// The on-the-fly converter transfers the quotient
// bits to the quotient as they come.
//
// This code follows the psuedocode presented in the
// floating point chapter of the book. Right now,
// it is written for Radix-4 division.
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
logic [`QLEN-3:0] QR, QMR;
// shift Q (quotent) and QM (quotent-1)
// if q = 2 Q = {Q, 10} QM = {Q, 01}
// else if q = 1 Q = {Q, 01} QM = {Q, 00}
// else if q = 0 Q = {Q, 00} QM = {QM, 11}
// else if q = -1 Q = {QM, 11} QM = {QM, 10}
// else if q = -2 Q = {QM, 10} QM = {QM, 01}
// *** how does the 0 concatination numbers work?
assign QR = Q[`QLEN-3:0];
assign QMR = QM[`QLEN-3:0]; // Shifted Q and QM
always_comb begin
if (q[3]) begin // +2
QNext = {QR, 2'b10};
QMNext = {QR, 2'b01};
end else if (q[2]) begin // +1
QNext = {QR, 2'b01};
QMNext = {QR, 2'b00};
end else if (q[1]) begin // -1
QNext = {QMR, 2'b11};
QMNext = {QMR, 2'b10};
end else if (q[0]) begin // -2
QNext = {QMR, 2'b10};
QMNext = {QMR, 2'b01};
end else begin // 0
QNext = {QR, 2'b00};
QMNext = {QMR, 2'b11};
end
end
// Final Quoteint is in the range [.5, 2)
endmodule
/////////
// csa //
/////////
module csa #(parameter N=69) (
input logic [N-1:0] in1, in2, in3,
input logic cin,
output logic [N-1:0] out1, out2
);
// This block adds in1, in2, in3, and cin to produce
// a result out1 / out2 in carry-save redundant form.
// cin is just added to the least significant bit and
// is Startuired to handle adding a negative divisor.
// Fortunately, the carry (out2) is shifted left by one
// bit, leaving room in the least significant bit to
// insert cin.
assign out1 = in1 ^ in2 ^ in3;
assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) |
(in2[N-2:0] & in3[N-2:0]), cin};
endmodule
module expcalc(
input logic [`FMTBITS-1:0] FmtE,
input logic [`NE-1:0] XExpE, YExpE,
input logic XZeroE,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`NE+1:0] DivCalcExp
);
logic [`NE-2:0] Bias;
if (`FPSIZES == 1) begin
assign Bias = (`NE-1)'(`BIAS);
end else if (`FPSIZES == 2) begin
assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
end else if (`FPSIZES == 3) begin
always_comb
case (FmtE)
`FMT: Bias = (`NE-1)'(`BIAS);
`FMT1: Bias = (`NE-1)'(`BIAS1);
`FMT2: Bias = (`NE-1)'(`BIAS2);
default: Bias = 'x;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (FmtE)
2'h3: Bias = (`NE-1)'(`Q_BIAS);
2'h1: Bias = (`NE-1)'(`D_BIAS);
2'h0: Bias = (`NE-1)'(`S_BIAS);
2'h2: Bias = (`NE-1)'(`H_BIAS);
endcase
end
// correct exponent for denormalized input's normalization shifts
assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
endmodule

View File

@ -38,8 +38,9 @@ module srtfsm(
input logic XZeroE, YZeroE, input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE, input logic XNaNE, YNaNE,
input logic DivStart, input logic DivStart,
input logic StallE, input logic StallE,
input logic StallM, input logic StallM,
input logic [`DIVLEN+3:0] StickyWSA,
input logic [`DURLEN-1:0] Dur, input logic [`DURLEN-1:0] Dur,
output logic [`DURLEN-1:0] EarlyTermShiftE, output logic [`DURLEN-1:0] EarlyTermShiftE,
output logic DivStickyE, output logic DivStickyE,
@ -59,7 +60,14 @@ module srtfsm(
//flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur); //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur);
assign DivBusy = (state == BUSY); assign DivBusy = (state == BUSY);
assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0}); assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0});
assign DivStickyE = |W; // calculate sticky bit
// - there is a chance that a value is subtracted infinitly, resulting in an exact QM result
// this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant
// radix-4 division can't create a QM that continually adds 0's
if (`RADIX == 2)
assign DivStickyE = |W&~(StickyWSA == WS);
else
assign DivStickyE = |W;
assign DivDone = (state == DONE); assign DivDone = (state == DONE);
assign W = WC+WS; assign W = WC+WS;
assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this??? assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this???

View File

@ -31,7 +31,7 @@
`include "wally-config.vh" `include "wally-config.vh"
module srtpreproc ( module srtpreproc (
input logic [`NF:0] XManE, YManE, input logic [`NF:0] Xm, Ym,
output logic [`DIVLEN-1:0] X, output logic [`DIVLEN-1:0] X,
output logic [`DIVLEN-1:0] Dpreproc, output logic [`DIVLEN-1:0] Dpreproc,
output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
@ -49,16 +49,16 @@ module srtpreproc (
// ***can probably merge X LZC with conversion // ***can probably merge X LZC with conversion
// cout the number of leading zeros // cout the number of leading zeros
lzc #(`NF+1) lzcA (XManE, XZeroCnt); lzc #(`NF+1) lzcA (Xm, XZeroCnt);
lzc #(`NF+1) lzcB (YManE, YZeroCnt); lzc #(`NF+1) lzcB (Ym, YZeroCnt);
// assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
// assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
// assign PreprocA = ExtraA << zeroCntA; // assign PreprocA = ExtraA << zeroCntA;
// assign PreprocB = ExtraB << (zeroCntB + 1); // assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}}; assign PreprocX = {Xm[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}}; assign PreprocY = {Ym[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
assign X = PreprocX; assign X = PreprocX;

View File

@ -80,7 +80,7 @@ module testbenchfp;
logic CvtResSgnE; logic CvtResSgnE;
logic [`NE:0] CvtCalcExpE; // the calculated expoent logic [`NE:0] CvtCalcExpE; // the calculated expoent
logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by
logic [`QLEN-1:0] Quot; logic [`QLEN-1-(`RADIX/4):0] Quot;
logic CvtResDenormUfE; logic CvtResDenormUfE;
logic [`DURLEN-1:0] EarlyTermShift; logic [`DURLEN-1:0] EarlyTermShift;
logic DivStart, DivBusy; logic DivStart, DivBusy;