Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

This commit is contained in:
Ross Thompson 2022-10-09 16:46:51 -05:00
commit 4bf5245f75
6 changed files with 182 additions and 72 deletions

View File

@ -55,7 +55,6 @@ module fdivsqrt(
// output logic [`XLEN-1:0] RemM,
);
logic [`DIVb+3:0] NextWSN, NextWCN;
logic [`DIVb+3:0] WS, WC;
logic [`DIVb+3:0] X;
logic [`DIVN-2:0] D; // U0.N-1
@ -77,7 +76,7 @@ module fdivsqrt(
.XInfE, .YInfE, .WZero, .SpecialCaseM);
fdivsqrtiter fdivsqrtiter(
.clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .SqrtM,
.X,.Dpreproc, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN,
.X,.Dpreproc, .FirstWS(WS), .FirstWC(WC),
.DivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE,
.DivBusy);
fdivsqrtpostproc fdivsqrtpostproc(.WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .SpecialCaseM, .QmM, .WZero, .DivSM);

View File

@ -41,7 +41,6 @@ module fdivsqrtiter(
input logic [`DIVb+3:0] X,
input logic [`DIVN-2:0] Dpreproc,
output logic [`DIVN-2:0] D, // U0.N-1
output logic [`DIVb+3:0] NextWSN, NextWCN,
output logic [`DIVb:0] FirstU, FirstUM,
output logic [`DIVb+1:0] FirstC,
output logic Firstun,
@ -56,12 +55,12 @@ module fdivsqrtiter(
// U/UM should be 1.b so b+1 bits or b:0
// C needs to be the length of the final fraction 0.b so b or b-1:0
/* verilator lint_off UNOPTFLAT */
logic [`DIVb+3:0] WSA[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WCA[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WS[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WC[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb:0] U[`DIVCOPIES-1:0]; // U1.b
logic [`DIVb:0] UM[`DIVCOPIES-1:0];// 1.b
logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b
logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b
logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b
logic [`DIVb:0] UM[`DIVCOPIES:0];// 1.b
logic [`DIVb:0] UNext[`DIVCOPIES-1:0];// U1.b
logic [`DIVb:0] UMNext[`DIVCOPIES-1:0];// U1.b
logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b
@ -79,31 +78,35 @@ module fdivsqrtiter(
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
// Otherwise, the divisor is retained and the partial remainder
// is fed back for the next iteration.
// - when the start signal is asserted X and 0 are loaded into WS and WC
// - otherwise load WSA into the flipflop
// - the assumed one is added to D since it's always normalized (and X/0 is a special case handled by result selection)
// - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
assign NextWSN = WSA[`DIVCOPIES-1] << `LOGR;
assign NextWCN = WCA[`DIVCOPIES-1] << `LOGR;
// Otherwise, the divisor is retained and the residual and result
// are fed back for the next iteration.
// Initialize C to -1 for sqrt and -R for division
logic [1:0] initCSqrt, initCDiv2, initCDiv4, initCUpper;
assign initCSqrt = 2'b11; // -1
assign initCDiv2 = 2'b10; // -2
assign initCDiv4 = 2'b00; // -4
assign initCUpper = SqrtE ? initCSqrt : (`RADIX == 4) ? initCDiv4 : initCDiv2;
assign initC = {initCUpper, {`DIVb{1'b0}}};
mux2 #(`DIVb+4) wsmux(NextWSN, X, DivStartE, WSN);
// Residual WS/WC registers/initialization mux
mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, DivStartE, WSN);
mux2 #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, DivStartE, WCN);
flopen #(`DIVb+4) wsflop(clk, DivStartE|DivBusy, WSN, WS[0]);
mux2 #(`DIVb+4) wcmux(NextWCN, '0, DivStartE, WCN);
flopen #(`DIVb+4) wcflop(clk, DivStartE|DivBusy, WCN, WC[0]);
flopen #(`DIVN-1) dflop(clk, DivStartE, Dpreproc, D);
// UOTFC Result U and UM registers/initialization mux
// Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division
assign initU = SqrtE ? {1'b1, {(`DIVb){1'b0}}} : 0;
assign initUM = SqrtE ? 0 : {1'b1, {(`DIVb){1'b0}}};
mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, DivStartE, UMux);
mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, DivStartE, UMMux);
flopen #(`DIVb+1) UReg(clk, DivStartE|DivBusy, UMux, U[0]);
flopen #(`DIVb+1) UMReg(clk, DivStartE|DivBusy, UMMux, UM[0]);
// C register/initialization mux
// Initialize C to -1 for sqrt and -R for division
logic [1:0] initCUpper;
assign initCUpper = SqrtE ? 2'b11 : (`RADIX == 4) ? 2'b00 : 2'b10;
assign initC = {initCUpper, {`DIVb{1'b0}}};
mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, DivStartE, CMux);
flopen #(`DIVb+2) cflop(clk, DivStartE|DivBusy, CMux, C[0]);
// Divisor register
flopen #(`DIVN-1) dflop(clk, DivStartE, Dpreproc, D);
// Divisor Selections
// - choose the negative version of what's being selected
// - D is only the fraction
@ -113,37 +116,29 @@ module fdivsqrtiter(
assign D2 = {2'b0, 1'b1, D, {`DIVb+2-`DIVN{1'b0}}};
end
// k=DIVCOPIES of the recurrence logic
genvar i;
generate
for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations
if (`RADIX == 2) begin: stage
fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtM,
.WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]),
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
end else begin: stage
logic j1;
assign j1 = (i == 0 & ~C[0][`DIVb-1]);
fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtM, .j1,
.WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]),
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
end
if(i<(`DIVCOPIES-1)) begin
assign WS[i+1] = WSA[i] << `LOGR;
assign WC[i+1] = WCA[i] << `LOGR;
assign WS[i+1] = WSNext[i];
assign WC[i+1] = WCNext[i];
assign U[i+1] = UNext[i];
assign UM[i+1] = UMNext[i];
end
end
endgenerate
// Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division
assign initU = SqrtE ? {1'b1, {(`DIVb){1'b0}}} : 0;
assign initUM = SqrtE ? 0 : {1'b1, {(`DIVb){1'b0}}};
mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, DivStartE, UMux);
mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, DivStartE, UMMux);
flopen #(`DIVb+1) UReg(clk, DivStartE|DivBusy, UMux, U[0]);
flopen #(`DIVb+1) UMReg(clk, DivStartE|DivBusy, UMMux, UM[0]);
// Send values from start of cycle for postprocessing
assign FirstWS = WS[0];
assign FirstWC = WC[0];
assign FirstU = U[0];

View File

@ -31,19 +31,18 @@
`include "wally-config.vh"
module fdivsqrtqsel4 (
input logic [`DIVN-2:0] D,
input logic [2:0] Dmsbs,
input logic [4:0] Smsbs,
input logic [`DIVb+3:0] WS, WC,
input logic [7:0] WSmsbs, WCmsbs,
input logic Sqrt, j1,
output logic [3:0] udigit
);
logic [6:0] Wmsbs;
logic [7:0] PreWmsbs;
logic [2:0] Dmsbs, A;
logic [2:0] A;
assign PreWmsbs = WC[`DIVb+3:`DIVb-4] + WS[`DIVb+3:`DIVb-4];
assign PreWmsbs = WCmsbs + WSmsbs;
assign Wmsbs = PreWmsbs[7:1];
assign Dmsbs = D[`DIVN-2:`DIVN-4];//|{3{D[`DIVN-2]&Sqrt}};
// D = 0001.xxx...
// Dmsbs = | |
// W = xxxx.xxx...
@ -51,6 +50,7 @@ module fdivsqrtqsel4 (
logic [3:0] USel4[1023:0];
// Prepopulate selection table; this is constant at compile time
always_comb begin
integer a, w, i, w2;
for(a=0; a<8; a++)
@ -101,12 +101,15 @@ module fdivsqrtqsel4 (
endcase
end
end
// Select A
always_comb
if (Sqrt) begin
if (j1) A = 3'b101;
else if (Smsbs == 5'b10000) A = 3'b111;
else A = Smsbs[2:0];
end else A = Dmsbs;
assign udigit = USel4[{A,Wmsbs}];
// Select quotient digit from lookup table based on A and W
assign udigit = USel4[{A,Wmsbs}];
endmodule

View File

@ -0,0 +1,93 @@
///////////////////////////////////////////
// fdivsqrtqsel4cmp.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
// Modified:13 January 2022
//
// Purpose: Comparator-based Radix 4 Quotient Digit Selection
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// Comparator-based radix-4 digit selection.
//
// The MSBs of the carry-save residual are added and the upper 7 bits of that
// sum are compared, as a signed number, against four thresholds taken from a
// small constant table. The table row is Dmsbs for division; for square root
// it is derived from Smsbs (or forced to row 5 when j1 is set).
//
// udigit encoding:
//   1000 = +2, 0100 = +1, 0000 = 0, 0010 = -1, 0001 = -2
module fdivsqrtqsel4cmp (
  input  logic [2:0] Dmsbs,           // table row used when Sqrt is clear
  input  logic [4:0] Smsbs,           // source of the table row when Sqrt is set
  input  logic [7:0] WSmsbs, WCmsbs,  // MSBs of the residual sum word and carry word
  input  logic       Sqrt, j1,        // Sqrt: square-root mode; j1: force row 5 in square-root mode
  output logic [3:0] udigit           // selected digit (encoding above)
);

  logic [7:0] ResSum;                          // WCmsbs + WSmsbs, truncated to 8 bits
  logic [6:0] ResEst;                          // upper 7 bits of ResSum; treated as signed below
  logic [2:0] Row;                             // selected row of the threshold table
  logic [6:0] Mag2, Mag1, MagM2;               // threshold magnitudes for the selected row
  logic [6:0] Bound2, Bound1, Bound0, BoundM1; // lower bounds for digits 2, 1, 0, -1
  logic       GeBound2, GeBound1, GeBound0, GeBoundM1;

  // Residual estimate: add the two carry-save words and drop the LSB of the sum
  assign ResSum = WCmsbs + WSmsbs;
  assign ResEst = ResSum[7:1];

  // Table row: Dmsbs unless this is a square root, in which case j1 forces
  // row 5, Smsbs == 10000 maps to row 7, and otherwise the low 3 bits of Smsbs are used
  always_comb begin
    Row = Dmsbs;
    if (Sqrt) begin
      if (j1)                     Row = 3'b101;
      else if (Smsbs == 5'b10000) Row = 3'b111;
      else                        Row = Smsbs[2:0];
    end
  end

  // Threshold magnitudes per row.
  //   Mag2  : lower bound for digit +2
  //   Mag1  : lower bound for digit +1; its negation is the lower bound for digit 0
  //   MagM2 : its negation is the lower bound for digit -1
  // MagM2 equals Mag2 in every row except row 0 (13 vs 12): asymmetry in table
  always_comb
    case (Row)
      3'd0:    begin Mag2 = 7'd12; Mag1 = 7'd4; MagM2 = 7'd13; end
      3'd1:    begin Mag2 = 7'd14; Mag1 = 7'd4; MagM2 = 7'd14; end
      3'd2:    begin Mag2 = 7'd16; Mag1 = 7'd6; MagM2 = 7'd16; end
      3'd3:    begin Mag2 = 7'd17; Mag1 = 7'd6; MagM2 = 7'd17; end
      3'd4:    begin Mag2 = 7'd18; Mag1 = 7'd6; MagM2 = 7'd18; end
      3'd5:    begin Mag2 = 7'd20; Mag1 = 7'd8; MagM2 = 7'd20; end // is the logic any cheaper if Mag1 is a 6?
      3'd6:    begin Mag2 = 7'd22; Mag1 = 7'd8; MagM2 = 7'd22; end
      3'd7:    begin Mag2 = 7'd23; Mag1 = 7'd8; MagM2 = 7'd23; end
      default: begin Mag2 = 'x;    Mag1 = 'x;   MagM2 = 'x;    end // unknown row -> unknown thresholds
    endcase

  // Signed thresholds (7-bit two's complement)
  assign Bound2  = Mag2;
  assign Bound1  = Mag1;
  assign Bound0  = -Mag1;
  assign BoundM1 = -MagM2;

  // Compare the residual estimate with each threshold
  assign GeBound2  = $signed(ResEst) >= $signed(Bound2);
  assign GeBound1  = $signed(ResEst) >= $signed(Bound1);
  assign GeBound0  = $signed(ResEst) >= $signed(Bound0);
  assign GeBoundM1 = $signed(ResEst) >= $signed(BoundM1);

  // Pick the digit: start from -2 and let each satisfied threshold override,
  // lowest first, so the highest satisfied threshold determines the result
  always_comb begin
    udigit = 4'b0001;                  // choose -2
    if (GeBoundM1) udigit = 4'b0010;   // choose -1
    if (GeBound0)  udigit = 4'b0000;   // choose 0
    if (GeBound1)  udigit = 4'b0100;   // choose 1
    if (GeBound2)  udigit = 4'b1000;   // choose 2
  end

endmodule

View File

@ -41,7 +41,7 @@ module fdivsqrtstage2 (
output logic un,
output logic [`DIVb+1:0] CNext,
output logic [`DIVb:0] UNext, UMNext,
output logic [`DIVb+3:0] WSA, WCA
output logic [`DIVb+3:0] WSNext, WCNext
);
/* verilator lint_on UNOPTFLAT */
@ -49,8 +49,7 @@ module fdivsqrtstage2 (
logic up, uz;
logic [`DIVb+3:0] F;
logic [`DIVb+3:0] AddIn;
assign CNext = {1'b1, C[`DIVb+1:1]};
logic [`DIVb+3:0] WSA, WCA;
// Quotient Selection logic
// Given partial remainder, select digit of +1, 0, or -1 (up, uz, un)
@ -61,8 +60,11 @@ module fdivsqrtstage2 (
// 0010 = -1
// 0001 = -2
fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un);
// Sqrt F generation
fdivsqrtfgen2 fgen2(.up, .uz, .C(CNext), .U, .UM, .F);
// Divisor multiple
always_comb
if (up) Dsel = DBar;
else if (uz) Dsel = '0; // qz
@ -72,7 +74,13 @@ module fdivsqrtstage2 (
// WSA, WCA = WS + WC - qD
assign AddIn = SqrtM ? F : Dsel;
csa #(`DIVb+4) csa(WS, WC, AddIn, up&~SqrtM, WSA, WCA);
assign WSNext = WSA << 1;
assign WCNext = WCA << 1;
// Shift thermometer code C
assign CNext = {1'b1, C[`DIVb+1:1]};
// Unified On-The-Fly Converter to accumulate result
fdivsqrtuotfc2 uotfc2(.up, .uz, .C(CNext), .U, .UM, .UNext, .UMNext);
endmodule

View File

@ -30,7 +30,6 @@
`include "wally-config.vh"
/* verilator lint_off UNOPTFLAT */
module fdivsqrtstage4 (
input logic [`DIVN-2:0] D,
input logic [`DIVb+3:0] DBar, D2, DBar2,
@ -41,17 +40,18 @@ module fdivsqrtstage4 (
input logic SqrtM, j1,
output logic un,
output logic [`DIVb:0] UNext, UMNext,
output logic [`DIVb+3:0] WSA, WCA
output logic [`DIVb+3:0] WSNext, WCNext
);
/* verilator lint_on UNOPTFLAT */
logic [`DIVb+3:0] Dsel;
logic [3:0] udigit;
logic [`DIVb+3:0] F;
logic [`DIVb+3:0] AddIn;
logic [4:0] Smsbs;
logic [2:0] Dmsbs;
logic [7:0] WCmsbs, WSmsbs;
logic CarryIn;
assign CNext = {2'b11, C[`DIVb+1:2]};
logic [`DIVb+3:0] WSA, WCA;
// Digit Selection logic
// u encoding:
@ -61,9 +61,17 @@ module fdivsqrtstage4 (
// 0010 = -1
// 0001 = -2
assign Smsbs = U[`DIVb:`DIVb-4];
fdivsqrtqsel4 qsel4(.D, .Smsbs, .WS, .WC, .Sqrt(SqrtM), .j1, .udigit);
assign Dmsbs = D[`DIVN-2:`DIVN-4];
assign WCmsbs = WC[`DIVb+3:`DIVb-4];
assign WSmsbs = WS[`DIVb+3:`DIVb-4];
fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .Sqrt(SqrtM), .j1, .udigit);
assign un = 0; // unused for radix 4
// F generation logic
fdivsqrtfgen4 fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
// Divisor multiple logic
always_comb
case (udigit)
4'b1000: Dsel = DBar2;
@ -74,15 +82,19 @@ module fdivsqrtstage4 (
default: Dsel = 'x;
endcase
// Partial Product Generation
// WSA, WCA = WS + WC - qD
// Residual Update
// {WS, WC}Next = (WS + WC - qD or F) << 2
assign AddIn = SqrtM ? F : Dsel;
assign CarryIn = ~SqrtM & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D
csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA);
assign WSNext = WSA << 2;
assign WCNext = WCA << 2;
// Shift thermometer code C
assign CNext = {2'b11, C[`DIVb+1:2]};
// On-the-fly converter to accumulate result
fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext);
assign un = 0; // unused for radix 4
endmodule