From b3e1badd3184b3e1a3c374f2fc2541054d80486a Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 12 Jan 2023 07:15:14 -0800 Subject: [PATCH] MDU comment cleanup --- pipelined/src/mdu/intdivrestoring.sv | 55 +++++++------ pipelined/src/mdu/intdivrestoringstep.sv | 17 ++-- pipelined/src/mdu/mdu.sv | 61 +++++++-------- pipelined/src/mdu/mul.sv | 99 ++++++++++++------------ 4 files changed, 121 insertions(+), 111 deletions(-) diff --git a/pipelined/src/mdu/intdivrestoring.sv b/pipelined/src/mdu/intdivrestoring.sv index dab3c2d97..0d95233ca 100644 --- a/pipelined/src/mdu/intdivrestoring.sv +++ b/pipelined/src/mdu/intdivrestoring.sv @@ -6,6 +6,8 @@ // // Purpose: Restoring integer division using a shift register and subtractor // +// Documentation: RISC-V System on Chip Design Chapter 12 (Figure 12.19) +// // A component of the CORE-V-WALLY configurable RISC-V project. // // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University @@ -26,32 +28,39 @@ `include "wally-config.vh" - /* verilator lint_off UNOPTFLAT */ - -module intdivrestoring ( - input logic clk, - input logic reset, - input logic StallM, - input logic FlushE, - input logic DivSignedE, W64E, - input logic IntDivE, - //input logic [`XLEN-1:0] SrcAE, SrcBE, - input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B - output logic DivBusyE, - output logic [`XLEN-1:0] QuotM, RemM +module intdivrestoring( + input logic clk, + input logic reset, + input logic StallM, + input logic FlushE, + input logic IntDivE, // integer division/remainder instruction of any type + input logic DivSignedE, // signed division + input logic W64E, // W-type instructions (divw, divuw, remw, remuw) + input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Forwarding mux outputs for Source A and B + output logic DivBusyE, // Divide is busy - stall pipeline + output logic [`XLEN-1:0] QuotM, RemM // Quotient and remainder 
outputs ); - typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype; + localparam STEPBITS = $clog2(`XLEN/`IDIV_BITSPERCYCLE); // Number of steps + + typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype; // division FSM state statetype state; - logic [`XLEN-1:0] W[`IDIV_BITSPERCYCLE:0]; - logic [`XLEN-1:0] XQ[`IDIV_BITSPERCYCLE:0]; - logic [`XLEN-1:0] DinE, XinE, DnE, DAbsBE, DAbsB, XnE, XInitE, WnM, XQnM; - localparam STEPBITS = $clog2(`XLEN/`IDIV_BITSPERCYCLE); - logic [STEPBITS:0] step; - logic Div0E, Div0M; - logic DivStartE, SignXE, SignDE, NegQE, NegWM, NegQM; - logic [`XLEN-1:0] WNext, XQNext; + logic [`XLEN-1:0] W[`IDIV_BITSPERCYCLE:0]; // Residual for each of k steps + logic [`XLEN-1:0] XQ[`IDIV_BITSPERCYCLE:0]; // dividend/quotient for each of k steps + logic [`XLEN-1:0] WNext, XQNext; // initialized W and XQ going into registers + logic [`XLEN-1:0] DinE, XinE; // divisor & dividend, possibly truncated to 32 bits + logic [`XLEN-1:0] DnE; // DnE = ~DinE + logic [`XLEN-1:0] DAbsBE; // absolute value of D + logic [`XLEN-1:0] DAbsB; // registered absolute value of D, constant during division + logic [`XLEN-1:0] XnE; // XnE = ~XinE + logic [`XLEN-1:0] XInitE; // |X|, or original X for divide by 0 + logic [`XLEN-1:0] WnM, XQnM; // negated residual W and quotient XQ for postprocessing sign correction + logic [STEPBITS:0] step; // division step + logic Div0E, Div0M; // divide by 0 + logic DivStartE; // start integer division + logic SignXE, SignDE; // sign of dividend and divisor + logic NegQE, NegWM, NegQM; // negate quotient or residual during postprocessing ////////////////////////////// // Execute Stage: prepare for division calculation with control logic, W logic and absolute values, initialize W and XQ @@ -134,5 +143,3 @@ module intdivrestoring ( else state <= IDLE; end endmodule - -/* verilator lint_on UNOPTFLAT */ diff --git a/pipelined/src/mdu/intdivrestoringstep.sv b/pipelined/src/mdu/intdivrestoringstep.sv index 95a26e82f..cc27a7d5b 100644 --- 
a/pipelined/src/mdu/intdivrestoringstep.sv +++ b/pipelined/src/mdu/intdivrestoringstep.sv @@ -4,8 +4,10 @@ // Written: David_Harris@hmc.edu 2 October 2021 // Modified: // -// Purpose: Restoring integer division using a shift register and subtractor +// Purpose: Restoring integer division step. k steps are used in intdivrestoring // +// Documentation: RISC-V System on Chip Design Chapter 12 (Figure 12.19) +// // A component of the CORE-V-WALLY configurable RISC-V project. // // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University @@ -29,11 +31,16 @@ /* verilator lint_off UNOPTFLAT */ module intdivrestoringstep( - input logic [`XLEN-1:0] W, XQ, DAbsB, - output logic [`XLEN-1:0] WOut, XQOut); + input logic [`XLEN-1:0] W, // Residual in + input logic [`XLEN-1:0] XQ, // bits of dividend X and quotient Q in + input logic [`XLEN-1:0] DAbsB, // complement of absolute value of divisor D (for subtraction) + output logic [`XLEN-1:0] WOut, // Residual out + output logic [`XLEN-1:0] XQOut // bits of dividend and quotient out: discard one bit of X, append one bit of Q +); - logic [`XLEN-1:0] WShift, WPrime; - logic qi, qib; + logic [`XLEN-1:0] WShift; // Shift W left by one bit, bringing in most significant bit of X + logic [`XLEN-1:0] WPrime; // WShift - D, for comparison and possible result + logic qi, qib; // Quotient digit and its complement assign {WShift, XQOut} = {W[`XLEN-2:0], XQ, qi}; // shift W and X/Q left, insert quotient bit at bottom adder #(`XLEN+1) wdsub({1'b0, WShift}, {1'b1, DAbsB}, {qib, WPrime}); // effective subtractor, carry out determines quotient bit diff --git a/pipelined/src/mdu/mdu.sv b/pipelined/src/mdu/mdu.sv index bb242b75f..4a85bf478 100644 --- a/pipelined/src/mdu/mdu.sv +++ b/pipelined/src/mdu/mdu.sv @@ -6,6 +6,8 @@ // // Purpose: M extension multiply and divide // +// Documentation: RISC-V System on Chip Design Chapter 12 (Figure 12.21) +// // A component of the CORE-V-WALLY configurable RISC-V project. 
// // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University @@ -26,56 +28,49 @@ `include "wally-config.vh" -module mdu ( - input logic clk, reset, - // Execute Stage interface - // input logic [`XLEN-1:0] SrcAE, SrcBE, - input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B - input logic [2:0] Funct3E, Funct3M, - input logic IntDivE, W64E, - // Writeback stage - output logic [`XLEN-1:0] MDUResultW, - // Divide Done - output logic DivBusyE, - // hazards - input logic StallM, StallW, FlushE, FlushM, FlushW - ); +module mdu( + input logic clk, reset, + input logic StallM, StallW, + input logic FlushE, FlushM, FlushW, + input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // inputs A and B from IEU forwarding mux output + input logic [2:0] Funct3E, Funct3M, // type of MDU operation + input logic IntDivE, W64E, // Integer division/remainder, and W-type instructions + output logic [`XLEN-1:0] MDUResultW, // multiply/divide result + output logic DivBusyE // busy signal to stall pipeline in Execute stage +); - logic [`XLEN-1:0] MDUResultM; - logic [`XLEN-1:0] PrelimResultM; - logic [`XLEN-1:0] QuotM, RemM; - logic [`XLEN*2-1:0] ProdM; - - logic DivSignedE; - logic W64M; + logic [`XLEN*2-1:0] ProdM; // double-width product from mul + logic [`XLEN-1:0] QuotM, RemM; // quotient and remainder from intdivrestoring + logic [`XLEN-1:0] PrelimResultM; // selected result before W truncation + logic [`XLEN-1:0] MDUResultM; // result after W truncation + logic W64M; // W-type instruction // Multiplier mul mul(.clk, .reset, .StallM, .FlushM, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .ProdM); - // Divide + // Divider // Start a divide when a new division instruction is received and the divider isn't already busy or finishing - // When F extensions are supported, use the FPU divider instead + // When IDIV_ON_FPU is set, use the FPU divider instead if (`IDIV_ON_FPU) begin 
assign QuotM = 0; assign RemM = 0; assign DivBusyE = 0; end else begin - assign DivSignedE = ~Funct3E[0]; - intdivrestoring div(.clk, .reset, .StallM, .FlushE, .DivSignedE, .W64E, .IntDivE, + intdivrestoring div(.clk, .reset, .StallM, .FlushE, .DivSignedE(~Funct3E[0]), .W64E, .IntDivE, .ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM); end // Result multiplexer always_comb case (Funct3M) - 3'b000: PrelimResultM = ProdM[`XLEN-1:0]; - 3'b001: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; - 3'b010: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; - 3'b011: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; - 3'b100: PrelimResultM = QuotM; - 3'b101: PrelimResultM = QuotM; - 3'b110: PrelimResultM = RemM; - 3'b111: PrelimResultM = RemM; + 3'b000: PrelimResultM = ProdM[`XLEN-1:0]; // mul + 3'b001: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; // mulh + 3'b010: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; // mulhsu + 3'b011: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; // mulhu + 3'b100: PrelimResultM = QuotM; // div + 3'b101: PrelimResultM = QuotM; // divu + 3'b110: PrelimResultM = RemM; // rem + 3'b111: PrelimResultM = RemM; // remu endcase // Handle sign extension for W-type instructions diff --git a/pipelined/src/mdu/mul.sv b/pipelined/src/mdu/mul.sv index b94ce7993..952b4daf2 100644 --- a/pipelined/src/mdu/mul.sv +++ b/pipelined/src/mdu/mul.sv @@ -4,8 +4,10 @@ // Written: David_Harris@hmc.edu 16 February 2021 // Modified: // -// Purpose: Multiply instructions +// Purpose: Integer multiplication // +// Documentation: RISC-V System on Chip Design Chapter 12 (Figure 12.18) +// // A component of the CORE-V-WALLY configurable RISC-V project. 
// // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University @@ -26,69 +28,68 @@ `include "wally-config.vh" -module mul ( - // Execute Stage interface - input logic clk, reset, - input logic StallM, FlushM, - // input logic [`XLEN-1:0] SrcAE, SrcBE, - input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B - input logic [2:0] Funct3E, - output logic [`XLEN*2-1:0] ProdM +module mul( + input logic clk, reset, + input logic StallM, FlushM, + input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // source A and B from after Forwarding mux + input logic [2:0] Funct3E, // type of multiply + output logic [`XLEN*2-1:0] ProdM // double-width product ); - // Number systems - // Let A' = sum(i=0, XLEN-2, A[i]*2^i) - // Unsigned: A = A' + A[XLEN-1]*2^(XLEN-1) - // Signed: A = A' - A[XLEN-1]*2^(XLEN-1) + // Number systems + // Let A' = sum(i=0, XLEN-2, A[i]*2^i) + // Unsigned: A = A' + A[XLEN-1]*2^(XLEN-1) + // Signed: A = A' - A[XLEN-1]*2^(XLEN-1) - // Multiplication: A*B - // Let P' = A' * B' - // PA = (A' * B[XLEN-1]) - // PB = (B' * A[XLEN-1]) - // PP = A[XLEN-1] * B[XLEN-1] - // Signed * Signed = P' + (-PA - PB)*2^(XLEN-1) + PP*2^(2XLEN-2) - // Signed * Unsigned = P' + ( PA - PB)*2^(XLEN-1) - PP*2^(2XLEN-2) - // Unsigned * Unsigned = P' + ( PA + PB)*2^(XLEN-1) + PP*2^(2XLEN-2) - - logic [`XLEN*2-1:0] PP1E, PP2E, PP3E, PP4E; - logic [`XLEN*2-1:0] PP1M, PP2M, PP3M, PP4M; - logic [`XLEN-2:0] PA, PB; - logic PP; - logic MULH, MULHSU; - logic [`XLEN-1:0] Aprime, Bprime; + // Multiplication: A*B + // Let P' = A' * B' + // PA = (A' * B[XLEN-1]) + // PB = (B' * A[XLEN-1]) + // PP = A[XLEN-1] * B[XLEN-1] + // Signed * Signed = P' + (-PA - PB)*2^(XLEN-1) + PP*2^(2XLEN-2) + // Signed * Unsigned = P' + ( PA - PB)*2^(XLEN-1) - PP*2^(2XLEN-2) + // Unsigned * Unsigned = P' + ( PA + PB)*2^(XLEN-1) + PP*2^(2XLEN-2) + logic [`XLEN-1:0] Aprime, Bprime; // lower bits of source A and 
B + logic MULH, MULHSU; // type of multiply + logic [`XLEN-2:0] PA, PB; // product of msb and lsbs + logic PP; // product of msbs + logic [`XLEN*2-1:0] PP1E, PP2E, PP3E, PP4E; // partial products + logic [`XLEN*2-1:0] PP1M, PP2M, PP3M, PP4M; // registered partial products + ////////////////////////////// // Execute Stage: Compute partial products ////////////////////////////// - assign Aprime = {1'b0, ForwardedSrcAE[`XLEN-2:0]}; - assign Bprime = {1'b0, ForwardedSrcBE[`XLEN-2:0]}; - assign PP1E = Aprime * Bprime; - assign PA = {(`XLEN-1){ForwardedSrcAE[`XLEN-1]}} & ForwardedSrcBE[`XLEN-2:0]; - assign PB = {(`XLEN-1){ForwardedSrcBE[`XLEN-1]}} & ForwardedSrcAE[`XLEN-2:0]; - assign PP = ForwardedSrcAE[`XLEN-1] & ForwardedSrcBE[`XLEN-1]; + assign Aprime = {1'b0, ForwardedSrcAE[`XLEN-2:0]}; + assign Bprime = {1'b0, ForwardedSrcBE[`XLEN-2:0]}; + assign PP1E = Aprime * Bprime; + assign PA = {(`XLEN-1){ForwardedSrcAE[`XLEN-1]}} & ForwardedSrcBE[`XLEN-2:0]; + assign PB = {(`XLEN-1){ForwardedSrcBE[`XLEN-1]}} & ForwardedSrcAE[`XLEN-2:0]; + assign PP = ForwardedSrcAE[`XLEN-1] & ForwardedSrcBE[`XLEN-1]; - // flavor of multiplication - assign MULH = (Funct3E == 3'b001); - assign MULHSU = (Funct3E == 3'b010); + // flavor of multiplication + assign MULH = (Funct3E == 3'b001); + assign MULHSU = (Funct3E == 3'b010); - // Handle signs - assign PP2E = {2'b00, (MULH | MULHSU) ? ~PA : PA, {(`XLEN-1){1'b0}}}; - assign PP3E = {2'b00, (MULH) ? ~PB : PB, {(`XLEN-1){1'b0}}}; - always_comb - if (MULH) PP4E = {1'b1, PP, {(`XLEN-3){1'b0}}, 1'b1, {(`XLEN){1'b0}}}; - else if (MULHSU) PP4E = {1'b1, ~PP, {(`XLEN-2){1'b0}}, 1'b1, {(`XLEN-1){1'b0}}}; - else PP4E = {1'b0, PP, {(`XLEN*2-2){1'b0}}}; + // Select partial products, handling signed multiplication + assign PP2E = {2'b00, (MULH | MULHSU) ? ~PA : PA, {(`XLEN-1){1'b0}}}; + assign PP3E = {2'b00, (MULH) ? 
~PB : PB, {(`XLEN-1){1'b0}}}; + always_comb + if (MULH) PP4E = {1'b1, PP, {(`XLEN-3){1'b0}}, 1'b1, {(`XLEN){1'b0}}}; + else if (MULHSU) PP4E = {1'b1, ~PP, {(`XLEN-2){1'b0}}, 1'b1, {(`XLEN-1){1'b0}}}; + else PP4E = {1'b0, PP, {(`XLEN*2-2){1'b0}}}; ////////////////////////////// // Memory Stage: Sum partial proudcts ////////////////////////////// - flopenrc #(`XLEN*2) PP1Reg(clk, reset, FlushM, ~StallM, PP1E, PP1M); - flopenrc #(`XLEN*2) PP2Reg(clk, reset, FlushM, ~StallM, PP2E, PP2M); - flopenrc #(`XLEN*2) PP3Reg(clk, reset, FlushM, ~StallM, PP3E, PP3M); - flopenrc #(`XLEN*2) PP4Reg(clk, reset, FlushM, ~StallM, PP4E, PP4M); + flopenrc #(`XLEN*2) PP1Reg(clk, reset, FlushM, ~StallM, PP1E, PP1M); + flopenrc #(`XLEN*2) PP2Reg(clk, reset, FlushM, ~StallM, PP2E, PP2M); + flopenrc #(`XLEN*2) PP3Reg(clk, reset, FlushM, ~StallM, PP3E, PP3M); + flopenrc #(`XLEN*2) PP4Reg(clk, reset, FlushM, ~StallM, PP4E, PP4M); - assign ProdM = PP1M + PP2M + PP3M + PP4M; //ForwardedSrcAE * ForwardedSrcBE; + // add up partial products; this multi-input add implies CSAs and a final CPA + assign ProdM = PP1M + PP2M + PP3M + PP4M; //ForwardedSrcAE * ForwardedSrcBE; endmodule