MDU comment cleanup

This commit is contained in:
David Harris 2023-01-12 07:15:14 -08:00
parent e67f125201
commit b3e1badd31
4 changed files with 121 additions and 111 deletions

View File

@ -6,6 +6,8 @@
//
// Purpose: Restoring integer division using a shift register and subtractor
//
// Documentation: RISC-V System on Chip Design Chapter 12 (Figure 12.19)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
@ -26,32 +28,39 @@
`include "wally-config.vh"
/* verilator lint_off UNOPTFLAT */
module intdivrestoring (
input logic clk,
input logic reset,
input logic StallM,
input logic FlushE,
input logic DivSignedE, W64E,
input logic IntDivE,
//input logic [`XLEN-1:0] SrcAE, SrcBE,
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
output logic DivBusyE,
output logic [`XLEN-1:0] QuotM, RemM
module intdivrestoring(
input logic clk,
input logic reset,
input logic StallM,
input logic FlushE,
input logic IntDivE, // integer division/remainder instruction of any type
input logic DivSignedE, // signed division
input logic W64E, // W-type instructions (divw, divuw, remw, remuw)
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Forwarding mux outputs for Source A and B
output logic DivBusyE, // Divide is busy - stall pipeline
output logic [`XLEN-1:0] QuotM, RemM // Quotient and remainder outputs
);
typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype;
localparam STEPBITS = $clog2(`XLEN/`IDIV_BITSPERCYCLE); // Number of steps
typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype; // division FSM state
statetype state;
logic [`XLEN-1:0] W[`IDIV_BITSPERCYCLE:0];
logic [`XLEN-1:0] XQ[`IDIV_BITSPERCYCLE:0];
logic [`XLEN-1:0] DinE, XinE, DnE, DAbsBE, DAbsB, XnE, XInitE, WnM, XQnM;
localparam STEPBITS = $clog2(`XLEN/`IDIV_BITSPERCYCLE);
logic [STEPBITS:0] step;
logic Div0E, Div0M;
logic DivStartE, SignXE, SignDE, NegQE, NegWM, NegQM;
logic [`XLEN-1:0] WNext, XQNext;
logic [`XLEN-1:0] W[`IDIV_BITSPERCYCLE:0]; // Residual for each of k steps
logic [`XLEN-1:0] XQ[`IDIV_BITSPERCYCLE:0]; // dividend/quotient for each of k steps
logic [`XLEN-1:0] WNext, XQNext; // initialized W and XQ going into registers
logic [`XLEN-1:0] DinE, XinE; // divisor & dividend, possibly truncated to 32 bits
logic [`XLEN-1:0] DnE; // DnE = ~DinE
logic [`XLEN-1:0] DAbsBE; // absolute value of D
logic [`XLEN-1:0] DAbsB; // registered absolute value of D, constant during division
logic [`XLEN-1:0] XnE; // XnE = ~XinE
logic [`XLEN-1:0] XInitE; // |X|, or original X for divide by 0
logic [`XLEN-1:0] WnM, XQnM; // negated residual W and quotient XQ for postprocessing sign correction
logic [STEPBITS:0] step; // division step
logic Div0E, Div0M; // divide by 0
logic DivStartE; // start integer division
logic SignXE, SignDE; // sign of dividend and divisor
logic NegQE, NegWM, NegQM; // negate quotient or residual during postprocessing
//////////////////////////////
// Execute Stage: prepare for division calculation with control logic, W logic and absolute values, initialize W and XQ
@ -134,5 +143,3 @@ module intdivrestoring (
else state <= IDLE;
end
endmodule
/* verilator lint_on UNOPTFLAT */

View File

@ -4,7 +4,9 @@
// Written: David_Harris@hmc.edu 2 October 2021
// Modified:
//
// Purpose: Restoring integer division using a shift register and subtractor
// Purpose: Restoring integer division step. k steps are used in intdivrestoring
//
// Documentation: RISC-V System on Chip Design Chapter 12 (Figure 12.19)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
@ -29,11 +31,16 @@
/* verilator lint_off UNOPTFLAT */
module intdivrestoringstep(
input logic [`XLEN-1:0] W, XQ, DAbsB,
output logic [`XLEN-1:0] WOut, XQOut);
input logic [`XLEN-1:0] W, // Residual in
input logic [`XLEN-1:0] XQ, // bits of dividend X and quotient Q in
input logic [`XLEN-1:0] DAbsB, // complement of absolute value of divisor D (for subtraction)
output logic [`XLEN-1:0] WOut, // Residual out
output logic [`XLEN-1:0] XQOut // bits of dividend and quotient out: discard one bit of X, append one bit of Q
);
logic [`XLEN-1:0] WShift, WPrime;
logic qi, qib;
logic [`XLEN-1:0] WShift; // Shift W left by one bit, bringing in most significant bit of X
logic [`XLEN-1:0] WPrime; // WShift - D, for comparison and possible result
logic qi, qib; // Quotient digit and its complement
assign {WShift, XQOut} = {W[`XLEN-2:0], XQ, qi}; // shift W and X/Q left, insert quotient bit at bottom
adder #(`XLEN+1) wdsub({1'b0, WShift}, {1'b1, DAbsB}, {qib, WPrime}); // effective subtractor, carry out determines quotient bit

View File

@ -6,6 +6,8 @@
//
// Purpose: M extension multiply and divide
//
// Documentation: RISC-V System on Chip Design Chapter 12 (Figure 12.21)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
@ -26,56 +28,49 @@
`include "wally-config.vh"
module mdu (
input logic clk, reset,
// Execute Stage interface
// input logic [`XLEN-1:0] SrcAE, SrcBE,
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
input logic [2:0] Funct3E, Funct3M,
input logic IntDivE, W64E,
// Writeback stage
output logic [`XLEN-1:0] MDUResultW,
// Divide Done
output logic DivBusyE,
// hazards
input logic StallM, StallW, FlushE, FlushM, FlushW
);
module mdu(
input logic clk, reset,
input logic StallM, StallW,
input logic FlushE, FlushM, FlushW,
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // inputs A and B from IEU forwarding mux output
input logic [2:0] Funct3E, Funct3M, // type of MDU operation
input logic IntDivE, W64E, // Integer division/remainder, and W-type instructions
output logic [`XLEN-1:0] MDUResultW, // multiply/divide result
output logic DivBusyE // busy signal to stall pipeline in Execute stage
);
logic [`XLEN-1:0] MDUResultM;
logic [`XLEN-1:0] PrelimResultM;
logic [`XLEN-1:0] QuotM, RemM;
logic [`XLEN*2-1:0] ProdM;
logic DivSignedE;
logic W64M;
logic [`XLEN*2-1:0] ProdM; // double-width product from mul
logic [`XLEN-1:0] QuotM, RemM; // quotient and remainder from intdivrestoring
logic [`XLEN-1:0] PrelimResultM; // selected result before W truncation
logic [`XLEN-1:0] MDUResultM; // result after W truncation
logic W64M; // W-type instruction
// Multiplier
mul mul(.clk, .reset, .StallM, .FlushM, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .ProdM);
// Divide
// Divider
// Start a divide when a new division instruction is received and the divider isn't already busy or finishing
// When F extensions are supported, use the FPU divider instead
// When IDIV_ON_FPU is set, use the FPU divider instead
if (`IDIV_ON_FPU) begin
assign QuotM = 0;
assign RemM = 0;
assign DivBusyE = 0;
end else begin
assign DivSignedE = ~Funct3E[0];
intdivrestoring div(.clk, .reset, .StallM, .FlushE, .DivSignedE, .W64E, .IntDivE,
intdivrestoring div(.clk, .reset, .StallM, .FlushE, .DivSignedE(~Funct3E[0]), .W64E, .IntDivE,
.ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM);
end
// Result multiplexer
always_comb
case (Funct3M)
3'b000: PrelimResultM = ProdM[`XLEN-1:0];
3'b001: PrelimResultM = ProdM[`XLEN*2-1:`XLEN];
3'b010: PrelimResultM = ProdM[`XLEN*2-1:`XLEN];
3'b011: PrelimResultM = ProdM[`XLEN*2-1:`XLEN];
3'b100: PrelimResultM = QuotM;
3'b101: PrelimResultM = QuotM;
3'b110: PrelimResultM = RemM;
3'b111: PrelimResultM = RemM;
3'b000: PrelimResultM = ProdM[`XLEN-1:0]; // mul
3'b001: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; // mulh
3'b010: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; // mulhsu
3'b011: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; // mulhu
3'b100: PrelimResultM = QuotM; // div
3'b101: PrelimResultM = QuotM; // divu
3'b110: PrelimResultM = RemM; // rem
3'b111: PrelimResultM = RemM; // remu
endcase
// Handle sign extension for W-type instructions

View File

@ -4,7 +4,9 @@
// Written: David_Harris@hmc.edu 16 February 2021
// Modified:
//
// Purpose: Multiply instructions
// Purpose: Integer multiplication
//
// Documentation: RISC-V System on Chip Design Chapter 12 (Figure 12.18)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
@ -26,69 +28,68 @@
`include "wally-config.vh"
module mul (
// Execute Stage interface
input logic clk, reset,
input logic StallM, FlushM,
// input logic [`XLEN-1:0] SrcAE, SrcBE,
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
input logic [2:0] Funct3E,
output logic [`XLEN*2-1:0] ProdM
module mul(
input logic clk, reset,
input logic StallM, FlushM,
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // source A and B from after Forwarding mux
input logic [2:0] Funct3E, // type of multiply
output logic [`XLEN*2-1:0] ProdM // double-width product
);
// Number systems
// Let A' = sum(i=0, XLEN-2, A[i]*2^i)
// Unsigned: A = A' + A[XLEN-1]*2^(XLEN-1)
// Signed: A = A' - A[XLEN-1]*2^(XLEN-1)
// Number systems
// Let A' = sum(i=0, XLEN-2, A[i]*2^i)
// Unsigned: A = A' + A[XLEN-1]*2^(XLEN-1)
// Signed: A = A' - A[XLEN-1]*2^(XLEN-1)
// Multiplication: A*B
// Let P' = A' * B'
// PA = (A' * B[XLEN-1])
// PB = (B' * A[XLEN-1])
// PP = A[XLEN-1] * B[XLEN-1]
// Signed * Signed = P' + (-PA - PB)*2^(XLEN-1) + PP*2^(2XLEN-2)
// Signed * Unsigned = P' + ( PA - PB)*2^(XLEN-1) - PP*2^(2XLEN-2)
// Unsigned * Unsigned = P' + ( PA + PB)*2^(XLEN-1) + PP*2^(2XLEN-2)
// Multiplication: A*B
// Let P' = A' * B'
// PA = (A' * B[XLEN-1])
// PB = (B' * A[XLEN-1])
// PP = A[XLEN-1] * B[XLEN-1]
// Signed * Signed = P' + (-PA - PB)*2^(XLEN-1) + PP*2^(2XLEN-2)
// Signed * Unsigned = P' + ( PA - PB)*2^(XLEN-1) - PP*2^(2XLEN-2)
// Unsigned * Unsigned = P' + ( PA + PB)*2^(XLEN-1) + PP*2^(2XLEN-2)
logic [`XLEN*2-1:0] PP1E, PP2E, PP3E, PP4E;
logic [`XLEN*2-1:0] PP1M, PP2M, PP3M, PP4M;
logic [`XLEN-2:0] PA, PB;
logic PP;
logic MULH, MULHSU;
logic [`XLEN-1:0] Aprime, Bprime;
logic [`XLEN-1:0] Aprime, Bprime; // lower bits of source A and B
logic MULH, MULHSU; // type of multiply
logic [`XLEN-2:0] PA, PB; // product of msb and lsbs
logic PP; // product of msbs
logic [`XLEN*2-1:0] PP1E, PP2E, PP3E, PP4E; // partial products
logic [`XLEN*2-1:0] PP1M, PP2M, PP3M, PP4M; // registered partial products
//////////////////////////////
// Execute Stage: Compute partial products
//////////////////////////////
assign Aprime = {1'b0, ForwardedSrcAE[`XLEN-2:0]};
assign Bprime = {1'b0, ForwardedSrcBE[`XLEN-2:0]};
assign PP1E = Aprime * Bprime;
assign PA = {(`XLEN-1){ForwardedSrcAE[`XLEN-1]}} & ForwardedSrcBE[`XLEN-2:0];
assign PB = {(`XLEN-1){ForwardedSrcBE[`XLEN-1]}} & ForwardedSrcAE[`XLEN-2:0];
assign PP = ForwardedSrcAE[`XLEN-1] & ForwardedSrcBE[`XLEN-1];
assign Aprime = {1'b0, ForwardedSrcAE[`XLEN-2:0]};
assign Bprime = {1'b0, ForwardedSrcBE[`XLEN-2:0]};
assign PP1E = Aprime * Bprime;
assign PA = {(`XLEN-1){ForwardedSrcAE[`XLEN-1]}} & ForwardedSrcBE[`XLEN-2:0];
assign PB = {(`XLEN-1){ForwardedSrcBE[`XLEN-1]}} & ForwardedSrcAE[`XLEN-2:0];
assign PP = ForwardedSrcAE[`XLEN-1] & ForwardedSrcBE[`XLEN-1];
// flavor of multiplication
assign MULH = (Funct3E == 3'b001);
assign MULHSU = (Funct3E == 3'b010);
// flavor of multiplication
assign MULH = (Funct3E == 3'b001);
assign MULHSU = (Funct3E == 3'b010);
// Handle signs
assign PP2E = {2'b00, (MULH | MULHSU) ? ~PA : PA, {(`XLEN-1){1'b0}}};
assign PP3E = {2'b00, (MULH) ? ~PB : PB, {(`XLEN-1){1'b0}}};
always_comb
if (MULH) PP4E = {1'b1, PP, {(`XLEN-3){1'b0}}, 1'b1, {(`XLEN){1'b0}}};
else if (MULHSU) PP4E = {1'b1, ~PP, {(`XLEN-2){1'b0}}, 1'b1, {(`XLEN-1){1'b0}}};
else PP4E = {1'b0, PP, {(`XLEN*2-2){1'b0}}};
// Select partial products, handling signed multiplication
assign PP2E = {2'b00, (MULH | MULHSU) ? ~PA : PA, {(`XLEN-1){1'b0}}};
assign PP3E = {2'b00, (MULH) ? ~PB : PB, {(`XLEN-1){1'b0}}};
always_comb
if (MULH) PP4E = {1'b1, PP, {(`XLEN-3){1'b0}}, 1'b1, {(`XLEN){1'b0}}};
else if (MULHSU) PP4E = {1'b1, ~PP, {(`XLEN-2){1'b0}}, 1'b1, {(`XLEN-1){1'b0}}};
else PP4E = {1'b0, PP, {(`XLEN*2-2){1'b0}}};
//////////////////////////////
// Memory Stage: Sum partial products
//////////////////////////////
flopenrc #(`XLEN*2) PP1Reg(clk, reset, FlushM, ~StallM, PP1E, PP1M);
flopenrc #(`XLEN*2) PP2Reg(clk, reset, FlushM, ~StallM, PP2E, PP2M);
flopenrc #(`XLEN*2) PP3Reg(clk, reset, FlushM, ~StallM, PP3E, PP3M);
flopenrc #(`XLEN*2) PP4Reg(clk, reset, FlushM, ~StallM, PP4E, PP4M);
flopenrc #(`XLEN*2) PP1Reg(clk, reset, FlushM, ~StallM, PP1E, PP1M);
flopenrc #(`XLEN*2) PP2Reg(clk, reset, FlushM, ~StallM, PP2E, PP2M);
flopenrc #(`XLEN*2) PP3Reg(clk, reset, FlushM, ~StallM, PP3E, PP3M);
flopenrc #(`XLEN*2) PP4Reg(clk, reset, FlushM, ~StallM, PP4E, PP4M);
assign ProdM = PP1M + PP2M + PP3M + PP4M; //ForwardedSrcAE * ForwardedSrcBE;
// add up partial products; this multi-input add implies CSAs and a final CPA
assign ProdM = PP1M + PP2M + PP3M + PP4M; //ForwardedSrcAE * ForwardedSrcBE;
endmodule