mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-03 18:25:27 +00:00
Merge pull request #189 from kipmacsaigoren/bitmanip_cleanup
Bitmanip: Removed Comparator Flag to ALU
This commit is contained in:
commit
ad0b430c63
@ -37,14 +37,13 @@ module alu #(parameter WIDTH=32) (
|
||||
input logic [1:0] BSelect, // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction
|
||||
input logic [2:0] ZBBSelect, // ZBB mux select signal
|
||||
input logic [2:0] Funct3, // For BMU decoding
|
||||
input logic CompLT, // Less-Than flag from comparator
|
||||
input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage
|
||||
output logic [WIDTH-1:0] Result, // ALU result
|
||||
output logic [WIDTH-1:0] ALUResult, // ALU result
|
||||
output logic [WIDTH-1:0] Sum); // Sum of operands
|
||||
|
||||
// CondInvB = ~B when subtracting, B otherwise. Shift = shift result. SLT/U = result of a slt/u instruction.
|
||||
// FullResult = ALU result before adjusting for a RV64 w-suffix instruction.
|
||||
logic [WIDTH-1:0] CondMaskInvB, Shift, FullResult, ALUResult; // Intermediate Signals
|
||||
logic [WIDTH-1:0] CondMaskInvB, Shift, FullResult, PreALUResult; // Intermediate Signals
|
||||
logic [WIDTH-1:0] CondMaskB; // Result of B mask select mux
|
||||
logic [WIDTH-1:0] CondShiftA; // Result of A shifted select mux
|
||||
logic [WIDTH-1:0] CondExtA; // Result of Zero Extend A select mux
|
||||
@ -84,16 +83,16 @@ module alu #(parameter WIDTH=32) (
|
||||
end
|
||||
|
||||
// Support RV64I W-type addw/subw/addiw/shifts that discard upper 32 bits and sign-extend 32-bit result to 64 bits
|
||||
if (WIDTH == 64) assign ALUResult = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult;
|
||||
else assign ALUResult = FullResult;
|
||||
if (WIDTH == 64) assign PreALUResult = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult;
|
||||
else assign PreALUResult = FullResult;
|
||||
|
||||
// Final Result B instruction select mux
|
||||
if (`ZBC_SUPPORTED | `ZBS_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED) begin : bitmanipalu
|
||||
bitmanipalu #(WIDTH) balu(.A, .B, .W64, .BSelect, .ZBBSelect,
|
||||
.Funct3, .CompLT, .BALUControl, .ALUResult, .FullResult,
|
||||
.CondMaskB, .CondShiftA, .Result);
|
||||
.Funct3, .LT,.LTU, .BALUControl, .PreALUResult, .FullResult,
|
||||
.CondMaskB, .CondShiftA, .ALUResult);
|
||||
end else begin
|
||||
assign Result = ALUResult;
|
||||
assign ALUResult = PreALUResult;
|
||||
assign CondMaskB = B;
|
||||
assign CondShiftA = A;
|
||||
end
|
||||
|
@ -35,12 +35,13 @@ module bitmanipalu #(parameter WIDTH=32) (
|
||||
input logic [1:0] BSelect, // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction
|
||||
input logic [2:0] ZBBSelect, // ZBB mux select signal
|
||||
input logic [2:0] Funct3, // Funct3 field of opcode indicates operation to perform
|
||||
input logic CompLT, // Less-Than flag from comparator
|
||||
input logic LT, // less than flag
|
||||
input logic LTU, // less than unsigned flag
|
||||
input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage
|
||||
input logic [WIDTH-1:0] ALUResult, FullResult, // ALUResult, FullResult signals
|
||||
input logic [WIDTH-1:0] PreALUResult, FullResult,// PreALUResult, FullResult signals
|
||||
output logic [WIDTH-1:0] CondMaskB, // B is conditionally masked for ZBS instructions
|
||||
output logic [WIDTH-1:0] CondShiftA, // A is conditionally shifted for ShAdd instructions
|
||||
output logic [WIDTH-1:0] Result); // Result
|
||||
output logic [WIDTH-1:0] ALUResult); // Result
|
||||
|
||||
logic [WIDTH-1:0] ZBBResult, ZBCResult; // ZBB, ZBC Result
|
||||
logic [WIDTH-1:0] MaskB; // BitMask of B
|
||||
@ -84,16 +85,16 @@ module bitmanipalu #(parameter WIDTH=32) (
|
||||
|
||||
// ZBB Unit
|
||||
if (`ZBB_SUPPORTED) begin: zbb
|
||||
zbb #(WIDTH) ZBB(.A, .RevA, .B, .W64, .lt(CompLT), .ZBBSelect, .ZBBResult);
|
||||
zbb #(WIDTH) ZBB(.A, .RevA, .B, .W64, .LT, .LTU, .BUnsigned(Funct3[0]), .ZBBSelect, .ZBBResult);
|
||||
end else assign ZBBResult = 0;
|
||||
|
||||
// Result Select Mux
|
||||
always_comb
|
||||
case (BSelect)
|
||||
// 00: ALU, 01: ZBA/ZBS, 10: ZBB, 11: ZBC
|
||||
2'b00: Result = ALUResult;
|
||||
2'b01: Result = FullResult; // NOTE: We don't use ALUResult because ZBA/ZBS instructions don't sign extend the MSB of the right-hand word.
|
||||
2'b10: Result = ZBBResult;
|
||||
2'b11: Result = ZBCResult;
|
||||
2'b00: ALUResult = PreALUResult;
|
||||
2'b01: ALUResult = FullResult; // NOTE: We don't use ALUResult because ZBA/ZBS instructions don't sign extend the MSB of the right-hand word.
|
||||
2'b10: ALUResult = ZBBResult;
|
||||
2'b11: ALUResult = ZBCResult;
|
||||
endcase
|
||||
endmodule
|
||||
|
@ -48,7 +48,6 @@ module bmuctrl(
|
||||
output logic [1:0] BSelectE, // Indicates if ZBA_ZBB_ZBC_ZBS instruction in one-hot encoding
|
||||
output logic [2:0] ZBBSelectE, // ZBB mux select signal
|
||||
output logic BRegWriteE, // Indicates if it is a R type B instruction in Execute
|
||||
output logic BComparatorSignedE, // Indicates if comparator signed in Execute Stage
|
||||
output logic [2:0] BALUControlE // ALU Control signals for B instructions in Execute Stage
|
||||
);
|
||||
|
||||
@ -56,7 +55,6 @@ module bmuctrl(
|
||||
logic [2:0] Funct3D; // Funct3 field in Decode stage
|
||||
logic [6:0] Funct7D; // Funct7 field in Decode stage
|
||||
logic [4:0] Rs2D; // Rs2 source register in Decode stage
|
||||
logic BComparatorSignedD; // Indicates if comparator signed (max, min instruction) in Decode Stage
|
||||
logic RotateD; // Indicates if rotate instruction in Decode Stage
|
||||
logic MaskD; // Indicates if zbs instruction in Decode Stage
|
||||
logic PreShiftD; // Indicates if sh1add, sh2add, sh3add instruction in Decode Stage
|
||||
@ -110,10 +108,10 @@ module bmuctrl(
|
||||
BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0; // rev8
|
||||
17'b0010011_0010100_101: if (Rs2D[4:0] == 5'b00111)
|
||||
BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0; // orc.b
|
||||
17'b0110011_0000101_110: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_0_0_0_0_0; // max
|
||||
17'b0110011_0000101_111: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_0_0_0_0_0; // maxu
|
||||
17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_0_0_0_0_0; // min
|
||||
17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_0_0_0_0_0; // minu
|
||||
17'b0110011_0000101_110: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_1_0_0_0_0; // max
|
||||
17'b0110011_0000101_111: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_1_0_0_0_0; // maxu
|
||||
17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0; // min
|
||||
17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0; // minu
|
||||
endcase
|
||||
if (`XLEN==32)
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
@ -172,12 +170,9 @@ module bmuctrl(
|
||||
// Pack BALUControl Signals
|
||||
assign BALUControlD = {RotateD, MaskD, PreShiftD};
|
||||
|
||||
// Comparator should perform signed comparison when min/max instruction. We have overlap in funct3 with some branch instructions so we use opcode to differentiate betwen min/max and branches
|
||||
assign BComparatorSignedD = (Funct3D[2]^Funct3D[0]) & ~OpD[6];
|
||||
|
||||
// Choose ALUSelect from BMU for BMU operations, Funct3 for IEU operations, or 0 for addition
|
||||
assign ALUSelectD = BALUOpD ? BALUSelectD : (ALUOpD ? Funct3D : 3'b000);
|
||||
|
||||
// BMU Execute stage pipeline control register
|
||||
flopenrc#(10) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BComparatorSignedD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BComparatorSignedE, BALUControlE});
|
||||
flopenrc#(9) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE});
|
||||
endmodule
|
||||
|
@ -30,20 +30,20 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module clmul #(parameter WIDTH=32) (
|
||||
input logic [WIDTH-1:0] A, B, // Operands
|
||||
input logic [WIDTH-1:0] X, Y, // Operands
|
||||
output logic [WIDTH-1:0] ClmulResult); // ZBS result
|
||||
|
||||
logic [(WIDTH*WIDTH)-1:0] s; // intermediary signals for carry-less multiply
|
||||
logic [(WIDTH*WIDTH)-1:0] S; // intermediary signals for carry-less multiply
|
||||
|
||||
integer i,j;
|
||||
|
||||
always_comb begin
|
||||
for (i=0;i<WIDTH;i++) begin: outer
|
||||
s[WIDTH*i]=A[0]&B[i];
|
||||
S[WIDTH*i] = X[0] & Y[i];
|
||||
for (j=1;j<=i;j++) begin: inner
|
||||
s[WIDTH*i+j] = (A[j]&B[i-j])^s[WIDTH*i+j-1];
|
||||
S[WIDTH*i+j] = (X[j] & Y[i-j]) ^ S[WIDTH*i+j-1];
|
||||
end
|
||||
ClmulResult[i] = s[WIDTH*i+j-1];
|
||||
ClmulResult[i] = S[WIDTH*i+j-1];
|
||||
end
|
||||
end
|
||||
endmodule
|
||||
|
@ -33,21 +33,25 @@
|
||||
module zbb #(parameter WIDTH=32) (
|
||||
input logic [WIDTH-1:0] A, RevA, B, // Operands
|
||||
input logic W64, // Indicates word operation
|
||||
input logic lt, // lt flag
|
||||
input logic LT, // lt flag
|
||||
input logic LTU, // ltu flag
|
||||
input logic BUnsigned, // max/min (signed) flag
|
||||
input logic [2:0] ZBBSelect, // ZBB Result select signal
|
||||
output logic [WIDTH-1:0] ZBBResult); // ZBB result
|
||||
|
||||
|
||||
logic lt; // lt given signed/unsigned
|
||||
logic [WIDTH-1:0] CntResult; // count result
|
||||
logic [WIDTH-1:0] MinMaxResult; // min, max result
|
||||
logic [WIDTH-1:0] ByteResult; // byte results
|
||||
logic [WIDTH-1:0] ExtResult; // sign/zero extend results
|
||||
|
||||
mux2 #(1) ltmux(LT, LTU, BUnsigned , lt);
|
||||
cnt #(WIDTH) cnt(.A, .RevA, .B(B[1:0]), .W64, .CntResult);
|
||||
byteUnit #(WIDTH) bu(.A, .ByteSelect(B[0]), .ByteResult);
|
||||
ext #(WIDTH) ext(.A, .ExtSelect({~B[2], {B[2] & B[0]}}), .ExtResult);
|
||||
|
||||
// ZBBSelect[2] differentiates between min(u) vs max(u) instruction
|
||||
mux2 #(WIDTH) minmaxmux(B, A, lt^ZBBSelect[2], MinMaxResult);
|
||||
mux2 #(WIDTH) minmaxmux(B, A, ZBBSelect[2]^lt, MinMaxResult);
|
||||
|
||||
// ZBB Result select mux
|
||||
mux4 #(WIDTH) zbbresultmux(CntResult, ExtResult, ByteResult, MinMaxResult, ZBBSelect[1:0], ZBBResult);
|
||||
|
@ -36,19 +36,16 @@ module zbc #(parameter WIDTH=32) (
|
||||
|
||||
logic [WIDTH-1:0] ClmulResult, RevClmulResult;
|
||||
logic [WIDTH-1:0] RevB;
|
||||
logic [WIDTH-1:0] x,y;
|
||||
logic [1:0] select;
|
||||
logic [WIDTH-1:0] X, Y;
|
||||
|
||||
assign select = ~Funct3[1:0];
|
||||
bitreverse #(WIDTH) brB(B, RevB);
|
||||
|
||||
bitreverse #(WIDTH) brB(.A(B), .RevA(RevB));
|
||||
mux3 #(WIDTH) xmux({RevA[WIDTH-2:0], {1'b0}}, RevA, A, ~Funct3[1:0], X);
|
||||
mux3 #(WIDTH) ymux({{1'b0}, RevB[WIDTH-2:0]}, RevB, B, ~Funct3[1:0], Y);
|
||||
|
||||
mux3 #(WIDTH) xmux({RevA[WIDTH-2:0], {1'b0}}, RevA, A, select, x);
|
||||
mux3 #(WIDTH) ymux({{1'b0},RevB[WIDTH-2:0]}, RevB, B, select, y);
|
||||
|
||||
clmul #(WIDTH) clm(.A(x), .B(y), .ClmulResult(ClmulResult));
|
||||
clmul #(WIDTH) clm(.X, .Y, .ClmulResult);
|
||||
|
||||
bitreverse #(WIDTH) brClmulResult(.A(ClmulResult), .RevA(RevClmulResult));
|
||||
bitreverse #(WIDTH) brClmulResult(ClmulResult, RevClmulResult);
|
||||
|
||||
mux2 #(WIDTH) zbcresultmux(ClmulResult, RevClmulResult, Funct3[1], ZBCResult);
|
||||
endmodule
|
@ -125,7 +125,6 @@ module controller(
|
||||
logic IntDivM; // Integer divide instruction
|
||||
logic [1:0] BSelectD; // One-Hot encoding if it's ZBA_ZBB_ZBC_ZBS instruction in decode stage
|
||||
logic [2:0] ZBBSelectD; // ZBB Mux Select Signal
|
||||
logic BComparatorSignedE; // Indicates if max, min (signed comarison) instruction in Execute Stage
|
||||
logic IFunctD, RFunctD, MFunctD; // Detect I, R, and M-type RV32IM/Rv64IM instructions
|
||||
logic LFunctD, SFunctD, BFunctD; // Detect load, store, branch instructions
|
||||
logic JFunctD; // detect jalr instruction
|
||||
@ -257,7 +256,7 @@ module controller(
|
||||
|
||||
bmuctrl bmuctrl(.clk, .reset, .StallD, .FlushD, .InstrD, .ALUOpD, .BSelectD, .ZBBSelectD,
|
||||
.BRegWriteD, .BALUSrcBD, .BW64D, .BSubArithD, .IllegalBitmanipInstrD, .StallE, .FlushE,
|
||||
.ALUSelectD, .BSelectE, .ZBBSelectE, .BRegWriteE, .BComparatorSignedE, .BALUControlE);
|
||||
.ALUSelectD, .BSelectE, .ZBBSelectE, .BRegWriteE, .BALUControlE);
|
||||
if (`ZBA_SUPPORTED) begin
|
||||
// ALU Decoding is more comprehensive when ZBA is supported. slt and slti conflict with sh1add, sh1add.uw
|
||||
assign sltD = (Funct3D == 3'b010 & (~(Funct7D[4]) | ~OpD[5])) ;
|
||||
@ -283,7 +282,6 @@ module controller(
|
||||
assign BSelectE = 2'b00;
|
||||
assign BSelectD = 2'b00;
|
||||
assign ZBBSelectE = 3'b000;
|
||||
assign BComparatorSignedE = 1'b0;
|
||||
assign BALUControlE = 3'b0;
|
||||
end
|
||||
|
||||
@ -311,8 +309,7 @@ module controller(
|
||||
// Branch Logic
|
||||
// The comparator handles both signed and unsigned branches using BranchSignedE
|
||||
// Hence, only eq and lt flags are needed
|
||||
// We also want comparator to handle signed comparison on a max/min bitmanip instruction
|
||||
assign BranchSignedE = (~(Funct3E[2:1] == 2'b11) & BranchE) | BComparatorSignedE;
|
||||
assign BranchSignedE = (~(Funct3E[2:1] == 2'b11) & BranchE);
|
||||
assign {eqE, ltE} = FlagsE;
|
||||
mux2 #(1) branchflagmux(eqE, ltE, Funct3E[2], BranchFlagE);
|
||||
assign BranchTakenE = BranchFlagE ^ Funct3E[0];
|
||||
|
@ -114,7 +114,7 @@ module datapath (
|
||||
comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE);
|
||||
mux2 #(`XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE);
|
||||
mux2 #(`XLEN) srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE);
|
||||
alu #(`XLEN) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, FlagsE[0], BALUControlE, ALUResultE, IEUAdrE);
|
||||
alu #(`XLEN) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, ALUResultE, IEUAdrE);
|
||||
mux2 #(`XLEN) altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE);
|
||||
mux2 #(`XLEN) ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user