diff --git a/src/ieu/bmu/bitmanipalu.sv b/src/ieu/bmu/bitmanipalu.sv index 3f7d0ae7a..373dbe437 100644 --- a/src/ieu/bmu/bitmanipalu.sv +++ b/src/ieu/bmu/bitmanipalu.sv @@ -31,9 +31,11 @@ module bitmanipalu import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] A, B, // Operands input logic W64, // W64-type instruction - input logic [1:0] BSelect, // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction - input logic [2:0] ZBBSelect, // ZBB mux select signal + input logic [3:0] BSelect, // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction + input logic [3:0] ZBBSelect, // ZBB mux select signal input logic [2:0] Funct3, // Funct3 field of opcode indicates operation to perform + input logic [6:0] Funct7, // Funct7 field for ZKND and ZKNE operations + input logic [4:0] Rs2E, // Register source2 for RNUM of ZKNE/ZKND input logic LT, // less than flag input logic LTU, // less than unsigned flag input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage @@ -43,7 +45,10 @@ module bitmanipalu import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] CondShiftA, // A is conditionally shifted for ShAdd instructions output logic [P.XLEN-1:0] ALUResult); // Result - logic [P.XLEN-1:0] ZBBResult, ZBCResult; // ZBB, ZBC Result + logic [P.XLEN-1:0] ZBBResult, ZBCResult; // ZBB, ZBC Result + logic [P.XLEN-1:0] ZBKBResult, ZBKCResult, ZBKXResult; // ZBKB, ZBKC Result + logic [P.XLEN-1:0] ZKNDResult, ZKNEResult; // ZKND, ZKNE Result + logic [P.XLEN-1:0] ZKNHResult; // ZKNH Result logic [P.XLEN-1:0] MaskB; // BitMask of B logic [P.XLEN-1:0] RevA; // Bit-reversed A logic Rotate; // Indicates if it is Rotate instruction @@ -90,16 +95,69 @@ module bitmanipalu import cvw::*; #(parameter cvw_t P) ( // ZBB Unit if (P.ZBB_SUPPORTED) begin: zbb - zbb #(P.XLEN) ZBB(.A(ABMU), .RevA, .B(BBMU), .W64, .LT, .LTU, .BUnsigned(Funct3[0]), .ZBBSelect, .ZBBResult); + zbb #(P.XLEN) ZBB(.A(ABMU), .RevA, .B(BBMU), .W64, .LT, .LTU, .BUnsigned(Funct3[0]), .ZBBSelect(ZBBSelect[2:0]), .ZBBResult); end else assign ZBBResult = 0; + // ZBKB Unit + if (P.ZBKB_SUPPORTED) begin: zbkb + zbkb #(P.XLEN) ZBKB(.A(ABMU), .B(BBMU), .RevA, .W64, .Funct3, .ZBKBSelect(ZBBSelect[2:0]), .ZBKBResult); + end else assign ZBKBResult = 0; + + // ZBKC Unit + if (P.ZBKC_SUPPORTED) begin: zbkc + zbkc #(P.XLEN) ZBKC(.A(ABMU), .B(BBMU), .ZBKCSelect(ZBBSelect[0]), .ZBKCResult); + end else assign ZBKCResult = 0; + + // ZBKX Unit + if (P.ZBKX_SUPPORTED) begin: zbkx + zbkx #(P.XLEN) ZBKX(.A(ABMU), .B(BBMU), .ZBKXSelect(ZBBSelect[2:0]), .ZBKXResult); + end else assign ZBKXResult = 0; + + // ZKND Unit + if (P.ZKND_SUPPORTED) begin: zknd + if (P.XLEN == 32) begin + zknd_32 #(P.XLEN) ZKND32(.A(ABMU), .B(BBMU), .Funct7, .ZKNDSelect(ZBBSelect[2:0]), .ZKNDResult); + end + else begin + zknd_64 #(P.XLEN) ZKND64(.A(ABMU), .B(BBMU), .Funct7, .RNUM(Rs2E[3:0]), .ZKNDSelect(ZBBSelect[2:0]), .ZKNDResult); + end + end else assign ZKNDResult = 0; + + // ZKNE Unit + if (P.ZKNE_SUPPORTED) begin: zkne + if (P.XLEN == 32) begin + zkne_32 #(P.XLEN) ZKNE32(.A(ABMU), .B(BBMU), .Funct7, .ZKNESelect(ZBBSelect[2:0]), .ZKNEResult); + end + else begin + zkne_64 #(P.XLEN) ZKNE64(.A(ABMU), .B(BBMU), .Funct7, .RNUM(Rs2E[3:0]), .ZKNESelect(ZBBSelect[2:0]), .ZKNEResult); + end + end else assign ZKNEResult = 0; + + // ZKNH Unit + if (P.ZKNH_SUPPORTED) begin: zknh + if (P.XLEN == 32) begin + zknh_32 ZKNH_32(.A(ABMU), .B(BBMU), .ZKNHSelect(ZBBSelect), .ZKNHResult(ZKNHResult)); + end + else begin + zknh_64 ZKNH_64(.A(ABMU), .B(BBMU), .ZKNHSelect(ZBBSelect), .ZKNHResult(ZKNHResult)); + end + end else assign ZKNHResult = 0; + // Result Select Mux always_comb case (BSelect) - // 00: ALU, 01: ZBA/ZBS, 10: ZBB, 11: ZBC - 2'b00: ALUResult = PreALUResult; - 2'b01: ALUResult = FullResult; // NOTE: We don't use ALUResult because ZBA/ZBS instructions don't sign extend the MSB of the right-hand word. - 2'b10: ALUResult = ZBBResult; - 2'b11: ALUResult = ZBCResult; + // 0000: ALU, 0001: ZBA/ZBS, 0010: ZBB, 0011: ZBC, 0100: ZBKB, 0101: ZBKC, 0110: ZBKX + // 0111: ZKND, 1000: ZKNE, 1001: ZKNH, 1010: ZKSED, 1011: ZKSH... + 4'b0000: ALUResult = PreALUResult; + 4'b0001: ALUResult = FullResult; // NOTE: We don't use ALUResult because ZBA/ZBS instructions don't sign extend the MSB of the right-hand word. + 4'b0010: ALUResult = ZBBResult; + 4'b0011: ALUResult = ZBCResult; + 4'b0100: ALUResult = ZBKBResult; + 4'b0101: ALUResult = ZBKCResult; + 4'b0110: ALUResult = ZBKXResult; + 4'b0111: ALUResult = ZKNDResult; + 4'b1000: ALUResult = ZKNEResult; + 4'b1001: ALUResult = ZKNHResult; + default: ALUResult = PreALUResult; endcase endmodule diff --git a/src/ieu/bmu/bmuctrl.sv b/src/ieu/bmu/bmuctrl.sv index 5b758f123..19ed746b7 100644 --- a/src/ieu/bmu/bmuctrl.sv +++ b/src/ieu/bmu/bmuctrl.sv @@ -34,8 +34,8 @@ module bmuctrl import cvw::*; #(parameter cvw_t P) ( input logic StallD, FlushD, // Stall, flush Decode stage input logic [31:0] InstrD, // Instruction in Decode stage input logic ALUOpD, // Regular ALU Operation - output logic [1:0] BSelectD, // Indicates if ZBA_ZBB_ZBC_ZBS instruction in one-hot encoding in Decode stage - output logic [2:0] ZBBSelectD, // ZBB mux select signal in Decode stage NOTE: do we need this in decode? + output logic [3:0] BSelectD, // Indicates if ZBA_ZBB_ZBC_ZBS instruction in one-hot encoding in Decode stage + output logic [3:0] ZBBSelectD, // ZBB mux select signal in Decode stage NOTE: do we need this in decode? output logic BRegWriteD, // Indicates if it is a R type B instruction in Decode Stage output logic BALUSrcBD, // Indicates if it is an I/IW (non auipc) type B instruction in Decode Stage output logic BW64D, // Indiciates if it is a W type B instruction in Decode Stage @@ -44,8 +44,8 @@ module bmuctrl import cvw::*; #(parameter cvw_t P) ( // Execute stage control signals input logic StallE, FlushE, // Stall, flush Execute stage output logic [2:0] ALUSelectD, // ALU select - output logic [1:0] BSelectE, // Indicates if ZBA_ZBB_ZBC_ZBS instruction in one-hot encoding - output logic [2:0] ZBBSelectE, // ZBB mux select signal + output logic [3:0] BSelectE, // Indicates if ZBA_ZBB_ZBC_ZBS instruction in one-hot encoding + output logic [3:0] ZBBSelectE, // ZBB mux select signal output logic BRegWriteE, // Indicates if it is a R type B instruction in Execute output logic [2:0] BALUControlE, // ALU Control signals for B instructions in Execute Stage output logic BMUActiveE // Bit manipulation instruction being executed @@ -62,7 +62,7 @@ module bmuctrl import cvw::*; #(parameter cvw_t P) ( logic [2:0] BALUSelectD; // ALU Mux select signal in Decode Stage for BMU operations logic BALUOpD; // Indicates if it is an ALU B instruction in Decode Stage - `define BMUCTRLW 17 + `define BMUCTRLW 20 logic [`BMUCTRLW-1:0] BMUControlsD; // Main B Instructions Decoder control signals @@ -78,92 +78,205 @@ module bmuctrl import cvw::*; #(parameter cvw_t P) ( BMUControlsD = `BMUCTRLW'b000_00_000_0_0_0_0_0_0_0_0_1; // default: Illegal bmu instruction; if (P.ZBA_SUPPORTED) begin casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0010000_010: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0; // sh1add - 17'b0110011_0010000_100: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0; // sh2add - 17'b0110011_0010000_110: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0; // sh3add + 17'b0110011_0010000_010: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_0_1_0_0_0_1_0; // sh1add + 17'b0110011_0010000_100: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_0_1_0_0_0_1_0; // sh2add + 17'b0110011_0010000_110: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_0_1_0_0_0_1_0; // sh3add endcase if (P.XLEN==64) casez({OpD, Funct7D, Funct3D}) - 17'b0111011_0010000_010: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_1_1_0_0_0_1_0; // sh1add.uw - 17'b0111011_0010000_100: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_1_1_0_0_0_1_0; // sh2add.uw - 17'b0111011_0010000_110: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_1_1_0_0_0_1_0; // sh3add.uw - 17'b0111011_0000100_000: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_1_1_0_0_0_0_0; // add.uw - 17'b0011011_000010?_001: BMUControlsD = `BMUCTRLW'b001_01_000_1_1_1_1_0_0_0_0_0; // slli.uw + 17'b0111011_0010000_010: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_1_1_0_0_0_1_0; // sh1add.uw + 17'b0111011_0010000_100: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_1_1_0_0_0_1_0; // sh2add.uw + 17'b0111011_0010000_110: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_1_1_0_0_0_1_0; // sh3add.uw + 17'b0111011_0000100_000: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_1_1_0_0_0_0_0; // add.uw + 17'b0011011_000010?_001: BMUControlsD = `BMUCTRLW'b001_0001_0000_1_1_1_1_0_0_0_0_0; // slli.uw endcase end if (P.ZBB_SUPPORTED) begin casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0110000_001: BMUControlsD = `BMUCTRLW'b001_01_111_1_0_0_1_0_1_0_0_0; // rol - 17'b0110011_0110000_101: BMUControlsD = `BMUCTRLW'b001_01_111_1_0_0_1_0_1_0_0_0; // ror 17'b0010011_0110000_001: if ((Rs2D[4:1] == 4'b0010)) - BMUControlsD = `BMUCTRLW'b000_10_001_1_1_0_1_0_0_0_0_0; // sign extend instruction + BMUControlsD = `BMUCTRLW'b000_0010_0001_1_1_0_1_0_0_0_0_0; // sign extend instruction else if ((Rs2D[4:2]==3'b000) & ~(Rs2D[1] & Rs2D[0])) - BMUControlsD = `BMUCTRLW'b000_10_000_1_1_0_1_0_0_0_0_0; // count instruction + BMUControlsD = `BMUCTRLW'b000_0010_0000_1_1_0_1_0_0_0_0_0; // count instruction // // coverage off: This case can't occur in RV64 // 17'b0110011_0000100_100: if (P.XLEN == 32) // BMUControlsD = `BMUCTRLW'b000_10_001_1_1_0_1_0_0_0_0_0; // zexth (rv32) // // coverage on - 17'b0110011_0100000_111: BMUControlsD = `BMUCTRLW'b111_01_111_1_0_0_1_1_0_0_0_0; // andn - 17'b0110011_0100000_110: BMUControlsD = `BMUCTRLW'b110_01_111_1_0_0_1_1_0_0_0_0; // orn - 17'b0110011_0100000_100: BMUControlsD = `BMUCTRLW'b100_01_111_1_0_0_1_1_0_0_0_0; // xnor - 17'b0010011_011010?_101: if ((P.XLEN == 32 ^ Funct7D[0]) & (Rs2D == 5'b11000)) - BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0; // rev8 17'b0010011_0010100_101: if (Rs2D[4:0] == 5'b00111) - BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0; // orc.b - 17'b0110011_0000101_110: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_1_0_0_0_0; // max - 17'b0110011_0000101_111: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_1_0_0_0_0; // maxu - 17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0; // min - 17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0; // minu + BMUControlsD = `BMUCTRLW'b000_0010_0010_1_1_0_1_0_0_0_0_0; // orc.b + 17'b0110011_0000101_110: BMUControlsD = `BMUCTRLW'b000_0010_0111_1_0_0_1_1_0_0_0_0; // max + 17'b0110011_0000101_111: BMUControlsD = `BMUCTRLW'b000_0010_0111_1_0_0_1_1_0_0_0_0; // maxu + 17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_0010_0011_1_0_0_1_1_0_0_0_0; // min + 17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_0010_0011_1_0_0_1_1_0_0_0_0; // minu endcase if (P.XLEN==32) casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0000100_100: BMUControlsD = `BMUCTRLW'b000_10_001_1_1_0_1_0_0_0_0_0; // zexth (rv32) - 17'b0010011_0110000_101: BMUControlsD = `BMUCTRLW'b001_00_111_1_1_0_1_0_1_0_0_0; // rori (rv32) + 17'b0110011_0000100_100: BMUControlsD = `BMUCTRLW'b000_0010_0001_1_1_0_1_0_0_0_0_0; // zexth (rv32) endcase else if (P.XLEN==64) casez({OpD, Funct7D, Funct3D}) - 17'b0111011_0000100_100: BMUControlsD = `BMUCTRLW'b000_10_001_1_0_0_1_0_0_0_0_0; // zexth (rv64) - 17'b0111011_0110000_001: BMUControlsD = `BMUCTRLW'b001_00_111_1_0_1_1_0_1_0_0_0; // rolw - 17'b0111011_0110000_101: BMUControlsD = `BMUCTRLW'b001_00_111_1_0_1_1_0_1_0_0_0; // rorw - 17'b0010011_011000?_101: BMUControlsD = `BMUCTRLW'b001_00_111_1_1_0_1_0_1_0_0_0; // rori (rv64) - 17'b0011011_0110000_101: BMUControlsD = `BMUCTRLW'b001_00_111_1_1_1_1_0_1_0_0_0; // roriw + 17'b0111011_0000100_100: BMUControlsD = `BMUCTRLW'b000_0010_0001_1_0_0_1_0_0_0_0_0; // zexth (rv64) 17'b0011011_0110000_001: if ((Rs2D[4:2]==3'b000) & ~(Rs2D[1] & Rs2D[0])) - BMUControlsD = `BMUCTRLW'b000_10_000_1_1_1_1_0_0_0_0_0; // count word instruction + BMUControlsD = `BMUCTRLW'b000_0010_0000_1_1_1_1_0_0_0_0_0; // count word instruction endcase end if (P.ZBC_SUPPORTED) casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0000101_0??: BMUControlsD = `BMUCTRLW'b000_11_000_1_0_0_1_0_0_0_0_0; // ZBC instruction + 17'b0110011_0000101_0??: BMUControlsD = `BMUCTRLW'b000_0011_0000_1_0_0_1_0_0_0_0_0; // ZBC instruction endcase if (P.ZBS_SUPPORTED) begin // ZBS casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0100100_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_0_0_1_1_0_1_0_0; // bclr - 17'b0110011_0100100_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_0_0_1_1_0_1_0_0; // bext - 17'b0110011_0110100_001: BMUControlsD = `BMUCTRLW'b100_01_000_1_0_0_1_0_0_1_0_0; // binv - 17'b0110011_0010100_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_0_0_1_0_0_1_0_0; // bset + 17'b0110011_0100100_001: BMUControlsD = `BMUCTRLW'b111_0001_0000_1_0_0_1_1_0_1_0_0; // bclr + 17'b0110011_0100100_101: BMUControlsD = `BMUCTRLW'b101_0001_0000_1_0_0_1_1_0_1_0_0; // bext + 17'b0110011_0110100_001: BMUControlsD = `BMUCTRLW'b100_0001_0000_1_0_0_1_0_0_1_0_0; // binv + 17'b0110011_0010100_001: BMUControlsD = `BMUCTRLW'b110_0001_0000_1_0_0_1_0_0_1_0_0; // bset endcase if (P.XLEN==32) // ZBS 64-bit casez({OpD, Funct7D, Funct3D}) - 17'b0010011_0100100_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_1_0_1_1_0_1_0_0; // bclri - 17'b0010011_0100100_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_1_0_1_1_0_1_0_0; // bexti - 17'b0010011_0110100_001: BMUControlsD = `BMUCTRLW'b100_01_000_1_1_0_1_0_0_1_0_0; // binvi - 17'b0010011_0010100_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_1_0_1_0_0_1_0_0; // bseti + 17'b0010011_0100100_001: BMUControlsD = `BMUCTRLW'b111_0001_0000_1_1_0_1_1_0_1_0_0; // bclri + 17'b0010011_0100100_101: BMUControlsD = `BMUCTRLW'b101_0001_0000_1_1_0_1_1_0_1_0_0; // bexti + 17'b0010011_0110100_001: BMUControlsD = `BMUCTRLW'b100_0001_0000_1_1_0_1_0_0_1_0_0; // binvi + 17'b0010011_0010100_001: BMUControlsD = `BMUCTRLW'b110_0001_0000_1_1_0_1_0_0_1_0_0; // bseti endcase else if (P.XLEN==64) // ZBS 64-bit casez({OpD, Funct7D, Funct3D}) - 17'b0010011_010010?_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_1_0_1_1_0_1_0_0; // bclri (rv64) - 17'b0010011_010010?_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_1_0_1_1_0_1_0_0; // bexti (rv64) - 17'b0010011_011010?_001: BMUControlsD = `BMUCTRLW'b100_01_000_1_1_0_1_0_0_1_0_0; // binvi (rv64) - 17'b0010011_001010?_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_1_0_1_0_0_1_0_0; // bseti (rv64) + 17'b0010011_010010?_001: BMUControlsD = `BMUCTRLW'b111_0001_0000_1_1_0_1_1_0_1_0_0; // bclri (rv64) + 17'b0010011_010010?_101: BMUControlsD = `BMUCTRLW'b101_0001_0000_1_1_0_1_1_0_1_0_0; // bexti (rv64) + 17'b0010011_011010?_001: BMUControlsD = `BMUCTRLW'b100_0001_0000_1_1_0_1_0_0_1_0_0; // binvi (rv64) + 17'b0010011_001010?_001: BMUControlsD = `BMUCTRLW'b110_0001_0000_1_1_0_1_0_0_1_0_0; // bseti (rv64) endcase end if (P.ZBB_SUPPORTED | P.ZBS_SUPPORTED) // rv32i/64i shift instructions need BMU ALUSelect when BMU shifter is used casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_00_000_1_0_0_1_0_0_0_0_0; // sra, srl, sll - 17'b0010011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_00_000_1_1_0_1_0_0_0_0_0; // srai, srli, slli - 17'b0111011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_00_000_1_0_1_1_0_0_0_0_0; // sraw, srlw, sllw - 17'b0011011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_00_000_1_1_1_1_0_0_0_0_0; // sraiw, srliw, slliw + 17'b0110011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_0000_0000_1_0_0_1_0_0_0_0_0; // sra, srl, sll + 17'b0010011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_0000_0000_1_1_0_1_0_0_0_0_0; // srai, srli, slli + 17'b0111011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_0000_0000_1_0_1_1_0_0_0_0_0; // sraw, srlw, sllw + 17'b0011011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_0000_0000_1_1_1_1_0_0_0_0_0; // sraiw, srliw, slliw endcase + + if (P.ZBKB_SUPPORTED) begin // ZBKB Bitmanip + casez({OpD,Funct7D, Funct3D}) + 17'b0110011_0000100_100: BMUControlsD = `BMUCTRLW'b000_0100_0001_1_0_0_1_0_0_0_0_0; // pack + 17'b0110011_0000100_111: BMUControlsD = `BMUCTRLW'b000_0100_0001_1_0_0_1_0_0_0_0_0; //packh + 17'b0010011_0110100_101: if (Rs2D == 5'b00111) + BMUControlsD = `BMUCTRLW'b000_0100_0000_1_1_0_1_0_0_0_0_0; //brev8 + endcase + if (P.XLEN==32) + casez({OpD, Funct7D, Funct3D}) + 17'b0010011_0000100_001: if (Rs2D == 5'b01111) + BMUControlsD = `BMUCTRLW'b000_0100_0011_1_1_0_1_0_0_0_0_0; //zip + 17'b0010011_0000100_101: if (Rs2D == 5'b01111) + BMUControlsD = `BMUCTRLW'b000_0100_0011_1_1_0_1_0_0_0_0_0; //unzip + endcase + else if (P.XLEN==64) + casez({OpD,Funct7D, Funct3D}) + 17'b0111011_0000100_100: BMUControlsD = `BMUCTRLW'b000_0100_0101_1_0_1_1_0_0_0_0_0; //packw + endcase + end + + if (P.ZBB_SUPPORTED | P.ZBKB_SUPPORTED) begin // ZBB and ZBKB shared instructions + casez({OpD, Funct7D, Funct3D}) + 17'b0110011_0110000_001: BMUControlsD = `BMUCTRLW'b001_0001_0111_1_0_0_1_0_1_0_0_0; // rol + 17'b0110011_0110000_101: BMUControlsD = `BMUCTRLW'b001_0001_0111_1_0_0_1_0_1_0_0_0; // ror + 17'b0110011_0100000_111: BMUControlsD = `BMUCTRLW'b111_0001_0111_1_0_0_1_1_0_0_0_0; // andn + 17'b0110011_0100000_110: BMUControlsD = `BMUCTRLW'b110_0001_0111_1_0_0_1_1_0_0_0_0; // orn + 17'b0110011_0100000_100: BMUControlsD = `BMUCTRLW'b100_0001_0111_1_0_0_1_1_0_0_0_0; // xnor + 17'b0010011_011010?_101: if ((P.XLEN == 32 ^ Funct7D[0]) & (Rs2D == 5'b11000)) + BMUControlsD = `BMUCTRLW'b000_0010_0010_1_1_0_1_0_0_0_0_0; // rev8 + endcase + if (P.XLEN==32) + casez({OpD, Funct7D, Funct3D}) + 17'b0010011_0110000_101: BMUControlsD = `BMUCTRLW'b001_0000_0111_1_1_0_1_0_1_0_0_0; // rori (rv32) + endcase + else if (P.XLEN==64) + casez({OpD, Funct7D, Funct3D}) + 17'b0111011_0110000_001: BMUControlsD = `BMUCTRLW'b001_0000_0111_1_0_1_1_0_1_0_0_0; // rolw + 17'b0111011_0110000_101: BMUControlsD = `BMUCTRLW'b001_0000_0111_1_0_1_1_0_1_0_0_0; // rorw + 17'b0010011_011000?_101: BMUControlsD = `BMUCTRLW'b001_0000_0111_1_1_0_1_0_1_0_0_0; // rori (rv64) + 17'b0011011_0110000_101: BMUControlsD = `BMUCTRLW'b001_0000_0111_1_1_1_1_0_1_0_0_0; // roriw + endcase + end + + if (P.ZBKC_SUPPORTED) begin // ZBKC + casez({OpD, Funct7D, Funct3D}) + 17'b0110011_0000101_001: BMUControlsD = `BMUCTRLW'b000_0101_0000_1_0_0_1_0_0_0_0_0; // clmul + 17'b0110011_0000101_011: BMUControlsD = `BMUCTRLW'b000_0101_0001_1_0_0_1_0_0_0_0_0; // clmulh + endcase + end + + if (P.ZBKX_SUPPORTED) begin //ZBKX + casez({OpD, Funct7D, Funct3D}) + 17'b0110011_0010100_100: BMUControlsD = `BMUCTRLW'b000_0110_0000_1_0_0_1_0_0_0_0_0; // xperm8 + 17'b0110011_0010100_010: BMUControlsD = `BMUCTRLW'b000_0110_0001_1_0_0_1_0_0_0_0_0; // xperm4 + endcase + end + + if (P.ZKND_SUPPORTED) begin //ZKND + if (P.XLEN==32) + casez({OpD, Funct7D, Funct3D}) + 17'b0110011_??10101_000: BMUControlsD = `BMUCTRLW'b000_0111_0000_1_0_0_1_0_0_0_0_0; // aes32dsi - final round decrypt + 17'b0110011_??10111_000: BMUControlsD = `BMUCTRLW'b000_0111_0001_1_0_0_1_0_0_0_0_0; // aes32dsmi - mid round decrypt + endcase + else if (P.XLEN==64) + casez({OpD, Funct7D, Funct3D}) + 17'b0110011_0011101_000: BMUControlsD = `BMUCTRLW'b000_0111_0000_1_0_0_1_0_0_0_0_0; // aes64ds - decrypt final round + 17'b0110011_0011111_000: BMUControlsD = `BMUCTRLW'b000_0111_0001_1_0_0_1_0_0_0_0_0; // aes64dsm - decrypt mid round + 17'b0010011_0011000_001: if (Rs2D == 5'b00000) + BMUControlsD = `BMUCTRLW'b000_0111_0010_1_1_0_1_0_0_0_0_0; // aes64im - decrypt keyschdule mixcolumns + endcase + end + + if (P.ZKNE_SUPPORTED) begin //ZKNE + if (P.XLEN==32) + casez({OpD, Funct7D, Funct3D}) + 17'b0110011_??10001_000: BMUControlsD = `BMUCTRLW'b000_1000_0000_1_0_0_1_0_0_0_0_0; // aes32esi - final round encrypt + 17'b0110011_??10011_000: BMUControlsD = `BMUCTRLW'b000_1000_0001_1_0_0_1_0_0_0_0_0; // aes32esmi - mid round encrypt + endcase + else if (P.XLEN==64) + casez({OpD, Funct7D, Funct3D}) + 17'b0110011_0011001_000: BMUControlsD = `BMUCTRLW'b000_1000_0000_1_0_0_1_0_0_0_0_0; // aes64es - encrypt final round + 17'b0110011_0011011_000: BMUControlsD = `BMUCTRLW'b000_1000_0001_1_0_0_1_0_0_0_0_0; // aes64esm - encrypt mid round + endcase + end + + if (P.ZKND_SUPPORTED | P.ZKNE_SUPPORTED) begin // ZKND and ZKNE shared instructions + casez({OpD, Funct7D, Funct3D}) + 17'b0010011_0011000_001: if (Rs2D[4] == 1'b1) + BMUControlsD = `BMUCTRLW'b000_0111_0011_1_0_0_1_0_0_0_0_0; // aes64ks1i - key schedule istr1 ... Don't know why this works here only ... P.XLEN is not 64 bits? + endcase + if (P.XLEN==64) + casez({OpD, Funct7D, Funct3D}) + 17'b0110011_0111111_000: BMUControlsD = `BMUCTRLW'b000_0111_0100_1_0_0_1_0_0_0_0_0; // aes64ks2 - key schedule istr2 + endcase + end + + if (P.ZKNH_SUPPORTED) begin // ZKNH + casez({OpD, Funct7D, Funct3D}) + 17'b0010011_0001000_001: + if (Rs2D == 5'b00010) BMUControlsD = `BMUCTRLW'b000_1001_0000_1_0_0_1_0_0_0_0_0; // sha256sig0 + else if (Rs2D == 5'b00011) BMUControlsD = `BMUCTRLW'b000_1001_0001_1_0_0_1_0_0_0_0_0; // sha256sig1 + else if (Rs2D == 5'b00000) BMUControlsD = `BMUCTRLW'b000_1001_0010_1_0_0_1_0_0_0_0_0; // sha256sum0 + else if (Rs2D == 5'b00001) BMUControlsD = `BMUCTRLW'b000_1001_0011_1_0_0_1_0_0_0_0_0; // sha256sum1 + endcase + + if (P.XLEN==32) + casez({OpD, Funct7D, Funct3D}) + 17'b0110011_0101110_000: BMUControlsD = `BMUCTRLW'b000_1001_0100_1_0_0_1_0_0_0_0_0; // sha512sig0h + 17'b0110011_0101010_000: BMUControlsD = `BMUCTRLW'b000_1001_0101_1_0_0_1_0_0_0_0_0; // sha512sig0l + 17'b0110011_0101111_000: BMUControlsD = `BMUCTRLW'b000_1001_0110_1_0_0_1_0_0_0_0_0; // sha512sig1h + 17'b0110011_0101011_000: BMUControlsD = `BMUCTRLW'b000_1001_0111_1_0_0_1_0_0_0_0_0; // sha512sig1l + 17'b0110011_0101000_000: BMUControlsD = `BMUCTRLW'b000_1001_1000_1_0_0_1_0_0_0_0_0; // sha512sum0r + 17'b0110011_0101001_000: BMUControlsD = `BMUCTRLW'b000_1001_1001_1_0_0_1_0_0_0_0_0; // sha512sum1r + endcase + + else if (P.XLEN==64) + casez({OpD, Funct7D, Funct3D}) + 17'b0010011_0001000_001: + if (Rs2D == 5'b00110) BMUControlsD = `BMUCTRLW'b000_1001_1010_1_0_0_1_0_0_0_0_0; // sha512sig0 + else if (Rs2D == 5'b00111) BMUControlsD = `BMUCTRLW'b000_1001_1011_1_0_0_1_0_0_0_0_0; // sha512sig1 + else if (Rs2D == 5'b00100) BMUControlsD = `BMUCTRLW'b000_1001_1100_1_0_0_1_0_0_0_0_0; // sha512sum0 + else if (Rs2D == 5'b00101) BMUControlsD = `BMUCTRLW'b000_1001_1101_1_0_0_1_0_0_0_0_0; // sha512sum1 + endcase + end end // Unpack Control Signals @@ -176,5 +289,5 @@ module bmuctrl import cvw::*; #(parameter cvw_t P) ( assign ALUSelectD = BALUOpD ? BALUSelectD : (ALUOpD ? Funct3D : 3'b000); // BMU Execute stage pipieline control register - flopenrc #(10) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD, ~IllegalBitmanipInstrD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE, BMUActiveE}); + flopenrc #(13) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD, ~IllegalBitmanipInstrD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE, BMUActiveE}); endmodule