diff --git a/bin/wsim b/bin/wsim index 8d9ba3167..34e725707 100755 --- a/bin/wsim +++ b/bin/wsim @@ -47,7 +47,8 @@ elif (args.elf != ""): if(args.testsuite.endswith('.elf') and args.elf == ""): # No --elf argument; check if testsuite has a .elf extension and use that instead if (os.path.isfile(args.testsuite)): ElfFile = "+ElfFile=" + os.path.abspath(args.testsuite) - args.testsuite=args.testsuite.rsplit('/', 1)[1] + if ('/' in args.testsuite): + args.testsuite=args.testsuite.rsplit('/', 1)[1] # strip off path if present else: print("ELF file not found: " + args.testsuite) exit(1) diff --git a/src/ieu/alu.sv b/src/ieu/alu.sv index e142de1e7..acd5b025d 100644 --- a/src/ieu/alu.sv +++ b/src/ieu/alu.sv @@ -30,7 +30,7 @@ module alu import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] A, B, // Operands - input logic W64, // W64-type instruction + input logic W64, UW64, // W64/.uw-type instruction input logic SubArith, // Subtraction or arithmetic shift input logic [2:0] ALUSelect, // ALU mux select signal input logic [3:0] BSelect, // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction @@ -77,7 +77,7 @@ module alu import cvw::*; #(parameter cvw_t P) ( end else assign ZeroCondMaskInvB = CondMaskInvB; // no masking if Zicond is not supported // Shifts (configurable for rotation) - shifter #(P) sh(.A, .Amt(B[P.LOG_XLEN-1:0]), .Right(Funct3[2]), .W64, .SubArith, .Y(Shift), .Rotate(BALUControl[2])); + shifter #(P) sh(.A(CondShiftA), .Amt(B[P.LOG_XLEN-1:0]), .Right(Funct3[2]), .W64, .SubArith, .Y(Shift), .Rotate(BALUControl[2])); // Condition code flags are based on subtraction output Sum = A-B. // Overflow occurs when the numbers being subtracted have the opposite sign @@ -113,7 +113,7 @@ module alu import cvw::*; #(parameter cvw_t P) ( P.ZBKB_SUPPORTED | P.ZBKC_SUPPORTED | P.ZBKX_SUPPORTED | P.ZKND_SUPPORTED | P.ZKNE_SUPPORTED | P.ZKNH_SUPPORTED) begin : bitmanipalu bitmanipalu #(P) balu( - .A, .B, .W64, .BSelect, .ZBBSelect, .BMUActive, + .A, .B, .W64, .UW64, .BSelect, .ZBBSelect, .BMUActive, .Funct3, .Funct7, .Rs2E, .LT,.LTU, .BALUControl, .PreALUResult, .FullResult, .CondMaskB, .CondShiftA, .ALUResult); end else begin diff --git a/src/ieu/bmu/bitmanipalu.sv b/src/ieu/bmu/bitmanipalu.sv index 4caee782a..9bb0905ad 100644 --- a/src/ieu/bmu/bitmanipalu.sv +++ b/src/ieu/bmu/bitmanipalu.sv @@ -30,16 +30,16 @@ module bitmanipalu import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] A, B, // Operands - input logic W64, // W64-type instruction + input logic W64, UW64, // W64/.uw-type instruction input logic [3:0] BSelect, // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction input logic [3:0] ZBBSelect, // ZBB mux select signal input logic [2:0] Funct3, // Funct3 field of opcode indicates operation to perform input logic [6:0] Funct7, // Funct7 field for ZKND and ZKNE operations input logic [4:0] Rs2E, // Register source2 for RNUM of ZKNE/ZKND - input logic LT, // less than flag - input logic LTU, // less than unsigned flag + input logic LT, // less than flag + input logic LTU, // less than unsigned flag input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage - input logic BMUActive, // Bit manipulation instruction being executed + input logic BMUActive, // Bit manipulation instruction being executed input logic [P.XLEN-1:0] PreALUResult, // PreALUResult signals input logic [P.XLEN-1:0] FullResult, // FullResult signals output logic [P.XLEN-1:0] CondMaskB, // B is conditionally masked for ZBS instructions @@ -76,7 +76,7 @@ module bitmanipalu import cvw::*; #(parameter cvw_t P) ( // 0-3 bit Pre-Shift Mux if (P.ZBA_SUPPORTED) begin: zbapreshift if (P.XLEN == 64) begin - mux2 #(64) zextmux(A, {{32{1'b0}}, A[31:0]}, W64, CondZextA); + mux2 #(64) zextmux(A, {{32{1'b0}}, A[31:0]}, UW64, CondZextA); end else assign CondZextA = A; assign PreShiftAmt = Funct3[2:1] & {2{PreShift}}; assign CondShiftA = CondZextA << (PreShiftAmt); diff --git a/src/ieu/bmu/bmuctrl.sv b/src/ieu/bmu/bmuctrl.sv index fb4e603c0..d482616ef 100644 --- a/src/ieu/bmu/bmuctrl.sv +++ b/src/ieu/bmu/bmuctrl.sv @@ -36,6 +36,7 @@ module bmuctrl import cvw::*; #(parameter cvw_t P) ( output logic BRegWriteD, // Indicates if it is a R type B instruction in Decode Stage output logic BALUSrcBD, // Indicates if it is an I/IW (non auipc) type B instruction in Decode Stage output logic BW64D, // Indiciates if it is a W type B instruction in Decode Stage + output logic BUW64D, // Indiciates if it is a .uw type B instruction in Decode Stage output logic BSubArithD, // TRUE if ext, clr, andn, orn, xnor instruction in Decode Stage output logic IllegalBitmanipInstrD, // Indicates if it is unrecognized B instruction in Decode Stage // Execute stage control signals @@ -60,7 +61,7 @@ module bmuctrl import cvw::*; #(parameter cvw_t P) ( logic [3:0] BSelectD; // Indicates if ZBA_ZBB_ZBC_ZBS instruction in one-hot encoding in Decode stage logic [3:0] ZBBSelectD; // ZBB mux select signal in Decode stage - `define BMUCTRLW 20 + `define BMUCTRLW 21 logic [`BMUCTRLW-1:0] BMUControlsD; // Main B Instructions Decoder control signals @@ -72,209 +73,209 @@ module bmuctrl import cvw::*; #(parameter cvw_t P) ( // Main Instruction Decoder always_comb begin - // BALUSelect_BSelect_ZBBSelect_BRegWrite_BALUSrcB_BW64_BALUOp_BSubArithD_RotateD_MaskD_PreShiftD_IllegalBitmanipInstrD + // BALUSelect_BSelect_ZBBSelect_BRegWrite_BALUSrcB_BW64_BUW64_BALUOp_BSubArithD_RotateD_MaskD_PreShiftD_IllegalBitmanipInstrD BMUControlsD = `BMUCTRLW'b000_00_000_0_0_0_0_0_0_0_0_1; // default: Illegal bmu instruction; if (P.ZBA_SUPPORTED) begin casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0010000_010: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_0_1_0_0_0_1_0; // sh1add - 17'b0110011_0010000_100: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_0_1_0_0_0_1_0; // sh2add - 17'b0110011_0010000_110: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_0_1_0_0_0_1_0; // sh3add + 17'b0110011_0010000_010: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_0_0_1_0_0_0_1_0; // sh1add + 17'b0110011_0010000_100: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_0_0_1_0_0_0_1_0; // sh2add + 17'b0110011_0010000_110: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_0_0_1_0_0_0_1_0; // sh3add endcase if (P.XLEN==64) casez({OpD, Funct7D, Funct3D}) - 17'b0111011_0010000_010: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_1_1_0_0_0_1_0; // sh1add.uw - 17'b0111011_0010000_100: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_1_1_0_0_0_1_0; // sh2add.uw - 17'b0111011_0010000_110: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_1_1_0_0_0_1_0; // sh3add.uw - 17'b0111011_0000100_000: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_1_1_0_0_0_0_0; // add.uw - 17'b0011011_000010?_001: BMUControlsD = `BMUCTRLW'b001_0001_0000_1_1_1_1_0_0_0_0_0; // slli.uw + 17'b0111011_0010000_010: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_0_1_1_0_0_0_1_0; // sh1add.uw + 17'b0111011_0010000_100: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_0_1_1_0_0_0_1_0; // sh2add.uw + 17'b0111011_0010000_110: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_0_1_1_0_0_0_1_0; // sh3add.uw + 17'b0111011_0000100_000: BMUControlsD = `BMUCTRLW'b000_0001_0000_1_0_0_1_1_0_0_0_0_0; // add.uw + 17'b0011011_000010?_001: BMUControlsD = `BMUCTRLW'b001_0001_0000_1_1_0_1_1_0_0_0_0_0; // slli.uw endcase end if (P.ZBB_SUPPORTED) begin casez({OpD, Funct7D, Funct3D}) 17'b0010011_0110000_001: if ((Rs2D[4:1] == 4'b0010)) - BMUControlsD = `BMUCTRLW'b000_0010_0001_1_1_0_1_0_0_0_0_0; // sign extend instruction + BMUControlsD = `BMUCTRLW'b000_0010_0001_1_1_0_0_1_0_0_0_0_0; // sign extend instruction else if ((Rs2D[4:2]==3'b000) & ~(Rs2D[1] & Rs2D[0])) - BMUControlsD = `BMUCTRLW'b000_0010_0000_1_1_0_1_0_0_0_0_0; // count instruction + BMUControlsD = `BMUCTRLW'b000_0010_0000_1_1_0_0_1_0_0_0_0_0; // count instruction 17'b0010011_0010100_101: if (Rs2D[4:0] == 5'b00111) - BMUControlsD = `BMUCTRLW'b000_0010_0010_1_1_0_1_0_0_0_0_0; // orc.b - 17'b0110011_0000101_110: BMUControlsD = `BMUCTRLW'b000_0010_0111_1_0_0_1_1_0_0_0_0; // max - 17'b0110011_0000101_111: BMUControlsD = `BMUCTRLW'b000_0010_0111_1_0_0_1_1_0_0_0_0; // maxu - 17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_0010_0011_1_0_0_1_1_0_0_0_0; // min - 17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_0010_0011_1_0_0_1_1_0_0_0_0; // minu + BMUControlsD = `BMUCTRLW'b000_0010_0010_1_1_0_0_1_0_0_0_0_0; // orc.b + 17'b0110011_0000101_110: BMUControlsD = `BMUCTRLW'b000_0010_0111_1_0_0_0_1_1_0_0_0_0; // max + 17'b0110011_0000101_111: BMUControlsD = `BMUCTRLW'b000_0010_0111_1_0_0_0_1_1_0_0_0_0; // maxu + 17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_0010_0011_1_0_0_0_1_1_0_0_0_0; // min + 17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_0010_0011_1_0_0_0_1_1_0_0_0_0; // minu endcase if (P.XLEN==32) casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0000100_100: BMUControlsD = `BMUCTRLW'b000_0010_0001_1_1_0_1_0_0_0_0_0; // zexth (rv32) + 17'b0110011_0000100_100: BMUControlsD = `BMUCTRLW'b000_0010_0001_1_1_0_0_1_0_0_0_0_0; // zexth (rv32) endcase else if (P.XLEN==64) casez({OpD, Funct7D, Funct3D}) - 17'b0111011_0000100_100: BMUControlsD = `BMUCTRLW'b000_0010_0001_1_0_0_1_0_0_0_0_0; // zexth (rv64) + 17'b0111011_0000100_100: BMUControlsD = `BMUCTRLW'b000_0010_0001_1_0_0_0_1_0_0_0_0_0; // zexth (rv64) 17'b0011011_0110000_001: if ((Rs2D[4:2]==3'b000) & ~(Rs2D[1] & Rs2D[0])) - BMUControlsD = `BMUCTRLW'b000_0010_0000_1_1_1_1_0_0_0_0_0; // count word instruction + BMUControlsD = `BMUCTRLW'b000_0010_0000_1_1_1_1_0_0_0_0_0_0; // count word instruction (clzw/ctzw/cpopw) endcase end if (P.ZBC_SUPPORTED) casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0000101_010: BMUControlsD = `BMUCTRLW'b000_0011_0001_1_0_0_1_0_0_0_0_0; // clmulr - 17'b0110011_0000101_0??: BMUControlsD = `BMUCTRLW'b000_0011_0000_1_0_0_1_0_0_0_0_0; // clmul/clmulh + 17'b0110011_0000101_010: BMUControlsD = `BMUCTRLW'b000_0011_0001_1_0_0_0_1_0_0_0_0_0; // clmulr + 17'b0110011_0000101_0??: BMUControlsD = `BMUCTRLW'b000_0011_0000_1_0_0_0_1_0_0_0_0_0; // clmul/clmulh endcase if (P.ZBKC_SUPPORTED) begin casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0000101_0??: BMUControlsD = `BMUCTRLW'b000_0011_0000_1_0_0_1_0_0_0_0_0; // clmul/clmulh - // 17'b0110011_0000101_001: BMUControlsD = `BMUCTRLW'b000_0011_0000_1_0_0_1_0_0_0_0_0; // clmul - // 17'b0110011_0000101_011: BMUControlsD = `BMUCTRLW'b000_0011_0001_1_0_0_1_0_0_0_0_0; // clmulh + 17'b0110011_0000101_0??: BMUControlsD = `BMUCTRLW'b000_0011_0000_1_0_0_0_1_0_0_0_0_0; // clmul/clmulh + // 17'b0110011_0000101_001: BMUControlsD = `BMUCTRLW'b000_0011_0000_1_0_0_0_1_0_0_0_0_0; // clmul + // 17'b0110011_0000101_011: BMUControlsD = `BMUCTRLW'b000_0011_0001_1_0_0_0_1_0_0_0_0_0; // clmulh endcase end if (P.ZBS_SUPPORTED) begin // ZBS casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0100100_001: BMUControlsD = `BMUCTRLW'b111_0001_0000_1_0_0_1_1_0_1_0_0; // bclr - 17'b0110011_0100100_101: BMUControlsD = `BMUCTRLW'b101_0001_0000_1_0_0_1_0_0_1_0_0; // bext - 17'b0110011_0110100_001: BMUControlsD = `BMUCTRLW'b100_0001_0000_1_0_0_1_0_0_1_0_0; // binv - 17'b0110011_0010100_001: BMUControlsD = `BMUCTRLW'b110_0001_0000_1_0_0_1_0_0_1_0_0; // bset + 17'b0110011_0100100_001: BMUControlsD = `BMUCTRLW'b111_0001_0000_1_0_0_0_1_1_0_1_0_0; // bclr + 17'b0110011_0100100_101: BMUControlsD = `BMUCTRLW'b101_0001_0000_1_0_0_0_1_0_0_1_0_0; // bext + 17'b0110011_0110100_001: BMUControlsD = `BMUCTRLW'b100_0001_0000_1_0_0_0_1_0_0_1_0_0; // binv + 17'b0110011_0010100_001: BMUControlsD = `BMUCTRLW'b110_0001_0000_1_0_0_0_1_0_0_1_0_0; // bset endcase if (P.XLEN==32) // ZBS 64-bit casez({OpD, Funct7D, Funct3D}) - 17'b0010011_0100100_001: BMUControlsD = `BMUCTRLW'b111_0001_0000_1_1_0_1_1_0_1_0_0; // bclri - 17'b0010011_0100100_101: BMUControlsD = `BMUCTRLW'b101_0001_0000_1_1_0_1_0_0_1_0_0; // bexti - 17'b0010011_0110100_001: BMUControlsD = `BMUCTRLW'b100_0001_0000_1_1_0_1_0_0_1_0_0; // binvi - 17'b0010011_0010100_001: BMUControlsD = `BMUCTRLW'b110_0001_0000_1_1_0_1_0_0_1_0_0; // bseti + 17'b0010011_0100100_001: BMUControlsD = `BMUCTRLW'b111_0001_0000_1_1_0_0_1_1_0_1_0_0; // bclri + 17'b0010011_0100100_101: BMUControlsD = `BMUCTRLW'b101_0001_0000_1_1_0_0_1_0_0_1_0_0; // bexti + 17'b0010011_0110100_001: BMUControlsD = `BMUCTRLW'b100_0001_0000_1_1_0_0_1_0_0_1_0_0; // binvi + 17'b0010011_0010100_001: BMUControlsD = `BMUCTRLW'b110_0001_0000_1_1_0_0_1_0_0_1_0_0; // bseti endcase else if (P.XLEN==64) // ZBS 64-bit casez({OpD, Funct7D, Funct3D}) - 17'b0010011_010010?_001: BMUControlsD = `BMUCTRLW'b111_0001_0000_1_1_0_1_1_0_1_0_0; // bclri (rv64) - 17'b0010011_010010?_101: BMUControlsD = `BMUCTRLW'b101_0001_0000_1_1_0_1_0_0_1_0_0; // bexti (rv64) - 17'b0010011_011010?_001: BMUControlsD = `BMUCTRLW'b100_0001_0000_1_1_0_1_0_0_1_0_0; // binvi (rv64) - 17'b0010011_001010?_001: BMUControlsD = `BMUCTRLW'b110_0001_0000_1_1_0_1_0_0_1_0_0; // bseti (rv64) + 17'b0010011_010010?_001: BMUControlsD = `BMUCTRLW'b111_0001_0000_1_1_0_0_1_1_0_1_0_0; // bclri (rv64) + 17'b0010011_010010?_101: BMUControlsD = `BMUCTRLW'b101_0001_0000_1_1_0_0_1_0_0_1_0_0; // bexti (rv64) + 17'b0010011_011010?_001: BMUControlsD = `BMUCTRLW'b100_0001_0000_1_1_0_0_1_0_0_1_0_0; // binvi (rv64) + 17'b0010011_001010?_001: BMUControlsD = `BMUCTRLW'b110_0001_0000_1_1_0_0_1_0_0_1_0_0; // bseti (rv64) endcase end if (P.ZBB_SUPPORTED | P.ZBS_SUPPORTED) // rv32i/64i shift instructions need BMU ALUSelect when BMU shifter is used casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_0000_0000_1_0_0_1_0_0_0_0_0; // sra, srl, sll - 17'b0010011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_0000_0000_1_1_0_1_0_0_0_0_0; // srai, srli, slli - 17'b0111011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_0000_0000_1_0_1_1_0_0_0_0_0; // sraw, srlw, sllw - 17'b0011011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_0000_0000_1_1_1_1_0_0_0_0_0; // sraiw, srliw, slliw + 17'b0110011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_0000_0000_1_0_0_0_1_0_0_0_0_0; // sra, srl, sll + 17'b0010011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_0000_0000_1_1_0_0_1_0_0_0_0_0; // srai, srli, slli + 17'b0111011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_0000_0000_1_0_1_0_1_0_0_0_0_0; // sraw, srlw, sllw + 17'b0011011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_0000_0000_1_1_1_0_1_0_0_0_0_0; // sraiw, srliw, slliw endcase if (P.ZBKB_SUPPORTED) begin // ZBKB Bitmanip casez({OpD,Funct7D, Funct3D}) - 17'b0110011_0000100_100: BMUControlsD = `BMUCTRLW'b000_0100_0001_1_0_0_1_0_0_0_0_0; // pack - 17'b0110011_0000100_111: BMUControlsD = `BMUCTRLW'b000_0100_0001_1_0_0_1_0_0_0_0_0; // packh + 17'b0110011_0000100_100: BMUControlsD = `BMUCTRLW'b000_0100_0001_1_0_0_0_1_0_0_0_0_0; // pack + 17'b0110011_0000100_111: BMUControlsD = `BMUCTRLW'b000_0100_0001_1_0_0_0_1_0_0_0_0_0; // packh 17'b0010011_0110100_101: if (Rs2D == 5'b00111) - BMUControlsD = `BMUCTRLW'b000_0100_0000_1_1_0_1_0_0_0_0_0; // brev8 + BMUControlsD = `BMUCTRLW'b000_0100_0000_1_1_0_0_1_0_0_0_0_0; // brev8 endcase if (P.XLEN==32) casez({OpD, Funct7D, Funct3D}) 17'b0010011_0000100_001: if (Rs2D == 5'b01111) - BMUControlsD = `BMUCTRLW'b000_0100_0011_1_1_0_1_0_0_0_0_0; //zip + BMUControlsD = `BMUCTRLW'b000_0100_0011_1_1_0_0_1_0_0_0_0_0; //zip 17'b0010011_0000100_101: if (Rs2D == 5'b01111) - BMUControlsD = `BMUCTRLW'b000_0100_0011_1_1_0_1_0_0_0_0_0; //unzip + BMUControlsD = `BMUCTRLW'b000_0100_0011_1_1_0_0_1_0_0_0_0_0; //unzip endcase else if (P.XLEN==64) casez({OpD,Funct7D, Funct3D}) - 17'b0111011_0000100_100: BMUControlsD = `BMUCTRLW'b000_0100_0101_1_0_1_1_0_0_0_0_0; //packw + 17'b0111011_0000100_100: BMUControlsD = `BMUCTRLW'b000_0100_0101_1_0_1_0_1_0_0_0_0_0; //packw endcase end if (P.ZBB_SUPPORTED | P.ZBKB_SUPPORTED) begin // ZBB and ZBKB shared instructions casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0110000_001: BMUControlsD = `BMUCTRLW'b001_0001_0111_1_0_0_1_0_1_0_0_0; // rol - 17'b0110011_0110000_101: BMUControlsD = `BMUCTRLW'b001_0001_0111_1_0_0_1_0_1_0_0_0; // ror - 17'b0110011_0100000_111: BMUControlsD = `BMUCTRLW'b111_0001_0111_1_0_0_1_1_0_0_0_0; // andn - 17'b0110011_0100000_110: BMUControlsD = `BMUCTRLW'b110_0001_0111_1_0_0_1_1_0_0_0_0; // orn - 17'b0110011_0100000_100: BMUControlsD = `BMUCTRLW'b100_0001_0111_1_0_0_1_1_0_0_0_0; // xnor + 17'b0110011_0110000_001: BMUControlsD = `BMUCTRLW'b001_0001_0111_1_0_0_0_1_0_1_0_0_0; // rol + 17'b0110011_0110000_101: BMUControlsD = `BMUCTRLW'b001_0001_0111_1_0_0_0_1_0_1_0_0_0; // ror + 17'b0110011_0100000_111: BMUControlsD = `BMUCTRLW'b111_0001_0111_1_0_0_0_1_1_0_0_0_0; // andn + 17'b0110011_0100000_110: BMUControlsD = `BMUCTRLW'b110_0001_0111_1_0_0_0_1_1_0_0_0_0; // orn + 17'b0110011_0100000_100: BMUControlsD = `BMUCTRLW'b100_0001_0111_1_0_0_0_1_1_0_0_0_0; // xnor 17'b0010011_011010?_101: if ((P.XLEN == 32 ^ Funct7D[0]) & (Rs2D == 5'b11000)) - BMUControlsD = `BMUCTRLW'b000_0010_0010_1_1_0_1_0_0_0_0_0; // rev8 + BMUControlsD = `BMUCTRLW'b000_0010_0010_1_1_0_0_1_0_0_0_0_0; // rev8 endcase if (P.XLEN==32) casez({OpD, Funct7D, Funct3D}) - 17'b0010011_0110000_101: BMUControlsD = `BMUCTRLW'b001_0000_0111_1_1_0_1_0_1_0_0_0; // rori (rv32) + 17'b0010011_0110000_101: BMUControlsD = `BMUCTRLW'b001_0000_0111_1_1_0_0_1_0_1_0_0_0; // rori (rv32) endcase else if (P.XLEN==64) casez({OpD, Funct7D, Funct3D}) - 17'b0111011_0110000_001: BMUControlsD = `BMUCTRLW'b001_0000_0111_1_0_1_1_0_1_0_0_0; // rolw - 17'b0111011_0110000_101: BMUControlsD = `BMUCTRLW'b001_0000_0111_1_0_1_1_0_1_0_0_0; // rorw - 17'b0010011_011000?_101: BMUControlsD = `BMUCTRLW'b001_0000_0111_1_1_0_1_0_1_0_0_0; // rori (rv64) - 17'b0011011_0110000_101: BMUControlsD = `BMUCTRLW'b001_0000_0111_1_1_1_1_0_1_0_0_0; // roriw + 17'b0111011_0110000_001: BMUControlsD = `BMUCTRLW'b001_0000_0111_1_0_1_0_1_0_1_0_0_0; // rolw + 17'b0111011_0110000_101: BMUControlsD = `BMUCTRLW'b001_0000_0111_1_0_1_0_1_0_1_0_0_0; // rorw + 17'b0010011_011000?_101: BMUControlsD = `BMUCTRLW'b001_0000_0111_1_1_0_0_1_0_1_0_0_0; // rori (rv64) + 17'b0011011_0110000_101: BMUControlsD = `BMUCTRLW'b001_0000_0111_1_1_1_0_1_0_1_0_0_0; // roriw endcase end if (P.ZBKX_SUPPORTED) begin //ZBKX casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0010100_100: BMUControlsD = `BMUCTRLW'b000_0110_0000_1_0_0_1_0_0_0_0_0; // xperm8 - 17'b0110011_0010100_010: BMUControlsD = `BMUCTRLW'b000_0110_0001_1_0_0_1_0_0_0_0_0; // xperm4 + 17'b0110011_0010100_100: BMUControlsD = `BMUCTRLW'b000_0110_0000_1_0_0_0_1_0_0_0_0_0; // xperm8 + 17'b0110011_0010100_010: BMUControlsD = `BMUCTRLW'b000_0110_0001_1_0_0_0_1_0_0_0_0_0; // xperm4 endcase end if (P.ZKND_SUPPORTED) begin //ZKND if (P.XLEN==32) casez({OpD, Funct7D, Funct3D}) - 17'b0110011_??10101_000: BMUControlsD = `BMUCTRLW'b000_0111_0100_1_0_0_1_0_0_0_0_0; // aes32dsi - final round decrypt - 17'b0110011_??10111_000: BMUControlsD = `BMUCTRLW'b000_0111_0000_1_0_0_1_0_0_0_0_0; // aes32dsmi - mid round decrypt + 17'b0110011_??10101_000: BMUControlsD = `BMUCTRLW'b000_0111_0100_1_0_0_0_1_0_0_0_0_0; // aes32dsi - final round decrypt + 17'b0110011_??10111_000: BMUControlsD = `BMUCTRLW'b000_0111_0000_1_0_0_0_1_0_0_0_0_0; // aes32dsmi - mid round decrypt endcase else if (P.XLEN==64) casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0011101_000: BMUControlsD = `BMUCTRLW'b000_0111_0100_1_0_0_1_0_0_0_0_0; // aes64ds - decrypt final round - 17'b0110011_0011111_000: BMUControlsD = `BMUCTRLW'b000_0111_0000_1_0_0_1_0_0_0_0_0; // aes64dsm - decrypt mid round + 17'b0110011_0011101_000: BMUControlsD = `BMUCTRLW'b000_0111_0100_1_0_0_0_1_0_0_0_0_0; // aes64ds - decrypt final round + 17'b0110011_0011111_000: BMUControlsD = `BMUCTRLW'b000_0111_0000_1_0_0_0_1_0_0_0_0_0; // aes64dsm - decrypt mid round 17'b0010011_0011000_001: if (Rs2D == 5'b00000) - BMUControlsD = `BMUCTRLW'b000_0111_1000_1_1_0_1_0_0_0_0_0; // aes64im - decrypt keyschdule mixcolumns + BMUControlsD = `BMUCTRLW'b000_0111_1000_1_1_0_0_1_0_0_0_0_0; // aes64im - decrypt keyschdule mixcolumns endcase end if (P.ZKNE_SUPPORTED) begin //ZKNE if (P.XLEN==32) casez({OpD, Funct7D, Funct3D}) - 17'b0110011_??10001_000: BMUControlsD = `BMUCTRLW'b000_0111_0101_1_0_0_1_0_0_0_0_0; // aes32esi - final round encrypt - 17'b0110011_??10011_000: BMUControlsD = `BMUCTRLW'b000_0111_0001_1_0_0_1_0_0_0_0_0; // aes32esmi - mid round encrypt + 17'b0110011_??10001_000: BMUControlsD = `BMUCTRLW'b000_0111_0101_1_0_0_0_1_0_0_0_0_0; // aes32esi - final round encrypt + 17'b0110011_??10011_000: BMUControlsD = `BMUCTRLW'b000_0111_0001_1_0_0_0_1_0_0_0_0_0; // aes32esmi - mid round encrypt endcase else if (P.XLEN==64) casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0011001_000: BMUControlsD = `BMUCTRLW'b000_0111_0101_1_0_0_1_0_0_0_0_0; // aes64es - encrypt final round - 17'b0110011_0011011_000: BMUControlsD = `BMUCTRLW'b000_0111_0001_1_0_0_1_0_0_0_0_0; // aes64esm - encrypt mid round + 17'b0110011_0011001_000: BMUControlsD = `BMUCTRLW'b000_0111_0101_1_0_0_0_1_0_0_0_0_0; // aes64es - encrypt final round + 17'b0110011_0011011_000: BMUControlsD = `BMUCTRLW'b000_0111_0001_1_0_0_0_1_0_0_0_0_0; // aes64esm - encrypt mid round endcase end if ((P.ZKND_SUPPORTED | P.ZKNE_SUPPORTED) & P.XLEN == 64) begin // ZKND and ZKNE shared instructions casez({OpD, Funct7D, Funct3D}) 17'b0010011_0011000_001: if (Rs2D[4] == 1'b1) - BMUControlsD = `BMUCTRLW'b000_0111_0010_1_0_0_1_0_0_0_0_0; // aes64ks1i - key schedule istr1 - 17'b0110011_0111111_000: BMUControlsD = `BMUCTRLW'b000_0111_0011_1_0_0_1_0_0_0_0_0; // aes64ks2 - key schedule istr2 + BMUControlsD = `BMUCTRLW'b000_0111_0010_1_0_0_0_1_0_0_0_0_0; // aes64ks1i - key schedule istr1 + 17'b0110011_0111111_000: BMUControlsD = `BMUCTRLW'b000_0111_0011_1_0_0_0_1_0_0_0_0_0; // aes64ks2 - key schedule istr2 endcase end if (P.ZKNH_SUPPORTED) begin // ZKNH casez({OpD, Funct7D, Funct3D}) 17'b0010011_0001000_001: - if (Rs2D == 5'b00010) BMUControlsD = `BMUCTRLW'b000_1000_0000_1_0_0_1_0_0_0_0_0; // sha256sig0 - else if (Rs2D == 5'b00011) BMUControlsD = `BMUCTRLW'b000_1000_0001_1_0_0_1_0_0_0_0_0; // sha256sig1 - else if (Rs2D == 5'b00000) BMUControlsD = `BMUCTRLW'b000_1000_0010_1_0_0_1_0_0_0_0_0; // sha256sum0 - else if (Rs2D == 5'b00001) BMUControlsD = `BMUCTRLW'b000_1000_0011_1_0_0_1_0_0_0_0_0; // sha256sum1 + if (Rs2D == 5'b00010) BMUControlsD = `BMUCTRLW'b000_1000_0000_1_0_0_0_1_0_0_0_0_0; // sha256sig0 + else if (Rs2D == 5'b00011) BMUControlsD = `BMUCTRLW'b000_1000_0001_1_0_0_0_1_0_0_0_0_0; // sha256sig1 + else if (Rs2D == 5'b00000) BMUControlsD = `BMUCTRLW'b000_1000_0010_1_0_0_0_1_0_0_0_0_0; // sha256sum0 + else if (Rs2D == 5'b00001) BMUControlsD = `BMUCTRLW'b000_1000_0011_1_0_0_0_1_0_0_0_0_0; // sha256sum1 endcase if (P.XLEN==32) casez({OpD, Funct7D, Funct3D}) - 17'b0110011_0101110_000: BMUControlsD = `BMUCTRLW'b000_1000_1000_1_0_0_1_0_0_0_0_0; // sha512sig0h - 17'b0110011_0101010_000: BMUControlsD = `BMUCTRLW'b000_1000_1001_1_0_0_1_0_0_0_0_0; // sha512sig0l - 17'b0110011_0101111_000: BMUControlsD = `BMUCTRLW'b000_1000_1010_1_0_0_1_0_0_0_0_0; // sha512sig1h - 17'b0110011_0101011_000: BMUControlsD = `BMUCTRLW'b000_1000_1011_1_0_0_1_0_0_0_0_0; // sha512sig1l - 17'b0110011_0101000_000: BMUControlsD = `BMUCTRLW'b000_1000_1100_1_0_0_1_0_0_0_0_0; // sha512sum0r - 17'b0110011_0101001_000: BMUControlsD = `BMUCTRLW'b000_1000_1110_1_0_0_1_0_0_0_0_0; // sha512sum1r + 17'b0110011_0101110_000: BMUControlsD = `BMUCTRLW'b000_1000_1000_1_0_0_0_1_0_0_0_0_0; // sha512sig0h + 17'b0110011_0101010_000: BMUControlsD = `BMUCTRLW'b000_1000_1001_1_0_0_0_1_0_0_0_0_0; // sha512sig0l + 17'b0110011_0101111_000: BMUControlsD = `BMUCTRLW'b000_1000_1010_1_0_0_0_1_0_0_0_0_0; // sha512sig1h + 17'b0110011_0101011_000: BMUControlsD = `BMUCTRLW'b000_1000_1011_1_0_0_0_1_0_0_0_0_0; // sha512sig1l + 17'b0110011_0101000_000: BMUControlsD = `BMUCTRLW'b000_1000_1100_1_0_0_0_1_0_0_0_0_0; // sha512sum0r + 17'b0110011_0101001_000: BMUControlsD = `BMUCTRLW'b000_1000_1110_1_0_0_0_1_0_0_0_0_0; // sha512sum1r endcase else if (P.XLEN==64) casez({OpD, Funct7D, Funct3D}) 17'b0010011_0001000_001: - if (Rs2D == 5'b00110) BMUControlsD = `BMUCTRLW'b000_1000_1000_1_0_0_1_0_0_0_0_0; // sha512sig0 - else if (Rs2D == 5'b00111) BMUControlsD = `BMUCTRLW'b000_1000_1001_1_0_0_1_0_0_0_0_0; // sha512sig1 - else if (Rs2D == 5'b00100) BMUControlsD = `BMUCTRLW'b000_1000_1010_1_0_0_1_0_0_0_0_0; // sha512sum0 - else if (Rs2D == 5'b00101) BMUControlsD = `BMUCTRLW'b000_1000_1011_1_0_0_1_0_0_0_0_0; // sha512sum1 + if (Rs2D == 5'b00110) BMUControlsD = `BMUCTRLW'b000_1000_1000_1_0_0_0_1_0_0_0_0_0; // sha512sig0 + else if (Rs2D == 5'b00111) BMUControlsD = `BMUCTRLW'b000_1000_1001_1_0_0_0_1_0_0_0_0_0; // sha512sig1 + else if (Rs2D == 5'b00100) BMUControlsD = `BMUCTRLW'b000_1000_1010_1_0_0_0_1_0_0_0_0_0; // sha512sum0 + else if (Rs2D == 5'b00101) BMUControlsD = `BMUCTRLW'b000_1000_1011_1_0_0_0_1_0_0_0_0_0; // sha512sum1 endcase end end // Unpack Control Signals - assign {BALUSelectD, BSelectD, ZBBSelectD, BRegWriteD,BALUSrcBD, BW64D, BALUOpD, BSubArithD, RotateD, MaskD, PreShiftD, IllegalBitmanipInstrD} = BMUControlsD; + assign {BALUSelectD, BSelectD, ZBBSelectD, BRegWriteD,BALUSrcBD, BW64D, BUW64D, BALUOpD, BSubArithD, RotateD, MaskD, PreShiftD, IllegalBitmanipInstrD} = BMUControlsD; // Pack BALUControl Signals assign BALUControlD = {RotateD, MaskD, PreShiftD}; diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index 4dbb8ff82..74f5162ba 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -56,7 +56,7 @@ module controller import cvw::*; #(parameter cvw_t P) ( output logic [2:0] Funct3E, // Instruction's funct3 field output logic [6:0] Funct7E, // Instruction's funct7 field output logic IntDivE, // Integer divide - output logic W64E, // RV64 W-type operation + output logic W64E, UW64E, // RV64 W/.uw-type operation output logic SubArithE, // Subtraction or arithmetic shift output logic JumpE, // jump instruction output logic BranchE, // Branch instruction @@ -158,6 +158,7 @@ module controller import cvw::*; #(parameter cvw_t P) ( logic MatchDE; // Match between a source register in Decode stage and destination register in Execute stage logic FCvtIntStallD, MDUStallD, CSRRdStallD; // Stall due to conversion, load, multiply/divide, CSR read logic FunctCZeroD; // Funct7 and Funct3 indicate czero.* (not including Op check) + logic BUW64D; // Indiciates if it is a .uw type B instruction in Decode Stage // Extract fields assign OpD = InstrD[6:0]; @@ -326,7 +327,7 @@ module controller import cvw::*; #(parameter cvw_t P) ( logic BALUSrcBD; // BMU alu src select signal bmuctrl #(P) bmuctrl(.clk, .reset, .InstrD, .ALUOpD, - .BRegWriteD, .BALUSrcBD, .BW64D, .BSubArithD, .IllegalBitmanipInstrD, .StallE, .FlushE, + .BRegWriteD, .BALUSrcBD, .BW64D, .BUW64D, .BSubArithD, .IllegalBitmanipInstrD, .StallE, .FlushE, .ALUSelectD(PreALUSelectD), .BSelectE, .ZBBSelectE, .BALUControlE, .BMUActiveE); if (P.ZBA_SUPPORTED) begin // ALU Decoding is more comprehensive when ZBA is supported. slt and slti conflicts with sh1add, sh1add.uw @@ -350,6 +351,7 @@ module controller import cvw::*; #(parameter cvw_t P) ( assign W64D = BaseW64D; assign ALUSrcBD = BaseALUSrcBD; assign SubArithD = BaseSubArithD; // TRUE If B-type or R-type instruction involves inverted operand + assign BUW64D = 1'b0; // no .uw instructions // tie off unused bit manipulation signals assign BSelectE = 4'b0000; @@ -417,9 +419,9 @@ module controller import cvw::*; #(parameter cvw_t P) ( flopenrc #(1) controlregD(clk, reset, FlushD, ~StallD, 1'b1, InstrValidD); // Execute stage pipeline control register and logic - flopenrc #(44) controlregE(clk, reset, FlushE, ~StallE, - {ALUSelectD, RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUSrcAD, ALUSrcBD, ALUResultSrcD, CSRReadD, CSRWriteD, PrivilegedD, Funct3D, Funct7D, W64D, SubArithD, MDUD, AtomicD, InvalidateICacheD, FlushDCacheD, FenceD, CMOpD, IFUPrefetchD, LSUPrefetchD, CZeroD, InstrValidD}, - {ALUSelectE, IEURegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, Funct7E, W64E, SubArithE, MDUE, AtomicE, InvalidateICacheE, FlushDCacheE, FenceE, CMOpE, IFUPrefetchE, LSUPrefetchE, CZeroE, InstrValidE}); + flopenrc #(45) controlregE(clk, reset, FlushE, ~StallE, + {ALUSelectD, RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUSrcAD, ALUSrcBD, ALUResultSrcD, CSRReadD, CSRWriteD, PrivilegedD, Funct3D, Funct7D, W64D, BUW64D, SubArithD, MDUD, AtomicD, InvalidateICacheD, FlushDCacheD, FenceD, CMOpD, IFUPrefetchD, LSUPrefetchD, CZeroD, InstrValidD}, + {ALUSelectE, IEURegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, Funct7E, W64E, UW64E, SubArithE, MDUE, AtomicE, InvalidateICacheE, FlushDCacheE, FenceE, CMOpE, IFUPrefetchE, LSUPrefetchE, CZeroE, InstrValidE}); flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E); flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); diff --git a/src/ieu/datapath.sv b/src/ieu/datapath.sv index 12f4204e7..c04d0a4a6 100644 --- a/src/ieu/datapath.sv +++ b/src/ieu/datapath.sv @@ -41,7 +41,7 @@ module datapath import cvw::*; #(parameter cvw_t P) ( input logic [6:0] Funct7E, // Funct7 field of instruction in Execute stage input logic StallE, FlushE, // Stall, flush Execute stage input logic [1:0] ForwardAE, ForwardBE, // Forward ALU operands from later stages - input logic W64E, // W64-type instruction + input logic W64E,UW64E, // W64/.uw-type instruction input logic SubArithE, // Subtraction or arithmetic shift input logic ALUSrcAE, ALUSrcBE, // ALU operands input logic ALUResultSrcE, // Selects result to pass on to Memory stage @@ -109,7 +109,7 @@ module datapath import cvw::*; #(parameter cvw_t P) ( comparator #(P.XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE); mux2 #(P.XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(P.XLEN) srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE); - alu #(P) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, Funct7E, Rs2E, BALUControlE, BMUActiveE, CZeroE, ALUResultE, IEUAdrE); + alu #(P) alu(SrcAE, SrcBE, W64E, UW64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, Funct7E, Rs2E, BALUControlE, BMUActiveE, CZeroE, ALUResultE, IEUAdrE); mux2 #(P.XLEN) altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE); mux2 #(P.XLEN) ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE); diff --git a/src/ieu/ieu.sv b/src/ieu/ieu.sv index 4f9c62cb3..e820a3e41 100644 --- a/src/ieu/ieu.sv +++ b/src/ieu/ieu.sv @@ -93,6 +93,7 @@ module ieu import cvw::*; #(parameter cvw_t P) ( logic [3:0] ZBBSelectE; // ZBB Result Select Signal in Execute Stage logic [2:0] BALUControlE; // ALU Control signals for B instructions in Execute Stage logic SubArithE; // Subtraction or arithmetic shift + logic UW64E; // .uw-type instruction logic [6:0] Funct7E; @@ -111,7 +112,7 @@ module ieu import cvw::*; #(parameter cvw_t P) ( .StructuralStallD, .LoadStallD, .StoreStallD, .Rs1D, .Rs2D, .Rs2E, .StallE, .FlushE, .FlagsE, .FWriteIntE, .PCSrcE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, - .Funct3E, .Funct7E, .IntDivE, .W64E, .SubArithE, .BranchD, .BranchE, .JumpD, .JumpE, + .Funct3E, .Funct7E, .IntDivE, .W64E, .UW64E, .SubArithE, .BranchD, .BranchE, .JumpD, .JumpE, .BranchSignedE, .BSelectE, .ZBBSelectE, .BALUControlE, .BMUActiveE, .CZeroE, .MDUActiveE, .FCvtIntE, .ForwardAE, .ForwardBE, .CMOpM, .IFUPrefetchE, .LSUPrefetchM, .StallM, .FlushM, .MemRWE, .MemRWM, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M, @@ -120,7 +121,7 @@ module ieu import cvw::*; #(parameter cvw_t P) ( .RdW, .RdE, .RdM); datapath #(P) dp( - .clk, .reset, .ImmSrcD, .InstrD, .Rs1D, .Rs2D, .Rs2E, .StallE, .FlushE, .ForwardAE, .ForwardBE, .W64E, .SubArithE, + .clk, .reset, .ImmSrcD, .InstrD, .Rs1D, .Rs2D, .Rs2E, .StallE, .FlushE, .ForwardAE, .ForwardBE, .W64E, .UW64E, .SubArithE, .Funct3E, .Funct7E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .JumpE, .BranchSignedE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, .BSelectE, .ZBBSelectE, .BALUControlE, .BMUActiveE, .CZeroE, .StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, .FCvtIntW,