From 3990417d704209eac501755de21bf6a0df354487 Mon Sep 17 00:00:00 2001 From: Alessandro Maiuolo Date: Wed, 29 Mar 2023 20:57:26 -0700 Subject: [PATCH 1/5] integrated tv generation for IFdivsqrt --- .../fp/combined_IF_vectors/IF_vectors/README | 4 + .../combined_IF_vectors/create_IF_vectors.sh | 8 + .../extract_arch_vectors.py | 251 ++++++++++++++++++ .../extract_testfloat_vectors.py | 79 ++++++ tests/fp/create_all_vectors.sh | 3 + 5 files changed, 345 insertions(+) create mode 100644 tests/fp/combined_IF_vectors/IF_vectors/README create mode 100755 tests/fp/combined_IF_vectors/create_IF_vectors.sh create mode 100755 tests/fp/combined_IF_vectors/extract_arch_vectors.py create mode 100755 tests/fp/combined_IF_vectors/extract_testfloat_vectors.py diff --git a/tests/fp/combined_IF_vectors/IF_vectors/README b/tests/fp/combined_IF_vectors/IF_vectors/README new file mode 100644 index 00000000..bfbf368d --- /dev/null +++ b/tests/fp/combined_IF_vectors/IF_vectors/README @@ -0,0 +1,4 @@ +This folder holds the archtest and testfloat vectors necessary fo evaluating performance +of standalone intdiv vs combined IFdivsqrt + +to generate vectors, uncomment line 8 in create_all_vectors.sh \ No newline at end of file diff --git a/tests/fp/combined_IF_vectors/create_IF_vectors.sh b/tests/fp/combined_IF_vectors/create_IF_vectors.sh new file mode 100755 index 00000000..d6b83f59 --- /dev/null +++ b/tests/fp/combined_IF_vectors/create_IF_vectors.sh @@ -0,0 +1,8 @@ +#!/bin/sh +# create test vectors for stand alone int + +./extract_testfloat_vectors.py +./extract_arch_vectors.py + +# to create tvs for evaluation of combined IFdivsqrt +#./combined_IF_vectors/create_IF_vectors.sh \ No newline at end of file diff --git a/tests/fp/combined_IF_vectors/extract_arch_vectors.py b/tests/fp/combined_IF_vectors/extract_arch_vectors.py new file mode 100755 index 00000000..c80c7a84 --- /dev/null +++ b/tests/fp/combined_IF_vectors/extract_arch_vectors.py @@ -0,0 +1,251 @@ +#! /usr/bin/python3 + +# author: Alessandro Maiuolo +# contact: amaiuolo@g.hmc.edu +# date created: 3-29-2023 + +# extract all arch test vectors +import os +wally = os.popen('echo $WALLY').read().strip() + +def ext_bits(my_string): + target_len = 32 # we want 128 bits, div by 4 bc hex notation + zeroes_to_add = target_len - len(my_string) + return zeroes_to_add*"0" + my_string + +def twos_comp(b, x): + if b == 32: + return hex(0x100000000 - int(x,16))[2:] + elif b == 64: + return hex(0x10000000000000000 - int(x,16))[2:] + else: + return "UNEXPECTED_BITSIZE" + +def unpack_rf(packed): + bin_u = bin(int(packed, 16))[2:].zfill(8) # translate to binary + flags = hex(int(bin_u[3:],2))[2:].zfill(2) + rounding_mode = hex(int(bin_u[:3],2))[2:] + return flags, rounding_mode + +# rounding mode dictionary +round_dict = { + "rne":"0", + "rnm":"4", + "ru":"3", + "rz":"1", + "rd":"2", + "dyn":"7" +} + +# fcsr dictionary +fcsr_dict = { + "0":"rne", + "128":"rnm", + "96":"ru", + "32":"rz", + "64":"rd", + "224":"dyn" +} + +print("creating arch test vectors") + +class Config: + def __init__(self, bits, letter, op, filt, op_code): + self.bits = bits + self.letter = letter + self.op = op + self.filt = filt + self.op_code = op_code + +def create_vectors(my_config): + suite_folder_num = my_config.bits + if my_config.bits == 64 and my_config.letter == "F": suite_folder_num = 32 + source_dir1 = "{}/addins/riscv-arch-test/riscv-test-suite/rv{}i_m/{}/src/".format(wally, suite_folder_num, my_config.letter) + source_dir2 = "{}/tests/riscof/work/riscv-arch-test/rv{}i_m/{}/src/".format(wally, my_config.bits, my_config.letter) + dest_dir = "{}/tests/fp/combined_IF_vectors/IF_vectors/".format(wally) + all_vectors1 = os.listdir(source_dir1) + + filt_vectors1 = [v for v in all_vectors1 if my_config.filt in v] + # print(filt_vectors1) + filt_vectors2 = [v + "/ref/Reference-sail_c_simulator.signature" for v in all_vectors1 if my_config.filt in v] + + # iterate through all vectors + for i in range(len(filt_vectors1)): + vector1 = filt_vectors1[i] + vector2 = filt_vectors2[i] + operation = my_config.op_code + rounding_mode = "X" + flags = "XX" + # use name to create our new tv + dest_file = open("{}cvw_{}_{}.tv".format(dest_dir, my_config.bits, vector1[:-2]), 'a') + # open vectors + src_file1 = open(source_dir1 + vector1,'r') + src_file2 = open(source_dir2 + vector2,'r') + # for each test in the vector + reading = True + src_file2.readline() #skip first bc junk + # print(my_config.bits, my_config.letter) + if my_config.letter == "F" and my_config.bits == 64: + reading = True + # print("trigger 64F") + #skip first 2 lines bc junk + src_file2.readline() + while reading: + # get answer and flags from Ref...signature + # answers are before deadbeef (first line of 4) + # flags are after deadbeef (third line of 4) + answer = src_file2.readline().strip() + deadbeef = src_file2.readline().strip() + # print(answer) + if not (answer == "e7d4b281" and deadbeef == "6f5ca309"): # if there is still stuff to read + # get flags + packed = src_file2.readline().strip()[6:] + flags, rounding_mode = unpack_rf(packed) + # skip 00000000 buffer + src_file2.readline() + + # parse through .S file + detected = False + done = False + op1val = "0" + op2val = "0" + while not (detected or done): + # print("det1") + line = src_file1.readline() + # print(line) + if "op1val" in line: + # print("det2") + # parse line + op1val = line.split("op1val")[1].split("x")[1].split(";")[0] + if my_config.op != "fsqrt": # sqrt doesn't have two input vals + op2val = line.split("op2val")[1].split("x")[1].strip() + if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there + else: + op2val = 32*"X" + # go to next test in vector + detected = True + elif "RVTEST_CODE_END" in line: + done = True + # put it all together + if not done: + translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags, rounding_mode) + dest_file.write(translation + "\n") + else: + # print("read false") + reading = False + elif my_config.letter == "M" and my_config.bits == 64: + reading = True + #skip first 2 lines bc junk + src_file2.readline() + while reading: + # print("trigger 64M") + # get answer from Ref...signature + # answers span two lines and are reversed + answer2 = src_file2.readline().strip() + answer1 = src_file2.readline().strip() + answer = answer1 + answer2 + # print(answer1,answer2) + if not (answer2 == "e7d4b281" and answer1 == "6f5ca309"): # if there is still stuff to read + # parse through .S file + detected = False + done = False + op1val = "0" + op2val = "0" + while not (detected or done): + # print("det1") + line = src_file1.readline() + # print(line) + if "op1val" in line: + # print("det2") + # parse line + op1val = line.split("op1val")[1].split("x")[1].split(";")[0] + if "-" in line.split("op1val")[1].split("x")[0]: # neg sign handling + op1val = twos_comp(my_config.bits, op1val) + if my_config.op != "fsqrt": # sqrt doesn't have two input vals, unnec here but keeping for later + op2val = line.split("op2val")[1].split("x")[1].strip() + if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there + if "-" in line.split("op2val")[1].split("x")[0]: # neg sign handling + op2val = twos_comp(my_config.bits, op2val) + # go to next test in vector + detected = True + elif "RVTEST_CODE_END" in line: + done = True + # ints don't have flags + flags = "XX" + # put it all together + if not done: + translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags.strip(), rounding_mode) + dest_file.write(translation + "\n") + else: + # print("read false") + reading = False + else: + while reading: + # get answer and flags from Ref...signature + answer = src_file2.readline() + # print(answer) + packed = src_file2.readline()[6:] + # print(packed) + if len(packed.strip())>0: # if there is still stuff to read + # print("packed") + # parse through .S file + detected = False + done = False + op1val = "0" + op2val = "0" + while not (detected or done): + # print("det1") + line = src_file1.readline() + # print(line) + if "op1val" in line: + # print("det2") + # parse line + op1val = line.split("op1val")[1].split("x")[1].split(";")[0] + if "-" in line.split("op1val")[1].split("x")[0]: # neg sign handling + op1val = twos_comp(my_config.bits, op1val) + if my_config.op != "fsqrt": # sqrt doesn't have two input vals + op2val = line.split("op2val")[1].split("x")[1].strip() + if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there + if "-" in line.split("op2val")[1].split("x")[0]: # neg sign handling + op2val = twos_comp(my_config.bits, op2val) + # go to next test in vector + detected = True + elif "RVTEST_CODE_END" in line: + done = True + # rounding mode for float + if not done and (my_config.op == "fsqrt" or my_config.op == "fdiv"): + flags, rounding_mode = unpack_rf(packed) + + # put it all together + if not done: + translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags, rounding_mode) + dest_file.write(translation + "\n") + else: + # print("read false") + reading = False + print("out") + dest_file.close() + src_file1.close() + src_file2.close() + +config_list = [ +Config(32, "M", "div", "div_", 0), +Config(32, "F", "fdiv", "fdiv", 1), +Config(32, "F", "fsqrt", "fsqrt", 2), +Config(32, "M", "rem", "rem-", 3), +Config(32, "M", "divu", "divu-", 4), +Config(32, "M", "remu", "remu-", 5), +Config(64, "M", "div", "div-", 0), +Config(64, "F", "fdiv", "fdiv", 1), +Config(64, "F", "fsqrt", "fsqrt", 2), +Config(64, "M", "rem", "rem-", 3), +Config(64, "M", "divu", "divu-", 4), +Config(64, "M", "remu", "remu-", 5), +Config(64, "M", "divw", "divw-", 6), +Config(64, "M", "divuw", "divuw-", 7), +Config(64, "M", "remw", "remw-", 8), +Config(64, "M", "remuw", "remuw-", 9) +] + +for c in config_list: + create_vectors(c) \ No newline at end of file diff --git a/tests/fp/combined_IF_vectors/extract_testfloat_vectors.py b/tests/fp/combined_IF_vectors/extract_testfloat_vectors.py new file mode 100755 index 00000000..07047be2 --- /dev/null +++ b/tests/fp/combined_IF_vectors/extract_testfloat_vectors.py @@ -0,0 +1,79 @@ +#! /usr/bin/python3 +# extract sqrt and float div testfloat vectors + +# author: Alessandro Maiuolo +# contact: amaiuolo@g.hmc.edu +# date created: 3-29-2023 + +import os +wally = os.popen('echo $WALLY').read().strip() +# print(wally) + +def ext_bits(my_string): + target_len = 32 # we want 128 bits, div by 4 bc hex notation + zeroes_to_add = target_len - len(my_string) + return zeroes_to_add*"0" + my_string + +# rounding mode dictionary +round_dict = { + "rne":"0", + "rnm":"4", + "ru":"3", + "rz":"1", + "rd":"2", + "dyn":"7" +} + + +print("creating testfloat div test vectors") + +source_dir = "{}/tests/fp/vectors/".format(wally) +dest_dir = "{}/tests/fp/combined_IF_vectors/IF_vectors/".format(wally) +all_vectors = os.listdir(source_dir) + +div_vectors = [v for v in all_vectors if "div" in v] + +# iterate through all float div vectors +for vector in div_vectors: + # use name to determine configs + config_list = vector.split(".")[0].split("_") + operation = "1" #float div + rounding_mode = round_dict[str(config_list[2])] + # use name to create our new tv + dest_file = open(dest_dir + "cvw_" + vector, 'a') + # open vector + src_file = open(source_dir + vector,'r') + # for each test in the vector + for i in src_file.readlines(): + translation = "" # this stores the test that we are currently working on + [input_1, input_2, answer, flags] = i.split("_") # separate inputs, answer, and flags + # put it all together, strip nec for removing \n on the end of the flags + translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(input_1), ext_bits(input_2), ext_bits(answer), flags.strip(), rounding_mode) + dest_file.write(translation + "\n") + dest_file.close() + src_file.close() + + +print("creating testfloat sqrt test vectors") + +sqrt_vectors = [v for v in all_vectors if "sqrt" in v] + +# iterate through all float div vectors +for vector in sqrt_vectors: + # use name to determine configs + config_list = vector.split(".")[0].split("_") + operation = "2" #sqrt + rounding_mode = round_dict[str(config_list[2])] + # use name to create our new tv + dest_file = open(dest_dir + "cvw_" + vector, 'a') + # open vector + src_file = open(source_dir + vector,'r') + # for each test in the vector + for i in src_file.readlines(): + translation = "" # this stores the test that we are currently working on + [input_1, answer, flags] = i.split("_") # separate inputs, answer, and flags + # put it all together, strip nec for removing \n on the end of the flags + translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(input_1), "X"*32, ext_bits(answer), flags.strip(), rounding_mode) + dest_file.write(translation + "\n") + dest_file.close() + src_file.close() \ No newline at end of file diff --git a/tests/fp/create_all_vectors.sh b/tests/fp/create_all_vectors.sh index f1ba3625..38cfd555 100755 --- a/tests/fp/create_all_vectors.sh +++ b/tests/fp/create_all_vectors.sh @@ -3,3 +3,6 @@ mkdir -p vectors ./create_vectors.sh ./remove_spaces.sh + +# to create tvs for evaluation of combined IFdivsqrt +#./combined_IF_vectors/create_IF_vectors.sh \ No newline at end of file From e3f3f1421673a7c0016ad81f936df7f61620ac58 Mon Sep 17 00:00:00 2001 From: James Stine Date: Sun, 2 Apr 2023 18:16:23 -0500 Subject: [PATCH 2/5] Update one bug in testfloat - still have to fix fpdiv but others should now all work --- testbench/testbench-fp.sv | 309 +++++++++++++++++++------------------- 1 file changed, 155 insertions(+), 154 deletions(-) diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv index d1f6f9b6..f250e803 100644 --- a/testbench/testbench-fp.sv +++ b/testbench/testbench-fp.sv @@ -1,7 +1,8 @@ -/////////////////////////////////////////// + 64 bit conversions to the to-be-tested list @@ -270,27 +270,27 @@ module testbenchfp; end if (`D_SUPPORTED) begin // if double precision is supported if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested - Tests = {Tests, f64rv32cvtint}; - // add the op-codes for these tests to the op-code list - OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; - WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; - // add what unit is used and the fmt to their lists (one for each test) - for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b01}; - end - if (`XLEN == 64) begin // if 64-bit integers are being supported - Tests = {Tests, f64rv64cvtint}; - // add the op-codes for these tests to the op-code list - OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; - WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; - // add what unit is used and the fmt to their lists (one for each test) - for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b01}; - end - end - end + Tests = {Tests, f64rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b01}; + end + if (`XLEN == 64) begin // if 64-bit integers are being supported + Tests = {Tests, f64rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + end if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversions are being tested if(`F_SUPPORTED) begin // if single precision is supported // add the 64 <-> 32 bit conversions to the to-be-tested list @@ -397,27 +397,27 @@ module testbenchfp; end if (`F_SUPPORTED) begin // if single precision being supported if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested - Tests = {Tests, f32rv32cvtint}; - // add the op-codes for these tests to the op-code list - OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; - WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; - // add what unit is used and the fmt to their lists (one for each test) - for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b00}; - end - if (`XLEN == 64) begin // if 64-bit integers are supported - Tests = {Tests, f32rv64cvtint}; - // add the op-codes for these tests to the op-code list - OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; - WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; - // add what unit is used and the fmt to their lists (one for each test) - for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b00}; - end - end - end + Tests = {Tests, f32rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b00}; + end + if (`XLEN == 64) begin // if 64-bit integers are supported + Tests = {Tests, f32rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + end if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversion is being tested if(`ZFH_SUPPORTED) begin // add the 32 <-> 16 bit conversions to the to-be-tested list @@ -508,27 +508,27 @@ module testbenchfp; end if (`ZFH_SUPPORTED) begin // if half precision supported if (TEST === "cvtint"| TEST === "all") begin // if in conversions are being tested - Tests = {Tests, f16rv32cvtint}; - // add the op-codes for these tests to the op-code list - OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; - WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; - // add what unit is used and the fmt to their lists (one for each test) - for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b10}; - end - if (`XLEN == 64) begin // if 64-bit integers are supported - Tests = {Tests, f16rv64cvtint}; - // add the op-codes for these tests to the op-code list - OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; - WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; - // add what unit is used and the fmt to their lists (one for each test) - for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b10}; - end - end - end + Tests = {Tests, f16rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (`XLEN == 64) begin // if 64-bit integers are supported + Tests = {Tests, f16rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f16cmp}; @@ -656,7 +656,8 @@ module testbenchfp; end // extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector - readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, + readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), + .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, .Xs, .Ys, .Zs, .Unit(UnitVal), .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal), .Xm, .Ym, .Zm, .DivStart, @@ -680,7 +681,7 @@ module testbenchfp; /////////////////////////////////////////////////////////////////////////////////////////////// // instantiate devices under test - if (TEST === "fma"| TEST === "mul" | TEST === "add" | TEST === "all") begin : fma + if (TEST === "fma"| TEST === "mul" | TEST === "add" | TEST === "sub" | TEST === "all") begin : fma fma fma(.Xs(Xs), .Ys(Ys), .Zs(Zs), .Xe(Xe), .Ye(Ye), .Ze(Ze), .Xm(Xm), .Ym(Ym), .Zm(Zm), @@ -1331,4 +1332,4 @@ module readvectors ( .Xm, .Ym, .Zm, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN, .XSubnorm, .XZero, .YZero, .ZZero, .XInf, .YInf, .ZInf, .XEn, .YEn, .ZEn, .XExpMax); -endmodule \ No newline at end of file +endmodule From 9a4fa6ce96b5410ebbea2202f73557457f054372 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Sun, 2 Apr 2023 18:28:09 -0700 Subject: [PATCH 3/5] changed signal names on clmul and zbc to match book --- src/ieu/bmu/clmul.sv | 10 +++++----- src/ieu/bmu/zbc.sv | 15 ++++++--------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/ieu/bmu/clmul.sv b/src/ieu/bmu/clmul.sv index 904c6423..cf3e1c6b 100644 --- a/src/ieu/bmu/clmul.sv +++ b/src/ieu/bmu/clmul.sv @@ -30,20 +30,20 @@ `include "wally-config.vh" module clmul #(parameter WIDTH=32) ( - input logic [WIDTH-1:0] A, B, // Operands + input logic [WIDTH-1:0] X, Y, // Operands output logic [WIDTH-1:0] ClmulResult); // ZBS result - logic [(WIDTH*WIDTH)-1:0] s; // intermediary signals for carry-less multiply + logic [(WIDTH*WIDTH)-1:0] S; // intermediary signals for carry-less multiply integer i,j; always_comb begin for (i=0;i Date: Sun, 2 Apr 2023 18:42:41 -0700 Subject: [PATCH 4/5] signal renaming on bitmanip alu and alu --- src/ieu/alu.sv | 14 +++++++------- src/ieu/bmu/bitmanipalu.sv | 12 ++++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/ieu/alu.sv b/src/ieu/alu.sv index 4db187e5..9dfb6ae6 100644 --- a/src/ieu/alu.sv +++ b/src/ieu/alu.sv @@ -39,12 +39,12 @@ module alu #(parameter WIDTH=32) ( input logic [2:0] Funct3, // For BMU decoding input logic CompLT, // Less-Than flag from comparator input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage - output logic [WIDTH-1:0] Result, // ALU result + output logic [WIDTH-1:0] ALUResult, // ALU result output logic [WIDTH-1:0] Sum); // Sum of operands // CondInvB = ~B when subtracting, B otherwise. Shift = shift result. SLT/U = result of a slt/u instruction. // FullResult = ALU result before adjusting for a RV64 w-suffix instruction. - logic [WIDTH-1:0] CondMaskInvB, Shift, FullResult, ALUResult; // Intermediate Signals + logic [WIDTH-1:0] CondMaskInvB, Shift, FullResult, PreALUResult; // Intermediate Signals logic [WIDTH-1:0] CondMaskB; // Result of B mask select mux logic [WIDTH-1:0] CondShiftA; // Result of A shifted select mux logic [WIDTH-1:0] CondExtA; // Result of Zero Extend A select mux @@ -84,16 +84,16 @@ module alu #(parameter WIDTH=32) ( end // Support RV64I W-type addw/subw/addiw/shifts that discard upper 32 bits and sign-extend 32-bit result to 64 bits - if (WIDTH == 64) assign ALUResult = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult; - else assign ALUResult = FullResult; + if (WIDTH == 64) assign PreALUResult = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult; + else assign PreALUResult = FullResult; // Final Result B instruction select mux if (`ZBC_SUPPORTED | `ZBS_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED) begin : bitmanipalu bitmanipalu #(WIDTH) balu(.A, .B, .W64, .BSelect, .ZBBSelect, - .Funct3, .CompLT, .BALUControl, .ALUResult, .FullResult, - .CondMaskB, .CondShiftA, .Result); + .Funct3, .CompLT, .BALUControl, .PreALUResult, .FullResult, + .CondMaskB, .CondShiftA, .ALUResult); end else begin - assign Result = ALUResult; + assign ALUResult = PreALUResult; assign CondMaskB = B; assign CondShiftA = A; end diff --git a/src/ieu/bmu/bitmanipalu.sv b/src/ieu/bmu/bitmanipalu.sv index ae71db7b..7bc9055c 100644 --- a/src/ieu/bmu/bitmanipalu.sv +++ b/src/ieu/bmu/bitmanipalu.sv @@ -37,10 +37,10 @@ module bitmanipalu #(parameter WIDTH=32) ( input logic [2:0] Funct3, // Funct3 field of opcode indicates operation to perform input logic CompLT, // Less-Than flag from comparator input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage - input logic [WIDTH-1:0] ALUResult, FullResult, // ALUResult, FullResult signals + input logic [WIDTH-1:0] PreALUResult, FullResult,// PreALUResult, FullResult signals output logic [WIDTH-1:0] CondMaskB, // B is conditionally masked for ZBS instructions output logic [WIDTH-1:0] CondShiftA, // A is conditionally shifted for ShAdd instructions - output logic [WIDTH-1:0] Result); // Result + output logic [WIDTH-1:0] ALUResult); // Result logic [WIDTH-1:0] ZBBResult, ZBCResult; // ZBB, ZBC Result logic [WIDTH-1:0] MaskB; // BitMask of B @@ -91,9 +91,9 @@ module bitmanipalu #(parameter WIDTH=32) ( always_comb case (BSelect) // 00: ALU, 01: ZBA/ZBS, 10: ZBB, 11: ZBC - 2'b00: Result = ALUResult; - 2'b01: Result = FullResult; // NOTE: We don't use ALUResult because ZBA/ZBS instructions don't sign extend the MSB of the right-hand word. - 2'b10: Result = ZBBResult; - 2'b11: Result = ZBCResult; + 2'b00: ALUResult = PreALUResult; + 2'b01: ALUResult = FullResult; // NOTE: We don't use ALUResult because ZBA/ZBS instructions don't sign extend the MSB of the right-hand word. + 2'b10: ALUResult = ZBBResult; + 2'b11: ALUResult = ZBCResult; endcase endmodule From 5e7bbeddd1f49ec0093ec543a9f4385bf4d32b7e Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Sun, 2 Apr 2023 21:14:31 -0700 Subject: [PATCH 5/5] removed comparator flag to ALU --- src/ieu/alu.sv | 3 +-- src/ieu/bmu/bitmanipalu.sv | 5 +++-- src/ieu/bmu/bmuctrl.sv | 15 +++++---------- src/ieu/bmu/zbb.sv | 10 +++++++--- src/ieu/controller.sv | 7 ++----- src/ieu/datapath.sv | 2 +- 6 files changed, 19 insertions(+), 23 deletions(-) diff --git a/src/ieu/alu.sv b/src/ieu/alu.sv index 9dfb6ae6..43df83b6 100644 --- a/src/ieu/alu.sv +++ b/src/ieu/alu.sv @@ -37,7 +37,6 @@ module alu #(parameter WIDTH=32) ( input logic [1:0] BSelect, // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction input logic [2:0] ZBBSelect, // ZBB mux select signal input logic [2:0] Funct3, // For BMU decoding - input logic CompLT, // Less-Than flag from comparator input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage output logic [WIDTH-1:0] ALUResult, // ALU result output logic [WIDTH-1:0] Sum); // Sum of operands @@ -90,7 +89,7 @@ module alu #(parameter WIDTH=32) ( // Final Result B instruction select mux if (`ZBC_SUPPORTED | `ZBS_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED) begin : bitmanipalu bitmanipalu #(WIDTH) balu(.A, .B, .W64, .BSelect, .ZBBSelect, - .Funct3, .CompLT, .BALUControl, .PreALUResult, .FullResult, + .Funct3, .LT,.LTU, .BALUControl, .PreALUResult, .FullResult, .CondMaskB, .CondShiftA, .ALUResult); end else begin assign ALUResult = PreALUResult; diff --git a/src/ieu/bmu/bitmanipalu.sv b/src/ieu/bmu/bitmanipalu.sv index 7bc9055c..228b2313 100644 --- a/src/ieu/bmu/bitmanipalu.sv +++ b/src/ieu/bmu/bitmanipalu.sv @@ -35,7 +35,8 @@ module bitmanipalu #(parameter WIDTH=32) ( input logic [1:0] BSelect, // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction input logic [2:0] ZBBSelect, // ZBB mux select signal input logic [2:0] Funct3, // Funct3 field of opcode indicates operation to perform - input logic CompLT, // Less-Than flag from comparator + input logic LT, // less than flag + input logic LTU, // less than unsigned flag input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage input logic [WIDTH-1:0] PreALUResult, FullResult,// PreALUResult, FullResult signals output logic [WIDTH-1:0] CondMaskB, // B is conditionally masked for ZBS instructions @@ -84,7 +85,7 @@ module bitmanipalu #(parameter WIDTH=32) ( // ZBB Unit if (`ZBB_SUPPORTED) begin: zbb - zbb #(WIDTH) ZBB(.A, .RevA, .B, .W64, .lt(CompLT), .ZBBSelect, .ZBBResult); + zbb #(WIDTH) ZBB(.A, .RevA, .B, .W64, .LT, .LTU, .BUnsigned(Funct3[0]), .ZBBSelect, .ZBBResult); end else assign ZBBResult = 0; // Result Select Mux diff --git a/src/ieu/bmu/bmuctrl.sv b/src/ieu/bmu/bmuctrl.sv index 90d031a1..3ddc3231 100644 --- a/src/ieu/bmu/bmuctrl.sv +++ b/src/ieu/bmu/bmuctrl.sv @@ -48,7 +48,6 @@ module bmuctrl( output logic [1:0] BSelectE, // Indicates if ZBA_ZBB_ZBC_ZBS instruction in one-hot encoding output logic [2:0] ZBBSelectE, // ZBB mux select signal output logic BRegWriteE, // Indicates if it is a R type B instruction in Execute - output logic BComparatorSignedE, // Indicates if comparator signed in Execute Stage output logic [2:0] BALUControlE // ALU Control signals for B instructions in Execute Stage ); @@ -56,7 +55,6 @@ module bmuctrl( logic [2:0] Funct3D; // Funct3 field in Decode stage logic [6:0] Funct7D; // Funct7 field in Decode stage logic [4:0] Rs2D; // Rs2 source register in Decode stage - logic BComparatorSignedD; // Indicates if comparator signed (max, min instruction) in Decode Stage logic RotateD; // Indicates if rotate instruction in Decode Stage logic MaskD; // Indicates if zbs instruction in Decode Stage logic PreShiftD; // Indicates if sh1add, sh2add, sh3add instruction in Decode Stage @@ -110,10 +108,10 @@ module bmuctrl( BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0; // rev8 17'b0010011_0010100_101: if (Rs2D[4:0] == 5'b00111) BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0; // orc.b - 17'b0110011_0000101_110: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_0_0_0_0_0; // max - 17'b0110011_0000101_111: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_0_0_0_0_0; // maxu - 17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_0_0_0_0_0; // min - 17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_0_0_0_0_0; // minu + 17'b0110011_0000101_110: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_1_0_0_0_0; // max + 17'b0110011_0000101_111: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_1_0_0_0_0; // maxu + 17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0; // min + 17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0; // minu endcase if (`XLEN==32) casez({OpD, Funct7D, Funct3D}) @@ -172,12 +170,9 @@ module bmuctrl( // Pack BALUControl Signals assign BALUControlD = {RotateD, MaskD, PreShiftD}; - // Comparator should perform signed comparison when min/max instruction. We have overlap in funct3 with some branch instructions so we use opcode to differentiate betwen min/max and branches - assign BComparatorSignedD = (Funct3D[2]^Funct3D[0]) & ~OpD[6]; - // Choose ALUSelect brom BMU for BMU operations, Funct3 for IEU operations, or 0 for addition assign ALUSelectD = BALUOpD ? BALUSelectD : (ALUOpD ? Funct3D : 3'b000); // BMU Execute stage pipieline control register - flopenrc#(10) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BComparatorSignedD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BComparatorSignedE, BALUControlE}); + flopenrc#(9) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE}); endmodule diff --git a/src/ieu/bmu/zbb.sv b/src/ieu/bmu/zbb.sv index a85114b5..1a1b9de2 100644 --- a/src/ieu/bmu/zbb.sv +++ b/src/ieu/bmu/zbb.sv @@ -33,21 +33,25 @@ module zbb #(parameter WIDTH=32) ( input logic [WIDTH-1:0] A, RevA, B, // Operands input logic W64, // Indicates word operation - input logic lt, // lt flag + input logic LT, // lt flag + input logic LTU, // ltu flag + input logic BUnsigned, // max/min (signed) flag input logic [2:0] ZBBSelect, // ZBB Result select signal output logic [WIDTH-1:0] ZBBResult); // ZBB result - + + logic lt; // lt given signed/unsigned logic [WIDTH-1:0] CntResult; // count result logic [WIDTH-1:0] MinMaxResult; // min, max result logic [WIDTH-1:0] ByteResult; // byte results logic [WIDTH-1:0] ExtResult; // sign/zero extend results + mux2 #(1) ltmux(LT, LTU, BUnsigned , lt); cnt #(WIDTH) cnt(.A, .RevA, .B(B[1:0]), .W64, .CntResult); byteUnit #(WIDTH) bu(.A, .ByteSelect(B[0]), .ByteResult); ext #(WIDTH) ext(.A, .ExtSelect({~B[2], {B[2] & B[0]}}), .ExtResult); // ZBBSelect[2] differentiates between min(u) vs max(u) instruction - mux2 #(WIDTH) minmaxmux(B, A, lt^ZBBSelect[2], MinMaxResult); + mux2 #(WIDTH) minmaxmux(B, A, ZBBSelect[2]^lt, MinMaxResult); // ZBB Result select mux mux4 #(WIDTH) zbbresultmux(CntResult, ExtResult, ByteResult, MinMaxResult, ZBBSelect[1:0], ZBBResult); diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index 5d0b7845..78031617 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -125,7 +125,6 @@ module controller( logic IntDivM; // Integer divide instruction logic [1:0] BSelectD; // One-Hot encoding if it's ZBA_ZBB_ZBC_ZBS instruction in decode stage logic [2:0] ZBBSelectD; // ZBB Mux Select Signal - logic BComparatorSignedE; // Indicates if max, min (signed comarison) instruction in Execute Stage logic IFunctD, RFunctD, MFunctD; // Detect I, R, and M-type RV32IM/Rv64IM instructions logic LFunctD, SFunctD, BFunctD; // Detect load, store, branch instructions logic JFunctD; // detect jalr instruction @@ -257,7 +256,7 @@ module controller( bmuctrl bmuctrl(.clk, .reset, .StallD, .FlushD, .InstrD, .ALUOpD, .BSelectD, .ZBBSelectD, .BRegWriteD, .BALUSrcBD, .BW64D, .BSubArithD, .IllegalBitmanipInstrD, .StallE, .FlushE, - .ALUSelectD, .BSelectE, .ZBBSelectE, .BRegWriteE, .BComparatorSignedE, .BALUControlE); + .ALUSelectD, .BSelectE, .ZBBSelectE, .BRegWriteE, .BALUControlE); if (`ZBA_SUPPORTED) begin // ALU Decoding is more comprehensive when ZBA is supported. slt and slti conflicts with sh1add, sh1add.uw assign sltD = (Funct3D == 3'b010 & (~(Funct7D[4]) | ~OpD[5])) ; @@ -283,7 +282,6 @@ module controller( assign BSelectE = 2'b00; assign BSelectD = 2'b00; assign ZBBSelectE = 3'b000; - assign BComparatorSignedE = 1'b0; assign BALUControlE = 3'b0; end @@ -311,8 +309,7 @@ module controller( // Branch Logic // The comparator handles both signed and unsigned branches using BranchSignedE // Hence, only eq and lt flags are needed - // We also want comparator to handle signed comparison on a max/min bitmanip instruction - assign BranchSignedE = (~(Funct3E[2:1] == 2'b11) & BranchE) | BComparatorSignedE; + assign BranchSignedE = (~(Funct3E[2:1] == 2'b11) & BranchE); assign {eqE, ltE} = FlagsE; mux2 #(1) branchflagmux(eqE, ltE, Funct3E[2], BranchFlagE); assign BranchTakenE = BranchFlagE ^ Funct3E[0]; diff --git a/src/ieu/datapath.sv b/src/ieu/datapath.sv index 19d1264a..d6fe92a2 100644 --- a/src/ieu/datapath.sv +++ b/src/ieu/datapath.sv @@ -114,7 +114,7 @@ module datapath ( comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE); mux2 #(`XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(`XLEN) srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE); - alu #(`XLEN) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, FlagsE[0], BALUControlE, ALUResultE, IEUAdrE); + alu #(`XLEN) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, ALUResultE, IEUAdrE); mux2 #(`XLEN) altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE); mux2 #(`XLEN) ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE);