Merge branch 'main' of https://github.com/openhwgroup/cvw into dev

2023-04-03 06:13:16 -07:00 · 2023-04-03 06:13:16 -07:00 · af8f1ab786
commit af8f1ab786
parent 800fdeb7ad 0799072556
14 changed files with 542 additions and 203 deletions
--- a/src/ieu/alu.sv
+++ b/src/ieu/alu.sv
@ -37,14 +37,13 @@ module alu #(parameter WIDTH=32) (
  input  logic [1:0]       BSelect,     // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction
  input  logic [2:0]       ZBBSelect,   // ZBB mux select signal
  input  logic [2:0]       Funct3,      // For BMU decoding
  input  logic             CompLT,      // Less-Than flag from comparator
  input  logic [2:0]       BALUControl, // ALU Control signals for B instructions in Execute Stage
-  output logic [WIDTH-1:0] Result,      // ALU result
+  output logic [WIDTH-1:0] ALUResult,   // ALU result
  output logic [WIDTH-1:0] Sum);        // Sum of operands
  // CondInvB = ~B when subtracting, B otherwise. Shift = shift result. SLT/U = result of a slt/u instruction.
  // FullResult = ALU result before adjusting for a RV64 w-suffix instruction.
-  logic [WIDTH-1:0] CondMaskInvB, Shift, FullResult, ALUResult;                   // Intermediate Signals 
+  logic [WIDTH-1:0] CondMaskInvB, Shift, FullResult, PreALUResult;                // Intermediate Signals 
  logic [WIDTH-1:0] CondMaskB;                                                    // Result of B mask select mux
  logic [WIDTH-1:0] CondShiftA;                                                   // Result of A shifted select mux
  logic [WIDTH-1:0] CondExtA;                                                     // Result of Zero Extend A select mux
@ -84,16 +83,16 @@ module alu #(parameter WIDTH=32) (
  end
  // Support RV64I W-type addw/subw/addiw/shifts that discard upper 32 bits and sign-extend 32-bit result to 64 bits
-  if (WIDTH == 64)  assign ALUResult = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult;
+  if (WIDTH == 64)  assign PreALUResult = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult;
-  else              assign ALUResult = FullResult;
+  else              assign PreALUResult = FullResult;
  // Final Result B instruction select mux
  if (`ZBC_SUPPORTED | `ZBS_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED) begin : bitmanipalu
    bitmanipalu #(WIDTH) balu(.A, .B, .W64, .BSelect, .ZBBSelect, 
-      .Funct3, .CompLT, .BALUControl, .ALUResult, .FullResult,
+      .Funct3, .LT,.LTU, .BALUControl, .PreALUResult, .FullResult,
-      .CondMaskB, .CondShiftA, .Result);
+      .CondMaskB, .CondShiftA, .ALUResult);
  end else begin
-    assign Result = ALUResult;
+    assign ALUResult = PreALUResult;
    assign CondMaskB = B;
    assign CondShiftA = A;
  end
--- a/src/ieu/bmu/bitmanipalu.sv
+++ b/src/ieu/bmu/bitmanipalu.sv
@ -35,12 +35,13 @@ module bitmanipalu #(parameter WIDTH=32) (
  input  logic [1:0]       BSelect,                 // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction
  input  logic [2:0]       ZBBSelect,               // ZBB mux select signal
  input  logic [2:0]       Funct3,                  // Funct3 field of opcode indicates operation to perform
-  input  logic             CompLT,                  // Less-Than flag from comparator
+  input  logic             LT,                      // less than flag
  input  logic             LTU,                     // less than unsigned flag
  input  logic [2:0]       BALUControl,             // ALU Control signals for B instructions in Execute Stage
-  input  logic [WIDTH-1:0] ALUResult, FullResult,   // ALUResult, FullResult signals
+  input  logic [WIDTH-1:0] PreALUResult, FullResult,// PreALUResult, FullResult signals
  output logic [WIDTH-1:0] CondMaskB,               // B is conditionally masked for ZBS instructions
  output logic [WIDTH-1:0] CondShiftA,              // A is conditionally shifted for ShAdd instructions
-  output logic [WIDTH-1:0] Result);                 // Result
+  output logic [WIDTH-1:0] ALUResult);              // Result
  logic [WIDTH-1:0] ZBBResult, ZBCResult;           // ZBB, ZBC Result
  logic [WIDTH-1:0] MaskB;                          // BitMask of B
@ -84,16 +85,16 @@ module bitmanipalu #(parameter WIDTH=32) (
  // ZBB Unit
  if (`ZBB_SUPPORTED) begin: zbb
-    zbb #(WIDTH) ZBB(.A, .RevA, .B, .W64, .lt(CompLT), .ZBBSelect, .ZBBResult);
+    zbb #(WIDTH) ZBB(.A, .RevA, .B, .W64, .LT, .LTU, .BUnsigned(Funct3[0]), .ZBBSelect, .ZBBResult);
  end else assign ZBBResult = 0;
  // Result Select Mux
  always_comb
    case (BSelect)
      // 00: ALU, 01: ZBA/ZBS, 10: ZBB, 11: ZBC
-      2'b00: Result = ALUResult; 
+      2'b00: ALUResult = PreALUResult; 
-      2'b01: Result = FullResult;         // NOTE: We don't use ALUResult because ZBA/ZBS instructions don't sign extend the MSB of the right-hand word.
+      2'b01: ALUResult = FullResult;         // NOTE: We don't use ALUResult because ZBA/ZBS instructions don't sign extend the MSB of the right-hand word.
-      2'b10: Result = ZBBResult; 
+      2'b10: ALUResult = ZBBResult; 
-      2'b11: Result = ZBCResult;
+      2'b11: ALUResult = ZBCResult;
    endcase
 endmodule
--- a/src/ieu/bmu/bmuctrl.sv
+++ b/src/ieu/bmu/bmuctrl.sv
@ -48,7 +48,6 @@ module bmuctrl(
  output logic [1:0]  BSelectE,                // Indicates if ZBA_ZBB_ZBC_ZBS instruction in one-hot encoding
  output logic [2:0]  ZBBSelectE,              // ZBB mux select signal
  output logic        BRegWriteE,              // Indicates if it is a R type B instruction in Execute
  output logic        BComparatorSignedE,      // Indicates if comparator signed in Execute Stage
  output logic [2:0]  BALUControlE             // ALU Control signals for B instructions in Execute Stage
 );
@ -56,7 +55,6 @@ module bmuctrl(
  logic [2:0] Funct3D;                         // Funct3 field in Decode stage
  logic [6:0] Funct7D;                         // Funct7 field in Decode stage
  logic [4:0] Rs2D;                            // Rs2 source register in Decode stage
  logic       BComparatorSignedD;              // Indicates if comparator signed (max, min instruction) in Decode Stage
  logic       RotateD;                         // Indicates if rotate instruction in Decode Stage
  logic       MaskD;                           // Indicates if zbs instruction in Decode Stage
  logic       PreShiftD;                       // Indicates if sh1add, sh2add, sh3add instruction in Decode Stage
@ -112,10 +110,10 @@ module bmuctrl(
                                  BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0;  // rev8
        17'b0010011_0010100_101: if (Rs2D[4:0] == 5'b00111)
                                  BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0;  // orc.b
-        17'b0110011_0000101_110: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_0_0_0_0_0;  // max
+        17'b0110011_0000101_110: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_1_0_0_0_0;  // max
-        17'b0110011_0000101_111: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_0_0_0_0_0;  // maxu
+        17'b0110011_0000101_111: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_1_0_0_0_0;  // maxu
-        17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_0_0_0_0_0;  // min
+        17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0;  // min
-        17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_0_0_0_0_0;  // minu
+        17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0;  // minu
      endcase
      if (`XLEN==32)
        casez({OpD, Funct7D, Funct3D})
@ -174,12 +172,9 @@ module bmuctrl(
  // Pack BALUControl Signals
  assign BALUControlD = {RotateD, MaskD, PreShiftD};
  // Comparator should perform signed comparison when min/max instruction. We have overlap in funct3 with some branch instructions so we use opcode to differentiate between min/max and branches
  assign BComparatorSignedD = (Funct3D[2]^Funct3D[0]) & ~OpD[6];
  // Choose ALUSelect from BMU for BMU operations, Funct3 for IEU operations, or 0 for addition
  assign ALUSelectD = BALUOpD ? BALUSelectD : (ALUOpD ? Funct3D : 3'b000);
  // BMU Execute stage pipeline control register
-  flopenrc#(10) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BComparatorSignedD,  BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BComparatorSignedE, BALUControlE});
+  flopenrc#(9) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE});
 endmodule
--- a/src/ieu/bmu/clmul.sv
+++ b/src/ieu/bmu/clmul.sv
@ -30,20 +30,20 @@
 `include "wally-config.vh"
 module clmul #(parameter WIDTH=32) (
-  input  logic [WIDTH-1:0] A, B,             // Operands
+  input  logic [WIDTH-1:0] X, Y,             // Operands
  output logic [WIDTH-1:0] ClmulResult);     // ZBS result
-  logic [(WIDTH*WIDTH)-1:0] s;               // intermediary signals for carry-less multiply
+  logic [(WIDTH*WIDTH)-1:0] S;               // intermediary signals for carry-less multiply
  integer i,j;
  always_comb begin
    for (i=0;i<WIDTH;i++) begin: outer
-      s[WIDTH*i]=A[0]&B[i];
+      S[WIDTH*i] = X[0] & Y[i];
      for (j=1;j<=i;j++) begin: inner
-        s[WIDTH*i+j] = (A[j]&B[i-j])^s[WIDTH*i+j-1];
+        S[WIDTH*i+j] = (X[j] & Y[i-j]) ^ S[WIDTH*i+j-1];
      end
-      ClmulResult[i] = s[WIDTH*i+j-1];
+      ClmulResult[i] = S[WIDTH*i+j-1];
    end
  end
 endmodule
--- a/src/ieu/bmu/zbb.sv
+++ b/src/ieu/bmu/zbb.sv
@ -33,21 +33,25 @@
 module zbb #(parameter WIDTH=32) (
  input  logic [WIDTH-1:0] A, RevA, B,   // Operands
  input  logic             W64,          // Indicates word operation
-  input  logic             lt,           // lt flag
+  input  logic             LT,           // lt flag
  input  logic             LTU,          // ltu flag
  input  logic             BUnsigned,      // max/min (signed) flag
  input  logic [2:0]       ZBBSelect,    // ZBB Result select signal
  output logic [WIDTH-1:0] ZBBResult);   // ZBB result
  logic lt;                              // lt given signed/unsigned
  logic [WIDTH-1:0] CntResult;           // count result
  logic [WIDTH-1:0] MinMaxResult;        // min, max result
  logic [WIDTH-1:0] ByteResult;          // byte results
  logic [WIDTH-1:0] ExtResult;           // sign/zero extend results
  mux2 #(1) ltmux(LT, LTU, BUnsigned , lt);
  cnt #(WIDTH) cnt(.A, .RevA, .B(B[1:0]), .W64, .CntResult);
  byteUnit #(WIDTH) bu(.A, .ByteSelect(B[0]), .ByteResult);
  ext #(WIDTH) ext(.A, .ExtSelect({~B[2], {B[2] & B[0]}}), .ExtResult);
  // ZBBSelect[2] differentiates between min(u) vs max(u) instruction
-  mux2 #(WIDTH) minmaxmux(B, A, lt^ZBBSelect[2], MinMaxResult);
+  mux2 #(WIDTH) minmaxmux(B, A, ZBBSelect[2]^lt, MinMaxResult);
  // ZBB Result select mux
  mux4 #(WIDTH) zbbresultmux(CntResult, ExtResult, ByteResult, MinMaxResult, ZBBSelect[1:0], ZBBResult);
--- a/src/ieu/bmu/zbc.sv
+++ b/src/ieu/bmu/zbc.sv
@ -36,19 +36,16 @@ module zbc #(parameter WIDTH=32) (
  logic [WIDTH-1:0] ClmulResult, RevClmulResult;
  logic [WIDTH-1:0] RevB;
-  logic [WIDTH-1:0] x,y;
+  logic [WIDTH-1:0] X, Y;
  logic [1:0] select;
-  assign select = ~Funct3[1:0];
+  bitreverse #(WIDTH) brB(B, RevB);
-  bitreverse #(WIDTH) brB(.A(B), .RevA(RevB));
+  mux3 #(WIDTH) xmux({RevA[WIDTH-2:0], {1'b0}}, RevA, A, ~Funct3[1:0], X);
  mux3 #(WIDTH) ymux({{1'b0}, RevB[WIDTH-2:0]}, RevB, B, ~Funct3[1:0], Y);
-  mux3 #(WIDTH) xmux({RevA[WIDTH-2:0], {1'b0}}, RevA, A, select, x);
+  clmul #(WIDTH) clm(.X, .Y, .ClmulResult);
  mux3 #(WIDTH) ymux({{1'b0},RevB[WIDTH-2:0]}, RevB, B,  select, y);
-  clmul #(WIDTH) clm(.A(x), .B(y), .ClmulResult(ClmulResult));
+  bitreverse  #(WIDTH) brClmulResult(ClmulResult, RevClmulResult);
  bitreverse  #(WIDTH) brClmulResult(.A(ClmulResult), .RevA(RevClmulResult));
  mux2 #(WIDTH) zbcresultmux(ClmulResult, RevClmulResult, Funct3[1], ZBCResult);
 endmodule
--- a/src/ieu/controller.sv
+++ b/src/ieu/controller.sv
@ -125,7 +125,6 @@ module controller(
  logic        IntDivM;                        // Integer divide instruction
  logic [1:0]  BSelectD;                       // One-Hot encoding if it's ZBA_ZBB_ZBC_ZBS instruction in decode stage
  logic [2:0]  ZBBSelectD;                     // ZBB Mux Select Signal
  logic        BComparatorSignedE;             // Indicates if max, min (signed comarison) instruction in Execute Stage
  logic        IFunctD, RFunctD, MFunctD;      // Detect I, R, and M-type RV32IM/Rv64IM instructions
  logic        LFunctD, SFunctD, BFunctD;      // Detect load, store, branch instructions
  logic        JFunctD;                        // detect jalr instruction
@ -257,7 +256,7 @@ module controller(
    bmuctrl bmuctrl(.clk, .reset, .StallD, .FlushD, .InstrD, .ALUOpD, .BSelectD, .ZBBSelectD, 
      .BRegWriteD, .BALUSrcBD, .BW64D, .BSubArithD, .IllegalBitmanipInstrD, .StallE, .FlushE, 
-      .ALUSelectD, .BSelectE, .ZBBSelectE, .BRegWriteE, .BComparatorSignedE, .BALUControlE);
+      .ALUSelectD, .BSelectE, .ZBBSelectE, .BRegWriteE, .BALUControlE);
    if (`ZBA_SUPPORTED) begin
      // ALU Decoding is more comprehensive when ZBA is supported. slt and slti conflicts with sh1add, sh1add.uw
      assign sltD = (Funct3D == 3'b010 & (~(Funct7D[4]) | ~OpD[5])) ;
@ -285,7 +284,6 @@ module controller(
    assign BSelectE = 2'b00;
    assign BSelectD = 2'b00;
    assign ZBBSelectE = 3'b000;
    assign BComparatorSignedE = 1'b0;
    assign BALUControlE = 3'b0;
  end
@ -313,8 +311,7 @@ module controller(
  // Branch Logic
  //  The comparator handles both signed and unsigned branches using BranchSignedE
  //  Hence, only eq and lt flags are needed
-  //  We also want comparator to handle signed comparison on a max/min bitmanip instruction
+  assign BranchSignedE = (~(Funct3E[2:1] == 2'b11) & BranchE);
  assign BranchSignedE = (~(Funct3E[2:1] == 2'b11) & BranchE) | BComparatorSignedE;
  assign {eqE, ltE} = FlagsE;
  mux2 #(1) branchflagmux(eqE, ltE, Funct3E[2], BranchFlagE);
  assign BranchTakenE = BranchFlagE ^ Funct3E[0];
--- a/src/ieu/datapath.sv
+++ b/src/ieu/datapath.sv
@ -114,7 +114,7 @@ module datapath (
  comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE);
  mux2  #(`XLEN)  srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE);
  mux2  #(`XLEN)  srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE);
-  alu   #(`XLEN)  alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, FlagsE[0], BALUControlE, ALUResultE, IEUAdrE);
+  alu   #(`XLEN)  alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, ALUResultE, IEUAdrE);
  mux2 #(`XLEN)   altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE);
  mux2 #(`XLEN)   ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE);
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@ -1,7 +1,8 @@
-///////////////////////////////////////////
+///////////////////////////////////////////
 //
 // Written: me@KatherineParry.com
 // Modified: 7/5/2022
 // Modified: 4/2/2023
 //
 // Purpose: Testbench for Testfloat
 // 
@ -39,7 +40,6 @@ module testbenchfp;
  logic [2:0]                  Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100
  logic [1:0]                  Fmt[];          // list of formats for the other units  
  logic                        clk=0;
  logic [31:0]                 TestNum=0;    // index for the test
  logic [31:0]                 OpCtrlNum=0;  // index for OpCtrl
@ -656,7 +656,8 @@ module testbenchfp;
  end
  // extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector
-  readvectors readvectors          (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, 
+  readvectors readvectors          (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), 
                                    .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, 
                                    .Xs, .Ys, .Zs, .Unit(UnitVal),
                                    .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal),
                                    .Xm, .Ym, .Zm, .DivStart,
@ -680,7 +681,7 @@ module testbenchfp;
  ///////////////////////////////////////////////////////////////////////////////////////////////
  // instantiate devices under test
-  if (TEST === "fma"| TEST === "mul" | TEST === "add" | TEST === "all") begin : fma
+  if (TEST === "fma"| TEST === "mul" | TEST === "add" | TEST === "sub" | TEST === "all") begin : fma
    fma fma(.Xs(Xs), .Ys(Ys), .Zs(Zs), 
            .Xe(Xe), .Ye(Ye), .Ze(Ze), 
            .Xm(Xm), .Ym(Ym), .Zm(Zm),
--- a/tests/fp/combined_IF_vectors/IF_vectors/README
+++ b/tests/fp/combined_IF_vectors/IF_vectors/README
@ -0,0 +1,4 @@
 This folder holds the archtest and testfloat vectors necessary for evaluating the performance
 of standalone intdiv vs. combined IFdivsqrt.
 To generate the vectors, uncomment line 8 in create_all_vectors.sh.
--- a/tests/fp/combined_IF_vectors/create_IF_vectors.sh
+++ b/tests/fp/combined_IF_vectors/create_IF_vectors.sh
@ -0,0 +1,8 @@
 #!/bin/sh
 # Create the test vectors for the standalone integer divider and the combined
 # IFdivsqrt unit by running the two extractor scripts that sit next to this file.
 #
 # The extractors are invoked with ./ relative paths, so first change into this
 # script's own directory. Without this, launching the script from another
 # directory (e.g. as ./combined_IF_vectors/create_IF_vectors.sh from tests/fp)
 # would fail to find them.
 cd "$(dirname "$0")" || exit 1
 ./extract_testfloat_vectors.py
 ./extract_arch_vectors.py
 # NOTE: this script is meant to be called from ../create_all_vectors.sh;
 # it must not invoke itself.
--- a/tests/fp/combined_IF_vectors/extract_arch_vectors.py
+++ b/tests/fp/combined_IF_vectors/extract_arch_vectors.py
@ -0,0 +1,251 @@
 #! /usr/bin/python3
 # author: Alessandro Maiuolo
 # contact: amaiuolo@g.hmc.edu
 # date created: 3-29-2023
 # extract all arch test vectors
 import os
 wally = os.popen('echo $WALLY').read().strip()
 def ext_bits(my_string):
    """Left-pad a hex string with "0" to 32 hex digits (128 bits).

    Strings that are already 32 characters or longer are returned unchanged.
    """
    hex_digits = 32  # 128 bits / 4 bits per hex digit
    return my_string.rjust(hex_digits, "0")
 def twos_comp(b, x):
    """Return the b-bit two's complement of a hex magnitude as a hex string.

    b: bit width; only 32 and 64 are supported.
    x: magnitude as a hex string without the "0x" prefix (e.g. "1f").

    Returns the negated value as lowercase hex without the "0x" prefix, or the
    string "UNEXPECTED_BITSIZE" for any other bit width.
    """
    if b not in (32, 64):
        return "UNEXPECTED_BITSIZE"
    # Mask to b bits so the result never exceeds the word width: negating 0
    # must give 0 (not 2**b), and oversized magnitudes wrap instead of
    # producing a malformed negative-hex string.
    mask = (1 << b) - 1
    return hex((-int(x, 16)) & mask)[2:]
 def unpack_rf(packed):
    """Split a packed hex value into (flags, rounding_mode) hex strings.

    The value is expanded to a binary string of at least 8 digits; the first
    three digits are the rounding mode and the remaining digits are the flags.
    Flags are returned as at least two hex digits, the rounding mode as-is.
    """
    bit_string = bin(int(packed, 16))[2:].zfill(8)
    mode_bits, flag_bits = bit_string[:3], bit_string[3:]
    flags = format(int(flag_bits, 2), "02x")
    rounding_mode = format(int(mode_bits, 2), "x")
    return flags, rounding_mode
 # rounding mode dictionary
 round_dict = {
    "rne":"0",
    "rnm":"4",
    "ru":"3",
    "rz":"1",
    "rd":"2",
    "dyn":"7"
 }
 # fcsr dictionary
 fcsr_dict = {
    "0":"rne",
    "128":"rnm",
    "96":"ru",
    "32":"rz",
    "64":"rd",
    "224":"dyn"
 }
 print("creating arch test vectors")
 class Config:
  """One vector-extraction job: bit width, extension letter, operation name,
  file-name filter substring and numeric operation code."""
  def __init__(self, bits, letter, op, filt, op_code):
    # store every constructor argument under the attribute of the same name
    self.bits, self.letter, self.op = bits, letter, op
    self.filt, self.op_code = filt, op_code
 def create_vectors(my_config):
    """Translate the arch-test vectors selected by my_config into .tv files.

    For every file in the riscv-arch-test source directory whose name contains
    my_config.filt, the operands are parsed from the "op1val"/"op2val" lines of
    the test source, and the expected answers (plus flags and rounding mode
    where applicable) are read from the matching
    "ref/Reference-sail_c_simulator.signature" file. One line per test is
    appended to "cvw_<bits>_<name>.tv" in the IF_vectors directory, formatted as
    op_code, op1, op2, answer, flags, rounding_mode joined by "_".

    Three signature layouts are handled: 64-bit "F", 64-bit "M", and everything
    else. The test source and the signature are read in lock-step, so the order
    of the readline() calls below matters.

    my_config: a Config with bits, letter, op, filt and op_code attributes.
    Returns None; output is written to files.
    """
    suite_folder_num = my_config.bits
    # 64-bit F tests are read from the rv32 suite folder
    if my_config.bits == 64 and my_config.letter == "F": suite_folder_num = 32
    source_dir1 = "{}/addins/riscv-arch-test/riscv-test-suite/rv{}i_m/{}/src/".format(wally, suite_folder_num, my_config.letter)
    source_dir2 = "{}/tests/riscof/work/riscv-arch-test/rv{}i_m/{}/src/".format(wally, my_config.bits, my_config.letter)
    dest_dir = "{}/tests/fp/combined_IF_vectors/IF_vectors/".format(wally)
    all_vectors1 = os.listdir(source_dir1)
    filt_vectors1 = [v for v in all_vectors1 if my_config.filt in v]
    # print(filt_vectors1)
    # built from the same listing and filter as filt_vectors1, so index i pairs a test source with its signature
    filt_vectors2 = [v + "/ref/Reference-sail_c_simulator.signature" for v in all_vectors1 if my_config.filt in v]
    # iterate through all vectors
    for i in range(len(filt_vectors1)):
        vector1 = filt_vectors1[i]
        vector2 = filt_vectors2[i]
        operation = my_config.op_code
        # defaults used when a branch below does not extract flags / rounding mode
        rounding_mode = "X"
        flags = "XX"
        # use name to create our new tv
        # NOTE(review): vector1[:-2] drops the last two characters, presumably the ".S" extension - confirm
        # NOTE: opened in append mode, so re-running adds to an existing file rather than replacing it
        dest_file = open("{}cvw_{}_{}.tv".format(dest_dir, my_config.bits, vector1[:-2]), 'a')
        # open vectors
        src_file1 = open(source_dir1 + vector1,'r')
        src_file2 = open(source_dir2 + vector2,'r')
        # for each test in the vector
        reading = True
        src_file2.readline() #skip first bc junk
        # print(my_config.bits, my_config.letter)
        if my_config.letter == "F" and my_config.bits == 64:
            reading = True
            # print("trigger 64F")
            # skip a second junk line (the first one was already skipped above)
            src_file2.readline()
            while reading:
                # get answer and flags from Ref...signature
                # answers are before deadbeef (first line of 4)
                # flags are after deadbeef (third line of 4)
                answer = src_file2.readline().strip()
                deadbeef = src_file2.readline().strip()
                # print(answer)
                # the pair e7d4b281 / 6f5ca309 is treated as the end-of-data marker
                if not (answer == "e7d4b281" and deadbeef == "6f5ca309"): # if there is still stuff to read
                    # get flags
                    # only the text after the first 6 characters of the line is unpacked
                    packed = src_file2.readline().strip()[6:]
                    flags, rounding_mode = unpack_rf(packed)
                    # skip 00000000 buffer
                    src_file2.readline()
                    # parse through .S file
                    detected = False
                    done = False
                    op1val = "0"
                    op2val = "0"
                    # advance through the test source until the next line containing "op1val" or the end marker
                    while not (detected or done):
                        # print("det1")
                        line = src_file1.readline()
                        # print(line)
                        if "op1val" in line:
                            # print("det2")
                            # parse line
                            # take the text between the first "x" after "op1val" and the following ";"
                            op1val = line.split("op1val")[1].split("x")[1].split(";")[0]
                            if my_config.op != "fsqrt": # sqrt doesn't have two input vals
                                op2val = line.split("op2val")[1].split("x")[1].strip()
                                if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there
                            else:
                                op2val = 32*"X"
                            # go to next test in vector
                            detected = True
                        elif "RVTEST_CODE_END" in line:
                            done = True
                    # put it all together
                    if not done:
                        translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags, rounding_mode)
                        dest_file.write(translation + "\n")
                else:
                    # print("read false")
                    reading = False
        elif my_config.letter == "M" and my_config.bits == 64:
            reading = True
            # skip a second junk line (the first one was already skipped above)
            src_file2.readline()
            while reading:
                # print("trigger 64M")
                # get answer from Ref...signature
                # answers span two lines and are reversed
                answer2 = src_file2.readline().strip()
                answer1 = src_file2.readline().strip()
                answer = answer1 + answer2
                # print(answer1,answer2)
                # the pair e7d4b281 / 6f5ca309 is treated as the end-of-data marker
                if not (answer2 == "e7d4b281" and answer1 == "6f5ca309"): # if there is still stuff to read
                    # parse through .S file
                    detected = False
                    done = False
                    op1val = "0"
                    op2val = "0"
                    # advance through the test source until the next line containing "op1val" or the end marker
                    while not (detected or done):
                        # print("det1")
                        line = src_file1.readline()
                        # print(line)
                        if "op1val" in line:
                            # print("det2")
                            # parse line
                            op1val = line.split("op1val")[1].split("x")[1].split(";")[0]
                            # a "-" before the "x" marks a negative literal; convert its magnitude to two's complement
                            if "-" in line.split("op1val")[1].split("x")[0]: # neg sign handling
                                op1val = twos_comp(my_config.bits, op1val)
                            if my_config.op != "fsqrt": # sqrt doesn't have two input vals, unnec here but keeping for later
                                op2val = line.split("op2val")[1].split("x")[1].strip()
                                if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there
                                if "-" in line.split("op2val")[1].split("x")[0]: # neg sign handling
                                    op2val = twos_comp(my_config.bits, op2val)
                            # go to next test in vector
                            detected = True
                        elif "RVTEST_CODE_END" in line:
                            done = True
                    # ints don't have flags
                    flags = "XX"
                    # put it all together
                    if not done:
                        translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags.strip(), rounding_mode)
                        dest_file.write(translation + "\n")
                else:
                    # print("read false")
                    reading = False
        else:
            # every other configuration: the signature is read two lines per test (answer, then packed line)
            while reading:
                # get answer and flags from Ref...signature
                answer = src_file2.readline()
                # print(answer)
                packed = src_file2.readline()[6:]
                # print(packed)
                # an empty remainder (e.g. at end of file) ends the loop
                if len(packed.strip())>0: # if there is still stuff to read
                    # print("packed")
                    # parse through .S file
                    detected = False
                    done = False
                    op1val = "0"
                    op2val = "0"
                    # advance through the test source until the next line containing "op1val" or the end marker
                    while not (detected or done):
                        # print("det1")
                        line = src_file1.readline()
                        # print(line)
                        if "op1val" in line:
                            # print("det2")
                            # parse line
                            op1val = line.split("op1val")[1].split("x")[1].split(";")[0]
                            if "-" in line.split("op1val")[1].split("x")[0]: # neg sign handling
                                op1val = twos_comp(my_config.bits, op1val)
                            if my_config.op != "fsqrt": # sqrt doesn't have two input vals
                                op2val = line.split("op2val")[1].split("x")[1].strip()
                                if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there
                                if "-" in line.split("op2val")[1].split("x")[0]: # neg sign handling
                                    op2val = twos_comp(my_config.bits, op2val)
                            # go to next test in vector
                            detected = True
                        elif "RVTEST_CODE_END" in line:
                            done = True
                    # rounding mode for float
                    # for other ops, flags/rounding_mode keep the "XX"/"X" defaults set above
                    if not done and (my_config.op == "fsqrt" or my_config.op == "fdiv"):
                        flags, rounding_mode = unpack_rf(packed)
                    # put it all together
                    if not done:
                        translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags, rounding_mode)
                        dest_file.write(translation + "\n")
                else:
                    # print("read false")
                    reading = False
        print("out")
        # NOTE: the files are closed only when the loop body completes; an exception above leaves them open
        dest_file.close()
        src_file1.close()
        src_file2.close()
 config_list = [
 Config(32, "M", "div", "div_", 0),
 Config(32, "F", "fdiv", "fdiv", 1),
 Config(32, "F", "fsqrt", "fsqrt", 2),
 Config(32, "M", "rem", "rem-", 3),
 Config(32, "M", "divu", "divu-", 4),
 Config(32, "M", "remu", "remu-", 5),
 Config(64, "M", "div", "div-", 0),
 Config(64, "F", "fdiv", "fdiv", 1),
 Config(64, "F", "fsqrt", "fsqrt", 2),
 Config(64, "M", "rem", "rem-", 3),
 Config(64, "M", "divu", "divu-", 4),
 Config(64, "M", "remu", "remu-", 5),
 Config(64, "M", "divw", "divw-", 6),
 Config(64, "M", "divuw", "divuw-", 7),
 Config(64, "M", "remw", "remw-", 8),
 Config(64, "M", "remuw", "remuw-", 9)
 ]
 for c in config_list:
    create_vectors(c)
--- a/tests/fp/combined_IF_vectors/extract_testfloat_vectors.py
+++ b/tests/fp/combined_IF_vectors/extract_testfloat_vectors.py
@ -0,0 +1,79 @@
 #! /usr/bin/python3
 # extract sqrt and float div testfloat vectors
 # author: Alessandro Maiuolo
 # contact: amaiuolo@g.hmc.edu
 # date created: 3-29-2023
 import os
 wally = os.popen('echo $WALLY').read().strip()
 # print(wally)
 def ext_bits(my_string):
    """Zero-extend a hex string on the left to 32 hex digits (128 bits).

    Input that is already at least 32 characters long comes back unchanged.
    """
    width = 32  # 128 bits expressed as hex digits (4 bits each)
    return my_string.rjust(width, "0")
 # rounding mode dictionary
 round_dict = {
    "rne":"0",
    "rnm":"4",
    "ru":"3",
    "rz":"1",
    "rd":"2",
    "dyn":"7"
 }
 print("creating testfloat div test vectors")
 source_dir = "{}/tests/fp/vectors/".format(wally)
 dest_dir = "{}/tests/fp/combined_IF_vectors/IF_vectors/".format(wally)
 all_vectors = os.listdir(source_dir)
 div_vectors = [v for v in all_vectors if "div" in v]
 # translate every float div vector file into the combined IF vector format
 for vector in div_vectors:
    # the file name (before the first ".") is split on "_"; its third field is the rounding mode
    config_list = vector.split(".")[0].split("_")
    operation = "1" #float div
    rounding_mode = round_dict[str(config_list[2])]
    # Context managers close both files even if a malformed line raises below.
    # NOTE: the destination is opened in append mode, so re-running the script
    # adds to an existing file rather than replacing it.
    with open(dest_dir + "cvw_" + vector, 'a') as dest_file, open(source_dir + vector, 'r') as src_file:
        # one test per source line: input_1, input_2, answer and flags joined by "_"
        for i in src_file:
            [input_1, input_2, answer, flags] = i.split("_") # separate inputs, answer, and flags
            # strip removes the trailing newline carried by the flags field
            translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(input_1), ext_bits(input_2), ext_bits(answer), flags.strip(), rounding_mode)
            dest_file.write(translation + "\n")
 print("creating testfloat sqrt test vectors")
 sqrt_vectors = [v for v in all_vectors if "sqrt" in v]
 # translate every float sqrt vector file into the combined IF vector format
 for vector in sqrt_vectors:
    # the file name (before the first ".") is split on "_"; its third field is the rounding mode
    config_list = vector.split(".")[0].split("_")
    operation = "2" #sqrt
    rounding_mode = round_dict[str(config_list[2])]
    # Context managers close both files even if a malformed line raises below.
    # NOTE: the destination is opened in append mode, so re-running the script
    # adds to an existing file rather than replacing it.
    with open(dest_dir + "cvw_" + vector, 'a') as dest_file, open(source_dir + vector, 'r') as src_file:
        # one test per source line: input_1, answer and flags joined by "_"
        for i in src_file:
            [input_1, answer, flags] = i.split("_") # separate input, answer, and flags
            # sqrt has a single operand, so the second operand field is filled with X's;
            # strip removes the trailing newline carried by the flags field
            translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(input_1), "X"*32, ext_bits(answer), flags.strip(), rounding_mode)
            dest_file.write(translation + "\n")
--- a/tests/fp/create_all_vectors.sh
+++ b/tests/fp/create_all_vectors.sh
@ -3,3 +3,6 @@
 mkdir -p vectors
 ./create_vectors.sh
 ./remove_spaces.sh
 # to create tvs for evaluation of combined IFdivsqrt
 #./combined_IF_vectors/create_IF_vectors.sh