diff --git a/src/cache/cache.sv b/src/cache/cache.sv index da7f8327..56044384 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -96,8 +96,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache; logic SelFetchBuffer; logic CacheEn; - logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded; - logic [LINELEN/8-1:0] LineByteMask, DemuxedByteMask, FetchBufferByteSel; + logic [LINELEN/8-1:0] LineByteMask; logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; genvar index; @@ -161,21 +160,30 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path ///////////////////////////////////////////////////////////////////////////////////////////// + if(!READ_ONLY_CACHE) begin:WriteSelLogic + logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded; + logic [LINELEN/8-1:0] DemuxedByteMask, FetchBufferByteSel; - // Adjust byte mask from word to cache line - onehotdecoder #(LOGCWPL) adrdec(.bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded)); - for(index = 0; index < 2**LOGCWPL; index++) begin - assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0; - end - assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1. - assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0; + // Adjust byte mask from word to cache line + onehotdecoder #(LOGCWPL) adrdec(.bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded)); + for(index = 0; index < 2**LOGCWPL; index++) begin + assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0; + end + assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1. - // Merge write data into fetched cache line for store miss - for(index = 0; index < LINELEN/8; index++) begin - mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]), - .d1(FetchBuffer[8*index+7:8*index]), .s(FetchBufferByteSel[index]), .y(LineWriteData[8*index+7:8*index])); + // Merge write data into fetched cache line for store miss + for(index = 0; index < LINELEN/8; index++) begin + mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]), + .d1(FetchBuffer[8*index+7:8*index]), .s(FetchBufferByteSel[index]), .y(LineWriteData[8*index+7:8*index])); + end + assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0; end - + else + begin:WriteSelLogic + // No need for this mux if the cache does not handle writes. + assign LineWriteData = FetchBuffer; + assign LineByteMask = '1; + end ///////////////////////////////////////////////////////////////////////////////////////////// // Flush logic ///////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/cache/cacheLRU.sv b/src/cache/cacheLRU.sv index 05e26f4b..78080794 100644 --- a/src/cache/cacheLRU.sv +++ b/src/cache/cacheLRU.sv @@ -98,7 +98,9 @@ module cacheLRU assign LRUUpdate[t1] = LRUUpdate[s] & WayEncoded[r]; end - mux2 #(1) LRUMuxes[NUMWAYS-2:0](CurrLRU, ~WayExpanded, LRUUpdate, NextLRU); + // The root node of the LRU tree will always be selected in LRUUpdate. No mux needed. + assign NextLRU[NUMWAYS-2] = ~WayExpanded[NUMWAYS-2]; + mux2 #(1) LRUMuxes[NUMWAYS-3:0](CurrLRU[NUMWAYS-3:0], ~WayExpanded[NUMWAYS-3:0], LRUUpdate[NUMWAYS-3:0], NextLRU[NUMWAYS-3:0]); // Compute next victim way. for(s = NUMWAYS-2; s >= NUMWAYS/2; s--) begin @@ -128,8 +130,8 @@ module cacheLRU always_ff @(posedge clk) begin if (reset) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0; if(CacheEn) begin - if((InvalidateCache | FlushCache) & ~FlushStage) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0; - else if (LRUWriteEn & ~FlushStage) begin + // if((InvalidateCache | FlushCache) & ~FlushStage) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0; + if (LRUWriteEn & ~FlushStage) begin LRUMemory[PAdr] <= NextLRU; end if(LRUWriteEn & ~FlushStage & (PAdr == CacheSet)) diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index be10e800..b3400107 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -75,109 +75,139 @@ module fctrl ( logic [1:0] FResSelD; // Select one of the results that finish in the memory stage logic [2:0] FrmD, FrmE; // FP rounding mode logic [`FMTBITS-1:0] FmtD; // FP format - logic [1:0] Fmt; // format - before possible reduction + logic [1:0] Fmt, Fmt2; // format - before possible reduction logic SupportedFmt; // is the format supported + logic SupportedFmt2; // is the source format supported for fp -> fp logic FCvtIntD, FCvtIntM; // convert to integer opperation // FPU Instruction Decoder assign Fmt = Funct7D[1:0]; + assign Fmt2 = Rs2D[1:0]; // source format for fcvt fp->fp - // Note: only Fmt is checked; fcvt does not check destination format assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & `D_SUPPORTED) | (Fmt == 2'b10 & `ZFH_SUPPORTED) | (Fmt == 2'b11 & `Q_SUPPORTED)); + assign SupportedFmt2 = (Fmt2 == 2'b00 | (Fmt2 == 2'b01 & `D_SUPPORTED) | + (Fmt2 == 2'b10 & `ZFH_SUPPORTED) | (Fmt2 == 2'b11 & `Q_SUPPORTED)); // decode the instruction + // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt always_comb if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; else if (OpD != 7'b0000111 & OpD != 7'b0100111 & ~SupportedFmt) ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // for anything other than loads and stores, check for supported format - else case(OpD) - // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt - 7'b0000111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // flw - 3'b011: if (`D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // fld - else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // fld not supported - 3'b100: if (`Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // flq - else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // flq not supported - 3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // flh - else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // flh not supported - default: ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction - endcase - 7'b0100111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsw - 3'b011: if (`D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsd - else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // fsd not supported - 3'b100: if (`Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsq - else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // fsq not supported - 3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsh - else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // fsh not supported - default: ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction - endcase - 7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0; // fmadd - 7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0; // fmsub - 7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0; // fnmsub - 7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0; // fnmadd - 7'b1010011: casez(Funct7D) - 7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0; // fadd - 7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0; // fsub - 7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0; // fmul - 7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0; // fdiv - 7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0; // fsqrt - 7'b00100??: case(Funct3D) - 3'b000: ControlsD = `FCTRLW'b1_0_00_xx_000_0_0_0; // fsgnj - 3'b001: ControlsD = `FCTRLW'b1_0_00_xx_001_0_0_0; // fsgnjn - 3'b010: ControlsD = `FCTRLW'b1_0_00_xx_010_0_0_0; // fsgnjx - default: ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction - endcase - 7'b00101??: case(Funct3D) - 3'b000: ControlsD = `FCTRLW'b1_0_00_xx_110_0_0_0; // fmin - 3'b001: ControlsD = `FCTRLW'b1_0_00_xx_101_0_0_0; // fmax - default: ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction - endcase - 7'b10100??: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b0_1_00_xx_010_0_0_0; // feq - 3'b001: ControlsD = `FCTRLW'b0_1_00_xx_001_0_0_0; // flt - 3'b000: ControlsD = `FCTRLW'b0_1_00_xx_011_0_0_0; // fle - default: ControlsD = `FCTRLW'b0_0_00_xx_000__0_1_0; // non-implemented instruction - endcase - 7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b0_1_10_xx_000_0_0_0; // fclass - else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_xx_000_0_0_0; // fmv.x.w to int reg - else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_xx_000_0_0_0; // fmv.x.d to int reg - else ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction - 7'b1101000: case(Rs2D[1:0]) - 2'b00: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.s.w w->s - 2'b01: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.s.wu wu->s - 2'b10: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.s.l l->s - 2'b11: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.s.lu lu->s - endcase - 7'b1100000: case(Rs2D[1:0]) - 2'b00: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.s s->w - 2'b01: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.s s->wu - 2'b10: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.s s->l - 2'b11: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.s s->lu - endcase - 7'b1111000: ControlsD = `FCTRLW'b1_0_00_xx_011_0_0_0; // fmv.w.x to fp reg - 7'b0100000: ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.d - 7'b1101001: case(Rs2D[1:0]) - 2'b00: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.d.w w->d - 2'b01: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.d.wu wu->d - 2'b10: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.d.l l->d - 2'b11: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.d.lu lu->d - endcase - 7'b1100001: case(Rs2D[1:0]) - 2'b00: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.d d->w - 2'b01: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.d d->wu - 2'b10: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.d d->l - 2'b11: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.d d->lu - endcase - 7'b1111001: ControlsD = `FCTRLW'b1_0_00_xx_011_0_0_0; // fmv.d.x to fp reg - 7'b0100001: ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0; // fcvt.d.s - default: ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction - endcase - default: ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction - endcase + else begin + ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // default: non-implemented instruction + /* verilator lint_off CASEINCOMPLETE */ // default value above has priority so no other default needed + case(OpD) + 7'b0000111: case(Funct3D) + 3'b010: ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // flw + 3'b011: if (`D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // fld + 3'b100: if (`Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // flq + 3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // flh + endcase + 7'b0100111: case(Funct3D) + 3'b010: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsw + 3'b011: if (`D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsd + 3'b100: if (`Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsq + 3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsh + endcase + 7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0; // fmadd + 7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0; // fmsub + 7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0; // fnmsub + 7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0; // fnmadd + 7'b1010011: casez(Funct7D) + 7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0; // fadd + 7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0; // fsub + 7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0; // fmul + 7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0; // fdiv + 7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0; // fsqrt + 7'b00100??: case(Funct3D) + 3'b000: ControlsD = `FCTRLW'b1_0_00_xx_000_0_0_0; // fsgnj + 3'b001: ControlsD = `FCTRLW'b1_0_00_xx_001_0_0_0; // fsgnjn + 3'b010: ControlsD = `FCTRLW'b1_0_00_xx_010_0_0_0; // fsgnjx + endcase + 7'b00101??: case(Funct3D) + 3'b000: ControlsD = `FCTRLW'b1_0_00_xx_110_0_0_0; // fmin + 3'b001: ControlsD = `FCTRLW'b1_0_00_xx_101_0_0_0; // fmax + endcase + 7'b10100??: case(Funct3D) + 3'b010: ControlsD = `FCTRLW'b0_1_00_xx_010_0_0_0; // feq + 3'b001: ControlsD = `FCTRLW'b0_1_00_xx_001_0_0_0; // flt + 3'b000: ControlsD = `FCTRLW'b0_1_00_xx_011_0_0_0; // fle + endcase + 7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000) + ControlsD = `FCTRLW'b0_1_10_xx_000_0_0_0; // fclass + else if (Funct3D == 3'b000 & Rs2D == 5'b00000) + ControlsD = `FCTRLW'b0_1_11_xx_000_0_0_0; // fmv.x.w / fmv.x.d to int register + 7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000) + ControlsD = `FCTRLW'b1_0_00_xx_011_0_0_0; // fmv.w.x / fmv.d.x to fp reg + 7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.(d/q/h) + 7'b0100001: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b01) + ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0; // fcvt.d.(s/h/q) + // coverage off + // Not covered in testing because rv64gc does not support half or quad precision + 7'b0100010: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b10) + ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0; // fcvt.h.(s/d/q) + 7'b0100011: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b11) + ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0; // fcvt.q.(s/h/d) + // coverage on + 7'b1101000: case(Rs2D) + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.s.w w->s + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.s.wu wu->s + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.s.l l->s + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.s.lu lu->s + endcase + 7'b1100000: case(Rs2D) + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.s s->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.s s->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.s s->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.s s->lu + endcase + 7'b1101001: case(Rs2D) + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.d.w w->d + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.d.wu wu->d + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.d.l l->d + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.d.lu lu->d + endcase + 7'b1100001: case(Rs2D) + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.d d->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.d d->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.d d->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.d d->lu + endcase + // coverage off + // Not covered in testing because rv64gc does not support half or quad precision + 7'b1101010: case(Rs2D) + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.h.w w->h + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.h.wu wu->h + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.h.l l->h + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.h.lu lu->h + endcase + 7'b1100010: case(Rs2D) + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.h h->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.h h->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.h h->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.h h->lu + endcase + 7'b1101011: case(Rs2D) + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.q.w w->q + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.q.wu wu->q + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.q.l l->q + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.q.lu lu->q + endcase + 7'b1100011: case(Rs2D) + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.q q->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.q q->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.q q->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.q q->lu + endcase + // coverage on + endcase + endcase + end + /* verilator lint_on CASEINCOMPLETE */ // unswizzle control bits assign #1 {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, FDivStartD, IllegalFPUInstrD, FCvtIntD} = ControlsD; @@ -303,7 +333,5 @@ module fctrl ( flopenrc #(4) MWCtrlReg(clk, reset, FlushW, ~StallW, {FRegWriteM, FResSelM, FCvtIntM}, {FRegWriteW, FResSelW, FCvtIntW}); - - //assign FCvtIntW = (FResSelW == 2'b01); - + endmodule diff --git a/src/ieu/alu.sv b/src/ieu/alu.sv index c4e0f390..43df83b6 100644 --- a/src/ieu/alu.sv +++ b/src/ieu/alu.sv @@ -37,14 +37,13 @@ module alu #(parameter WIDTH=32) ( input logic [1:0] BSelect, // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction input logic [2:0] ZBBSelect, // ZBB mux select signal input logic [2:0] Funct3, // For BMU decoding - input logic [1:0] CompFlags, // Comparator flags input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage - output logic [WIDTH-1:0] Result, // ALU result + output logic [WIDTH-1:0] ALUResult, // ALU result output logic [WIDTH-1:0] Sum); // Sum of operands // CondInvB = ~B when subtracting, B otherwise. Shift = shift result. SLT/U = result of a slt/u instruction. // FullResult = ALU result before adjusting for a RV64 w-suffix instruction. - logic [WIDTH-1:0] CondMaskInvB, Shift, FullResult, ALUResult; // Intermediate Signals + logic [WIDTH-1:0] CondMaskInvB, Shift, FullResult, PreALUResult; // Intermediate Signals logic [WIDTH-1:0] CondMaskB; // Result of B mask select mux logic [WIDTH-1:0] CondShiftA; // Result of A shifted select mux logic [WIDTH-1:0] CondExtA; // Result of Zero Extend A select mux @@ -84,16 +83,16 @@ module alu #(parameter WIDTH=32) ( end // Support RV64I W-type addw/subw/addiw/shifts that discard upper 32 bits and sign-extend 32-bit result to 64 bits - if (WIDTH == 64) assign ALUResult = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult; - else assign ALUResult = FullResult; + if (WIDTH == 64) assign PreALUResult = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult; + else assign PreALUResult = FullResult; // Final Result B instruction select mux if (`ZBC_SUPPORTED | `ZBS_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED) begin : bitmanipalu bitmanipalu #(WIDTH) balu(.A, .B, .W64, .BSelect, .ZBBSelect, - .Funct3, .CompFlags, .BALUControl, .ALUResult, .FullResult, - .CondMaskB, .CondShiftA, .Result); + .Funct3, .LT,.LTU, .BALUControl, .PreALUResult, .FullResult, + .CondMaskB, .CondShiftA, .ALUResult); end else begin - assign Result = ALUResult; + assign ALUResult = PreALUResult; assign CondMaskB = B; assign CondShiftA = A; end diff --git a/src/ieu/bmu/bitmanipalu.sv b/src/ieu/bmu/bitmanipalu.sv index 07c7e534..228b2313 100644 --- a/src/ieu/bmu/bitmanipalu.sv +++ b/src/ieu/bmu/bitmanipalu.sv @@ -35,12 +35,13 @@ module bitmanipalu #(parameter WIDTH=32) ( input logic [1:0] BSelect, // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction input logic [2:0] ZBBSelect, // ZBB mux select signal input logic [2:0] Funct3, // Funct3 field of opcode indicates operation to perform - input logic [1:0] CompFlags, // Comparator flags + input logic LT, // less than flag + input logic LTU, // less than unsigned flag input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage - input logic [WIDTH-1:0] ALUResult, FullResult, // ALUResult, FullResult signals + input logic [WIDTH-1:0] PreALUResult, FullResult,// PreALUResult, FullResult signals output logic [WIDTH-1:0] CondMaskB, // B is conditionally masked for ZBS instructions output logic [WIDTH-1:0] CondShiftA, // A is conditionally shifted for ShAdd instructions - output logic [WIDTH-1:0] Result); // Result + output logic [WIDTH-1:0] ALUResult); // Result logic [WIDTH-1:0] ZBBResult, ZBCResult; // ZBB, ZBC Result logic [WIDTH-1:0] MaskB; // BitMask of B @@ -84,16 +85,16 @@ module bitmanipalu #(parameter WIDTH=32) ( // ZBB Unit if (`ZBB_SUPPORTED) begin: zbb - zbb #(WIDTH) ZBB(.A, .RevA, .B, .ALUResult, .W64, .lt(CompFlags[0]), .ZBBSelect, .ZBBResult); + zbb #(WIDTH) ZBB(.A, .RevA, .B, .W64, .LT, .LTU, .BUnsigned(Funct3[0]), .ZBBSelect, .ZBBResult); end else assign ZBBResult = 0; // Result Select Mux always_comb case (BSelect) // 00: ALU, 01: ZBA/ZBS, 10: ZBB, 11: ZBC - 2'b00: Result = ALUResult; - 2'b01: Result = FullResult; // NOTE: We don't use ALUResult because ZBA/ZBS instructions don't sign extend the MSB of the right-hand word. - 2'b10: Result = ZBBResult; - 2'b11: Result = ZBCResult; + 2'b00: ALUResult = PreALUResult; + 2'b01: ALUResult = FullResult; // NOTE: We don't use ALUResult because ZBA/ZBS instructions don't sign extend the MSB of the right-hand word. + 2'b10: ALUResult = ZBBResult; + 2'b11: ALUResult = ZBCResult; endcase endmodule diff --git a/src/ieu/bmu/bmuctrl.sv b/src/ieu/bmu/bmuctrl.sv index 90d031a1..3ddc3231 100644 --- a/src/ieu/bmu/bmuctrl.sv +++ b/src/ieu/bmu/bmuctrl.sv @@ -48,7 +48,6 @@ module bmuctrl( output logic [1:0] BSelectE, // Indicates if ZBA_ZBB_ZBC_ZBS instruction in one-hot encoding output logic [2:0] ZBBSelectE, // ZBB mux select signal output logic BRegWriteE, // Indicates if it is a R type B instruction in Execute - output logic BComparatorSignedE, // Indicates if comparator signed in Execute Stage output logic [2:0] BALUControlE // ALU Control signals for B instructions in Execute Stage ); @@ -56,7 +55,6 @@ module bmuctrl( logic [2:0] Funct3D; // Funct3 field in Decode stage logic [6:0] Funct7D; // Funct7 field in Decode stage logic [4:0] Rs2D; // Rs2 source register in Decode stage - logic BComparatorSignedD; // Indicates if comparator signed (max, min instruction) in Decode Stage logic RotateD; // Indicates if rotate instruction in Decode Stage logic MaskD; // Indicates if zbs instruction in Decode Stage logic PreShiftD; // Indicates if sh1add, sh2add, sh3add instruction in Decode Stage @@ -110,10 +108,10 @@ module bmuctrl( BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0; // rev8 17'b0010011_0010100_101: if (Rs2D[4:0] == 5'b00111) BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0; // orc.b - 17'b0110011_0000101_110: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_0_0_0_0_0; // max - 17'b0110011_0000101_111: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_0_0_0_0_0; // maxu - 17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_0_0_0_0_0; // min - 17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_0_0_0_0_0; // minu + 17'b0110011_0000101_110: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_1_0_0_0_0; // max + 17'b0110011_0000101_111: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_1_0_0_0_0; // maxu + 17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0; // min + 17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0; // minu endcase if (`XLEN==32) casez({OpD, Funct7D, Funct3D}) @@ -172,12 +170,9 @@ module bmuctrl( // Pack BALUControl Signals assign BALUControlD = {RotateD, MaskD, PreShiftD}; - // Comparator should perform signed comparison when min/max instruction. We have overlap in funct3 with some branch instructions so we use opcode to differentiate betwen min/max and branches - assign BComparatorSignedD = (Funct3D[2]^Funct3D[0]) & ~OpD[6]; - // Choose ALUSelect brom BMU for BMU operations, Funct3 for IEU operations, or 0 for addition assign ALUSelectD = BALUOpD ? BALUSelectD : (ALUOpD ? Funct3D : 3'b000); // BMU Execute stage pipieline control register - flopenrc#(10) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BComparatorSignedD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BComparatorSignedE, BALUControlE}); + flopenrc#(9) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE}); endmodule diff --git a/src/ieu/bmu/clmul.sv b/src/ieu/bmu/clmul.sv index 904c6423..cf3e1c6b 100644 --- a/src/ieu/bmu/clmul.sv +++ b/src/ieu/bmu/clmul.sv @@ -30,20 +30,20 @@ `include "wally-config.vh" module clmul #(parameter WIDTH=32) ( - input logic [WIDTH-1:0] A, B, // Operands + input logic [WIDTH-1:0] X, Y, // Operands output logic [WIDTH-1:0] ClmulResult); // ZBS result - logic [(WIDTH*WIDTH)-1:0] s; // intermediary signals for carry-less multiply + logic [(WIDTH*WIDTH)-1:0] S; // intermediary signals for carry-less multiply integer i,j; always_comb begin for (i=0;i 64 bit conversions to the to-be-tested list @@ -270,27 +270,27 @@ module testbenchfp; end if (`D_SUPPORTED) begin // if double precision is supported if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested - Tests = {Tests, f64rv32cvtint}; - // add the op-codes for these tests to the op-code list - OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; - WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; - // add what unit is used and the fmt to their lists (one for each test) - for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b01}; - end - if (`XLEN == 64) begin // if 64-bit integers are being supported - Tests = {Tests, f64rv64cvtint}; - // add the op-codes for these tests to the op-code list - OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; - WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; - // add what unit is used and the fmt to their lists (one for each test) - for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b01}; - end - end - end + Tests = {Tests, f64rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b01}; + end + if (`XLEN == 64) begin // if 64-bit integers are being supported + Tests = {Tests, f64rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + end if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversions are being tested if(`F_SUPPORTED) begin // if single precision is supported // add the 64 <-> 32 bit conversions to the to-be-tested list @@ -397,27 +397,27 @@ module testbenchfp; end if (`F_SUPPORTED) begin // if single precision being supported if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested - Tests = {Tests, f32rv32cvtint}; - // add the op-codes for these tests to the op-code list - OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; - WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; - // add what unit is used and the fmt to their lists (one for each test) - for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b00}; - end - if (`XLEN == 64) begin // if 64-bit integers are supported - Tests = {Tests, f32rv64cvtint}; - // add the op-codes for these tests to the op-code list - OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; - WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; - // add what unit is used and the fmt to their lists (one for each test) - for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b00}; - end - end - end + Tests = {Tests, f32rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b00}; + end + if (`XLEN == 64) begin // if 64-bit integers are supported + Tests = {Tests, f32rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + end if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversion is being tested if(`ZFH_SUPPORTED) begin // add the 32 <-> 16 bit conversions to the to-be-tested list @@ -508,27 +508,27 @@ module testbenchfp; end if (`ZFH_SUPPORTED) begin // if half precision supported if (TEST === "cvtint"| TEST === "all") begin // if in conversions are being tested - Tests = {Tests, f16rv32cvtint}; - // add the op-codes for these tests to the op-code list - OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; - WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; - // add what unit is used and the fmt to their lists (one for each test) - for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b10}; - end - if (`XLEN == 64) begin // if 64-bit integers are supported - Tests = {Tests, f16rv64cvtint}; - // add the op-codes for these tests to the op-code list - OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; - WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; - // add what unit is used and the fmt to their lists (one for each test) - for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b10}; - end - end - end + Tests = {Tests, f16rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (`XLEN == 64) begin // if 64-bit integers are supported + Tests = {Tests, f16rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f16cmp}; @@ -656,7 +656,8 @@ module testbenchfp; end // extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector - readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, + readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), + .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, .Xs, .Ys, .Zs, .Unit(UnitVal), .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal), .Xm, .Ym, .Zm, .DivStart, @@ -680,7 +681,7 @@ module testbenchfp; /////////////////////////////////////////////////////////////////////////////////////////////// // instantiate devices under test - if (TEST === "fma"| TEST === "mul" | TEST === "add" | TEST === "all") begin : fma + if (TEST === "fma"| TEST === "mul" | TEST === "add" | TEST === "sub" | TEST === "all") begin : fma fma fma(.Xs(Xs), .Ys(Ys), .Zs(Zs), .Xe(Xe), .Ye(Ye), .Ze(Ze), .Xm(Xm), .Ym(Ym), .Zm(Zm), @@ -1331,4 +1332,4 @@ module readvectors ( .Xm, .Ym, .Zm, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN, .XSubnorm, .XZero, .YZero, .ZZero, .XInf, .YInf, .ZInf, .XEn, .YEn, .ZEn, .XExpMax); -endmodule \ No newline at end of file +endmodule diff --git a/testbench/testbench.sv b/testbench/testbench.sv index 4372fd4e..2bc3622c 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -28,7 +28,7 @@ `include "wally-config.vh" `include "tests.vh" -`define PrintHPMCounters 1 +`define PrintHPMCounters 0 `define BPRED_LOGGER 0 `define I_CACHE_ADDR_LOGGER 0 `define D_CACHE_ADDR_LOGGER 0 diff --git a/testbench/tests.vh b/testbench/tests.vh index 79f100d4..7f5ad4d0 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -47,7 +47,9 @@ string tvpaths[] = '{ "ieu", "ebu", "csrwrites", - "priv" + "priv", + "ifu", + "fpu" }; string coremark[] = '{ diff --git a/tests/coverage/Makefile b/tests/coverage/Makefile index 7d4552af..b2a2544d 100644 --- a/tests/coverage/Makefile +++ b/tests/coverage/Makefile @@ -17,7 +17,7 @@ all: $(OBJECTS) # Change many things if bit width isn't 64 %.elf: $(SRCDIR)/%.$(SEXT) WALLY-init-lib.h Makefile - riscv64-unknown-elf-gcc -g -o $@ -march=rv64gc_zba_zbb_zbc_zbs -mabi=lp64 -mcmodel=medany \ + riscv64-unknown-elf-gcc -g -o $@ -march=rv64gqc_zba_zbb_zbc_zbs_zfh -mabi=lp64 -mcmodel=medany \ -nostartfiles -T../../examples/link/link.ld $< riscv64-unknown-elf-objdump -S $@ > $@.objdump riscv64-unknown-elf-elf2hex --bit-width 64 --input $@ --output $@.memfile diff --git a/tests/coverage/fpu.S b/tests/coverage/fpu.S new file mode 100644 index 00000000..3fdca6e8 --- /dev/null +++ b/tests/coverage/fpu.S @@ -0,0 +1,73 @@ +/////////////////////////////////////////// +// fpu.S +// +// Written: David_Harris@hmc.edu 28 March 2023 +// +// Purpose: Test coverage for FPU +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +// load code to initalize stack, handle interrupts, terminate +#include "WALLY-init-lib.h" + +main: + + #bseti t0, zero, 14 # turn on FPU + csrs mstatus, t0 + + # Test legal instructions not covered elsewhere + flq ft0, 0(a0) + flh ft0, 8(a0) + fsq ft0, 0(a0) + fsh ft0, 8(a0) + + # Tests for fpu/fctrl.sv + fcvt.h.s ft1, ft0 + fcvt.q.s ft2, ft0 + fcvt.h.w ft3, a0 + fcvt.h.wu ft3, a0 + fcvt.h.l ft3, a0 + fcvt.h.lu ft3, a0 + fcvt.w.h a0, ft3 + fcvt.wu.h a0, ft3 + fcvt.l.h a0, ft3 + fcvt.lu.h a0, ft3 + fcvt.q.w ft3, a0 + fcvt.q.wu ft3, a0 + fcvt.q.l ft3, a0 + fcvt.q.lu ft3, a0 + fcvt.w.q a0, ft3 + fcvt.wu.q a0, ft3 + fcvt.l.q a0, ft3 + fcvt.lu.q a0, ft3 + + # Test illegal instructions are detected + .word 0x00000007 // illegal floating-point load (bad Funct3) + .word 0x00000027 // illegal floating-point store (bad Funct3) + .word 0x58F00053 // illegal fsqrt (bad Rs2D) + .word 0x20007053 // illegal fsgnj (bad Funct3) + .word 0x28007053 // illegal fmin/max (bad Funct3) + .word 0xA0007053 // illegal fcmp (bad Funct3) + .word 0xE0007053 // illegal fclass/fmv (bad Funct3) + .word 0xF0007053 // illegal fmv (bad Funct3) + .word 0x43007053 // illegal fcvt.d.* (bad Rs2D) + .word 0x42207053 // illegal fcvt.d.* (bad Rs2D[1]) + + j done + diff --git a/tests/coverage/ifu.S b/tests/coverage/ifu.S new file mode 100644 index 00000000..3ceeeac1 --- /dev/null +++ b/tests/coverage/ifu.S @@ -0,0 +1,40 @@ +/////////////////////////////////////////// +// ifu.S +// +// Written: sriley@g.hmc.edu 28 March 2023 +// +// Purpose: Test coverage for IFU +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +// load code to initalize stack, handle interrupts, terminate +#include "WALLY-init-lib.h" + +main: + # turn floating point on + li t0, 0x2000 + csrs mstatus, t0 + + # calling compressed floating point load double instruction + //.halfword 0x2000 // CL type compressed floating-point ld-->funct3,imm,rs1',imm,rd',op + // binary version 0000 0000 0000 0000 0010 0000 0000 0000 + mv s0, sp + c.fld fs0, 0(s0) + + j done diff --git a/tests/coverage/priv.S b/tests/coverage/priv.S index 3aa3aea5..8cd18925 100644 --- a/tests/coverage/priv.S +++ b/tests/coverage/priv.S @@ -36,4 +36,11 @@ main: addi t0, zero, 0 csrr t0, stimecmp + # Test write to STVAL, SCAUSE, SEPC, and STIMECMP CSRs + li t0, 0 + csrw stval, t0 + csrw scause, t0 + csrw sepc, t0 + csrw stimecmp, t0 + j done diff --git a/tests/fp/combined_IF_vectors/IF_vectors/README b/tests/fp/combined_IF_vectors/IF_vectors/README new file mode 100644 index 00000000..bfbf368d --- /dev/null +++ b/tests/fp/combined_IF_vectors/IF_vectors/README @@ -0,0 +1,4 @@ +This folder holds the archtest and testfloat vectors necessary fo evaluating performance +of standalone intdiv vs combined IFdivsqrt + +to generate vectors, uncomment line 8 in create_all_vectors.sh \ No newline at end of file diff --git a/tests/fp/combined_IF_vectors/create_IF_vectors.sh b/tests/fp/combined_IF_vectors/create_IF_vectors.sh new file mode 100755 index 00000000..d6b83f59 --- /dev/null +++ b/tests/fp/combined_IF_vectors/create_IF_vectors.sh @@ -0,0 +1,8 @@ +#!/bin/sh +# create test vectors for stand alone int + +./extract_testfloat_vectors.py +./extract_arch_vectors.py + +# to create tvs for evaluation of combined IFdivsqrt +#./combined_IF_vectors/create_IF_vectors.sh \ No newline at end of file diff --git a/tests/fp/combined_IF_vectors/extract_arch_vectors.py b/tests/fp/combined_IF_vectors/extract_arch_vectors.py new file mode 100755 index 00000000..c80c7a84 --- /dev/null +++ b/tests/fp/combined_IF_vectors/extract_arch_vectors.py @@ -0,0 +1,251 @@ +#! /usr/bin/python3 + +# author: Alessandro Maiuolo +# contact: amaiuolo@g.hmc.edu +# date created: 3-29-2023 + +# extract all arch test vectors +import os +wally = os.popen('echo $WALLY').read().strip() + +def ext_bits(my_string): + target_len = 32 # we want 128 bits, div by 4 bc hex notation + zeroes_to_add = target_len - len(my_string) + return zeroes_to_add*"0" + my_string + +def twos_comp(b, x): + if b == 32: + return hex(0x100000000 - int(x,16))[2:] + elif b == 64: + return hex(0x10000000000000000 - int(x,16))[2:] + else: + return "UNEXPECTED_BITSIZE" + +def unpack_rf(packed): + bin_u = bin(int(packed, 16))[2:].zfill(8) # translate to binary + flags = hex(int(bin_u[3:],2))[2:].zfill(2) + rounding_mode = hex(int(bin_u[:3],2))[2:] + return flags, rounding_mode + +# rounding mode dictionary +round_dict = { + "rne":"0", + "rnm":"4", + "ru":"3", + "rz":"1", + "rd":"2", + "dyn":"7" +} + +# fcsr dictionary +fcsr_dict = { + "0":"rne", + "128":"rnm", + "96":"ru", + "32":"rz", + "64":"rd", + "224":"dyn" +} + +print("creating arch test vectors") + +class Config: + def __init__(self, bits, letter, op, filt, op_code): + self.bits = bits + self.letter = letter + self.op = op + self.filt = filt + self.op_code = op_code + +def create_vectors(my_config): + suite_folder_num = my_config.bits + if my_config.bits == 64 and my_config.letter == "F": suite_folder_num = 32 + source_dir1 = "{}/addins/riscv-arch-test/riscv-test-suite/rv{}i_m/{}/src/".format(wally, suite_folder_num, my_config.letter) + source_dir2 = "{}/tests/riscof/work/riscv-arch-test/rv{}i_m/{}/src/".format(wally, my_config.bits, my_config.letter) + dest_dir = "{}/tests/fp/combined_IF_vectors/IF_vectors/".format(wally) + all_vectors1 = os.listdir(source_dir1) + + filt_vectors1 = [v for v in all_vectors1 if my_config.filt in v] + # print(filt_vectors1) + filt_vectors2 = [v + "/ref/Reference-sail_c_simulator.signature" for v in all_vectors1 if my_config.filt in v] + + # iterate through all vectors + for i in range(len(filt_vectors1)): + vector1 = filt_vectors1[i] + vector2 = filt_vectors2[i] + operation = my_config.op_code + rounding_mode = "X" + flags = "XX" + # use name to create our new tv + dest_file = open("{}cvw_{}_{}.tv".format(dest_dir, my_config.bits, vector1[:-2]), 'a') + # open vectors + src_file1 = open(source_dir1 + vector1,'r') + src_file2 = open(source_dir2 + vector2,'r') + # for each test in the vector + reading = True + src_file2.readline() #skip first bc junk + # print(my_config.bits, my_config.letter) + if my_config.letter == "F" and my_config.bits == 64: + reading = True + # print("trigger 64F") + #skip first 2 lines bc junk + src_file2.readline() + while reading: + # get answer and flags from Ref...signature + # answers are before deadbeef (first line of 4) + # flags are after deadbeef (third line of 4) + answer = src_file2.readline().strip() + deadbeef = src_file2.readline().strip() + # print(answer) + if not (answer == "e7d4b281" and deadbeef == "6f5ca309"): # if there is still stuff to read + # get flags + packed = src_file2.readline().strip()[6:] + flags, rounding_mode = unpack_rf(packed) + # skip 00000000 buffer + src_file2.readline() + + # parse through .S file + detected = False + done = False + op1val = "0" + op2val = "0" + while not (detected or done): + # print("det1") + line = src_file1.readline() + # print(line) + if "op1val" in line: + # print("det2") + # parse line + op1val = line.split("op1val")[1].split("x")[1].split(";")[0] + if my_config.op != "fsqrt": # sqrt doesn't have two input vals + op2val = line.split("op2val")[1].split("x")[1].strip() + if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there + else: + op2val = 32*"X" + # go to next test in vector + detected = True + elif "RVTEST_CODE_END" in line: + done = True + # put it all together + if not done: + translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags, rounding_mode) + dest_file.write(translation + "\n") + else: + # print("read false") + reading = False + elif my_config.letter == "M" and my_config.bits == 64: + reading = True + #skip first 2 lines bc junk + src_file2.readline() + while reading: + # print("trigger 64M") + # get answer from Ref...signature + # answers span two lines and are reversed + answer2 = src_file2.readline().strip() + answer1 = src_file2.readline().strip() + answer = answer1 + answer2 + # print(answer1,answer2) + if not (answer2 == "e7d4b281" and answer1 == "6f5ca309"): # if there is still stuff to read + # parse through .S file + detected = False + done = False + op1val = "0" + op2val = "0" + while not (detected or done): + # print("det1") + line = src_file1.readline() + # print(line) + if "op1val" in line: + # print("det2") + # parse line + op1val = line.split("op1val")[1].split("x")[1].split(";")[0] + if "-" in line.split("op1val")[1].split("x")[0]: # neg sign handling + op1val = twos_comp(my_config.bits, op1val) + if my_config.op != "fsqrt": # sqrt doesn't have two input vals, unnec here but keeping for later + op2val = line.split("op2val")[1].split("x")[1].strip() + if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there + if "-" in line.split("op2val")[1].split("x")[0]: # neg sign handling + op2val = twos_comp(my_config.bits, op2val) + # go to next test in vector + detected = True + elif "RVTEST_CODE_END" in line: + done = True + # ints don't have flags + flags = "XX" + # put it all together + if not done: + translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags.strip(), rounding_mode) + dest_file.write(translation + "\n") + else: + # print("read false") + reading = False + else: + while reading: + # get answer and flags from Ref...signature + answer = src_file2.readline() + # print(answer) + packed = src_file2.readline()[6:] + # print(packed) + if len(packed.strip())>0: # if there is still stuff to read + # print("packed") + # parse through .S file + detected = False + done = False + op1val = "0" + op2val = "0" + while not (detected or done): + # print("det1") + line = src_file1.readline() + # print(line) + if "op1val" in line: + # print("det2") + # parse line + op1val = line.split("op1val")[1].split("x")[1].split(";")[0] + if "-" in line.split("op1val")[1].split("x")[0]: # neg sign handling + op1val = twos_comp(my_config.bits, op1val) + if my_config.op != "fsqrt": # sqrt doesn't have two input vals + op2val = line.split("op2val")[1].split("x")[1].strip() + if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there + if "-" in line.split("op2val")[1].split("x")[0]: # neg sign handling + op2val = twos_comp(my_config.bits, op2val) + # go to next test in vector + detected = True + elif "RVTEST_CODE_END" in line: + done = True + # rounding mode for float + if not done and (my_config.op == "fsqrt" or my_config.op == "fdiv"): + flags, rounding_mode = unpack_rf(packed) + + # put it all together + if not done: + translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags, rounding_mode) + dest_file.write(translation + "\n") + else: + # print("read false") + reading = False + print("out") + dest_file.close() + src_file1.close() + src_file2.close() + +config_list = [ +Config(32, "M", "div", "div_", 0), +Config(32, "F", "fdiv", "fdiv", 1), +Config(32, "F", "fsqrt", "fsqrt", 2), +Config(32, "M", "rem", "rem-", 3), +Config(32, "M", "divu", "divu-", 4), +Config(32, "M", "remu", "remu-", 5), +Config(64, "M", "div", "div-", 0), +Config(64, "F", "fdiv", "fdiv", 1), +Config(64, "F", "fsqrt", "fsqrt", 2), +Config(64, "M", "rem", "rem-", 3), +Config(64, "M", "divu", "divu-", 4), +Config(64, "M", "remu", "remu-", 5), +Config(64, "M", "divw", "divw-", 6), +Config(64, "M", "divuw", "divuw-", 7), +Config(64, "M", "remw", "remw-", 8), +Config(64, "M", "remuw", "remuw-", 9) +] + +for c in config_list: + create_vectors(c) \ No newline at end of file diff --git a/tests/fp/combined_IF_vectors/extract_testfloat_vectors.py b/tests/fp/combined_IF_vectors/extract_testfloat_vectors.py new file mode 100755 index 00000000..07047be2 --- /dev/null +++ b/tests/fp/combined_IF_vectors/extract_testfloat_vectors.py @@ -0,0 +1,79 @@ +#! /usr/bin/python3 +# extract sqrt and float div testfloat vectors + +# author: Alessandro Maiuolo +# contact: amaiuolo@g.hmc.edu +# date created: 3-29-2023 + +import os +wally = os.popen('echo $WALLY').read().strip() +# print(wally) + +def ext_bits(my_string): + target_len = 32 # we want 128 bits, div by 4 bc hex notation + zeroes_to_add = target_len - len(my_string) + return zeroes_to_add*"0" + my_string + +# rounding mode dictionary +round_dict = { + "rne":"0", + "rnm":"4", + "ru":"3", + "rz":"1", + "rd":"2", + "dyn":"7" +} + + +print("creating testfloat div test vectors") + +source_dir = "{}/tests/fp/vectors/".format(wally) +dest_dir = "{}/tests/fp/combined_IF_vectors/IF_vectors/".format(wally) +all_vectors = os.listdir(source_dir) + +div_vectors = [v for v in all_vectors if "div" in v] + +# iterate through all float div vectors +for vector in div_vectors: + # use name to determine configs + config_list = vector.split(".")[0].split("_") + operation = "1" #float div + rounding_mode = round_dict[str(config_list[2])] + # use name to create our new tv + dest_file = open(dest_dir + "cvw_" + vector, 'a') + # open vector + src_file = open(source_dir + vector,'r') + # for each test in the vector + for i in src_file.readlines(): + translation = "" # this stores the test that we are currently working on + [input_1, input_2, answer, flags] = i.split("_") # separate inputs, answer, and flags + # put it all together, strip nec for removing \n on the end of the flags + translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(input_1), ext_bits(input_2), ext_bits(answer), flags.strip(), rounding_mode) + dest_file.write(translation + "\n") + dest_file.close() + src_file.close() + + +print("creating testfloat sqrt test vectors") + +sqrt_vectors = [v for v in all_vectors if "sqrt" in v] + +# iterate through all float div vectors +for vector in sqrt_vectors: + # use name to determine configs + config_list = vector.split(".")[0].split("_") + operation = "2" #sqrt + rounding_mode = round_dict[str(config_list[2])] + # use name to create our new tv + dest_file = open(dest_dir + "cvw_" + vector, 'a') + # open vector + src_file = open(source_dir + vector,'r') + # for each test in the vector + for i in src_file.readlines(): + translation = "" # this stores the test that we are currently working on + [input_1, answer, flags] = i.split("_") # separate inputs, answer, and flags + # put it all together, strip nec for removing \n on the end of the flags + translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(input_1), "X"*32, ext_bits(answer), flags.strip(), rounding_mode) + dest_file.write(translation + "\n") + dest_file.close() + src_file.close() \ No newline at end of file diff --git a/tests/fp/create_all_vectors.sh b/tests/fp/create_all_vectors.sh index f1ba3625..38cfd555 100755 --- a/tests/fp/create_all_vectors.sh +++ b/tests/fp/create_all_vectors.sh @@ -3,3 +3,6 @@ mkdir -p vectors ./create_vectors.sh ./remove_spaces.sh + +# to create tvs for evaluation of combined IFdivsqrt +#./combined_IF_vectors/create_IF_vectors.sh \ No newline at end of file diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-01.reference_output index a9c3da2c..fb24280a 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-01.reference_output @@ -53,7 +53,7 @@ 8000000b # mcause value from m ext interrupt 00000000 # mtval for mext interrupt (0x0) 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 -fffff7ff # medeleg after attempted write of all 1's (only some bits are writeable) +0000b3ff # medeleg after attempted write of all 1's (only some bits are writeable) 00000222 # mideleg after attempted write of all 1's (only some bits are writeable) # skipping instruction address fault since they're impossible with compressed instrs enabled 00000001 # mcause from an instruction access fault 00000000 # mtval of faulting instruction address (0x0) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-s-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-s-01.reference_output index e5718471..9dbdd833 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-s-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-s-01.reference_output @@ -48,7 +48,7 @@ 00000009 # scause from S mode ecall 00000000 # stval of ecall (*** defined to be zero for now) 00000800 # masked out mstatus.mpp = 1, mstatus.MPIE = 0, and mstatus.MIE = 0 -fffff7ff # medeleg after attempted write of all 1's (only some bits are writeable) +0000b3ff # medeleg after attempted write of all 1's (only some bits are writeable) 00000222 # mideleg after attempted write of all 1's (only some bits are writeable) 0000000b # scause from M mode ecall 00000000 # stval of ecall (*** defined to be zero for now) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-u-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-u-01.reference_output index 5c6d8378..b19a0c21 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-u-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-u-01.reference_output @@ -45,7 +45,7 @@ 00000008 # scause from U mode ecall 00000000 # stval of ecall (*** defined to be zero for now) 00000000 # masked out mstatus.mpp = 0, mstatus.MPIE = 0, and mstatus.MIE = 0 -fffff7ff # medeleg after attempted write of all 1's (only some bits are writeable) +0000b3ff # medeleg after attempted write of all 1's (only some bits are writeable) 00000222 # mideleg after attempted write of all 1's (only some bits are writeable) 0000000b # scause from M mode ecall 00000000 # stval of ecall (*** defined to be zero for now) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output index 78e096ce..39ec3ad4 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output @@ -108,8 +108,8 @@ 00000000 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000000 -fffff7ff # medeleg after attempted write of all 1's (only some bits are writeable) -ffffffff +0000b3ff # medeleg after attempted write of all 1's (only some bits are writeable) +00000000 00000222 # mideleg after attempted write of all 1's (only some bits are writeable) 00000000 # skipping instruction address fault since they're impossible with compressed instrs enabled 00000001 # mcause from an instruction access fault diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output index dc5acb4d..4c3031eb 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output @@ -98,8 +98,8 @@ 00000000 00000800 # masked out mstatus.mpp = 1, mstatus.MPIE = 0, and mstatus.MIE = 0 00000000 -fffff7ff # medeleg after attempted write of all 1's (only some bits are writeable) -ffffffff +0000b3ff # medeleg after attempted write of all 1's (only some bits are writeable) +00000000 00000222 # mideleg after attempted write of all 1's (only some bits are writeable) 00000000 0000000b # scause from M mode ecall diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output index 7afec526..2d1d16d2 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output @@ -92,8 +92,8 @@ 00000000 00000000 # masked out mstatus.mpp = 0, mstatus.MPIE = 0, and mstatus.MIE = 0 00000000 -fffff7ff # medeleg after attempted write of all 1's (only some bits are writeable) -ffffffff +0000b3ff # medeleg after attempted write of all 1's (only some bits are writeable) +00000000 00000222 # mideleg after attempted write of all 1's (only some bits are writeable) 00000000 0000000b # scause from M mode ecall