From 310f55b6b73f1ec1f7f086f857211359b5c1e5d3 Mon Sep 17 00:00:00 2001 From: Madeleine Masser-Frye <51804758+mmasserfrye@users.noreply.github.com> Date: Thu, 9 Jun 2022 00:05:38 +0000 Subject: [PATCH 1/7] added false path for data critical muxes --- synthDC/scripts/synth.tcl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl index bb557160f..178820933 100755 --- a/synthDC/scripts/synth.tcl +++ b/synthDC/scripts/synth.tcl @@ -74,6 +74,9 @@ if { $saifpower == 1 } { if {$drive != "INV"} { set_false_path -from [get_ports reset] } +if {(($::env(DESIGN) == "ppa_mux2_1") || ($::env(DESIGN) == "ppa_mux4_1") || ($::env(DESIGN) == "ppa_mux8_1"))} { + set_false_path -from {s} +} # Set Frequency in [MHz] or period in [ns] set my_clock_pin clk From a58a756076e86f5fd84009d759de40bc23ea86fd Mon Sep 17 00:00:00 2001 From: Madeleine Masser-Frye <51804758+mmasserfrye@users.noreply.github.com> Date: Thu, 9 Jun 2022 00:06:12 +0000 Subject: [PATCH 2/7] added one bit muxes for data critical synths --- pipelined/src/ppa/ppa.sv | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pipelined/src/ppa/ppa.sv b/pipelined/src/ppa/ppa.sv index b5df310d2..31aa13523 100644 --- a/pipelined/src/ppa/ppa.sv +++ b/pipelined/src/ppa/ppa.sv @@ -525,6 +525,30 @@ module ppa_decoder #(parameter WIDTH = 8) ( end endmodule +module ppa_mux2_1 #(parameter WIDTH = 1) ( + input logic [WIDTH-1:0] d0, d1, + input logic s, + output logic [WIDTH-1:0] y); + + assign y = s ? d1 : d0; +endmodule + +module ppa_mux4_1 #(parameter WIDTH = 1) ( + input logic [WIDTH-1:0] d0, d1, d2, d3, + input logic [1:0] s, + output logic [WIDTH-1:0] y); + + assign y = s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0); +endmodule + +module ppa_mux8_1 #(parameter WIDTH = 1) ( + input logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5, d6, d7, + input logic [2:0] s, + output logic [WIDTH-1:0] y); + + assign y = s[2] ? (s[1] ? (s[0] ? d7 : d6) : (s[0] ? d5 : d4)) : (s[1] ? (s[0] ? 
d3 : d2) : (s[0] ? d1 : d0)); +endmodule + module ppa_mux2_8 #(parameter WIDTH = 8) ( input logic [WIDTH-1:0] d0, d1, input logic s, From 5522adc922af3b7a0074902acb503c4d1593427c Mon Sep 17 00:00:00 2001 From: Madeleine Masser-Frye <51804758+mmasserfrye@users.noreply.github.com> Date: Thu, 9 Jun 2022 00:07:51 +0000 Subject: [PATCH 3/7] restored functionality of makeCoefTable() --- synthDC/ppaAnalyze.py | 118 +++++++++++++++++++++++++---------------- synthDC/ppaFitting.csv | 54 ++++++++++++++----- 2 files changed, 114 insertions(+), 58 deletions(-) diff --git a/synthDC/ppaAnalyze.py b/synthDC/ppaAnalyze.py index 1af304d37..fa5cdef1c 100755 --- a/synthDC/ppaAnalyze.py +++ b/synthDC/ppaAnalyze.py @@ -197,6 +197,8 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, colo allMetrics = [] ale = (var != 'delay') # if not delay, must be area, leakage, or energy + modFit = fitDict[module] + fits = modFit[ale] for spec in techSpecs: metric = getVals(spec.tech, module, var, freq=freq) @@ -207,8 +209,8 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, colo metric = [m/norm for m in metric] if len(metric) == 5: # don't include the spec if we don't have points for all widths - xp, pred, leg = regress(widths, metric, spec, fits, ale=ale) - fullLeg += leg + xp, pred, coefs, r2 = regress(widths, metric, fits) + fullLeg += genLegend(fits, coefs, r2, spec, ale=ale) c = color if color else spec.color ax.scatter(widths, metric, color=c, marker=spec.shape) ax.plot(xp, pred, color=c) @@ -216,7 +218,8 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, colo allMetrics += metric combined = TechSpec('combined', 'red', '_', 0, 0, 0, 0) - xp, pred, leg = regress(allWidths, allMetrics, combined, fits, ale=ale) + xp, pred, coefs, r2 = regress(allWidths, allMetrics, fits) + leg = genLegend(fits, coefs, r2, combined, ale=ale) fullLeg += leg ax.plot(xp, pred, color='red') @@ -232,14 +235,17 @@ def oneMetricPlot(module, 
var, freq=None, ax=None, fits='clsgn', norm=True, colo if (module in ['flop', 'csa']) & (var == 'delay'): ax.set_ylim(ymin=0) + ytop = ax.get_ylim()[1] + ax.set_ylim(ymax=1.1*ytop) if singlePlot: titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)" ax.set_title(module + titleStr) plt.savefig('./plots/PPA/'+ module + '_' + var + '.png') # plt.show() + return fullLeg -def regress(widths, var, spec, fits='clsgn', ale=False): +def regress(widths, var, fits='clsgn'): ''' fits a curve to the given points returns lists of x and y values to plot that curve and legend elements with the equation ''' @@ -267,28 +273,41 @@ def regress(widths, var, spec, fits='clsgn', ale=False): for x in xp: n = [func(x/normAddWidth) for func in funcArr] pred += [sum(np.multiply(coefs, n))] - - leg = genLegend(fits, coefs, r2, spec, ale=ale) - return xp, pred, leg + return xp, pred, coefs, r2 -def makeCoefTable(tech): - ''' not currently in use, may salvage later +def makeCoefTable(): + ''' writes CSV with each line containing the coefficients for a regression fit - to a particular combination of module, metric, and target frequency + to a particular combination of module, metric (including both techs, normalized) ''' file = open("ppaFitting.csv", "w") writer = csv.writer(file) - writer.writerow(['Module', 'Metric', 'Freq', '1', 'N', 'N^2', 'log2(N)', 'Nlog2(N)', 'R^2']) + writer.writerow(['Module', 'Metric', '1', 'N', 'N^2', 'log2(N)', 'Nlog2(N)', 'R^2']) - for mod in ['add', 'mult', 'comparator', 'shifter']: - for comb in [['delay', 5000], ['area', 5000], ['area', 10]]: - var = comb[0] - freq = comb[1] - metric = getVals(tech, mod, freq, var) - global widths - coefs, r2, funcArr = regress(widths, metric) - row = [mod] + comb + np.ndarray.tolist(coefs) + [r2] + for module in modules: + for var in ['delay', 'area', 'lpower', 'denergy']: + ale = (var != 'delay') + metL = [] + modFit = fitDict[module] + fits = modFit[ale] + + for spec in techSpecs: + metric 
= getVals(spec.tech, module, var) + techdict = spec._asdict() + norm = techdict[var] + metL += [m/norm for m in metric] + + xp, pred, coefs, r2 = regress(widths*2, metL, fits) + coefs = np.ndarray.tolist(coefs) + coefsToWrite = [None]*5 + fitTerms = 'clsgn' + ind = 0 + for i in range(len(fitTerms)): + if fitTerms[i] in fits: + coefsToWrite[i] = coefs[ind] + ind += 1 + row = [module, var] + coefsToWrite + [r2] writer.writerow(row) file.close() @@ -341,8 +360,8 @@ def freqPlot(tech, mod, width): median = np.median(list(flatten(freqsL))) - f, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True) - for ax in (ax1, ax2, ax3, ax4): + f, (ax1, ax2) = plt.subplots(2, 1, sharex=True) + for ax in (ax1, ax2): #, ax3, ax4): ax.ticklabel_format(useOffset=False, style='plain') for ind in [0,1]: @@ -353,23 +372,23 @@ def freqPlot(tech, mod, width): freqs, delays, areas = noOutliers(median, freqs, delays, areas) # comment out to see all syntheses c = 'blue' if ind else 'green' - adprod = adprodpow(areas, delays, 1) - adpow = adprodpow(areas, delays, 2) + # adprod = adprodpow(areas, delays, 1) + # adpow = adprodpow(areas, delays, 2) ax1.scatter(freqs, delays, color=c) ax2.scatter(freqs, areas, color=c) - ax3.scatter(freqs, adprod, color=c) - ax4.scatter(freqs, adpow, color=c) + # ax3.scatter(freqs, adprod, color=c) + # ax4.scatter(freqs, adpow, color=c) legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='o', label='timing achieved'), lines.Line2D([0], [0], color='blue', ls='', marker='o', label='slack violated')] ax1.legend(handles=legend_elements) - ax4.set_xlabel("Target Freq (MHz)") + ax2.set_xlabel("Target Freq (MHz)") ax1.set_ylabel('Delay (ns)') ax2.set_ylabel('Area (sq microns)') - ax3.set_ylabel('Area * Delay') - ax4.set_ylabel('Area * $Delay^2$') + # ax3.set_ylabel('Area * Delay') + # ax4.set_ylabel('Area * $Delay^2$') ax1.set_title(mod + '_' + str(width)) plt.savefig('./plots/freqBuckshot/' + tech + '/' + mod + '/' + str(width) + '.png') # plt.show() 
@@ -464,23 +483,31 @@ def plotPPA(mod, freq=None, norm=True, aleOpt=False): if no freq specified, uses the synthesis with best achievable delay for each width overlays data from both techs ''' - plt.rcParams["figure.figsize"] = (12,8) + plt.rcParams["figure.figsize"] = (10,7) fig, axs = plt.subplots(2, 2) - modFit = fitDict[mod] + # fig, axs = plt.subplots(4, 1) - oneMetricPlot(mod, 'delay', ax=axs[0,0], fits=modFit[0], freq=freq, norm=norm) - oneMetricPlot(mod, 'area', ax=axs[0,1], fits=modFit[1], freq=freq, norm=norm) - oneMetricPlot(mod, 'lpower', ax=axs[1,0], fits=modFit[1], freq=freq, norm=norm) - oneMetricPlot(mod, 'denergy', ax=axs[1,1], fits=modFit[1], freq=freq, norm=norm) + # oneMetricPlot(mod, 'delay', ax=axs[0], fits=modFit[0], freq=freq, norm=norm) + # oneMetricPlot(mod, 'area', ax=axs[1], fits=modFit[1], freq=freq, norm=norm) + # oneMetricPlot(mod, 'lpower', ax=axs[2], fits=modFit[1], freq=freq, norm=norm) + # oneMetricPlot(mod, 'denergy', ax=axs[3], fits=modFit[1], freq=freq, norm=norm) + oneMetricPlot(mod, 'delay', ax=axs[0,0], freq=freq, norm=norm) + oneMetricPlot(mod, 'area', ax=axs[0,1], freq=freq, norm=norm) + oneMetricPlot(mod, 'lpower', ax=axs[1,0], freq=freq, norm=norm) + fullLeg = oneMetricPlot(mod, 'denergy', ax=axs[1,1], freq=freq, norm=norm) + if aleOpt: - oneMetricPlot(mod, 'area', ax=axs[0,1], fits=modFit[1], freq=10, norm=norm, color='black') - oneMetricPlot(mod, 'lpower', ax=axs[1,0], fits=modFit[1], freq=10, norm=norm, color='black') - oneMetricPlot(mod, 'denergy', ax=axs[1,1], fits=modFit[1], freq=10, norm=norm, color='black') + oneMetricPlot(mod, 'area', ax=axs[0,1], freq=10, norm=norm, color='black') + oneMetricPlot(mod, 'lpower', ax=axs[1,0], freq=10, norm=norm, color='black') + oneMetricPlot(mod, 'denergy', ax=axs[1,1], freq=10, norm=norm, color='black') titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)" n = 'normalized' if norm else 'unnormalized' saveStr = './plots/PPA/'+ n + '/' + mod + 
'.png' plt.suptitle(mod + titleStr) + + # fig.legend(handles=fullLeg, ncol=3, loc='center', bbox_to_anchor=(0.3, 0.82, 0.4, 0.2)) + if freq != 10: plt.savefig(saveStr) # plt.show() @@ -511,7 +538,7 @@ if __name__ == '__main__': fitDict = {'add': ['cg', 'l', 'l'], 'mult': ['cg', 's', 'ls'], 'comparator': ['cg', 'l', 'l'], 'csa': ['c', 'l', 'l'], 'shiftleft': ['cg', 'l', 'ln'], 'flop': ['c', 'l', 'l'], 'priorityencoder': ['cg', 'l', 'l']} fitDict.update(dict.fromkeys(['mux2', 'mux4', 'mux8'], ['cg', 'l', 'l'])) - leftblue = [['mux2', 'sky90', 32], ['mux2', 'sky90', 64], ['mux2', 'sky90', 128], ['mux2', 'tsmc28', 16], ['mux2', 'tsmc28', 8], ['mux8', 'sky90', 32]] + leftblue = [['mux2', 'sky90', 32], ['mux2', 'sky90', 64], ['mux2', 'sky90', 128], ['mux8', 'sky90', 32], ['mux2', 'tsmc28', 8], ['mux2', 'tsmc28', 64]] TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy") techSpecs = [['sky90', 'green', 'o', 43.2e-3, 1330.84, 582.81, 520.66], ['tsmc28', 'blue', '^', 12.2e-3, 209.29, 1060, 81.43]] @@ -529,12 +556,13 @@ if __name__ == '__main__': # squareAreaDelay('sky90', 'add', 32) # oneMetricPlot('add', 'delay') # freqPlot('sky90', 'mux4', 16) + # makeCoefTable() - for mod in modules: - # plotPPA(mod, norm=False) - plotPPA(mod, aleOpt=True) - plotBestAreas(mod) - for w in [8, 16, 32, 64, 128]: - freqPlot('sky90', mod, w) - freqPlot('tsmc28', mod, w) + for mod in ['mux2']: #modules: + plotPPA(mod, norm=False) + plotPPA(mod) #, aleOpt=True) + # plotBestAreas(mod) + # for w in [8, 16, 32, 64, 128]: + # freqPlot('sky90', mod, w) + # freqPlot('tsmc28', mod, w) plt.close('all') \ No newline at end of file diff --git a/synthDC/ppaFitting.csv b/synthDC/ppaFitting.csv index 6b88ead61..def67c07d 100644 --- a/synthDC/ppaFitting.csv +++ b/synthDC/ppaFitting.csv @@ -1,13 +1,41 @@ -Module,Metric,Freq,1,N,N^2,log2(N),Nlog2(N),R^2 -add,delay,5000,-0.038978555556527635,-0.08911531250030817,-0.00012953428819478948,0.2083593333340971,0.013950093750045424,1.0 
-add,area,5000,-1913.1778463362505,-268.21377075092175,-0.4100347526051751,1046.9667200022955,47.59125331263557,1.0 -add,area,10,-13.720001333167332,14.700000312552621,1.3021426840869221e-09,-1.3062278840780171e-10,-9.375775472819561e-08,1.0 -mult,delay,5000,-0.2915958888891911,-0.02828693750009581,-3.445876736121953e-05,0.32169033333357117,0.0044735312500140964,1.0 -mult,area,5000,27780.605184113756,10418.196477973508,26.857274703166343,-24448.387256089416,-1468.2850310678027,1.0 -mult,area,10,-6472.791005245042,-2075.5787013197305,8.20962684330778,5345.246556351299,313.5693677823146,1.0 -comparator,delay,5000,0.1903951111111219,0.000987500000002994,3.427951388890516e-06,3.333333324460974e-06,-0.00012593750000039925,1.0 -comparator,area,5000,-508.51109056188875,-579.7924890645068,-1.0888888741341944,969.5466443383111,101.5524983752957,1.0 -comparator,area,10,-155.6022268893253,-40.3637507501383,-0.07230902908001494,132.9533363336765,8.452500156270371,1.0 -shifter,delay,5000,0.06953233333235516,-0.08957893750031035,-0.00015877864583368578,0.16727300000076853,0.014763625000045773,1.0 -shifter,area,5000,-237.48663487568587,1208.7075255666841,1.5708073263938906,-1678.7400476770383,-166.69187856311666,1.0 -shifter,area,10,-1079.4155736731122,-591.3687615645423,-0.877491337241916,1211.9333560050677,103.11437703155087,1.0 +Module,Metric,1,N,N^2,log2(N),Nlog2(N),R^2 +priorityencoder,delay,4.865032478368464,,,1.0346781590203091,,0.990533246983837 +priorityencoder,area,,0.3296349181169891,,,,0.9718942704677337 +priorityencoder,lpower,,0.2508481588069769,,,,0.9418329012771585 +priorityencoder,denergy,,0.09327161156406552,,,,0.8065924672945542 +add,delay,8.961254531683414,,,1.4310340215065527,,0.9564367595740637 +add,area,,1.0710989265923485,,,,0.988580182173048 +add,lpower,,0.9470245397661955,,,,0.9951383820581323 +add,denergy,,0.9954952282287014,,,,0.9928308616130285 +csa,delay,3.590384717869601,,,,,0.0 +csa,area,,0.9312877569527923,,,,0.999393942859829 
+csa,lpower,,1.5320774877598933,,,,0.9400384192534433 +csa,denergy,,1.1454135769936609,,,,0.9735205275004183 +shiftleft,delay,8.66019468793489,,,1.6351711913499432,,0.9873681453602638 +shiftleft,area,,1.9102134686740575,,,,0.9466461680123697 +shiftleft,lpower,,2.277088275290811,,,,0.9624044038708768 +shiftleft,denergy,,1.4931073444617051,,,,0.9454881696599784 +comparator,delay,6.680678539086959,,,0.9397668550976327,,0.98789326603378 +comparator,area,,0.6003877936704982,,,,0.9672416909621802 +comparator,lpower,,0.46756802348373877,,,,0.8609362596824635 +comparator,denergy,,0.3089180049610159,,,,0.8267293340232036 +flop,delay,3.3270503187614153,,,,,0.0 +flop,area,,0.34478305655859876,,,,0.9433629202566682 +flop,lpower,,0.3707856336608904,,,,0.9170347531086821 +flop,denergy,,0.0011765517257429892,,,,0.688648230209356 +mux2,delay,4.732514086885074,,,0.38138175938205005,,0.5638177354804589 +mux2,area,,0.19794547955000782,,,,0.9753613114571431 +mux2,lpower,,0.1881638557015794,,,,0.7572248871637561 +mux2,denergy,,0.16278100836605952,,,,0.9811112115671446 +mux4,delay,5.67790744523475,,,0.5081925137582493,,0.8316415055210026 +mux4,area,,0.35778033738856435,,,,0.9880049722019894 +mux4,lpower,,0.32236674794207065,,,,0.8279138454959137 +mux4,denergy,,0.28073375091037084,,,,0.9943662618662574 +mux8,delay,7.252700330388384,,,0.45254210999717837,,0.8464368692304263 +mux8,area,,0.7614128432326613,,,,0.9863118376555963 +mux8,lpower,,0.6570734849206145,,,,0.9855956038468652 +mux8,denergy,,0.4496346388149245,,,,0.9785597135426944 +mult,delay,29.562138166420393,,,6.711916207386673,,0.9833266087176287 +mult,area,,,13.838943348894976,,,0.9875861886135875 +mult,lpower,,,14.380577146903335,,,0.9349609233308782 +mult,denergy,,,36.51397409545879,,,0.9719012952478829 From dd4fa7c68272c8964732b3266d8392e772dc2dbb Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 9 Jun 2022 17:26:47 +0000 Subject: [PATCH 4/7] qslc_r4a2 generator --- .gitignore | 4 +- pipelined/regression/sim-wally | 2 +- 
pipelined/regression/wave.do | 4 +- pipelined/src/uncore/uncore.sv | 2 +- pipelined/srt/Makefile | 13 ++- pipelined/srt/qslc_r4a2.c | 202 +++++++++++++++++++++++++++++++++ pipelined/srt/qst2.c | 201 -------------------------------- 7 files changed, 217 insertions(+), 211 deletions(-) create mode 100644 pipelined/srt/qslc_r4a2.c delete mode 100644 pipelined/srt/qst2.c diff --git a/.gitignore b/.gitignore index bed29cae1..c42bfc4dd 100644 --- a/.gitignore +++ b/.gitignore @@ -107,4 +107,6 @@ pipelined/config/rv64ic_orig synthDC/Summary.csv pipelined/srt/exptestgen pipelined/srt/testgen -pipelined/srt/qst2 +pipelined/srt/qslc_r4a2 +pipelined/srt/qslc_r4a2.sv +pipelined/srt/testvectors diff --git a/pipelined/regression/sim-wally b/pipelined/regression/sim-wally index a7dffc9ed..069851489 100755 --- a/pipelined/regression/sim-wally +++ b/pipelined/regression/sim-wally @@ -1,2 +1,2 @@ -vsim -do "do wally-pipelined.do rv32gc arch32f" +vsim -do "do wally-pipelined.do rv32gc arch32i" diff --git a/pipelined/regression/wave.do b/pipelined/regression/wave.do index ab6cbc46a..ecfc06d11 100644 --- a/pipelined/regression/wave.do +++ b/pipelined/regression/wave.do @@ -6,7 +6,7 @@ add wave -noupdate /testbench/reset_ext add wave -noupdate /testbench/memfilename add wave -noupdate /testbench/dut/core/SATP_REGW add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/BPPredWrongE -add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/CSRWritePendingDEM +add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/CSRWriteFencePendingDEM add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/RetM add wave -noupdate -group HDU -group hazards -color Pink /testbench/dut/core/hzu/TrapM add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/LoadStallD @@ -468,7 +468,7 @@ add wave -noupdate -group {debug trace} -expand -group wb /testbench/PCW add wave -noupdate -group {pc selection} /testbench/dut/core/ifu/PCNext2F add wave 
-noupdate -group {pc selection} /testbench/dut/core/ifu/PrivilegedNextPCM add wave -noupdate -group {pc selection} /testbench/dut/core/ifu/PrivilegedChangePCM -add wave -noupdate -group ifu -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState +add wave -noupdate -group ifu -color Gold /testbench/dut/core/ifu/bus/busdp/busfsm/BusCurrState add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusRead add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAdr add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAck diff --git a/pipelined/src/uncore/uncore.sv b/pipelined/src/uncore/uncore.sv index 488a61e0c..a69b7cd0c 100644 --- a/pipelined/src/uncore/uncore.sv +++ b/pipelined/src/uncore/uncore.sv @@ -92,7 +92,7 @@ module uncore ( // generate // on-chip RAM if (`RAM_SUPPORTED) begin : ram - ram_orig #( + ram #( .BASE(`RAM_BASE), .RANGE(`RAM_RANGE)) ram ( .HCLK, .HRESETn, .HSELRam, .HADDR, diff --git a/pipelined/srt/Makefile b/pipelined/srt/Makefile index 6789c234c..759ca2393 100644 --- a/pipelined/srt/Makefile +++ b/pipelined/srt/Makefile @@ -1,16 +1,19 @@ -all: sqrttestgen testgen qst2 +all: exptestgen testgen qslc_r4a2 sqrttestgen: sqrttestgen.c gcc sqrttestgen.c -lm -o sqrttestgen testgen: testgen.c gcc testgen.c -lm -o testgen - -qst2: qst2.c - gcc qst2.c -lm -o qst2 - gcc -lm -o testgen testgen.c ./testgen exptestgen: exptestgen.c gcc -lm -o exptestgen exptestgen.c ./exptestgen + +qslc_r4a2: qslc_r4a2.c + gcc qslc_r4a2.c -lm -o qslc_r4a2 + ./qslc_r4a2 >> qslc_r4a2.sv + +clean: + rm -f testgen exptestgen qslc_r4a2 \ No newline at end of file diff --git a/pipelined/srt/qslc_r4a2.c b/pipelined/srt/qslc_r4a2.c new file mode 100644 index 000000000..07616979c --- /dev/null +++ b/pipelined/srt/qslc_r4a2.c @@ -0,0 +1,202 @@ +/* + Program: qslc_r4a2.c + Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory) + User: James E. 
Stine + +*/ + +#include <stdio.h> +#include <math.h> + +#define DIVISOR_SIZE 3 +#define CARRY_SIZE 7 +#define SUM_SIZE 7 +#define TOT_SIZE 7 + +void disp_binary(double, int, int); + +struct bits { + unsigned int divisor : DIVISOR_SIZE; + int tot : TOT_SIZE; +} pla; + +/* + + Function: disp_binary + Description: This function displays a Double-Precision number into + four 16 bit integers using the global union variable + dp_number + Argument List: double x The value to be converted + int bits_to_left Number of bits left of radix point + int bits_to_right Number of bits right of radix point + Return value: none + +*/ +void disp_binary(double x, int bits_to_left, int bits_to_right) { + int i; + double diff; + + if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + printf("0"); + } + if (i == bits_to_right+1) + printf(" "); + + return; + } + + if (x < 0.0) + x = pow(2.0, ((double) bits_to_left)) + x; + + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + diff = pow(2.0, ((double) -i) ); + if (x < diff) + printf("0"); + else { + printf("1"); + x -= diff; + } + //if (i == 0) + //printf(" "); + + } + +} + + +int main() { + int m; + int n; + int o; + pla.divisor = 0; + pla.tot = 0; + + printf(" case({D[5:3],Wmsbs})"); + printf(" \n"); + for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { + for (m=0; m < pow(2.0, TOT_SIZE); m++) { + printf(" 10'b"); + disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); + printf("_"); + disp_binary((double) pla.tot, TOT_SIZE, 0); + printf(": q = 4'b"); + + /* + 4 bits for Radix 4 (a=2) + 1000 = +2 + 0100 = +1 + 0000 = 0 + 0010 = -1 + 0001 = -2 + + */ + + switch (pla.divisor) { + case 0: + if ((pla.tot) >= 12) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -4) + printf("0000"); + else if ((pla.tot) >= -13) + printf("0010"); + else + printf("0001"); + break; + case 1: + if ((pla.tot) >= 14) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if 
((pla.tot) >= -6) + printf("0000"); + else if ((pla.tot) >= -15) + printf("0010"); + else + printf("0001"); + break; + case 2: + if ((pla.tot) >= 15) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -6) + printf("0000"); + else if ((pla.tot) >= -16) + printf("0010"); + else + printf("0001"); + break; + case 3: + if ((pla.tot) >= 16) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -6) + printf("0000"); + else if ((pla.tot) >= -18) + printf("0010"); + else + printf("0001"); + break; + case 4: + if ((pla.tot) >= 18) + printf("1000"); + else if ((pla.tot) >= 6) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -20) + printf("0010"); + else + printf("0001"); + break; + case 5: + if ((pla.tot) >= 20) + printf("1000"); + else if ((pla.tot) >= 6) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -20) + printf("0010"); + else + printf("0001"); + break; + case 6: + if ((pla.tot) >= 20) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -22) + printf("0010"); + else + printf("0001"); + break; + case 7: + if ((pla.tot) >= 24) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -24) + printf("0010"); + else + printf("0001"); + break; + default: printf ("XXX"); + + } + + printf(";\n"); + (pla.tot)++; + } + (pla.divisor)++; + } + printf(" endcase\n"); +} diff --git a/pipelined/srt/qst2.c b/pipelined/srt/qst2.c deleted file mode 100644 index 503a240a0..000000000 --- a/pipelined/srt/qst2.c +++ /dev/null @@ -1,201 +0,0 @@ -/* - Program: qst2.c - Description: Prints out QST (assumes CPA is utilized to reduce memory) - User: James E. 
Stine - -*/ - -#include -#include - -#define DIVISOR_SIZE 3 -#define CARRY_SIZE 7 -#define SUM_SIZE 7 -#define TOT_SIZE 7 - -void disp_binary(double, int, int); - -struct bits { - unsigned int divisor : DIVISOR_SIZE; - int tot : TOT_SIZE; -} pla; - -/* - - Function: disp_binary - Description: This function displays a Double-Precision number into - four 16 bit integers using the global union variable - dp_number - Argument List: double x The value to be converted - int bits_to_left Number of bits left of radix point - int bits_to_right Number of bits right of radix point - Return value: none - -*/ -void disp_binary(double x, int bits_to_left, int bits_to_right) { - int i; - double diff; - - if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - printf("0"); - } - if (i == bits_to_right+1) - printf(" "); - - return; - } - - if (x < 0.0) - x = pow(2.0, ((double) bits_to_left)) + x; - - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - diff = pow(2.0, ((double) -i) ); - if (x < diff) - printf("0"); - else { - printf("1"); - x -= diff; - } - if (i == 0) - printf(" "); - - } - -} - - -int main() { - - int m; - int n; - int o; - pla.divisor = 0; - pla.tot = 0; - - for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { - for (m=0; m < pow(2.0, TOT_SIZE); m++) { - disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); - disp_binary((double) pla.tot, TOT_SIZE, 0); - - /* - 4 bits for Radix 4 (a=2) - 1000 = +2 - 0100 = +1 - 0000 = 0 - 0010 = -1 - 0001 = -2 - - */ - - switch (pla.divisor) { - - case 0: - if ((pla.tot) >= 12) - printf(" 1000"); - else if ((pla.tot) >= 4) - printf(" 0100"); - else if ((pla.tot) >= -4) - printf(" 0000"); - else if ((pla.tot) >= -13) - printf(" 0010"); - else - printf(" 0001"); - break; - case 1: - if ((pla.tot) >= 14) - printf(" 1000"); - else if ((pla.tot) >= 4) - printf(" 0100"); - else if ((pla.tot) >= -6) - printf(" 0000"); - else if ((pla.tot) >= -15) - printf(" 0010"); - else - printf(" 
0001"); - break; - case 2: - if ((pla.tot) >= 15) - printf(" 1000"); - else if ((pla.tot) >= 4) - printf(" 0100"); - else if ((pla.tot) >= -6) - printf(" 0000"); - else if ((pla.tot) >= -16) - printf(" 0010"); - else - printf(" 0001"); - break; - case 3: - if ((pla.tot) >= 16) - printf(" 1000"); - else if ((pla.tot) >= 4) - printf(" 0100"); - else if ((pla.tot) >= -6) - printf(" 0000"); - else if ((pla.tot) >= -18) - printf(" 0010"); - else - printf(" 0001"); - break; - case 4: - if ((pla.tot) >= 18) - printf(" 1000"); - else if ((pla.tot) >= 6) - printf(" 0100"); - else if ((pla.tot) >= -8) - printf(" 0000"); - else if ((pla.tot) >= -20) - printf(" 0010"); - else - printf(" 0001"); - break; - case 5: - if ((pla.tot) >= 20) - printf(" 1000"); - else if ((pla.tot) >= 6) - printf(" 0100"); - else if ((pla.tot) >= -8) - printf(" 0000"); - else if ((pla.tot) >= -20) - printf(" 0010"); - else - printf(" 0001"); - break; - case 6: - if ((pla.tot) >= 20) - printf(" 1000"); - else if ((pla.tot) >= 8) - printf(" 0100"); - else if ((pla.tot) >= -8) - printf(" 0000"); - else if ((pla.tot) >= -22) - printf(" 0010"); - else - printf(" 0001"); - break; - case 7: - if ((pla.tot) >= 24) - printf(" 1000"); - else if ((pla.tot) >= 8) - printf(" 0100"); - else if ((pla.tot) >= -8) - printf(" 0000"); - else if ((pla.tot) >= -24) - printf(" 0010"); - else - printf(" 0001"); - break; - default: - printf (" XXX"); - - } - - printf("\n"); - (pla.tot)++; - } - (pla.divisor)++; - } - - -} From 470c0552f8b614a8998e795be0652767780c9c4b Mon Sep 17 00:00:00 2001 From: stineje Date: Thu, 9 Jun 2022 16:45:13 -0500 Subject: [PATCH 5/7] Update integer division for r4 and qslc_r4a2.c --- pipelined/srt/Makefile | 12 +- pipelined/srt/qslc_r4a2.c | 256 ++- pipelined/srt/stine/README | 1 + pipelined/srt/stine/README.md | 22 + pipelined/srt/stine/checkme.sh | 19 + pipelined/srt/stine/idiv-config.vh | 27 + pipelined/srt/stine/intdiv.sv | 2802 +++++++++++++++++++++++++ pipelined/srt/stine/iter128.do | 50 + 
pipelined/srt/stine/iter128S.do | 50 + pipelined/srt/stine/iter32.do | 50 + pipelined/srt/stine/iter32S.do | 50 + pipelined/srt/stine/iter64.do | 50 + pipelined/srt/stine/iter64S.do | 50 + pipelined/srt/stine/lod.sv | 182 ++ pipelined/srt/stine/lzd.do | 55 + pipelined/srt/stine/lzd.sv | 182 ++ pipelined/srt/stine/lzd_tb.sv | 59 + pipelined/srt/stine/mux.sv | 51 + pipelined/srt/stine/otf4.in | 23 + pipelined/srt/stine/qslc_r4a2 | Bin 0 -> 16144 bytes pipelined/srt/stine/qslc_r4a2.c | 198 ++ pipelined/srt/stine/run.sh | 8 + pipelined/srt/stine/shift.sv | 73 + pipelined/srt/stine/shift_left.do | 55 + pipelined/srt/stine/shift_left_tb.sv | 71 + pipelined/srt/stine/shift_right.do | 55 + pipelined/srt/stine/shift_right_tb.sv | 64 + pipelined/srt/stine/shifter.sv | 18 + pipelined/srt/stine/test_iter128.sv | 79 + pipelined/srt/stine/test_iter128S.sv | 90 + pipelined/srt/stine/test_iter32.sv | 85 + pipelined/srt/stine/test_iter32S.sv | 79 + pipelined/srt/stine/test_iter64.sv | 79 + pipelined/srt/stine/test_iter64S.sv | 79 + pipelined/srt/stine/tmp | 1026 +++++++++ 35 files changed, 5914 insertions(+), 136 deletions(-) create mode 100755 pipelined/srt/stine/README create mode 100755 pipelined/srt/stine/README.md create mode 100755 pipelined/srt/stine/checkme.sh create mode 100644 pipelined/srt/stine/idiv-config.vh create mode 100755 pipelined/srt/stine/intdiv.sv create mode 100644 pipelined/srt/stine/iter128.do create mode 100644 pipelined/srt/stine/iter128S.do create mode 100755 pipelined/srt/stine/iter32.do create mode 100644 pipelined/srt/stine/iter32S.do create mode 100755 pipelined/srt/stine/iter64.do create mode 100644 pipelined/srt/stine/iter64S.do create mode 100755 pipelined/srt/stine/lod.sv create mode 100755 pipelined/srt/stine/lzd.do create mode 100755 pipelined/srt/stine/lzd.sv create mode 100755 pipelined/srt/stine/lzd_tb.sv create mode 100755 pipelined/srt/stine/mux.sv create mode 100644 pipelined/srt/stine/otf4.in create mode 100755 
pipelined/srt/stine/qslc_r4a2 create mode 100644 pipelined/srt/stine/qslc_r4a2.c create mode 100755 pipelined/srt/stine/run.sh create mode 100755 pipelined/srt/stine/shift.sv create mode 100755 pipelined/srt/stine/shift_left.do create mode 100755 pipelined/srt/stine/shift_left_tb.sv create mode 100755 pipelined/srt/stine/shift_right.do create mode 100755 pipelined/srt/stine/shift_right_tb.sv create mode 100755 pipelined/srt/stine/shifter.sv create mode 100644 pipelined/srt/stine/test_iter128.sv create mode 100644 pipelined/srt/stine/test_iter128S.sv create mode 100755 pipelined/srt/stine/test_iter32.sv create mode 100644 pipelined/srt/stine/test_iter32S.sv create mode 100755 pipelined/srt/stine/test_iter64.sv create mode 100644 pipelined/srt/stine/test_iter64S.sv create mode 100644 pipelined/srt/stine/tmp diff --git a/pipelined/srt/Makefile b/pipelined/srt/Makefile index 759ca2393..c3c69df8c 100644 --- a/pipelined/srt/Makefile +++ b/pipelined/srt/Makefile @@ -1,19 +1,19 @@ all: exptestgen testgen qslc_r4a2 sqrttestgen: sqrttestgen.c - gcc sqrttestgen.c -lm -o sqrttestgen + gcc sqrttestgen.c -o sqrttestgen -lm testgen: testgen.c - gcc testgen.c -lm -o testgen + gcc testgen.c -o testgen -lm ./testgen exptestgen: exptestgen.c - gcc -lm -o exptestgen exptestgen.c + gcc -o exptestgen exptestgen.c -lm ./exptestgen qslc_r4a2: qslc_r4a2.c - gcc qslc_r4a2.c -lm -o qslc_r4a2 - ./qslc_r4a2 >> qslc_r4a2.sv + gcc qslc_r4a2.c -o qslc_r4a2 -lm + ./qslc_r4a2 > qslc_r4a2.sv clean: - rm -f testgen exptestgen qslc_r4a2 \ No newline at end of file + rm -f testgen exptestgen qslc_r4a2 diff --git a/pipelined/srt/qslc_r4a2.c b/pipelined/srt/qslc_r4a2.c index 07616979c..8e68f9983 100644 --- a/pipelined/srt/qslc_r4a2.c +++ b/pipelined/srt/qslc_r4a2.c @@ -24,11 +24,11 @@ struct bits { Function: disp_binary Description: This function displays a Double-Precision number into - four 16 bit integers using the global union variable - dp_number + four 16 bit integers using the global union 
variable + dp_number Argument List: double x The value to be converted - int bits_to_left Number of bits left of radix point - int bits_to_right Number of bits right of radix point + int bits_to_left Number of bits left of radix point + int bits_to_right Number of bits right of radix point Return value: none */ @@ -41,7 +41,7 @@ void disp_binary(double x, int bits_to_left, int bits_to_right) { printf("0"); } if (i == bits_to_right+1) - printf(" "); + ; return; } @@ -57,146 +57,142 @@ void disp_binary(double x, int bits_to_left, int bits_to_right) { printf("1"); x -= diff; } - //if (i == 0) - //printf(" "); + if (i == 0) + ; } } - int main() { int m; int n; int o; pla.divisor = 0; pla.tot = 0; - - printf(" case({D[5:3],Wmsbs})"); - printf(" \n"); + printf("\tcase({D[5:3],Wmsbs})\n"); for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { for (m=0; m < pow(2.0, TOT_SIZE); m++) { - printf(" 10'b"); - disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); - printf("_"); - disp_binary((double) pla.tot, TOT_SIZE, 0); - printf(": q = 4'b"); + printf("\t\t10'b"); + disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); + printf("_"); + disp_binary((double) pla.tot, TOT_SIZE, 0); + printf(": q = 4'b"); - /* - 4 bits for Radix 4 (a=2) - 1000 = +2 - 0100 = +1 - 0000 = 0 - 0010 = -1 - 0001 = -2 - - */ - - switch (pla.divisor) { - case 0: - if ((pla.tot) >= 12) - printf("1000"); - else if ((pla.tot) >= 4) - printf("0100"); - else if ((pla.tot) >= -4) - printf("0000"); - else if ((pla.tot) >= -13) - printf("0010"); - else - printf("0001"); - break; - case 1: - if ((pla.tot) >= 14) - printf("1000"); - else if ((pla.tot) >= 4) - printf("0100"); - else if ((pla.tot) >= -6) - printf("0000"); - else if ((pla.tot) >= -15) - printf("0010"); - else - printf("0001"); - break; - case 2: - if ((pla.tot) >= 15) - printf("1000"); - else if ((pla.tot) >= 4) - printf("0100"); - else if ((pla.tot) >= -6) - printf("0000"); - else if ((pla.tot) >= -16) - printf("0010"); - else - printf("0001"); - break; - case 
3: - if ((pla.tot) >= 16) - printf("1000"); - else if ((pla.tot) >= 4) - printf("0100"); - else if ((pla.tot) >= -6) - printf("0000"); - else if ((pla.tot) >= -18) - printf("0010"); - else - printf("0001"); - break; - case 4: - if ((pla.tot) >= 18) - printf("1000"); - else if ((pla.tot) >= 6) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -20) - printf("0010"); - else - printf("0001"); - break; - case 5: - if ((pla.tot) >= 20) - printf("1000"); - else if ((pla.tot) >= 6) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -20) - printf("0010"); - else - printf("0001"); - break; - case 6: - if ((pla.tot) >= 20) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -22) - printf("0010"); - else - printf("0001"); - break; - case 7: - if ((pla.tot) >= 24) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -24) - printf("0010"); - else - printf("0001"); - break; - default: printf ("XXX"); + /* + 4 bits for Radix 4 (a=2) + 1000 = +2 + 0100 = +1 + 0000 = 0 + 0010 = -1 + 0001 = -2 + */ + switch (pla.divisor) { + case 0: + if ((pla.tot) >= 12) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -4) + printf("0000"); + else if ((pla.tot) >= -13) + printf("0010"); + else + printf("0001"); + break; + case 1: + if ((pla.tot) >= 14) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -6) + printf("0000"); + else if ((pla.tot) >= -15) + printf("0010"); + else + printf("0001"); + break; + case 2: + if ((pla.tot) >= 15) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -6) + printf("0000"); + else if ((pla.tot) >= -16) + printf("0010"); + else + printf("0001"); + break; + case 3: + if ((pla.tot) >= 16) + printf("1000"); + else if ((pla.tot) >= 
4) + printf("0100"); + else if ((pla.tot) >= -6) + printf("0000"); + else if ((pla.tot) >= -18) + printf("0010"); + else + printf("0001"); + break; + case 4: + if ((pla.tot) >= 18) + printf("1000"); + else if ((pla.tot) >= 6) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -20) + printf("0010"); + else + printf("0001"); + break; + case 5: + if ((pla.tot) >= 20) + printf("1000"); + else if ((pla.tot) >= 6) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -20) + printf("0010"); + else + printf("0001"); + break; + case 6: + if ((pla.tot) >= 20) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -22) + printf("0010"); + else + printf("0001"); + break; + case 7: + if ((pla.tot) >= 24) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -24) + printf("0010"); + else + printf("0001"); + break; + default: printf ("XXX"); - } + } - printf(";\n"); - (pla.tot)++; - } - (pla.divisor)++; + printf(";\n"); + (pla.tot)++; + } + (pla.divisor)++; } - printf(" endcase\n"); + printf("\tendcase\n"); + } diff --git a/pipelined/srt/stine/README b/pipelined/srt/stine/README new file mode 100755 index 000000000..6898c5cec --- /dev/null +++ b/pipelined/srt/stine/README @@ -0,0 +1 @@ +vsim -do iter64.do -c diff --git a/pipelined/srt/stine/README.md b/pipelined/srt/stine/README.md new file mode 100755 index 000000000..ebb006c95 --- /dev/null +++ b/pipelined/srt/stine/README.md @@ -0,0 +1,22 @@ +This is a novel integer divider using r4 division by recurrence. The +reference is: + +J. E. Stine and K. Hill, "An Efficient Implementation of Radix-4 +Integer Division Using Scaling," 2020 IEEE 63rd International Midwest +Symposium on Circuits and Systems (MWSCAS), Springfield, MA, USA, +2020, pp. 1092-1095, doi: 10.1109/MWSCAS48704.2020.9184631. 
+ +Although this version does not contain scaling, it could do this, if +needed. Moreover, a higher radix or overlapped radix can be done +easily to expand the size. Also, the implementations here are +initially unsigned but hope to expand for signed, which should be +easy. + +There are two types of tests in this directory within each testbench. +One tests for 32-bits and the other 64-bits: + +int32div.do and int64div.do = test individual vector for debugging + +iter32.do and iter64.do = do not use any waveform generation and just +output lots of tests + diff --git a/pipelined/srt/stine/checkme.sh b/pipelined/srt/stine/checkme.sh new file mode 100755 index 000000000..acbd94008 --- /dev/null +++ b/pipelined/srt/stine/checkme.sh @@ -0,0 +1,19 @@ +#!/bin/sh +cat iter64_signed.out | grep "0 1$" +cat iter64_signed.out | grep "1 0$" +cat iter64_signed.out | grep "0 0$" +cat iter64_unsigned.out | grep "0 1$" +cat iter64_unsigned.out | grep "1 0$" +cat iter64_unsigned.out | grep "0 0$" +cat iter32_signed.out | grep "0 1$" +cat iter32_signed.out | grep "1 0$" +cat iter32_signed.out | grep "0 0$" +cat iter32_unsigned.out | grep "0 1$" +cat iter32_unsigned.out | grep "1 0$" +cat iter32_unsigned.out | grep "0 0$" +cat iter128_signed.out | grep "0 1$" +cat iter128_signed.out | grep "1 0$" +cat iter128_signed.out | grep "0 0$" +cat iter128_unsigned.out | grep "0 1$" +cat iter128_unsigned.out | grep "1 0$" +cat iter128_unsigned.out | grep "0 0$" diff --git a/pipelined/srt/stine/idiv-config.vh b/pipelined/srt/stine/idiv-config.vh new file mode 100644 index 000000000..8afa1e75b --- /dev/null +++ b/pipelined/srt/stine/idiv-config.vh @@ -0,0 +1,27 @@ +////////////////////////////////////////// +// idiv-config.vh +// +// Written: james.stine@okstate.edu 9 June 2022 +// Modified: +// +// Purpose: Specify which features are configured +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +// Integer division tests +`define IDIV_TESTS 1048576 diff --git a/pipelined/srt/stine/intdiv.sv b/pipelined/srt/stine/intdiv.sv new file mode 100755 index 000000000..a38783845 --- /dev/null +++ b/pipelined/srt/stine/intdiv.sv @@ -0,0 +1,2802 @@ +/////////////////////////////////////////// +// intdiv.sv +// +// Written: James.Stine@okstate.edu 1 February 2021 +// Modified: +// +// Purpose: Integer Divide instructions +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +// *** I added these verilator controls to clean up the +// lint output. The linter warnings should be fixed, but now the output is at +// least readable. 
+/* verilator lint_off COMBDLY */ +/* verilator lint_off IMPLICIT */ + +`include "idiv-config.vh" + +module intdiv #(parameter WIDTH=64) + (Qf, done, remf, div0, N, D, clk, reset, start, S); + + input logic [WIDTH-1:0] N, D; + input logic clk; + input logic reset; + input logic start; + input logic S; + + output logic [WIDTH-1:0] Qf; + output logic [WIDTH-1:0] remf; + output logic div0; + output logic done; + + logic enable; + logic state0; + logic V; + logic [$clog2(WIDTH):0] Num; + logic [$clog2(WIDTH)-1:0] P, NumIter, RemShift, RemShiftP; + logic [WIDTH-1:0] op1, op2, op1shift, Rem5; + logic [WIDTH:0] Qd, Rd, Qd2, Rd2; + logic [WIDTH:0] Q2d, Qd3; + logic [WIDTH-1:0] Q, Q2, rem0; + logic [3:0] quotient; + logic otfzero; + logic shiftResult; + + logic [WIDTH-1:0] twoD; + logic [WIDTH-1:0] twoN; + logic SignD; + logic SignN; + logic [WIDTH-1:0] QT, remT; + logic D_NegOne; + logic Max_N; + logic [1:0] QR; + logic tcQ, tcR; + + // Check if negative (two's complement) + // If so, convert to positive + adder #(WIDTH) cpa1 ((D ^ {WIDTH{D[WIDTH-1]&S}}), {{WIDTH-1{1'b0}}, D[WIDTH-1]&S}, twoD); + adder #(WIDTH) cpa2 ((N ^ {WIDTH{N[WIDTH-1]&S}}), {{WIDTH-1{1'b0}}, N[WIDTH-1]&S}, twoN); + assign SignD = D[WIDTH-1]; + assign SignN = N[WIDTH-1]; + // Max N and D = -1 (Overflow) + assign Max_N = (~|N[WIDTH-2:0]) & N[WIDTH-1]; + assign D_NegOne = &D; + + // Divider goes the distance to 37 cycles + // (thanks to the evil divisor for D = 0x1) + // The enable signal turns off register storage thus invalidating + // any future cycles. + + // Shift D, if needed (for integer) + // needed to allow qst to be in range for integer + // division [1,2) and allow integer divide to work. + // + // The V or valid bit can be used to determine if D + // is 0 and thus a divide by 0 exception. This div0 + // exception is given to FSM to tell the operation to + // quit gracefully. 
+ + lod_hier #(WIDTH) p1 (.ZP(P), .ZV(V), .B(twoD)); + shift_left #(WIDTH) p2 (twoD, P, op2); + assign op1 = twoN; + assign div0 = ~V; + + // #iter: N = m+v+s = m+2+s (mod k = 0) + // v = 2 since \rho < 1 (add 4 to make sure it's a ceil) + // k = 2 (r = 2^k) + adder #($clog2(WIDTH)+1) cpa3 ({1'b0, P}, + {{$clog2(WIDTH)+1-3{1'b0}}, shiftResult, ~shiftResult, 1'b0}, + Num); + + // Determine whether need to add just Q/Rem + assign shiftResult = P[0]; + // div by 2 (ceil) + assign NumIter = Num[$clog2(WIDTH):1]; + assign RemShift = P; + + // Avoid critical path of RemShift + flopr #($clog2(WIDTH)) reg1 (clk, reset, RemShift, RemShiftP); + + // FSM to control integer divider + // assume inputs are positive edge and + // datapath (divider) is negative edge + fsm64 #($clog2(WIDTH)) fsm1 (enablev, state0v, donev, otfzerov, + start, div0, NumIter, ~clk, reset); + + flopr #(1) rega (~clk, reset, donev, done); + flopr #(1) regc (~clk, reset, otfzerov, otfzero); + flopr #(1) regd (~clk, reset, enablev, enable); + flopr #(1) rege (~clk, reset, state0v, state0); + + // To obtain a correct remainder the last bit of the + // quotient has to be aligned with a radix-r boundary. + // Since the quotient is in the range 1/2 < q < 2 (one + // integer bit and m fractional bits), this is achieved by + // shifting N right by v+s so that (m+v+s) mod k = 0. And, + // the quotient has to be aligned to the integer position. 
+ divide4 #(WIDTH) p3 (Qd, Q2d, Rd, quotient, op1, op2, clk, reset, state0, + enable, otfzero, shiftResult); + + // Storage registers to hold contents stable + flopenr #(WIDTH+1) reg3 (clk, reset, enable, Rd, Rd2); + flopenr #(WIDTH+1) reg4 (clk, reset, enable, Qd, Qd2); + flopenr #(WIDTH+1) reg5 (clk, reset, enable, Q2d, Qd3); + + // Probably not needed - just assigns results + assign Q = Qd2[WIDTH-1:0]; + assign Rem5 = Rd2[WIDTH:1]; + assign Q2 = Qd3[WIDTH-1:0]; + + // Adjust remainder by m (no need to adjust by + shift_right #(WIDTH) p4 (Rem5, RemShiftP, rem0); + + // Adjust Q/Rem for Signed + always_comb + casex({S, SignN, SignD}) + 3'b000 : QR = 2'b00; + 3'b001 : QR = 2'b00; + 3'b010 : QR = 2'b00; + 3'b011 : QR = 2'b00; + 3'b100 : QR = 2'b00; + 3'b101 : QR = 2'b10; + 3'b110 : QR = 2'b11; + 3'b111 : QR = 2'b01; + default: QR = 2'b00; + endcase // casex ({SignN, SignD, S}) + assign {tcQ, tcR} = QR; + + // When Dividend (N) and/or Divisor (D) are negative (first bit is '1'): + // - When N and D are negative: Remainder i + // s negative (undergoes a two's complement). + // - When N is negative: Quotient and Remainder are both negative (undergo a two's complement). + // - When D is negative: Quotient is negative (undergoes a two's complement). 
+ adder #(WIDTH) cpa4 ((rem0 ^ {WIDTH{tcR}}), {{WIDTH-1{1'b0}}, tcR}, remT); + adder #(WIDTH) cpa5 ((Q ^ {WIDTH{tcQ}}), {{WIDTH-1{1'b0}}, tcQ}, QT); + + // RISC-V has exceptions for divide by 0 and overflow (see Table 6.1 of spec) + exception_int #(WIDTH) exc (QT, remT, N, S, div0, Max_N, D_NegOne, Qf, remf); + +endmodule // intdiv + +// Division by Recurrence (r=4) +module divide4 #(parameter WIDTH=64) + (Q, Q2, rem0, quotient, op1, op2, clk, reset, state0, + enable, otfzero, shiftResult); + + input logic [WIDTH-1:0] op1, op2; + input logic clk, state0; + input logic reset; + input logic enable; + input logic otfzero; + input logic shiftResult; + + output logic [WIDTH:0] rem0; + output logic [WIDTH:0] Q; + output logic [WIDTH:0] Q2; + output logic [3:0] quotient; + + logic [WIDTH+3:0] Sum, Carry; + logic [WIDTH:0] Qstar; + logic [WIDTH:0] QMstar; + logic [WIDTH:0] QM2star; + logic [6:0] qtotal; + logic [WIDTH+3:0] SumN, CarryN, SumN2, CarryN2; + logic [WIDTH+3:0] divi1, divi2, divi1c, divi2c, dive1; + logic [WIDTH+3:0] mdivi_temp, mdivi; + logic zero; + logic [1:0] qsel; + logic [1:0] Qin, QMin; + logic CshiftQ, CshiftQM; + logic [WIDTH+3:0] rem1, rem2, rem3; + logic [WIDTH+3:0] SumR, CarryR; + logic [WIDTH:0] Qt; + + // Create one's complement values of Divisor (for q*D) + assign divi1 = {3'h0, op2, 1'b0}; + assign divi2 = {2'h0, op2, 2'b0}; + assign divi1c = ~divi1; + assign divi2c = ~divi2; + // Shift x1 if not mod k + mux2 #(WIDTH+4) mx1 ({3'b000, op1, 1'b0}, {4'h0, op1}, shiftResult, dive1); + + // I I I . F F F F F ... 
(Robertson Criteria - \rho * qmax * D) + mux2 #(WIDTH+4) mx2 ({CarryN2[WIDTH+1:0], 2'h0}, {WIDTH+4{1'b0}}, state0, CarryN); + mux2 #(WIDTH+4) mx3 ({SumN2[WIDTH+1:0], 2'h0}, dive1, state0, SumN); + // Simplify QST + adder #(7) cpa1 (SumN[WIDTH+3:WIDTH-3], CarryN[WIDTH+3:WIDTH-3], qtotal); + // q = {+2, +1, -1, -2} else q = 0 + qst4 pd1 (qtotal[6:0], divi1[WIDTH-1:WIDTH-3], quotient); + assign ulp = quotient[2]|quotient[3]; + assign zero = ~(quotient[3]|quotient[2]|quotient[1]|quotient[0]); + // Map to binary encoding + assign qsel[1] = quotient[3]|quotient[2]; + assign qsel[0] = quotient[3]|quotient[1]; + mux4 #(WIDTH+4) mx4 (divi2, divi1, divi1c, divi2c, qsel, mdivi_temp); + mux2 #(WIDTH+4) mx5 (mdivi_temp, {WIDTH+4{1'b0}}, zero, mdivi); + csa #(WIDTH+4) csa1 (mdivi, SumN, {CarryN[WIDTH+3:1], ulp}, Sum, Carry); + // regs : save CSA + flopenr #(WIDTH+4) reg1 (clk, reset, enable, Sum, SumN2); + flopenr #(WIDTH+4) reg2 (clk, reset, enable, Carry, CarryN2); + // OTF + ls_control otf1 (quotient, Qin, QMin, CshiftQ, CshiftQM); + otf #(WIDTH+1) otf2 (Qin, QMin, CshiftQ, CshiftQM, + clk, otfzero, enable, Qstar, QMstar); + + // Correction and generation of Remainder + adder #(WIDTH+4) cpa2 (SumN2[WIDTH+3:0], CarryN2[WIDTH+3:0], rem1); + // Add back +D as correction + csa #(WIDTH+4) csa2 (CarryN2[WIDTH+3:0], SumN2[WIDTH+3:0], divi1, SumR, CarryR); + adder #(WIDTH+4) cpa3 (SumR, CarryR, rem2); + // Choose remainder (Rem or Rem+D) + mux2 #(WIDTH+4) mx6 (rem1, rem2, rem1[WIDTH+3], rem3); + // Choose correct Q or QM + mux2 #(WIDTH+1) mx7 (Qstar, QMstar, rem1[WIDTH+3], Qt); + // Final results + assign rem0 = rem3[WIDTH:0]; + assign Q = Qt; + +endmodule // divide4 + +module ls_control (quot, Qin, QMin, CshiftQ, CshiftQM); + + input logic [3:0] quot; + + output logic [1:0] Qin; + output logic [1:0] QMin; + output logic CshiftQ; + output logic CshiftQM; + + logic [5:0] qout; + + // q = {+2, +1, -1, -2} + always_comb + casex(quot) + 4'b0000 : qout = 6'b00_11_0_0; + 4'b0001 : qout = 
6'b10_01_1_0; + 4'b0010 : qout = 6'b11_10_1_0; + 4'b0100 : qout = 6'b01_00_0_1; + 4'b1000 : qout = 6'b10_01_0_1; + default : qout = 6'bxx_xx_x_x; + endcase // case (quot) + + assign {Qin, QMin, CshiftQ, CshiftQM} = qout; + +endmodule // ls_control + +// On-the-fly Conversion per Ercegovac/Lang +module otf #(parameter WIDTH=8) + (Qin, QMin, CshiftQ, CshiftQM, clk, reset, enable, R2Q, R1Q); + + input logic [1:0] Qin, QMin; + input logic CshiftQ, CshiftQM; + input logic clk; + input logic reset; + input logic enable; + + output logic [WIDTH-1:0] R2Q; + output logic [WIDTH-1:0] R1Q; + + logic [WIDTH-1:0] Qstar, QMstar; + logic [WIDTH-1:0] M1Q, M2Q; + + // QM + mux2 #(WIDTH) m1 (QMstar, Qstar, CshiftQM, M1Q); + flopenr #(WIDTH) r1 (clk, reset, enable, {M1Q[WIDTH-3:0], QMin}, R1Q); + // Q + mux2 #(WIDTH) m2 (Qstar, QMstar, CshiftQ, M2Q); + flopenr #(WIDTH) r2 (clk, reset, enable, {M2Q[WIDTH-3:0], Qin}, R2Q); + + assign Qstar = R2Q; + assign QMstar = R1Q; + +endmodule // otf + +module adder #(parameter WIDTH=8) + (input logic [WIDTH-1:0] a, b, + output logic [WIDTH-1:0] y); + + assign y = a + b; + +endmodule // adder + +module fa (input logic a, b, c, output logic sum, carry); + + assign sum = a^b^c; + assign carry = a&b|a&c|b&c; + +endmodule // fa + +module csa #(parameter WIDTH=8) + (input logic [WIDTH-1:0] a, b, c, + output logic [WIDTH-1:0] sum, carry); + + logic [WIDTH:0] carry_temp; + genvar i; + generate + for (i=0;i B. LT and GT are both '0' if A = B. + +module magcompare2b (LT, GT, A, B); + + input logic [1:0] A; + input logic [1:0] B; + + output logic LT; + output logic GT; + + // Determine if A < B using a minimized sum-of-products expression + assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; + // Determine if A > B using a minimized sum-of-products expression + assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; + +endmodule // magcompare2b + +// J. E. Stine and M. J. 
Schulte, "A combined two's complement and +// floating-point comparator," 2005 IEEE International Symposium on +// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. +// doi: 10.1109/ISCAS.2005.1464531 + +module magcompare8 (LT, EQ, A, B); + + input logic [7:0] A; + input logic [7:0] B; + + logic [3:0] s; + logic [3:0] t; + logic [1:0] u; + logic [1:0] v; + logic GT; + //wire LT; + + output logic EQ; + output logic LT; + + magcompare2b mag1 (s[0], t[0], A[1:0], B[1:0]); + magcompare2b mag2 (s[1], t[1], A[3:2], B[3:2]); + magcompare2b mag3 (s[2], t[2], A[5:4], B[5:4]); + magcompare2b mag4 (s[3], t[3], A[7:6], B[7:6]); + + magcompare2b mag5 (u[0], v[0], t[1:0], s[1:0]); + magcompare2b mag6 (u[1], v[1], t[3:2], s[3:2]); + + magcompare2b mag7 (LT, GT, v[1:0], u[1:0]); + + assign EQ = ~(GT | LT); + +endmodule // magcompare8 + +module exception_int #(parameter WIDTH=8) + (Q, rem, op1, S, div0, Max_N, D_NegOne, Qf, remf); + + input logic [WIDTH-1:0] Q; + input logic [WIDTH-1:0] rem; + input logic [WIDTH-1:0] op1; + input logic S; + input logic div0; + input logic Max_N; + input logic D_NegOne; + + output logic [WIDTH-1:0] Qf; + output logic [WIDTH-1:0] remf; + + always_comb + case ({div0, S, Max_N, D_NegOne}) + 4'b0000 : Qf = Q; + 4'b0001 : Qf = Q; + 4'b0010 : Qf = Q; + 4'b0011 : Qf = Q; + 4'b0100 : Qf = Q; + 4'b0101 : Qf = Q; + 4'b0110 : Qf = Q; + 4'b0111 : Qf = {1'b1, {WIDTH-1{1'h0}}}; + 4'b1000 : Qf = {WIDTH{1'b1}}; + 4'b1001 : Qf = {WIDTH{1'b1}}; + 4'b1010 : Qf = {WIDTH{1'b1}}; + 4'b1011 : Qf = {WIDTH{1'b1}}; + 4'b1100 : Qf = {WIDTH{1'b1}}; + 4'b1101 : Qf = {WIDTH{1'b1}}; + 4'b1110 : Qf = {WIDTH{1'b1}}; + 4'b1111 : Qf = {WIDTH{1'b1}}; + default: Qf = Q; + endcase + + always_comb + case ({div0, S, Max_N, D_NegOne}) + 4'b0000 : remf = rem; + 4'b0001 : remf = rem; + 4'b0010 : remf = rem; + 4'b0011 : remf = rem; + 4'b0100 : remf = rem; + 4'b0101 : remf = rem; + 4'b0110 : remf = rem; + 4'b0111 : remf = {WIDTH{1'h0}}; + 4'b1000 : remf = op1; + 4'b1001 : remf = op1; + 
4'b1010 : remf = op1; + 4'b1011 : remf = op1; + 4'b1100 : remf = op1; + 4'b1101 : remf = op1; + 4'b1110 : remf = op1; + 4'b1111 : remf = op1; + default: remf = rem; + endcase + +endmodule // exception_int + +/* verilator lint_on COMBDLY */ +/* verilator lint_on IMPLICIT */ diff --git a/pipelined/srt/stine/iter128.do b/pipelined/srt/stine/iter128.do new file mode 100644 index 000000000..de3369ff4 --- /dev/null +++ b/pipelined/srt/stine/iter128.do @@ -0,0 +1,50 @@ +# Copyright 1991-2007 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog mux.sv lod.sv shift.sv intdiv.sv test_iter128.sv + +# start and run simulation +vsim -voptargs=+acc work.tb + + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 999586700ns +quit diff --git a/pipelined/srt/stine/iter128S.do b/pipelined/srt/stine/iter128S.do new file mode 100644 index 000000000..7ddc416d7 --- /dev/null +++ b/pipelined/srt/stine/iter128S.do @@ -0,0 +1,50 @@ +# Copyright 1991-2007 Mentor Graphics 
Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog mux.sv lod.sv shift.sv intdiv.sv test_iter128S.sv + +# start and run simulation +vsim -voptargs=+acc work.tb + + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 999586700ns +quit diff --git a/pipelined/srt/stine/iter32.do b/pipelined/srt/stine/iter32.do new file mode 100755 index 000000000..02bd32512 --- /dev/null +++ b/pipelined/srt/stine/iter32.do @@ -0,0 +1,50 @@ +# Copyright 1991-2007 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. 
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog mux.sv lod.sv shift.sv intdiv.sv test_iter32.sv + +# start and run simulation +vsim -voptargs=+acc work.tb + + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 999586700ns +quit diff --git a/pipelined/srt/stine/iter32S.do b/pipelined/srt/stine/iter32S.do new file mode 100644 index 000000000..52475b291 --- /dev/null +++ b/pipelined/srt/stine/iter32S.do @@ -0,0 +1,50 @@ +# Copyright 1991-2007 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. 
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog mux.sv lod.sv shift.sv intdiv.sv test_iter32S.sv + +# start and run simulation +vsim -voptargs=+acc work.tb + + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 999586700ns +quit diff --git a/pipelined/srt/stine/iter64.do b/pipelined/srt/stine/iter64.do new file mode 100755 index 000000000..651c88cfd --- /dev/null +++ b/pipelined/srt/stine/iter64.do @@ -0,0 +1,50 @@ +# Copyright 1991-2007 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. 
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog mux.sv lod.sv shift.sv intdiv.sv test_iter64.sv + +# start and run simulation +vsim -voptargs=+acc work.tb + + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 999586700ns +quit diff --git a/pipelined/srt/stine/iter64S.do b/pipelined/srt/stine/iter64S.do new file mode 100644 index 000000000..18c37a27f --- /dev/null +++ b/pipelined/srt/stine/iter64S.do @@ -0,0 +1,50 @@ +# Copyright 1991-2007 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. 
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+# do run.do
+# or, to run from a shell, type the following at the shell prompt:
+# vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# create library
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile source files
+vlog mux.sv lod.sv shift.sv intdiv.sv test_iter64S.sv
+
+# start and run simulation
+vsim -voptargs=+acc work.tb
+
+
+-- Set Wave Output Items
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+-- Run the Simulation
+run 999586700ns
+quit
diff --git a/pipelined/srt/stine/lod.sv b/pipelined/srt/stine/lod.sv
new file mode 100755
index 000000000..a7496757c
--- /dev/null
+++ b/pipelined/srt/stine/lod.sv
@@ -0,0 +1,182 @@
+///////////////////////////////////////////
+// lod.sv
+//
+// Written: James.Stine@okstate.edu 1 February 2021
+// Modified:
+//
+// Purpose: Hierarchical leading-one detector (integer divide support logic)
+//
+// A component of the Wally configurable RISC-V project.
+//
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+module lod2 (P, V, B); // 2-bit base cell of the leading-one detector
+
+  input logic [1:0] B;
+
+  output logic P; // position of the leading one, counted from the MSB (0 = B[1], 1 = B[0])
+  output logic V; // valid: at least one bit of B is set
+
+  assign V = B[0] | B[1];
+  assign P = B[0] & ~B[1];
+
+endmodule // lod2
+
+module lod_hier #(parameter WIDTH=8) // width-selecting wrapper; ZP = index of leading one from the MSB, ZV = any bit set
+  (input logic [WIDTH-1:0] B,
+   output logic [$clog2(WIDTH)-1:0] ZP,
+   output logic ZV);
+
+  // Only WIDTH = 4, 8, 16, 32, 64, 128 instantiate a detector; any other value leaves ZP and ZV undriven.
+  if (WIDTH == 128)
+    lod128 lod128 (ZP, ZV, B);
+  else if (WIDTH == 64)
+    lod64 lod64 (ZP, ZV, B);
+  else if (WIDTH == 32)
+    lod32 lod32 (ZP, ZV, B);
+  else if (WIDTH == 16)
+    lod16 lod16 (ZP, ZV, B);
+  else if (WIDTH == 8)
+    lod8 lod8 (ZP, ZV, B);
+  else if (WIDTH == 4)
+    lod4 lod4 (ZP, ZV, B);
+
+endmodule // lod_hier
+
+module lod4 (ZP, ZV, B); // 4-bit detector built from two lod2 cells
+
+  input logic [3:0] B;
+
+  logic ZPa; // position/valid from the lower half B[1:0]
+  logic ZPb; // position/valid from the upper half B[3:2]
+  logic ZVa;
+  logic ZVb;
+
+  output logic [1:0] ZP;
+  output logic ZV;
+
+  lod2 l1(ZPa, ZVa, B[1:0]);
+  lod2 l2(ZPb, ZVb, B[3:2]);
+
+  assign ZP[0:0] = ZVb ? ZPb : ZPa; // upper half wins when it contains a one
+  assign ZP[1] = ~ZVb;              // top position bit is set when the upper half is all zeros
+  assign ZV = ZVa | ZVb;
+
+endmodule // lod4
+
+module lod8 (ZP, ZV, B); // 8-bit detector: same combine step as lod4, over two lod4 halves
+
+  input logic [7:0] B;
+
+  logic [1:0] ZPa;
+  logic [1:0] ZPb;
+  logic ZVa;
+  logic ZVb;
+
+  output logic [2:0] ZP;
+  output logic ZV;
+
+  lod4 l1(ZPa, ZVa, B[3:0]);
+  lod4 l2(ZPb, ZVb, B[7:4]);
+
+  assign ZP[1:0] = ZVb ? ZPb : ZPa;
+  assign ZP[2] = ~ZVb;
+  assign ZV = ZVa | ZVb;
+
+endmodule // lod8
+
+module lod16 (ZP, ZV, B); // 16-bit detector over two lod8 halves
+
+  input logic [15:0] B;
+
+  logic [2:0] ZPa;
+  logic [2:0] ZPb;
+  logic ZVa;
+  logic ZVb;
+
+  output logic [3:0] ZP;
+  output logic ZV;
+
+  lod8 l1(ZPa, ZVa, B[7:0]);
+  lod8 l2(ZPb, ZVb, B[15:8]);
+
+  assign ZP[2:0] = ZVb ? ZPb : ZPa;
+  assign ZP[3] = ~ZVb;
+  assign ZV = ZVa | ZVb;
+
+endmodule // lod16
+
+module lod32 (ZP, ZV, B); // 32-bit detector over two lod16 halves
+
+  input logic [31:0] B;
+
+  logic [3:0] ZPa;
+  logic [3:0] ZPb;
+  logic ZVa;
+  logic ZVb;
+
+  output logic [4:0] ZP;
+  output logic ZV;
+
+  lod16 l1(ZPa, ZVa, B[15:0]);
+  lod16 l2(ZPb, ZVb, B[31:16]);
+
+  assign ZP[3:0] = ZVb ? ZPb : ZPa;
+  assign ZP[4] = ~ZVb;
+  assign ZV = ZVa | ZVb;
+
+endmodule // lod32
+
+module lod64 (ZP, ZV, B); // 64-bit detector over two lod32 halves
+
+  input logic [63:0] B;
+
+  logic [4:0] ZPa;
+  logic [4:0] ZPb;
+  logic ZVa;
+  logic ZVb;
+
+  output logic [5:0] ZP;
+  output logic ZV;
+
+  lod32 l1(ZPa, ZVa, B[31:0]);
+  lod32 l2(ZPb, ZVb, B[63:32]);
+
+  assign ZP[4:0] = ZVb ? ZPb : ZPa;
+  assign ZP[5] = ~ZVb;
+  assign ZV = ZVa | ZVb;
+
+endmodule // lod64
+
+module lod128 (ZP, ZV, B); // 128-bit detector over two lod64 halves
+
+  input logic [127:0] B;
+
+  logic [5:0] ZPa;
+  logic [5:0] ZPb;
+  logic ZVa;
+  logic ZVb;
+
+  output logic [6:0] ZP;
+  output logic ZV;
+
+  lod64 l1(ZPa, ZVa, B[63:0]);
+  lod64 l2(ZPb, ZVb, B[127:64]);
+
+  assign ZP[5:0] = ZVb ? ZPb : ZPa;
+  assign ZP[6] = ~ZVb;
+  assign ZV = ZVa | ZVb;
+
+endmodule // lod128
diff --git a/pipelined/srt/stine/lzd.do b/pipelined/srt/stine/lzd.do
new file mode 100755
index 000000000..9ba020b3a
--- /dev/null
+++ b/pipelined/srt/stine/lzd.do
@@ -0,0 +1,55 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+#
+# Modification by Oklahoma State University
+# Use with Testbench
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+# do run.do
+# or, to run from a shell, type the following at the shell prompt:
+# vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# create library
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile source files (lzd_tb.sv instantiates lzd_hier, which is defined in lzd.sv)
+vlog lzd.sv lzd_tb.sv
+
+# start and run simulation
+vsim -voptargs=+acc work.stimulus
+
+view wave
+
+-- display input and output signals as hexidecimal values
+# Displays all signals recursively
+add wave -hex -r /stimulus/*
+
+-- Set Wave Output Items
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 350
+configure wave -valuecolwidth 200
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+-- Run the Simulation
+run 800ns
+quit
diff --git a/pipelined/srt/stine/lzd.sv b/pipelined/srt/stine/lzd.sv
new file mode 100755
index 000000000..277b45931
--- /dev/null
+++ b/pipelined/srt/stine/lzd.sv
@@ -0,0 +1,182 @@
+///////////////////////////////////////////
+// lzd.sv
+//
+// Written: James.Stine@okstate.edu 1 February 2021
+// Modified:
+//
+// Purpose: Hierarchical leading-zero detector (integer divide support logic)
+//
+// A component of the Wally configurable RISC-V project.
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+///////////////////////////////////////////
+
+module lzd2 (P, V, B); // 2-bit base cell of the leading-zero detector
+
+  input logic [1:0] B;
+
+  output logic P; // position of the leading zero, counted from the MSB (0 = B[1], 1 = B[0])
+  output logic V; // valid: at least one bit of B is zero
+
+  assign V = ~(B[0] & B[1]);
+  assign P = B[1];
+
+endmodule // lzd2
+
+module lzd_hier #(parameter WIDTH=8) // width-selecting wrapper; ZP = index of leading zero from the MSB, ZV = any bit clear
+  (input logic [WIDTH-1:0] B,
+   output logic [$clog2(WIDTH)-1:0] ZP,
+   output logic ZV);
+
+  // Only WIDTH = 4, 8, 16, 32, 64, 128 instantiate a detector; any other value leaves ZP and ZV undriven.
+  if (WIDTH == 128)
+    lzd128 lzd128 (ZP, ZV, B);
+  else if (WIDTH == 64)
+    lzd64 lzd64 (ZP, ZV, B);
+  else if (WIDTH == 32)
+    lzd32 lzd32 (ZP, ZV, B);
+  else if (WIDTH == 16)
+    lzd16 lzd16 (ZP, ZV, B);
+  else if (WIDTH == 8)
+    lzd8 lzd8 (ZP, ZV, B);
+  else if (WIDTH == 4)
+    lzd4 lzd4 (ZP, ZV, B);
+
+endmodule // lzd_hier
+
+module lzd4 (ZP, ZV, B); // 4-bit detector built from two lzd2 cells
+
+  input logic [3:0] B;
+
+  logic ZPa; // position/valid from the lower half B[1:0]
+  logic ZPb; // position/valid from the upper half B[3:2]
+  logic ZVa;
+  logic ZVb;
+
+  output logic [1:0] ZP;
+  output logic ZV;
+
+  lzd2 l1 (ZPa, ZVa, B[1:0]);
+  lzd2 l2 (ZPb, ZVb, B[3:2]);
+
+  assign ZP[0:0] = ZVb ? ZPb : ZPa; // upper half wins when it contains a zero
+  assign ZP[1] = ~ZVb;              // top position bit is set when the upper half is all ones
+  assign ZV = ZVa | ZVb;
+
+endmodule // lzd4
+
+module lzd8 (ZP, ZV, B); // 8-bit detector: same combine step as lzd4, over two lzd4 halves
+
+  input logic [7:0] B;
+
+  logic [1:0] ZPa;
+  logic [1:0] ZPb;
+  logic ZVa;
+  logic ZVb;
+
+  output logic [2:0] ZP;
+  output logic ZV;
+
+  lzd4 l1 (ZPa, ZVa, B[3:0]);
+  lzd4 l2 (ZPb, ZVb, B[7:4]);
+
+  assign ZP[1:0] = ZVb ? ZPb : ZPa;
+  assign ZP[2] = ~ZVb;
+  assign ZV = ZVa | ZVb;
+
+endmodule // lzd8
+
+module lzd16 (ZP, ZV, B); // 16-bit detector over two lzd8 halves
+
+  input logic [15:0] B;
+
+  logic [2:0] ZPa;
+  logic [2:0] ZPb;
+  logic ZVa;
+  logic ZVb;
+
+  output logic [3:0] ZP;
+  output logic ZV;
+
+  lzd8 l1 (ZPa, ZVa, B[7:0]);
+  lzd8 l2 (ZPb, ZVb, B[15:8]);
+
+  assign ZP[2:0] = ZVb ? ZPb : ZPa;
+  assign ZP[3] = ~ZVb;
+  assign ZV = ZVa | ZVb;
+
+endmodule // lzd16
+
+module lzd32 (ZP, ZV, B); // 32-bit detector over two lzd16 halves
+
+  input logic [31:0] B;
+
+  logic [3:0] ZPa;
+  logic [3:0] ZPb;
+  logic ZVa;
+  logic ZVb;
+
+  output logic [4:0] ZP;
+  output logic ZV;
+
+  lzd16 l1 (ZPa, ZVa, B[15:0]);
+  lzd16 l2 (ZPb, ZVb, B[31:16]);
+
+  assign ZP[3:0] = ZVb ? ZPb : ZPa;
+  assign ZP[4] = ~ZVb;
+  assign ZV = ZVa | ZVb;
+
+endmodule // lzd32
+
+module lzd64 (ZP, ZV, B); // 64-bit detector over two lzd32 halves
+
+  input logic [63:0] B;
+
+  logic [4:0] ZPa;
+  logic [4:0] ZPb;
+  logic ZVa;
+  logic ZVb;
+
+  output logic [5:0] ZP;
+  output logic ZV;
+
+  lzd32 l1 (ZPa, ZVa, B[31:0]);
+  lzd32 l2 (ZPb, ZVb, B[63:32]);
+
+  assign ZP[4:0] = ZVb ? ZPb : ZPa;
+  assign ZP[5] = ~ZVb;
+  assign ZV = ZVa | ZVb;
+
+endmodule // lzd64
+
+module lzd128 (ZP, ZV, B); // 128-bit detector over two lzd64 halves
+
+  input logic [127:0] B;
+
+  logic [5:0] ZPa;
+  logic [5:0] ZPb;
+  logic ZVa;
+  logic ZVb;
+
+  output logic [6:0] ZP;
+  output logic ZV;
+
+  lzd64 l1 (ZPa, ZVa, B[63:0]);   // lower half: exactly 64 bits, no overlap with the upper half
+  lzd64 l2 (ZPb, ZVb, B[127:64]); // upper half: exactly 64 bits
+
+  assign ZP[5:0] = ZVb ? ZPb : ZPa;
+  assign ZP[6] = ~ZVb;
+  assign ZV = ZVa | ZVb;
+
+endmodule // lzd128
diff --git a/pipelined/srt/stine/lzd_tb.sv b/pipelined/srt/stine/lzd_tb.sv
new file mode 100755
index 000000000..2980d5f4d
--- /dev/null
+++ b/pipelined/srt/stine/lzd_tb.sv
@@ -0,0 +1,59 @@
+//
+// File name : tb
+// Title : test
+// project : HW3
+// Library : test
+// Purpose : definition of modules for testbench
+// notes :
+//
+// Copyright Oklahoma State University
+//
+
+// Top level stimulus module: drives random 8-bit vectors into lzd_hier and logs B, ZP, ZV to lzd.out
+
+`timescale 1ns/1ps
+module stimulus;
+
+  logic [7:0] B;
+  logic [2:0] ZP;
+  logic ZV;
+
+  logic clk;
+
+  integer handle3;
+  integer desc3;
+  integer i;
+
+  // instantiate part to test
+  lzd_hier #(8) dut (B, ZP, ZV);
+
+  initial
+    begin
+      clk = 1'b1;
+      forever #5 clk = ~clk; // 10 ns clock period
+    end
+
+  initial
+    begin
+      handle3 = $fopen("lzd.out");
+      desc3 = handle3;
+    end
+
+  initial
+    begin
+      for (i=0; i < 256; i=i+1)
+        begin
+          // Put vectors before beginning of clk
+          @(posedge clk)
+            begin
+              B = $random;
+            end
+          @(negedge clk)
+            begin
+              $fdisplay(desc3, "%b || %b %b", B, ZP, ZV);
+            end
+        end // for (i=0; i < 256; i=i+1)
+      $finish;//
+    end // initial begin
+
+endmodule // stimulus
diff --git a/pipelined/srt/stine/mux.sv b/pipelined/srt/stine/mux.sv
new file mode 100755
index 000000000..d13045e6d
--- /dev/null
+++
b/pipelined/srt/stine/mux.sv @@ -0,0 +1,51 @@ +module mux2 #(parameter WIDTH = 8) + (input logic [WIDTH-1:0] d0, d1, + input logic s, + output logic [WIDTH-1:0] y); + + assign y = s ? d1 : d0; + +endmodule // mux2 + +module mux3 #(parameter WIDTH = 8) + (input logic [WIDTH-1:0] d0, d1, d2, + input logic [1:0] s, + output logic [WIDTH-1:0] y); + + assign y = s[1] ? d2 : (s[0] ? d1 : d0); + +endmodule // mux3 + +module mux4 #(parameter WIDTH = 8) + (input logic [WIDTH-1:0] d0, d1, d2, d3, + input logic [1:0] s, + output logic [WIDTH-1:0] y); + + assign y = s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0); + +endmodule // mux4 + +module mux21x32 (Z, A, B, Sel); + + input logic [31:0] A; + input logic [31:0] B; + input logic Sel; + + output logic [31:0] Z; + + assign Z = Sel ? B : A; + +endmodule // mux21x32 + +module mux21x64 (Z, A, B, Sel); + + input logic [63:0] A; + input logic [63:0] B; + input logic Sel; + + output logic [63:0] Z; + + assign Z = Sel ? B : A; + +endmodule // mux21x64 + diff --git a/pipelined/srt/stine/otf4.in b/pipelined/srt/stine/otf4.in new file mode 100644 index 000000000..e448c1d09 --- /dev/null +++ b/pipelined/srt/stine/otf4.in @@ -0,0 +1,23 @@ +.i 4 +.o 6 +.ilb quot[3] quot[2] quot[1] quot[0] +.ob Qin[1] Qin[0] QMin[1] QMin[0] CshiftQ CshiftQM + +0000 001100 +0001 100110 +0010 111010 +0011 ------ +0100 010001 +0101 ------ +0110 ------ +0111 ------ +1000 100101 +1001 ------ +1010 ------ +1011 ------ +1100 ------ +1101 ------ +1110 ------ +1111 ------ + +.e \ No newline at end of file diff --git a/pipelined/srt/stine/qslc_r4a2 b/pipelined/srt/stine/qslc_r4a2 new file mode 100755 index 0000000000000000000000000000000000000000..6d6db4d49db929cbefcaf12ef24baaa058dd6b82 GIT binary patch literal 16144 zcmeHOeQ;FO6~7yZN%-0Xhyp6CC<;oJ1QH4e5+EBk3C};pD45L||cD+Fn=Y-K6dj#b~#6^LRnM~uXNl9(vY zf<9E*l)qRBcvb04>n}64eF!A+#!+Ped7d&0mfS;<#2c(+W+)F!p~peIbaH9kF5e!e zcr4kUY|o|q!X_Ue018W!N4eRZC*5NAg8p)c&dV+>nMW$|_GsQ7&0{GTuO;V`azfi2 
zolm>WhmQ(NJFiCb?6O4B1WRgz%2n@0{a>fORhrl4l3|2h&R1r^lH2(8qV);Dg>+dOA>-t0nmB;=kcyFmSNVWT;<^vbn# zSYl+;X%uNYl+N+UH&*)zKVk2}j+MXcoS!@V>5aRVUURH$bqw;YxYRX3P)xaO?NDfvS;21rC^XftD)Q)Qa{Plz2O9!!U9>jh=>@IBfG93n& z-3Wadw#ft7g;lm@h2;-~1J%JuG!S01Vo7bNKCs4DQ5z7JRb3aVw<1wrIBHo6Iv}ti zvztj=^7(J<0*gdNxq#paMTH8`qoN0sJ-3tRJDKg8i4D!zw072-5Q z@;rZ1`iF?0Xj|iiAf`#0c!aD?rTSfVOQr5Gg2~@ zfm8-k8AxRym4Q?SQW^OF$iNBb#M9oU4>P?j>3^9ag!k51G%eBLZF)ZQfJ{K5=oXlJ z5>uKWoRb9MYsk~n{dOXe*e!Khko9!Gn$&4w*wfvf)M-K3)BS7K5pAoCRw86P=y^7M zicLS?rstY^!AE7yuU_YEe#hJN?y*&CJO#0W7rm_uXnmTeGOWR=@FX(u{?0k$p} z53aX0eKd)=z0q6@{SAO<=vj#_=cFBkJD?@--Q;X7dWXccFTKsjyzL(^^tSg7@j4EA zU;Q#V76E>)17s$;s+^NBF6L8yb}S&bXuNE#w`svGRP{E$7aiekS#TSac=wlyMBI<$ z98BK|-El3r{rKIR;G{rn!NbAI?{VgM;>}y%ZEo!9ab|nsC!E_xJ~2v&D(9MZ@^!X& zx=wXgIgiGAoR`MXy$7Xp>ftJ{vpyDo7!9JfEkD z&xaB(2d>F;($V5MdAX-++mY_Quu@b|JnjpgUqJ@%vEtGS_#m(^L z2-vTC8U4sv+&yozrs}HK%8widk`H1Rb42@hBo6@|-~WS-K!?eMdN*-xi{%Kj`2Z@)!6LAsg?PAWj(J4HI08<~&@^wGw>7t#_>PV%Fx#I_9|fXre*W-n;{lyS?&d>ka4iVmosMXL{^PPf z=nx70Y`6g}C+J`Cv<2us1A68riuxxLT13!aDQK$!Jt3iwnb2Vb-3cgS+-o|He-6j> zDn^+RW2Efaw@v6lf)*?2L<6b@QD{Q%BWSLIp18Dcyr*T)Mw!qCf*!$BC&qi&fF767 z_%I_i9)j)%6oYu(bWA75gDS>OBgQB>h##0x2SLB1pjR7EHHeT2ed9Ess}$62K-D11 zOz5KoouZ&a4CpaAhzm{V4uYP>OvWIbrlXo1Z^MB^?4Q<`Gp8Ix2cF}T(`i0IpHk58 z8&EZfUzpGwg0?7V(15Bz>@=b8e+Fo|f?i=jKbC{I&V>G+phbXU5X(%*{p2`L#W+2+ zZxA_h5W`LACW0Qv?FHj~#ek+u=zI7*BRSsX1U;yr4;av1*|Ys7^a6t3r=VL5=!X(| zuL=FM7f|}i5`(zabi7H9bt*=Y5o3((S*Z#A4MC?X=r9AS2H`THw-eN%pl@B$ml`#Q z({xGX%f$wQzJYrQQuCw%RfBllgccC=Q9v<>nCTctj=NQi1|xxZE2zhS zX2|hgV?v)J=zIm8U_jL%icIJ|1kF*^&p`)cc;sK$DjWsk&rj67Z1oarCie{SFzNGR7qtZm4Q?SQW-Ea zK>s6|C9BdG2~7P?$u&juW?egdU0tLivUQrs$|}geq(aPdZFDVg6{1#>IjjM%Y zbGGB$5gD1gaYrZndDspk#>E6E$&i1cU4&{Owrto}k$qHl>8NqboWnO|?hp$n%)4~f zMU%-){0-RV04IrLL?L?=TR!Z?kX)#(JFqQ6UuUCz;Te^ETiTM5XAQ}7pi8nZdtqLO zF`UH0qR;<92KiGZm4Q?SQW;2PAeDhs22vSFWgwM-R0jTcGr;fV@q2jm$Uuc3kXUb4 zW`VzIRM8J3BenYa1XUTYb$)zo!aSmRZ=7ozEL>7GOk*<4@aT~vQLHG zr9$-E-zM$2+7$OQi;_X&tc5*Wj87gHW^EV*e?8gLas*%37{754{7xO;-k-Z*cQM(J 
zdtlE=8!z}gVAo@0r%Y%P=j|ETvysnn#f(()yReUjC#L7E{EZ2&c8%wG$gGz&{wg?9@+toFlNiIH)0q;=(oA(78GZ_R(p$d)Uq|aaCnFTHp@h zy?0+l(2Dx11q_jBV^x*AQuMuFZ$;~@N_xXS0;E4=Ro8|pe6^N88VW}&U*l#`8LDfj z4MYQecmBW#dc{6y`NCn}7AsI64Q~-uVP9Rq@;BDiZ2`s5EFh!(pc^B#l~%aWHxr$* ztkTtsR(LGWl_m6Yf63KXE?QBx1ZI3gVR^jT>n&L=tfk9WE?%_UT3K3pm1m8$X3^s1 z9!oUT`uKJJVt!Gd-p>D*H~*7fyvF}kpBvBz0!gfl6!80^Xe6|$&pcPi1n3V4`s}h{ zf~!vs(2M@xE?E9Z$g1(x`{}!evXwyjgY{NpB;XfFBX(9rB%-5HIbx~ghWHhW~4cG8FL?GNC-1VVoz+GM6=xzvy z8Uo?y7QJU__L;{t<9Sv-TN;41WkUT2xK#iVjHGV{6O~p`?PBFNSrhIil z1XFaRTevaX>oAG=?E3nW)@L}CaSu|({(T&;&$L@&xlZyzY_uj~{$SsM5ta(CgOn!3 zY*b9X{d+L_o!l&ZJ!Bi*Z%sbm?^(JSsHL614tD%S5aRyxx|1ch&+ATm|9#-onv?mw zE@gR4w@ZDbJTsryhe6nAiNbtd=d$E=Eg-T`46|&6LTg6m^E#0wuV=acjAOYCd|HFD z&g)Q?t8`bG+{6?{PUXbpBCc9_R5<*l7GozJ9LVGlS4Vl*@hJj`=Ll0@>t? zVvWsLlH1`3%wv57$YkU8d7YT8`F6bh_|aQW)V_vuSi$^@lNMDWEEQAo+)*C2pQ8iv ub@+UkY01m|;p-1QCvx8vQ|+@0m7HP=&h0XmmdgY2i|EFNGQmc$sp6lD*88{s literal 0 HcmV?d00001 diff --git a/pipelined/srt/stine/qslc_r4a2.c b/pipelined/srt/stine/qslc_r4a2.c new file mode 100644 index 000000000..8e68f9983 --- /dev/null +++ b/pipelined/srt/stine/qslc_r4a2.c @@ -0,0 +1,198 @@ +/* + Program: qslc_r4a2.c + Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory) + User: James E. 
Stine + +*/ + +#include +#include + +#define DIVISOR_SIZE 3 +#define CARRY_SIZE 7 +#define SUM_SIZE 7 +#define TOT_SIZE 7 + +void disp_binary(double, int, int); + +struct bits { + unsigned int divisor : DIVISOR_SIZE; + int tot : TOT_SIZE; +} pla; + +/* + + Function: disp_binary + Description: This function displays a Double-Precision number into + four 16 bit integers using the global union variable + dp_number + Argument List: double x The value to be converted + int bits_to_left Number of bits left of radix point + int bits_to_right Number of bits right of radix point + Return value: none + +*/ +void disp_binary(double x, int bits_to_left, int bits_to_right) { + int i; + double diff; + + if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + printf("0"); + } + if (i == bits_to_right+1) + ; + + return; + } + + if (x < 0.0) + x = pow(2.0, ((double) bits_to_left)) + x; + + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + diff = pow(2.0, ((double) -i) ); + if (x < diff) + printf("0"); + else { + printf("1"); + x -= diff; + } + if (i == 0) + ; + + } + +} + +int main() { + int m; + int n; + int o; + pla.divisor = 0; + pla.tot = 0; + printf("\tcase({D[5:3],Wmsbs})\n"); + for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { + for (m=0; m < pow(2.0, TOT_SIZE); m++) { + printf("\t\t10'b"); + disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); + printf("_"); + disp_binary((double) pla.tot, TOT_SIZE, 0); + printf(": q = 4'b"); + + /* + 4 bits for Radix 4 (a=2) + 1000 = +2 + 0100 = +1 + 0000 = 0 + 0010 = -1 + 0001 = -2 + */ + switch (pla.divisor) { + case 0: + if ((pla.tot) >= 12) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -4) + printf("0000"); + else if ((pla.tot) >= -13) + printf("0010"); + else + printf("0001"); + break; + case 1: + if ((pla.tot) >= 14) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -6) + printf("0000"); + else if 
((pla.tot) >= -15) + printf("0010"); + else + printf("0001"); + break; + case 2: + if ((pla.tot) >= 15) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -6) + printf("0000"); + else if ((pla.tot) >= -16) + printf("0010"); + else + printf("0001"); + break; + case 3: + if ((pla.tot) >= 16) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -6) + printf("0000"); + else if ((pla.tot) >= -18) + printf("0010"); + else + printf("0001"); + break; + case 4: + if ((pla.tot) >= 18) + printf("1000"); + else if ((pla.tot) >= 6) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -20) + printf("0010"); + else + printf("0001"); + break; + case 5: + if ((pla.tot) >= 20) + printf("1000"); + else if ((pla.tot) >= 6) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -20) + printf("0010"); + else + printf("0001"); + break; + case 6: + if ((pla.tot) >= 20) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -22) + printf("0010"); + else + printf("0001"); + break; + case 7: + if ((pla.tot) >= 24) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -24) + printf("0010"); + else + printf("0001"); + break; + default: printf ("XXX"); + + } + + printf(";\n"); + (pla.tot)++; + } + (pla.divisor)++; + } + printf("\tendcase\n"); + +} diff --git a/pipelined/srt/stine/run.sh b/pipelined/srt/stine/run.sh new file mode 100755 index 000000000..6dcde6c26 --- /dev/null +++ b/pipelined/srt/stine/run.sh @@ -0,0 +1,8 @@ +#!/bin/sh +vsim -do iter32S.do -c +vsim -do iter32.do -c +vsim -do iter64.do -c +vsim -do iter64S.do -c +vsim -do iter128.do -c +vsim -do iter128S.do -c + diff --git a/pipelined/srt/stine/shift.sv b/pipelined/srt/stine/shift.sv new file mode 100755 index 000000000..9738d93fe --- 
/dev/null
+++ b/pipelined/srt/stine/shift.sv
@@ -0,0 +1,72 @@
+///////////////////////////////////////////
+// shift.sv
+//
+// Written: James.Stine@okstate.edu 1 February 2021
+// Modified:
+//
+// Purpose: Logarithmic left/right shifters (integer divide support logic)
+//
+// A component of the Wally configurable RISC-V project.
+//
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+module shift_right #(parameter WIDTH=8) // logical right shift, Z = A >> Shift; stage amounts assume WIDTH is a power of two
+  (input logic [WIDTH-1:0] A,
+   input logic [$clog2(WIDTH)-1:0] Shift,
+   output logic [WIDTH-1:0] Z);
+
+  logic [WIDTH-1:0] stage [$clog2(WIDTH):0]; // stage[0] is the input, stage[$clog2(WIDTH)] the result
+  genvar i;
+
+  assign stage[0] = A;
+  generate
+    for (i=0;i<$clog2(WIDTH);i=i+1)
+      begin : genbit
+        mux2 #(WIDTH) mux_inst (stage[i], // stage i shifts by WIDTH/2**(i+1): largest amount first
+          {{(WIDTH/(2**(i+1))){1'b0}}, stage[i][WIDTH-1:WIDTH/(2**(i+1))]}, // zero-filled from the left
+          Shift[$clog2(WIDTH)-i-1], // MSB of Shift controls the first (largest) stage
+          stage[i+1]);
+      end
+  endgenerate
+  assign Z = stage[$clog2(WIDTH)];
+
+endmodule // shift_right
+
+module shift_left #(parameter WIDTH=8) // logical left shift, Z = A << Shift; stage amounts assume WIDTH is a power of two
+  (input logic [WIDTH-1:0] A,
+   input logic [$clog2(WIDTH)-1:0] Shift,
+   output logic [WIDTH-1:0] Z);
+
+  logic [WIDTH-1:0] stage [$clog2(WIDTH):0]; // stage[0] is the input, stage[$clog2(WIDTH)] the result
+  genvar i;
+
+  assign stage[0] = A;
+  generate
+    for (i=0;i<$clog2(WIDTH);i=i+1)
+      begin : genbit
+        mux2 #(WIDTH) mux_inst (stage[i], // stage i shifts by WIDTH/2**(i+1): largest amount first
+          {stage[i][WIDTH-1-WIDTH/(2**(i+1)):0], {(WIDTH/(2**(i+1))){1'b0}}}, // zero-filled from the right
+          Shift[$clog2(WIDTH)-i-1], // MSB of Shift controls the first (largest) stage
+          stage[i+1]);
+      end
+  endgenerate
+  assign Z = stage[$clog2(WIDTH)];
+
+endmodule // shift_left
+
+
+
+
diff --git a/pipelined/srt/stine/shift_left.do b/pipelined/srt/stine/shift_left.do
new file mode 100755
index 000000000..a178c3cc0
--- /dev/null
+++ b/pipelined/srt/stine/shift_left.do
@@ -0,0 +1,55 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+#
+# Modification by Oklahoma State University
+# Use with Testbench
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+# do run.do
+# or, to run from a shell, type the following at the shell prompt:
+# vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# create library
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile source files
+vlog mux.sv shift.sv shift_left_tb.sv
+
+# start and run simulation
+vsim -voptargs=+acc work.stimulus
+
+view wave
+
+-- display input and output signals as hexidecimal values
+# Displays all signals recursively
+add wave -hex -r /stimulus/*
+
+-- Set Wave Output Items
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+-- Run the Simulation
+run 800ns
+quit
diff --git a/pipelined/srt/stine/shift_left_tb.sv b/pipelined/srt/stine/shift_left_tb.sv
new file mode 100755
index 000000000..2d5d3dad8
--- /dev/null
+++ b/pipelined/srt/stine/shift_left_tb.sv
@@ -0,0 +1,71 @@
+//
+// File name : tb
+// Title : test
+// project : HW3
+// Library : test
+// Purpose : definition of modules for testbench
+// notes :
+//
+// Copyright Oklahoma State University
+//
+
+// Top level stimulus module: compares shift_left against the << operator on random vectors, logging to shift_left.out
+
+`timescale 1ns/1ps
+
+`define XLEN 32
+module stimulus;
+
+  logic [`XLEN-1:0] A;
+  logic [$clog2(`XLEN)-1:0] Shift;
+  logic [`XLEN-1:0] Z;
+  logic [`XLEN-1:0] Z_corr; // reference result
+
+  //logic [63:0] A;
+  //logic [5:0] Shift;
+  //logic [63:0] Z;
+  //logic [63:0] Z_corr;
+  //logic [63:0] Z_orig;
+
+
+  logic clk;
+
+  integer handle3;
+  integer desc3;
+  integer i;
+
+  // instantiate part to test; WIDTH must be overridden to match the XLEN-wide signals (module default is 8)
+  shift_left #(`XLEN) dut1 (A, Shift, Z);
+  assign Z_corr = (A << Shift);
+
+  initial
+    begin
+      clk = 1'b1;
+      forever #5 clk = ~clk; // 10 ns clock period
+    end
+
+  initial
+    begin
+      handle3 = $fopen("shift_left.out");
+      desc3 = handle3;
+    end
+
+  initial
+    begin
+      for (i=0; i < 256; i=i+1)
+        begin
+          // Put vectors before beginning of clk
+          @(posedge clk)
+            begin
+              A = $random;
+              Shift = $random;
+            end
+          @(negedge clk)
+            begin
+              $fdisplay(desc3, "%h %h || %h %h | %b", A, Shift, Z, Z_corr, (Z == Z_corr));
+            end
+        end // for (i=0; i < 256; i=i+1)
+      $finish;//
+    end // initial begin
+
+endmodule // stimulus
diff --git a/pipelined/srt/stine/shift_right.do b/pipelined/srt/stine/shift_right.do
new file mode 100755
index 000000000..bf02e75c1
--- /dev/null
+++ b/pipelined/srt/stine/shift_right.do
@@ -0,0 +1,55 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+#
+# Modification by Oklahoma State University
+# Use with Testbench
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this run.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+# do run.do
+# or, to run from a shell, type the following at the shell prompt:
+# vsim -do run.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# create library
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile source files
+vlog mux.sv shift.sv shift_right_tb.sv
+
+# start and run simulation
+vsim -voptargs=+acc work.stimulus
+
+view wave
+
+-- display input and output signals as hexidecimal values
+# Displays all signals recursively
+add wave -hex -r /stimulus/*
+
+-- Set Wave Output Items
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+-- Run the Simulation
+run 800ns
+quit
diff --git a/pipelined/srt/stine/shift_right_tb.sv b/pipelined/srt/stine/shift_right_tb.sv
new file mode 100755
index 000000000..b35277484
--- /dev/null
+++ b/pipelined/srt/stine/shift_right_tb.sv
@@ -0,0 +1,64 @@
+//
+// File name : tb
+// Title : test
+// project : HW3
+// Library : test
+// Purpose : definition of modules for testbench
+// notes :
+//
+// Copyright Oklahoma State University
+//
+
+// Top level stimulus module: compares shift_right against the >> operator on random vectors, logging to shift_right.out
+
+`timescale 1ns/1ps
+
+`define XLEN 32
+module stimulus;
+
+  logic [`XLEN-1:0] A;
+  logic [$clog2(`XLEN)-1:0] Shift;
+  logic [`XLEN-1:0] Z;
+  logic [`XLEN-1:0] Z_corr; // reference result
+
+  logic clk;
+
+  integer handle3;
+  integer desc3;
+  integer i;
+
+  // instantiate part to test; WIDTH must be overridden to match the XLEN-wide signals (module default is 8)
+  shift_right #(`XLEN) dut1 (A, Shift, Z);
+  assign Z_corr = (A >> Shift);
+
+  initial
+    begin
+      clk = 1'b1;
+      forever #5 clk = ~clk; // 10 ns clock period
+    end
+
+  initial
+    begin
+      handle3 = $fopen("shift_right.out");
+      desc3 = handle3;
+      #250 $finish; // ends the run at 250 ns, before the 128-vector loop below (10 ns per vector) completes
+    end
+
+  initial
+    begin
+      for (i=0; i < 128; i=i+1)
+        begin
+          // Put vectors before beginning of clk
+          @(posedge clk)
+            begin
+              A = $random;
+              Shift = $random;
+            end
+          @(negedge clk)
+            begin
+              $fdisplay(desc3, "%h %h || %h %h | %b", A, Shift, Z, Z_corr, (Z == Z_corr));
+            end
+        end // for (i=0; i < 128; i=i+1)
+    end // initial begin
+
+endmodule // stimulus
diff --git a/pipelined/srt/stine/shifter.sv b/pipelined/srt/stine/shifter.sv
new file mode 100755
index 000000000..779a02a47
--- /dev/null
+++ b/pipelined/srt/stine/shifter.sv
@@ -0,0 +1,18 @@
+module shifter_right(input logic signed [63:0] a,
+                     input logic [ 5:0] shamt,
+                     output logic signed [63:0] y);
+
+  // 64-bit logical right shift: >> shifts in zeros even though a is declared signed
+  assign y = a >> shamt;
+
+endmodule // shifter_right
+
+module shifter_left(input logic signed [63:0] a,
+                    input logic [ 5:0] shamt,
+                    output logic signed [63:0] y);
+
+  // 64-bit left shift, zero-filled from the right
+  assign y = a << shamt;
+
+endmodule // shifter_left
+
diff --git a/pipelined/srt/stine/test_iter128.sv b/pipelined/srt/stine/test_iter128.sv
new file mode 100644
index 000000000..c2f8f5a73
--- /dev/null
+++ b/pipelined/srt/stine/test_iter128.sv
@@ -0,0 +1,79 @@
+`include "idiv-config.vh"
+
+module tb; // unsigned 128-bit divide test: random N/D pairs, results logged to iter128_unsigned.out
+
+  logic [127:0] N, D;
+  logic clk;
+  logic reset;
+  logic start;
+  logic S; // driven to 0 in this test
+
+  logic [127:0] Q;
+  logic [127:0] rem0;
+  logic div0;
+  logic done;
+
+  integer handle3;
+  integer desc3;
+  integer i;
+
+  logic [127:0] Ncomp;
+  logic [127:0] Dcomp;
+  logic [127:0] Qcomp; // reference quotient
+  logic [127:0] Rcomp; // reference remainder
+
+  logic [31:0] vectornum;
+  logic [31:0] errors;
+
+  intdiv #(128) dut (Q, done, rem0, div0, N, D, clk, reset, start, S);
+
+  initial
+    begin
+      clk = 1'b0;
+      forever #5 clk = ~clk; // 10 ns clock period
+    end
+
+  initial
+    begin
+      vectornum = 0;
+      errors = 0;
+      handle3 = $fopen("iter128_unsigned.out");
+    end
+
+  always @(posedge clk, posedge reset) // body ends in $finish, so the whole sequence runs once
+    begin
+      desc3 = handle3;
+      #0 start = 1'b0;
+      #0 S = 1'b0;
+      #0 reset = 1'b1;
+      #30 reset = 1'b0;
+      #30 N = 128'h0;
+      #0 D = 128'h0;
+      for (i=0; i<`IDIV_TESTS; i=i+1)
+        begin
+          N = {$urandom(), $urandom(), $urandom(), $urandom()};
+          D = {$urandom(), $urandom(), $urandom(), $urandom()};
+          start <= 1'b1;
+          // Wait 2 cycles (to be sure)
+          repeat (2)
+            @(posedge clk);
+          start <= 1'b0;
+          repeat (41) // fixed wait; the done output is not consulted
+            @(posedge clk);
+          Ncomp = N;
+          Dcomp = D;
+          Qcomp = Ncomp/Dcomp;
+          Rcomp = Ncomp%Dcomp;
+          vectornum = vectornum + 1;
+          if ((Q !== Qcomp)) begin // only quotient mismatches are counted; the remainder is logged but not counted
+            errors = errors + 1;
+          end
+          $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b",
+                    N, D, Q, rem0, Qcomp, Rcomp,
+                    (Q==Qcomp), (rem0==Rcomp));
+        end // for (i=0; i<`IDIV_TESTS; i=i+1)
+      $display("%d tests completed, %d errors", vectornum, errors);
+      $finish;
+    end
+
+endmodule // tb
diff --git a/pipelined/srt/stine/test_iter128S.sv b/pipelined/srt/stine/test_iter128S.sv
new file mode 100644
index 000000000..7757041f6
--- /dev/null
+++ b/pipelined/srt/stine/test_iter128S.sv
@@ -0,0 +1,90 @@
+`include "idiv-config.vh"
+
+module tb; // signed 128-bit divide test: random N/D pairs, results logged to iter128_signed.out
+
+  logic [127:0] N, D;
+  logic clk;
+  logic reset;
+  logic start;
+  logic S; // driven to 1 in this test
+
+  logic [127:0] Q;
+  logic [127:0] rem0;
+  logic div0;
+  logic done;
+
+  integer handle3;
+  integer desc3;
+  integer i;
+
+  logic [31:0] rnd1;
+  logic [31:0] rnd2;
+  logic [127:0] Ncomp;
+  logic [127:0] Dcomp;
+  logic [127:0] Qcomp; // reference quotient
+  logic [127:0] Rcomp; // reference remainder
+
+  logic [31:0] vectornum;
+  logic [31:0] errors;
+
+  intdiv #(128) dut (Q, done, rem0, div0, N, D, clk, reset, start, S);
+
+  initial
+    begin
+      clk = 1'b0;
+      forever #5 clk = ~clk; // 10 ns clock period
+    end
+
+  initial
+    begin
+      vectornum = 0;
+      errors = 0;
+      handle3 = $fopen("iter128_signed.out");
+    end
+
+  /*
+  // VCD generation for power estimation
+  initial
+    begin
+      $dumpfile("iter128_signed.vcd");
+      $dumpvars (0,tb.dut);
+    end
+  */
+
+  always @(posedge clk, posedge reset) // body ends in $finish, so the whole sequence runs once
+    begin
+      desc3 = handle3;
+      #0 start = 1'b0;
+      #0 S = 1'b1;
+      #0 reset = 1'b1;
+      #30 reset = 1'b0;
+      #30 N = 128'h0;
+      #0 D = 128'h0;
+      for (i=0; i<`IDIV_TESTS; i=i+1)
+        begin
+          N = {$urandom(), $urandom(), $urandom(), $urandom()};
+          D = {$urandom(), $urandom(), $urandom(), $urandom()};
+          start <= 1'b1;
+          // Wait 1 cycle
+          repeat (1)
+            @(posedge clk);
+          start <= 1'b0;
+          repeat (65) // fixed wait; the done output is not consulted
+            @(posedge clk);
+          Ncomp = N;
+          Dcomp = D;
+          Qcomp = $signed(Ncomp)/$signed(Dcomp);
+          Rcomp = $signed(Ncomp)%$signed(Dcomp);
+          vectornum = vectornum + 1;
+          if ((Q !== Qcomp)) begin // only quotient mismatches are counted; the remainder is logged but not counted
+            errors = errors + 1;
+          end
+          $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b",
+                    N, D, Q, rem0, Qcomp, Rcomp,
+                    (Q==Qcomp), (rem0==Rcomp));
+        end
+      $display("%d tests completed, %d errors", vectornum, errors);
+      $finish;
+    end
+
+endmodule // tb
diff --git a/pipelined/srt/stine/test_iter32.sv b/pipelined/srt/stine/test_iter32.sv
new file mode 100755
index 000000000..6590b5a16
--- /dev/null
+++ b/pipelined/srt/stine/test_iter32.sv
@@ -0,0 +1,85 @@
+`include "idiv-config.vh"
+
+module tb; // unsigned 32-bit divide test: random N/D pairs, results logged to iter32_unsigned.out
+
+  logic [31:0] N, D;
+  logic clk;
+  logic reset;
+  logic start;
+  logic S; // driven to 0 in this test
+
+  logic [31:0] Q;
+  logic [31:0] rem0;
+  logic div0;
+  logic done;
+
+  integer handle3;
+  integer desc3;
+  integer i;
+
+  logic [31:0] Ncomp;
+  logic [31:0] Dcomp;
+  logic [31:0] Qcomp; // reference quotient
+  logic [31:0] Rcomp; // reference remainder
+
+  logic [31:0] vectornum;
+  logic [31:0] errors;
+
+  intdiv #(32) dut (Q, done, rem0, div0, N, D, clk, reset, start, S);
+
+  initial
+    begin
+      clk = 1'b0;
+      forever #5 clk = ~clk; // 10 ns clock period
+    end
+
+  initial
+    begin
+      vectornum = 0;
+      errors = 0;
+      handle3 = $fopen("iter32_unsigned.out");
+    end
+
+  always @(posedge clk, posedge reset) // body ends in $finish, so the whole sequence runs once
+    begin
+      desc3 = handle3;
+      #0 start = 1'b0;
+      #0 S = 1'b0;
+      #0 reset = 1'b1;
+      #30 reset = 1'b0;
+      #30 N = 32'h0;
+      #0 D = 32'h0;
+      for (i=0; i<`IDIV_TESTS; i=i+1)
+        begin
+          N = $urandom;
+          D = $urandom;
+          start <= 1'b1;
+          // Wait 2 cycles (to be sure)
+          repeat (2)
+            @(posedge clk);
+          start <= 1'b0;
+          repeat (41) // fixed wait; the done output is not consulted
+            @(posedge clk);
+          Ncomp = N;
+          Dcomp = D;
+          Qcomp = Ncomp/Dcomp;
+          Rcomp = Ncomp%Dcomp;
+          if ((Q !== Qcomp)) begin // only quotient mismatches are counted; the remainder is logged but not counted
+            errors = errors + 1;
+          end
+          vectornum = vectornum + 1;
+          $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b",
+                    N, D, Q, rem0, Qcomp, Rcomp,
+                    (Q==Qcomp), (rem0==Rcomp));
+        end // for (i=0; i<`IDIV_TESTS; i=i+1)
+      $display("%d tests completed, %d errors", vectornum, errors);
+      $finish;
+    end
+
+endmodule // tb
+
+
+
+
+
+
diff --git a/pipelined/srt/stine/test_iter32S.sv b/pipelined/srt/stine/test_iter32S.sv
new file mode 100644
index 000000000..e3b271b4e
--- /dev/null
+++ b/pipelined/srt/stine/test_iter32S.sv
@@ -0,0 +1,79 @@
+`include "idiv-config.vh"
+
+module tb; // Testbench: feeds random 32-bit operand pairs to intdiv with S=1 and checks Q against Verilog signed division; results are logged to iter32_signed.out
+
+   logic [31:0]  N, D; // dividend (N) and divisor (D) driven into the DUT
+   logic         clk;
+   logic         reset;
+   logic         start;
+   logic         S; // held at 1 in this bench; presumably the signed-mode select of intdiv -- TODO confirm
+
+   logic [31:0]  Q;
+   logic [31:0]  rem0;
+   logic         div0; // connected to the DUT but never read in this testbench
+   logic         done; // connected to the DUT but never read in this testbench
+
+   integer       handle3; // value returned by $fopen for the log file
+   integer       desc3; // copy of handle3 used by $fdisplay
+   integer       i;
+
+   logic [31:0]  Ncomp; // reference-model operands and results
+   logic [31:0]  Dcomp;
+   logic [31:0]  Qcomp;
+   logic [31:0]  Rcomp;
+
+   logic [31:0]  vectornum; // number of vectors applied
+   logic [31:0]  errors; // number of quotient mismatches
+
+   intdiv #(32) dut (Q, done, rem0, div0, N, D, clk, reset, start, S); // ports are connected by position
+
+   initial
+     begin
+        clk = 1'b0;
+        forever #5 clk = ~clk; // toggles every 5 time units, i.e. a 10-unit period
+     end
+
+   initial
+     begin
+        vectornum = 0;
+        errors = 0;
+        handle3 = $fopen("iter32_signed.out");
+     end
+
+   always @(posedge clk, posedge reset) // NOTE(review): one-shot stimulus; the body ends in $finish so it only ever runs once -- an initial block looks like the intent, confirm
+     begin
+        desc3 = handle3;
+        #0  start = 1'b0;
+        #0  S = 1'b1;
+        #0  reset = 1'b1;
+        #30 reset = 1'b0; // reset is held high for 30 time units
+        #30 N = 32'h0;
+        #0  D = 32'h0;
+        for (i=0; i<`IDIV_TESTS; i=i+1) // IDIV_TESTS is not defined in this file; presumably it comes from idiv-config.vh -- TODO confirm
+          begin
+             N = $urandom;
+             D = $urandom; // NOTE(review): D == 0 is not excluded; the reference division below would then yield X
+             start <= 1'b1;
+             // Wait 2 cycles (to be sure)
+             repeat (2)
+               @(posedge clk);
+             start <= 1'b0;
+             repeat (41) // NOTE(review): fixed 41-edge wait before sampling; done is not consulted
+               @(posedge clk);
+             Ncomp = N;
+             Dcomp = D;
+             Qcomp = $signed(Ncomp)/$signed(Dcomp); // signed reference quotient
+             Rcomp = $signed(Ncomp)%$signed(Dcomp); // signed reference remainder
+             if ((Q !== Qcomp)) begin // only quotient mismatches are counted; the remainder comparison is logged below but does not affect errors
+                errors = errors + 1;
+             end
+             vectornum = vectornum + 1;
+             $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b",
+                       N, D, Q, rem0, Qcomp, Rcomp,
+                       (Q==Qcomp), (rem0==Rcomp));
+          end // for (i=0; i<IDIV_TESTS; i=i+1)
+        $display("%d tests completed, %d errors", vectornum, errors);
+        $finish;
+     end
+
+endmodule // tb
diff --git a/pipelined/srt/stine/test_iter64.sv b/pipelined/srt/stine/test_iter64.sv
new file mode 100755
index 000000000..3da85c20a
--- /dev/null
+++ b/pipelined/srt/stine/test_iter64.sv
@@ -0,0 +1,79 @@
+`include "idiv-config.vh"
+
+module tb; // Testbench: feeds random 64-bit operand pairs to intdiv with S=0 and checks Q against Verilog unsigned division; results are logged to iter64_unsigned.out
+
+   logic [63:0]  N, D; // dividend (N) and divisor (D) driven into the DUT
+   logic         clk;
+   logic         reset;
+   logic         start;
+   logic         S; // held at 0 in this bench; presumably the signed-mode select of intdiv -- TODO confirm
+
+   logic [63:0]  Q;
+   logic [63:0]  rem0;
+   logic         div0; // connected to the DUT but never read in this testbench
+   logic         done; // connected to the DUT but never read in this testbench
+
+   integer       handle3; // value returned by $fopen for the log file
+   integer       desc3; // copy of handle3 used by $fdisplay
+   integer       i;
+
+   logic [63:0]  Ncomp; // reference-model operands and results
+   logic [63:0]  Dcomp;
+   logic [63:0]  Qcomp;
+   logic [63:0]  Rcomp;
+
+   logic [31:0]  vectornum; // number of vectors applied
+   logic [31:0]  errors; // number of quotient mismatches
+
+   intdiv #(64) dut (Q, done, rem0, div0, N, D, clk, reset, start, S); // ports are connected by position
+
+   initial
+     begin
+        clk = 1'b0;
+        forever #5 clk = ~clk; // toggles every 5 time units, i.e. a 10-unit period
+     end
+
+   initial
+     begin
+        vectornum = 0;
+        errors = 0;
+        handle3 = $fopen("iter64_unsigned.out");
+     end
+
+   always @(posedge clk, posedge reset) // NOTE(review): one-shot stimulus; the body ends in $finish so it only ever runs once -- an initial block looks like the intent, confirm
+     begin
+        desc3 = handle3;
+        #0  start = 1'b0;
+        #0  S = 1'b0;
+        #0  reset = 1'b1;
+        #30 reset = 1'b0; // reset is held high for 30 time units
+        #30 N = 64'h0;
+        #0  D = 64'h0;
+        for (i=0; i<`IDIV_TESTS; i=i+1) // IDIV_TESTS is not defined in this file; presumably it comes from idiv-config.vh -- TODO confirm
+          begin
+             N = {$urandom(), $urandom()}; // two $urandom() results concatenated to fill 64 bits
+             D = {$urandom(), $urandom()}; // NOTE(review): D == 0 is not excluded; the reference division below would then yield X
+             start <= 1'b1;
+             // Wait 2 cycles (to be sure)
+             repeat (2)
+               @(posedge clk);
+             start <= 1'b0;
+             repeat (41) // NOTE(review): same fixed 41-edge wait as the 32-bit signed bench above; done is not consulted -- confirm it is long enough for the 64-bit DUT
+               @(posedge clk);
+             Ncomp = N;
+             Dcomp = D;
+             Qcomp = Ncomp/Dcomp; // unsigned reference quotient
+             Rcomp = Ncomp%Dcomp; // unsigned reference remainder
+             vectornum = vectornum + 1;
+             if ((Q !== Qcomp)) begin // only quotient mismatches are counted; the remainder comparison is logged below but does not affect errors
+                errors = errors + 1;
+             end
+             $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b",
+                       N, D, Q, rem0, Qcomp, Rcomp,
+                       (Q==Qcomp), (rem0==Rcomp));
+          end // for (i=0; i<IDIV_TESTS; i=i+1)
+        $display("%d tests completed, %d errors", vectornum, errors);
+        $finish;
+     end
+
+endmodule // tb
diff --git a/pipelined/srt/stine/test_iter64S.sv b/pipelined/srt/stine/test_iter64S.sv
new file mode 100644
index 000000000..cdfb573a7
--- /dev/null
+++ b/pipelined/srt/stine/test_iter64S.sv
@@ -0,0 +1,79 @@
+`include "idiv-config.vh"
+
+module tb; // Testbench: feeds random 64-bit operand pairs to intdiv with S=1 and checks Q against Verilog signed division; results are logged to iter64_signed.out
+
+   logic [63:0]  N, D; // dividend (N) and divisor (D) driven into the DUT
+   logic         clk;
+   logic         reset;
+   logic         start;
+   logic         S; // held at 1 in this bench; presumably the signed-mode select of intdiv -- TODO confirm
+
+   logic [63:0]  Q;
+   logic [63:0]  rem0;
+   logic         div0; // connected to the DUT but never read in this testbench
+   logic         done; // connected to the DUT but never read in this testbench
+
+   integer       handle3; // value returned by $fopen for the log file
+   integer       desc3; // copy of handle3 used by $fdisplay
+   integer       i;
+
+   logic [63:0]  Ncomp; // reference-model operands and results
+   logic [63:0]  Dcomp;
+   logic [63:0]  Qcomp;
+   logic [63:0]  Rcomp;
+
+   logic [31:0]  vectornum; // number of vectors applied
+   logic [31:0]  errors; // number of quotient mismatches
+
+   intdiv #(64) dut (Q, done, rem0, div0, N, D, clk, reset, start, S); // ports are connected by position
+
+   initial
+     begin
+        clk = 1'b0;
+        forever #5 clk = ~clk; // toggles every 5 time units, i.e. a 10-unit period
+     end
+
+   initial
+     begin
+        vectornum = 0;
+        errors = 0;
+        handle3 = $fopen("iter64_signed.out");
+     end
+
+   always @(posedge clk, posedge reset) // NOTE(review): one-shot stimulus; the body ends in $finish so it only ever runs once -- an initial block looks like the intent, confirm
+     begin
+        desc3 = handle3;
+        #0  start = 1'b0;
+        #0  S = 1'b1;
+        #0  reset = 1'b1;
+        #30 reset = 1'b0; // reset is held high for 30 time units
+        #30 N = 64'h0;
+        #0  D = 64'h0;
+        for (i=0; i<`IDIV_TESTS; i=i+1) // IDIV_TESTS is not defined in this file; presumably it comes from idiv-config.vh -- TODO confirm
+          begin
+             N = {$urandom(), $urandom()}; // two $urandom() results concatenated to fill 64 bits
+             D = {$urandom(), $urandom()}; // NOTE(review): D == 0 is not excluded; the reference division below would then yield X
+             start <= 1'b1;
+             // Wait 2 cycles (to be sure)
+             repeat (2)
+               @(posedge clk);
+             start <= 1'b0;
+             repeat (41) // NOTE(review): fixed 41-edge wait before sampling; done is not consulted -- confirm it is long enough for the 64-bit DUT
+               @(posedge clk);
+             Ncomp = N;
+             Dcomp = D;
+             Qcomp = $signed(Ncomp)/$signed(Dcomp); // signed reference quotient
+             Rcomp = $signed(Ncomp)%$signed(Dcomp); // signed reference remainder
+             if ((Q !== Qcomp)) begin // only quotient mismatches are counted; the remainder comparison is logged below but does not affect errors
+                errors = errors + 1;
+             end
+             vectornum = vectornum + 1;
+             $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b",
+                       N, D, Q, rem0, Qcomp, Rcomp,
+                       (Q==Qcomp), (rem0==Rcomp));
+          end // for (i=0; i<IDIV_TESTS; i=i+1)
+        $display("%d tests completed, %d errors", vectornum, errors);
+        $finish;
+     end
+
+endmodule // tb
diff --git a/pipelined/srt/stine/tmp b/pipelined/srt/stine/tmp
new file mode 100644
index 000000000..c7cdf64f8
--- /dev/null
+++ b/pipelined/srt/stine/tmp
@@ -0,0 +1,1026 @@
+    case({D[5:3],Wmsbs}) // 10-bit selector: D[5:3] followed by Wmsbs (7 bits, judging by the 10'b labels); each entry assigns a 4-bit q
+      10'b000_0000000: q = 4'b0000;
+      10'b000_0000001: q = 4'b0000;
+      10'b000_0000010: q = 4'b0000;
+      10'b000_0000011: q = 4'b0000;
+      10'b000_0000100: q = 4'b0100;
+      10'b000_0000101: q = 4'b0100;
+      10'b000_0000110: q = 4'b0100;
+      10'b000_0000111: q = 4'b0100;
+      10'b000_0001000: q = 4'b0100;
+      10'b000_0001001: q = 4'b0100;
+      10'b000_0001010: q = 4'b0100;
+      10'b000_0001011: q = 4'b0100;
+      10'b000_0001100: q = 4'b1000;
+      10'b000_0001101: q = 4'b1000;
+      10'b000_0001110: q = 4'b1000;
+      10'b000_0001111: q = 4'b1000;
+      10'b000_0010000: q = 4'b1000;
+      10'b000_0010001: q = 4'b1000;
+      10'b000_0010010: q = 4'b1000;
+      10'b000_0010011: q = 4'b1000;
+      10'b000_0010100: q = 4'b1000;
+      10'b000_0010101: q = 4'b1000;
+      10'b000_0010110: q = 4'b1000;
+      10'b000_0010111: q = 4'b1000;
+      10'b000_0011000: q = 4'b1000;
+      10'b000_0011001: q = 4'b1000;
+      10'b000_0011010: q =
4'b1000; + 10'b000_0011011: q = 4'b1000; + 10'b000_0011100: q = 4'b1000; + 10'b000_0011101: q = 4'b1000; + 10'b000_0011110: q = 4'b1000; + 10'b000_0011111: q = 4'b1000; + 10'b000_0100000: q = 4'b1000; + 10'b000_0100001: q = 4'b1000; + 10'b000_0100010: q = 4'b1000; + 10'b000_0100011: q = 4'b1000; + 10'b000_0100100: q = 4'b1000; + 10'b000_0100101: q = 4'b1000; + 10'b000_0100110: q = 4'b1000; + 10'b000_0100111: q = 4'b1000; + 10'b000_0101000: q = 4'b1000; + 10'b000_0101001: q = 4'b1000; + 10'b000_0101010: q = 4'b1000; + 10'b000_0101011: q = 4'b1000; + 10'b000_0101100: q = 4'b1000; + 10'b000_0101101: q = 4'b1000; + 10'b000_0101110: q = 4'b1000; + 10'b000_0101111: q = 4'b1000; + 10'b000_0110000: q = 4'b1000; + 10'b000_0110001: q = 4'b1000; + 10'b000_0110010: q = 4'b1000; + 10'b000_0110011: q = 4'b1000; + 10'b000_0110100: q = 4'b1000; + 10'b000_0110101: q = 4'b1000; + 10'b000_0110110: q = 4'b1000; + 10'b000_0110111: q = 4'b1000; + 10'b000_0111000: q = 4'b1000; + 10'b000_0111001: q = 4'b1000; + 10'b000_0111010: q = 4'b1000; + 10'b000_0111011: q = 4'b1000; + 10'b000_0111100: q = 4'b1000; + 10'b000_0111101: q = 4'b1000; + 10'b000_0111110: q = 4'b1000; + 10'b000_0111111: q = 4'b1000; + 10'b000_1000000: q = 4'b0001; + 10'b000_1000001: q = 4'b0001; + 10'b000_1000010: q = 4'b0001; + 10'b000_1000011: q = 4'b0001; + 10'b000_1000100: q = 4'b0001; + 10'b000_1000101: q = 4'b0001; + 10'b000_1000110: q = 4'b0001; + 10'b000_1000111: q = 4'b0001; + 10'b000_1001000: q = 4'b0001; + 10'b000_1001001: q = 4'b0001; + 10'b000_1001010: q = 4'b0001; + 10'b000_1001011: q = 4'b0001; + 10'b000_1001100: q = 4'b0001; + 10'b000_1001101: q = 4'b0001; + 10'b000_1001110: q = 4'b0001; + 10'b000_1001111: q = 4'b0001; + 10'b000_1010000: q = 4'b0001; + 10'b000_1010001: q = 4'b0001; + 10'b000_1010010: q = 4'b0001; + 10'b000_1010011: q = 4'b0001; + 10'b000_1010100: q = 4'b0001; + 10'b000_1010101: q = 4'b0001; + 10'b000_1010110: q = 4'b0001; + 10'b000_1010111: q = 4'b0001; + 10'b000_1011000: q = 4'b0001; + 
10'b000_1011001: q = 4'b0001; + 10'b000_1011010: q = 4'b0001; + 10'b000_1011011: q = 4'b0001; + 10'b000_1011100: q = 4'b0001; + 10'b000_1011101: q = 4'b0001; + 10'b000_1011110: q = 4'b0001; + 10'b000_1011111: q = 4'b0001; + 10'b000_1100000: q = 4'b0001; + 10'b000_1100001: q = 4'b0001; + 10'b000_1100010: q = 4'b0001; + 10'b000_1100011: q = 4'b0001; + 10'b000_1100100: q = 4'b0001; + 10'b000_1100101: q = 4'b0001; + 10'b000_1100110: q = 4'b0001; + 10'b000_1100111: q = 4'b0001; + 10'b000_1101000: q = 4'b0001; + 10'b000_1101001: q = 4'b0001; + 10'b000_1101010: q = 4'b0001; + 10'b000_1101011: q = 4'b0001; + 10'b000_1101100: q = 4'b0001; + 10'b000_1101101: q = 4'b0001; + 10'b000_1101110: q = 4'b0001; + 10'b000_1101111: q = 4'b0001; + 10'b000_1110000: q = 4'b0001; + 10'b000_1110001: q = 4'b0001; + 10'b000_1110010: q = 4'b0001; + 10'b000_1110011: q = 4'b0010; + 10'b000_1110100: q = 4'b0010; + 10'b000_1110101: q = 4'b0010; + 10'b000_1110110: q = 4'b0010; + 10'b000_1110111: q = 4'b0010; + 10'b000_1111000: q = 4'b0010; + 10'b000_1111001: q = 4'b0010; + 10'b000_1111010: q = 4'b0010; + 10'b000_1111011: q = 4'b0010; + 10'b000_1111100: q = 4'b0000; + 10'b000_1111101: q = 4'b0000; + 10'b000_1111110: q = 4'b0000; + 10'b000_1111111: q = 4'b0000; + 10'b001_0000000: q = 4'b0000; + 10'b001_0000001: q = 4'b0000; + 10'b001_0000010: q = 4'b0000; + 10'b001_0000011: q = 4'b0000; + 10'b001_0000100: q = 4'b0100; + 10'b001_0000101: q = 4'b0100; + 10'b001_0000110: q = 4'b0100; + 10'b001_0000111: q = 4'b0100; + 10'b001_0001000: q = 4'b0100; + 10'b001_0001001: q = 4'b0100; + 10'b001_0001010: q = 4'b0100; + 10'b001_0001011: q = 4'b0100; + 10'b001_0001100: q = 4'b0100; + 10'b001_0001101: q = 4'b0100; + 10'b001_0001110: q = 4'b1000; + 10'b001_0001111: q = 4'b1000; + 10'b001_0010000: q = 4'b1000; + 10'b001_0010001: q = 4'b1000; + 10'b001_0010010: q = 4'b1000; + 10'b001_0010011: q = 4'b1000; + 10'b001_0010100: q = 4'b1000; + 10'b001_0010101: q = 4'b1000; + 10'b001_0010110: q = 4'b1000; + 
10'b001_0010111: q = 4'b1000; + 10'b001_0011000: q = 4'b1000; + 10'b001_0011001: q = 4'b1000; + 10'b001_0011010: q = 4'b1000; + 10'b001_0011011: q = 4'b1000; + 10'b001_0011100: q = 4'b1000; + 10'b001_0011101: q = 4'b1000; + 10'b001_0011110: q = 4'b1000; + 10'b001_0011111: q = 4'b1000; + 10'b001_0100000: q = 4'b1000; + 10'b001_0100001: q = 4'b1000; + 10'b001_0100010: q = 4'b1000; + 10'b001_0100011: q = 4'b1000; + 10'b001_0100100: q = 4'b1000; + 10'b001_0100101: q = 4'b1000; + 10'b001_0100110: q = 4'b1000; + 10'b001_0100111: q = 4'b1000; + 10'b001_0101000: q = 4'b1000; + 10'b001_0101001: q = 4'b1000; + 10'b001_0101010: q = 4'b1000; + 10'b001_0101011: q = 4'b1000; + 10'b001_0101100: q = 4'b1000; + 10'b001_0101101: q = 4'b1000; + 10'b001_0101110: q = 4'b1000; + 10'b001_0101111: q = 4'b1000; + 10'b001_0110000: q = 4'b1000; + 10'b001_0110001: q = 4'b1000; + 10'b001_0110010: q = 4'b1000; + 10'b001_0110011: q = 4'b1000; + 10'b001_0110100: q = 4'b1000; + 10'b001_0110101: q = 4'b1000; + 10'b001_0110110: q = 4'b1000; + 10'b001_0110111: q = 4'b1000; + 10'b001_0111000: q = 4'b1000; + 10'b001_0111001: q = 4'b1000; + 10'b001_0111010: q = 4'b1000; + 10'b001_0111011: q = 4'b1000; + 10'b001_0111100: q = 4'b1000; + 10'b001_0111101: q = 4'b1000; + 10'b001_0111110: q = 4'b1000; + 10'b001_0111111: q = 4'b1000; + 10'b001_1000000: q = 4'b0001; + 10'b001_1000001: q = 4'b0001; + 10'b001_1000010: q = 4'b0001; + 10'b001_1000011: q = 4'b0001; + 10'b001_1000100: q = 4'b0001; + 10'b001_1000101: q = 4'b0001; + 10'b001_1000110: q = 4'b0001; + 10'b001_1000111: q = 4'b0001; + 10'b001_1001000: q = 4'b0001; + 10'b001_1001001: q = 4'b0001; + 10'b001_1001010: q = 4'b0001; + 10'b001_1001011: q = 4'b0001; + 10'b001_1001100: q = 4'b0001; + 10'b001_1001101: q = 4'b0001; + 10'b001_1001110: q = 4'b0001; + 10'b001_1001111: q = 4'b0001; + 10'b001_1010000: q = 4'b0001; + 10'b001_1010001: q = 4'b0001; + 10'b001_1010010: q = 4'b0001; + 10'b001_1010011: q = 4'b0001; + 10'b001_1010100: q = 4'b0001; + 
10'b001_1010101: q = 4'b0001; + 10'b001_1010110: q = 4'b0001; + 10'b001_1010111: q = 4'b0001; + 10'b001_1011000: q = 4'b0001; + 10'b001_1011001: q = 4'b0001; + 10'b001_1011010: q = 4'b0001; + 10'b001_1011011: q = 4'b0001; + 10'b001_1011100: q = 4'b0001; + 10'b001_1011101: q = 4'b0001; + 10'b001_1011110: q = 4'b0001; + 10'b001_1011111: q = 4'b0001; + 10'b001_1100000: q = 4'b0001; + 10'b001_1100001: q = 4'b0001; + 10'b001_1100010: q = 4'b0001; + 10'b001_1100011: q = 4'b0001; + 10'b001_1100100: q = 4'b0001; + 10'b001_1100101: q = 4'b0001; + 10'b001_1100110: q = 4'b0001; + 10'b001_1100111: q = 4'b0001; + 10'b001_1101000: q = 4'b0001; + 10'b001_1101001: q = 4'b0001; + 10'b001_1101010: q = 4'b0001; + 10'b001_1101011: q = 4'b0001; + 10'b001_1101100: q = 4'b0001; + 10'b001_1101101: q = 4'b0001; + 10'b001_1101110: q = 4'b0001; + 10'b001_1101111: q = 4'b0001; + 10'b001_1110000: q = 4'b0001; + 10'b001_1110001: q = 4'b0010; + 10'b001_1110010: q = 4'b0010; + 10'b001_1110011: q = 4'b0010; + 10'b001_1110100: q = 4'b0010; + 10'b001_1110101: q = 4'b0010; + 10'b001_1110110: q = 4'b0010; + 10'b001_1110111: q = 4'b0010; + 10'b001_1111000: q = 4'b0010; + 10'b001_1111001: q = 4'b0010; + 10'b001_1111010: q = 4'b0000; + 10'b001_1111011: q = 4'b0000; + 10'b001_1111100: q = 4'b0000; + 10'b001_1111101: q = 4'b0000; + 10'b001_1111110: q = 4'b0000; + 10'b001_1111111: q = 4'b0000; + 10'b010_0000000: q = 4'b0000; + 10'b010_0000001: q = 4'b0000; + 10'b010_0000010: q = 4'b0000; + 10'b010_0000011: q = 4'b0000; + 10'b010_0000100: q = 4'b0100; + 10'b010_0000101: q = 4'b0100; + 10'b010_0000110: q = 4'b0100; + 10'b010_0000111: q = 4'b0100; + 10'b010_0001000: q = 4'b0100; + 10'b010_0001001: q = 4'b0100; + 10'b010_0001010: q = 4'b0100; + 10'b010_0001011: q = 4'b0100; + 10'b010_0001100: q = 4'b0100; + 10'b010_0001101: q = 4'b0100; + 10'b010_0001110: q = 4'b0100; + 10'b010_0001111: q = 4'b1000; + 10'b010_0010000: q = 4'b1000; + 10'b010_0010001: q = 4'b1000; + 10'b010_0010010: q = 4'b1000; + 
10'b010_0010011: q = 4'b1000; + 10'b010_0010100: q = 4'b1000; + 10'b010_0010101: q = 4'b1000; + 10'b010_0010110: q = 4'b1000; + 10'b010_0010111: q = 4'b1000; + 10'b010_0011000: q = 4'b1000; + 10'b010_0011001: q = 4'b1000; + 10'b010_0011010: q = 4'b1000; + 10'b010_0011011: q = 4'b1000; + 10'b010_0011100: q = 4'b1000; + 10'b010_0011101: q = 4'b1000; + 10'b010_0011110: q = 4'b1000; + 10'b010_0011111: q = 4'b1000; + 10'b010_0100000: q = 4'b1000; + 10'b010_0100001: q = 4'b1000; + 10'b010_0100010: q = 4'b1000; + 10'b010_0100011: q = 4'b1000; + 10'b010_0100100: q = 4'b1000; + 10'b010_0100101: q = 4'b1000; + 10'b010_0100110: q = 4'b1000; + 10'b010_0100111: q = 4'b1000; + 10'b010_0101000: q = 4'b1000; + 10'b010_0101001: q = 4'b1000; + 10'b010_0101010: q = 4'b1000; + 10'b010_0101011: q = 4'b1000; + 10'b010_0101100: q = 4'b1000; + 10'b010_0101101: q = 4'b1000; + 10'b010_0101110: q = 4'b1000; + 10'b010_0101111: q = 4'b1000; + 10'b010_0110000: q = 4'b1000; + 10'b010_0110001: q = 4'b1000; + 10'b010_0110010: q = 4'b1000; + 10'b010_0110011: q = 4'b1000; + 10'b010_0110100: q = 4'b1000; + 10'b010_0110101: q = 4'b1000; + 10'b010_0110110: q = 4'b1000; + 10'b010_0110111: q = 4'b1000; + 10'b010_0111000: q = 4'b1000; + 10'b010_0111001: q = 4'b1000; + 10'b010_0111010: q = 4'b1000; + 10'b010_0111011: q = 4'b1000; + 10'b010_0111100: q = 4'b1000; + 10'b010_0111101: q = 4'b1000; + 10'b010_0111110: q = 4'b1000; + 10'b010_0111111: q = 4'b1000; + 10'b010_1000000: q = 4'b0001; + 10'b010_1000001: q = 4'b0001; + 10'b010_1000010: q = 4'b0001; + 10'b010_1000011: q = 4'b0001; + 10'b010_1000100: q = 4'b0001; + 10'b010_1000101: q = 4'b0001; + 10'b010_1000110: q = 4'b0001; + 10'b010_1000111: q = 4'b0001; + 10'b010_1001000: q = 4'b0001; + 10'b010_1001001: q = 4'b0001; + 10'b010_1001010: q = 4'b0001; + 10'b010_1001011: q = 4'b0001; + 10'b010_1001100: q = 4'b0001; + 10'b010_1001101: q = 4'b0001; + 10'b010_1001110: q = 4'b0001; + 10'b010_1001111: q = 4'b0001; + 10'b010_1010000: q = 4'b0001; + 
10'b010_1010001: q = 4'b0001; + 10'b010_1010010: q = 4'b0001; + 10'b010_1010011: q = 4'b0001; + 10'b010_1010100: q = 4'b0001; + 10'b010_1010101: q = 4'b0001; + 10'b010_1010110: q = 4'b0001; + 10'b010_1010111: q = 4'b0001; + 10'b010_1011000: q = 4'b0001; + 10'b010_1011001: q = 4'b0001; + 10'b010_1011010: q = 4'b0001; + 10'b010_1011011: q = 4'b0001; + 10'b010_1011100: q = 4'b0001; + 10'b010_1011101: q = 4'b0001; + 10'b010_1011110: q = 4'b0001; + 10'b010_1011111: q = 4'b0001; + 10'b010_1100000: q = 4'b0001; + 10'b010_1100001: q = 4'b0001; + 10'b010_1100010: q = 4'b0001; + 10'b010_1100011: q = 4'b0001; + 10'b010_1100100: q = 4'b0001; + 10'b010_1100101: q = 4'b0001; + 10'b010_1100110: q = 4'b0001; + 10'b010_1100111: q = 4'b0001; + 10'b010_1101000: q = 4'b0001; + 10'b010_1101001: q = 4'b0001; + 10'b010_1101010: q = 4'b0001; + 10'b010_1101011: q = 4'b0001; + 10'b010_1101100: q = 4'b0001; + 10'b010_1101101: q = 4'b0001; + 10'b010_1101110: q = 4'b0001; + 10'b010_1101111: q = 4'b0001; + 10'b010_1110000: q = 4'b0010; + 10'b010_1110001: q = 4'b0010; + 10'b010_1110010: q = 4'b0010; + 10'b010_1110011: q = 4'b0010; + 10'b010_1110100: q = 4'b0010; + 10'b010_1110101: q = 4'b0010; + 10'b010_1110110: q = 4'b0010; + 10'b010_1110111: q = 4'b0010; + 10'b010_1111000: q = 4'b0010; + 10'b010_1111001: q = 4'b0010; + 10'b010_1111010: q = 4'b0000; + 10'b010_1111011: q = 4'b0000; + 10'b010_1111100: q = 4'b0000; + 10'b010_1111101: q = 4'b0000; + 10'b010_1111110: q = 4'b0000; + 10'b010_1111111: q = 4'b0000; + 10'b011_0000000: q = 4'b0000; + 10'b011_0000001: q = 4'b0000; + 10'b011_0000010: q = 4'b0000; + 10'b011_0000011: q = 4'b0000; + 10'b011_0000100: q = 4'b0100; + 10'b011_0000101: q = 4'b0100; + 10'b011_0000110: q = 4'b0100; + 10'b011_0000111: q = 4'b0100; + 10'b011_0001000: q = 4'b0100; + 10'b011_0001001: q = 4'b0100; + 10'b011_0001010: q = 4'b0100; + 10'b011_0001011: q = 4'b0100; + 10'b011_0001100: q = 4'b0100; + 10'b011_0001101: q = 4'b0100; + 10'b011_0001110: q = 4'b0100; + 
10'b011_0001111: q = 4'b0100; + 10'b011_0010000: q = 4'b1000; + 10'b011_0010001: q = 4'b1000; + 10'b011_0010010: q = 4'b1000; + 10'b011_0010011: q = 4'b1000; + 10'b011_0010100: q = 4'b1000; + 10'b011_0010101: q = 4'b1000; + 10'b011_0010110: q = 4'b1000; + 10'b011_0010111: q = 4'b1000; + 10'b011_0011000: q = 4'b1000; + 10'b011_0011001: q = 4'b1000; + 10'b011_0011010: q = 4'b1000; + 10'b011_0011011: q = 4'b1000; + 10'b011_0011100: q = 4'b1000; + 10'b011_0011101: q = 4'b1000; + 10'b011_0011110: q = 4'b1000; + 10'b011_0011111: q = 4'b1000; + 10'b011_0100000: q = 4'b1000; + 10'b011_0100001: q = 4'b1000; + 10'b011_0100010: q = 4'b1000; + 10'b011_0100011: q = 4'b1000; + 10'b011_0100100: q = 4'b1000; + 10'b011_0100101: q = 4'b1000; + 10'b011_0100110: q = 4'b1000; + 10'b011_0100111: q = 4'b1000; + 10'b011_0101000: q = 4'b1000; + 10'b011_0101001: q = 4'b1000; + 10'b011_0101010: q = 4'b1000; + 10'b011_0101011: q = 4'b1000; + 10'b011_0101100: q = 4'b1000; + 10'b011_0101101: q = 4'b1000; + 10'b011_0101110: q = 4'b1000; + 10'b011_0101111: q = 4'b1000; + 10'b011_0110000: q = 4'b1000; + 10'b011_0110001: q = 4'b1000; + 10'b011_0110010: q = 4'b1000; + 10'b011_0110011: q = 4'b1000; + 10'b011_0110100: q = 4'b1000; + 10'b011_0110101: q = 4'b1000; + 10'b011_0110110: q = 4'b1000; + 10'b011_0110111: q = 4'b1000; + 10'b011_0111000: q = 4'b1000; + 10'b011_0111001: q = 4'b1000; + 10'b011_0111010: q = 4'b1000; + 10'b011_0111011: q = 4'b1000; + 10'b011_0111100: q = 4'b1000; + 10'b011_0111101: q = 4'b1000; + 10'b011_0111110: q = 4'b1000; + 10'b011_0111111: q = 4'b1000; + 10'b011_1000000: q = 4'b0001; + 10'b011_1000001: q = 4'b0001; + 10'b011_1000010: q = 4'b0001; + 10'b011_1000011: q = 4'b0001; + 10'b011_1000100: q = 4'b0001; + 10'b011_1000101: q = 4'b0001; + 10'b011_1000110: q = 4'b0001; + 10'b011_1000111: q = 4'b0001; + 10'b011_1001000: q = 4'b0001; + 10'b011_1001001: q = 4'b0001; + 10'b011_1001010: q = 4'b0001; + 10'b011_1001011: q = 4'b0001; + 10'b011_1001100: q = 4'b0001; + 
10'b011_1001101: q = 4'b0001; + 10'b011_1001110: q = 4'b0001; + 10'b011_1001111: q = 4'b0001; + 10'b011_1010000: q = 4'b0001; + 10'b011_1010001: q = 4'b0001; + 10'b011_1010010: q = 4'b0001; + 10'b011_1010011: q = 4'b0001; + 10'b011_1010100: q = 4'b0001; + 10'b011_1010101: q = 4'b0001; + 10'b011_1010110: q = 4'b0001; + 10'b011_1010111: q = 4'b0001; + 10'b011_1011000: q = 4'b0001; + 10'b011_1011001: q = 4'b0001; + 10'b011_1011010: q = 4'b0001; + 10'b011_1011011: q = 4'b0001; + 10'b011_1011100: q = 4'b0001; + 10'b011_1011101: q = 4'b0001; + 10'b011_1011110: q = 4'b0001; + 10'b011_1011111: q = 4'b0001; + 10'b011_1100000: q = 4'b0001; + 10'b011_1100001: q = 4'b0001; + 10'b011_1100010: q = 4'b0001; + 10'b011_1100011: q = 4'b0001; + 10'b011_1100100: q = 4'b0001; + 10'b011_1100101: q = 4'b0001; + 10'b011_1100110: q = 4'b0001; + 10'b011_1100111: q = 4'b0001; + 10'b011_1101000: q = 4'b0001; + 10'b011_1101001: q = 4'b0001; + 10'b011_1101010: q = 4'b0001; + 10'b011_1101011: q = 4'b0001; + 10'b011_1101100: q = 4'b0001; + 10'b011_1101101: q = 4'b0001; + 10'b011_1101110: q = 4'b0010; + 10'b011_1101111: q = 4'b0010; + 10'b011_1110000: q = 4'b0010; + 10'b011_1110001: q = 4'b0010; + 10'b011_1110010: q = 4'b0010; + 10'b011_1110011: q = 4'b0010; + 10'b011_1110100: q = 4'b0010; + 10'b011_1110101: q = 4'b0010; + 10'b011_1110110: q = 4'b0010; + 10'b011_1110111: q = 4'b0010; + 10'b011_1111000: q = 4'b0010; + 10'b011_1111001: q = 4'b0010; + 10'b011_1111010: q = 4'b0000; + 10'b011_1111011: q = 4'b0000; + 10'b011_1111100: q = 4'b0000; + 10'b011_1111101: q = 4'b0000; + 10'b011_1111110: q = 4'b0000; + 10'b011_1111111: q = 4'b0000; + 10'b100_0000000: q = 4'b0000; + 10'b100_0000001: q = 4'b0000; + 10'b100_0000010: q = 4'b0000; + 10'b100_0000011: q = 4'b0000; + 10'b100_0000100: q = 4'b0000; + 10'b100_0000101: q = 4'b0000; + 10'b100_0000110: q = 4'b0100; + 10'b100_0000111: q = 4'b0100; + 10'b100_0001000: q = 4'b0100; + 10'b100_0001001: q = 4'b0100; + 10'b100_0001010: q = 4'b0100; + 
10'b100_0001011: q = 4'b0100; + 10'b100_0001100: q = 4'b0100; + 10'b100_0001101: q = 4'b0100; + 10'b100_0001110: q = 4'b0100; + 10'b100_0001111: q = 4'b0100; + 10'b100_0010000: q = 4'b0100; + 10'b100_0010001: q = 4'b0100; + 10'b100_0010010: q = 4'b1000; + 10'b100_0010011: q = 4'b1000; + 10'b100_0010100: q = 4'b1000; + 10'b100_0010101: q = 4'b1000; + 10'b100_0010110: q = 4'b1000; + 10'b100_0010111: q = 4'b1000; + 10'b100_0011000: q = 4'b1000; + 10'b100_0011001: q = 4'b1000; + 10'b100_0011010: q = 4'b1000; + 10'b100_0011011: q = 4'b1000; + 10'b100_0011100: q = 4'b1000; + 10'b100_0011101: q = 4'b1000; + 10'b100_0011110: q = 4'b1000; + 10'b100_0011111: q = 4'b1000; + 10'b100_0100000: q = 4'b1000; + 10'b100_0100001: q = 4'b1000; + 10'b100_0100010: q = 4'b1000; + 10'b100_0100011: q = 4'b1000; + 10'b100_0100100: q = 4'b1000; + 10'b100_0100101: q = 4'b1000; + 10'b100_0100110: q = 4'b1000; + 10'b100_0100111: q = 4'b1000; + 10'b100_0101000: q = 4'b1000; + 10'b100_0101001: q = 4'b1000; + 10'b100_0101010: q = 4'b1000; + 10'b100_0101011: q = 4'b1000; + 10'b100_0101100: q = 4'b1000; + 10'b100_0101101: q = 4'b1000; + 10'b100_0101110: q = 4'b1000; + 10'b100_0101111: q = 4'b1000; + 10'b100_0110000: q = 4'b1000; + 10'b100_0110001: q = 4'b1000; + 10'b100_0110010: q = 4'b1000; + 10'b100_0110011: q = 4'b1000; + 10'b100_0110100: q = 4'b1000; + 10'b100_0110101: q = 4'b1000; + 10'b100_0110110: q = 4'b1000; + 10'b100_0110111: q = 4'b1000; + 10'b100_0111000: q = 4'b1000; + 10'b100_0111001: q = 4'b1000; + 10'b100_0111010: q = 4'b1000; + 10'b100_0111011: q = 4'b1000; + 10'b100_0111100: q = 4'b1000; + 10'b100_0111101: q = 4'b1000; + 10'b100_0111110: q = 4'b1000; + 10'b100_0111111: q = 4'b1000; + 10'b100_1000000: q = 4'b0001; + 10'b100_1000001: q = 4'b0001; + 10'b100_1000010: q = 4'b0001; + 10'b100_1000011: q = 4'b0001; + 10'b100_1000100: q = 4'b0001; + 10'b100_1000101: q = 4'b0001; + 10'b100_1000110: q = 4'b0001; + 10'b100_1000111: q = 4'b0001; + 10'b100_1001000: q = 4'b0001; + 
10'b100_1001001: q = 4'b0001; + 10'b100_1001010: q = 4'b0001; + 10'b100_1001011: q = 4'b0001; + 10'b100_1001100: q = 4'b0001; + 10'b100_1001101: q = 4'b0001; + 10'b100_1001110: q = 4'b0001; + 10'b100_1001111: q = 4'b0001; + 10'b100_1010000: q = 4'b0001; + 10'b100_1010001: q = 4'b0001; + 10'b100_1010010: q = 4'b0001; + 10'b100_1010011: q = 4'b0001; + 10'b100_1010100: q = 4'b0001; + 10'b100_1010101: q = 4'b0001; + 10'b100_1010110: q = 4'b0001; + 10'b100_1010111: q = 4'b0001; + 10'b100_1011000: q = 4'b0001; + 10'b100_1011001: q = 4'b0001; + 10'b100_1011010: q = 4'b0001; + 10'b100_1011011: q = 4'b0001; + 10'b100_1011100: q = 4'b0001; + 10'b100_1011101: q = 4'b0001; + 10'b100_1011110: q = 4'b0001; + 10'b100_1011111: q = 4'b0001; + 10'b100_1100000: q = 4'b0001; + 10'b100_1100001: q = 4'b0001; + 10'b100_1100010: q = 4'b0001; + 10'b100_1100011: q = 4'b0001; + 10'b100_1100100: q = 4'b0001; + 10'b100_1100101: q = 4'b0001; + 10'b100_1100110: q = 4'b0001; + 10'b100_1100111: q = 4'b0001; + 10'b100_1101000: q = 4'b0001; + 10'b100_1101001: q = 4'b0001; + 10'b100_1101010: q = 4'b0001; + 10'b100_1101011: q = 4'b0001; + 10'b100_1101100: q = 4'b0010; + 10'b100_1101101: q = 4'b0010; + 10'b100_1101110: q = 4'b0010; + 10'b100_1101111: q = 4'b0010; + 10'b100_1110000: q = 4'b0010; + 10'b100_1110001: q = 4'b0010; + 10'b100_1110010: q = 4'b0010; + 10'b100_1110011: q = 4'b0010; + 10'b100_1110100: q = 4'b0010; + 10'b100_1110101: q = 4'b0010; + 10'b100_1110110: q = 4'b0010; + 10'b100_1110111: q = 4'b0010; + 10'b100_1111000: q = 4'b0000; + 10'b100_1111001: q = 4'b0000; + 10'b100_1111010: q = 4'b0000; + 10'b100_1111011: q = 4'b0000; + 10'b100_1111100: q = 4'b0000; + 10'b100_1111101: q = 4'b0000; + 10'b100_1111110: q = 4'b0000; + 10'b100_1111111: q = 4'b0000; + 10'b101_0000000: q = 4'b0000; + 10'b101_0000001: q = 4'b0000; + 10'b101_0000010: q = 4'b0000; + 10'b101_0000011: q = 4'b0000; + 10'b101_0000100: q = 4'b0000; + 10'b101_0000101: q = 4'b0000; + 10'b101_0000110: q = 4'b0100; + 
10'b101_0000111: q = 4'b0100; + 10'b101_0001000: q = 4'b0100; + 10'b101_0001001: q = 4'b0100; + 10'b101_0001010: q = 4'b0100; + 10'b101_0001011: q = 4'b0100; + 10'b101_0001100: q = 4'b0100; + 10'b101_0001101: q = 4'b0100; + 10'b101_0001110: q = 4'b0100; + 10'b101_0001111: q = 4'b0100; + 10'b101_0010000: q = 4'b0100; + 10'b101_0010001: q = 4'b0100; + 10'b101_0010010: q = 4'b0100; + 10'b101_0010011: q = 4'b0100; + 10'b101_0010100: q = 4'b1000; + 10'b101_0010101: q = 4'b1000; + 10'b101_0010110: q = 4'b1000; + 10'b101_0010111: q = 4'b1000; + 10'b101_0011000: q = 4'b1000; + 10'b101_0011001: q = 4'b1000; + 10'b101_0011010: q = 4'b1000; + 10'b101_0011011: q = 4'b1000; + 10'b101_0011100: q = 4'b1000; + 10'b101_0011101: q = 4'b1000; + 10'b101_0011110: q = 4'b1000; + 10'b101_0011111: q = 4'b1000; + 10'b101_0100000: q = 4'b1000; + 10'b101_0100001: q = 4'b1000; + 10'b101_0100010: q = 4'b1000; + 10'b101_0100011: q = 4'b1000; + 10'b101_0100100: q = 4'b1000; + 10'b101_0100101: q = 4'b1000; + 10'b101_0100110: q = 4'b1000; + 10'b101_0100111: q = 4'b1000; + 10'b101_0101000: q = 4'b1000; + 10'b101_0101001: q = 4'b1000; + 10'b101_0101010: q = 4'b1000; + 10'b101_0101011: q = 4'b1000; + 10'b101_0101100: q = 4'b1000; + 10'b101_0101101: q = 4'b1000; + 10'b101_0101110: q = 4'b1000; + 10'b101_0101111: q = 4'b1000; + 10'b101_0110000: q = 4'b1000; + 10'b101_0110001: q = 4'b1000; + 10'b101_0110010: q = 4'b1000; + 10'b101_0110011: q = 4'b1000; + 10'b101_0110100: q = 4'b1000; + 10'b101_0110101: q = 4'b1000; + 10'b101_0110110: q = 4'b1000; + 10'b101_0110111: q = 4'b1000; + 10'b101_0111000: q = 4'b1000; + 10'b101_0111001: q = 4'b1000; + 10'b101_0111010: q = 4'b1000; + 10'b101_0111011: q = 4'b1000; + 10'b101_0111100: q = 4'b1000; + 10'b101_0111101: q = 4'b1000; + 10'b101_0111110: q = 4'b1000; + 10'b101_0111111: q = 4'b1000; + 10'b101_1000000: q = 4'b0001; + 10'b101_1000001: q = 4'b0001; + 10'b101_1000010: q = 4'b0001; + 10'b101_1000011: q = 4'b0001; + 10'b101_1000100: q = 4'b0001; + 
10'b101_1000101: q = 4'b0001; + 10'b101_1000110: q = 4'b0001; + 10'b101_1000111: q = 4'b0001; + 10'b101_1001000: q = 4'b0001; + 10'b101_1001001: q = 4'b0001; + 10'b101_1001010: q = 4'b0001; + 10'b101_1001011: q = 4'b0001; + 10'b101_1001100: q = 4'b0001; + 10'b101_1001101: q = 4'b0001; + 10'b101_1001110: q = 4'b0001; + 10'b101_1001111: q = 4'b0001; + 10'b101_1010000: q = 4'b0001; + 10'b101_1010001: q = 4'b0001; + 10'b101_1010010: q = 4'b0001; + 10'b101_1010011: q = 4'b0001; + 10'b101_1010100: q = 4'b0001; + 10'b101_1010101: q = 4'b0001; + 10'b101_1010110: q = 4'b0001; + 10'b101_1010111: q = 4'b0001; + 10'b101_1011000: q = 4'b0001; + 10'b101_1011001: q = 4'b0001; + 10'b101_1011010: q = 4'b0001; + 10'b101_1011011: q = 4'b0001; + 10'b101_1011100: q = 4'b0001; + 10'b101_1011101: q = 4'b0001; + 10'b101_1011110: q = 4'b0001; + 10'b101_1011111: q = 4'b0001; + 10'b101_1100000: q = 4'b0001; + 10'b101_1100001: q = 4'b0001; + 10'b101_1100010: q = 4'b0001; + 10'b101_1100011: q = 4'b0001; + 10'b101_1100100: q = 4'b0001; + 10'b101_1100101: q = 4'b0001; + 10'b101_1100110: q = 4'b0001; + 10'b101_1100111: q = 4'b0001; + 10'b101_1101000: q = 4'b0001; + 10'b101_1101001: q = 4'b0001; + 10'b101_1101010: q = 4'b0001; + 10'b101_1101011: q = 4'b0001; + 10'b101_1101100: q = 4'b0010; + 10'b101_1101101: q = 4'b0010; + 10'b101_1101110: q = 4'b0010; + 10'b101_1101111: q = 4'b0010; + 10'b101_1110000: q = 4'b0010; + 10'b101_1110001: q = 4'b0010; + 10'b101_1110010: q = 4'b0010; + 10'b101_1110011: q = 4'b0010; + 10'b101_1110100: q = 4'b0010; + 10'b101_1110101: q = 4'b0010; + 10'b101_1110110: q = 4'b0010; + 10'b101_1110111: q = 4'b0010; + 10'b101_1111000: q = 4'b0000; + 10'b101_1111001: q = 4'b0000; + 10'b101_1111010: q = 4'b0000; + 10'b101_1111011: q = 4'b0000; + 10'b101_1111100: q = 4'b0000; + 10'b101_1111101: q = 4'b0000; + 10'b101_1111110: q = 4'b0000; + 10'b101_1111111: q = 4'b0000; + 10'b110_0000000: q = 4'b0000; + 10'b110_0000001: q = 4'b0000; + 10'b110_0000010: q = 4'b0000; + 
10'b110_0000011: q = 4'b0000; + 10'b110_0000100: q = 4'b0000; + 10'b110_0000101: q = 4'b0000; + 10'b110_0000110: q = 4'b0000; + 10'b110_0000111: q = 4'b0000; + 10'b110_0001000: q = 4'b0100; + 10'b110_0001001: q = 4'b0100; + 10'b110_0001010: q = 4'b0100; + 10'b110_0001011: q = 4'b0100; + 10'b110_0001100: q = 4'b0100; + 10'b110_0001101: q = 4'b0100; + 10'b110_0001110: q = 4'b0100; + 10'b110_0001111: q = 4'b0100; + 10'b110_0010000: q = 4'b0100; + 10'b110_0010001: q = 4'b0100; + 10'b110_0010010: q = 4'b0100; + 10'b110_0010011: q = 4'b0100; + 10'b110_0010100: q = 4'b1000; + 10'b110_0010101: q = 4'b1000; + 10'b110_0010110: q = 4'b1000; + 10'b110_0010111: q = 4'b1000; + 10'b110_0011000: q = 4'b1000; + 10'b110_0011001: q = 4'b1000; + 10'b110_0011010: q = 4'b1000; + 10'b110_0011011: q = 4'b1000; + 10'b110_0011100: q = 4'b1000; + 10'b110_0011101: q = 4'b1000; + 10'b110_0011110: q = 4'b1000; + 10'b110_0011111: q = 4'b1000; + 10'b110_0100000: q = 4'b1000; + 10'b110_0100001: q = 4'b1000; + 10'b110_0100010: q = 4'b1000; + 10'b110_0100011: q = 4'b1000; + 10'b110_0100100: q = 4'b1000; + 10'b110_0100101: q = 4'b1000; + 10'b110_0100110: q = 4'b1000; + 10'b110_0100111: q = 4'b1000; + 10'b110_0101000: q = 4'b1000; + 10'b110_0101001: q = 4'b1000; + 10'b110_0101010: q = 4'b1000; + 10'b110_0101011: q = 4'b1000; + 10'b110_0101100: q = 4'b1000; + 10'b110_0101101: q = 4'b1000; + 10'b110_0101110: q = 4'b1000; + 10'b110_0101111: q = 4'b1000; + 10'b110_0110000: q = 4'b1000; + 10'b110_0110001: q = 4'b1000; + 10'b110_0110010: q = 4'b1000; + 10'b110_0110011: q = 4'b1000; + 10'b110_0110100: q = 4'b1000; + 10'b110_0110101: q = 4'b1000; + 10'b110_0110110: q = 4'b1000; + 10'b110_0110111: q = 4'b1000; + 10'b110_0111000: q = 4'b1000; + 10'b110_0111001: q = 4'b1000; + 10'b110_0111010: q = 4'b1000; + 10'b110_0111011: q = 4'b1000; + 10'b110_0111100: q = 4'b1000; + 10'b110_0111101: q = 4'b1000; + 10'b110_0111110: q = 4'b1000; + 10'b110_0111111: q = 4'b1000; + 10'b110_1000000: q = 4'b0001; + 
10'b110_1000001: q = 4'b0001; + 10'b110_1000010: q = 4'b0001; + 10'b110_1000011: q = 4'b0001; + 10'b110_1000100: q = 4'b0001; + 10'b110_1000101: q = 4'b0001; + 10'b110_1000110: q = 4'b0001; + 10'b110_1000111: q = 4'b0001; + 10'b110_1001000: q = 4'b0001; + 10'b110_1001001: q = 4'b0001; + 10'b110_1001010: q = 4'b0001; + 10'b110_1001011: q = 4'b0001; + 10'b110_1001100: q = 4'b0001; + 10'b110_1001101: q = 4'b0001; + 10'b110_1001110: q = 4'b0001; + 10'b110_1001111: q = 4'b0001; + 10'b110_1010000: q = 4'b0001; + 10'b110_1010001: q = 4'b0001; + 10'b110_1010010: q = 4'b0001; + 10'b110_1010011: q = 4'b0001; + 10'b110_1010100: q = 4'b0001; + 10'b110_1010101: q = 4'b0001; + 10'b110_1010110: q = 4'b0001; + 10'b110_1010111: q = 4'b0001; + 10'b110_1011000: q = 4'b0001; + 10'b110_1011001: q = 4'b0001; + 10'b110_1011010: q = 4'b0001; + 10'b110_1011011: q = 4'b0001; + 10'b110_1011100: q = 4'b0001; + 10'b110_1011101: q = 4'b0001; + 10'b110_1011110: q = 4'b0001; + 10'b110_1011111: q = 4'b0001; + 10'b110_1100000: q = 4'b0001; + 10'b110_1100001: q = 4'b0001; + 10'b110_1100010: q = 4'b0001; + 10'b110_1100011: q = 4'b0001; + 10'b110_1100100: q = 4'b0001; + 10'b110_1100101: q = 4'b0001; + 10'b110_1100110: q = 4'b0001; + 10'b110_1100111: q = 4'b0001; + 10'b110_1101000: q = 4'b0001; + 10'b110_1101001: q = 4'b0001; + 10'b110_1101010: q = 4'b0010; + 10'b110_1101011: q = 4'b0010; + 10'b110_1101100: q = 4'b0010; + 10'b110_1101101: q = 4'b0010; + 10'b110_1101110: q = 4'b0010; + 10'b110_1101111: q = 4'b0010; + 10'b110_1110000: q = 4'b0010; + 10'b110_1110001: q = 4'b0010; + 10'b110_1110010: q = 4'b0010; + 10'b110_1110011: q = 4'b0010; + 10'b110_1110100: q = 4'b0010; + 10'b110_1110101: q = 4'b0010; + 10'b110_1110110: q = 4'b0010; + 10'b110_1110111: q = 4'b0010; + 10'b110_1111000: q = 4'b0000; + 10'b110_1111001: q = 4'b0000; + 10'b110_1111010: q = 4'b0000; + 10'b110_1111011: q = 4'b0000; + 10'b110_1111100: q = 4'b0000; + 10'b110_1111101: q = 4'b0000; + 10'b110_1111110: q = 4'b0000; + 
10'b110_1111111: q = 4'b0000; + 10'b111_0000000: q = 4'b0000; + 10'b111_0000001: q = 4'b0000; + 10'b111_0000010: q = 4'b0000; + 10'b111_0000011: q = 4'b0000; + 10'b111_0000100: q = 4'b0000; + 10'b111_0000101: q = 4'b0000; + 10'b111_0000110: q = 4'b0000; + 10'b111_0000111: q = 4'b0000; + 10'b111_0001000: q = 4'b0100; + 10'b111_0001001: q = 4'b0100; + 10'b111_0001010: q = 4'b0100; + 10'b111_0001011: q = 4'b0100; + 10'b111_0001100: q = 4'b0100; + 10'b111_0001101: q = 4'b0100; + 10'b111_0001110: q = 4'b0100; + 10'b111_0001111: q = 4'b0100; + 10'b111_0010000: q = 4'b0100; + 10'b111_0010001: q = 4'b0100; + 10'b111_0010010: q = 4'b0100; + 10'b111_0010011: q = 4'b0100; + 10'b111_0010100: q = 4'b0100; + 10'b111_0010101: q = 4'b0100; + 10'b111_0010110: q = 4'b0100; + 10'b111_0010111: q = 4'b0100; + 10'b111_0011000: q = 4'b1000; + 10'b111_0011001: q = 4'b1000; + 10'b111_0011010: q = 4'b1000; + 10'b111_0011011: q = 4'b1000; + 10'b111_0011100: q = 4'b1000; + 10'b111_0011101: q = 4'b1000; + 10'b111_0011110: q = 4'b1000; + 10'b111_0011111: q = 4'b1000; + 10'b111_0100000: q = 4'b1000; + 10'b111_0100001: q = 4'b1000; + 10'b111_0100010: q = 4'b1000; + 10'b111_0100011: q = 4'b1000; + 10'b111_0100100: q = 4'b1000; + 10'b111_0100101: q = 4'b1000; + 10'b111_0100110: q = 4'b1000; + 10'b111_0100111: q = 4'b1000; + 10'b111_0101000: q = 4'b1000; + 10'b111_0101001: q = 4'b1000; + 10'b111_0101010: q = 4'b1000; + 10'b111_0101011: q = 4'b1000; + 10'b111_0101100: q = 4'b1000; + 10'b111_0101101: q = 4'b1000; + 10'b111_0101110: q = 4'b1000; + 10'b111_0101111: q = 4'b1000; + 10'b111_0110000: q = 4'b1000; + 10'b111_0110001: q = 4'b1000; + 10'b111_0110010: q = 4'b1000; + 10'b111_0110011: q = 4'b1000; + 10'b111_0110100: q = 4'b1000; + 10'b111_0110101: q = 4'b1000; + 10'b111_0110110: q = 4'b1000; + 10'b111_0110111: q = 4'b1000; + 10'b111_0111000: q = 4'b1000; + 10'b111_0111001: q = 4'b1000; + 10'b111_0111010: q = 4'b1000; + 10'b111_0111011: q = 4'b1000; + 10'b111_0111100: q = 4'b1000; + 
10'b111_0111101: q = 4'b1000; + 10'b111_0111110: q = 4'b1000; + 10'b111_0111111: q = 4'b1000; + 10'b111_1000000: q = 4'b0001; + 10'b111_1000001: q = 4'b0001; + 10'b111_1000010: q = 4'b0001; + 10'b111_1000011: q = 4'b0001; + 10'b111_1000100: q = 4'b0001; + 10'b111_1000101: q = 4'b0001; + 10'b111_1000110: q = 4'b0001; + 10'b111_1000111: q = 4'b0001; + 10'b111_1001000: q = 4'b0001; + 10'b111_1001001: q = 4'b0001; + 10'b111_1001010: q = 4'b0001; + 10'b111_1001011: q = 4'b0001; + 10'b111_1001100: q = 4'b0001; + 10'b111_1001101: q = 4'b0001; + 10'b111_1001110: q = 4'b0001; + 10'b111_1001111: q = 4'b0001; + 10'b111_1010000: q = 4'b0001; + 10'b111_1010001: q = 4'b0001; + 10'b111_1010010: q = 4'b0001; + 10'b111_1010011: q = 4'b0001; + 10'b111_1010100: q = 4'b0001; + 10'b111_1010101: q = 4'b0001; + 10'b111_1010110: q = 4'b0001; + 10'b111_1010111: q = 4'b0001; + 10'b111_1011000: q = 4'b0001; + 10'b111_1011001: q = 4'b0001; + 10'b111_1011010: q = 4'b0001; + 10'b111_1011011: q = 4'b0001; + 10'b111_1011100: q = 4'b0001; + 10'b111_1011101: q = 4'b0001; + 10'b111_1011110: q = 4'b0001; + 10'b111_1011111: q = 4'b0001; + 10'b111_1100000: q = 4'b0001; + 10'b111_1100001: q = 4'b0001; + 10'b111_1100010: q = 4'b0001; + 10'b111_1100011: q = 4'b0001; + 10'b111_1100100: q = 4'b0001; + 10'b111_1100101: q = 4'b0001; + 10'b111_1100110: q = 4'b0001; + 10'b111_1100111: q = 4'b0001; + 10'b111_1101000: q = 4'b0010; + 10'b111_1101001: q = 4'b0010; + 10'b111_1101010: q = 4'b0010; + 10'b111_1101011: q = 4'b0010; + 10'b111_1101100: q = 4'b0010; + 10'b111_1101101: q = 4'b0010; + 10'b111_1101110: q = 4'b0010; + 10'b111_1101111: q = 4'b0010; + 10'b111_1110000: q = 4'b0010; + 10'b111_1110001: q = 4'b0010; + 10'b111_1110010: q = 4'b0010; + 10'b111_1110011: q = 4'b0010; + 10'b111_1110100: q = 4'b0010; + 10'b111_1110101: q = 4'b0010; + 10'b111_1110110: q = 4'b0010; + 10'b111_1110111: q = 4'b0010; + 10'b111_1111000: q = 4'b0000; + 10'b111_1111001: q = 4'b0000; + 10'b111_1111010: q = 4'b0000; + 
10'b111_1111011: q = 4'b0000; + 10'b111_1111100: q = 4'b0000; + 10'b111_1111101: q = 4'b0000; + 10'b111_1111110: q = 4'b0000; + 10'b111_1111111: q = 4'b0000; + endcase From c836f37a0886e65ee9c9a9de825c67c5ec2ae128 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 9 Jun 2022 23:50:43 +0000 Subject: [PATCH 6/7] New RAM for further testing --- pipelined/src/uncore/ram.sv | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/pipelined/src/uncore/ram.sv b/pipelined/src/uncore/ram.sv index 2aa0df360..09ab288b4 100644 --- a/pipelined/src/uncore/ram.sv +++ b/pipelined/src/uncore/ram.sv @@ -53,18 +53,18 @@ module ram #(parameter BASE=0, RANGE = 65535) ( logic [31:0] HADDRD, RamAddr; //logic prevHREADYRam, risingHREADYRam; logic initTrans; - logic memwrite, memwriteD; + logic memwrite, memwriteD, memread; logic nextHREADYRam; //logic [3:0] busycount; swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask)); - assign initTrans = HREADY & HSELRam & (HTRANS != 2'b00); // *** add burst support, or disable on busy - assign memwrite = initTrans & HWRITE; - - // *** this seems like a weird way to use reset - flopen #(1) memwritereg(HCLK, initTrans | ~HRESETn, memwrite, memwriteD); // probably drop ~HRESETn in all this - flopen #(32) haddrreg(HCLK, initTrans | ~HRESETn, HADDR, HADDRD); + assign initTrans = HREADY & HSELRam & (HTRANS[1]]); + assign memwrite = initTrans & HWRITE; // *** why is initTrans needed? 
See CLINT interface + assign memread = initTrans & ~HWRITE; + + flopenr #(1) memwritereg(HCLK, ~HRESETn, HREADY, memwrite, memwriteD); + flopenr #(32) haddrreg(HCLK, ~HRESETn, HREADY, HADDR, HADDRD); /* // busy FSM to extend READY signal always @(posedge HCLK, negedge HRESETn) @@ -85,7 +85,9 @@ module ram #(parameter BASE=0, RANGE = 65535) ( end */ - assign nextHREADYRam = ~(memwriteD & ~memwrite); + // Stall on a read after a write because the RAM can't take both addresses on the same cycle + assign nextHREADYRam = ~(memwriteD & memread); +// assign nextHREADYRam = ~(memwriteD & ~memwrite); flopr #(1) readyreg(HCLK, ~HRESETn, nextHREADYRam, HREADYRam); // assign HREADYRam = ~(memwriteD & ~memwrite); assign HRESPRam = 0; // OK @@ -109,8 +111,8 @@ module ram #(parameter BASE=0, RANGE = 65535) ( - // On writes, use address delayed by one cycle to sync with HWDATA - mux2 #(32) adrmux(HADDR, HADDRD, memwriteD, RamAddr); + // On writes or during a wait state, use address delayed by one cycle to sync RamAddr with HWDATA or hold stalled address + mux2 #(32) adrmux(HADDR, HADDRD, memwriteD | ~HREADY, RamAddr); // single-ported RAM bram1p1rw #(`XLEN/8, 8, ADDR_WIDTH) From 4ff105f18c0dee2bb4e3ef407e93f642dd89d9cd Mon Sep 17 00:00:00 2001 From: slmnemo Date: Thu, 9 Jun 2022 17:22:04 -0700 Subject: [PATCH 7/7] Fixed lint error --- pipelined/src/uncore/ram.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/src/uncore/ram.sv b/pipelined/src/uncore/ram.sv index 09ab288b4..a8015c742 100644 --- a/pipelined/src/uncore/ram.sv +++ b/pipelined/src/uncore/ram.sv @@ -59,7 +59,7 @@ module ram #(parameter BASE=0, RANGE = 65535) ( swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask)); - assign initTrans = HREADY & HSELRam & (HTRANS[1]]); + assign initTrans = HREADY & HSELRam & (HTRANS[1]); assign memwrite = initTrans & HWRITE; // *** why is initTrans needed? See CLINT interface assign memread = initTrans & ~HWRITE;