Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

This commit is contained in:
Ross Thompson 2021-07-02 13:56:49 -05:00
commit 46831035fb
37 changed files with 455 additions and 2448 deletions

View File

@ -53,7 +53,7 @@
`define DTLB_ENTRY_BITS 5
// Legal number of PMP entries are 0, 16, or 64
`define PMP_ENTRIES 16
`define PMP_ENTRIES 64
// Address space
`define RESET_VECTOR 64'h0000000080000000

View File

@ -63,7 +63,6 @@ module ahblite (
// Signals from PMA checker
input logic DSquashBusAccessM, ISquashBusAccessF,
// Signals to PMA checker (metadata of proposed access)
output logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM,
// Return from bus
output logic [`XLEN-1:0] HRDATAW,
// AHB-Lite external signals
@ -144,10 +143,6 @@ module ahblite (
endcase
// Determine access type (important for determining whether to fault)
assign AtomicAccessM = (ProposedNextBusState == ATOMICREAD) || (ProposedNextBusState == ATOMICWRITE);
assign ExecuteAccessF = (ProposedNextBusState == INSTRREAD);
assign WriteAccessM = (ProposedNextBusState == MEMWRITE) || (ProposedNextBusState == ATOMICWRITE);
assign ReadAccessM = (ProposedNextBusState == MEMREAD) || (ProposedNextBusState == ATOMICREAD);// ||
// (ProposedNextBusState == MMUTRANSLATE);
// The PMA and PMP checkers can decide to squash the access

View File

@ -1,599 +0,0 @@
// Brent-Kung Carry-save Prefix Adder
//
// 128-bit adder wrapper around the brent_kung_cs128 prefix tree.
//   a, b : 128-bit addends
//   cin  : carry in
//   sum  : 128-bit sum
//   cout : carry out
//
// p and g are one bit wider than the operands: index 0 carries the
// carry-in ({p[0], g[0]} = {1'b0, cin}) and index i+1 carries the
// propagate (a^b) / generate (a&b) of operand bit i.
//
// NOTE: the previous version also built a 129-bit vector `t` from p and g.
// It was never read (and t[0] was never driven), so it has been removed;
// no output depends on it.
module bk128 (cout, sum, a, b, cin);
   input [127:0]  a, b;
   input          cin;
   output [127:0] sum;
   output         cout;

   wire [128:0]   p, g;
   wire [127:0]   c;

   // pre-computation
   assign p = {a ^ b, 1'b0};
   assign g = {a & b, cin};

   // prefix tree
   // The tree's c port is declared [128:1]; it lands bit-for-bit on the
   // local c[127:0], so local c[i] is the tree's c[i+1].
   brent_kung_cs128 prefix_tree (.c(c), .p(p[127:0]), .g(g[127:0]));

   // post-computation
   assign sum  = p[128:1] ^ c;
   // c[127] is the tree's c[128]; combine it with the top bit's p/g.
   assign cout = g[128] | (p[128] & c[127]);
endmodule
// Prefix network for the 128-bit adder.
//   p, g : per-bit propagate / generate inputs, indices 0..127
//   c    : carries out, c[k+1] = G_k_0 (c[1] is simply g[0])
//
// The network is a fixed netlist of `grey` and `black` cells, neither of
// which is defined in this file. From the way they are wired here:
//   grey  <name> (Gout, {G_hi, G_lo}, P_hi);
//   black <name> (Gout, Pout, {G_hi, G_lo}, {P_hi, P_lo});
// NOTE(review): presumably Gout = G_hi | (P_hi & G_lo) and Pout = P_hi & P_lo
// -- confirm against the cell definitions.
//
// Net naming: G_m_l / P_m_l are the group generate / propagate nets for bits
// m down to l. None of these nets is declared in this module; they rely on
// Verilog implicit net declaration, so the module will not compile under
// `default_nettype none.
module brent_kung_cs128 (c, p, g);
input [127:0] p;
input [127:0] g;
output [128:1] c;
// parallel-prefix, Brent-Kung
// Stage 1: combine adjacent input bits into 2-bit groups [2k+1:2k]
// (the [1:0] group only needs G, hence a grey cell despite the b_ prefix)
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
black b_15_14 (G_15_14, P_15_14, {g[15],g[14]}, {p[15],p[14]});
black b_17_16 (G_17_16, P_17_16, {g[17],g[16]}, {p[17],p[16]});
black b_19_18 (G_19_18, P_19_18, {g[19],g[18]}, {p[19],p[18]});
black b_21_20 (G_21_20, P_21_20, {g[21],g[20]}, {p[21],p[20]});
black b_23_22 (G_23_22, P_23_22, {g[23],g[22]}, {p[23],p[22]});
black b_25_24 (G_25_24, P_25_24, {g[25],g[24]}, {p[25],p[24]});
black b_27_26 (G_27_26, P_27_26, {g[27],g[26]}, {p[27],p[26]});
black b_29_28 (G_29_28, P_29_28, {g[29],g[28]}, {p[29],p[28]});
black b_31_30 (G_31_30, P_31_30, {g[31],g[30]}, {p[31],p[30]});
black b_33_32 (G_33_32, P_33_32, {g[33],g[32]}, {p[33],p[32]});
black b_35_34 (G_35_34, P_35_34, {g[35],g[34]}, {p[35],p[34]});
black b_37_36 (G_37_36, P_37_36, {g[37],g[36]}, {p[37],p[36]});
black b_39_38 (G_39_38, P_39_38, {g[39],g[38]}, {p[39],p[38]});
black b_41_40 (G_41_40, P_41_40, {g[41],g[40]}, {p[41],p[40]});
black b_43_42 (G_43_42, P_43_42, {g[43],g[42]}, {p[43],p[42]});
black b_45_44 (G_45_44, P_45_44, {g[45],g[44]}, {p[45],p[44]});
black b_47_46 (G_47_46, P_47_46, {g[47],g[46]}, {p[47],p[46]});
black b_49_48 (G_49_48, P_49_48, {g[49],g[48]}, {p[49],p[48]});
black b_51_50 (G_51_50, P_51_50, {g[51],g[50]}, {p[51],p[50]});
black b_53_52 (G_53_52, P_53_52, {g[53],g[52]}, {p[53],p[52]});
black b_55_54 (G_55_54, P_55_54, {g[55],g[54]}, {p[55],p[54]});
black b_57_56 (G_57_56, P_57_56, {g[57],g[56]}, {p[57],p[56]});
black b_59_58 (G_59_58, P_59_58, {g[59],g[58]}, {p[59],p[58]});
black b_61_60 (G_61_60, P_61_60, {g[61],g[60]}, {p[61],p[60]});
black b_63_62 (G_63_62, P_63_62, {g[63],g[62]}, {p[63],p[62]});
black b_65_64 (G_65_64, P_65_64, {g[65],g[64]}, {p[65],p[64]});
black b_67_66 (G_67_66, P_67_66, {g[67],g[66]}, {p[67],p[66]});
black b_69_68 (G_69_68, P_69_68, {g[69],g[68]}, {p[69],p[68]});
black b_71_70 (G_71_70, P_71_70, {g[71],g[70]}, {p[71],p[70]});
black b_73_72 (G_73_72, P_73_72, {g[73],g[72]}, {p[73],p[72]});
black b_75_74 (G_75_74, P_75_74, {g[75],g[74]}, {p[75],p[74]});
black b_77_76 (G_77_76, P_77_76, {g[77],g[76]}, {p[77],p[76]});
black b_79_78 (G_79_78, P_79_78, {g[79],g[78]}, {p[79],p[78]});
black b_81_80 (G_81_80, P_81_80, {g[81],g[80]}, {p[81],p[80]});
black b_83_82 (G_83_82, P_83_82, {g[83],g[82]}, {p[83],p[82]});
black b_85_84 (G_85_84, P_85_84, {g[85],g[84]}, {p[85],p[84]});
black b_87_86 (G_87_86, P_87_86, {g[87],g[86]}, {p[87],p[86]});
black b_89_88 (G_89_88, P_89_88, {g[89],g[88]}, {p[89],p[88]});
black b_91_90 (G_91_90, P_91_90, {g[91],g[90]}, {p[91],p[90]});
black b_93_92 (G_93_92, P_93_92, {g[93],g[92]}, {p[93],p[92]});
black b_95_94 (G_95_94, P_95_94, {g[95],g[94]}, {p[95],p[94]});
black b_97_96 (G_97_96, P_97_96, {g[97],g[96]}, {p[97],p[96]});
black b_99_98 (G_99_98, P_99_98, {g[99],g[98]}, {p[99],p[98]});
black b_101_100 (G_101_100, P_101_100, {g[101],g[100]}, {p[101],p[100]});
black b_103_102 (G_103_102, P_103_102, {g[103],g[102]}, {p[103],p[102]});
black b_105_104 (G_105_104, P_105_104, {g[105],g[104]}, {p[105],p[104]});
black b_107_106 (G_107_106, P_107_106, {g[107],g[106]}, {p[107],p[106]});
black b_109_108 (G_109_108, P_109_108, {g[109],g[108]}, {p[109],p[108]});
black b_111_110 (G_111_110, P_111_110, {g[111],g[110]}, {p[111],p[110]});
black b_113_112 (G_113_112, P_113_112, {g[113],g[112]}, {p[113],p[112]});
black b_115_114 (G_115_114, P_115_114, {g[115],g[114]}, {p[115],p[114]});
black b_117_116 (G_117_116, P_117_116, {g[117],g[116]}, {p[117],p[116]});
black b_119_118 (G_119_118, P_119_118, {g[119],g[118]}, {p[119],p[118]});
black b_121_120 (G_121_120, P_121_120, {g[121],g[120]}, {p[121],p[120]});
black b_123_122 (G_123_122, P_123_122, {g[123],g[122]}, {p[123],p[122]});
black b_125_124 (G_125_124, P_125_124, {g[125],g[124]}, {p[125],p[124]});
black b_127_126 (G_127_126, P_127_126, {g[127],g[126]}, {p[127],p[126]});
// Stage 2: merge pairs of 2-bit groups into 4-bit groups
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12});
black b_19_16 (G_19_16, P_19_16, {G_19_18,G_17_16}, {P_19_18,P_17_16});
black b_23_20 (G_23_20, P_23_20, {G_23_22,G_21_20}, {P_23_22,P_21_20});
black b_27_24 (G_27_24, P_27_24, {G_27_26,G_25_24}, {P_27_26,P_25_24});
black b_31_28 (G_31_28, P_31_28, {G_31_30,G_29_28}, {P_31_30,P_29_28});
black b_35_32 (G_35_32, P_35_32, {G_35_34,G_33_32}, {P_35_34,P_33_32});
black b_39_36 (G_39_36, P_39_36, {G_39_38,G_37_36}, {P_39_38,P_37_36});
black b_43_40 (G_43_40, P_43_40, {G_43_42,G_41_40}, {P_43_42,P_41_40});
black b_47_44 (G_47_44, P_47_44, {G_47_46,G_45_44}, {P_47_46,P_45_44});
black b_51_48 (G_51_48, P_51_48, {G_51_50,G_49_48}, {P_51_50,P_49_48});
black b_55_52 (G_55_52, P_55_52, {G_55_54,G_53_52}, {P_55_54,P_53_52});
black b_59_56 (G_59_56, P_59_56, {G_59_58,G_57_56}, {P_59_58,P_57_56});
black b_63_60 (G_63_60, P_63_60, {G_63_62,G_61_60}, {P_63_62,P_61_60});
black b_67_64 (G_67_64, P_67_64, {G_67_66,G_65_64}, {P_67_66,P_65_64});
black b_71_68 (G_71_68, P_71_68, {G_71_70,G_69_68}, {P_71_70,P_69_68});
black b_75_72 (G_75_72, P_75_72, {G_75_74,G_73_72}, {P_75_74,P_73_72});
black b_79_76 (G_79_76, P_79_76, {G_79_78,G_77_76}, {P_79_78,P_77_76});
black b_83_80 (G_83_80, P_83_80, {G_83_82,G_81_80}, {P_83_82,P_81_80});
black b_87_84 (G_87_84, P_87_84, {G_87_86,G_85_84}, {P_87_86,P_85_84});
black b_91_88 (G_91_88, P_91_88, {G_91_90,G_89_88}, {P_91_90,P_89_88});
black b_95_92 (G_95_92, P_95_92, {G_95_94,G_93_92}, {P_95_94,P_93_92});
black b_99_96 (G_99_96, P_99_96, {G_99_98,G_97_96}, {P_99_98,P_97_96});
black b_103_100 (G_103_100, P_103_100, {G_103_102,G_101_100}, {P_103_102,P_101_100});
black b_107_104 (G_107_104, P_107_104, {G_107_106,G_105_104}, {P_107_106,P_105_104});
black b_111_108 (G_111_108, P_111_108, {G_111_110,G_109_108}, {P_111_110,P_109_108});
black b_115_112 (G_115_112, P_115_112, {G_115_114,G_113_112}, {P_115_114,P_113_112});
black b_119_116 (G_119_116, P_119_116, {G_119_118,G_117_116}, {P_119_118,P_117_116});
black b_123_120 (G_123_120, P_123_120, {G_123_122,G_121_120}, {P_123_122,P_121_120});
black b_127_124 (G_127_124, P_127_124, {G_127_126,G_125_124}, {P_127_126,P_125_124});
// Stage 3: merge pairs of 4-bit groups into 8-bit groups
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8});
black b_23_16 (G_23_16, P_23_16, {G_23_20,G_19_16}, {P_23_20,P_19_16});
black b_31_24 (G_31_24, P_31_24, {G_31_28,G_27_24}, {P_31_28,P_27_24});
black b_39_32 (G_39_32, P_39_32, {G_39_36,G_35_32}, {P_39_36,P_35_32});
black b_47_40 (G_47_40, P_47_40, {G_47_44,G_43_40}, {P_47_44,P_43_40});
black b_55_48 (G_55_48, P_55_48, {G_55_52,G_51_48}, {P_55_52,P_51_48});
black b_63_56 (G_63_56, P_63_56, {G_63_60,G_59_56}, {P_63_60,P_59_56});
black b_71_64 (G_71_64, P_71_64, {G_71_68,G_67_64}, {P_71_68,P_67_64});
black b_79_72 (G_79_72, P_79_72, {G_79_76,G_75_72}, {P_79_76,P_75_72});
black b_87_80 (G_87_80, P_87_80, {G_87_84,G_83_80}, {P_87_84,P_83_80});
black b_95_88 (G_95_88, P_95_88, {G_95_92,G_91_88}, {P_95_92,P_91_88});
black b_103_96 (G_103_96, P_103_96, {G_103_100,G_99_96}, {P_103_100,P_99_96});
black b_111_104 (G_111_104, P_111_104, {G_111_108,G_107_104}, {P_111_108,P_107_104});
black b_119_112 (G_119_112, P_119_112, {G_119_116,G_115_112}, {P_119_116,P_115_112});
black b_127_120 (G_127_120, P_127_120, {G_127_124,G_123_120}, {P_127_124,P_123_120});
// Stage 4: merge pairs of 8-bit groups into 16-bit groups
grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8);
black b_31_16 (G_31_16, P_31_16, {G_31_24,G_23_16}, {P_31_24,P_23_16});
black b_47_32 (G_47_32, P_47_32, {G_47_40,G_39_32}, {P_47_40,P_39_32});
black b_63_48 (G_63_48, P_63_48, {G_63_56,G_55_48}, {P_63_56,P_55_48});
black b_79_64 (G_79_64, P_79_64, {G_79_72,G_71_64}, {P_79_72,P_71_64});
black b_95_80 (G_95_80, P_95_80, {G_95_88,G_87_80}, {P_95_88,P_87_80});
black b_111_96 (G_111_96, P_111_96, {G_111_104,G_103_96}, {P_111_104,P_103_96});
black b_127_112 (G_127_112, P_127_112, {G_127_120,G_119_112}, {P_127_120,P_119_112});
// Stage 5: merge pairs of 16-bit groups into 32-bit groups
grey g_31_0 (G_31_0, {G_31_16,G_15_0}, P_31_16);
black b_63_32 (G_63_32, P_63_32, {G_63_48,G_47_32}, {P_63_48,P_47_32});
black b_95_64 (G_95_64, P_95_64, {G_95_80,G_79_64}, {P_95_80,P_79_64});
black b_127_96 (G_127_96, P_127_96, {G_127_112,G_111_96}, {P_127_112,P_111_96});
// Stage 6: merge pairs of 32-bit groups into 64-bit groups
grey g_63_0 (G_63_0, {G_63_32,G_31_0}, P_63_32);
black b_127_64 (G_127_64, P_127_64, {G_127_96,G_95_64}, {P_127_96,P_95_64});
// Stage 7: merge the two 64-bit groups into the full-width G_127_0
grey g_127_0 (G_127_0, {G_127_64,G_63_0}, P_127_64);
// Stages 8-12 fill in the remaining G_k_0 prefixes. Each grey cell extends
// an already-computed G_j_0 by one of the group G/P pairs built above.
// Stage 8: extend a prefix by a 32-bit group
grey g_95_0 (G_95_0, {G_95_64,G_63_0}, P_95_64);
// Stage 9: extend prefixes by 16-bit groups
grey g_47_0 (G_47_0, {G_47_32,G_31_0}, P_47_32);
grey g_79_0 (G_79_0, {G_79_64,G_63_0}, P_79_64);
grey g_111_0 (G_111_0, {G_111_96,G_95_0}, P_111_96);
// Stage 10: extend prefixes by 8-bit groups
grey g_23_0 (G_23_0, {G_23_16,G_15_0}, P_23_16);
grey g_39_0 (G_39_0, {G_39_32,G_31_0}, P_39_32);
grey g_55_0 (G_55_0, {G_55_48,G_47_0}, P_55_48);
grey g_71_0 (G_71_0, {G_71_64,G_63_0}, P_71_64);
grey g_87_0 (G_87_0, {G_87_80,G_79_0}, P_87_80);
grey g_103_0 (G_103_0, {G_103_96,G_95_0}, P_103_96);
grey g_119_0 (G_119_0, {G_119_112,G_111_0}, P_119_112);
// Stage 11: extend prefixes by 4-bit groups
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
grey g_19_0 (G_19_0, {G_19_16,G_15_0}, P_19_16);
grey g_27_0 (G_27_0, {G_27_24,G_23_0}, P_27_24);
grey g_35_0 (G_35_0, {G_35_32,G_31_0}, P_35_32);
grey g_43_0 (G_43_0, {G_43_40,G_39_0}, P_43_40);
grey g_51_0 (G_51_0, {G_51_48,G_47_0}, P_51_48);
grey g_59_0 (G_59_0, {G_59_56,G_55_0}, P_59_56);
grey g_67_0 (G_67_0, {G_67_64,G_63_0}, P_67_64);
grey g_75_0 (G_75_0, {G_75_72,G_71_0}, P_75_72);
grey g_83_0 (G_83_0, {G_83_80,G_79_0}, P_83_80);
grey g_91_0 (G_91_0, {G_91_88,G_87_0}, P_91_88);
grey g_99_0 (G_99_0, {G_99_96,G_95_0}, P_99_96);
grey g_107_0 (G_107_0, {G_107_104,G_103_0}, P_107_104);
grey g_115_0 (G_115_0, {G_115_112,G_111_0}, P_115_112);
grey g_123_0 (G_123_0, {G_123_120,G_119_0}, P_123_120);
// Stage 12: extend prefixes by 2-bit groups
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12);
grey g_17_0 (G_17_0, {G_17_16,G_15_0}, P_17_16);
grey g_21_0 (G_21_0, {G_21_20,G_19_0}, P_21_20);
grey g_25_0 (G_25_0, {G_25_24,G_23_0}, P_25_24);
grey g_29_0 (G_29_0, {G_29_28,G_27_0}, P_29_28);
grey g_33_0 (G_33_0, {G_33_32,G_31_0}, P_33_32);
grey g_37_0 (G_37_0, {G_37_36,G_35_0}, P_37_36);
grey g_41_0 (G_41_0, {G_41_40,G_39_0}, P_41_40);
grey g_45_0 (G_45_0, {G_45_44,G_43_0}, P_45_44);
grey g_49_0 (G_49_0, {G_49_48,G_47_0}, P_49_48);
grey g_53_0 (G_53_0, {G_53_52,G_51_0}, P_53_52);
grey g_57_0 (G_57_0, {G_57_56,G_55_0}, P_57_56);
grey g_61_0 (G_61_0, {G_61_60,G_59_0}, P_61_60);
grey g_65_0 (G_65_0, {G_65_64,G_63_0}, P_65_64);
grey g_69_0 (G_69_0, {G_69_68,G_67_0}, P_69_68);
grey g_73_0 (G_73_0, {G_73_72,G_71_0}, P_73_72);
grey g_77_0 (G_77_0, {G_77_76,G_75_0}, P_77_76);
grey g_81_0 (G_81_0, {G_81_80,G_79_0}, P_81_80);
grey g_85_0 (G_85_0, {G_85_84,G_83_0}, P_85_84);
grey g_89_0 (G_89_0, {G_89_88,G_87_0}, P_89_88);
grey g_93_0 (G_93_0, {G_93_92,G_91_0}, P_93_92);
grey g_97_0 (G_97_0, {G_97_96,G_95_0}, P_97_96);
grey g_101_0 (G_101_0, {G_101_100,G_99_0}, P_101_100);
grey g_105_0 (G_105_0, {G_105_104,G_103_0}, P_105_104);
grey g_109_0 (G_109_0, {G_109_108,G_107_0}, P_109_108);
grey g_113_0 (G_113_0, {G_113_112,G_111_0}, P_113_112);
grey g_117_0 (G_117_0, {G_117_116,G_115_0}, P_117_116);
grey g_121_0 (G_121_0, {G_121_120,G_119_0}, P_121_120);
grey g_125_0 (G_125_0, {G_125_124,G_123_0}, P_125_124);
// Last grey cell stage: every even k gets G_k_0 from the raw g[k]/p[k]
// inputs and the odd prefix G_(k-1)_0 directly below it
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
grey g_14_0 (G_14_0, {g[14],G_13_0}, p[14]);
grey g_16_0 (G_16_0, {g[16],G_15_0}, p[16]);
grey g_18_0 (G_18_0, {g[18],G_17_0}, p[18]);
grey g_20_0 (G_20_0, {g[20],G_19_0}, p[20]);
grey g_22_0 (G_22_0, {g[22],G_21_0}, p[22]);
grey g_24_0 (G_24_0, {g[24],G_23_0}, p[24]);
grey g_26_0 (G_26_0, {g[26],G_25_0}, p[26]);
grey g_28_0 (G_28_0, {g[28],G_27_0}, p[28]);
grey g_30_0 (G_30_0, {g[30],G_29_0}, p[30]);
grey g_32_0 (G_32_0, {g[32],G_31_0}, p[32]);
grey g_34_0 (G_34_0, {g[34],G_33_0}, p[34]);
grey g_36_0 (G_36_0, {g[36],G_35_0}, p[36]);
grey g_38_0 (G_38_0, {g[38],G_37_0}, p[38]);
grey g_40_0 (G_40_0, {g[40],G_39_0}, p[40]);
grey g_42_0 (G_42_0, {g[42],G_41_0}, p[42]);
grey g_44_0 (G_44_0, {g[44],G_43_0}, p[44]);
grey g_46_0 (G_46_0, {g[46],G_45_0}, p[46]);
grey g_48_0 (G_48_0, {g[48],G_47_0}, p[48]);
grey g_50_0 (G_50_0, {g[50],G_49_0}, p[50]);
grey g_52_0 (G_52_0, {g[52],G_51_0}, p[52]);
grey g_54_0 (G_54_0, {g[54],G_53_0}, p[54]);
grey g_56_0 (G_56_0, {g[56],G_55_0}, p[56]);
grey g_58_0 (G_58_0, {g[58],G_57_0}, p[58]);
grey g_60_0 (G_60_0, {g[60],G_59_0}, p[60]);
grey g_62_0 (G_62_0, {g[62],G_61_0}, p[62]);
grey g_64_0 (G_64_0, {g[64],G_63_0}, p[64]);
grey g_66_0 (G_66_0, {g[66],G_65_0}, p[66]);
grey g_68_0 (G_68_0, {g[68],G_67_0}, p[68]);
grey g_70_0 (G_70_0, {g[70],G_69_0}, p[70]);
grey g_72_0 (G_72_0, {g[72],G_71_0}, p[72]);
grey g_74_0 (G_74_0, {g[74],G_73_0}, p[74]);
grey g_76_0 (G_76_0, {g[76],G_75_0}, p[76]);
grey g_78_0 (G_78_0, {g[78],G_77_0}, p[78]);
grey g_80_0 (G_80_0, {g[80],G_79_0}, p[80]);
grey g_82_0 (G_82_0, {g[82],G_81_0}, p[82]);
grey g_84_0 (G_84_0, {g[84],G_83_0}, p[84]);
grey g_86_0 (G_86_0, {g[86],G_85_0}, p[86]);
grey g_88_0 (G_88_0, {g[88],G_87_0}, p[88]);
grey g_90_0 (G_90_0, {g[90],G_89_0}, p[90]);
grey g_92_0 (G_92_0, {g[92],G_91_0}, p[92]);
grey g_94_0 (G_94_0, {g[94],G_93_0}, p[94]);
grey g_96_0 (G_96_0, {g[96],G_95_0}, p[96]);
grey g_98_0 (G_98_0, {g[98],G_97_0}, p[98]);
grey g_100_0 (G_100_0, {g[100],G_99_0}, p[100]);
grey g_102_0 (G_102_0, {g[102],G_101_0}, p[102]);
grey g_104_0 (G_104_0, {g[104],G_103_0}, p[104]);
grey g_106_0 (G_106_0, {g[106],G_105_0}, p[106]);
grey g_108_0 (G_108_0, {g[108],G_107_0}, p[108]);
grey g_110_0 (G_110_0, {g[110],G_109_0}, p[110]);
grey g_112_0 (G_112_0, {g[112],G_111_0}, p[112]);
grey g_114_0 (G_114_0, {g[114],G_113_0}, p[114]);
grey g_116_0 (G_116_0, {g[116],G_115_0}, p[116]);
grey g_118_0 (G_118_0, {g[118],G_117_0}, p[118]);
grey g_120_0 (G_120_0, {g[120],G_119_0}, p[120]);
grey g_122_0 (G_122_0, {g[122],G_121_0}, p[122]);
grey g_124_0 (G_124_0, {g[124],G_123_0}, p[124]);
grey g_126_0 (G_126_0, {g[126],G_125_0}, p[126]);
// Final Stage: Apply c_k+1=G_k_0
// (c[1] has no cell of its own: the width-1 prefix G_0_0 is just g[0])
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
assign c[9]=G_8_0;
assign c[10]=G_9_0;
assign c[11]=G_10_0;
assign c[12]=G_11_0;
assign c[13]=G_12_0;
assign c[14]=G_13_0;
assign c[15]=G_14_0;
assign c[16]=G_15_0;
assign c[17]=G_16_0;
assign c[18]=G_17_0;
assign c[19]=G_18_0;
assign c[20]=G_19_0;
assign c[21]=G_20_0;
assign c[22]=G_21_0;
assign c[23]=G_22_0;
assign c[24]=G_23_0;
assign c[25]=G_24_0;
assign c[26]=G_25_0;
assign c[27]=G_26_0;
assign c[28]=G_27_0;
assign c[29]=G_28_0;
assign c[30]=G_29_0;
assign c[31]=G_30_0;
assign c[32]=G_31_0;
assign c[33]=G_32_0;
assign c[34]=G_33_0;
assign c[35]=G_34_0;
assign c[36]=G_35_0;
assign c[37]=G_36_0;
assign c[38]=G_37_0;
assign c[39]=G_38_0;
assign c[40]=G_39_0;
assign c[41]=G_40_0;
assign c[42]=G_41_0;
assign c[43]=G_42_0;
assign c[44]=G_43_0;
assign c[45]=G_44_0;
assign c[46]=G_45_0;
assign c[47]=G_46_0;
assign c[48]=G_47_0;
assign c[49]=G_48_0;
assign c[50]=G_49_0;
assign c[51]=G_50_0;
assign c[52]=G_51_0;
assign c[53]=G_52_0;
assign c[54]=G_53_0;
assign c[55]=G_54_0;
assign c[56]=G_55_0;
assign c[57]=G_56_0;
assign c[58]=G_57_0;
assign c[59]=G_58_0;
assign c[60]=G_59_0;
assign c[61]=G_60_0;
assign c[62]=G_61_0;
assign c[63]=G_62_0;
assign c[64]=G_63_0;
assign c[65]=G_64_0;
assign c[66]=G_65_0;
assign c[67]=G_66_0;
assign c[68]=G_67_0;
assign c[69]=G_68_0;
assign c[70]=G_69_0;
assign c[71]=G_70_0;
assign c[72]=G_71_0;
assign c[73]=G_72_0;
assign c[74]=G_73_0;
assign c[75]=G_74_0;
assign c[76]=G_75_0;
assign c[77]=G_76_0;
assign c[78]=G_77_0;
assign c[79]=G_78_0;
assign c[80]=G_79_0;
assign c[81]=G_80_0;
assign c[82]=G_81_0;
assign c[83]=G_82_0;
assign c[84]=G_83_0;
assign c[85]=G_84_0;
assign c[86]=G_85_0;
assign c[87]=G_86_0;
assign c[88]=G_87_0;
assign c[89]=G_88_0;
assign c[90]=G_89_0;
assign c[91]=G_90_0;
assign c[92]=G_91_0;
assign c[93]=G_92_0;
assign c[94]=G_93_0;
assign c[95]=G_94_0;
assign c[96]=G_95_0;
assign c[97]=G_96_0;
assign c[98]=G_97_0;
assign c[99]=G_98_0;
assign c[100]=G_99_0;
assign c[101]=G_100_0;
assign c[102]=G_101_0;
assign c[103]=G_102_0;
assign c[104]=G_103_0;
assign c[105]=G_104_0;
assign c[106]=G_105_0;
assign c[107]=G_106_0;
assign c[108]=G_107_0;
assign c[109]=G_108_0;
assign c[110]=G_109_0;
assign c[111]=G_110_0;
assign c[112]=G_111_0;
assign c[113]=G_112_0;
assign c[114]=G_113_0;
assign c[115]=G_114_0;
assign c[116]=G_115_0;
assign c[117]=G_116_0;
assign c[118]=G_117_0;
assign c[119]=G_118_0;
assign c[120]=G_119_0;
assign c[121]=G_120_0;
assign c[122]=G_121_0;
assign c[123]=G_122_0;
assign c[124]=G_123_0;
assign c[125]=G_124_0;
assign c[126]=G_125_0;
assign c[127]=G_126_0;
assign c[128]=G_127_0;
endmodule // brent_kung_cs128

View File

@ -1,97 +0,0 @@
// Brent-Kung Carry-save Prefix Adder
//
// 13-bit adder wrapper around the brent_kung_cs13 prefix tree.
//   a, b : 13-bit addends
//   cin  : carry in
//   sum  : 13-bit sum
//   cout : carry out
//
// p and g are one bit wider than the operands: index 0 carries the
// carry-in ({p[0], g[0]} = {1'b0, cin}) and index i+1 carries the
// propagate (a^b) / generate (a&b) of operand bit i.
//
// Changes from the previous version:
//  * brent_kung_cs13 declares its p and g ports as [13:0], but the instance
//    used to pass only p[12:0] / g[12:0], leaving a port-width mismatch. The
//    full 14-bit vectors are now connected by name. The carries c[13:1] do
//    not depend on bit 13 of p/g, so sum and cout are unchanged.
//  * The `t` vector was assigned but never read (t[0] was never driven) and
//    has been removed.
module bk13 (cout, sum, a, b, cin);
   input [12:0]  a, b;
   input         cin;
   output [12:0] sum;
   output        cout;

   wire [13:0]   p, g;
   wire [12:0]   c;

   // pre-computation
   assign p = {a ^ b, 1'b0};
   assign g = {a & b, cin};

   // prefix tree
   // The tree's c port is declared [13:1]; it lands bit-for-bit on the
   // local c[12:0], so local c[i] is the tree's c[i+1].
   brent_kung_cs13 prefix_tree (.c(c), .p(p), .g(g));

   // post-computation
   assign sum  = p[13:1] ^ c;
   // c[12] is the tree's c[13]; combine it with the top bit's p/g.
   assign cout = g[13] | (p[13] & c[12]);
endmodule
// Prefix network for the 13-bit adder.
//   p, g : per-bit propagate / generate inputs
//   c    : carries out, c[k+1] = G_k_0 (c[1] is simply g[0])
//
// Built from `grey` and `black` cells, which are defined outside this module:
//   grey  <name> (Gout, {G_hi, G_lo}, P_hi);
//   black <name> (Gout, Pout, {G_hi, G_lo}, {P_hi, P_lo});
// NOTE(review): presumably Gout = G_hi | (P_hi & G_lo) and Pout = P_hi & P_lo
// -- confirm against the cell definitions.
//
// Changes from the previous version:
//  * The black cell b_13_12 was removed. Its outputs (G_13_12, P_13_12) were
//    not consumed by any other cell or by c[13:1], so it was dead logic.
//  * p[13] and g[13] are therefore unused. The ports stay 14 bits wide so
//    existing instantiations keep elaborating unchanged.
//  * All internal nets are now declared explicitly instead of relying on
//    implicit net declaration, so the module also compiles under
//    `default_nettype none.
module brent_kung_cs13 (c, p, g);
   input [13:0]  p;
   input [13:0]  g;
   output [13:1] c;

   // Group generate / propagate nets, named <G|P>_<msb>_<lsb>
   wire G_3_2,   P_3_2;
   wire G_5_4,   P_5_4;
   wire G_7_6,   P_7_6;
   wire G_9_8,   P_9_8;
   wire G_11_10, P_11_10;
   wire G_7_4,   P_7_4;
   wire G_11_8,  P_11_8;

   // Prefix generates G_k_0 (generate of bits k..0)
   wire G_1_0, G_2_0, G_3_0, G_4_0,  G_5_0,  G_6_0;
   wire G_7_0, G_8_0, G_9_0, G_10_0, G_11_0, G_12_0;

   // parallel-prefix, Brent-Kung
   // Stage 1: combine adjacent input bits into 2-bit groups
   grey  b_1_0   (G_1_0, {g[1],g[0]}, p[1]);
   black b_3_2   (G_3_2,   P_3_2,   {g[3],g[2]},   {p[3],p[2]});
   black b_5_4   (G_5_4,   P_5_4,   {g[5],g[4]},   {p[5],p[4]});
   black b_7_6   (G_7_6,   P_7_6,   {g[7],g[6]},   {p[7],p[6]});
   black b_9_8   (G_9_8,   P_9_8,   {g[9],g[8]},   {p[9],p[8]});
   black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});

   // Stage 2: merge pairs of 2-bit groups into 4-bit groups
   grey  g_3_0   (G_3_0, {G_3_2,G_1_0}, P_3_2);
   black b_7_4   (G_7_4,  P_7_4,  {G_7_6,G_5_4},   {P_7_6,P_5_4});
   black b_11_8  (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});

   // Stage 3: merge the two low 4-bit groups into G_7_0
   grey  g_7_0   (G_7_0, {G_7_4,G_3_0}, P_7_4);

   // Stage 4: (no 16-bit group exists at this width)

   // Stage 5: extend a prefix by a 4-bit group
   grey  g_11_0  (G_11_0, {G_11_8,G_7_0}, P_11_8);

   // Stage 6: extend prefixes by 2-bit groups
   grey  g_5_0   (G_5_0, {G_5_4,G_3_0}, P_5_4);
   grey  g_9_0   (G_9_0, {G_9_8,G_7_0}, P_9_8);

   // Last grey cell stage: even k gets G_k_0 from g[k]/p[k] and G_(k-1)_0
   grey  g_2_0   (G_2_0,  {g[2],G_1_0},   p[2]);
   grey  g_4_0   (G_4_0,  {g[4],G_3_0},   p[4]);
   grey  g_6_0   (G_6_0,  {g[6],G_5_0},   p[6]);
   grey  g_8_0   (G_8_0,  {g[8],G_7_0},   p[8]);
   grey  g_10_0  (G_10_0, {g[10],G_9_0},  p[10]);
   grey  g_12_0  (G_12_0, {g[12],G_11_0}, p[12]);

   // Final Stage: Apply c_k+1=G_k_0 (listed MSB first: c[13] ... c[1])
   assign c = {G_12_0, G_11_0, G_10_0, G_9_0, G_8_0, G_7_0,
               G_6_0,  G_5_0,  G_4_0,  G_3_0, G_2_0, G_1_0,
               g[0]};
endmodule

View File

@ -1,86 +0,0 @@
// Brent-Kung Prefix Adder
//
// 14-bit adder wrapper around the brent_kung14 prefix tree.
//   a, b : 14-bit addends
//   cin  : carry in
//   sum  : 14-bit sum
//   cout : carry out
module bk14 (cout, sum, a, b, cin);
   output        cout;
   output [13:0] sum;
   input  [13:0] a, b;
   input         cin;

   // Propagate / generate vectors, one bit wider than the operands:
   // index 0 holds the carry-in slot, index i+1 belongs to operand bit i.
   wire [14:0]   prop;
   wire [14:0]   gen;
   // Carries returned by the prefix tree (tree c[14:1] -> carry[13:0]).
   wire [13:0]   carry;

   // pre-computation
   assign gen  = {a & b, cin};
   assign prop = {a ^ b, 1'b0};

   // prefix tree
   brent_kung14 prefix_tree (
      .c (carry),
      .p (prop[13:0]),
      .g (gen[13:0])
   );

   // post-computation
   assign cout = (carry[13] & prop[14]) | gen[14];
   assign sum  = carry ^ prop[14:1];
endmodule
// Prefix network for the 14-bit Brent-Kung adder.
//   p, g : per-bit propagate / generate inputs, indices 0..13
//   c    : carries out, c[k+1] = G_k_0 (c[1] is simply g[0])
//
// Built from `grey` and `black` cells, which are defined outside this module:
//   grey  <name> (Gout, {G_hi, G_lo}, P_hi);
//   black <name> (Gout, Pout, {G_hi, G_lo}, {P_hi, P_lo});
// NOTE(review): presumably Gout = G_hi | (P_hi & G_lo) and Pout = P_hi & P_lo
// -- confirm against the cell definitions.
module brent_kung14 (c, p, g);
   output [14:1] c;
   input  [13:0] p;
   input  [13:0] g;

   // Group generate / propagate nets, named <G|P>_<msb>_<lsb>
   wire G_3_2,   P_3_2;
   wire G_5_4,   P_5_4;
   wire G_7_6,   P_7_6;
   wire G_9_8,   P_9_8;
   wire G_11_10, P_11_10;
   wire G_13_12, P_13_12;
   wire G_7_4,   P_7_4;
   wire G_11_8,  P_11_8;

   // Prefix generates G_k_0 (generate of bits k..0)
   wire G_1_0, G_2_0, G_3_0,  G_4_0,  G_5_0,  G_6_0, G_7_0;
   wire G_8_0, G_9_0, G_10_0, G_11_0, G_12_0, G_13_0;

   // ---- 2-bit groups built straight from the inputs --------------------
   grey  b_1_0   (G_1_0, {g[1], g[0]}, p[1]);
   black b_3_2   (G_3_2,   P_3_2,   {g[3],  g[2]},  {p[3],  p[2]});
   black b_5_4   (G_5_4,   P_5_4,   {g[5],  g[4]},  {p[5],  p[4]});
   black b_7_6   (G_7_6,   P_7_6,   {g[7],  g[6]},  {p[7],  p[6]});
   black b_9_8   (G_9_8,   P_9_8,   {g[9],  g[8]},  {p[9],  p[8]});
   black b_11_10 (G_11_10, P_11_10, {g[11], g[10]}, {p[11], p[10]});
   black b_13_12 (G_13_12, P_13_12, {g[13], g[12]}, {p[13], p[12]});

   // ---- 4-bit groups ----------------------------------------------------
   grey  g_3_0   (G_3_0, {G_3_2, G_1_0}, P_3_2);
   black b_7_4   (G_7_4,  P_7_4,  {G_7_6,   G_5_4}, {P_7_6,   P_5_4});
   black b_11_8  (G_11_8, P_11_8, {G_11_10, G_9_8}, {P_11_10, P_9_8});

   // ---- prefixes ending on an odd bit -----------------------------------
   grey  g_7_0   (G_7_0,  {G_7_4,   G_3_0},  P_7_4);
   grey  g_11_0  (G_11_0, {G_11_8,  G_7_0},  P_11_8);
   grey  g_5_0   (G_5_0,  {G_5_4,   G_3_0},  P_5_4);
   grey  g_9_0   (G_9_0,  {G_9_8,   G_7_0},  P_9_8);
   grey  g_13_0  (G_13_0, {G_13_12, G_11_0}, P_13_12);

   // ---- prefixes ending on an even bit: g[k]/p[k] on top of G_(k-1)_0 ---
   grey  g_2_0   (G_2_0,  {g[2],  G_1_0},  p[2]);
   grey  g_4_0   (G_4_0,  {g[4],  G_3_0},  p[4]);
   grey  g_6_0   (G_6_0,  {g[6],  G_5_0},  p[6]);
   grey  g_8_0   (G_8_0,  {g[8],  G_7_0},  p[8]);
   grey  g_10_0  (G_10_0, {g[10], G_9_0},  p[10]);
   grey  g_12_0  (G_12_0, {g[12], G_11_0}, p[12]);

   // ---- carries: c[k+1] = G_k_0, listed MSB first (c[14] ... c[1]) ------
   assign c = {G_13_0, G_12_0, G_11_0, G_10_0, G_9_0, G_8_0, G_7_0,
               G_6_0,  G_5_0,  G_4_0,  G_3_0,  G_2_0, G_1_0, g[0]};
endmodule

View File

@ -1,70 +0,0 @@
// Half adder: S is the XOR of the two inputs, C is their AND.
module ha (C, S, A, B);
   output C, S;
   input  A, B;

   // Gate primitives give the same 4-state results as the ^ and & operators.
   xor sum_gate   (S, A, B);
   and carry_gate (C, A, B);
endmodule // HA
// module fa (input logic a, b, c, output logic sum, carry);
// assign sum = a^b^c;
// assign carry = a&b|a&c|b&c;
// endmodule // fa
// module csa #(parameter WIDTH=8) (a, b,c, sum, carry, cout);
// input logic [WIDTH-1:0] a, b, c;
// output logic [WIDTH-1:0] sum, carry;
// output logic cout;
// logic [WIDTH:0] carry_temp;
// genvar i;
// generate
// for (i=0;i<WIDTH;i=i+1)
// begin : genbit
// fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
// end
// endgenerate
// assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};
// assign cout = carry_temp[WIDTH];
// endmodule // csa
// Row of n independent 1-bit `fa` cells.
//   A, B, Ci : n-bit inputs, one bit of each per cell
//   S        : n-bit output taken from each cell's `sum` port
//   C        : n-bit output taken from each cell's `carry` port
// There is no carry chain: bit i of every output depends only on bit i of
// the inputs. `fa` is not defined by live code in this file.
// NOTE(review): presumably `fa` is a 1-bit full adder -- confirm against its
// definition.
//
// The unused internal wires n0, n1 and n2 from the previous version have
// been removed; nothing drove or read them.
module FA_array (S, C, A, B, Ci);
   parameter n = 32;
   input [n-1:0]  A;
   input [n-1:0]  B;
   input [n-1:0]  Ci;
   output [n-1:0] S;
   output [n-1:0] C;

   genvar i;
   generate
      // Block label `index` and instance name `FA1` are kept so hierarchical
      // paths (index[i].FA1) stay the same.
      for (i = 0; i < n; i = i + 1) begin : index
         fa FA1 (.sum(S[i]), .carry(C[i]), .a(A[i]), .b(B[i]), .c(Ci[i]));
      end
   endgenerate
endmodule // FA_array
// Row of n independent `ha` half-adder cells: bit k of S and C is produced
// by one `ha` instance fed with bit k of A and B.
module HA_array (S, C, A, B);
   parameter n = 32;

   output [n-1:0] S;
   output [n-1:0] C;
   input  [n-1:0] A;
   input  [n-1:0] B;

   genvar k;
   generate
      // One cell per bit position; label and instance name are unchanged so
      // hierarchical paths (index[k].ha1) stay the same.
      for (k = 0; k < n; k = k + 1) begin : index
         ha ha1 (
            .A (A[k]),
            .B (B[k]),
            .S (S[k]),
            .C (C[k])
         );
      end
   endgenerate
endmodule // HA_array

View File

@ -68,9 +68,9 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
mux2 #(64) mx5 (muxb_out, mcand_q, sel_muxr&op_type, mplier);
mux2 #(64) mx6 (muxa_out, mcand_q, sel_muxr, mcand);
// TDM multiplier (carry/save)
multiplier mult1 (mcand, mplier, Sum, Carry);
multiplier mult1 (mcand, mplier, Sum, Carry); // ***multiply
// Q*D - N (reversed but changed in rounder.v to account for sign reversal)
csa #(128) csa1 (Sum, Carry, constant, Sum2, Carry2);
csa #(128) csa1 (Sum, Carry, constant, Sum2, Carry2); //***adder
// Add ulp for subtraction in remainder
mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out);
@ -80,15 +80,15 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
mux2 #(64) mxA ({64'hFFFF_FFFF_FFFF_F9FF}, {64'hFFFF_FF3F_FFFF_FFFF}, P, qm_const);
// CPA (from CSA)/Remainder addition/subtraction
ldf128 cpa1 (cout1, mul_out, Sum2, Carry2, muxr_out);
ldf128 cpa1 (cout1, mul_out, Sum2, Carry2, muxr_out); //***adder
// Assuming [1,2) - q1
ldf64 cpa2 (cout2, q_out1, regb_out, q_const, 1'b0);
ldf64 cpa3 (cout3, qp_out1, regb_out, qp_const, 1'b0);
ldf64 cpa4 (cout4, qm_out1, regb_out, qm_const, 1'b1);
ldf64 cpa2 (cout2, q_out1, regb_out, q_const, 1'b0); //***adder
ldf64 cpa3 (cout3, qp_out1, regb_out, qp_const, 1'b0); //***adder
ldf64 cpa4 (cout4, qm_out1, regb_out, qm_const, 1'b1); //***adder
// Assuming [0.5,1) - q0
ldf64 cpa5 (cout5, q_out0, {regb_out[62:0], vss}, q_const, 1'b0);
ldf64 cpa6 (cout6, qp_out0, {regb_out[62:0], vss}, qp_const, 1'b0);
ldf64 cpa7 (cout7, qm_out0, {regb_out[62:0], vss}, qm_const, 1'b1);
ldf64 cpa5 (cout5, q_out0, {regb_out[62:0], vss}, q_const, 1'b0); //***adder
ldf64 cpa6 (cout6, qp_out0, {regb_out[62:0], vss}, qp_const, 1'b0); //***adder
ldf64 cpa7 (cout7, qm_out0, {regb_out[62:0], vss}, qm_const, 1'b1); //***adder
// One's complement instead of two's complement (for hw efficiency)
assign three = {~mul_out[126], mul_out[126], ~mul_out[125:63]};
mux2 #(64) mxTC (~mul_out[126:63], three[64:1], op_type, twocmp_out);

View File

@ -0,0 +1,62 @@
`include "wally-config.vh"
// Floating-point classify: decodes the operand into one of ten classes and
// returns a mask with exactly one of bits [9:0] set (bit layout listed at
// the bottom of the module).
//
// Fix: Norm used to be `~ExpZero`, which is also true when the exponent is
// all ones. Infinities and NaNs therefore set the Norm bit in addition to
// their own bit. Norm now also requires ~MaxExp, so the result is one-hot.
module fclassify (
   input  logic [63:0] SrcXE,     // operand to classify
   input  logic        FmtE,      // 0-Single 1-Double
   output logic [63:0] ClassResE  // class mask in bits [9:0], upper bits zero
   );

   logic [31:0] Single;
   logic [63:0] Double;
   logic        Sgn;
   logic        Inf, NaN, Zero, Norm, Denorm;
   logic        PInf, QNaN, PZero, PNorm, PDenorm;
   logic        NInf, SNaN, NZero, NNorm, NDenorm;
   logic        MaxExp, ExpZero, ManZero, FirstBitFrac;

   // Single and Double precision layouts
   // The single-precision value is read from the upper word, so bit 63 is
   // the sign bit for both formats.
   assign Single = SrcXE[63:32];
   assign Double = SrcXE;
   assign Sgn    = SrcXE[63];

   // basic calculations for readability
   assign ExpZero      = FmtE ? ~|Double[62:52] : ~|Single[30:23];  // exponent all zeros
   assign MaxExp       = FmtE ? &Double[62:52]  : &Single[30:23];   // exponent all ones
   assign ManZero      = FmtE ? ~|Double[51:0]  : ~|Single[22:0];   // fraction all zeros
   assign FirstBitFrac = FmtE ? Double[51]      : Single[22];       // MSB of the fraction

   // determine the type of number
   assign NaN    = MaxExp  & ~ManZero;
   assign Inf    = MaxExp  & ManZero;
   assign Zero   = ExpZero & ManZero;
   assign Denorm = ExpZero & ~ManZero;
   // Normal: exponent is neither all zeros nor all ones.
   assign Norm   = ~ExpZero & ~MaxExp;

   // determine the sub categories
   assign QNaN    = FirstBitFrac & NaN;
   assign SNaN    = ~FirstBitFrac & NaN;
   assign PInf    = ~Sgn & Inf;
   assign NInf    = Sgn & Inf;
   assign PNorm   = ~Sgn & Norm;
   assign NNorm   = Sgn & Norm;
   assign PDenorm = ~Sgn & Denorm;
   assign NDenorm = Sgn & Denorm;
   assign PZero   = ~Sgn & Zero;
   assign NZero   = Sgn & Zero;

   // determine sub category and combine into the result
   //  bit 0 - -Inf
   //  bit 1 - -Norm
   //  bit 2 - -Denorm
   //  bit 3 - -Zero
   //  bit 4 - +Zero
   //  bit 5 - +Denorm
   //  bit 6 - +Norm
   //  bit 7 - +Inf
   //  bit 8 - signaling NaN
   //  bit 9 - quiet NaN
   assign ClassResE = {{54{1'b0}}, QNaN, SNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};

endmodule

View File

@ -39,7 +39,7 @@
// if either of the input operands is a signaling NaN per 754
`include "wally-config.vh"
module fpucmp1 (
module fcmp (
input logic [63:0] op1,
input logic [63:0] op2,
input logic [2:0] FOpCtrlE,
@ -48,7 +48,7 @@ module fpucmp1 (
output logic Invalid, // Invalid Operation
// output logic [1:0] FCC, // Condition Codes
output logic [63:0] FCmpResultE);
output logic [63:0] CmpResE);
// Perform magnitude comparison between the 63 least significant bits
// of the input operands. Only LT and EQ are returned, since GT can
// be determined from these values.
@ -392,7 +392,7 @@ module exception_cmp_2 (
output logic invalid,
output logic [1:0] fcc,
output logic [63:0] FCmpResultE,
output logic [63:0] CmpResE,
input logic Azero,
input logic Bzero,
@ -453,12 +453,12 @@ module exception_cmp_2 (
always_comb begin
case (FOpCtrlE[2:0])
3'b111: FCmpResultE = LT ? A : B;//min
3'b101: FCmpResultE = GT ? A : B;//max
3'b010: FCmpResultE = {63'b0, EQ};//equal
3'b001: FCmpResultE = {63'b0, LT};//less than
3'b011: FCmpResultE = {63'b0, LT|EQ};//less than or equal
default: FCmpResultE = 64'b0;
3'b111: CmpResE = LT ? A : B;//min
3'b101: CmpResE = GT ? A : B;//max
3'b010: CmpResE = {63'b0, EQ};//equal
3'b001: CmpResE = {63'b0, LT};//less than
3'b011: CmpResE = {63'b0, LT|EQ};//less than or equal
default: CmpResE = 64'b0;
endcase
end

View File

@ -64,30 +64,38 @@ module fctrl (
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_0100_00_01_0_0; // fmv.x.w
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_0101_00_01_0_0; // fmv.x.d
else ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
7'b1100000: case(Rs2D[0])
1'b0: ControlsD = `FCTRLW'b0_1_010_0110_00_00_0_0; // fcvt.s.w
1'b1: ControlsD = `FCTRLW'b0_1_010_0101_00_00_0_0; // fcvt.s.wu
7'b1100000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // fcvt.s.w
2'b01: ControlsD = `FCTRLW'b0_1_100_0101_00_00_0_0; // fcvt.s.wu
2'b10: ControlsD = `FCTRLW'b0_1_100_1001_00_00_0_0; // fcvt.s.l
2'b11: ControlsD = `FCTRLW'b0_1_100_1101_00_00_0_0; // fcvt.s.lu
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1101000: case(Rs2D[0])
1'b0: ControlsD = `FCTRLW'b1_1_010_0100_00_00_0_0; // fcvt.w.s
1'b1: ControlsD = `FCTRLW'b1_1_010_0101_00_00_0_0; // fcvt.wu.s
7'b1101000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_1_100_0010_00_00_0_0; // fcvt.w.s
2'b01: ControlsD = `FCTRLW'b1_1_100_0110_00_00_0_0; // fcvt.wu.s
2'b10: ControlsD = `FCTRLW'b1_1_100_1010_00_00_0_0; // fcvt.l.s
2'b11: ControlsD = `FCTRLW'b1_1_100_1110_00_00_0_0; // fcvt.lu.s
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1111000: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fmv.w.x
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0010_00_00_0_0; // fcvt.s.d
7'b1100001: case(Rs2D[0])
1'b0: ControlsD = `FCTRLW'b0_1_010_1110_00_00_0_0; // fcvt.d.w
1'b1: ControlsD = `FCTRLW'b0_1_010_1111_00_00_0_0; // fcvt.d.wu
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0000_00_00_0_0; // fcvt.s.d
7'b1100001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // fcvt.d.w
2'b01: ControlsD = `FCTRLW'b0_1_100_0101_00_00_0_0; // fcvt.d.wu
2'b10: ControlsD = `FCTRLW'b0_1_100_1001_00_00_0_0; // fcvt.d.l
2'b11: ControlsD = `FCTRLW'b0_1_100_1101_00_00_0_0; // fcvt.d.lu
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1101001: case(Rs2D[0])
1'b0: ControlsD = `FCTRLW'b1_0_010_1100_00_00_0_0; // fcvt.w.d
1'b1: ControlsD = `FCTRLW'b1_0_010_1101_00_00_0_0; // fcvt.wu.d
7'b1101001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_0_100_0010_00_00_0_0; // fcvt.w.d
2'b01: ControlsD = `FCTRLW'b1_0_100_0110_00_00_0_0; // fcvt.wu.d
2'b10: ControlsD = `FCTRLW'b1_0_100_1010_00_00_0_0; // fcvt.l.d
2'b11: ControlsD = `FCTRLW'b1_0_100_1110_00_00_0_0; // fcvt.lu.d
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1111001: ControlsD = `FCTRLW'b1_0_100_0001_00_00_0_0; // fmv.d.x
7'b0100001: ControlsD = `FCTRLW'b1_0_010_1000_00_00_0_0; // fcvt.d.s
7'b0100001: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fcvt.d.s
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
@ -130,17 +138,26 @@ module fctrl (
// add/sub/cnvt
// fadd = 0000
// fsub = 0001
// fcvt.w.s = 0100
// fcvt.wu.s = 0101
// fcvt.s.w = 0110
// fcvt.s.wu = 0111
// fcvt.s.d = 0010
// fcvt.w.d = 1100
// fcvt.wu.d = 1101
// fcvt.d.w = 1110
// fcvt.d.wu = 1111
// fcvt.d.s = 1000
// { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub}
// cnvt
// fcvt.w.s = 0010
// fcvt.wu.s = 0110
// fcvt.s.w = 0001
// fcvt.s.wu = 0101
// fcvt.s.d = 0000
// fcvt.l.s = 1010
// fcvt.lu.s = 1110
// fcvt.s.l = 1001
// fcvt.s.lu = 1101
// fcvt.w.d = 0010
// fcvt.wu.d = 0110
// fcvt.d.w = 0001
// fcvt.d.wu = 0101
// fcvt.d.s = 0000
// fcvt.l.d = 1010
// fcvt.lu.d = 1110
// fcvt.d.l = 1001
// fcvt.d.lu = 1101
// {long, unsigned, to int, from int} Fmt controls the output for fp -> fp
// fmv.w.x = ???0
// fmv.w.d = ???1

View File

@ -23,7 +23,7 @@
//
// `timescale 1ps/1ps
module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
module fdivsqrt (FDivSqrtDoneE, FDivResultM, FDivSqrtFlgM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
FDivStartE, reset, clk, FDivBusyE, HoldInputs);
input [63:0] DivInput1E; // 1st input operand (A)
@ -39,8 +39,7 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di
input clk;
output [63:0] FDivResultM; // Result of operation
output [4:0] FDivFlagsM; // IEEE exception flags
output DivDenormM; // DivDenormM on input or output
output [4:0] FDivSqrtFlgM; // IEEE exception flags
output FDivSqrtDoneE;
output FDivBusyE, HoldInputs;
@ -51,6 +50,7 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di
wire [63:0] Float2;
wire [63:0] IntValue;
wire DivDenormM; // DivDenormM on input or output
wire [12:0] exp1, exp2, expF;
wire [12:0] exp_diff, bias;
wire [13:0] exp_sqrt;
@ -103,7 +103,7 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di
convert_inputs_div divconv1 (Float1, Float2, DivInput1E, DivInput2E, DivOpType, FmtE);
// Test for exceptions and return the "Invalid Operation" and
// "Denormalized" Input FDivFlagsM. The "sel_inv" is used in
// "Denormalized" Input FDivSqrtFlgM. The "sel_inv" is used in
// the third pipeline stage to select the result. Also, op1_Norm
// and op2_Norm are one if DivInput1E and DivInput2E are not zero or denormalized.
// sub is one if the effective operation is subtraction.
@ -120,12 +120,12 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di
// bias : DP = 2^{11-1}-1 = 1023
assign bias = {3'h0, 10'h3FF};
// Divide exponent
csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c);
exp_add explogic1 (exp_cout1, {open, exp_diff},
csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c); //***adder
exp_add explogic1 (exp_cout1, {open, exp_diff}, //***adder?
{vss, exp_s}, {vss, exp_c}, 1'b1);
// Sqrt exponent (check if exponent is odd)
assign exp_odd = Float1[52] ? vss : vdd;
exp_add explogic2 (exp_cout2, exp_sqrt,
exp_add explogic2 (exp_cout2, exp_sqrt, //***adder?
{vss, exp1}, {4'h0, 10'h3ff}, exp_odd);
// Choose correct exponent
assign expF = DivOpType ? exp_sqrt[13:1] : exp_diff;
@ -156,7 +156,7 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di
// Store the final result and the exception flags in registers.
flopenr #(64) rega (clk, reset, FDivSqrtDoneE, Result, FDivResultM);
flopenr #(1) regb (clk, reset, FDivSqrtDoneE, DenormIO, DivDenormM);
flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivFlagsM);
flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivSqrtFlgM);
endmodule // fpadd

View File

@ -25,7 +25,7 @@
`include "wally-config.vh"
module fpuhazard(
module fhazard(
input logic [4:0] Adr1E, Adr2E, Adr3E,
input logic FWriteEnM, FWriteEnW,
input logic [4:0] RdM, RdW,

View File

@ -16,8 +16,8 @@ module fma2(
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
output logic [63:0] FmaResultM, // FMA final result
output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
output logic [63:0] FMAResM, // FMA final result
output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
@ -57,7 +57,7 @@ module fma2(
logic [12:0] MaxExp; // maximum value of the exponent
logic [12:0] FracLen; // length of the fraction
logic SigNaN; // is an input a signaling NaN
logic UnderflowFlag; // Underflow singal used in FmaFlagsM (used to avoid a circular depencency)
logic UnderflowFlag; // Underflow singal used in FMAFlgM (used to avoid a circular depencency)
logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
@ -316,7 +316,7 @@ module fma2(
// Combine flags
// - FMA can't set the Divide by zero flag
// - Don't set the underflow flag if the result was rounded up to a normal number
assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
@ -337,7 +337,7 @@ module fma2(
assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0};
assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0};
assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0};
assign FmaResultM = XNaNM ? XNaNResult :
assign FMAResM = XNaNM ? XNaNResult :
YNaNM ? YNaNResult :
ZNaNM ? ZNaNResult :
Invalid ? InvalidResult : // has to be before inf

View File

@ -229,11 +229,11 @@ module fpadd (AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn);
assign corr_sign = ~op_type[2]&~op_type[1]&op_type[0]&swap;
// 64-bit Mantissa Adder/Subtractor
cla64 add1 (sum, mantissaA3, mantissaB3, sub);
cla64 add1 (sum, mantissaA3, mantissaB3, sub); //***adder
// 64-bit Mantissa Subtractor - to get the two's complement of the
// result when the sign from the adder/subtractor is negative.
cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3);
cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3); //***adder
// Determine the correct sign of the result
assign sign_corr = ((corr_sign ^ signA) & ~convert) ^ sum[63];

View File

@ -34,7 +34,7 @@ module fpu (
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW,
output logic FStallD, // Stall the decode stage if Div/Sqrt instruction
output logic FStallD, // Stall the decode stage
output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM,
@ -42,48 +42,38 @@ module fpu (
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic [4:0] SetFflagsM, // FPU flags
output logic [`XLEN-1:0] FPUResultW); // FPU result
// *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS
// control logic signal instantiation
logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM, FrmW; // FP rounding mode
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division
logic FWriteIntD; // Write to integer register
logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction
logic [1:0] FMemRWD; // Read and write enable for memory
logic [1:0] ForwardXD, ForwardXE; // Input1 forwarding mux control signal
logic [1:0] ForwardYD, ForwardYE; // Input2 forwarding mux control signal
logic [1:0] ForwardZD, ForwardZE; // Input3 forwarding mux control signal
logic SrcYUsedD; // Is input 2 used
logic SrcZUsedD; // Is input 3 used
logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE, FResSelM;
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM;
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE, FResSelM;
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM;
logic [4:0] Adr1E, Adr2E, Adr3E;
// regfile signals
logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining
logic [63:0] FWDM; // Write data for FP register
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [63:0] SrcXE, SrcXM, SrcXW; // Input 1 to the various units (after forwarding)
logic [`XLEN-1:0] SrcXMAligned;
logic [63:0] SrcYE, SrcYM, SrcYW; // Input 2 to the various units (after forwarding)
logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding)
logic [63:0] SrcYE, SrcYM; // Input 2 to the various units (after forwarding)
logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding)
logic [63:0] FLoadResultW, FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions
// div/sqrt signals
logic DivDenormE, DivDenormM, DivDenormW;
logic DivOvEn, DivUnEn;
logic [63:0] FDivResultE, FDivResultM, FDivResultW;
logic [4:0] FDivFlagsE, FDivFlagsM, FDivFlagsW;
logic FDivSqrtDoneE, FDivSqrtDoneM;
logic [63:0] FDivResultM, FDivResultW;
logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW;
logic FDivSqrtDoneE;
logic [63:0] DivInput1E, DivInput2E;
logic HoldInputs; // keep forwarded inputs arround durring division
// FMA signals
logic [105:0] ProdManE, ProdManM;
logic [105:0] ProdManE, ProdManM; ///*** put pipline stages in units
logic [161:0] AlignedAddendE, AlignedAddendM;
logic [12:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
@ -91,93 +81,112 @@ module fpu (
logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM;
logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM;
logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM;
logic [63:0] FmaResultM, FmaResultW;
logic [4:0] FmaFlagsM, FmaFlagsW;
logic [63:0] FMAResM, FMAResW;
logic [4:0] FMAFlgM, FMAFlgW;
// add/cvt signals
logic [63:0] AddSumE, AddSumTcE;
logic [3:0] AddSelInvE;
logic [10:0] AddExpPostSumE;
logic AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE;
logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE;
logic AddConvertE;
logic [63:0] AddFloat1E, AddFloat2E;
logic [11:0] AddExp1DenormE, AddExp2DenormE;
logic [10:0] AddExponentE;
logic [2:0] AddRmE;
logic [3:0] AddOpTypeE;
logic AddPE, AddOvEnE, AddUnEnE;
logic AddDenormM;
logic [63:0] AddSumM, AddSumTcM;
logic [3:0] AddSelInvM;
logic [10:0] AddExpPostSumM;
logic AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM;
logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM;
logic AddConvertM, AddSignM;
logic [63:0] AddFloat1M, AddFloat2M;
logic [11:0] AddExp1DenormM, AddExp2DenormM;
logic [10:0] AddExponentM;
logic [63:0] AddOp1M, AddOp2M;
logic [2:0] AddRmM;
logic [3:0] AddOpTypeM;
logic AddPM, AddOvEnM, AddUnEnM;
logic [63:0] FAddResultM, FAddResultW;
logic [4:0] FAddFlagsM, FAddFlagsW;
logic [63:0] AddSumE, AddSumM;
logic [63:0] AddSumTcE, AddSumTcM;
logic [3:0] AddSelInvE, AddSelInvM;
logic [10:0] AddExpPostSumE,AddExpPostSumM;
logic AddCorrSignE, AddCorrSignM;
logic AddOp1NormE, AddOp1NormM;
logic AddOp2NormE, AddOp2NormM;
logic AddOpANormE, AddOpANormM;
logic AddOpBNormE, AddOpBNormM;
logic AddInvalidE, AddInvalidM;
logic AddDenormInE, AddDenormInM;
logic AddSwapE, AddSwapM;
logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2
logic AddSignAE, AddSignAM;
logic AddConvertE, AddConvertM;
logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M;
logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM;
logic [10:0] AddExponentE, AddExponentM;
logic [63:0] FAddResM, FAddResW;
logic [4:0] FAddFlgM, FAddFlgW;
// cmp signals
logic CmpInvalidE, CmpInvalidM, CmpInvalidW;
logic [63:0] FCmpResultE, FCmpResultM, FCmpResultW;
logic CmpNVE, CmpNVM, CmpNVW;
logic [63:0] CmpResE, CmpResM, CmpResW;
// fsgn signals
logic [63:0] SgnResultE, SgnResultM, SgnResultW;
logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW;
logic [63:0] SgnResE, SgnResM;
logic SgnNVE, SgnNVM, SgnNVW;
logic [63:0] FResM, FResW;
logic FFlgM, FFlgW;
logic FFlgM, FFlgW;
// instantiation of W stage regfile signals
logic [63:0] AlignedSrcAM, ForwardSrcAM, SrcAW;
logic [63:0] AlignedSrcAM;
// classify signals
logic [63:0] ClassResultE, ClassResultM, ClassResultW;
logic [63:0] ClassResE, ClassResM;
// 64-bit FPU result
logic [63:0] FPUResult64W, FPUResult64E;
logic [63:0] FPUResult64W;
logic [4:0] FPUFlagsW;
//DECODE STAGE
// top-level controller for FPU
fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
.FRM_REGW, .IllegalFPUInstrD, .FWriteEnD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
// regfile instantiation
FPregfile fpregfile (clk, reset, FWriteEnW,
fregfile fregfile (clk, reset, FWriteEnW,
InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
FPUResult64W,
FRD1D, FRD2D, FRD3D);
//*****************
// fpregfile D/E pipe registers
// D/E pipe registers
//*****************
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
//*****************
// other D/E pipe registers
//*****************
flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
flopenrc #(15) CtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(22) DECtrlReg(clk, reset, FlushE, ~StallE,
flopenrc #(22) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD},
{FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE});
//EXECUTION STAGE
// Hazard unit for FPU
fpuhazard hazard(.*);
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FWriteEnM, .FWriteEnW, .RdM, .RdW, .FResultSelM, .FStallD,
.ForwardXE, .ForwardYE, .ForwardZE);
// forwarding muxs
mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE);
@ -186,7 +195,9 @@ module fpu (
// first of two-stage instance of floating-point fused multiply-add unit
fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]),.*);
fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .ProdManE, .AlignedAddendE,
.ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
.XNaNE, .YNaNE, .ZNaNE );
// first and only instance of floating-point divider
logic fpdivClk;
@ -204,174 +215,140 @@ module fpu (
.en(~HoldInputs), .clear(FDivSqrtDoneE),
.reset(reset), .clk(clk));
fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .*);
fdivsqrt fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E,
.FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM,
.FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset);
// first of two-stage instance of floating-point add/cvt unit
fpuaddcvt1 fpadd1 (.*);
fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE,
.AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE,
.AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE,
.AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE);
// first of two-stage instance of floating-point comparator
fpucmp1 fpcmp1 (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpInvalidE, FCmpResultE);
// first and only instance of floating-point comparator
fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE);
// first and only instance of floating-point sign converter
fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*);
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE);
// first and only instance of floating-point classify unit
fpuclassify fpuclass (.*);
fclassify fclassify (.SrcXE, .FmtE, .ClassResE);
// output for store instructions
assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]};
//***swap to mux
//*****************
//fpregfile D/E pipe registers
// E/M pipe registers
//*****************
flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM);
flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM);
flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM);
//*****************
// fma E/M pipe registers
//*****************
flopenrc #(106) EMRegFma3(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
flopenrc #(162) EMRegFma4(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
flopenrc #(13) EMRegFma6(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #(1) EMRegFma7(clk, reset, FlushM, ~StallM, AddendStickyE, AddendStickyM);
flopenrc #(1) EMRegFma8(clk, reset, FlushM, ~StallM, KillProdE, KillProdM);
flopenrc #(1) EMRegFma10(clk, reset, FlushM, ~StallM, XZeroE, XZeroM);
flopenrc #(1) EMRegFma11(clk, reset, FlushM, ~StallM, YZeroE, YZeroM);
flopenrc #(1) EMRegFma12(clk, reset, FlushM, ~StallM, ZZeroE, ZZeroM);
flopenrc #(1) EMRegFma16(clk, reset, FlushM, ~StallM, XInfE, XInfM);
flopenrc #(1) EMRegFma17(clk, reset, FlushM, ~StallM, YInfE, YInfM);
flopenrc #(1) EMRegFma18(clk, reset, FlushM, ~StallM, ZInfE, ZInfM);
flopenrc #(1) EMRegFma19(clk, reset, FlushM, ~StallM, XNaNE, XNaNM);
flopenrc #(1) EMRegFma20(clk, reset, FlushM, ~StallM, YNaNE, YNaNM);
flopenrc #(1) EMRegFma21(clk, reset, FlushM, ~StallM, ZNaNE, ZNaNM);
flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE},
{AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM});
//*****************
// fpadd E/M pipe registers
//*****************
flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM);
flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM);
flopenrc #(4) EMRegAdd3(clk, reset, FlushM, ~StallM, AddSelInvE, AddSelInvM);
flopenrc #(11) EMRegAdd4(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM);
flopenrc #(1) EMRegAdd5(clk, reset, FlushM, ~StallM, AddCorrSignE, AddCorrSignM);
flopenrc #(1) EMRegAdd6(clk, reset, FlushM, ~StallM, AddOp1NormE, AddOp1NormM);
flopenrc #(1) EMRegAdd7(clk, reset, FlushM, ~StallM, AddOp2NormE, AddOp2NormM);
flopenrc #(1) EMRegAdd8(clk, reset, FlushM, ~StallM, AddOpANormE, AddOpANormM);
flopenrc #(1) EMRegAdd9(clk, reset, FlushM, ~StallM, AddOpBNormE, AddOpBNormM);
flopenrc #(1) EMRegAdd10(clk, reset, FlushM, ~StallM, AddInvalidE, AddInvalidM);
flopenrc #(1) EMRegAdd11(clk, reset, FlushM, ~StallM, AddDenormInE, AddDenormInM);
flopenrc #(1) EMRegAdd12(clk, reset, FlushM, ~StallM, AddConvertE, AddConvertM);
flopenrc #(1) EMRegAdd13(clk, reset, FlushM, ~StallM, AddSwapE, AddSwapM);
flopenrc #(1) EMRegAdd14(clk, reset, FlushM, ~StallM, AddNormOvflowE, AddNormOvflowM);
flopenrc #(1) EMRegAdd15(clk, reset, FlushM, ~StallM, AddSignAE, AddSignAM);
flopenrc #(64) EMRegAdd16(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M);
flopenrc #(64) EMRegAdd17(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M);
flopenrc #(12) EMRegAdd18(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM);
flopenrc #(12) EMRegAdd19(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM);
flopenrc #(11) EMRegAdd20(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
flopenrc #(3) EMRegAdd23(clk, reset, FlushM, ~StallM, AddRmE, AddRmM);
flopenrc #(4) EMRegAdd24(clk, reset, FlushM, ~StallM, AddOpTypeE, AddOpTypeM);
flopenrc #(1) EMRegAdd25(clk, reset, FlushM, ~StallM, AddPE, AddPM);
flopenrc #(1) EMRegAdd26(clk, reset, FlushM, ~StallM, AddOvEnE, AddOvEnM);
flopenrc #(1) EMRegAdd27(clk, reset, FlushM, ~StallM, AddUnEnE, AddUnEnM);
flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM);
flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M);
flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M);
flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM);
flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM);
flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM,
{AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE},
{AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM});
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM);
flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM);
//*****************
// fpcmp E/M pipe registers
//*****************
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpInvalidE, CmpInvalidM);
flopenrc #(64) EMRegCmp3(clk, reset, FlushM, ~StallM, FCmpResultE, FCmpResultM);
flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
//*****************
// fpsgn E/M pipe registers
//*****************
flopenrc #(64) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnResultE, SgnResultM);
flopenrc #(5) EMRegSgn3(clk, reset, FlushM, ~StallM, SgnFlagsE, SgnFlagsM);
//*****************
// other E/M pipe registers
//*****************
flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM,
{FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE},
{FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM});
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
//*****************
// fpuclassify E/M pipe registers
//*****************
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResultE, ClassResultM);
//BEGIN MEMORY STAGE
mux3 #(64) FResMux(AlignedSrcAM, SgnResultM, FCmpResultM, FResSelM, FResM);
assign FFlgM = CmpInvalidM & FResSelM[1];
mux3 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, FResSelM, FResM);
mux3 #(1) FFlgMux(1'b0, SgnNVM, CmpNVM, FResSelM, FFlgM);
//***change to mux
assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]};
mux3 #(`XLEN) IntResMux(FCmpResultM[`XLEN-1:0], SrcXMAligned, ClassResultM[`XLEN-1:0], FIntResSelM, FIntResM);
mux3 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], FIntResSelM, FIntResM);
// second instance of two-stage FMA unit
fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .*);
fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .FrmM, .FmtM,
.ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM,
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM,
.FMAResM, .FMAFlgM);
// second instance of two-stage floating-point add/cvt unit
fpuaddcvt2 fpadd2 (.*);
fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M,
.AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM,
.AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM,
.AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM);
// Align SrcA to MSB when single precision
mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM);
//*****************
//fpregfile M/W pipe registers
// M/W pipe registers
//*****************
flopenrc #(64) MWFpReg1(clk, reset, FlushW, ~StallW, SrcXM, SrcXW);
flopenrc #(64) MWFpReg2(clk, reset, FlushW, ~StallW, SrcYM, SrcYW);
flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FMAFlgM, FMAFlgW);
//*****************
// fma M/W pipe registers
//*****************
flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FmaResultM, FmaResultW);
flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FmaFlagsM, FmaFlagsW);
//*****************
// fpdiv M/W pipe registers
//*****************
flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW);
flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivFlagsM, FDivFlagsW);
flopenrc #(1) MWRegDiv3(clk, reset, FlushW, ~StallW, DivDenormM, DivDenormW);
flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivSqrtFlgM, FDivSqrtFlgW);
//*****************
// fpadd M/W pipe registers
//*****************
flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResultM, FAddResultW);
flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlagsM, FAddFlagsW);
flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW);
flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlgM, FAddFlgW);
//*****************
// fpcmp M/W pipe registers
//*****************
flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpInvalidM, CmpInvalidW);
// flopenrc #(2) MWRegCmp2(clk, reset, FlushW, ~StallW, CmpFCCM, CmpFCCW);
flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, FCmpResultM, FCmpResultW);
flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpNVM, CmpNVW);
flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW);
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW);
//*****************
// fpsgn M/W pipe registers
//*****************
flopenrc #(64) MWRegSgn1(clk, reset, FlushW, ~StallW, SgnResultM, SgnResultW);
flopenrc #(5) MWRegSgn2(clk, reset, FlushW, ~StallW, SgnFlagsM, SgnFlagsW);
//*****************
// other M/W pipe registers
//*****************
flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM},
{FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW});
//*****************
// fpuclassify M/W pipe registers
//*****************
flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, ClassResultM, ClassResultW);
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW);
@ -385,13 +362,13 @@ module fpu (
//***turn into muxs
always_comb begin
case (FResultSelW)
3'b000 : FPUFlagsW = 5'b0;
3'b001 : FPUFlagsW = FmaFlagsW;
3'b010 : FPUFlagsW = FAddFlagsW;
3'b011 : FPUFlagsW = FDivFlagsW;
3'b001 : FPUFlagsW = FMAFlgW;
3'b010 : FPUFlagsW = FAddFlgW;
3'b011 : FPUFlagsW = FDivSqrtFlgW;
3'b100 : FPUFlagsW = {4'b0,FFlgW};
default : FPUFlagsW = 5'bxxxxx;
endcase
@ -400,8 +377,8 @@ module fpu (
always_comb begin
case (FResultSelW)
3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0};
3'b001 : FPUResult64W = FmaResultW;
3'b010 : FPUResult64W = FAddResultW;
3'b001 : FPUResult64W = FMAResW;
3'b010 : FPUResult64W = FAddResW;
3'b011 : FPUResult64W = FDivResultW;
3'b100 : FPUResult64W = FResW;
default : FPUResult64W = 64'bxxxxx;
@ -415,7 +392,9 @@ module fpu (
// define offsets for LSB zero extension or truncation
always_comb begin
// zero extension
//***turn into mux
FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]};
//*** put into mem stage
SetFflagsM = FPUFlagsW;
end

View File

@ -183,11 +183,11 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;
// 64-bit Mantissa Adder/Subtractor
cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub);
cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder
// 64-bit Mantissa Subtractor - to get the two's complement of the
// result when the sign from the adder/subtractor is negative.
cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3);
cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); //***adder
// Finds normal underflow result to determine whether to round final exponent down
//***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be

View File

@ -27,7 +27,7 @@
//
module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);
module fpuaddcvt2 (FAddResM, FAddFlgM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);
input [2:0] FrmM; // Rounding mode - specify values
input [3:0] FOpCtrlM; // Function opcode
@ -51,9 +51,9 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS
input AddSwapM;
// input AddNormOvflowM;
output [63:0] FAddResultM; // Result of operation
output [4:0] FAddFlagsM; // IEEE exception flags
output AddDenormM; // AddDenormM on input or output
output [63:0] FAddResM; // Result of operation
output [4:0] FAddFlgM; // IEEE exception flags
wire AddDenormM; // AddDenormM on input or output
wire P;
assign P = ~FmtM | FOpCtrlM[2];
@ -145,7 +145,7 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS
// exactly where the rounding point is. The rounding units also
// handles special cases and set the exception flags.
// Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlagsM in order to
// Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlgM in order to
// help in processor reservation station detection of load/stores. In
// other words, the processor would like to know ahead of time that
// if the result is an exception then don't load or store.
@ -155,8 +155,8 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS
AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM);
// Store the final result and the exception flags in registers.
assign FAddResultM = Result;
assign {AddDenormM, FAddFlagsM} = {DenormIO, FlagsIn};
assign FAddResM = Result;
assign {AddDenormM, FAddFlgM} = {DenormIO, FlagsIn};
endmodule // fpadd

View File

@ -1,50 +0,0 @@
`include "wally-config.vh"
// fpuclassify: implements the classification used by the RISC-V FCLASS
// instruction. Exactly one of the low ten bits of ClassResultE is set,
// identifying the class of the floating-point operand in SrcXE.
//
// Ports:
//   SrcXE        - operand; a single-precision value is taken from bits [63:32]
//   FmtE         - 0 = single precision, 1 = double precision
//   ClassResultE - class mask in bits [9:0] (see table below), upper bits zero
module fpuclassify (
  input  logic [63:0] SrcXE,
  input  logic        FmtE,           // 0-single 1-double
  output logic [63:0] ClassResultE
  );

  logic [31:0] single;
  logic [63:0] double;
  logic        sign;
  logic        infinity, NaN, zero, normal, subnormal;
  logic        ExpNotZero, ExpOnes, ManNotZero, ExpZero, ManZero, FirstBitMan;

  // single and double precision layouts
  assign single = SrcXE[63:32];
  assign double = SrcXE;
  assign sign   = SrcXE[63];

  // basic calculations for readability
  assign ExpNotZero  = FmtE ? |double[62:52] : |single[30:23];
  assign ExpZero     = ~ExpNotZero;
  assign ExpOnes     = FmtE ? &double[62:52] : &single[30:23];
  assign ManNotZero  = FmtE ? |double[51:0]  : |single[22:0];
  assign ManZero     = ~ManNotZero;
  // most significant mantissa bit: 1 = quiet NaN, 0 = signaling NaN
  assign FirstBitMan = FmtE ? double[51] : single[22];

  // determine the type of number
  assign NaN       = ExpOnes & ManNotZero;
  assign infinity  = ExpOnes & ManZero;
  assign zero      = ExpZero & ManZero;
  assign subnormal = ExpZero & ManNotZero;
  // A normal number has an exponent that is neither all zeros nor all ones.
  // Excluding ExpOnes keeps infinities and NaNs from also being reported as
  // normal, so the result stays one-hot.
  assign normal    = ExpNotZero & ~ExpOnes;

  // determine sub category and combine into the result
  //  bit 0 - -infinity
  //  bit 1 - -normal
  //  bit 2 - -subnormal
  //  bit 3 - -zero
  //  bit 4 - +zero
  //  bit 5 - +subnormal
  //  bit 6 - +normal
  //  bit 7 - +infinity
  //  bit 8 - signaling NaN
  //  bit 9 - quiet NaN
  assign ClassResultE = {{54{1'b0}},
                         FirstBitMan&NaN, ~FirstBitMan&NaN,
                         ~sign&infinity, ~sign&normal, ~sign&subnormal, ~sign&zero,
                         sign&zero, sign&subnormal, sign&normal, sign&infinity};
endmodule

View File

@ -1,243 +0,0 @@
// //
// // File name : fpcomp.v
// // Title : Floating-Point Comparator
// // project : FPU
// // Library : fpcomp
// // Author(s) : James E. Stine
// // Purpose : definition of main unit to floating-point comparator
// // notes :
// //
// // Copyright Oklahoma State University
// //
// // Floating Point Comparator (Algorithm)
// //
// // 1.) Performs sign-extension if the inputs are 32-bit integers.
// // 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
// // 3.) Check for special cases (+0=-0, unordered, and infinite values)
// // and correct for sign bits
// //
// // This module takes 64-bits inputs op1 and op2, VSS, and VDD
// // signals, and a 2-bit signal Sel that indicates the type of
// // operands being compared as indicated below.
// // Sel Description
// // 00 double precision numbers
// // 01 single precision numbers
// // 10 half precision numbers
// // 11 (unused)
// //
// // The comparator produces a 2-bit signal FCC, which
// // indicates the result of the comparison:
// //
// // fcc description
// // 00 A = B
// // 01 A < B
// // 10 A > B
// // 11 A and B are unordered (i.e., A or B is NaN)
// //
// // It also produces an invalid operation flag, which is one
// // if either of the input operands is a signaling NaN per 754
// module fpucmp2 (
// input logic [63:0] op1,
// input logic [63:0] op2,
// input logic [1:0] Sel,
// input logic [7:0] w, x,
// input logic ANaN, BNaN,
// input logic Azero, Bzero,
// input logic [3:0] FOpCtrlM,
// input logic FmtM,
// output logic Invalid, // Invalid Operation
// output logic [1:0] FCC, // Condition Codes
// output logic [63:0] FCmpResultM);
// logic LT; // magnitude op1 < magnitude op2
// logic EQ; // magnitude op1 = magnitude op2
// // Perform magnitude comparison between the 63 least significant bits
// // of the input operands. Only LT and EQ are returned, since GT can
// // be determined from these values.
// magcompare64b_2 magcomp2 (LT, EQ, w, x);
// // Determine final values based on output of magnitude comparison,
// // sign bits, and special case testing.
// exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*);
// endmodule // fpcomp
// /*module magcompare2b (LT, GT, A, B);
// input logic [1:0] A;
// input logic [1:0] B;
// output logic LT;
// output logic GT;
// // Determine if A < B using a minimized sum-of-products expression
// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
// // Determine if A > B using a minimized sum-of-products expression
// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
// endmodule*/ // magcompare2b
// // 2-bit magnitude comparator
// // This module compares two 2-bit values A and B. LT is '1' if A < B
// // and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
// // this version actually incorporates don't cares into the equation to
// // simplify the optimization
// // module magcompare2c (LT, GT, A, B);
// // input logic [1:0] A;
// // input logic [1:0] B;
// // output logic LT;
// // output logic GT;
// // assign LT = B[1] | (!A[1]&B[0]);
// // assign GT = A[1] | (!B[1]&A[0]);
// // endmodule // magcompare2b
// // This module compares two 64-bit values A and B. LT is '1' if A < B
// // and EQ is '1' if A = B. LT and EQ are both '0' if A > B.
// // This structure was modified so
// // that it only does a strict magnitude comparison, and only
// // returns flags for less than (LT) and equal to (EQ). It uses a tree
// // of 63 2-bit magnitude comparators, followed by one OR gate.
// //
// // J. E. Stine and M. J. Schulte, "A combined two's complement and
// // floating-point comparator," 2005 IEEE International Symposium on
// // Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
// // doi: 10.1109/ISCAS.2005.1464531
// module magcompare64b_2 (LT, EQ, w, x);
// input logic [7:0] w;
// input logic [7:0] x;
// logic [3:0] y;
// logic [3:0] z;
// logic [1:0] a;
// logic [1:0] b;
// logic GT;
// output logic LT;
// output logic EQ;
// magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]);
// magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]);
// magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]);
// magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]);
// magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]);
// magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]);
// magcompare2c mag3F(LT, GT, b[1:0], a[1:0]);
// assign EQ = ~(LT | GT);
// endmodule // magcompare64b
// // This module takes 64-bits inputs A and B, two magnitude comparison
// // flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of
// // operands being compared as indicated below.
// // Sel Description
// // 00 double precision numbers
// // 01 single precision numbers
// // 10 half precision numbers
// // 11 bfloat precision numbers
// //
// // The comparator produces a 2-bit signal fcc, which
// // indicates the result of the comparison as follows:
// // fcc description
// // 00 A = B
// // 01 A < B
// // 10 A > B
// // 11 A and B are unordered (i.e., A or B is NaN)
// // It also produces a invalid operation flag, which is one
// // if either of the input operands is a signaling NaN.
// module exception_cmp_2 (
// input logic [63:0] A,
// input logic [63:0] B,
// input logic FmtM,
// input logic LT_mag,
// input logic EQ_mag,
// input logic [1:0] Sel,
// input logic [3:0] FOpCtrlM,
// output logic invalid,
// output logic [1:0] fcc,
// output logic [63:0] FCmpResultM,
// input logic Azero,
// input logic Bzero,
// input logic ANaN,
// input logic BNaN);
// logic dp;
// logic sp;
// logic hp;
// logic ASNaN;
// logic BSNaN;
// logic UO;
// logic GT;
// logic LT;
// logic EQ;
// logic [62:0] sixtythreezeros = 63'h0;
// assign dp = !Sel[1]&!Sel[0];
// assign sp = !Sel[1]&Sel[0];
// assign hp = Sel[1]&!Sel[0];
// // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating
// // point comparison is being performed.
// assign UO = (ANaN | BNaN);
// // Test if A or B is a signaling NaN.
// assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]);
// assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]);
// // If either A or B is a signaling NaN the "Invalid Operation"
// // exception flag is set to one; otherwise it is zero.
// assign invalid = (ASNaN | BSNaN);
// // A and B are equal if (their magnitudes are equal) AND ((their signs are
// // equal) or (their magnitudes are zero AND they are floating point
// // numbers)). Also, A and B are not equal if they are unordered.
// assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO);
// // A is less than B if (A is negative and B is positive) OR
// // (A and B are positive and the magnitude of A is less than
// // the magnitude of B) or (A and B are negative integers and
// // the magnitude of A is less than the magnitude of B) or
// // (A and B are negative floating point numbers and
// // the magnitude of A is greater than the magnitude of B).
// // Also, A is not less than B if A and B are equal or unordered.
// assign LT = ((~LT_mag & A[63] & B[63]) |
// (LT_mag & ~(A[63] & B[63])))&~EQ&~UO;
// // A is greater than B when LT, EQ, and UO are false.
// assign GT = ~(LT | EQ | UO);
// // Note: it may be possible to optimize the setting of fcc
// // a little more, but it is probably not worth the effort.
// // Set the bits of fcc based on LT, GT, EQ, and UO
// assign fcc[0] = LT | UO;
// assign fcc[1] = GT | UO;
// always_comb begin
// case (FOpCtrlM[2:0])
// 3'b111: FCmpResultM = LT ? A : B;//min
// 3'b101: FCmpResultM = GT ? A : B;//max
// 3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal
// 3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than
// 3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal
// default: FCmpResultM = 64'b0;
// endcase
// end
// endmodule // exception_cmp

View File

@ -1,515 +0,0 @@
`include "wally-config.vh"
// `include "../../config/rv64icfd/wally-config.vh" //debug
// freg1adr: floating-point register file with one read port and one write
// port (per the original notes, used with fpsign).
//
// Ports:
//   FmtW      - precision of the writing operation; currently unused because
//               values are not word-aligned based on precision
//   reset     - passed to every floprc register
//   clear     - passed to every floprc register
//   clk       - clock
//   rd        - destination register index
//   write     - write enable for register rd
//   adr1      - read address
//   writeData - value stored into register rd when write is high
//   readData  - current contents of register adr1
module freg1adr (
  input  logic             FmtW,
  input  logic             reset,
  input  logic             clear,
  input  logic             clk,
  input  logic [4:0]       rd,
  input  logic             write,
  input  logic [4:0]       adr1,
  input  logic [`XLEN-1:0] writeData,
  output logic [`XLEN-1:0] readData);

  // reg number should remain static, but it doesn't hurt to parameterize.
  // Addresses are 5 bits wide, so values above 32 cannot be addressed.
  parameter numRegs = 32;

  // intermediary signals - useful for debugging and easy instantiation of
  // the generated registers. Indexed [register][bit] so that regInput[i]
  // and regOutput[i] are each one XLEN-bit register value.
  logic [numRegs-1:0] [`XLEN-1:0] regInput;
  logic [numRegs-1:0] [`XLEN-1:0] regOutput;

  // generate the fp registers themselves along with their write decode
  genvar i;
  generate
    for (i = 0; i < numRegs; i = i + 1) begin:register
      // Destination decoder: only the addressed register takes writeData,
      // and only while write is high. Every other register is fed its own
      // output, so the next-state value is always fully driven (no inferred
      // latches, no X) and each rd value maps to exactly one register.
      assign regInput[i] = (write && (rd == i)) ? writeData : regOutput[i];

      floprc #(`XLEN) freg(.clk(clk), .reset(reset), .clear(clear),
                           .d(regInput[i]), .q(regOutput[i]));
    end
  endgenerate

  // Read port: the address directly indexes the register outputs.
  assign readData = regOutput[adr1];

endmodule
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//********
//formatting separation
//********
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// freg2adr: floating-point register file with two read ports and one write
// port (per the original notes, used with fpadd/cvt, fpdiv/sqrt, and fpcmp).
//
// Ports:
//   FmtW       - precision of the writing operation; currently unused because
//                values are not word-aligned based on precision
//   reset      - passed to every floprc register
//   clear      - passed to every floprc register
//   clk        - clock
//   rd         - destination register index
//   write      - write enable for register rd
//   adr1, adr2 - read addresses
//   writeData  - value stored into register rd when write is high
//   readData1  - current contents of register adr1
//   readData2  - current contents of register adr2
module freg2adr (
  input  logic             FmtW,
  input  logic             reset,
  input  logic             clear,
  input  logic             clk,
  input  logic [4:0]       rd,
  input  logic             write,
  input  logic [4:0]       adr1,
  input  logic [4:0]       adr2,
  input  logic [`XLEN-1:0] writeData,
  output logic [`XLEN-1:0] readData1,
  output logic [`XLEN-1:0] readData2);

  // reg number should remain static, but it doesn't hurt to parameterize.
  // Addresses are 5 bits wide, so values above 32 cannot be addressed.
  parameter numRegs = 32;

  // intermediary signals - useful for debugging and easy instantiation of
  // the generated registers. Indexed [register][bit] so that regInput[i]
  // and regOutput[i] are each one XLEN-bit register value.
  logic [numRegs-1:0] [`XLEN-1:0] regInput;
  logic [numRegs-1:0] [`XLEN-1:0] regOutput;

  // generate the fp registers themselves along with their write decode
  genvar i;
  generate
    for (i = 0; i < numRegs; i = i + 1) begin:register
      // Destination decoder: only the addressed register takes writeData,
      // and only while write is high. Every other register is fed its own
      // output, so the next-state value is always fully driven (no inferred
      // latches, no X) and each rd value maps to exactly one register.
      assign regInput[i] = (write && (rd == i)) ? writeData : regOutput[i];

      floprc #(`XLEN) freg(.clk(clk), .reset(reset), .clear(clear),
                           .d(regInput[i]), .q(regOutput[i]));
    end
  endgenerate

  // Read ports: each address directly indexes the register outputs.
  assign readData1 = regOutput[adr1];
  assign readData2 = regOutput[adr2];

endmodule
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//********
//formatting separation
//********
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// freg3adr: floating-point register file with three read ports and one
// write port (per the original notes, used exclusively for fma).
//
// Ports:
//   FmtW             - precision of the writing operation; currently unused
//                      because values are not word-aligned based on precision
//   reset            - passed to every floprc register
//   clear            - passed to every floprc register
//   clk              - clock
//   rd               - destination register index
//   write            - write enable for register rd
//   adr1, adr2, adr3 - read addresses
//   writeData        - value stored into register rd when write is high
//   readData1..3     - current contents of registers adr1..adr3
module freg3adr (
  input  logic             FmtW,
  input  logic             reset,
  input  logic             clear,
  input  logic             clk,
  input  logic [4:0]       rd,
  input  logic             write,
  input  logic [4:0]       adr1,
  input  logic [4:0]       adr2,
  input  logic [4:0]       adr3,
  input  logic [`XLEN-1:0] writeData,
  output logic [`XLEN-1:0] readData1,
  output logic [`XLEN-1:0] readData2,
  output logic [`XLEN-1:0] readData3);

  // reg number should remain static, but it doesn't hurt to parameterize.
  // Addresses are 5 bits wide, so values above 32 cannot be addressed.
  parameter numRegs = 32;

  // intermediary signals - useful for debugging and easy instantiation of
  // the generated registers; indexed [register][bit].
  logic [numRegs-1:0] [`XLEN-1:0] regInput;
  logic [numRegs-1:0] [`XLEN-1:0] regOutput;

  // generate the fp registers themselves along with their write decode
  genvar i;
  generate
    for (i = 0; i < numRegs; i = i + 1) begin:register
      // Destination decoder: only the addressed register takes writeData,
      // and only while write is high. Every other register is fed its own
      // output. Previously regInput was assigned inside an always_comb only
      // for the addressed entry while write was high, which left all other
      // entries undriven (inferred storage) and drove X into register 0 on
      // the default arm.
      assign regInput[i] = (write && (rd == i)) ? writeData : regOutput[i];

      floprc #(`XLEN) freg(.clk(clk), .reset(reset), .clear(clear),
                           .d(regInput[i]), .q(regOutput[i]));
    end
  endgenerate

  // Read ports: each address directly indexes the register outputs.
  assign readData1 = regOutput[adr1];
  assign readData2 = regOutput[adr2];
  assign readData3 = regOutput[adr3];

endmodule

View File

@ -25,7 +25,7 @@
`include "wally-config.vh"
module FPregfile (
module fregfile (
input logic clk, reset,
input logic we4,
input logic [ 4:0] a1, a2, a3, a4,

View File

@ -1,13 +1,12 @@
//performs the fsgnj/fsgnjn/fsgnjx RISCV instructions
module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE);
module fsgn (
input logic [63:0] SrcXE, SrcYE,
input logic [1:0] SgnOpCodeE,
output logic [63:0] SgnResE,
output logic SgnNVE);
input [63:0] SrcXE, SrcYE;
input [1:0] SgnOpCodeE;
output [63:0] SgnResultE;
output [4:0] SgnFlagsE;
wire AonesExp;
logic AonesExp;
//op code designation:
//
@ -16,8 +15,8 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE);
//10 - fsgnjx - XOR sign values of SrcXE & SrcYE
//
assign SgnResultE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]);
assign SgnResultE[62:0] = SrcXE[62:0];
assign SgnResE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]);
assign SgnResE[62:0] = SrcXE[62:0];
//If the exponent is all ones, then the value is either Inf or NaN,
//both of which will produce a QNaN/SNaN value of some sort. This will
@ -26,6 +25,6 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE);
//the only flag that can occur during this operation is invalid
//due to changing sign on already existing NaN
assign SgnFlagsE = {AonesExp & SgnResultE[63], 1'b0, 1'b0, 1'b0, 1'b0};
assign SgnNVE = AonesExp & SgnResE[63];
endmodule

View File

@ -1,89 +0,0 @@
// Brent-Kung Prefix Adder
// ling_bk13: 13-bit adder built around the ling_brent_kung prefix tree.
//   a, b - 13-bit operands
//   cin  - carry in
//   sum  - 13-bit result
//   cout - carry out
// NOTE(review): the module name suggests a Ling-style formulation (h = pseudo-carry);
// confirm against the cell definitions (rgry/rblk/grey/black), which are not in this file.
module ling_bk13 (cout, sum, a, b, cin);
input [12:0] a, b;
input cin;
output [12:0] sum;
output cout;
// p and g carry one extra low-order position (index 0) for the carry in.
wire [13:0] p,g;
// h and c are produced by the prefix tree; h[13] is additionally driven below.
wire [13:1] h,c;
// pre-computation
// p[13:1] = a|b with p[0] tied to 1; g[13:1] = a&b with g[0] = cin.
assign p={a|b,1'b1};
assign g={a&b, cin};
// prefix tree
// NOTE(review): g[12:0] is 13 bits wide but ling_brent_kung declares its g port
// as [13:0], so the top bit of that port is not connected to g[13] here - confirm intended.
ling_brent_kung prefix_tree(h, c, p[12:0], g[12:0]);
// post-computation
// h[13] is not assigned inside ling_brent_kung, so it is formed here.
assign h[13]=g[13]|c[13];
// Operator precedence (& over ^ over |) makes this (p[13:1]^h) | (g[13:1]&c).
assign sum=p[13:1]^h|g[13:1]&c;
assign cout=p[13]&h[13];
endmodule
// ling_brent_kung: prefix tree used by ling_bk13.
//   p, g - per-bit input terms (index 0 is the carry-in position)
//   h    - prefix outputs H_k_0 for k = 1..12 (h[13] is not assigned in this module)
//   c    - c[1] = g[0]; c[k+1] = p[k] & H_k_0 for k = 1..12
// The H_*/I_* signals are never declared, so they are implicit single-bit nets;
// this module will not compile under `default_nettype none.
// The rgry, rblk, grey and black cells are defined outside this file.
module ling_brent_kung (h, c, p, g);
input [12:0] p;
input [13:0] g;
output [13:1] h;
output [13:1] c;
// parallel-prefix, Brent-Kung
// Stage 1: Generates H/I pairs that span 1 bits
rgry g_1_0 (H_1_0, {g[1],g[0]});
rblk b_3_2 (H_3_2, I_3_2, {g[3],g[2]}, {p[2],p[1]});
rblk b_5_4 (H_5_4, I_5_4, {g[5],g[4]}, {p[4],p[3]});
rblk b_7_6 (H_7_6, I_7_6, {g[7],g[6]}, {p[6],p[5]});
rblk b_9_8 (H_9_8, I_9_8, {g[9],g[8]}, {p[8],p[7]});
rblk b_11_10 (H_11_10, I_11_10, {g[11],g[10]}, {p[10],p[9]});
// H_13_12 / I_13_12 are not read anywhere else in this module.
rblk b_13_12 (H_13_12, I_13_12, {g[13],g[12]}, {p[12],p[11]});
// Stage 2: Generates H/I pairs that span 2 bits
grey g_3_0 (H_3_0, {H_3_2,H_1_0}, I_3_2);
black b_7_4 (H_7_4, I_7_4, {H_7_6,H_5_4}, {I_7_6,I_5_4});
black b_11_8 (H_11_8, I_11_8, {H_11_10,H_9_8}, {I_11_10,I_9_8});
// Stage 3: Generates H/I pairs that span 4 bits
grey g_7_0 (H_7_0, {H_7_4,H_3_0}, I_7_4);
// Stage 4: Generates H/I pairs that span 8 bits
// (no cells are instantiated for this stage at this width)
// Stage 5: Generates H/I pairs that span 4 bits
grey g_11_0 (H_11_0, {H_11_8,H_7_0}, I_11_8);
// Stage 6: Generates H/I pairs that span 2 bits
grey g_5_0 (H_5_0, {H_5_4,H_3_0}, I_5_4);
grey g_9_0 (H_9_0, {H_9_8,H_7_0}, I_9_8);
// Last grey cell stage
// Fills in the even-indexed prefixes from the odd-indexed ones below them.
grey g_2_0 (H_2_0, {g[2],H_1_0}, p[1]);
grey g_4_0 (H_4_0, {g[4],H_3_0}, p[3]);
grey g_6_0 (H_6_0, {g[6],H_5_0}, p[5]);
grey g_8_0 (H_8_0, {g[8],H_7_0}, p[7]);
grey g_10_0 (H_10_0, {g[10],H_9_0}, p[9]);
grey g_12_0 (H_12_0, {g[12],H_11_0}, p[11]);
// Final Stage: Apply c_k+1=p_k&H_k_0
assign c[1]=g[0];
assign h[1]=H_1_0; assign c[2]=p[1]&H_1_0;
assign h[2]=H_2_0; assign c[3]=p[2]&H_2_0;
assign h[3]=H_3_0; assign c[4]=p[3]&H_3_0;
assign h[4]=H_4_0; assign c[5]=p[4]&H_4_0;
assign h[5]=H_5_0; assign c[6]=p[5]&H_5_0;
assign h[6]=H_6_0; assign c[7]=p[6]&H_6_0;
assign h[7]=H_7_0; assign c[8]=p[7]&H_7_0;
assign h[8]=H_8_0; assign c[9]=p[8]&H_8_0;
assign h[9]=H_9_0; assign c[10]=p[9]&H_9_0;
assign h[10]=H_10_0; assign c[11]=p[10]&H_10_0;
assign h[11]=H_11_0; assign c[12]=p[11]&H_11_0;
assign h[12]=H_12_0; assign c[13]=p[12]&H_12_0;
endmodule

View File

@ -168,3 +168,4 @@ module lz52 (ZP, ZV, B);
endmodule // lz52

0
wally-pipelined/src/fpu/mult_R4_64_64_cs.sv Executable file → Normal file
View File

View File

@ -115,11 +115,11 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
assign B_12_overflow = {8'h0, 3'b0, normal_overflow};
assign B_12_underflow = {8'h0, 3'b0, normal_underflow};
cla52 add1(Tmant, Cout, A[62:11], B);
cla52 add1(Tmant, Cout, A[62:11], B); //***adder
cla12 add1_exp(Texp_addone, Cout_overflow, Texp, B_12_overflow);
cla12 add1_exp(Texp_addone, Cout_overflow, Texp, B_12_overflow); //***adder
cla_sub12 sub1_exp(Texp_subone, Texp, B_12_underflow);
cla_sub12 sub1_exp(Texp_subone, Texp, B_12_underflow); //***adder
// Now that rounding is done, we compute the final exponent
// and test for special cases.

View File

@ -1,204 +0,0 @@
// sbtm_a4: purely combinational lookup table mapping an 8-bit index a to a
// 14-bit constant y.
//   a : table index. Entries are listed for 8'b01000000 through 8'b11111111.
//   y : table value. For any index without an entry (a[7:6] == 2'b00) the
//       default arm drives y to all x.
// NOTE(review): the name suggests this is one table of a bipartite-table
// (SBTM) approximation - confirm against the instantiating module.
module sbtm_a4 (input logic [7:0] a,
output logic [13:0] y);
always_comb
case(a)
8'b01000000: y = 14'b10110100010111;
8'b01000001: y = 14'b10110010111111;
8'b01000010: y = 14'b10110001101000;
8'b01000011: y = 14'b10110000010011;
8'b01000100: y = 14'b10101111000001;
8'b01000101: y = 14'b10101101110000;
8'b01000110: y = 14'b10101100100001;
8'b01000111: y = 14'b10101011010011;
8'b01001000: y = 14'b10101010000111;
8'b01001001: y = 14'b10101000111101;
8'b01001010: y = 14'b10100111110100;
8'b01001011: y = 14'b10100110101101;
8'b01001100: y = 14'b10100101100111;
8'b01001101: y = 14'b10100100100010;
8'b01001110: y = 14'b10100011011111;
8'b01001111: y = 14'b10100010011101;
8'b01010000: y = 14'b10100001011100;
8'b01010001: y = 14'b10100000011100;
8'b01010010: y = 14'b10011111011110;
8'b01010011: y = 14'b10011110100001;
8'b01010100: y = 14'b10011101100100;
8'b01010101: y = 14'b10011100101001;
8'b01010110: y = 14'b10011011101111;
8'b01010111: y = 14'b10011010110110;
8'b01011000: y = 14'b10011001111110;
8'b01011001: y = 14'b10011001000110;
8'b01011010: y = 14'b10011000010000;
8'b01011011: y = 14'b10010111011011;
8'b01011100: y = 14'b10010110100110;
8'b01011101: y = 14'b10010101110011;
8'b01011110: y = 14'b10010101000000;
8'b01011111: y = 14'b10010100001110;
8'b01100000: y = 14'b10010011011100;
8'b01100001: y = 14'b10010010101100;
8'b01100010: y = 14'b10010001111100;
8'b01100011: y = 14'b10010001001101;
8'b01100100: y = 14'b10010000011111;
8'b01100101: y = 14'b10001111110001;
8'b01100110: y = 14'b10001111000100;
8'b01100111: y = 14'b10001110011000;
8'b01101000: y = 14'b10001101101100;
8'b01101001: y = 14'b10001101000001;
8'b01101010: y = 14'b10001100010110;
8'b01101011: y = 14'b10001011101100;
8'b01101100: y = 14'b10001011000011;
8'b01101101: y = 14'b10001010011010;
8'b01101110: y = 14'b10001001110010;
8'b01101111: y = 14'b10001001001010;
8'b01110000: y = 14'b10001000100011;
8'b01110001: y = 14'b10000111111101;
8'b01110010: y = 14'b10000111010111;
8'b01110011: y = 14'b10000110110001;
8'b01110100: y = 14'b10000110001100;
8'b01110101: y = 14'b10000101100111;
8'b01110110: y = 14'b10000101000011;
8'b01110111: y = 14'b10000100011111;
8'b01111000: y = 14'b10000011111100;
8'b01111001: y = 14'b10000011011001;
8'b01111010: y = 14'b10000010110111;
8'b01111011: y = 14'b10000010010101;
8'b01111100: y = 14'b10000001110011;
8'b01111101: y = 14'b10000001010010;
8'b01111110: y = 14'b10000000110001;
8'b01111111: y = 14'b10000000010001;
8'b10000000: y = 14'b01111111110001;
8'b10000001: y = 14'b01111111010001;
8'b10000010: y = 14'b01111110110010;
8'b10000011: y = 14'b01111110010011;
8'b10000100: y = 14'b01111101110101;
8'b10000101: y = 14'b01111101010110;
8'b10000110: y = 14'b01111100111001;
8'b10000111: y = 14'b01111100011011;
8'b10001000: y = 14'b01111011111110;
8'b10001001: y = 14'b01111011100001;
8'b10001010: y = 14'b01111011000100;
8'b10001011: y = 14'b01111010101000;
8'b10001100: y = 14'b01111010001100;
8'b10001101: y = 14'b01111001110000;
8'b10001110: y = 14'b01111001010101;
8'b10001111: y = 14'b01111000111010;
8'b10010000: y = 14'b01111000011111;
8'b10010001: y = 14'b01111000000100;
8'b10010010: y = 14'b01110111101010;
8'b10010011: y = 14'b01110111010000;
8'b10010100: y = 14'b01110110110110;
8'b10010101: y = 14'b01110110011101;
8'b10010110: y = 14'b01110110000100;
8'b10010111: y = 14'b01110101101011;
8'b10011000: y = 14'b01110101010010;
8'b10011001: y = 14'b01110100111001;
8'b10011010: y = 14'b01110100100001;
8'b10011011: y = 14'b01110100001001;
8'b10011100: y = 14'b01110011110001;
8'b10011101: y = 14'b01110011011010;
8'b10011110: y = 14'b01110011000010;
8'b10011111: y = 14'b01110010101011;
8'b10100000: y = 14'b01110010010100;
8'b10100001: y = 14'b01110001111110;
8'b10100010: y = 14'b01110001100111;
8'b10100011: y = 14'b01110001010001;
8'b10100100: y = 14'b01110000111011;
8'b10100101: y = 14'b01110000100101;
8'b10100110: y = 14'b01110000001111;
8'b10100111: y = 14'b01101111111010;
8'b10101000: y = 14'b01101111100101;
8'b10101001: y = 14'b01101111010000;
8'b10101010: y = 14'b01101110111011;
8'b10101011: y = 14'b01101110100110;
8'b10101100: y = 14'b01101110010001;
8'b10101101: y = 14'b01101101111101;
8'b10101110: y = 14'b01101101101001;
8'b10101111: y = 14'b01101101010101;
8'b10110000: y = 14'b01101101000001;
8'b10110001: y = 14'b01101100101101;
8'b10110010: y = 14'b01101100011010;
8'b10110011: y = 14'b01101100000110;
8'b10110100: y = 14'b01101011110011;
8'b10110101: y = 14'b01101011100000;
8'b10110110: y = 14'b01101011001101;
8'b10110111: y = 14'b01101010111010;
8'b10111000: y = 14'b01101010101000;
8'b10111001: y = 14'b01101010010101;
8'b10111010: y = 14'b01101010000011;
8'b10111011: y = 14'b01101001110001;
8'b10111100: y = 14'b01101001011111;
8'b10111101: y = 14'b01101001001101;
8'b10111110: y = 14'b01101000111100;
8'b10111111: y = 14'b01101000101010;
8'b11000000: y = 14'b01101000011001;
8'b11000001: y = 14'b01101000000111;
8'b11000010: y = 14'b01100111110110;
8'b11000011: y = 14'b01100111100101;
8'b11000100: y = 14'b01100111010100;
8'b11000101: y = 14'b01100111000011;
8'b11000110: y = 14'b01100110110011;
8'b11000111: y = 14'b01100110100010;
8'b11001000: y = 14'b01100110010010;
8'b11001001: y = 14'b01100110000010;
8'b11001010: y = 14'b01100101110010;
8'b11001011: y = 14'b01100101100001;
8'b11001100: y = 14'b01100101010010;
8'b11001101: y = 14'b01100101000010;
8'b11001110: y = 14'b01100100110010;
8'b11001111: y = 14'b01100100100011;
8'b11010000: y = 14'b01100100010011;
8'b11010001: y = 14'b01100100000100;
8'b11010010: y = 14'b01100011110101;
8'b11010011: y = 14'b01100011100101;
8'b11010100: y = 14'b01100011010110;
8'b11010101: y = 14'b01100011000111;
8'b11010110: y = 14'b01100010111001;
8'b11010111: y = 14'b01100010101010;
8'b11011000: y = 14'b01100010011011;
8'b11011001: y = 14'b01100010001101;
8'b11011010: y = 14'b01100001111110;
8'b11011011: y = 14'b01100001110000;
8'b11011100: y = 14'b01100001100010;
8'b11011101: y = 14'b01100001010100;
8'b11011110: y = 14'b01100001000110;
8'b11011111: y = 14'b01100000111000;
8'b11100000: y = 14'b01100000101010;
8'b11100001: y = 14'b01100000011100;
8'b11100010: y = 14'b01100000001111;
8'b11100011: y = 14'b01100000000001;
8'b11100100: y = 14'b01011111110100;
8'b11100101: y = 14'b01011111100110;
8'b11100110: y = 14'b01011111011001;
8'b11100111: y = 14'b01011111001100;
8'b11101000: y = 14'b01011110111111;
8'b11101001: y = 14'b01011110110010;
8'b11101010: y = 14'b01011110100101;
8'b11101011: y = 14'b01011110011000;
8'b11101100: y = 14'b01011110001011;
8'b11101101: y = 14'b01011101111110;
8'b11101110: y = 14'b01011101110010;
8'b11101111: y = 14'b01011101100101;
8'b11110000: y = 14'b01011101011001;
8'b11110001: y = 14'b01011101001100;
8'b11110010: y = 14'b01011101000000;
8'b11110011: y = 14'b01011100110100;
8'b11110100: y = 14'b01011100101000;
8'b11110101: y = 14'b01011100011100;
8'b11110110: y = 14'b01011100010000;
8'b11110111: y = 14'b01011100000100;
8'b11111000: y = 14'b01011011111000;
8'b11111001: y = 14'b01011011101100;
8'b11111010: y = 14'b01011011100000;
8'b11111011: y = 14'b01011011010101;
8'b11111100: y = 14'b01011011001001;
8'b11111101: y = 14'b01011010111101;
8'b11111110: y = 14'b01011010110010;
8'b11111111: y = 14'b01011010100111;
default: y = 14'bxxxxxxxxxxxxxx;
endcase // case (a)
endmodule // sbtm_a4

View File

@ -1,90 +0,0 @@
// Sklansky Prefix Adder
//
// 14-bit adder wrapper around the sklansky prefix tree.
//   a, b : 14-bit operands
//   cin  : carry-in
//   sum  : 14-bit result
//   cout : carry-out
//
// p and g are 15 bits wide: bit 0 holds the constant 0 (p) and cin (g),
// bits 14:1 hold a^b and a&b for operand bits 13:0.
module sk14 (cout, sum, a, b, cin);
  input  [13:0] a, b;
  input         cin;
  output [13:0] sum;
  output        cout;

  wire [14:0] p, g;
  wire [13:0] c;

  // pre-computation
  assign p = {a ^ b, 1'b0};
  assign g = {a & b, cin};

  // prefix tree
  // sklansky declares p and g as [14:0], so the full vectors are connected.
  // The previous p[13:0] / g[13:0] connections were one bit short and were
  // silently zero-extended at the ports. None of the tree's c outputs depend
  // on p[14] or g[14], so the carries are unchanged.
  // The tree's c[14:1] output connects positionally to c[13:0] here, so
  // c[0] is the tree's c[1] (= g[0] = cin).
  sklansky prefix_tree(c, p, g);

  // post-computation
  assign sum  = p[14:1] ^ c;
  assign cout = g[14] | (p[14] & c[13]);
endmodule
// Sklansky parallel-prefix tree used by sk14.
//   p, g : per-position inputs from the pre-computation stage
//   c    : c[1] = g[0], c[k+1] = G_k_0 for k = 1..13
// The grey / black cells are defined outside this module.
//
// Cells that built prefixes for positions 14 and 15 (b_14_12, b_15_12,
// b_14_8, b_15_8, g_14_0, g_15_0) have been removed. Their results G_14_0 and
// G_15_0 fed no c output, and the position-15 cells read G_15_14 / P_15_14,
// which nothing drives. c[14:1] is unchanged by their removal. p[14] and
// g[14] are therefore unused here; the port widths are kept so existing
// instantiations still connect.
module sklansky (c, p, g);
  input  [14:0] p;
  input  [14:0] g;
  output [14:1] c;

  // Nets between prefix cells (previously implicit 1-bit nets)
  wire G_1_0;
  wire G_3_2,   P_3_2;
  wire G_5_4,   P_5_4;
  wire G_7_6,   P_7_6;
  wire G_9_8,   P_9_8;
  wire G_11_10, P_11_10;
  wire G_13_12, P_13_12;
  wire G_2_0, G_3_0;
  wire G_6_4,   P_6_4;
  wire G_7_4,   P_7_4;
  wire G_10_8,  P_10_8;
  wire G_11_8,  P_11_8;
  wire G_4_0, G_5_0, G_6_0, G_7_0;
  wire G_12_8,  P_12_8;
  wire G_13_8,  P_13_8;
  wire G_8_0, G_9_0, G_10_0, G_11_0, G_12_0, G_13_0;

  // parallel-prefix, Sklansky

  // Stage 1: Generates G/P pairs that span 1 bits
  grey  b_1_0   (G_1_0, {g[1], g[0]}, p[1]);
  black b_3_2   (G_3_2,   P_3_2,   {g[3],  g[2]},  {p[3],  p[2]});
  black b_5_4   (G_5_4,   P_5_4,   {g[5],  g[4]},  {p[5],  p[4]});
  black b_7_6   (G_7_6,   P_7_6,   {g[7],  g[6]},  {p[7],  p[6]});
  black b_9_8   (G_9_8,   P_9_8,   {g[9],  g[8]},  {p[9],  p[8]});
  black b_11_10 (G_11_10, P_11_10, {g[11], g[10]}, {p[11], p[10]});
  black b_13_12 (G_13_12, P_13_12, {g[13], g[12]}, {p[13], p[12]});

  // Stage 2: Generates G/P pairs that span 2 bits
  grey  g_2_0  (G_2_0, {g[2],  G_1_0}, p[2]);
  grey  g_3_0  (G_3_0, {G_3_2, G_1_0}, P_3_2);
  black b_6_4  (G_6_4,  P_6_4,  {g[6],    G_5_4}, {p[6],    P_5_4});
  black b_7_4  (G_7_4,  P_7_4,  {G_7_6,   G_5_4}, {P_7_6,   P_5_4});
  black b_10_8 (G_10_8, P_10_8, {g[10],   G_9_8}, {p[10],   P_9_8});
  black b_11_8 (G_11_8, P_11_8, {G_11_10, G_9_8}, {P_11_10, P_9_8});

  // Stage 3: Generates G/P pairs that span 4 bits
  grey  g_4_0  (G_4_0, {g[4],  G_3_0}, p[4]);
  grey  g_5_0  (G_5_0, {G_5_4, G_3_0}, P_5_4);
  grey  g_6_0  (G_6_0, {G_6_4, G_3_0}, P_6_4);
  grey  g_7_0  (G_7_0, {G_7_4, G_3_0}, P_7_4);
  black b_12_8 (G_12_8, P_12_8, {g[12],   G_11_8}, {p[12],   P_11_8});
  black b_13_8 (G_13_8, P_13_8, {G_13_12, G_11_8}, {P_13_12, P_11_8});

  // Stage 4: Generates G/P pairs that span 8 bits
  grey g_8_0  (G_8_0,  {g[8],   G_7_0}, p[8]);
  grey g_9_0  (G_9_0,  {G_9_8,  G_7_0}, P_9_8);
  grey g_10_0 (G_10_0, {G_10_8, G_7_0}, P_10_8);
  grey g_11_0 (G_11_0, {G_11_8, G_7_0}, P_11_8);
  grey g_12_0 (G_12_0, {G_12_8, G_7_0}, P_12_8);
  grey g_13_0 (G_13_0, {G_13_8, G_7_0}, P_13_8);

  // Final Stage: Apply c_k+1=G_k_0
  assign c[1]  = g[0];
  assign c[2]  = G_1_0;
  assign c[3]  = G_2_0;
  assign c[4]  = G_3_0;
  assign c[5]  = G_4_0;
  assign c[6]  = G_5_0;
  assign c[7]  = G_6_0;
  assign c[8]  = G_7_0;
  assign c[9]  = G_8_0;
  assign c[10] = G_9_0;
  assign c[11] = G_10_0;
  assign c[12] = G_11_0;
  assign c[13] = G_12_0;
  assign c[14] = G_13_0;
endmodule

View File

@ -77,11 +77,8 @@ module ifu (
output logic ITLBMissF, ITLBHitF,
// pmp/pma (inside mmu) signals. *** temporarily from AHB bus but eventually replace with internal versions pre H
// input logic [31:0] HADDR,
// input logic [2:0] HSIZE,
// input logic HWRITE,
input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so they're gonna have to come over into ifu and dmem
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
output logic PMPInstrAccessFaultF, PMAInstrAccessFaultF,
output logic ISquashBusAccessF
@ -130,10 +127,10 @@ module ifu (
.TLBMiss(ITLBMissF),
.TLBHit(ITLBHitF),
.TLBPageFault(ITLBInstrPageFaultF),
.InstrReadF(InstrReadF),
.ExecuteAccessF(InstrReadF), /// *** Ross Thompson this is definitely wrong. InstrReadF changed to icache read to memory.
.AtomicAccessM(1'b0),
.MemReadM(1'b0),
.MemWriteM(1'b0),
.ReadAccessM(1'b0),
.WriteAccessM(1'b0),
.SquashBusAccess(ISquashBusAccessF),
// .HSELRegions(IHSELRegionsF),
.DisableTranslation(1'b0),

View File

@ -85,15 +85,14 @@ module lsu (
output logic DTLBHitM, // not connected
// PMA/PMP (inside mmu) signals
input logic [31:0] HADDR, // *** replace all of these H inputs with physical adress once pma checkers have been edited to use paddr as well.
input logic [2:0] HSIZE,
input logic HWRITE,
input logic AtomicAccessM, WriteAccessM, ReadAccessM, // execute access is hardwired to zero in this mmu because we're only working with data in the M stage.
input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker.
input logic [31:0] HADDR, // *** replace all of these H inputs with physical address once pma checkers have been edited to use paddr as well.
input logic [2:0] HSIZE, HBURST,
input logic HWRITE,
input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker.
output logic PMALoadAccessFaultM, PMAStoreAccessFaultM,
output logic PMPLoadAccessFaultM, PMPStoreAccessFaultM, // *** can these be parameterized? we dont need the m stage ones for the immu and vice versa.
output logic PMALoadAccessFaultM, PMAStoreAccessFaultM,
output logic PMPLoadAccessFaultM, PMPStoreAccessFaultM, // *** can these be parameterized? we dont need the m stage ones for the immu and vice versa.
output logic DSquashBusAccessM
// output logic [5:0] DHSELRegionsM
@ -136,10 +135,10 @@ module lsu (
.TLBMiss(DTLBMissM),
.TLBHit(DTLBHitM),
.TLBPageFault(DTLBPageFaultM),
.InstrReadF(1'b0),
.ExecuteAccessF(1'b0),
.AtomicAccessM(AtomicMaskedM[1]),
.MemWriteM(MemRWM[0]),
.MemReadM(MemRWM[1]),
.WriteAccessM(MemRWM[0]),
.ReadAccessM(MemRWM[1]),
.SquashBusAccess(DSquashBusAccessM),
// .SelRegions(DHSELRegionsM),
.*); // *** the pma/pmp instruction access faults don't really matter here. is it possible to parameterize which outputs exist?

View File

@ -67,12 +67,9 @@ module mmu #(parameter ENTRY_BITS = 3,
output logic TLBPageFault,
// PMA checker signals
// input logic [31:0] HADDR,
// input logic [2:0] HSIZE,
// input logic HWRITE,
input logic AtomicAccessM, InstrReadF, MemWriteM, MemReadM,
input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM,
input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
output logic SquashBusAccess, // *** send to privileged unit
output logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM,

View File

@ -36,7 +36,7 @@ module pmachecker (
// input logic [2:0] HSIZE,
// input logic [2:0] HBURST, // *** in AHBlite, HBURST is hardwired to zero for single bursts only allowed. consider removing from this module if unused.
input logic AtomicAccessM, InstrReadF, MemWriteM, MemReadM, // *** atomicaccessM is unused but might want to stay in for future use.
input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // *** atomicaccessM is unused but might want to stay in for future use.
output logic Cacheable, Idempotent, AtomicAllowed,
output logic PMASquashBusAccess,
@ -52,9 +52,9 @@ module pmachecker (
logic [5:0] SelRegions;
// Determine what type of access is being made
assign AccessRW = MemReadM | MemWriteM;
assign AccessRWX = MemReadM | MemWriteM | InstrReadF;
assign AccessRX = MemReadM | InstrReadF;
assign AccessRW = ReadAccessM | WriteAccessM;
assign AccessRWX = ReadAccessM | WriteAccessM | ExecuteAccessF;
assign AccessRX = ReadAccessM | ExecuteAccessF;
// Determine which region of physical memory (if any) is being accessed
adrdecs adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWX, Size, SelRegions);
@ -66,8 +66,9 @@ module pmachecker (
// Detect access faults
assign PMAAccessFault = (~|SelRegions) & AccessRWX;
assign PMAInstrAccessFaultF = InstrReadF & PMAAccessFault;
assign PMALoadAccessFaultM = MemReadM & PMAAccessFault;
assign PMAStoreAccessFaultM = MemWriteM & PMAAccessFault;
assign PMAInstrAccessFaultF = ExecuteAccessF && PMAAccessFault;
assign PMALoadAccessFaultM = ReadAccessM && PMAAccessFault;
assign PMAStoreAccessFaultM = WriteAccessM && PMAAccessFault;
assign PMASquashBusAccess = PMAAccessFault;
endmodule

View File

@ -35,7 +35,6 @@ module pmpchecker (
input logic [1:0] PrivilegeModeW,
input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW,
// *** ModelSim has a switch -svinputport which controls whether input ports
// are nets (wires) or vars by default. The default setting of this switch is
@ -48,9 +47,10 @@ module pmpchecker (
// boundary. It would be better to store the PMP address registers in a module
// somewhere in the CSR hierarchy and do PMP checking _within_ that module, so
// we don't have to pass around 16 whole registers.
input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
input logic InstrReadF, MemWriteM, MemReadM,
input logic ExecuteAccessF, WriteAccessM, ReadAccessM,
output logic PMPSquashBusAccess,
@ -60,29 +60,23 @@ module pmpchecker (
);
// Bit i is high when the address falls in PMP region i
logic [15:0] Regions;
logic [3:0] MatchedRegion;
logic Match, EnforcePMP;
logic [`PMP_ENTRIES-1:0] Regions, FirstMatch;
//logic [3:0] MatchedRegion;
logic EnforcePMP;
logic [7:0] PMPCFG [15:0];
logic [7:0] PMPCFG [`PMP_ENTRIES-1:0];
// Bit i is high when the address is greater than or equal to PMPADR[i]
// Used for determining whether TOR PMP regions match
logic [15:0] AboveRegion;
logic [`PMP_ENTRIES-1:0] AboveRegion;
// Bit i is high if PMP register i is non-null
logic [15:0] ActiveRegion;
logic [`PMP_ENTRIES-1:0] ActiveRegion;
logic L_Bit, X_Bit, W_Bit, R_Bit;
logic InvalidExecute, InvalidWrite, InvalidRead;
logic [`PMP_ENTRIES-1:0] L_Bits, X_Bits, W_Bits, R_Bits;
//logic InvalidExecute, InvalidWrite, InvalidRead;
// *** extend to optionally 64 configurations
assign {PMPCFG[15], PMPCFG[14], PMPCFG[13], PMPCFG[12],
PMPCFG[11], PMPCFG[10], PMPCFG[9], PMPCFG[8]} = PMPCFG23_REGW;
assign {PMPCFG[7], PMPCFG[6], PMPCFG[5], PMPCFG[4],
PMPCFG[3], PMPCFG[2], PMPCFG[1], PMPCFG[0]} = PMPCFG01_REGW;
genvar i,j;
pmpadrdec pmpadrdec(.PhysicalAddress(PhysicalAddress),
.AdrMode(PMPCFG[0][4:3]),
@ -94,7 +88,6 @@ module pmpchecker (
assign ActiveRegion[0] = |PMPCFG[0][4:3];
generate // *** only for PMP_ENTRIES > 0
genvar i;
for (i = 1; i < `PMP_ENTRIES; i++) begin
pmpadrdec pmpadrdec(.PhysicalAddress(PhysicalAddress),
.AdrMode(PMPCFG[i][4:3]),
@ -107,12 +100,34 @@ module pmpchecker (
end
endgenerate
assign Match = |Regions;
//assign Match = |Regions;
// Only enforce PMP checking for S and U modes when at least one PMP is active
assign EnforcePMP = |ActiveRegion;
// *** extend to up to 64, fold bit extraction to avoid need for binary encoding of region
// verilator lint_off UNOPTFLAT
logic [`PMP_ENTRIES-1:0] NoLowerMatch;
// assign NoLowerMatch[0] = 1;
generate
// verilator lint_off WIDTH
for (j=0; j<`PMP_ENTRIES; j = j+8) begin
assign {PMPCFG[j+7], PMPCFG[j+6], PMPCFG[j+5], PMPCFG[j+4],
PMPCFG[j+3], PMPCFG[j+2], PMPCFG[j+1], PMPCFG[j]} = PMPCFG_ARRAY_REGW[j/8];
end
// verilator lint_on WIDTH
for (i=0; i<`PMP_ENTRIES; i++) begin
if (i==0) begin
assign FirstMatch[i] = Regions[i];
assign NoLowerMatch[i] = ~Regions[i];
end else begin
assign FirstMatch[i] = Regions[i] & NoLowerMatch[i];
assign NoLowerMatch[i] = NoLowerMatch[i-1] & ~Regions[i];
end
assign L_Bits[i] = PMPCFG[i][7] & FirstMatch[i];
assign X_Bits[i] = PMPCFG[i][2] & FirstMatch[i];
assign W_Bits[i] = PMPCFG[i][1] & FirstMatch[i];
assign R_Bits[i] = PMPCFG[i][0] & FirstMatch[i];
end
// verilator lint_on UNOPTFLAT
endgenerate
/* // *** extend to up to 64, fold bit extraction to avoid need for binary encoding of region
always_comb
casez (Regions)
16'b???????????????1: MatchedRegion = 0;
@ -134,25 +149,21 @@ module pmpchecker (
default: MatchedRegion = 0; // Should only occur if there is no match
endcase
assign L_Bit = PMPCFG[MatchedRegion][7] & Match;
assign X_Bit = PMPCFG[MatchedRegion][2] & Match;
assign W_Bit = PMPCFG[MatchedRegion][1] & Match;
assign R_Bit = PMPCFG[MatchedRegion][0] & Match;
assign L_Bit = PMPCFG[MatchedRegion][7] && Match;
assign X_Bit = PMPCFG[MatchedRegion][2] && Match;
assign W_Bit = PMPCFG[MatchedRegion][1] && Match;
assign R_Bit = PMPCFG[MatchedRegion][0] && Match;
assign InvalidExecute = InstrReadF & ~X_Bit;
assign InvalidWrite = MemWriteM & ~W_Bit;
assign InvalidRead = MemReadM & ~R_Bit;
assign InvalidExecute = ExecuteAccessF && ~X_Bit;
assign InvalidWrite = WriteAccessM && ~W_Bit;
assign InvalidRead = ReadAccessM && ~R_Bit;*/
// *** don't cause faults when there are no PMPs
assign PMPInstrAccessFaultF = (PrivilegeModeW == `M_MODE) ?
Match & L_Bit & InvalidExecute :
EnforcePMP & InvalidExecute;
assign PMPStoreAccessFaultM = (PrivilegeModeW == `M_MODE) ?
Match & L_Bit & InvalidWrite :
EnforcePMP & InvalidWrite;
assign PMPLoadAccessFaultM = (PrivilegeModeW == `M_MODE) ?
Match & L_Bit & InvalidRead :
EnforcePMP & InvalidRead;
// Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region
assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L_Bits : |ActiveRegion;
assign PMPInstrAccessFaultF = EnforcePMP && ExecuteAccessF && ~|X_Bits;
assign PMPStoreAccessFaultM = EnforcePMP && WriteAccessM && ~|W_Bits;
assign PMPLoadAccessFaultM = EnforcePMP && ReadAccessM && ~|R_Bits;
assign PMPSquashBusAccess = PMPInstrAccessFaultF | PMPLoadAccessFaultM | PMPStoreAccessFaultM;

View File

@ -60,7 +60,7 @@ module csr #(parameter
output logic STATUS_MIE, STATUS_SIE,
output logic STATUS_MXR, STATUS_SUM,
output logic STATUS_MPRV,
output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW,
output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
input logic [4:0] SetFflagsM,
output logic [2:0] FRM_REGW,

View File

@ -48,25 +48,9 @@ module csrm #(parameter
MTVAL = 12'h343,
MIP = 12'h344,
PMPCFG0 = 12'h3A0,
PMPCFG1 = 12'h3A1,
PMPCFG2 = 12'h3A2,
PMPCFG3 = 12'h3A3,
// .. up to 15 more at consecutive addresses
PMPADDR0 = 12'h3B0,
PMPADDR1 = 12'h3B1,
PMPADDR2 = 12'h3B2,
PMPADDR3 = 12'h3B3,
PMPADDR4 = 12'h3B4,
PMPADDR5 = 12'h3B5,
PMPADDR6 = 12'h3B6,
PMPADDR7 = 12'h3B7,
PMPADDR8 = 12'h3B8,
PMPADDR9 = 12'h3B9,
PMPADDR10 = 12'h3BA,
PMPADDR11 = 12'h3BB,
PMPADDR12 = 12'h3BC,
PMPADDR13 = 12'h3BD,
PMPADDR14 = 12'h3BE,
PMPADDR15 = 12'h3BF,
// ... up to 63 more at consecutive addresses
TSELECT = 12'h7A0,
TDATA1 = 12'h7A1,
TDATA2 = 12'h7A2,
@ -90,7 +74,7 @@ module csrm #(parameter
output logic [31:0] MCOUNTEREN_REGW, MCOUNTINHIBIT_REGW,
output logic [`XLEN-1:0] MEDELEG_REGW, MIDELEG_REGW,
// 64-bit registers in RV64, or two 32-bit registers in RV32
output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW,
output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
input logic [11:0] MIP_REGW, MIE_REGW,
output logic WriteMSTATUSM,
@ -103,8 +87,8 @@ module csrm #(parameter
logic WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM;
logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM;
logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM;
logic WritePMPCFG0M, WritePMPCFG2M;
logic WritePMPADDRM [15:0];
logic [`PMP_ENTRIES/8-1:0] WritePMPCFGM, WritePMPCFGHM ;
logic [`PMP_ENTRIES-1:0] WritePMPADDRM ;
localparam MISA_26 = (`MISA) & 32'h03ffffff;
@ -120,7 +104,7 @@ module csrm #(parameter
assign WriteMEPCM = MTrapM | (CSRMWriteM && (CSRAdrM == MEPC)) && ~StallW;
assign WriteMCAUSEM = MTrapM | (CSRMWriteM && (CSRAdrM == MCAUSE)) && ~StallW;
assign WriteMTVALM = MTrapM | (CSRMWriteM && (CSRAdrM == MTVAL)) && ~StallW;
assign WritePMPCFG0M = (CSRMWriteM && (CSRAdrM == PMPCFG0)) && ~StallW;
/* assign WritePMPCFG0M = (CSRMWriteM && (CSRAdrM == PMPCFG0)) && ~StallW;
assign WritePMPCFG2M = (CSRMWriteM && (CSRAdrM == PMPCFG2)) && ~StallW;
assign WritePMPADDRM[0] = (CSRMWriteM && (CSRAdrM == PMPADDR0)) && ~StallW;
assign WritePMPADDRM[1] = (CSRMWriteM && (CSRAdrM == PMPADDR1)) && ~StallW;
@ -137,10 +121,13 @@ module csrm #(parameter
assign WritePMPADDRM[12] = (CSRMWriteM && (CSRAdrM == PMPADDR12)) && ~StallW;
assign WritePMPADDRM[13] = (CSRMWriteM && (CSRAdrM == PMPADDR13)) && ~StallW;
assign WritePMPADDRM[14] = (CSRMWriteM && (CSRAdrM == PMPADDR14)) && ~StallW;
assign WritePMPADDRM[15] = (CSRMWriteM && (CSRAdrM == PMPADDR15)) && ~StallW;
assign WritePMPADDRM[15] = (CSRMWriteM && (CSRAdrM == PMPADDR15)) && ~StallW; */
assign WriteMCOUNTERENM = CSRMWriteM && (CSRAdrM == MCOUNTEREN) && ~StallW;
assign WriteMCOUNTINHIBITM = CSRMWriteM && (CSRAdrM == MCOUNTINHIBIT) && ~StallW;
assign IllegalCSRMWriteReadonlyM = CSRMWriteM && (CSRAdrM == MVENDORID || CSRAdrM == MARCHID || CSRAdrM == MIMPID || CSRAdrM == MHARTID);
// CSRs
@ -172,33 +159,39 @@ module csrm #(parameter
flopenl #(32) MCOUNTINHIBITreg(clk, reset, WriteMCOUNTINHIBITM, CSRWriteValM[31:0], 32'hFFFFFFFF, MCOUNTINHIBIT_REGW);
// There are PMP_ENTRIES = 0, 16, or 64 PMPADDR registers, each of which has its own flop
// *** need to add support for locked PMPCFG and PMPADR
genvar i;
generate
genvar i;
for (i = 0; i < `PMP_ENTRIES; i++) begin: pmp_flop
for(i=0; i<`PMP_ENTRIES; i++) begin
assign WritePMPADDRM[i] = (CSRMWriteM && (CSRAdrM == PMPADDR0+i)) && ~StallW;
flopenr #(`XLEN) PMPADDRreg(clk, reset, WritePMPADDRM[i], CSRWriteValM, PMPADDR_ARRAY_REGW[i]);
end
for (i=0; i<`PMP_ENTRIES/8; i++) begin
if (`XLEN==64) begin
assign WritePMPCFGM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i)) && ~StallW;
flopenr #(`XLEN) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i]);
end else begin
assign WritePMPCFGM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i)) && ~StallW;
assign WritePMPCFGHM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i+1)) && ~StallW;
flopenr #(`XLEN) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i][31:0]);
flopenr #(`XLEN) PMPCFGHreg(clk, reset, WritePMPCFGHM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i][63:32]);
end
end
endgenerate
// PMPCFG registers are a pair of 64-bit in RV64 and four 32-bit in RV32
generate
if (`XLEN==64) begin
flopenr #(`XLEN) PMPCFG01reg(clk, reset, WritePMPCFG0M, CSRWriteValM, PMPCFG01_REGW);
flopenr #(`XLEN) PMPCFG23reg(clk, reset, WritePMPCFG2M, CSRWriteValM, PMPCFG23_REGW);
end else begin
logic WritePMPCFG1M, WritePMPCFG3M;
assign WritePMPCFG1M = MTrapM | (CSRMWriteM && (CSRAdrM == PMPCFG1));
assign WritePMPCFG3M = MTrapM | (CSRMWriteM && (CSRAdrM == PMPCFG3));
flopenr #(`XLEN) PMPCFG0reg(clk, reset, WritePMPCFG0M, CSRWriteValM, PMPCFG01_REGW[31:0]);
flopenr #(`XLEN) PMPCFG1reg(clk, reset, WritePMPCFG1M, CSRWriteValM, PMPCFG01_REGW[63:32]);
flopenr #(`XLEN) PMPCFG2reg(clk, reset, WritePMPCFG2M, CSRWriteValM, PMPCFG23_REGW[31:0]);
flopenr #(`XLEN) PMPCFG3reg(clk, reset, WritePMPCFG3M, CSRWriteValM, PMPCFG23_REGW[63:32]);
end
endgenerate
// Read machine mode CSRs
// verilator lint_off WIDTH
always_comb begin
IllegalCSRMAccessM = !(`S_SUPPORTED | `U_SUPPORTED & `N_SUPPORTED) &&
(CSRAdrM == MEDELEG || CSRAdrM == MIDELEG); // trap on DELEG register access when no S or N-mode
case (CSRAdrM)
if (CSRAdrM >= PMPADDR0 && CSRAdrM < PMPADDR0 + `PMP_ENTRIES) // reading a PMP entry
CSRMReadValM = PMPADDR_ARRAY_REGW[CSRAdrM - PMPADDR0];
else if (CSRAdrM >= PMPCFG0 && CSRAdrM < PMPCFG0 + `PMP_ENTRIES/8) begin
if (~CSRAdrM[0]) CSRMReadValM = PMPCFG_ARRAY_REGW[CSRAdrM - PMPCFG0][`XLEN-1:0];
else CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG_ARRAY_REGW[CSRAdrM - PMPCFG0][63:32]};
end
else case (CSRAdrM)
MISA_ADR: CSRMReadValM = MISA_REGW;
MVENDORID: CSRMReadValM = 0;
MARCHID: CSRMReadValM = 0;
@ -219,7 +212,7 @@ module csrm #(parameter
MTVAL: CSRMReadValM = MTVAL_REGW;
MCOUNTEREN:CSRMReadValM = {{(`XLEN-32){1'b0}}, MCOUNTEREN_REGW};
MCOUNTINHIBIT:CSRMReadValM = {{(`XLEN-32){1'b0}}, MCOUNTINHIBIT_REGW};
PMPCFG0: CSRMReadValM = PMPCFG01_REGW[`XLEN-1:0];
/* PMPCFG0: CSRMReadValM = PMPCFG01_REGW[`XLEN-1:0];
PMPCFG1: CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG01_REGW[63:32]};
PMPCFG2: CSRMReadValM = PMPCFG23_REGW[`XLEN-1:0];
PMPCFG3: CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG23_REGW[63:32]};
@ -238,11 +231,12 @@ module csrm #(parameter
PMPADDR12: CSRMReadValM = PMPADDR_ARRAY_REGW[12];
PMPADDR13: CSRMReadValM = PMPADDR_ARRAY_REGW[13];
PMPADDR14: CSRMReadValM = PMPADDR_ARRAY_REGW[14];
PMPADDR15: CSRMReadValM = PMPADDR_ARRAY_REGW[15];
PMPADDR15: CSRMReadValM = PMPADDR_ARRAY_REGW[15]; */
default: begin
CSRMReadValM = 0;
IllegalCSRMAccessM = 1;
end
endcase
end
// verilator lint_on WIDTH
endmodule

View File

@ -68,7 +68,7 @@ module privileged (
output logic [1:0] PrivilegeModeW,
output logic [`XLEN-1:0] SATP_REGW,
output logic STATUS_MXR, STATUS_SUM,
output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW,
output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
output logic [2:0] FRM_REGW
);

View File

@ -118,14 +118,12 @@ module wallypipelinedhart
logic [1:0] PageTypeF, PageTypeM;
// PMA checker signals
logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM;
logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM;
logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM;
logic DSquashBusAccessM, ISquashBusAccessF;
// logic [5:0] DHSELRegionsM, IHSELRegionsF;
var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0];
logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW; // signals being sent from privileged unit to pmp/pma in dmem and ifu.
// assign HSELRegions = ExecuteAccessF ? IHSELRegionsF : DHSELRegionsM; // *** this is a pure guess on how one of these should be selected. it passes tests, but is it the right way to do this?
var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0];
var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0];
// IMem stalls
logic ICacheStallF;