📄 salsa_8cpp-source.html
字号:
<a name="l00102"></a>00102 ss[0] = _mm_shuffle_epi32(s[0], _MM_SHUFFLE(0, 0, 0, 0));<a name="l00103"></a>00103 ss[1] = _mm_shuffle_epi32(s[0], _MM_SHUFFLE(1, 1, 1, 1));<a name="l00104"></a>00104 ss[2] = _mm_shuffle_epi32(s[0], _MM_SHUFFLE(2, 2, 2, 2));<a name="l00105"></a>00105 ss[3] = _mm_shuffle_epi32(s[0], _MM_SHUFFLE(3, 3, 3, 3));<a name="l00106"></a>00106 ss[4] = _mm_shuffle_epi32(s[1], _MM_SHUFFLE(0, 0, 0, 0));<a name="l00107"></a>00107 ss[6] = _mm_shuffle_epi32(s[1], _MM_SHUFFLE(2, 2, 2, 2));<a name="l00108"></a>00108 ss[7] = _mm_shuffle_epi32(s[1], _MM_SHUFFLE(3, 3, 3, 3));<a name="l00109"></a>00109 ss[9] = _mm_shuffle_epi32(s[2], _MM_SHUFFLE(1, 1, 1, 1));<a name="l00110"></a>00110 ss[10] = _mm_shuffle_epi32(s[2], _MM_SHUFFLE(2, 2, 2, 2));<a name="l00111"></a>00111 ss[11] = _mm_shuffle_epi32(s[2], _MM_SHUFFLE(3, 3, 3, 3));<a name="l00112"></a>00112 ss[12] = _mm_shuffle_epi32(s[3], _MM_SHUFFLE(0, 0, 0, 0));<a name="l00113"></a>00113 ss[13] = _mm_shuffle_epi32(s[3], _MM_SHUFFLE(1, 1, 1, 1));<a name="l00114"></a>00114 ss[14] = _mm_shuffle_epi32(s[3], _MM_SHUFFLE(2, 2, 2, 2));<a name="l00115"></a>00115 ss[15] = _mm_shuffle_epi32(s[3], _MM_SHUFFLE(3, 3, 3, 3));<a name="l00116"></a>00116 <a name="l00117"></a>00117 <span class="keywordflow">do</span><a name="l00118"></a>00118 {<a name="l00119"></a>00119 word32 *countersLo = (word32*)&(ss[8]), *countersHi = (word32*)&(ss[5]);<a name="l00120"></a>00120 <span class="keywordflow">for</span> (i=0; i<4; i++)<a name="l00121"></a>00121 {<a name="l00122"></a>00122 countersLo[i] = m_state[8];<a name="l00123"></a>00123 countersHi[i] = m_state[5];<a name="l00124"></a>00124 <span class="keywordflow">if</span> (++m_state[8] == 0)<a name="l00125"></a>00125 ++m_state[5];<a name="l00126"></a>00126 }<a name="l00127"></a>00127 <a name="l00128"></a>00128 __m128i x0 = ss[0];<a name="l00129"></a>00129 __m128i x1 = ss[1];<a name="l00130"></a>00130 __m128i x2 = ss[2];<a name="l00131"></a>00131 __m128i x3 = ss[3];<a name="l00132"></a>00132 __m128i x4 = ss[4];<a name="l00133"></a>00133 __m128i x5 = ss[5];<a name="l00134"></a>00134 __m128i x6 = ss[6];<a name="l00135"></a>00135 __m128i x7 = ss[7];<a name="l00136"></a>00136 __m128i x8 = ss[8];<a name="l00137"></a>00137 __m128i x9 = ss[9];<a name="l00138"></a>00138 __m128i x10 = ss[10];<a name="l00139"></a>00139 __m128i x11 = ss[11];<a name="l00140"></a>00140 __m128i x12 = ss[12];<a name="l00141"></a>00141 __m128i x13 = ss[13];<a name="l00142"></a>00142 __m128i x14 = ss[14];<a name="l00143"></a>00143 __m128i x15 = ss[15];<a name="l00144"></a>00144 <a name="l00145"></a>00145 <span class="keywordflow">for</span> (i=m_rounds; i>0; i-=2)<a name="l00146"></a>00146 {<a name="l00147"></a>00147 #define QUARTER_ROUND(a, b, c, d) \<a name="l00148"></a>00148 SSE2_QUARTER_ROUND(a, b, d, 7) \<a name="l00149"></a>00149 SSE2_QUARTER_ROUND(b, c, a, 9) \<a name="l00150"></a>00150 SSE2_QUARTER_ROUND(c, d, b, 13) \<a name="l00151"></a>00151 SSE2_QUARTER_ROUND(d, a, c, 18) <a name="l00152"></a>00152 <a name="l00153"></a>00153 QUARTER_ROUND(x0, x4, x8, x12)<a name="l00154"></a>00154 QUARTER_ROUND(x1, x5, x9, x13)<a name="l00155"></a>00155 QUARTER_ROUND(x2, x6, x10, x14)<a name="l00156"></a>00156 QUARTER_ROUND(x3, x7, x11, x15)<a name="l00157"></a>00157 <a name="l00158"></a>00158 QUARTER_ROUND(x0, x13, x10, x7)<a name="l00159"></a>00159 QUARTER_ROUND(x1, x14, x11, x4)<a name="l00160"></a>00160 QUARTER_ROUND(x2, x15, x8, x5)<a name="l00161"></a>00161 QUARTER_ROUND(x3, x12, x9, x6)<a name="l00162"></a>00162 <a name="l00163"></a>00163 #undef QUARTER_ROUND<a name="l00164"></a>00164 }<a name="l00165"></a>00165 <a name="l00166"></a>00166 x0 = _mm_add_epi32(x0, ss[0]);<a name="l00167"></a>00167 x1 = _mm_add_epi32(x1, ss[1]);<a name="l00168"></a>00168 x2 = _mm_add_epi32(x2, ss[2]);<a name="l00169"></a>00169 x3 = _mm_add_epi32(x3, ss[3]);<a name="l00170"></a>00170 x4 = _mm_add_epi32(x4, ss[4]);<a name="l00171"></a>00171 x5 = _mm_add_epi32(x5, ss[5]);<a name="l00172"></a>00172 x6 = _mm_add_epi32(x6, ss[6]);<a name="l00173"></a>00173 x7 = _mm_add_epi32(x7, ss[7]);<a name="l00174"></a>00174 x8 = _mm_add_epi32(x8, ss[8]);<a name="l00175"></a>00175 x9 = _mm_add_epi32(x9, ss[9]);<a name="l00176"></a>00176 x10 = _mm_add_epi32(x10, ss[10]);<a name="l00177"></a>00177 x11 = _mm_add_epi32(x11, ss[11]);<a name="l00178"></a>00178 x12 = _mm_add_epi32(x12, ss[12]);<a name="l00179"></a>00179 x13 = _mm_add_epi32(x13, ss[13]);<a name="l00180"></a>00180 x14 = _mm_add_epi32(x14, ss[14]);<a name="l00181"></a>00181 x15 = _mm_add_epi32(x15, ss[15]);<a name="l00182"></a>00182 <a name="l00183"></a>00183 <span class="preprocessor"> #define OUTPUT_4(x, a, b, c, d, e, f, g, h) {\</span><a name="l00184"></a>00184 <span class="preprocessor"> __m128i t0 = _mm_unpacklo_epi32(a, b);\</span><a name="l00185"></a>00185 <span class="preprocessor"> __m128i t1 = _mm_unpacklo_epi32(c, d);\</span><a name="l00186"></a>00186 <span class="preprocessor"> __m128i t2 = _mm_unpacklo_epi64(t0, t1);\</span><a name="l00187"></a>00187 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_XMM(x, e, t2)\</span><a name="l00188"></a>00188 <span class="preprocessor"> t2 = _mm_unpackhi_epi64(t0, t1);\</span><a name="l00189"></a>00189 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_XMM(x, f, t2)\</span><a name="l00190"></a>00190 <span class="preprocessor"> t0 = _mm_unpackhi_epi32(a, b);\</span><a name="l00191"></a>00191 <span class="preprocessor"> t1 = _mm_unpackhi_epi32(c, d);\</span><a name="l00192"></a>00192 <span class="preprocessor"> t2 = _mm_unpacklo_epi64(t0, t1);\</span><a name="l00193"></a>00193 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_XMM(x, g, t2)\</span><a name="l00194"></a>00194 <span class="preprocessor"> t2 = _mm_unpackhi_epi64(t0, t1);\</span><a name="l00195"></a>00195 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_XMM(x, h, t2)}</span><a name="l00196"></a>00196 <span class="preprocessor"></span><a name="l00197"></a>00197 <span class="preprocessor"> #define SALSA_OUTPUT(x) \</span><a name="l00198"></a>00198 <span class="preprocessor"> OUTPUT_4(x, x0, x13, x10, x7, 0, 4, 8, 12)\</span><a name="l00199"></a>00199 <span class="preprocessor"> OUTPUT_4(x, x4, x1, x14, x11, 1, 5, 9, 13)\</span><a name="l00200"></a>00200 <span class="preprocessor"> OUTPUT_4(x, x8, x5, x2, x15, 2, 6, 10, 14)\</span><a name="l00201"></a>00201 <span class="preprocessor"> OUTPUT_4(x, x12, x9, x6, x3, 3, 7, 11, 15)</span><a name="l00202"></a>00202 <span class="preprocessor"></span><a name="l00203"></a>00203 <a class="code" href="strciphr_8h.html#4f82514b5ab874259ac783ff9b96c4e5">CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH</a>(SALSA_OUTPUT, 4*<a class="code" href="struct_additive_cipher_concrete_policy.html#0c584b68c2f3a8208ff245cd8d09fcd5">BYTES_PER_ITERATION</a>)<a name="l00204"></a>00204 <a name="l00205"></a>00205 <span class="preprocessor">#undef SALSA_OUTPUT</span><a name="l00206"></a>00206 <span class="preprocessor"></span> } <span class="keywordflow">while</span> ((iterationCount-=4) >= 4);<a name="l00207"></a>00207 }<a name="l00208"></a>00208 <span class="preprocessor">#endif</span><a name="l00209"></a>00209 <span class="preprocessor"></span><a name="l00210"></a>00210 <span class="keywordflow">if</span> (!IsP4()) <span class="keywordflow">while</span> (iterationCount)<a name="l00211"></a>00211 {<a name="l00212"></a>00212 --iterationCount;<a name="l00213"></a>00213 __m128i x0 = s[0];<a name="l00214"></a>00214 __m128i x1 = s[1];<a name="l00215"></a>00215 __m128i x2 = s[2];<a name="l00216"></a>00216 __m128i x3 = s[3];<a name="l00217"></a>00217 <a name="l00218"></a>00218 <span class="keywordflow">for</span> (i=m_rounds; i>0; i-=2)<a name="l00219"></a>00219 {<a name="l00220"></a>00220 SSE2_QUARTER_ROUND(x0, x1, x3, 7)<a name="l00221"></a>00221 SSE2_QUARTER_ROUND(x1, x2, x0, 9)<a name="l00222"></a>00222 SSE2_QUARTER_ROUND(x2, x3, x1, 13)<a name="l00223"></a>00223 SSE2_QUARTER_ROUND(x3, x0, x2, 18)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -