📄 sosemanuk_8cpp-source.html
字号:
<a name="l00262"></a>00262 0x0CD320B2, 0x14DC607F, 0x3CCDA081, 0x24C2E04C,<a name="l00263"></a>00263 0xAC977218, 0xB49832D5, 0x9C89F22B, 0x8486B2E6,<a name="l00264"></a>00264 0xCCABDB7E, 0xD4A49BB3, 0xFCB55B4D, 0xE4BA1B80,<a name="l00265"></a>00265 0x17566887, 0x0F59284A, 0x2748E8B4, 0x3F47A879,<a name="l00266"></a>00266 0x776AC1E1, 0x6F65812C, 0x477441D2, 0x5F7B011F,<a name="l00267"></a>00267 0xD72E934B, 0xCF21D386, 0xE7301378, 0xFF3F53B5,<a name="l00268"></a>00268 0xB7123A2D, 0xAF1D7AE0, 0x870CBA1E, 0x9F03FAD3,<a name="l00269"></a>00269 0x3EA637B6, 0x26A9777B, 0x0EB8B785, 0x16B7F748,<a name="l00270"></a>00270 0x5E9A9ED0, 0x4695DE1D, 0x6E841EE3, 0x768B5E2E,<a name="l00271"></a>00271 0xFEDECC7A, 0xE6D18CB7, 0xCEC04C49, 0xD6CF0C84,<a name="l00272"></a>00272 0x9EE2651C, 0x86ED25D1, 0xAEFCE52F, 0xB6F3A5E2<a name="l00273"></a>00273 };<a name="l00274"></a>00274 <a name="l00275"></a>00275 <a name="l00276"></a>00276 <span class="preprocessor">#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64</span><a name="l00277"></a>00277 <span class="preprocessor"></span><span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> <a class="code" href="struct_additive_cipher_concrete_policy.html#5bdc1cb44b0ddc8df0fb6953aec93602">SosemanukPolicy::GetAlignment</a>()<span class="keyword"> const</span><a name="l00278"></a>00278 <span class="keyword"></span>{<a name="l00279"></a>00279 <span class="preprocessor">#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE</span><a name="l00280"></a>00280 <span class="preprocessor"></span><span class="preprocessor">#ifdef __INTEL_COMPILER</span><a name="l00281"></a>00281 <span class="preprocessor"></span> <span class="keywordflow">if</span> (HasSSE2() && !IsP4()) <span class="comment">// Intel compiler produces faster code for this algorithm on the P4</span><a name="l00282"></a>00282 <span class="preprocessor">#else</span><a name="l00283"></a>00283 <span class="preprocessor"></span> <span class="keywordflow">if</span> (HasSSE2())<a name="l00284"></a>00284 <span class="preprocessor">#endif</span><a name="l00285"></a>00285 <span class="preprocessor"></span> <span class="keywordflow">return</span> 16;<a name="l00286"></a>00286 <span class="keywordflow">else</span><a name="l00287"></a>00287 <span class="preprocessor">#endif</span><a name="l00288"></a>00288 <span class="preprocessor"></span> <span class="keywordflow">return</span> 1;<a name="l00289"></a>00289 }<a name="l00290"></a>00290 <a name="l00291"></a>00291 <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> <a class="code" href="struct_additive_cipher_abstract_policy.html#32bbafa12b59e77d4d8bc67e9d5a4004">SosemanukPolicy::GetOptimalBlockSize</a>()<span class="keyword"> const</span><a name="l00292"></a>00292 <span class="keyword"></span>{<a name="l00293"></a>00293 <span class="preprocessor">#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE</span><a name="l00294"></a>00294 <span class="preprocessor"></span><span class="preprocessor">#ifdef __INTEL_COMPILER</span><a name="l00295"></a>00295 <span class="preprocessor"></span> <span class="keywordflow">if</span> (HasSSE2() && !IsP4()) <span class="comment">// Intel compiler produces faster code for this algorithm on the P4</span><a name="l00296"></a>00296 <span class="preprocessor">#else</span><a name="l00297"></a>00297 <span class="preprocessor"></span> <span class="keywordflow">if</span> (HasSSE2())<a name="l00298"></a>00298 <span class="preprocessor">#endif</span><a name="l00299"></a>00299 <span class="preprocessor"></span> <span class="keywordflow">return</span> 4*<a class="code" href="struct_additive_cipher_concrete_policy.html#0c584b68c2f3a8208ff245cd8d09fcd5">BYTES_PER_ITERATION</a>;<a name="l00300"></a>00300 <span class="keywordflow">else</span><a name="l00301"></a>00301 <span class="preprocessor">#endif</span><a name="l00302"></a>00302 <span class="preprocessor"></span> <span class="keywordflow">return</span> <a class="code" href="struct_additive_cipher_concrete_policy.html#0c584b68c2f3a8208ff245cd8d09fcd5">BYTES_PER_ITERATION</a>;<a name="l00303"></a>00303 }<a name="l00304"></a>00304 <span class="preprocessor">#endif</span><a name="l00305"></a>00305 <span class="preprocessor"></span><a name="l00306"></a>00306 <span class="preprocessor">#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code</span><a name="l00307"></a>00307 <span class="preprocessor"></span><a name="l00308"></a><a class="code" href="class_sosemanuk_policy.html#8417e2849165e287db8f75cb1ddaea7c">00308</a> <span class="keywordtype">void</span> <a class="code" href="class_sosemanuk_policy.html#8417e2849165e287db8f75cb1ddaea7c">SosemanukPolicy::OperateKeystream</a>(<a class="code" href="strciphr_8h.html#b4a226527d2bd01ff19bfa14d0974227">KeystreamOperation</a> operation, byte *output, <span class="keyword">const</span> byte *input, <span class="keywordtype">size_t</span> iterationCount)<a name="l00309"></a>00309 {<a name="l00310"></a>00310 <span class="preprocessor">#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE</span><a name="l00311"></a>00311 <span class="preprocessor"></span><span class="preprocessor">#ifdef __INTEL_COMPILER</span><a name="l00312"></a>00312 <span class="preprocessor"></span> <span class="keywordflow">if</span> (HasSSE2() && !IsP4()) <span class="comment">// Intel compiler produces faster code for this algorithm on the P4</span><a name="l00313"></a>00313 <span class="preprocessor">#else</span><a name="l00314"></a>00314 <span class="preprocessor"></span> <span class="keywordflow">if</span> (HasSSE2())<a name="l00315"></a>00315 <span class="preprocessor">#endif</span><a name="l00316"></a>00316 <span class="preprocessor"></span> {<a name="l00317"></a>00317 <span class="preprocessor">#ifdef __GNUC__</span><a name="l00318"></a>00318 <span class="preprocessor"></span> __asm__ __volatile__<a name="l00319"></a>00319 (<a name="l00320"></a>00320 <span class="stringliteral">".intel_syntax noprefix;"</span><a name="l00321"></a>00321 AS_PUSH( bx)<a name="l00322"></a>00322 #<span class="keywordflow">else</span><a name="l00323"></a>00323 word32 *state = <a class="code" href="class_sosemanuk_policy.html#996c06115c9de685f45a11789d3f3b76">m_state</a>;<a name="l00324"></a>00324 AS2( mov WORD_REG(ax), state)<a name="l00325"></a>00325 AS2( mov WORD_REG(di), output)<a name="l00326"></a>00326 AS2( mov WORD_REG(dx), input)<a name="l00327"></a>00327 AS2( mov WORD_REG(cx), iterationCount)<a name="l00328"></a>00328 <span class="preprocessor">#endif</span><a name="l00329"></a>00329 <span class="preprocessor"></span><a name="l00330"></a>00330 <span class="preprocessor">#define SSE2_output WORD_PTR [WORD_REG(sp)+1*WORD_SZ]</span><a name="l00331"></a>00331 <span class="preprocessor"></span><span class="preprocessor">#define SSE2_input WORD_PTR [WORD_REG(sp)+2*WORD_SZ]</span><a name="l00332"></a>00332 <span class="preprocessor"></span><span class="preprocessor">#define SSE2_wordsLeft WORD_PTR [WORD_REG(sp)+3*WORD_SZ]</span><a name="l00333"></a>00333 <span class="preprocessor"></span><span class="preprocessor">#define SSE2_diEnd WORD_PTR [WORD_REG(sp)+4*WORD_SZ]</span><a name="l00334"></a>00334 <span class="preprocessor"></span><span class="preprocessor">#define SSE2_pMulTables WORD_PTR [WORD_REG(sp)+5*WORD_SZ]</span><a name="l00335"></a>00335 <span class="preprocessor"></span><span class="preprocessor">#define SSE2_state WORD_PTR [WORD_REG(sp)+6*WORD_SZ]</span><a name="l00336"></a>00336 <span class="preprocessor"></span><span class="preprocessor">#define SSE2_wordsLeft2 WORD_PTR [WORD_REG(sp)+7*WORD_SZ]</span><a name="l00337"></a>00337 <span class="preprocessor"></span><span class="preprocessor">#define SSE2_stateCopy WORD_REG(sp) + 8*WORD_SZ</span><a name="l00338"></a>00338 <span class="preprocessor"></span><span class="preprocessor">#define SSE2_uvStart SSE2_stateCopy + 12*4</span><a name="l00339"></a>00339 <span class="preprocessor"></span><a name="l00340"></a>00340 AS_PUSH( bp)<a name="l00341"></a>00341 AS2( mov WORD_REG(bx), WORD_REG(sp))<a name="l00342"></a>00342 AS2( and WORD_REG(sp), -16)<a name="l00343"></a>00343 AS2( sub WORD_REG(sp), 80*4*2+12*4+8*WORD_SZ) <span class="comment">// 80 v's, 80 u's, 12 state, 8 locals</span><a name="l00344"></a>00344 AS2( mov [WORD_REG(sp)], WORD_REG(bx))<a name="l00345"></a>00345 AS2( mov SSE2_output, WORD_REG(di))<a name="l00346"></a>00346 AS2( mov SSE2_input, WORD_REG(dx))<a name="l00347"></a>00347 AS2( mov SSE2_state, WORD_REG(ax))<a name="l00348"></a>00348 <span class="preprocessor">#ifndef _MSC_VER</span><a name="l00349"></a>00349 <span class="preprocessor"></span> AS2( mov SSE2_pMulTables, WORD_REG(si))<a name="l00350"></a>00350 <span class="preprocessor">#endif</span><a name="l00351"></a>00351 <span class="preprocessor"></span> AS2( lea WORD_REG(cx), [4*WORD_REG(cx)+WORD_REG(cx)])<a name="l00352"></a>00352 AS2( lea WORD_REG(si), [4*WORD_REG(cx)])<a name="l00353"></a>00353 AS2( mov SSE2_wordsLeft, WORD_REG(si))<a name="l00354"></a>00354 AS2( movdqa xmm0, [WORD_REG(ax)+0*16]) <span class="comment">// copy state to stack to save a register</span><a name="l00355"></a>00355 AS2( movdqa [SSE2_stateCopy+0*16], xmm0)<a name="l00356"></a>00356 AS2( movdqa xmm0, [WORD_REG(ax)+1*16])<a name="l00357"></a>00357 AS2( movdqa [SSE2_stateCopy+1*16], xmm0)<a name="l00358"></a>00358 AS2( movq xmm0, QWORD PTR [WORD_REG(ax)+2*16])<a name="l00359"></a>00359 AS2( movq QWORD PTR [SSE2_stateCopy+2*16], xmm0)<a name="l00360"></a>00360 AS2( psrlq xmm0, 32)<a name="l00361"></a>00361 AS2( movd ebx, xmm0) <span class="comment">// s(9)</span><a name="l00362"></a>00362 AS2( mov ecx, [WORD_REG(ax)+10*4])<a name="l00363"></a>00363 AS2( mov edx, [WORD_REG(ax)+11*4])<a name="l00364"></a>00364 AS2( pcmpeqb xmm7, xmm7) <span class="comment">// all ones</span><a name="l00365"></a>00365 <a name="l00366"></a>00366 <span class="preprocessor">#define s(i) SSE2_stateCopy + ASM_MOD(i,10)*4</span><a name="l00367"></a>00367 <span class="preprocessor"></span><span class="preprocessor">#define u(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4</span><a name="l00368"></a>00368 <span class="preprocessor"></span><span class="preprocessor">#define v(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4 + 80*4</span><a name="l00369"></a>00369 <span class="preprocessor"></span><a name="l00370"></a>00370 <span class="preprocessor">#define r10 ecx</span><a name="l00371"></a>00371 <span class="preprocessor"></span><span class="preprocessor">#define r11 edx</span><a name="l00372"></a>00372 <span class="preprocessor"></span><span class="preprocessor">#define r20 edx</span><a name="l00373"></a>00373 <span class="preprocessor"></span><span class="preprocessor">#define r21 ecx</span><a name="l00374"></a>00374 <span class="preprocessor"></span><a name="l00375"></a>00375 <span class="preprocessor">#define SSE2_STEP(i, j) \</span><a name="l00376"></a>00376 <span class="preprocessor"> AS2( mov eax, [s(i+0)])\</span><a name="l00377"></a>00377 <span class="preprocessor"> AS2( mov [v(i)], eax)\</span><a name="l00378"></a>00378 <span class="preprocessor"> AS2( rol eax, 8)\</span><a name="l00379"></a>00379 <span class="preprocessor"> AS2( lea ebp, [ebx + r2##j])\</span><a name="l00380"></a>00380 <span class="preprocessor"> AS2( xor ebp, r1##j)\</span><a name="l00381"></a>00381 <span class="preprocessor"> AS2( mov [u(i)], ebp)\</span><a name="l00382"></a>00382 <span class="preprocessor"> AS2( mov ebp, 1)\</span><a name="l00383"></a>00383 <span class="preprocessor"> AS2( and ebp, r2##j)\</span><a name="l00384"></a>00384 <span class="preprocessor"> AS1( neg ebp)\</span><a name="l00385"></a>00385 <span class="preprocessor"> AS2( and ebp, ebx)\</span><a name="l00386"></a>00386 <span class="preprocessor"> AS2( xor ebx, eax)\</span><a name="l00387"></a>00387 <span class="preprocessor"> AS2( movzx eax, al)\</span><a name="l00388"></a>00388 <span class="preprocessor"> AS2( xor ebx, [WORD_REG(si)+WORD_REG(ax)*4])\</span><a name="l00389"></a>00389 <span class="preprocessor"> AS2( mov eax, [s(i+3)])\</span><a name="l00390"></a>00390 <span class="preprocessor"> AS2( xor ebp, [s(i+2)])\</span><a name="l00391"></a>00391 <span class="preprocessor"> AS2( add r1##j, ebp)\</span><a name="l00392"></a>00392 <span class="preprocessor"> AS2( movzx ebp, al)\</span><a name="l00393"></a>00393 <span class="preprocessor"> AS2( shr eax, 8)\</span><a name="l00394"></a>00394 <span class="preprocessor"> AS2( xor ebx, [WORD_REG(si)+1024+WORD_REG(bp)*4])\</span><a name="l00395"></a>00395 <span class="preprocessor"> AS2( xor ebx, eax)\</span><a name="l00396"></a>00396 <span class="preprocessor"> AS2( imul r2##j, 0x54655307)\</span><a name="l00397"></a>00397 <span class="preprocessor"> AS2( rol r2##j, 7)\</span><a name="l00398"></a>00398 <span class="preprocessor"> AS2( mov [s(i+0)], ebx)\</span><a name="l00399"></a>00399 <span class="preprocessor"></span><a name="l00400"></a>00400 <span class="preprocessor"></span> ASL(2) <span class="comment">// outer loop, each iteration of this processes 80 words</span><a name="l00401"></a>00401 AS2( lea WORD_REG(di), [SSE2_uvStart]) <span class="comment">// start of v and u</span><a name="l00402"></a>00402 AS2( mov WORD_REG(ax), 80)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -