📄 sosemanuk_8cpp-source.html
字号:
<a name="l00403"></a>00403 AS2( cmp WORD_REG(si), 80)<a name="l00404"></a>00404 AS2( cmovg WORD_REG(si), WORD_REG(ax))<a name="l00405"></a>00405 AS2( mov SSE2_wordsLeft2, WORD_REG(si))<a name="l00406"></a>00406 AS2( lea WORD_REG(si), [WORD_REG(di)+WORD_REG(si)]) <span class="comment">// use to end first inner loop</span><a name="l00407"></a>00407 AS2( mov SSE2_diEnd, WORD_REG(si))<a name="l00408"></a>00408 <span class="preprocessor">#ifdef _MSC_VER</span><a name="l00409"></a>00409 <span class="preprocessor"></span> AS2( lea WORD_REG(si), s_mulTables)<a name="l00410"></a>00410 <span class="preprocessor">#else</span><a name="l00411"></a>00411 <span class="preprocessor"></span> AS2( mov WORD_REG(si), SSE2_pMulTables)<a name="l00412"></a>00412 <span class="preprocessor">#endif</span><a name="l00413"></a>00413 <span class="preprocessor"></span><a name="l00414"></a>00414 ASL(0) <span class="comment">// first inner loop, 20 words each, 4 iterations</span><a name="l00415"></a>00415 SSE2_STEP(0, 0)<a name="l00416"></a>00416 SSE2_STEP(1, 1)<a name="l00417"></a>00417 SSE2_STEP(2, 0)<a name="l00418"></a>00418 SSE2_STEP(3, 1)<a name="l00419"></a>00419 SSE2_STEP(4, 0)<a name="l00420"></a>00420 SSE2_STEP(5, 1)<a name="l00421"></a>00421 SSE2_STEP(6, 0)<a name="l00422"></a>00422 SSE2_STEP(7, 1)<a name="l00423"></a>00423 SSE2_STEP(8, 0)<a name="l00424"></a>00424 SSE2_STEP(9, 1)<a name="l00425"></a>00425 SSE2_STEP(10, 0)<a name="l00426"></a>00426 SSE2_STEP(11, 1)<a name="l00427"></a>00427 SSE2_STEP(12, 0)<a name="l00428"></a>00428 SSE2_STEP(13, 1)<a name="l00429"></a>00429 SSE2_STEP(14, 0)<a name="l00430"></a>00430 SSE2_STEP(15, 1)<a name="l00431"></a>00431 SSE2_STEP(16, 0)<a name="l00432"></a>00432 SSE2_STEP(17, 1)<a name="l00433"></a>00433 SSE2_STEP(18, 0)<a name="l00434"></a>00434 SSE2_STEP(19, 1)<a name="l00435"></a>00435 <span class="comment">// loop</span><a name="l00436"></a>00436 AS2( <span class="keyword">add</span> WORD_REG(di), 5*4)<a name="l00437"></a>00437 AS2( cmp WORD_REG(di), SSE2_diEnd)<a name="l00438"></a>00438 ASJ( jne, 0, b)<a name="l00439"></a>00439 <a name="l00440"></a>00440 AS2( mov WORD_REG(ax), SSE2_input)<a name="l00441"></a>00441 AS2( mov WORD_REG(bp), SSE2_output)<a name="l00442"></a>00442 AS2( lea WORD_REG(di), [SSE2_uvStart]) <span class="comment">// start of v and u</span><a name="l00443"></a>00443 AS2( mov WORD_REG(si), SSE2_wordsLeft2)<a name="l00444"></a>00444 <a name="l00445"></a>00445 ASL(1) <span class="comment">// second inner loop, 16 words each, 5 iterations</span><a name="l00446"></a>00446 AS2( movdqa xmm0, [WORD_REG(di)+0*20*4])<a name="l00447"></a>00447 AS2( movdqa xmm2, [WORD_REG(di)+2*20*4])<a name="l00448"></a>00448 AS2( movdqa xmm3, [WORD_REG(di)+3*20*4])<a name="l00449"></a>00449 AS2( movdqa xmm1, [WORD_REG(di)+1*20*4])<a name="l00450"></a>00450 <span class="comment">// S2</span><a name="l00451"></a>00451 AS2( movdqa xmm4, xmm0)<a name="l00452"></a>00452 AS2( pand xmm0, xmm2)<a name="l00453"></a>00453 AS2( pxor xmm0, xmm3)<a name="l00454"></a>00454 AS2( pxor xmm2, xmm1)<a name="l00455"></a>00455 AS2( pxor xmm2, xmm0)<a name="l00456"></a>00456 AS2( por xmm3, xmm4)<a name="l00457"></a>00457 AS2( pxor xmm3, xmm1)<a name="l00458"></a>00458 AS2( pxor xmm4, xmm2)<a name="l00459"></a>00459 AS2( movdqa xmm1, xmm3)<a name="l00460"></a>00460 AS2( por xmm3, xmm4)<a name="l00461"></a>00461 AS2( pxor xmm3, xmm0)<a name="l00462"></a>00462 AS2( pand xmm0, xmm1)<a name="l00463"></a>00463 AS2( pxor xmm4, xmm0)<a name="l00464"></a>00464 AS2( pxor xmm1, xmm3)<a name="l00465"></a>00465 AS2( pxor xmm1, xmm4)<a name="l00466"></a>00466 AS2( pxor xmm4, xmm7)<a name="l00467"></a>00467 <span class="comment">// xor with v</span><a name="l00468"></a>00468 AS2( pxor xmm2, [WORD_REG(di)+80*4])<a name="l00469"></a>00469 AS2( pxor xmm3, [WORD_REG(di)+80*5])<a name="l00470"></a>00470 AS2( pxor xmm1, [WORD_REG(di)+80*6])<a name="l00471"></a>00471 AS2( pxor xmm4, [WORD_REG(di)+80*7])<a name="l00472"></a>00472 <span class="comment">// exit loop early if less than 16 words left to output</span><a name="l00473"></a>00473 <span class="comment">// this is necessary because block size is 20 words, and we output 16 words in each iteration of this loop</span><a name="l00474"></a>00474 AS2( cmp WORD_REG(si), 16)<a name="l00475"></a>00475 ASJ( jl, 4, f)<a name="l00476"></a>00476 <span class="comment">// unpack</span><a name="l00477"></a>00477 AS2( movdqa xmm6, xmm2)<a name="l00478"></a>00478 AS2( punpckldq xmm2, xmm3)<a name="l00479"></a>00479 AS2( movdqa xmm5, xmm1)<a name="l00480"></a>00480 AS2( punpckldq xmm1, xmm4)<a name="l00481"></a>00481 AS2( movdqa xmm0, xmm2)<a name="l00482"></a>00482 AS2( punpcklqdq xmm2, xmm1)<a name="l00483"></a>00483 AS2( punpckhqdq xmm0, xmm1)<a name="l00484"></a>00484 AS2( punpckhdq xmm6, xmm3)<a name="l00485"></a>00485 AS2( punpckhdq xmm5, xmm4)<a name="l00486"></a>00486 AS2( movdqa xmm3, xmm6)<a name="l00487"></a>00487 AS2( punpcklqdq xmm6, xmm5)<a name="l00488"></a>00488 AS2( punpckhqdq xmm3, xmm5)<a name="l00489"></a>00489 <span class="comment">// output keystream</span><a name="l00490"></a>00490 AS2( test WORD_REG(ax), WORD_REG(ax))<a name="l00491"></a>00491 ASJ( jz, 3, f)<a name="l00492"></a>00492 AS2( test eax, 0xf)<a name="l00493"></a>00493 ASJ( jnz, 7, f)<a name="l00494"></a>00494 AS2( pxor xmm2, [WORD_REG(ax)+0*16])<a name="l00495"></a>00495 AS2( pxor xmm0, [WORD_REG(ax)+1*16])<a name="l00496"></a>00496 AS2( pxor xmm6, [WORD_REG(ax)+2*16])<a name="l00497"></a>00497 AS2( pxor xmm3, [WORD_REG(ax)+3*16])<a name="l00498"></a>00498 AS2( <span class="keyword">add</span> WORD_REG(ax), 4*16)<a name="l00499"></a>00499 ASJ( jmp, 3, f)<a name="l00500"></a>00500 ASL(7)<a name="l00501"></a>00501 AS2( movdqu xmm1, [WORD_REG(ax)+0*16])<a name="l00502"></a>00502 AS2( pxor xmm2, xmm1)<a name="l00503"></a>00503 AS2( movdqu xmm1, [WORD_REG(ax)+1*16])<a name="l00504"></a>00504 AS2( pxor xmm0, xmm1)<a name="l00505"></a>00505 AS2( movdqu xmm1, [WORD_REG(ax)+2*16])<a name="l00506"></a>00506 AS2( pxor xmm6, xmm1)<a name="l00507"></a>00507 AS2( movdqu xmm1, [WORD_REG(ax)+3*16])<a name="l00508"></a>00508 AS2( pxor xmm3, xmm1)<a name="l00509"></a>00509 AS2( <span class="keyword">add</span> WORD_REG(ax), 4*16)<a name="l00510"></a>00510 ASL(3)<a name="l00511"></a>00511 AS2( test ebp, 0xf)<a name="l00512"></a>00512 ASJ( jnz, 8, f)<a name="l00513"></a>00513 AS2( movdqa [WORD_REG(bp)+0*16], xmm2)<a name="l00514"></a>00514 AS2( movdqa [WORD_REG(bp)+1*16], xmm0)<a name="l00515"></a>00515 AS2( movdqa [WORD_REG(bp)+2*16], xmm6)<a name="l00516"></a>00516 AS2( movdqa [WORD_REG(bp)+3*16], xmm3)<a name="l00517"></a>00517 ASJ( jmp, 9, f)<a name="l00518"></a>00518 ASL(8)<a name="l00519"></a>00519 AS2( movdqu [WORD_REG(bp)+0*16], xmm2)<a name="l00520"></a>00520 AS2( movdqu [WORD_REG(bp)+1*16], xmm0)<a name="l00521"></a>00521 AS2( movdqu [WORD_REG(bp)+2*16], xmm6)<a name="l00522"></a>00522 AS2( movdqu [WORD_REG(bp)+3*16], xmm3)<a name="l00523"></a>00523 ASL(9)<a name="l00524"></a>00524 <span class="comment">// loop</span><a name="l00525"></a>00525 AS2( <span class="keyword">add</span> WORD_REG(di), 4*4)<a name="l00526"></a>00526 AS2( <span class="keyword">add</span> WORD_REG(bp), 4*16)<a name="l00527"></a>00527 AS2( sub WORD_REG(si), 16)<a name="l00528"></a>00528 ASJ( jnz, 1, b)<a name="l00529"></a>00529 <a name="l00530"></a>00530 <span class="comment">// outer loop</span><a name="l00531"></a>00531 AS2( mov WORD_REG(si), SSE2_wordsLeft)<a name="l00532"></a>00532 AS2( sub WORD_REG(si), 80)<a name="l00533"></a>00533 ASJ( jz, 6, f)<a name="l00534"></a>00534 AS2( mov SSE2_wordsLeft, WORD_REG(si))<a name="l00535"></a>00535 AS2( mov SSE2_input, WORD_REG(ax))<a name="l00536"></a>00536 AS2( mov SSE2_output, WORD_REG(bp))<a name="l00537"></a>00537 ASJ( jmp, 2, b)<a name="l00538"></a>00538 <a name="l00539"></a>00539 ASL(4) <span class="comment">// final output of less than 16 words</span><a name="l00540"></a>00540 AS2( test WORD_REG(ax), WORD_REG(ax))<a name="l00541"></a>00541 ASJ( jz, 5, f)<a name="l00542"></a>00542 AS2( movd xmm0, [WORD_REG(ax)+0*4])<a name="l00543"></a>00543 AS2( pxor xmm2, xmm0)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -