⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 integer_8cpp-source.html

📁 Crypto++ 是一个功能全面、非常强大的密码学库
💻 HTML
📖 第 1 页 / 共 5 页
字号:
00807         sum = _mm_add_epi64(a2b1, a0b3);00808         C[3] = _mm_add_epi64(sum, sum1);00809 00810         __m128i a3b1_a1b3 = _mm_mul_epu32(a2301, b2103);00811         __m128i a2b2 = _mm_unpackhi_epi32(a2b2_a0b0, z);00812         __m128i a3b1 = _mm_unpackhi_epi32(a3b1_a1b3, z);00813         __m128i a1b3 = _mm_unpacklo_epi32(a3b1_a1b3, z);00814         sum = _mm_add_epi64(a2b2, a3b1);00815         C[4] = _mm_add_epi64(sum, a1b3);00816 00817         __m128i a1302 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(1, 3, 0, 2));00818         __m128i b1203 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(1, 2, 0, 3));00819         __m128i a3b2_a2b3 = _mm_mul_epu32(a1302, b1203);00820         __m128i a3b2 = _mm_unpackhi_epi32(a3b2_a2b3, z);00821         __m128i a2b3 = _mm_unpacklo_epi32(a3b2_a2b3, z);00822         C[5] = _mm_add_epi64(a3b2, a2b3);00823 }00824 00825 <span class="keywordtype">void</span> P4Optimized::Multiply4(word *C, <span class="keyword">const</span> word *A, <span class="keyword">const</span> word *B)00826 {00827         __m128i temp[7];00828         <span class="keyword">const</span> word *w = (word *)temp;00829         <span class="keyword">const</span> __m64 *mw = (__m64 *)w;00830 00831         P4_Mul(temp, (__m128i *)A, (__m128i *)B);00832 00833         C[0] = w[0];00834 00835         __m64 s1, s2;00836 00837         __m64 w1 = _m_from_int(w[1]);00838         __m64 w4 = mw[2];00839         __m64 w6 = mw[3];00840         __m64 w8 = mw[4];00841         __m64 w10 = mw[5];00842         __m64 w12 = mw[6];00843         __m64 w14 = mw[7];00844         __m64 w16 = mw[8];00845         __m64 w18 = mw[9];00846         __m64 w20 = mw[10];00847         __m64 w22 = mw[11];00848         __m64 w26 = _m_from_int(w[26]);00849 00850         s1 = _mm_add_si64(w1, w4);00851         C[1] = _m_to_int(s1);00852         s1 = _m_psrlqi(s1, 32);00853 00854         s2 = _mm_add_si64(w6, w8);00855         s1 = _mm_add_si64(s1, s2);00856         C[2] = _m_to_int(s1);00857         s1 = 
_m_psrlqi(s1, 32);00858 00859         s2 = _mm_add_si64(w10, w12);00860         s1 = _mm_add_si64(s1, s2);00861         C[3] = _m_to_int(s1);00862         s1 = _m_psrlqi(s1, 32);00863 00864         s2 = _mm_add_si64(w14, w16);00865         s1 = _mm_add_si64(s1, s2);00866         C[4] = _m_to_int(s1);00867         s1 = _m_psrlqi(s1, 32);00868 00869         s2 = _mm_add_si64(w18, w20);00870         s1 = _mm_add_si64(s1, s2);00871         C[5] = _m_to_int(s1);00872         s1 = _m_psrlqi(s1, 32);00873 00874         s2 = _mm_add_si64(w22, w26);00875         s1 = _mm_add_si64(s1, s2);00876         C[6] = _m_to_int(s1);00877         s1 = _m_psrlqi(s1, 32);00878 00879         C[7] = _m_to_int(s1) + w[27];00880         _mm_empty();00881 }00882 00883 <span class="keywordtype">void</span> P4Optimized::Multiply8(word *C, <span class="keyword">const</span> word *A, <span class="keyword">const</span> word *B)00884 {00885         __m128i temp[28];00886         <span class="keyword">const</span> word *w = (word *)temp;00887         <span class="keyword">const</span> __m64 *mw = (__m64 *)w;00888         <span class="keyword">const</span> word *x = (word *)temp+7*4;00889         <span class="keyword">const</span> __m64 *mx = (__m64 *)x;00890         <span class="keyword">const</span> word *y = (word *)temp+7*4*2;00891         <span class="keyword">const</span> __m64 *my = (__m64 *)y;00892         <span class="keyword">const</span> word *z = (word *)temp+7*4*3;00893         <span class="keyword">const</span> __m64 *mz = (__m64 *)z;00894 00895         P4_Mul(temp, (__m128i *)A, (__m128i *)B);00896 00897         P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);00898 00899         P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);00900 00901         P4_Mul(temp+21, (__m128i *)A+1, (__m128i *)B+1);00902 00903         C[0] = w[0];00904 00905         __m64 s1, s2, s3, s4;00906 00907         __m64 w1 = _m_from_int(w[1]);00908         __m64 w4 = mw[2];00909         __m64 w6 = mw[3];00910         
__m64 w8 = mw[4];00911         __m64 w10 = mw[5];00912         __m64 w12 = mw[6];00913         __m64 w14 = mw[7];00914         __m64 w16 = mw[8];00915         __m64 w18 = mw[9];00916         __m64 w20 = mw[10];00917         __m64 w22 = mw[11];00918         __m64 w26 = _m_from_int(w[26]);00919         __m64 w27 = _m_from_int(w[27]);00920 00921         __m64 x0 = _m_from_int(x[0]);00922         __m64 x1 = _m_from_int(x[1]);00923         __m64 x4 = mx[2];00924         __m64 x6 = mx[3];00925         __m64 x8 = mx[4];00926         __m64 x10 = mx[5];00927         __m64 x12 = mx[6];00928         __m64 x14 = mx[7];00929         __m64 x16 = mx[8];00930         __m64 x18 = mx[9];00931         __m64 x20 = mx[10];00932         __m64 x22 = mx[11];00933         __m64 x26 = _m_from_int(x[26]);00934         __m64 x27 = _m_from_int(x[27]);00935 00936         __m64 y0 = _m_from_int(y[0]);00937         __m64 y1 = _m_from_int(y[1]);00938         __m64 y4 = my[2];00939         __m64 y6 = my[3];00940         __m64 y8 = my[4];00941         __m64 y10 = my[5];00942         __m64 y12 = my[6];00943         __m64 y14 = my[7];00944         __m64 y16 = my[8];00945         __m64 y18 = my[9];00946         __m64 y20 = my[10];00947         __m64 y22 = my[11];00948         __m64 y26 = _m_from_int(y[26]);00949         __m64 y27 = _m_from_int(y[27]);00950 00951         __m64 z0 = _m_from_int(z[0]);00952         __m64 z1 = _m_from_int(z[1]);00953         __m64 z4 = mz[2];00954         __m64 z6 = mz[3];00955         __m64 z8 = mz[4];00956         __m64 z10 = mz[5];00957         __m64 z12 = mz[6];00958         __m64 z14 = mz[7];00959         __m64 z16 = mz[8];00960         __m64 z18 = mz[9];00961         __m64 z20 = mz[10];00962         __m64 z22 = mz[11];00963         __m64 z26 = _m_from_int(z[26]);00964 00965         s1 = _mm_add_si64(w1, w4);00966         C[1] = _m_to_int(s1);00967         s1 = _m_psrlqi(s1, 32);00968 00969         s2 = _mm_add_si64(w6, w8);00970         s1 = _mm_add_si64(s1, 
s2);00971         C[2] = _m_to_int(s1);00972         s1 = _m_psrlqi(s1, 32);00973 00974         s2 = _mm_add_si64(w10, w12);00975         s1 = _mm_add_si64(s1, s2);00976         C[3] = _m_to_int(s1);00977         s1 = _m_psrlqi(s1, 32);00978 00979         s3 = _mm_add_si64(x0, y0);00980         s2 = _mm_add_si64(w14, w16);00981         s1 = _mm_add_si64(s1, s3);00982         s1 = _mm_add_si64(s1, s2);00983         C[4] = _m_to_int(s1);00984         s1 = _m_psrlqi(s1, 32);00985 00986         s3 = _mm_add_si64(x1, y1);00987         s4 = _mm_add_si64(x4, y4);00988         s1 = _mm_add_si64(s1, w18);00989         s3 = _mm_add_si64(s3, s4);00990         s1 = _mm_add_si64(s1, w20);00991         s1 = _mm_add_si64(s1, s3);00992         C[5] = _m_to_int(s1);00993         s1 = _m_psrlqi(s1, 32);00994 00995         s3 = _mm_add_si64(x6, y6);00996         s4 = _mm_add_si64(x8, y8);00997         s1 = _mm_add_si64(s1, w22);00998         s3 = _mm_add_si64(s3, s4);00999         s1 = _mm_add_si64(s1, w26);01000         s1 = _mm_add_si64(s1, s3);01001         C[6] = _m_to_int(s1);01002         s1 = _m_psrlqi(s1, 32);01003 01004         s3 = _mm_add_si64(x10, y10);01005         s4 = _mm_add_si64(x12, y12);01006         s1 = _mm_add_si64(s1, w27);01007         s3 = _mm_add_si64(s3, s4);01008         s1 = _mm_add_si64(s1, s3);01009         C[7] = _m_to_int(s1);01010         s1 = _m_psrlqi(s1, 32);01011 01012         s3 = _mm_add_si64(x14, y14);01013         s4 = _mm_add_si64(x16, y16);01014         s1 = _mm_add_si64(s1, z0);01015         s3 = _mm_add_si64(s3, s4);01016         s1 = _mm_add_si64(s1, s3);01017         C[8] = _m_to_int(s1);01018         s1 = _m_psrlqi(s1, 32);01019 01020         s3 = _mm_add_si64(x18, y18);01021         s4 = _mm_add_si64(x20, y20);01022         s1 = _mm_add_si64(s1, z1);01023         s3 = _mm_add_si64(s3, s4);01024         s1 = _mm_add_si64(s1, z4);01025         s1 = _mm_add_si64(s1, s3);01026         C[9] = _m_to_int(s1);01027         s1 = _m_psrlqi(s1, 
32);01028 01029         s3 = _mm_add_si64(x22, y22);01030         s4 = _mm_add_si64(x26, y26);01031         s1 = _mm_add_si64(s1, z6);01032         s3 = _mm_add_si64(s3, s4);01033         s1 = _mm_add_si64(s1, z8);01034         s1 = _mm_add_si64(s1, s3);01035         C[10] = _m_to_int(s1);01036         s1 = _m_psrlqi(s1, 32);01037 01038         s3 = _mm_add_si64(x27, y27);01039         s1 = _mm_add_si64(s1, z10);01040         s1 = _mm_add_si64(s1, z12);01041         s1 = _mm_add_si64(s1, s3);01042         C[11] = _m_to_int(s1);01043         s1 = _m_psrlqi(s1, 32);01044 01045         s3 = _mm_add_si64(z14, z16);01046         s1 = _mm_add_si64(s1, s3);01047         C[12] = _m_to_int(s1);01048         s1 = _m_psrlqi(s1, 32);01049 01050         s3 = _mm_add_si64(z18, z20);01051         s1 = _mm_add_si64(s1, s3);01052         C[13] = _m_to_int(s1);01053         s1 = _m_psrlqi(s1, 32);01054 01055         s3 = _mm_add_si64(z22, z26);01056         s1 = _mm_add_si64(s1, s3);01057         C[14] = _m_to_int(s1);01058         s1 = _m_psrlqi(s1, 32);01059 01060         C[15] = z[27] + _m_to_int(s1);01061         _mm_empty();01062 }01063 01064 <span class="keywordtype">void</span> P4Optimized::Multiply8Bottom(word *C, <span class="keyword">const</span> word *A, <span class="keyword">const</span> word *B)01065 {01066         __m128i temp[21];01067         <span class="keyword">const</span> word *w = (word *)temp;01068         <span class="keyword">const</span> __m64 *mw = (__m64 *)w;01069         <span class="keyword">const</span> word *x = (word *)temp+7*4;01070         <span class="keyword">const</span> __m64 *mx = (__m64 *)x;01071         <span class="keyword">const</span> word *y = (word *)temp+7*4*2;01072         <span class="keyword">const</span> __m64 *my = (__m64 *)y;01073 01074         P4_Mul(temp, (__m128i *)A, (__m128i *)B);01075 01076         P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);01077 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -