📄 t32cb16blend.s
字号:
/* libs/pixelflinger/t32cb16blend.S
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

// Syntax: GNU as, ARM (A32) state, pre-UAL mnemonics (condition before the
// addressing-mode suffix, e.g. ldmlofd). The file is meant to be run through
// the C preprocessor, which strips the comments.
// Uses smulbb/smulbt and pld, so the target must provide the ARMv5TE
// DSP multiplies and preload hint.

    .text
    .balign 4

    .global scanline_t32cb16blend_arm

// ---------------------------------------------------------------------------
// pixel DREG, SRC, FB, OFFSET
//
// Blends one 32-bit source pixel over one 16-bit 5:6:5 destination pixel.
//
//   DREG    register holding the destination pixel; the pixel that is read
//           is the 16-bit field starting at bit OFFSET.
//   SRC     register holding the source pixel. As consumed by this macro:
//           bits 31..24 alpha, 23..16 blue, 15..8 green, 7..0 red
//           (0xAABBGGRR).
//   FB      register receiving the result. With OFFSET == 0, FB is
//           overwritten; with OFFSET != 0 the new pixel is ORed into FB at
//           bit OFFSET, so those bits of FB must be zero beforehand.
//   OFFSET  0 or 16. The non-zero path reads the destination through the
//           top halfword (smulbt) and an unmasked right shift, which is only
//           correct for 16.
//
// Per channel the result is  min(max, src + ((dst * f) >> 8))  where
// f = 0x100 - (sA + (sA >> 7))  and max is 0x1F (red, blue) or 0x3F (green).
// The source colour is added unscaled, so it is presumably expected to be
// premultiplied by alpha (TODO confirm against the callers). The clamp keeps
// a source that violates this from carrying into the neighbouring channel.
//
// Clobbers: r6, r7, lr and the condition flags.
// ---------------------------------------------------------------------------
.macro pixel,   DREG, SRC, FB, OFFSET

    // r7 = f = 0x100 - (sA + (sA >> 7)); 0x100 when sA == 0, 0 when sA == 255
    mov     r7, \SRC, lsr #24
    add     r7, r7, r7, lsr #7
    rsb     r7, r7, #0x100

.if \OFFSET

    // red: destination bits OFFSET+11 .. OFFSET+15, source bits 3..7
    mov     lr, \DREG, lsr #(\OFFSET + 6 + 5)
    smulbb  lr, r7, lr
    mov     r6, \SRC, lsr #3
    and     r6, r6, #0x1F
    add     lr, r6, lr, lsr #8
    cmp     lr, #0x1F                   // saturate to 5 bits
    orrhs   \FB, \FB, #(0x1F << (\OFFSET + 11))
    orrlo   \FB, \FB, lr, lsl #(\OFFSET + 11)

    // green: kept in place (<< 5) inside the top halfword, hence the
    // extra 5 in the shift after the multiply; source bits 10..15
    and     r6, \DREG, #(0x3F << (\OFFSET + 5))
    smulbt  r6, r7, r6
    mov     lr, \SRC, lsr #(8 + 2)
    and     lr, lr, #0x3F
    add     r6, lr, r6, lsr #(5 + 8)
    cmp     r6, #0x3F                   // saturate to 6 bits
    orrhs   \FB, \FB, #(0x3F << (\OFFSET + 5))
    orrlo   \FB, \FB, r6, lsl #(\OFFSET + 5)

    // blue: destination bits OFFSET .. OFFSET+4, source bits 19..23
    and     lr, \DREG, #(0x1F << \OFFSET)
    smulbt  lr, r7, lr
    mov     r6, \SRC, lsr #(8 + 8 + 3)
    and     r6, r6, #0x1F
    add     lr, r6, lr, lsr #8
    cmp     lr, #0x1F                   // saturate to 5 bits
    orrhs   \FB, \FB, #(0x1F << \OFFSET)
    orrlo   \FB, \FB, lr, lsl #\OFFSET

.else

    // red: destination bits 11..15, source bits 3..7.
    // This is the first write to FB, so it replaces FB's previous contents.
    mov     lr, \DREG, lsr #(6 + 5)
    and     lr, lr, #0x1F
    smulbb  lr, r7, lr
    mov     r6, \SRC, lsr #3
    and     r6, r6, #0x1F
    add     lr, r6, lr, lsr #8
    cmp     lr, #0x1F                   // saturate to 5 bits
    movhs   \FB, #(0x1F << 11)
    movlo   \FB, lr, lsl #11

    // green: kept in place (<< 5), hence the extra 5 in the shift after
    // the multiply; source bits 10..15
    and     r6, \DREG, #(0x3F << 5)
    smulbb  r6, r7, r6
    mov     lr, \SRC, lsr #(8 + 2)
    and     lr, lr, #0x3F
    add     r6, lr, r6, lsr #(5 + 8)
    cmp     r6, #0x3F                   // saturate to 6 bits
    orrhs   \FB, \FB, #(0x3F << 5)
    orrlo   \FB, \FB, r6, lsl #5

    // blue: destination bits 0..4, source bits 19..23
    and     lr, \DREG, #0x1F
    smulbb  lr, r7, lr
    mov     r6, \SRC, lsr #(8 + 8 + 3)
    and     r6, r6, #0x1F
    add     lr, r6, lr, lsr #8
    cmp     lr, #0x1F                   // saturate to 5 bits
    orrhs   \FB, \FB, #0x1F
    orrlo   \FB, \FB, lr

.endif

    .endm


// ---------------------------------------------------------------------------
// scanline_t32cb16blend_arm
//
// Blends a run of 32-bit source pixels over a run of 16-bit 5:6:5
// destination pixels, in place, using the pixel macro defined in this file.
//
// In:   r0 = dst ptr (16-bit pixels, read and written)
//       r1 = src ptr (32-bit pixels, read only)
//       r2 = count, in pixels; 0 is accepted
// Out:  none
//
// Saved and restored: r4-r7, lr.  Clobbered: r0-r3, r12, flags.
//
// NOTE(review): the code only distinguishes a 4-byte aligned dst from an
// unaligned one and fixes the latter up with a single halfword access, so it
// assumes dst is at least 2-byte aligned and src is 4-byte aligned - confirm
// against the callers.
//
// Register use:
// r0:  dst ptr
// r1:  src ptr
// r2:  count
// r3:  d   (destination pixel(s))
// r4:  s0  (first source pixel)
// r5:  s1  (second source pixel)
// r6:  pixel macro scratch
// r7:  pixel macro scratch
// r8:  free
// r9:  free
// r10: free
// r11: free
// r12: scratch (blended result)
// r14: pixel macro scratch
// ---------------------------------------------------------------------------
scanline_t32cb16blend_arm:
    stmfd   sp!, {r4-r7, lr}

    pld     [r0]
    pld     [r1]

    // align DST to 32 bits: if it is not, blend a single pixel first
    tst     r0, #0x3
    beq     aligned
    subs    r2, r2, #1
    ldmlofd sp!, {r4-r7, lr}            // count was 0: return
    bxlo    lr

    // Blend exactly one pixel. Reached for the leading unaligned pixel and,
    // from label 9, for the single trailing pixel.
last:
    ldr     r4, [r1], #4
    ldrh    r3, [r0]
    pixel   r3, r4, r12, 0
    strh    r12, [r0], #2

aligned:
    // From here on r2 holds (pixels remaining - 2).
    subs    r2, r2, #2
    blo     9f                          // fewer than 2 pixels left

    // The main loop is unrolled twice and processes 4 pixels per trip,
    // two at a time.
8:  ldmia   r1!, {r4, r5}
    // stream the source
    pld     [r1, #32]
    add     r0, r0, #4
    // both source pixels are all zero: the destination would be left
    // unchanged, so skip this pair
    orrs    r3, r4, r5
    beq     7f

    // load the destination (r0 has already been advanced past the pair)
    ldr     r3, [r0, #-4]
    // stream the destination
    pld     [r0, #32]
    pixel   r3, r4, r12, 0
    pixel   r3, r5, r12, 16
    // effectively, we're getting write-combining by virtue of the
    // cpu's write-back cache.
    str     r12, [r0, #-4]

    // 2nd iteration of the loop, don't stream anything
    subs    r2, r2, #2
    blo     9f                          // fewer than 2 pixels left
    ldmia   r1!, {r4, r5}
    add     r0, r0, #4
    orrs    r3, r4, r5
    beq     7f
    ldr     r3, [r0, #-4]
    pixel   r3, r4, r12, 0
    pixel   r3, r5, r12, 16
    str     r12, [r0, #-4]

7:  subs    r2, r2, #2
    bhs     8b                          // at least 2 more pixels

    // Tail: r2 is -2 (nothing left) or -1 (one pixel left). Adding 1
    // produces a carry only in the second case.
9:  adds    r2, r2, #1
    ldmlofd sp!, {r4-r7, lr}            // nothing left: return
    bxlo    lr
    b       last                        // one pixel left; r2 is now 0
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -