📄 scale_horz_h.asm
字号:
* ========================================================================= *
* *
* TEXAS INSTRUMENTS, INC. *
* *
* NAME *
* scale_horz *
* *
* *
* USAGE *
* This routine is C-callable and can be called as: *
* *
* void scale_horz *
* ( *
* unsigned short *in_data, /* Ptr to unscaled lines */ *
* unsigned int in_len, /* Pixels/line unscaled */ *
* short *out_data, /* Ptr to scaled data lines */ *
* unsigned int out_len, /* Pixels/line of scaled data */ *
* short *hh, /* Ptr to filter taps, *
* interleaved odd/even *
* outputs */ *
* unsigned int l_hh, /* Length of scaling filters */ *
* unsigned int n_hh, /* Number of scaling filters */ *
* short *patch /* Ptr to decrement pattern */ *
* ); *
* *
* DESCRIPTION *
* *
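* This code can scale up or down 1 line of data, in the *
* ratio out_len : in_len, e.g. 1:3, 4:3, 5:6. The *
* filters are designed outside of the loop using a *
* general purpose resizing algorithm. In the pseudo-code below, *
* plane_x and plane_y denote in_data and out_data, and n_y *
* denotes out_len. *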
* *
* patch0 = patch + 2; *
* filter_count = n_hh; *
* ka = 0; *
* *
* line0_x = plane_x; *
* line0_y = plane_y; *
* ptr_hh = hh; *
* jump = (int) patch[0]; ka = jump >> 1; *
* jump = (int) patch[1]; kb = jump >> 1; *
* *
* for ( i = 0; i < n_y; i += 2) *
* { *
* y0 = 1 << 5; *
* y1 = 1 << 5; *
* for ( j = 0; j < l_hh; j+=4) *
* { *
* /* even outputs */ *
* for (k=0; k < 4; k++) *
* { *
* h0 = *ptr_hh++; *
* x0 = *(line0_x+ ka + k); *
* y0 += ( x0 * h0 ); *
* } *
* jump = (int) (*patch0++); *
* ka = ka + (jump>>1); *
* /* odd outputs */ *
* for (k=0; k < 4; k++) *
* { *
* h1 = *ptr_hh++; *
* x1 = *(line0_x + kb + k); *
* y1 += ( x1 * h1 ); *
* } *
* jump = (int) (*patch0++); *
* kb = kb + (jump>>1); *
* } *
* *line0_y++ = (short) (y0 >> 6) ; *
* *line0_y++ = (short) (y1 >> 6) ; *
* *
* filter_count -= 2; *
* if (!filter_count) *
* { *
* patch0 = patch + 2; *
* ptr_hh = hh; *
* filter_count = n_hh; *
* } *
* } *
* *
* ASSUMPTIONS *
* One line of data is produced per function call. *
* *
* The line must be aligned on a double word boundary and its length *
* must be a multiple of 8 bytes. *
* *
* Filters are multiples of 4 taps, maximum number of filters is 256. *
* *
* The computations for each output are interleaved, thus the filters *
* are interleaved on a 4 short interval. *
* *
* Little-endian configuration is used, and the input and output data *
* are 16-bit unsigned and signed shorts respectively. The filters *
* are also 16-bit signed shorts in 12-bit precision. *
* *
* The n_hh filters are all of the same length and are *
* strung together in a single linear array. *
* *
* Interrupts are masked by the function for most of its duration. *
* *
* MEMORY NOTE *
* Some bank hits will occur in this code for certain scale *
* factors and filter lengths. *
* *
* For l_hh = 4, k = 0; for l_hh = 8, k = 0.031; for l_hh = 16, k = 0.015. *
* Different filter lengths can produce different numbers of bank *
* conflicts. Overall, these bank conflicts have nearly zero effect. *
* *
* For l_hh=4: k=0, l_hh=8: k=1/32, l_hh=12: k=0, l_hh=16: k=1/64 *
* For l_hh % 8 == 0, k = 1/(4*l_hh) else k = 0 *
* *
* 'k' is the bank conflict between the store and the guidance table *
* load. Depending on the relative sizes of the filters and *
* memory width, this bank conflict is between 0 and 3.1% *
* overhead. *
* *
* TECHNIQUES *
* The outputs are computed using interleaved inputs. The patch table *
* controls the access of 2 parallel pointers. For example an 8/33 *
* scale factor will have the following access pattern. *
* *
* 11111111112222222222333333333344444444445555555555 *
* 012345678901234567890123456789012345678901234567890123456789 *
* *
* 0 e xxxxxxxx <-start point of even output 0 *
* 1 o xxxxxxxx <-start point of odd output 4 *
* 2 e xxxxxxxx *
* 3 o xxxxxxxx *
* 4 e xxxxxxxx *
* 5 o xxxxxxxx *
* 6 e xxxxxxxx *
* 7 o xxxxxxxx *
* 0 e xxxxxxxx <-next start *
* 1 o xxxxxxxx <-next start *
* *
* *
* From this diagram the even pointer jumps 4 then another 4 as the *
* filters have 8 taps, it then jumps 4 to get to the next set of *
* input data. The odd pointer does the same. These jumps are *
* interleaved and so are the filter coefficients. The jumps are *
* expressed in bytes, as non-scaled non-aligned double word *
* accesses are used. In this case the table will be: *
* *
* short patch[] = {0,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,10,10,8,8}; *
* *
* Notice the first 2 entries are the initial starting points for *
* the two pointers. To remove a dependency in the code the last 2 *
* entries are copies of the second pair (entries 2 and 3). This makes *
* the table almost circular. *
* *
* NOTES *
* Other scale factors can be achieved with the following *
* example tables. *
* *
* Scale Factor Taps Table short jump[] = *
* -------------------------------------------------------------------- *
* 5/6 4 {0, 1, 2, 2, 2, 3, 3, 2, 2, 2, 3, 3, 2, 2} *
* 4/3 8 {0, 4, 4, 4, -3, -2, 4, 4, -2, -3, 4, 4} *
* 3/4 12 {0,1,4,4,4,4,-6,-5,4,4,4,4,-5,-6,4,4,4,4,-5,-5,4,4} *
* 6/5 16 {0,0,4,4,4,4,4,4,-11,-10,4,4,4,4,4,4,-10,-10, *
* 4,4,4,4,4,4,-10,-11,4,4} *
* *
* The software to produce these tables and the simple coefficients *
* for an arbitrary scale factor and number of taps can be found *
* in the API document. Note that in the case of 3/4, odd scale *
* factors are doubled, giving 6/8 instead of 3/4. *
* *
* CYCLES *
* cycles = 0.5 * out_len * l_hh * (1+k) + 30. *
* If (l_hh % 8) == 0 then k = 1/(4*l_hh) else k = 0. *
* *
* For l_hh = 16, in_len = 1024, and out_len = 1366, cycles = 11129. *
* For l_hh = 8, in_len = 640, and out_len = 120, cycles = 525. *
* *
* CODESIZE *
* 452 bytes *
* ------------------------------------------------------------------------- *
* Copyright (c) 2001 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
* Copyright notice: the text below is emitted into its own named section, *
* ".data:copyright_h", under the label _Copyright. *
* NOTE(review): the directives appear flush-left in this listing; TI *
* assemblers treat text starting in column 1 as a label, so the original *
* indentation was presumably lost in extraction - confirm before building. *
.sect ".data:copyright_h"
* The two .string directives lay the notice out back to back. *
* NOTE(review): .string is not expected to append a terminating NUL - *
* confirm if anything reads _Copyright as a C string. *
_Copyright: .string "Copyright (C) 2001 Texas Instruments Incorporated. "
.string "All Rights Reserved."
* Pull in the companion header for this routine. Its contents are not *
* visible here; presumably it declares the exported symbol(s) and selects *
* the code section for what follows - TODO confirm, since no code .sect *
* directive is visible between this data section and the entry label. *
.include "scale_horz_h.h62"
_scale_horz_asm:
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
.asg A4, A_plane_x ;
.asg B4, B_n_x ;
.asg A6, A_plane_y ;
.asg B6, B_n_y ;
.asg A8, A_hh ;
.asg B8, B_l_hh ;
.asg A10, A_n_hh ;
.asg B10, B_patch ;
.asg A12, A_filt_state ;
.asg B18, B_filt_no ;
.asg B16, B_hh0 ;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -