📄 scale_vert_h.asm
字号:
* ========================================================================= *
* TEXAS INSTRUMENTS, INC. *
* *
* NAME *
* scale_vert *
* *
* USAGE *
* This routine is C-callable and can be called as: *
* *
* void scale_vert *
* ( *
* short *in_cols, /* Input image (16-bit pixels) */ *
* int cols, /* Width of the image. */ *
* short *out_cols, /* Output image (16-bit pixels) */ *
* short *ptr_hh, /* Pointer to filter taps. */ *
* short *mod_hh, /* Pointer to rotated filter taps. */ *
* int l_hh, /* Taps per filter (multiple of 4) */ *
* int start_line /* Starting position for filter. */ *
* ) *
* *
* DESCRIPTION *
* Performs a vertical scaling function on a block of data from a *
* frame store. It uses a scaling filter decided upon by the *
* calling function. The filter is rotated depending on the *
* starting line of the circular buffer. One line of length *
* 'cols' is produced per function call. The same filter runs *
* along the entire length of buffer and perfoms the filter *
* vertically along each set of parallel data points. *
* *
* void scale_vert *
* ( *
* short *ptr_plane_x, *
* short *ptr_plane_y, *
* int n_x, *
* short *ptr_hh, *
* short *mod_hh, *
* int l_hh, *
* int start_line *
* ) *
* { *
* int taps_count, sample, i, j, ka; *
* int max_filt, y0, y1, y2, y3, block; *
* int t_y0, t_y1, t_y2, t_y3; *
* short h0, h1, x0, x1, x2, x3, x01, x11, x21, x31; *
* short * ptr_line0_x, * ptr_line1_x; *
* short * ptr_line2_x, * ptr_line3_x; *
* short * ptr_line0_x1, * ptr_line1_x1; *
* short * ptr_line2_x1, * ptr_line3_x1; *
* short * ptr_line0_y, * filter; *
* *
* taps_count = l_hh - 2; *
* *
* ptr_line0_x = ptr_plane_x; *
* ptr_line0_y = ptr_plane_y; *
* *
* y0 = 0x0; y1 = 0x0; *
* y2 = 0x0; y3 = 0x0; *
* *
* max_filt = n_x*l_hh; *
* *
* /* if we know the pointer will need to wrap around, a positive *
* correction is needed instead of a negative one */ *
* *
* block = n_x*l_hh; *
* block = block - 4; /* next column on */ *
* *
* if (!start_line) start_line = l_hh; *
* filter = &ptr_hh[l_hh-start_line]; *
* j = start_line; *
* for (i = 0; i < l_hh; i++) *
* { *
* mod_hh[i] = *filter++; *
* j--; *
* if (!j) filter= ptr_hh; *
* if (!j) j = l_hh; *
* } *
* filter = mod_hh; *
* ka = 0; *
* *
* ptr_line1_x = ptr_line0_x+1; *
* ptr_line2_x = ptr_line1_x+1; *
* ptr_line3_x = ptr_line2_x+1; *
* *
* ptr_line0_x1 = ptr_line0_x+n_x ; *
* ptr_line1_x1 = ptr_line0_x1+1 ; *
* ptr_line2_x1 = ptr_line1_x1+1 ; *
* ptr_line3_x1 = ptr_line2_x1+1 ; *
* *
* for ( i = 0; i < max_filt; i+=8) *
* { *
* h0 = *filter++; *
* h1 = *filter++; *
* *
* x0 = ptr_line0_x[ka]; *
* x1 = ptr_line1_x[ka]; *
* x2 = ptr_line2_x[ka]; *
* x3 = ptr_line3_x[ka]; *
* *
* x01 = ptr_line0_x1[ka]; *
* x11 = ptr_line1_x1[ka]; *
* x21 = ptr_line2_x1[ka]; *
* x31 = ptr_line3_x1[ka]; *
* ka += 2*n_x; *
* *
* sample = taps_count ; *
* *
* if (!taps_count) filter = mod_hh; *
* if (!taps_count) ka = ka - block; *
* if (!taps_count) taps_count = l_hh ; *
* *
* /* 2 taps have been used, for 1 point */ *
* taps_count -= 2; *
* *
* y0 += (x0*h0 + x01*h1) ; *
* y1 += (x1*h0 + x11*h1) ; *
* y2 += (x2*h0 + x21*h1) ; *
* y3 += (x3*h0 + x31*h1) ; *
* *
* t_y0 = y0 >> 16; t_y1 = y1 >> 16; *
* t_y2 = y2 >> 16; t_y3 = y3 >> 16; *
* *
* if (!sample) y0 = 0x0; if (!sample) y1 = 0x0; *
* if (!sample) y2 = 0x0; if (!sample) y3 = 0x0; *
* *
* if (!sample) *ptr_line0_y++ = t_y0; *
* if (!sample) *ptr_line0_y++ = t_y1; *
* if (!sample) *ptr_line0_y++ = t_y2; *
* if (!sample) *ptr_line0_y++ = t_y3; *
* } *
* } *
* *
* ASSUMPTIONS *
* 'l_hh' must be divisible by 4. Pad with zeros for non % 4. *
* *
* All data must be double word aligned. *
* *
* Input and output, mod_hh, ptr_hh, cols must be multiple of 8. *
* *
* Input filters, hh, must be 12 bit precision. *
* *
* The data is in LITTLE ENDIAN format. *
* *
* The code is uninterruptable during execution. Interrupts are *
* disabled at the beginning and reenabled at the end. *
* *
* TECHNIQUES *
* Inner and outer loops are collapsed into 1 loop. *
* *
* A load store live too long problem is dealt with using a MPY. *
* *
* The filter loop is unrolled 4 times with l_hh % 4 == 0. *
* *
* The output data stores are packed together into words and the *
* outer loop is unrolled 8 times to allow 8 parallel filters to *
* be convolved at once. *
* *
* NOTES *
* Before calling this function the input data needs to be in Q11 *
* form. (It is generally the output from the horizontal scaling *
* function, scale_horz.) The results from this function must be *
* clipped before displaying as some pixels may go < 0 or > 255. *
* *
* MEMORY NOTE *
* No bank hits occur in this code. *
* *
* CYCLES *
* Formula for execution time: *
* cycles = (5 * l_hh * cols) / 16 + 6 * l_hh + 32 *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -