⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scale_horz_h.asm

📁 基于DM642平台的视频缩小放大功能 程序源代码
💻 ASM
📖 第 1 页 / 共 2 页
字号:
* ========================================================================= *
*                                                                           *
*   TEXAS INSTRUMENTS, INC.                                                 *
*                                                                           *
*   NAME                                                                    *
*       scale_horz                                                          *
*                                                                           *
*                                                                           *
*   USAGE                                                                   *
*       This routine is C-callable and can be called as:                    *
*                                                                           *
*           void scale_horz                                                 *
*           (                                                               *
*               unsigned short *in_data,  /* Ptr to unscaled lines      */  *
*               unsigned int    in_len,   /* Pixels/line unscaled       */  *
*               short          *out_data, /* Ptr to scaled data lines   */  *
*               unsigned int    out_len,  /* Pixels/line of scaled data */  *
*               short          *hh,       /* Ptr to filter taps,            *
*                                            interleaved odd/even           *
*                                            outputs                    */  *
*               unsigned int    l_hh,     /* Length of scaling filters  */  *
*               unsigned int    n_hh,     /* Number of scaling filters  */  *
*               short          *patch     /* Ptr to decrement pattern   */  *
*           );                                                              *
*                                                                           *
*   DESCRIPTION                                                             *
*                                                                           *
*       This code can scale up or down 1 line of data, in the               *
*       ratio out_len : in_len, e.g. 1:3, 4:3, 5:6.  The                    *
*       filters are designed outside of the loop using a                    *
*       general purpose resizing algorithm.                                 *
*                                                                           *
*           patch0 = patch + 2;                                             *
*           filter_count = n_hh;                                            *
*           ka = 0;                                                         *
*                                                                           *
*           line0_x = plane_x;                                              *
*           line0_y = plane_y;                                              *
*           ptr_hh = hh;                                                    *
*           jump = (int) patch[0]; ka = jump >> 1;                          *
*           jump = (int) patch[1]; kb = jump >> 1;                          *
*                                                                           *
*           for ( i = 0; i < n_y; i += 2)                                   *
*           {                                                               *
*               y0 = 1 << 5;                                                *
*               y1 = 1 << 5;                                                *
*               for ( j = 0; j < l_hh; j+=4)                                *
*               {                                                           *
*                   /* even outputs */                                      *
*                   for (k=0; k < 4; k++)                                   *
*                   {                                                       *
*                       h0 = *ptr_hh++;                                     *
*                       x0 = *(line0_x+ ka + k);                            *
*                       y0 += ( x0 * h0 );                                  *
*                   }                                                       *
*                   jump = (int) (*patch0++);                               *
*                   ka = ka + (jump>>1);                                    *
*                   /* odd outputs */                                       *
*                   for (k=0; k < 4; k++)                                   *
*                   {                                                       *
*                       h1 = *ptr_hh++;                                     *
*                       x1 = *(line0_x + kb + k);                           *
*                       y1 += ( x1 * h1 );                                  *
*                   }                                                       *
*                   jump = (int) (*patch0++);                               *
*                   kb = kb + (jump>>1);                                    *
*               }                                                           *
*               *line0_y++ = (short) (y0 >> 6) ;                            *
*               *line0_y++ = (short) (y1 >> 6) ;                            *
*                                                                           *
*               filter_count -= 2;                                          *
*               if (!filter_count)                                          *
*               {                                                           *
*                   patch0 = patch + 2;                                     *
*                   ptr_hh = hh;                                            *
*                   filter_count = n_hh;                                    *
*               }                                                           *
*           }                                                               *
*                                                                           *
*   ASSUMPTIONS                                                             *
*       One line of data is produced per function call.                     *
*                                                                           *
*       The line must be aligned on a double word boundary and be a         *
*       multiple of 8 bytes.                                                *
*                                                                           *
*       Filters are multiples of 4 taps, maximum number of filters is 256.  *
*                                                                           *
*       The computations for each output are interleaved, thus the filters  *
*       are interleaved on a 4 short interval.                              *
*                                                                           *
*       Little ENDIAN Configuration is used and the input and output data   *
*       is 16 bit unsigned and signed shorts respectively.  The filters     *
*       are also 16 bit signed shorts in 12 bit precision.                  *
*                                                                           *
*       The n_hh filters are all of the same length and are                 *
*       strung together in a single linear array.                           *
*                                                                           *
*       Interrupts are masked by the function for most of its duration.     *
*                                                                           *
*   MEMORY NOTE                                                             *
*       Some bank hits will occur in this code for certain scale            *
*       factors and filter lengths.                                         *
*                                                                           *
*       For 4 taps k = 0, for l_hh 8, k = 0.031, for l_hh = 16, k = 0.015.  *
*       Different filter lengths can produce different numbers of bank      *
*       conflicts.  Overall, these bank conflicts have nearly zero effect.  *
*                                                                           *
*       For l_hh=4: k=0, l_hh=8: k=1/32, l_hh=12: k=0, l_hh=16: k=1/64      *
*       For l_hh % 8 == 0, k = 1/(4*l_hh) else k = 0                        *
*                                                                           *
*       'k' is the bank conflict between the store and the guidance table   *
*       load.  Depending on the relative sizes of the filters and           *
*       memory width, this bank conflict is between 0 and 3.1%              *
*       overhead.                                                           *
*                                                                           *
*   TECHNIQUES                                                              *
*       The outputs are computed using interleaved inputs. The patch table  *
*       controls the access of 2 parallel pointers. For example an 8/33     *
*       scale factor will have the following access pattern.                *
*                                                                           *
*                 11111111112222222222333333333344444444445555555555        *
*       012345678901234567890123456789012345678901234567890123456789        *
*                                                                           *
*       0  e xxxxxxxx     <-start point of even output 0                    *
*       1      o xxxxxxxx      <-start point of odd output 4                *
*       2          e xxxxxxxx                                               *
*       3              o xxxxxxxx                                           *
*       4                  e xxxxxxxx                                       *
*       5                      o xxxxxxxx                                   *
*       6                          e xxxxxxxx                               *
*       7                              o xxxxxxxx                           *
*       0                                   e xxxxxxxx  <-next start        *
*       1                                       o xxxxxxxx  <-next start    *
*                                                                           *
*                                                                           *
*       From this diagram the even pointer jumps 4 then another 4 as the    *
*       filters have 8 taps, it then jumps 4 to get to the next set of      *
*       input data. The odd pointer does the same. These jumps are          *
*       interleaved and so are the filter coefficients. The jumps are       *
*       in multiples of bytes as non-scaled non-aligned double word         *
*       accesses are used.  In this case the table will be:                 *
*                                                                           *
*           short patch[] = {0,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,10,10,8,8};    *
*                                                                           *
*       Notice the first 2 entries are the initial starting points for      *
*       the two pointers. To remove a dependency in the code the last 2     *
*       entries are copies of the 2nd two. This makes the table almost      *
*       circular.                                                           *
*                                                                           *
*   NOTES                                                                   *
*       Other scale factors can be achieved with the following              *
*       example tables.                                                     *
*                                                                           *
*   Scale Factor Taps  Table short jump[] =                                 *
*   --------------------------------------------------------------------    *
*       5/6       4    {0, 1, 2, 2, 2, 3, 3, 2, 2, 2, 3, 3, 2, 2}           *
*       4/3       8    {0, 4, 4, 4, -3, -2, 4, 4, -2, -3, 4, 4}             *
*       3/4       12   {0,1,4,4,4,4,-6,-5,4,4,4,4,-5,-6,4,4,4,4,-5,-5,4,4}  *
*       6/5       16   {0,0,4,4,4,4,4,4,-11,-10,4,4,4,4,4,4,-10,-10,        *
*                       4,4,4,4,4,4,-10,-11,4,4}                            *
*                                                                           *
*       The software to produce these tables and the simple coefficients    *
*       for an arbitrary scale factor and number of taps can be found       *
*       in the api document. Note in the case of 3/4, odd scale factors     *
*       are doubled to make 6/8 instead of 3/4                              *
*                                                                           *
*   CYCLES                                                                  *
*       cycles = 0.5 * out_len * l_hh * (1+k) + 30.                         *
*       If (l_hh % 8) == 0 then k = 1/(4*l_hh) else k = 0.                  *
*                                                                           *
*       For l_hh = 16, in_len = 1024, and out_len = 1366,  cycles = 11129.  *
*       For l_hh = 8,  in_len = 640,  and out_len = 120,   cycles = 525.    *
*                                                                           *
*   CODESIZE                                                                *
*       452 bytes                                                           *
* ------------------------------------------------------------------------- *
*             Copyright (c) 2001 Texas Instruments, Incorporated.           *
*                            All Rights Reserved.                           *
* ========================================================================= *
                ; Switch to the named section ".data:copyright_h" so the notice
                ; below is emitted there.
                .sect ".data:copyright_h"
                ; Copyright notice, emitted as two consecutive .string
                ; directives under the single label _Copyright.
_Copyright:     .string "Copyright (C) 2001 Texas Instruments Incorporated. "
                .string "All Rights Reserved."
                ; NOTE(review): the contents of scale_horz_h.h62 are not visible
                ; from here; presumably it supplies the section and global
                ; declarations for the routine that follows -- confirm.
                .include "scale_horz_h.h62"
_scale_horz_asm: 
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
        .asg     A4,         A_plane_x    ;
        .asg     B4,         B_n_x        ;
        .asg     A6,         A_plane_y    ;
        .asg     B6,         B_n_y        ;
        .asg     A8,         A_hh         ;
        .asg     B8,         B_l_hh       ;
        .asg     A10,        A_n_hh       ;
        .asg     B10,        B_patch      ;
        .asg     A12,        A_filt_state ;
        .asg     B18,        B_filt_no    ;
        .asg     B16,        B_hh0        ;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -