LCOV - code coverage report
Current view: directory - media/libtheora/lib/x86 - mmxstate.c (source / functions) Found Hit Coverage
Test: app.info Lines: 83 0 0.0 %
Date: 2012-06-02 Functions: 5 0 0.0 %

       1                 : /********************************************************************
       2                 :  *                                                                  *
       3                 :  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
       4                 :  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
       5                 :  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
       6                 :  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
       7                 :  *                                                                  *
       8                 :  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
       9                 :  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
      10                 :  *                                                                  *
      11                 :  ********************************************************************
      12                 : 
      13                 :   function:
      14                 :     last mod: $Id: mmxstate.c 17563 2010-10-25 17:40:54Z tterribe $
      15                 : 
      16                 :  ********************************************************************/
      17                 : 
      18                 : /*MMX acceleration of complete fragment reconstruction algorithm.
      19                 :   Originally written by Rudolf Marek.*/
      20                 : #include <string.h>
      21                 : #include "x86int.h"
      22                 : #include "mmxloop.h"
      23                 : 
      24                 : #if defined(OC_X86_ASM)
      25                 : 
      26               0 : void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
      27                 :  int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
      28                 :   unsigned char *dst;
      29                 :   ptrdiff_t      frag_buf_off;
      30                 :   int            ystride;
      31                 :   int            refi;
      32                 :   /*Reconstruct fragment _fragi of plane _pli into the OC_FRAME_SELF buffer.
                         :     First the values at _dct_coeffs+64 are produced from _dct_coeffs, then
                         :      they are handed to the intra or inter reconstruction routine.
                         :     Apply the inverse transform.*/
      33                 :   /*Special case only having a DC component.*/
      34               0 :   if(_last_zzi<2){
      35                 :     /*Note that this value must be unsigned, to keep the __asm__ block from
      36                 :        sign-extending it when it puts it in a register.*/
      37                 :     ogg_uint16_t p;
      38                 :     int          i;
      39                 :     /*We round this dequant product (and not any of the others) because there's
      40                 :        no iDCT rounding.
                         :       The 15 is added before the shift: + binds tighter than >>.*/
      41               0 :     p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
      42                 :     /*Fill _dct_coeffs with p.
                         :       This statement only broadcasts p into mm0; the stores in the loop
                         :        below read mm0 back, and no operand or clobber list mentions it,
                         :        so the two statements must stay adjacent and in this order.
                         :       The stores target the [y] operand, which steps through
                         :        _dct_coeffs+64+16*i.*/
      43               0 :     __asm__ __volatile__(
      44                 :       /*mm0=0000 0000 0000 AAAA*/
      45                 :       "movd %[p],%%mm0\n\t"
      46                 :       /*mm0=0000 0000 AAAA AAAA*/
      47                 :       "punpcklwd %%mm0,%%mm0\n\t"
      48                 :       /*mm0=AAAA AAAA AAAA AAAA*/
      49                 :       "punpckldq %%mm0,%%mm0\n\t"
      50                 :       :
      51               0 :       :[p]"r"((unsigned)p)
      52                 :     );
      53               0 :     for(i=0;i<4;i++){
      54               0 :       __asm__ __volatile__(
      55                 :         "movq %%mm0,"OC_MEM_OFFS(0x00,y)"\n\t"
      56                 :         "movq %%mm0,"OC_MEM_OFFS(0x08,y)"\n\t"
      57                 :         "movq %%mm0,"OC_MEM_OFFS(0x10,y)"\n\t"
      58                 :         "movq %%mm0,"OC_MEM_OFFS(0x18,y)"\n\t"
      59                 :         :[y]"=m"OC_ARRAY_OPERAND(ogg_int16_t,_dct_coeffs+64+16*i,16)
      60                 :       );
      61                 :     }
      62                 :   }
      63                 :   else{
      64                 :     /*Dequantize the DC coefficient.
                         :       oc_idct8x8 is then given _dct_coeffs+64 and _dct_coeffs; presumably the
                         :        former is its output, since that is what is consumed below.*/
      65               0 :     _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
      66               0 :     oc_idct8x8(_state,_dct_coeffs+64,_dct_coeffs,_last_zzi);
      67                 :   }
      68                 :   /*Fill in the target buffer.
                         :     dst always lies in the OC_FRAME_SELF buffer; refi picks the reference.
                         :     refi==OC_FRAME_SELF takes the intra routine; otherwise one or two
                         :      offsets from oc_state_get_mv_offsets are applied to ref, two when it
                         :      returns more than 1.*/
      69               0 :   frag_buf_off=_state->frag_buf_offs[_fragi];
      70               0 :   refi=_state->frags[_fragi].refi;
      71               0 :   ystride=_state->ref_ystride[_pli];
      72               0 :   dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
      73               0 :   if(refi==OC_FRAME_SELF)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs+64);
      74                 :   else{
      75                 :     const unsigned char *ref;
      76                 :     int                  mvoffsets[2];
      77               0 :     ref=_state->ref_frame_data[refi]+frag_buf_off;
      78               0 :     if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
      79               0 :      _state->frag_mvs[_fragi])>1){
      80               0 :       oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
      81                 :        _dct_coeffs+64);
      82                 :     }
      83               0 :     else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
      84                 :   }
      85               0 : }
      86                 : 
      87                 : /*We copy this entire function to inline the actual MMX routines so that we
      88                 :    use only a single indirect call.*/
      89                 : 
      90               0 : void oc_loop_filter_init_mmx(signed char _bv[256],int _flimit){
      91               0 :   memset(_bv,_flimit,8); /*Only the first 8 entries of _bv are written, each
                         :                             with _flimit; the rest are left untouched.*/
      92               0 : }
      93                 : 
      94                 : /*Apply the loop filter to a given set of fragment rows in the given plane.
      95                 :   The filter may be run on the bottom edge, affecting pixels in the next row of
      96                 :    fragments, so this row also needs to be available.
                         :   _state:     The codec state supplying the fragment tables, plane layout,
                         :                strides, frame buffers and loop filter limits read below.
      97                 :   _bv:        The bounding values array.
                         :               This variant's body never names it; the limit bytes handed to
                         :                the filter macros come from the local ll array instead.
      98                 :   _refi:      The index of the frame buffer to filter.
      99                 :   _pli:       The color plane to filter.
     100                 :   _fragy0:    The Y coordinate of the first fragment row to filter.
     101                 :   _fragy_end: The Y coordinate of the fragment row to stop filtering at
                         :                (exclusive: the row loop runs while fragi0<fragi0_end).*/
     102               0 : void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
     103                 :  signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
     104                 :   OC_ALIGN8(unsigned char   ll[8]);
     105                 :   const oc_fragment_plane *fplane;
     106                 :   const oc_fragment       *frags;
     107                 :   const ptrdiff_t         *frag_buf_offs;
     108                 :   unsigned char           *ref_frame_data;
     109                 :   ptrdiff_t                fragi_top;
     110                 :   ptrdiff_t                fragi_bot;
     111                 :   ptrdiff_t                fragi0;
     112                 :   ptrdiff_t                fragi0_end;
     113                 :   int                      ystride;
     114                 :   int                      nhfrags;
     115               0 :   memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll)); /*One limit value, looked up via qis[0], replicated into every byte of ll.*/
     116               0 :   fplane=_state->fplanes+_pli;
     117               0 :   nhfrags=fplane->nhfrags;
     118               0 :   fragi_top=fplane->froffset;
     119               0 :   fragi_bot=fragi_top+fplane->nfrags;
     120               0 :   fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
     121               0 :   fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags;
     122               0 :   ystride=_state->ref_ystride[_pli];
     123               0 :   frags=_state->frags;
     124               0 :   frag_buf_offs=_state->frag_buf_offs;
     125               0 :   ref_frame_data=_state->ref_frame_data[_refi];
     126                 :   /*The following loops are constructed somewhat non-intuitively on purpose.
     127                 :     The main idea is: if a block boundary has at least one coded fragment on
     128                 :      it, the filter is applied to it.
     129                 :     However, the order that the filters are applied in matters, and VP3 chose
     130                 :      the somewhat strange ordering used below.
                         :     fragi0 is the index of the first fragment of the current row; fragi
                         :      walks the nhfrags fragments of that row.*/
     131               0 :   while(fragi0<fragi0_end){
     132                 :     ptrdiff_t fragi;
     133                 :     ptrdiff_t fragi_end;
     134               0 :     fragi=fragi0;
     135               0 :     fragi_end=fragi+nhfrags;
     136               0 :     while(fragi<fragi_end){
     137               0 :       if(frags[fragi].coded){
     138                 :         unsigned char *ref;
     139               0 :         ref=ref_frame_data+frag_buf_offs[fragi];
     140               0 :         if(fragi>fragi0){ /*Not the first fragment of its row.*/
     141               0 :           OC_LOOP_FILTER_H(OC_LOOP_FILTER8_MMX,ref,ystride,ll);
     142                 :         }
     143               0 :         if(fragi0>fragi_top){ /*Not in the first fragment row of the plane.*/
     144               0 :           OC_LOOP_FILTER_V(OC_LOOP_FILTER8_MMX,ref,ystride,ll);
     145                 :         }
     146               0 :         if(fragi+1<fragi_end&&!frags[fragi+1].coded){ /*The next fragment in the row exists and is not coded, so it will not filter this boundary itself.*/
     147               0 :           OC_LOOP_FILTER_H(OC_LOOP_FILTER8_MMX,ref+8,ystride,ll);
     148                 :         }
     149               0 :         if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){ /*The fragment one row down is inside the plane and is not coded.*/
     150               0 :           OC_LOOP_FILTER_V(OC_LOOP_FILTER8_MMX,ref+(ystride<<3),ystride,ll);
     151                 :         }
     152                 :       }
     153               0 :       fragi++;
     154                 :     }
     155               0 :     fragi0+=nhfrags;
     156                 :   }
     157               0 : }
     158                 : 
     159               0 : void oc_loop_filter_init_mmxext(signed char _bv[256],int _flimit){
     160               0 :   memset(_bv,~(_flimit<<1),8); /*Only the first 8 entries of _bv are written,
                         :                                   each with the low byte of ~(_flimit<<1);
                         :                                   the rest are left untouched.*/
     161               0 : }
     162                 : 
     163                 : /*Apply the loop filter to a given set of fragment rows in the given plane.
     164                 :   The filter may be run on the bottom edge, affecting pixels in the next row of
     165                 :    fragments, so this row also needs to be available.
                         :   _state:     The codec state supplying the fragment tables, plane layout,
                         :                strides and frame buffers read below.
     166                 :   _bv:        The bounding values array.
                         :               It is handed unchanged to every OC_LOOP_FILTER8_MMXEXT
                         :                invocation below.
     167                 :   _refi:      The index of the frame buffer to filter.
     168                 :   _pli:       The color plane to filter.
     169                 :   _fragy0:    The Y coordinate of the first fragment row to filter.
     170                 :   _fragy_end: The Y coordinate of the fragment row to stop filtering at
                         :                (exclusive: the row loop runs while fragi0<fragi0_end).*/
     171               0 : void oc_state_loop_filter_frag_rows_mmxext(const oc_theora_state *_state,
     172                 :  signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
     173                 :   const oc_fragment_plane *fplane;
     174                 :   const oc_fragment       *frags;
     175                 :   const ptrdiff_t         *frag_buf_offs;
     176                 :   unsigned char           *ref_frame_data;
     177                 :   ptrdiff_t                fragi_top;
     178                 :   ptrdiff_t                fragi_bot;
     179                 :   ptrdiff_t                fragi0;
     180                 :   ptrdiff_t                fragi0_end;
     181                 :   int                      ystride;
     182                 :   int                      nhfrags;
     183               0 :   fplane=_state->fplanes+_pli;
     184               0 :   nhfrags=fplane->nhfrags;
     185               0 :   fragi_top=fplane->froffset;
     186               0 :   fragi_bot=fragi_top+fplane->nfrags;
     187               0 :   fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
     188               0 :   fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags;
     189               0 :   ystride=_state->ref_ystride[_pli];
     190               0 :   frags=_state->frags;
     191               0 :   frag_buf_offs=_state->frag_buf_offs;
     192               0 :   ref_frame_data=_state->ref_frame_data[_refi];
     193                 :   /*The following loops are constructed somewhat non-intuitively on purpose.
     194                 :     The main idea is: if a block boundary has at least one coded fragment on
     195                 :      it, the filter is applied to it.
     196                 :     However, the order that the filters are applied in matters, and VP3 chose
     197                 :      the somewhat strange ordering used below.
                         :     fragi0 is the index of the first fragment of the current row; fragi
                         :      walks the nhfrags fragments of that row.*/
     198               0 :   while(fragi0<fragi0_end){
     199                 :     ptrdiff_t fragi;
     200                 :     ptrdiff_t fragi_end;
     201               0 :     fragi=fragi0;
     202               0 :     fragi_end=fragi+nhfrags;
     203               0 :     while(fragi<fragi_end){
     204               0 :       if(frags[fragi].coded){
     205                 :         unsigned char *ref;
     206               0 :         ref=ref_frame_data+frag_buf_offs[fragi];
     207               0 :         if(fragi>fragi0){ /*Not the first fragment of its row.*/
     208               0 :           OC_LOOP_FILTER_H(OC_LOOP_FILTER8_MMXEXT,ref,ystride,_bv);
     209                 :         }
     210               0 :         if(fragi0>fragi_top){ /*Not in the first fragment row of the plane.*/
     211               0 :           OC_LOOP_FILTER_V(OC_LOOP_FILTER8_MMXEXT,ref,ystride,_bv);
     212                 :         }
     213               0 :         if(fragi+1<fragi_end&&!frags[fragi+1].coded){ /*The next fragment in the row exists and is not coded, so it will not filter this boundary itself.*/
     214               0 :           OC_LOOP_FILTER_H(OC_LOOP_FILTER8_MMXEXT,ref+8,ystride,_bv);
     215                 :         }
     216               0 :         if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){ /*The fragment one row down is inside the plane and is not coded.*/
     217               0 :           OC_LOOP_FILTER_V(OC_LOOP_FILTER8_MMXEXT,ref+(ystride<<3),ystride,_bv);
     218                 :         }
     219                 :       }
     220               0 :       fragi++;
     221                 :     }
     222               0 :     fragi0+=nhfrags;
     223                 :   }
     224               0 : }
     225                 : 
     226                 : #endif

Generated by: LCOV version 1.7