LCOV - code coverage report
Current view: directory - media/libjpeg - jidctflt.c (source / functions) Found Hit Coverage
Test: app.info Lines: 95 0 0.0 %
Date: 2012-06-02 Functions: 1 0 0.0 %

       1                 : /*
       2                 :  * jidctflt.c
       3                 :  *
       4                 :  * Copyright (C) 1994-1998, Thomas G. Lane.
       5                 :  * This file is part of the Independent JPEG Group's software.
       6                 :  * For conditions of distribution and use, see the accompanying README file.
       7                 :  *
       8                 :  * This file contains a floating-point implementation of the
       9                 :  * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
      10                 :  * must also perform dequantization of the input coefficients.
      11                 :  *
      12                 :  * This implementation should be more accurate than either of the integer
      13                 :  * IDCT implementations.  However, it may not give the same results on all
      14                 :  * machines because of differences in roundoff behavior.  Speed will depend
      15                 :  * on the hardware's floating point capacity.
      16                 :  *
      17                 :  * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
      18                 :  * on each row (or vice versa, but it's more convenient to emit a row at
      19                 :  * a time).  Direct algorithms are also available, but they are much more
      20                 :  * complex and seem not to be any faster when reduced to code.
      21                 :  *
      22                 :  * This implementation is based on Arai, Agui, and Nakajima's algorithm for
      23                 :  * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
      24                 :  * Japanese, but the algorithm is described in the Pennebaker & Mitchell
      25                 :  * JPEG textbook (see REFERENCES section in file README).  The following code
      26                 :  * is based directly on figure 4-8 in P&M.
      27                 :  * While an 8-point DCT cannot be done in less than 11 multiplies, it is
      28                 :  * possible to arrange the computation so that many of the multiplies are
      29                 :  * simple scalings of the final outputs.  These multiplies can then be
      30                 :  * folded into the multiplications or divisions by the JPEG quantization
      31                 :  * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
      32                 :  * to be done in the DCT itself.
      33                 :  * The primary disadvantage of this method is that with a fixed-point
      34                 :  * implementation, accuracy is lost due to imprecise representation of the
      35                 :  * scaled quantization values.  However, that problem does not arise if
      36                 :  * we use floating point arithmetic.
      37                 :  */
      38                 : 
      39                 : #define JPEG_INTERNALS
      40                 : #include "jinclude.h"
      41                 : #include "jpeglib.h"
      42                 : #include "jdct.h"             /* Private declarations for DCT subsystem */
      43                 : 
      44                 : #ifdef DCT_FLOAT_SUPPORTED
      45                 : 
      46                 : 
      47                 : /*
      48                 :  * This module is specialized to the case DCTSIZE = 8.
      49                 :  */
      50                 : 
      51                 : #if DCTSIZE != 8
      52                 :   Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
      53                 : #endif
      54                 : 
      55                 : 
      56                 : /* Dequantize a coefficient by multiplying it by the multiplier-table
      57                 :  * entry; produce a float result.
      58                 :  */
      59                 : 
      60                 : #define DEQUANTIZE(coef,quantval)  (((FAST_FLOAT) (coef)) * (quantval))
      61                 : 
      62                 : 
      63                 : /*
      64                 :  * Perform dequantization and inverse DCT on one block of coefficients.
      65                 :  */
      66                 : 
      67                 : GLOBAL(void)
      68               0 : jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
      69                 :                  JCOEFPTR coef_block,
      70                 :                  JSAMPARRAY output_buf, JDIMENSION output_col)
      71                 : {
      72                 :   FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
      73                 :   FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
      74                 :   FAST_FLOAT z5, z10, z11, z12, z13;
      75                 :   JCOEFPTR inptr;
      76                 :   FLOAT_MULT_TYPE * quantptr;
      77                 :   FAST_FLOAT * wsptr;
      78                 :   JSAMPROW outptr;
      79               0 :   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
      80                 :   int ctr;
      81                 :   FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
      82                 :   SHIFT_TEMPS
      83                 : 
      84                 :   /* Pass 1: process columns from input, store into work array. */
      85                 : 
      86               0 :   inptr = coef_block;
      87               0 :   quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table;
      88               0 :   wsptr = workspace;
      89               0 :   for (ctr = DCTSIZE; ctr > 0; ctr--) {
      90                 :     /* Due to quantization, we will usually find that many of the input
      91                 :      * coefficients are zero, especially the AC terms.  We can exploit this
      92                 :      * by short-circuiting the IDCT calculation for any column in which all
      93                 :      * the AC terms are zero.  In that case each output is equal to the
      94                 :      * DC coefficient (with scale factor as needed).
      95                 :      * With typical images and quantization tables, half or more of the
      96                 :      * column DCT calculations can be simplified this way.
      97                 :      */
      98                 :     
      99               0 :     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
     100               0 :         inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
     101               0 :         inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
     102               0 :         inptr[DCTSIZE*7] == 0) {
     103                 :       /* AC terms all zero */
     104               0 :       FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     105                 :       
     106               0 :       wsptr[DCTSIZE*0] = dcval;
     107               0 :       wsptr[DCTSIZE*1] = dcval;
     108               0 :       wsptr[DCTSIZE*2] = dcval;
     109               0 :       wsptr[DCTSIZE*3] = dcval;
     110               0 :       wsptr[DCTSIZE*4] = dcval;
     111               0 :       wsptr[DCTSIZE*5] = dcval;
     112               0 :       wsptr[DCTSIZE*6] = dcval;
     113               0 :       wsptr[DCTSIZE*7] = dcval;
     114                 :       
     115               0 :       inptr++;                  /* advance pointers to next column */
     116               0 :       quantptr++;
     117               0 :       wsptr++;
     118               0 :       continue;
     119                 :     }
     120                 :     
     121                 :     /* Even part */
     122                 : 
     123               0 :     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     124               0 :     tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     125               0 :     tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     126               0 :     tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
     127                 : 
     128               0 :     tmp10 = tmp0 + tmp2;        /* phase 3 */
     129               0 :     tmp11 = tmp0 - tmp2;
     130                 : 
     131               0 :     tmp13 = tmp1 + tmp3;        /* phases 5-3 */
     132               0 :     tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
     133                 : 
     134               0 :     tmp0 = tmp10 + tmp13;       /* phase 2 */
     135               0 :     tmp3 = tmp10 - tmp13;
     136               0 :     tmp1 = tmp11 + tmp12;
     137               0 :     tmp2 = tmp11 - tmp12;
     138                 :     
     139                 :     /* Odd part */
     140                 : 
     141               0 :     tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
     142               0 :     tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
     143               0 :     tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     144               0 :     tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
     145                 : 
     146               0 :     z13 = tmp6 + tmp5;          /* phase 6 */
     147               0 :     z10 = tmp6 - tmp5;
     148               0 :     z11 = tmp4 + tmp7;
     149               0 :     z12 = tmp4 - tmp7;
     150                 : 
     151               0 :     tmp7 = z11 + z13;           /* phase 5 */
     152               0 :     tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
     153                 : 
     154               0 :     z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
     155               0 :     tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
     156               0 :     tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
     157                 : 
     158               0 :     tmp6 = tmp12 - tmp7;        /* phase 2 */
     159               0 :     tmp5 = tmp11 - tmp6;
     160               0 :     tmp4 = tmp10 + tmp5;
     161                 : 
     162               0 :     wsptr[DCTSIZE*0] = tmp0 + tmp7;
     163               0 :     wsptr[DCTSIZE*7] = tmp0 - tmp7;
     164               0 :     wsptr[DCTSIZE*1] = tmp1 + tmp6;
     165               0 :     wsptr[DCTSIZE*6] = tmp1 - tmp6;
     166               0 :     wsptr[DCTSIZE*2] = tmp2 + tmp5;
     167               0 :     wsptr[DCTSIZE*5] = tmp2 - tmp5;
     168               0 :     wsptr[DCTSIZE*4] = tmp3 + tmp4;
     169               0 :     wsptr[DCTSIZE*3] = tmp3 - tmp4;
     170                 : 
     171               0 :     inptr++;                    /* advance pointers to next column */
     172               0 :     quantptr++;
     173               0 :     wsptr++;
     174                 :   }
     175                 :   
     176                 :   /* Pass 2: process rows from work array, store into output array. */
     177                 :   /* Note that we must descale the results by a factor of 8 == 2**3. */
     178                 : 
     179               0 :   wsptr = workspace;
     180               0 :   for (ctr = 0; ctr < DCTSIZE; ctr++) {
     181               0 :     outptr = output_buf[ctr] + output_col;
     182                 :     /* Rows of zeroes can be exploited in the same way as we did with columns.
     183                 :      * However, the column calculation has created many nonzero AC terms, so
     184                 :      * the simplification applies less often (typically 5% to 10% of the time).
     185                 :      * And testing floats for zero is relatively expensive, so we don't bother.
     186                 :      */
     187                 :     
     188                 :     /* Even part */
     189                 : 
     190               0 :     tmp10 = wsptr[0] + wsptr[4];
     191               0 :     tmp11 = wsptr[0] - wsptr[4];
     192                 : 
     193               0 :     tmp13 = wsptr[2] + wsptr[6];
     194               0 :     tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13;
     195                 : 
     196               0 :     tmp0 = tmp10 + tmp13;
     197               0 :     tmp3 = tmp10 - tmp13;
     198               0 :     tmp1 = tmp11 + tmp12;
     199               0 :     tmp2 = tmp11 - tmp12;
     200                 : 
     201                 :     /* Odd part */
     202                 : 
     203               0 :     z13 = wsptr[5] + wsptr[3];
     204               0 :     z10 = wsptr[5] - wsptr[3];
     205               0 :     z11 = wsptr[1] + wsptr[7];
     206               0 :     z12 = wsptr[1] - wsptr[7];
     207                 : 
     208               0 :     tmp7 = z11 + z13;
     209               0 :     tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562);
     210                 : 
     211               0 :     z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
     212               0 :     tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
     213               0 :     tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
     214                 : 
     215               0 :     tmp6 = tmp12 - tmp7;
     216               0 :     tmp5 = tmp11 - tmp6;
     217               0 :     tmp4 = tmp10 + tmp5;
     218                 : 
     219                 :     /* Final output stage: scale down by a factor of 8 and range-limit */
     220                 : 
     221               0 :     outptr[0] = range_limit[(int) DESCALE((INT32) (tmp0 + tmp7), 3)
     222                 :                             & RANGE_MASK];
     223               0 :     outptr[7] = range_limit[(int) DESCALE((INT32) (tmp0 - tmp7), 3)
     224                 :                             & RANGE_MASK];
     225               0 :     outptr[1] = range_limit[(int) DESCALE((INT32) (tmp1 + tmp6), 3)
     226                 :                             & RANGE_MASK];
     227               0 :     outptr[6] = range_limit[(int) DESCALE((INT32) (tmp1 - tmp6), 3)
     228                 :                             & RANGE_MASK];
     229               0 :     outptr[2] = range_limit[(int) DESCALE((INT32) (tmp2 + tmp5), 3)
     230                 :                             & RANGE_MASK];
     231               0 :     outptr[5] = range_limit[(int) DESCALE((INT32) (tmp2 - tmp5), 3)
     232                 :                             & RANGE_MASK];
     233               0 :     outptr[4] = range_limit[(int) DESCALE((INT32) (tmp3 + tmp4), 3)
     234                 :                             & RANGE_MASK];
     235               0 :     outptr[3] = range_limit[(int) DESCALE((INT32) (tmp3 - tmp4), 3)
     236                 :                             & RANGE_MASK];
     237                 :     
     238               0 :     wsptr += DCTSIZE;           /* advance pointer to next row */
     239                 :   }
     240               0 : }
     241                 : 
     242                 : #endif /* DCT_FLOAT_SUPPORTED */

Generated by: LCOV version 1.7