1 : /*
2 : * jcdctmgr.c
3 : *
4 : * Copyright (C) 1994-1996, Thomas G. Lane.
5 : * Copyright (C) 1999-2006, MIYASAKA Masaru.
6 : * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
7 : * Copyright (C) 2011 D. R. Commander
8 : * This file is part of the Independent JPEG Group's software.
9 : * For conditions of distribution and use, see the accompanying README file.
10 : *
11 : * This file contains the forward-DCT management logic.
12 : * This code selects a particular DCT implementation to be used,
13 : * and it performs related housekeeping chores including coefficient
14 : * quantization.
15 : */
16 :
17 : #define JPEG_INTERNALS
18 : #include "jinclude.h"
19 : #include "jpeglib.h"
20 : #include "jdct.h" /* Private declarations for DCT subsystem */
21 : #include "jsimddct.h"
22 :
23 :
24 : /* Private subobject for this module */
25 :
26 : typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
27 : typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
28 :
29 : typedef JMETHOD(void, convsamp_method_ptr,
30 : (JSAMPARRAY sample_data, JDIMENSION start_col,
31 : DCTELEM * workspace));
32 : typedef JMETHOD(void, float_convsamp_method_ptr,
33 : (JSAMPARRAY sample_data, JDIMENSION start_col,
34 : FAST_FLOAT *workspace));
35 :
36 : typedef JMETHOD(void, quantize_method_ptr,
37 : (JCOEFPTR coef_block, DCTELEM * divisors,
38 : DCTELEM * workspace));
39 : typedef JMETHOD(void, float_quantize_method_ptr,
40 : (JCOEFPTR coef_block, FAST_FLOAT * divisors,
41 : FAST_FLOAT * workspace));
42 :
43 : METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
44 :
45 : typedef struct {
46 : struct jpeg_forward_dct pub; /* public fields */
47 :
48 : /* Pointer to the DCT routine actually in use */
49 : forward_DCT_method_ptr dct;
50 : convsamp_method_ptr convsamp;
51 : quantize_method_ptr quantize;
52 :
53 : /* The actual post-DCT divisors --- not identical to the quant table
54 : * entries, because of scaling (especially for an unnormalized DCT).
55 : * Each table is given in normal array order.
56 : */
57 : DCTELEM * divisors[NUM_QUANT_TBLS];
58 :
59 : /* work area for FDCT subroutine */
60 : DCTELEM * workspace;
61 :
62 : #ifdef DCT_FLOAT_SUPPORTED
63 : /* Same as above for the floating-point case. */
64 : float_DCT_method_ptr float_dct;
65 : float_convsamp_method_ptr float_convsamp;
66 : float_quantize_method_ptr float_quantize;
67 : FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
68 : FAST_FLOAT * float_workspace;
69 : #endif
70 : } my_fdct_controller;
71 :
72 : typedef my_fdct_controller * my_fdct_ptr;
73 :
74 :
75 : /*
76 : * Find the highest bit in an integer through binary search.
77 : */
78 : LOCAL(int)
79 576 : flss (UINT16 val)
80 : {
81 : int bit;
82 :
83 576 : bit = 16;
84 :
85 576 : if (!val)
86 0 : return 0;
87 :
88 576 : if (!(val & 0xff00)) {
89 576 : bit -= 8;
90 576 : val <<= 8;
91 : }
92 576 : if (!(val & 0xf000)) {
93 0 : bit -= 4;
94 0 : val <<= 4;
95 : }
96 576 : if (!(val & 0xc000)) {
97 123 : bit -= 2;
98 123 : val <<= 2;
99 : }
100 576 : if (!(val & 0x8000)) {
101 174 : bit -= 1;
102 174 : val <<= 1;
103 : }
104 :
105 576 : return bit;
106 : }
107 :
108 : /*
109 : * Compute values to do a division using reciprocal.
110 : *
111 : * This implementation is based on an algorithm described in
112 : * "How to optimize for the Pentium family of microprocessors"
113 : * (http://www.agner.org/assem/).
114 : * More information about the basic algorithm can be found in
115 : * the paper "Integer Division Using Reciprocals" by Robert Alverson.
116 : *
117 : * The basic idea is to replace x/d by x * d^-1. In order to store
118 : * d^-1 with enough precision we shift it left a few places. It turns
119 : * out that this algoright gives just enough precision, and also fits
120 : * into DCTELEM:
121 : *
122 : * b = (the number of significant bits in divisor) - 1
123 : * r = (word size) + b
124 : * f = 2^r / divisor
125 : *
126 : * f will not be an integer for most cases, so we need to compensate
127 : * for the rounding error introduced:
128 : *
129 : * no fractional part:
130 : *
131 : * result = input >> r
132 : *
133 : * fractional part of f < 0.5:
134 : *
135 : * round f down to nearest integer
136 : * result = ((input + 1) * f) >> r
137 : *
138 : * fractional part of f > 0.5:
139 : *
140 : * round f up to nearest integer
141 : * result = (input * f) >> r
142 : *
143 : * This is the original algorithm that gives truncated results. But we
144 : * want properly rounded results, so we replace "input" with
145 : * "input + divisor/2".
146 : *
147 : * In order to allow SIMD implementations we also tweak the values to
148 : * allow the same calculation to be made at all times:
149 : *
150 : * dctbl[0] = f rounded to nearest integer
151 : * dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
152 : * dctbl[2] = 1 << ((word size) * 2 - r)
153 : * dctbl[3] = r - (word size)
154 : *
155 : * dctbl[2] is for stupid instruction sets where the shift operation
156 : * isn't member wise (e.g. MMX).
157 : *
158 : * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
159 : * is that most SIMD implementations have a "multiply and store top
160 : * half" operation.
161 : *
162 : * Lastly, we store each of the values in their own table instead
163 : * of in a consecutive manner, yet again in order to allow SIMD
164 : * routines.
165 : */
166 : LOCAL(int)
167 576 : compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
168 : {
169 : UDCTELEM2 fq, fr;
170 : UDCTELEM c;
171 : int b, r;
172 :
173 576 : b = flss(divisor) - 1;
174 576 : r = sizeof(DCTELEM) * 8 + b;
175 :
176 576 : fq = ((UDCTELEM2)1 << r) / divisor;
177 576 : fr = ((UDCTELEM2)1 << r) % divisor;
178 :
179 576 : c = divisor / 2; /* for rounding */
180 :
181 576 : if (fr == 0) { /* divisor is power of two */
182 : /* fq will be one bit too large to fit in DCTELEM, so adjust */
183 414 : fq >>= 1;
184 414 : r--;
185 162 : } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
186 42 : c++;
187 : } else { /* fractional part is > 0.5 */
188 120 : fq++;
189 : }
190 :
191 576 : dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
192 576 : dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
193 576 : dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */
194 576 : dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
195 :
196 576 : if(r <= 16) return 0;
197 576 : else return 1;
198 : }
199 :
200 : /*
201 : * Initialize for a processing pass.
202 : * Verify that all referenced Q-tables are present, and set up
203 : * the divisor table for each one.
204 : * In the current implementation, DCT of all components is done during
205 : * the first pass, even if only some components will be output in the
206 : * first scan. Hence all components should be examined here.
207 : */
208 :
209 : METHODDEF(void)
210 3 : start_pass_fdctmgr (j_compress_ptr cinfo)
211 : {
212 3 : my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
213 : int ci, qtblno, i;
214 : jpeg_component_info *compptr;
215 : JQUANT_TBL * qtbl;
216 : DCTELEM * dtbl;
217 :
218 15 : for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
219 9 : ci++, compptr++) {
220 9 : qtblno = compptr->quant_tbl_no;
221 : /* Make sure specified quantization table is present */
222 18 : if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
223 9 : cinfo->quant_tbl_ptrs[qtblno] == NULL)
224 0 : ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
225 9 : qtbl = cinfo->quant_tbl_ptrs[qtblno];
226 : /* Compute divisors for this quant table */
227 : /* We may do this more than once for same table, but it's not a big deal */
228 9 : switch (cinfo->dct_method) {
229 : #ifdef DCT_ISLOW_SUPPORTED
230 : case JDCT_ISLOW:
231 : /* For LL&M IDCT method, divisors are equal to raw quantization
232 : * coefficients multiplied by 8 (to counteract scaling).
233 : */
234 9 : if (fdct->divisors[qtblno] == NULL) {
235 6 : fdct->divisors[qtblno] = (DCTELEM *)
236 6 : (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
237 : (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
238 : }
239 9 : dtbl = fdct->divisors[qtblno];
240 585 : for (i = 0; i < DCTSIZE2; i++) {
241 576 : if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
242 0 : && fdct->quantize == jsimd_quantize)
243 0 : fdct->quantize = quantize;
244 : }
245 9 : break;
246 : #endif
247 : #ifdef DCT_IFAST_SUPPORTED
248 : case JDCT_IFAST:
249 : {
250 : /* For AA&N IDCT method, divisors are equal to quantization
251 : * coefficients scaled by scalefactor[row]*scalefactor[col], where
252 : * scalefactor[0] = 1
253 : * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
254 : * We apply a further scale factor of 8.
255 : */
256 : #define CONST_BITS 14
257 : static const INT16 aanscales[DCTSIZE2] = {
258 : /* precomputed values scaled up by 14 bits */
259 : 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
260 : 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
261 : 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
262 : 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
263 : 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
264 : 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
265 : 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
266 : 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
267 : };
268 : SHIFT_TEMPS
269 :
270 0 : if (fdct->divisors[qtblno] == NULL) {
271 0 : fdct->divisors[qtblno] = (DCTELEM *)
272 0 : (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
273 : (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
274 : }
275 0 : dtbl = fdct->divisors[qtblno];
276 0 : for (i = 0; i < DCTSIZE2; i++) {
277 0 : if(!compute_reciprocal(
278 0 : DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
279 : (INT32) aanscales[i]),
280 0 : CONST_BITS-3), &dtbl[i])
281 0 : && fdct->quantize == jsimd_quantize)
282 0 : fdct->quantize = quantize;
283 : }
284 : }
285 0 : break;
286 : #endif
287 : #ifdef DCT_FLOAT_SUPPORTED
288 : case JDCT_FLOAT:
289 : {
290 : /* For float AA&N IDCT method, divisors are equal to quantization
291 : * coefficients scaled by scalefactor[row]*scalefactor[col], where
292 : * scalefactor[0] = 1
293 : * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
294 : * We apply a further scale factor of 8.
295 : * What's actually stored is 1/divisor so that the inner loop can
296 : * use a multiplication rather than a division.
297 : */
298 : FAST_FLOAT * fdtbl;
299 : int row, col;
300 : static const double aanscalefactor[DCTSIZE] = {
301 : 1.0, 1.387039845, 1.306562965, 1.175875602,
302 : 1.0, 0.785694958, 0.541196100, 0.275899379
303 : };
304 :
305 0 : if (fdct->float_divisors[qtblno] == NULL) {
306 0 : fdct->float_divisors[qtblno] = (FAST_FLOAT *)
307 0 : (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
308 : DCTSIZE2 * SIZEOF(FAST_FLOAT));
309 : }
310 0 : fdtbl = fdct->float_divisors[qtblno];
311 0 : i = 0;
312 0 : for (row = 0; row < DCTSIZE; row++) {
313 0 : for (col = 0; col < DCTSIZE; col++) {
314 0 : fdtbl[i] = (FAST_FLOAT)
315 0 : (1.0 / (((double) qtbl->quantval[i] *
316 0 : aanscalefactor[row] * aanscalefactor[col] * 8.0)));
317 0 : i++;
318 : }
319 : }
320 : }
321 0 : break;
322 : #endif
323 : default:
324 0 : ERREXIT(cinfo, JERR_NOT_COMPILED);
325 0 : break;
326 : }
327 : }
328 3 : }
329 :
330 :
331 : /*
332 : * Load data into workspace, applying unsigned->signed conversion.
333 : */
334 :
335 : METHODDEF(void)
336 0 : convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
337 : {
338 : register DCTELEM *workspaceptr;
339 : register JSAMPROW elemptr;
340 : register int elemr;
341 :
342 0 : workspaceptr = workspace;
343 0 : for (elemr = 0; elemr < DCTSIZE; elemr++) {
344 0 : elemptr = sample_data[elemr] + start_col;
345 :
346 : #if DCTSIZE == 8 /* unroll the inner loop */
347 0 : *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
348 0 : *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
349 0 : *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
350 0 : *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
351 0 : *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
352 0 : *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
353 0 : *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
354 0 : *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
355 : #else
356 : {
357 : register int elemc;
358 : for (elemc = DCTSIZE; elemc > 0; elemc--)
359 : *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
360 : }
361 : #endif
362 : }
363 0 : }
364 :
365 :
366 : /*
367 : * Quantize/descale the coefficients, and store into coef_blocks[].
368 : */
369 :
370 : METHODDEF(void)
371 0 : quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
372 : {
373 : int i;
374 : DCTELEM temp;
375 : UDCTELEM recip, corr, shift;
376 : UDCTELEM2 product;
377 0 : JCOEFPTR output_ptr = coef_block;
378 :
379 0 : for (i = 0; i < DCTSIZE2; i++) {
380 0 : temp = workspace[i];
381 0 : recip = divisors[i + DCTSIZE2 * 0];
382 0 : corr = divisors[i + DCTSIZE2 * 1];
383 0 : shift = divisors[i + DCTSIZE2 * 3];
384 :
385 0 : if (temp < 0) {
386 0 : temp = -temp;
387 0 : product = (UDCTELEM2)(temp + corr) * recip;
388 0 : product >>= shift + sizeof(DCTELEM)*8;
389 0 : temp = product;
390 0 : temp = -temp;
391 : } else {
392 0 : product = (UDCTELEM2)(temp + corr) * recip;
393 0 : product >>= shift + sizeof(DCTELEM)*8;
394 0 : temp = product;
395 : }
396 :
397 0 : output_ptr[i] = (JCOEF) temp;
398 : }
399 0 : }
400 :
401 :
402 : /*
403 : * Perform forward DCT on one or more blocks of a component.
404 : *
405 : * The input samples are taken from the sample_data[] array starting at
406 : * position start_row/start_col, and moving to the right for any additional
407 : * blocks. The quantized coefficients are returned in coef_blocks[].
408 : */
409 :
410 : METHODDEF(void)
411 396 : forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
412 : JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
413 : JDIMENSION start_row, JDIMENSION start_col,
414 : JDIMENSION num_blocks)
415 : /* This version is used for integer DCT implementations. */
416 : {
417 : /* This routine is heavily used, so it's worth coding it tightly. */
418 396 : my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
419 396 : DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
420 : DCTELEM * workspace;
421 : JDIMENSION bi;
422 :
423 : /* Make sure the compiler doesn't look up these every pass */
424 396 : forward_DCT_method_ptr do_dct = fdct->dct;
425 396 : convsamp_method_ptr do_convsamp = fdct->convsamp;
426 396 : quantize_method_ptr do_quantize = fdct->quantize;
427 396 : workspace = fdct->workspace;
428 :
429 396 : sample_data += start_row; /* fold in the vertical offset once */
430 :
431 792 : for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
432 : /* Load data into workspace, applying unsigned->signed conversion */
433 396 : (*do_convsamp) (sample_data, start_col, workspace);
434 :
435 : /* Perform the DCT */
436 396 : (*do_dct) (workspace);
437 :
438 : /* Quantize/descale the coefficients, and store into coef_blocks[] */
439 396 : (*do_quantize) (coef_blocks[bi], divisors, workspace);
440 : }
441 396 : }
442 :
443 :
444 : #ifdef DCT_FLOAT_SUPPORTED
445 :
446 :
447 : METHODDEF(void)
448 0 : convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
449 : {
450 : register FAST_FLOAT *workspaceptr;
451 : register JSAMPROW elemptr;
452 : register int elemr;
453 :
454 0 : workspaceptr = workspace;
455 0 : for (elemr = 0; elemr < DCTSIZE; elemr++) {
456 0 : elemptr = sample_data[elemr] + start_col;
457 : #if DCTSIZE == 8 /* unroll the inner loop */
458 0 : *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
459 0 : *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
460 0 : *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
461 0 : *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
462 0 : *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
463 0 : *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
464 0 : *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
465 0 : *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
466 : #else
467 : {
468 : register int elemc;
469 : for (elemc = DCTSIZE; elemc > 0; elemc--)
470 : *workspaceptr++ = (FAST_FLOAT)
471 : (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
472 : }
473 : #endif
474 : }
475 0 : }
476 :
477 :
478 : METHODDEF(void)
479 0 : quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
480 : {
481 : register FAST_FLOAT temp;
482 : register int i;
483 0 : register JCOEFPTR output_ptr = coef_block;
484 :
485 0 : for (i = 0; i < DCTSIZE2; i++) {
486 : /* Apply the quantization and scaling factor */
487 0 : temp = workspace[i] * divisors[i];
488 :
489 : /* Round to nearest integer.
490 : * Since C does not specify the direction of rounding for negative
491 : * quotients, we have to force the dividend positive for portability.
492 : * The maximum coefficient size is +-16K (for 12-bit data), so this
493 : * code should work for either 16-bit or 32-bit ints.
494 : */
495 0 : output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
496 : }
497 0 : }
498 :
499 :
500 : METHODDEF(void)
501 0 : forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
502 : JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
503 : JDIMENSION start_row, JDIMENSION start_col,
504 : JDIMENSION num_blocks)
505 : /* This version is used for floating-point DCT implementations. */
506 : {
507 : /* This routine is heavily used, so it's worth coding it tightly. */
508 0 : my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
509 0 : FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
510 : FAST_FLOAT * workspace;
511 : JDIMENSION bi;
512 :
513 :
514 : /* Make sure the compiler doesn't look up these every pass */
515 0 : float_DCT_method_ptr do_dct = fdct->float_dct;
516 0 : float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
517 0 : float_quantize_method_ptr do_quantize = fdct->float_quantize;
518 0 : workspace = fdct->float_workspace;
519 :
520 0 : sample_data += start_row; /* fold in the vertical offset once */
521 :
522 0 : for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
523 : /* Load data into workspace, applying unsigned->signed conversion */
524 0 : (*do_convsamp) (sample_data, start_col, workspace);
525 :
526 : /* Perform the DCT */
527 0 : (*do_dct) (workspace);
528 :
529 : /* Quantize/descale the coefficients, and store into coef_blocks[] */
530 0 : (*do_quantize) (coef_blocks[bi], divisors, workspace);
531 : }
532 0 : }
533 :
534 : #endif /* DCT_FLOAT_SUPPORTED */
535 :
536 :
537 : /*
538 : * Initialize FDCT manager.
539 : */
540 :
541 : GLOBAL(void)
542 3 : jinit_forward_dct (j_compress_ptr cinfo)
543 : {
544 : my_fdct_ptr fdct;
545 : int i;
546 :
547 3 : fdct = (my_fdct_ptr)
548 3 : (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
549 : SIZEOF(my_fdct_controller));
550 3 : cinfo->fdct = (struct jpeg_forward_dct *) fdct;
551 3 : fdct->pub.start_pass = start_pass_fdctmgr;
552 :
553 : /* First determine the DCT... */
554 3 : switch (cinfo->dct_method) {
555 : #ifdef DCT_ISLOW_SUPPORTED
556 : case JDCT_ISLOW:
557 3 : fdct->pub.forward_DCT = forward_DCT;
558 3 : if (jsimd_can_fdct_islow())
559 3 : fdct->dct = jsimd_fdct_islow;
560 : else
561 0 : fdct->dct = jpeg_fdct_islow;
562 3 : break;
563 : #endif
564 : #ifdef DCT_IFAST_SUPPORTED
565 : case JDCT_IFAST:
566 0 : fdct->pub.forward_DCT = forward_DCT;
567 0 : if (jsimd_can_fdct_ifast())
568 0 : fdct->dct = jsimd_fdct_ifast;
569 : else
570 0 : fdct->dct = jpeg_fdct_ifast;
571 0 : break;
572 : #endif
573 : #ifdef DCT_FLOAT_SUPPORTED
574 : case JDCT_FLOAT:
575 0 : fdct->pub.forward_DCT = forward_DCT_float;
576 0 : if (jsimd_can_fdct_float())
577 0 : fdct->float_dct = jsimd_fdct_float;
578 : else
579 0 : fdct->float_dct = jpeg_fdct_float;
580 0 : break;
581 : #endif
582 : default:
583 0 : ERREXIT(cinfo, JERR_NOT_COMPILED);
584 0 : break;
585 : }
586 :
587 : /* ...then the supporting stages. */
588 3 : switch (cinfo->dct_method) {
589 : #ifdef DCT_ISLOW_SUPPORTED
590 : case JDCT_ISLOW:
591 : #endif
592 : #ifdef DCT_IFAST_SUPPORTED
593 : case JDCT_IFAST:
594 : #endif
595 : #if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
596 3 : if (jsimd_can_convsamp())
597 3 : fdct->convsamp = jsimd_convsamp;
598 : else
599 0 : fdct->convsamp = convsamp;
600 3 : if (jsimd_can_quantize())
601 3 : fdct->quantize = jsimd_quantize;
602 : else
603 0 : fdct->quantize = quantize;
604 3 : break;
605 : #endif
606 : #ifdef DCT_FLOAT_SUPPORTED
607 : case JDCT_FLOAT:
608 0 : if (jsimd_can_convsamp_float())
609 0 : fdct->float_convsamp = jsimd_convsamp_float;
610 : else
611 0 : fdct->float_convsamp = convsamp_float;
612 0 : if (jsimd_can_quantize_float())
613 0 : fdct->float_quantize = jsimd_quantize_float;
614 : else
615 0 : fdct->float_quantize = quantize_float;
616 0 : break;
617 : #endif
618 : default:
619 0 : ERREXIT(cinfo, JERR_NOT_COMPILED);
620 0 : break;
621 : }
622 :
623 : /* Allocate workspace memory */
624 : #ifdef DCT_FLOAT_SUPPORTED
625 3 : if (cinfo->dct_method == JDCT_FLOAT)
626 0 : fdct->float_workspace = (FAST_FLOAT *)
627 0 : (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
628 : SIZEOF(FAST_FLOAT) * DCTSIZE2);
629 : else
630 : #endif
631 3 : fdct->workspace = (DCTELEM *)
632 3 : (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
633 : SIZEOF(DCTELEM) * DCTSIZE2);
634 :
635 : /* Mark divisor tables unallocated */
636 15 : for (i = 0; i < NUM_QUANT_TBLS; i++) {
637 12 : fdct->divisors[i] = NULL;
638 : #ifdef DCT_FLOAT_SUPPORTED
639 12 : fdct->float_divisors[i] = NULL;
640 : #endif
641 : }
642 3 : }
|