1 : /*
2 : * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 :
12 : #include "vpx_ports/config.h"
13 : #include "vpx_ports/mem.h"
14 : #include "vp8/common/subpixel.h"
15 :
/*
 * Filter tap tables defined outside this file.  The predictors in this
 * file select a row of vp8_six_tap_mmx with xoffset / yoffset.
 */
extern const short vp8_six_tap_mmx[8][6*8];
extern const short vp8_bilinear_filters_mmx[8][2*8];

/*
 * Six-tap filter pass, implemented outside this file.  The MMX predictors
 * in this file use it as the first pass: it reads 8-bit pixels from
 * src_ptr and writes 16-bit intermediates to output_ptr.
 */
extern void vp8_filter_block1d_h6_mmx(unsigned char *src_ptr,
                                      unsigned short *output_ptr,
                                      unsigned int src_pixels_per_line,
                                      unsigned int pixel_step,
                                      unsigned int output_height,
                                      unsigned int output_width,
                                      const short *vp8_filter);

/*
 * Six-tap filter pass, implemented outside this file.  The MMX predictors
 * in this file use it as the second pass: it reads the 16-bit
 * intermediates and writes 8-bit pixels to output_ptr.
 */
extern void vp8_filter_block1dc_v6_mmx(unsigned short *src_ptr,
                                       unsigned char *output_ptr,
                                       int output_pitch,
                                       unsigned int pixels_per_line,
                                       unsigned int pixel_step,
                                       unsigned int output_height,
                                       unsigned int output_width,
                                       const short *vp8_filter);
/*
 * SSE2 six-tap first-pass routines (8- and 16-wide variants), implemented
 * outside this file.  Both read 8-bit pixels from src_ptr and write
 * 16-bit intermediates to output_ptr.
 */
extern void vp8_filter_block1d8_h6_sse2(unsigned char *src_ptr,
                                        unsigned short *output_ptr,
                                        unsigned int src_pixels_per_line,
                                        unsigned int pixel_step,
                                        unsigned int output_height,
                                        unsigned int output_width,
                                        const short *vp8_filter);

extern void vp8_filter_block1d16_h6_sse2(unsigned char *src_ptr,
                                         unsigned short *output_ptr,
                                         unsigned int src_pixels_per_line,
                                         unsigned int pixel_step,
                                         unsigned int output_height,
                                         unsigned int output_width,
                                         const short *vp8_filter);
/*
 * SSE2 six-tap second-pass routines (8- and 16-wide variants), implemented
 * outside this file.  Both read 16-bit intermediates from src_ptr and
 * write 8-bit pixels to output_ptr.
 *
 * NOTE(review): the callers in this file pass dst_pitch in the
 * output_width position as well as in dst_pitch; confirm against the
 * assembly which of the two the routine actually reads.
 */
extern void vp8_filter_block1d8_v6_sse2
(
    unsigned short *src_ptr,
    unsigned char *output_ptr,
    int dst_pitch,
    unsigned int pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    const short *vp8_filter
);
extern void vp8_filter_block1d16_v6_sse2
(
    unsigned short *src_ptr,
    unsigned char *output_ptr,
    int dst_pitch,
    unsigned int pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    const short *vp8_filter
);
/*
 * SSE2 unpack routine, implemented outside this file.  The 16x16 SSE2
 * predictor calls it in place of the first filter pass when xoffset is 0;
 * it reads 8-bit pixels from src_ptr and writes 16-bit values to
 * output_ptr.
 */
extern void vp8_unpack_block1d16_h6_sse2(unsigned char *src_ptr,
                                         unsigned short *output_ptr,
                                         unsigned int src_pixels_per_line,
                                         unsigned int output_height,
                                         unsigned int output_width);
/*
 * SSE2 single-pass six-tap routines, implemented outside this file.  The
 * SSE2 predictors in this file call them when only one of xoffset /
 * yoffset is non-zero; they read 8-bit pixels from src_ptr and write
 * 8-bit pixels straight to output_ptr, with no intermediate buffer.
 */
extern void vp8_filter_block1d8_h6_only_sse2
(
    unsigned char *src_ptr,
    unsigned int src_pixels_per_line,
    unsigned char *output_ptr,
    int dst_pitch,
    unsigned int output_height,
    const short *vp8_filter
);
extern void vp8_filter_block1d16_h6_only_sse2
(
    unsigned char *src_ptr,
    unsigned int src_pixels_per_line,
    unsigned char *output_ptr,
    int dst_pitch,
    unsigned int output_height,
    const short *vp8_filter
);
extern void vp8_filter_block1d8_v6_only_sse2
(
    unsigned char *src_ptr,
    unsigned int src_pixels_per_line,
    unsigned char *output_ptr,
    int dst_pitch,
    unsigned int output_height,
    const short *vp8_filter
);
/*
 * 8x8 bilinear predictor, defined outside this file and declared through
 * the prototype_subpixel_predict macro (presumably from
 * vp8/common/subpixel.h -- confirm).  vp8_bilinear_predict16x16_mmx in
 * this file calls it with (src, src stride, xoffset, yoffset, dst,
 * dst pitch).
 */
extern prototype_subpixel_predict(vp8_bilinear_predict8x8_mmx);
118 :
119 :
120 : #if HAVE_MMX
121 0 : void vp8_sixtap_predict4x4_mmx
122 : (
123 : unsigned char *src_ptr,
124 : int src_pixels_per_line,
125 : int xoffset,
126 : int yoffset,
127 : unsigned char *dst_ptr,
128 : int dst_pitch
129 : )
130 : {
131 0 : DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16); /* Temp data bufffer used in filtering */
132 : const short *HFilter, *VFilter;
133 0 : HFilter = vp8_six_tap_mmx[xoffset];
134 0 : vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter);
135 0 : VFilter = vp8_six_tap_mmx[yoffset];
136 0 : vp8_filter_block1dc_v6_mmx(FData2 + 8, dst_ptr, dst_pitch, 8, 4 , 4, 4, VFilter);
137 :
138 0 : }
139 :
140 :
141 0 : void vp8_sixtap_predict16x16_mmx
142 : (
143 : unsigned char *src_ptr,
144 : int src_pixels_per_line,
145 : int xoffset,
146 : int yoffset,
147 : unsigned char *dst_ptr,
148 : int dst_pitch
149 : )
150 : {
151 :
152 0 : DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */
153 :
154 : const short *HFilter, *VFilter;
155 :
156 :
157 0 : HFilter = vp8_six_tap_mmx[xoffset];
158 :
159 0 : vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter);
160 0 : vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 21, 32, HFilter);
161 0 : vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 8, FData2 + 8, src_pixels_per_line, 1, 21, 32, HFilter);
162 0 : vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 12, FData2 + 12, src_pixels_per_line, 1, 21, 32, HFilter);
163 :
164 0 : VFilter = vp8_six_tap_mmx[yoffset];
165 0 : vp8_filter_block1dc_v6_mmx(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, 16, VFilter);
166 0 : vp8_filter_block1dc_v6_mmx(FData2 + 36, dst_ptr + 4, dst_pitch, 32, 16 , 16, 16, VFilter);
167 0 : vp8_filter_block1dc_v6_mmx(FData2 + 40, dst_ptr + 8, dst_pitch, 32, 16 , 16, 16, VFilter);
168 0 : vp8_filter_block1dc_v6_mmx(FData2 + 44, dst_ptr + 12, dst_pitch, 32, 16 , 16, 16, VFilter);
169 :
170 0 : }
171 :
172 :
173 0 : void vp8_sixtap_predict8x8_mmx
174 : (
175 : unsigned char *src_ptr,
176 : int src_pixels_per_line,
177 : int xoffset,
178 : int yoffset,
179 : unsigned char *dst_ptr,
180 : int dst_pitch
181 : )
182 : {
183 :
184 0 : DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
185 :
186 : const short *HFilter, *VFilter;
187 :
188 0 : HFilter = vp8_six_tap_mmx[xoffset];
189 0 : vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter);
190 0 : vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 13, 16, HFilter);
191 :
192 0 : VFilter = vp8_six_tap_mmx[yoffset];
193 0 : vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, 8, VFilter);
194 0 : vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 8, 8, VFilter);
195 :
196 0 : }
197 :
198 :
199 0 : void vp8_sixtap_predict8x4_mmx
200 : (
201 : unsigned char *src_ptr,
202 : int src_pixels_per_line,
203 : int xoffset,
204 : int yoffset,
205 : unsigned char *dst_ptr,
206 : int dst_pitch
207 : )
208 : {
209 :
210 0 : DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
211 :
212 : const short *HFilter, *VFilter;
213 :
214 0 : HFilter = vp8_six_tap_mmx[xoffset];
215 0 : vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter);
216 0 : vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 9, 16, HFilter);
217 :
218 0 : VFilter = vp8_six_tap_mmx[yoffset];
219 0 : vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, 8, VFilter);
220 0 : vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 4, 8, VFilter);
221 :
222 0 : }
223 :
224 :
225 :
/*
 * 16x16 bilinear sub-pixel prediction (MMX), built from four calls to the
 * 8x8 predictor.  Quadrants are processed in the order top-left,
 * top-right, bottom-left, bottom-right, each with the same xoffset and
 * yoffset.
 */
void vp8_bilinear_predict16x16_mmx(unsigned char *src_ptr, int src_pixels_per_line,
                                   int xoffset, int yoffset,
                                   unsigned char *dst_ptr, int dst_pitch)
{
    int row, col;

    for (row = 0; row < 16; row += 8)
    {
        for (col = 0; col < 16; col += 8)
        {
            vp8_bilinear_predict8x8_mmx(src_ptr + row * src_pixels_per_line + col,
                                        src_pixels_per_line, xoffset, yoffset,
                                        dst_ptr + dst_pitch * row + col, dst_pitch);
        }
    }
}
241 : #endif
242 :
243 :
244 : #if HAVE_SSE2
245 0 : void vp8_sixtap_predict16x16_sse2
246 : (
247 : unsigned char *src_ptr,
248 : int src_pixels_per_line,
249 : int xoffset,
250 : int yoffset,
251 : unsigned char *dst_ptr,
252 : int dst_pitch
253 :
254 : )
255 : {
256 0 : DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */
257 :
258 : const short *HFilter, *VFilter;
259 :
260 0 : if (xoffset)
261 : {
262 0 : if (yoffset)
263 : {
264 0 : HFilter = vp8_six_tap_mmx[xoffset];
265 0 : vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter);
266 0 : VFilter = vp8_six_tap_mmx[yoffset];
267 0 : vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
268 : }
269 : else
270 : {
271 : /* First-pass only */
272 0 : HFilter = vp8_six_tap_mmx[xoffset];
273 0 : vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter);
274 : }
275 : }
276 : else
277 : {
278 : /* Second-pass only */
279 0 : VFilter = vp8_six_tap_mmx[yoffset];
280 0 : vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 21, 32);
281 0 : vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
282 : }
283 0 : }
284 :
285 :
286 0 : void vp8_sixtap_predict8x8_sse2
287 : (
288 : unsigned char *src_ptr,
289 : int src_pixels_per_line,
290 : int xoffset,
291 : int yoffset,
292 : unsigned char *dst_ptr,
293 : int dst_pitch
294 : )
295 : {
296 0 : DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
297 : const short *HFilter, *VFilter;
298 :
299 0 : if (xoffset)
300 : {
301 0 : if (yoffset)
302 : {
303 0 : HFilter = vp8_six_tap_mmx[xoffset];
304 0 : vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter);
305 0 : VFilter = vp8_six_tap_mmx[yoffset];
306 0 : vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, dst_pitch, VFilter);
307 : }
308 : else
309 : {
310 : /* First-pass only */
311 0 : HFilter = vp8_six_tap_mmx[xoffset];
312 0 : vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter);
313 : }
314 : }
315 : else
316 : {
317 : /* Second-pass only */
318 0 : VFilter = vp8_six_tap_mmx[yoffset];
319 0 : vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter);
320 : }
321 0 : }
322 :
323 :
324 0 : void vp8_sixtap_predict8x4_sse2
325 : (
326 : unsigned char *src_ptr,
327 : int src_pixels_per_line,
328 : int xoffset,
329 : int yoffset,
330 : unsigned char *dst_ptr,
331 : int dst_pitch
332 : )
333 : {
334 0 : DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
335 : const short *HFilter, *VFilter;
336 :
337 0 : if (xoffset)
338 : {
339 0 : if (yoffset)
340 : {
341 0 : HFilter = vp8_six_tap_mmx[xoffset];
342 0 : vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter);
343 0 : VFilter = vp8_six_tap_mmx[yoffset];
344 0 : vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, dst_pitch, VFilter);
345 : }
346 : else
347 : {
348 : /* First-pass only */
349 0 : HFilter = vp8_six_tap_mmx[xoffset];
350 0 : vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter);
351 : }
352 : }
353 : else
354 : {
355 : /* Second-pass only */
356 0 : VFilter = vp8_six_tap_mmx[yoffset];
357 0 : vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter);
358 : }
359 0 : }
360 :
361 : #endif
362 :
363 : #if HAVE_SSSE3
364 :
/*
 * SSSE3 six-tap filter passes, implemented outside this file.  Unlike the
 * MMX/SSE2 routines declared earlier in this file, they read and write
 * 8-bit pixels on both sides and take a filter index rather than a
 * pointer to the taps; the predictors in this file pass xoffset or
 * yoffset as that index.
 */
extern void vp8_filter_block1d8_h6_ssse3(unsigned char *src_ptr,
                                         unsigned int src_pixels_per_line,
                                         unsigned char *output_ptr,
                                         unsigned int output_pitch,
                                         unsigned int output_height,
                                         unsigned int vp8_filter_index);

extern void vp8_filter_block1d16_h6_ssse3(unsigned char *src_ptr,
                                          unsigned int src_pixels_per_line,
                                          unsigned char *output_ptr,
                                          unsigned int output_pitch,
                                          unsigned int output_height,
                                          unsigned int vp8_filter_index);

extern void vp8_filter_block1d16_v6_ssse3(unsigned char *src_ptr,
                                          unsigned int src_pitch,
                                          unsigned char *output_ptr,
                                          unsigned int out_pitch,
                                          unsigned int output_height,
                                          unsigned int vp8_filter_index);

extern void vp8_filter_block1d8_v6_ssse3(unsigned char *src_ptr,
                                         unsigned int src_pitch,
                                         unsigned char *output_ptr,
                                         unsigned int out_pitch,
                                         unsigned int output_height,
                                         unsigned int vp8_filter_index);

extern void vp8_filter_block1d4_h6_ssse3(unsigned char *src_ptr,
                                         unsigned int src_pixels_per_line,
                                         unsigned char *output_ptr,
                                         unsigned int output_pitch,
                                         unsigned int output_height,
                                         unsigned int vp8_filter_index);

extern void vp8_filter_block1d4_v6_ssse3(unsigned char *src_ptr,
                                         unsigned int src_pitch,
                                         unsigned char *output_ptr,
                                         unsigned int out_pitch,
                                         unsigned int output_height,
                                         unsigned int vp8_filter_index);
424 :
425 0 : void vp8_sixtap_predict16x16_ssse3
426 : (
427 : unsigned char *src_ptr,
428 : int src_pixels_per_line,
429 : int xoffset,
430 : int yoffset,
431 : unsigned char *dst_ptr,
432 : int dst_pitch
433 :
434 : )
435 : {
436 0 : DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
437 :
438 0 : if (xoffset)
439 : {
440 0 : if (yoffset)
441 : {
442 0 : vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset);
443 0 : vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset);
444 : }
445 : else
446 : {
447 : /* First-pass only */
448 0 : vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset);
449 : }
450 : }
451 : else
452 : {
453 : /* Second-pass only */
454 0 : vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset);
455 : }
456 0 : }
457 :
458 0 : void vp8_sixtap_predict8x8_ssse3
459 : (
460 : unsigned char *src_ptr,
461 : int src_pixels_per_line,
462 : int xoffset,
463 : int yoffset,
464 : unsigned char *dst_ptr,
465 : int dst_pitch
466 : )
467 : {
468 0 : DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
469 :
470 0 : if (xoffset)
471 : {
472 0 : if (yoffset)
473 : {
474 0 : vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset);
475 0 : vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset);
476 : }
477 : else
478 : {
479 0 : vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset);
480 : }
481 : }
482 : else
483 : {
484 : /* Second-pass only */
485 0 : vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset);
486 : }
487 0 : }
488 :
489 :
490 0 : void vp8_sixtap_predict8x4_ssse3
491 : (
492 : unsigned char *src_ptr,
493 : int src_pixels_per_line,
494 : int xoffset,
495 : int yoffset,
496 : unsigned char *dst_ptr,
497 : int dst_pitch
498 : )
499 : {
500 0 : DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
501 :
502 0 : if (xoffset)
503 : {
504 0 : if (yoffset)
505 : {
506 0 : vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset);
507 0 : vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset);
508 : }
509 : else
510 : {
511 : /* First-pass only */
512 0 : vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
513 : }
514 : }
515 : else
516 : {
517 : /* Second-pass only */
518 0 : vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
519 : }
520 0 : }
521 :
522 0 : void vp8_sixtap_predict4x4_ssse3
523 : (
524 : unsigned char *src_ptr,
525 : int src_pixels_per_line,
526 : int xoffset,
527 : int yoffset,
528 : unsigned char *dst_ptr,
529 : int dst_pitch
530 : )
531 : {
532 0 : DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
533 :
534 0 : if (xoffset)
535 : {
536 0 : if (yoffset)
537 : {
538 0 : vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset);
539 0 : vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset);
540 : }
541 : else
542 : {
543 0 : vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
544 : }
545 : }
546 : else
547 : {
548 0 : vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
549 : }
550 :
551 0 : }
552 :
553 : #endif
|