1 : // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : // This webpage shows layout of YV12 and other YUV formats
6 : // http://www.fourcc.org/yuv.php
7 : // The actual conversion is best described here
8 : // http://en.wikipedia.org/wiki/YUV
9 : // An article on optimizing YUV conversion using tables instead of multiplies
10 : // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf
11 : //
12 : // YV12 is a full plane of Y and a half height, half width chroma planes
13 : // YV16 is a full plane of Y and a full height, half width chroma planes
14 : // YV24 is a full plane of Y and a full height, full width chroma planes
15 : //
16 : // ARGB pixel format is output, which on little endian is stored as BGRA.
17 : // The alpha is set to 255, allowing the application to use RGBA or RGB32.
18 :
19 : #include "yuv_convert.h"
20 :
21 : // Header for low level row functions.
22 : #include "yuv_row.h"
23 : #include "mozilla/SSE.h"
24 :
25 : namespace mozilla {
26 :
27 : namespace gfx {
28 :
// 16.16 fixed point arithmetic: the low kFractionBits bits of a value hold
// the fraction, the bits above them hold the integer part.
const int kFractionBits = 16;
// Fixed point representation of 1.0.
const int kFractionMax = 1 << kFractionBits;
// Selects the fractional bits of a fixed point value.
const int kFractionMask = kFractionMax - 1;
33 :
34 0 : NS_GFX_(YUVType) TypeFromSize(int ywidth,
35 : int yheight,
36 : int cbcrwidth,
37 : int cbcrheight)
38 : {
39 0 : if (ywidth == cbcrwidth && yheight == cbcrheight) {
40 0 : return YV24;
41 : }
42 0 : else if (ywidth / 2 == cbcrwidth && yheight == cbcrheight) {
43 0 : return YV16;
44 : }
45 : else {
46 0 : return YV12;
47 : }
48 : }
49 :
50 : // Convert a frame of YUV to 32 bit ARGB.
51 0 : NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
52 : const uint8* u_buf,
53 : const uint8* v_buf,
54 : uint8* rgb_buf,
55 : int pic_x,
56 : int pic_y,
57 : int pic_width,
58 : int pic_height,
59 : int y_pitch,
60 : int uv_pitch,
61 : int rgb_pitch,
62 : YUVType yuv_type) {
63 0 : unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
64 0 : unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
65 : // Test for SSE because the optimized code uses movntq, which is not part of MMX.
66 0 : bool has_sse = supports_mmx() && supports_sse();
67 : // There is no optimized YV24 SSE routine so we check for this and
68 : // fall back to the C code.
69 0 : has_sse &= yuv_type != YV24;
70 0 : bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
71 0 : int x_width = odd_pic_x ? pic_width - 1 : pic_width;
72 :
73 0 : for (int y = pic_y; y < pic_height + pic_y; ++y) {
74 0 : uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
75 0 : const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
76 0 : const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
77 0 : const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
78 :
79 0 : if (odd_pic_x) {
80 : // Handle the single odd pixel manually and use the
81 : // fast routines for the remaining.
82 : FastConvertYUVToRGB32Row_C(y_ptr++,
83 : u_ptr++,
84 : v_ptr++,
85 : rgb_row,
86 : 1,
87 0 : x_shift);
88 0 : rgb_row += 4;
89 : }
90 :
91 0 : if (has_sse) {
92 : FastConvertYUVToRGB32Row(y_ptr,
93 : u_ptr,
94 : v_ptr,
95 : rgb_row,
96 0 : x_width);
97 : }
98 : else {
99 : FastConvertYUVToRGB32Row_C(y_ptr,
100 : u_ptr,
101 : v_ptr,
102 : rgb_row,
103 : x_width,
104 0 : x_shift);
105 : }
106 : }
107 :
108 : // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
109 0 : if (has_sse)
110 0 : EMMS();
111 0 : }
112 :
113 : // C version does 8 at a time to mimic MMX code
114 0 : static void FilterRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
115 : int source_width, int source_y_fraction) {
116 0 : int y1_fraction = source_y_fraction;
117 0 : int y0_fraction = 256 - y1_fraction;
118 0 : uint8* end = ybuf + source_width;
119 0 : do {
120 0 : ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8;
121 0 : ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8;
122 0 : ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8;
123 0 : ybuf[3] = (y0_ptr[3] * y0_fraction + y1_ptr[3] * y1_fraction) >> 8;
124 0 : ybuf[4] = (y0_ptr[4] * y0_fraction + y1_ptr[4] * y1_fraction) >> 8;
125 0 : ybuf[5] = (y0_ptr[5] * y0_fraction + y1_ptr[5] * y1_fraction) >> 8;
126 0 : ybuf[6] = (y0_ptr[6] * y0_fraction + y1_ptr[6] * y1_fraction) >> 8;
127 0 : ybuf[7] = (y0_ptr[7] * y0_fraction + y1_ptr[7] * y1_fraction) >> 8;
128 0 : y0_ptr += 8;
129 0 : y1_ptr += 8;
130 0 : ybuf += 8;
131 : } while (ybuf < end);
132 0 : }
133 :
134 : #ifdef MOZILLA_MAY_SUPPORT_MMX
135 : void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
136 : int source_width, int source_y_fraction);
137 : #endif
138 :
139 : #ifdef MOZILLA_MAY_SUPPORT_SSE2
140 : void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
141 : int source_width, int source_y_fraction);
142 : #endif
143 :
144 0 : static inline void FilterRows(uint8* ybuf, const uint8* y0_ptr,
145 : const uint8* y1_ptr, int source_width,
146 : int source_y_fraction) {
147 : #ifdef MOZILLA_MAY_SUPPORT_SSE2
148 0 : if (mozilla::supports_sse2()) {
149 0 : FilterRows_SSE2(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
150 0 : return;
151 : }
152 : #endif
153 :
154 : #ifdef MOZILLA_MAY_SUPPORT_MMX
155 0 : if (mozilla::supports_mmx()) {
156 0 : FilterRows_MMX(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
157 0 : return;
158 : }
159 : #endif
160 :
161 0 : FilterRows_C(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
162 : }
163 :
164 :
165 : // Scale a frame of YUV to 32 bit ARGB.
166 0 : NS_GFX_(void) ScaleYCbCrToRGB32(const uint8* y_buf,
167 : const uint8* u_buf,
168 : const uint8* v_buf,
169 : uint8* rgb_buf,
170 : int source_width,
171 : int source_height,
172 : int width,
173 : int height,
174 : int y_pitch,
175 : int uv_pitch,
176 : int rgb_pitch,
177 : YUVType yuv_type,
178 : Rotate view_rotate,
179 : ScaleFilter filter) {
180 0 : bool has_mmx = supports_mmx();
181 :
182 : // 4096 allows 3 buffers to fit in 12k.
183 : // Helps performance on CPU with 16K L1 cache.
184 : // Large enough for 3830x2160 and 30" displays which are 2560x1600.
185 0 : const int kFilterBufferSize = 4096;
186 : // Disable filtering if the screen is too big (to avoid buffer overflows).
187 : // This should never happen to regular users: they don't have monitors
188 : // wider than 4096 pixels.
189 : // TODO(fbarchard): Allow rotated videos to filter.
190 0 : if (source_width > kFilterBufferSize || view_rotate)
191 0 : filter = FILTER_NONE;
192 :
193 0 : unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
194 : // Diagram showing origin and direction of source sampling.
195 : // ->0 4<-
196 : // 7 3
197 : //
198 : // 6 5
199 : // ->1 2<-
200 : // Rotations that start at right side of image.
201 0 : if ((view_rotate == ROTATE_180) ||
202 : (view_rotate == ROTATE_270) ||
203 : (view_rotate == MIRROR_ROTATE_0) ||
204 : (view_rotate == MIRROR_ROTATE_90)) {
205 0 : y_buf += source_width - 1;
206 0 : u_buf += source_width / 2 - 1;
207 0 : v_buf += source_width / 2 - 1;
208 0 : source_width = -source_width;
209 : }
210 : // Rotations that start at bottom of image.
211 0 : if ((view_rotate == ROTATE_90) ||
212 : (view_rotate == ROTATE_180) ||
213 : (view_rotate == MIRROR_ROTATE_90) ||
214 : (view_rotate == MIRROR_ROTATE_180)) {
215 0 : y_buf += (source_height - 1) * y_pitch;
216 0 : u_buf += ((source_height >> y_shift) - 1) * uv_pitch;
217 0 : v_buf += ((source_height >> y_shift) - 1) * uv_pitch;
218 0 : source_height = -source_height;
219 : }
220 :
221 : // Handle zero sized destination.
222 0 : if (width == 0 || height == 0)
223 0 : return;
224 0 : int source_dx = source_width * kFractionMax / width;
225 0 : int source_dy = source_height * kFractionMax / height;
226 0 : int source_dx_uv = source_dx;
227 :
228 0 : if ((view_rotate == ROTATE_90) ||
229 : (view_rotate == ROTATE_270)) {
230 0 : int tmp = height;
231 0 : height = width;
232 0 : width = tmp;
233 0 : tmp = source_height;
234 0 : source_height = source_width;
235 0 : source_width = tmp;
236 0 : int original_dx = source_dx;
237 0 : int original_dy = source_dy;
238 0 : source_dx = ((original_dy >> kFractionBits) * y_pitch) << kFractionBits;
239 0 : source_dx_uv = ((original_dy >> kFractionBits) * uv_pitch) << kFractionBits;
240 0 : source_dy = original_dx;
241 0 : if (view_rotate == ROTATE_90) {
242 0 : y_pitch = -1;
243 0 : uv_pitch = -1;
244 0 : source_height = -source_height;
245 : } else {
246 0 : y_pitch = 1;
247 0 : uv_pitch = 1;
248 : }
249 : }
250 :
251 : // Need padding because FilterRows() will write 1 to 16 extra pixels
252 : // after the end for SSE2 version.
253 : uint8 yuvbuf[16 + kFilterBufferSize * 3 + 16];
254 : uint8* ybuf =
255 0 : reinterpret_cast<uint8*>(reinterpret_cast<PRUptrdiff>(yuvbuf + 15) & ~15);
256 0 : uint8* ubuf = ybuf + kFilterBufferSize;
257 0 : uint8* vbuf = ubuf + kFilterBufferSize;
258 : // TODO(fbarchard): Fixed point math is off by 1 on negatives.
259 0 : int yscale_fixed = (source_height << kFractionBits) / height;
260 :
261 : // TODO(fbarchard): Split this into separate function for better efficiency.
262 0 : for (int y = 0; y < height; ++y) {
263 0 : uint8* dest_pixel = rgb_buf + y * rgb_pitch;
264 0 : int source_y_subpixel = (y * yscale_fixed);
265 0 : if (yscale_fixed >= (kFractionMax * 2)) {
266 0 : source_y_subpixel += kFractionMax / 2; // For 1/2 or less, center filter.
267 : }
268 0 : int source_y = source_y_subpixel >> kFractionBits;
269 :
270 0 : const uint8* y0_ptr = y_buf + source_y * y_pitch;
271 0 : const uint8* y1_ptr = y0_ptr + y_pitch;
272 :
273 0 : const uint8* u0_ptr = u_buf + (source_y >> y_shift) * uv_pitch;
274 0 : const uint8* u1_ptr = u0_ptr + uv_pitch;
275 0 : const uint8* v0_ptr = v_buf + (source_y >> y_shift) * uv_pitch;
276 0 : const uint8* v1_ptr = v0_ptr + uv_pitch;
277 :
278 : // vertical scaler uses 16.8 fixed point
279 0 : int source_y_fraction = (source_y_subpixel & kFractionMask) >> 8;
280 : int source_uv_fraction =
281 0 : ((source_y_subpixel >> y_shift) & kFractionMask) >> 8;
282 :
283 0 : const uint8* y_ptr = y0_ptr;
284 0 : const uint8* u_ptr = u0_ptr;
285 0 : const uint8* v_ptr = v0_ptr;
286 : // Apply vertical filtering if necessary.
287 : // TODO(fbarchard): Remove memcpy when not necessary.
288 0 : if (filter & mozilla::gfx::FILTER_BILINEAR_V) {
289 0 : if (yscale_fixed != kFractionMax &&
290 : source_y_fraction && ((source_y + 1) < source_height)) {
291 0 : FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
292 : } else {
293 0 : memcpy(ybuf, y0_ptr, source_width);
294 : }
295 0 : y_ptr = ybuf;
296 0 : ybuf[source_width] = ybuf[source_width-1];
297 0 : int uv_source_width = (source_width + 1) / 2;
298 0 : if (yscale_fixed != kFractionMax &&
299 : source_uv_fraction &&
300 : (((source_y >> y_shift) + 1) < (source_height >> y_shift))) {
301 0 : FilterRows(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction);
302 0 : FilterRows(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction);
303 : } else {
304 0 : memcpy(ubuf, u0_ptr, uv_source_width);
305 0 : memcpy(vbuf, v0_ptr, uv_source_width);
306 : }
307 0 : u_ptr = ubuf;
308 0 : v_ptr = vbuf;
309 0 : ubuf[uv_source_width] = ubuf[uv_source_width - 1];
310 0 : vbuf[uv_source_width] = vbuf[uv_source_width - 1];
311 : }
312 0 : if (source_dx == kFractionMax) { // Not scaled
313 : FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
314 0 : dest_pixel, width);
315 0 : } else if (filter & FILTER_BILINEAR_H) {
316 : LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
317 0 : dest_pixel, width, source_dx);
318 : } else {
319 : // Specialized scalers and rotation.
320 : #if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_MSC_VER) && defined(_M_IX86)
321 : if(mozilla::supports_sse()) {
322 : if (width == (source_width * 2)) {
323 : DoubleYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
324 : dest_pixel, width);
325 : } else if ((source_dx & kFractionMask) == 0) {
326 : // Scaling by integer scale factor. ie half.
327 : ConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
328 : dest_pixel, width,
329 : source_dx >> kFractionBits);
330 : } else if (source_dx_uv == source_dx) { // Not rotated.
331 : ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
332 : dest_pixel, width, source_dx);
333 : } else {
334 : RotateConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
335 : dest_pixel, width,
336 : source_dx >> kFractionBits,
337 : source_dx_uv >> kFractionBits);
338 : }
339 : }
340 : else {
341 : ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
342 : dest_pixel, width, source_dx);
343 : }
344 : #else
345 : ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
346 0 : dest_pixel, width, source_dx);
347 : #endif
348 : }
349 : }
350 : // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms.
351 0 : if (has_mmx)
352 0 : EMMS();
353 : }
354 :
355 : } // namespace gfx
356 : } // namespace mozilla
|