1 :
2 : /*
3 : * Copyright 2006 The Android Open Source Project
4 : *
5 : * Use of this source code is governed by a BSD-style license that can be
6 : * found in the LICENSE file.
7 : */
8 :
9 :
10 : #include "SkBlurMask.h"
11 : #include "SkMath.h"
12 : #include "SkTemplates.h"
13 : #include "SkEndian.h"
14 :
15 : // Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
16 : // breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
18 : // us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
19 : #if defined(BUILD_FOR_WIN_32)
20 : #define UNROLL_KERNEL_LOOP 1
21 : #endif
22 :
23 : /** The sum buffer is an array of u32 to hold the accumulated sum of all of the
24 : src values at their position, plus all values above and to the left.
25 : When we sample into this buffer, we need an initial row and column of 0s,
26 : so we have an index correspondence as follows:
27 :
28 : src[i, j] == sum[i+1, j+1]
29 : sum[0, j] == sum[i, 0] == 0
30 :
31 : We assume that the sum buffer's stride == its width
32 : */
/** Build the summed-area table for an 8-bit mask.
 *
 *  Fills `sum` (dimensions (srcW+1) x (srcH+1), stride == srcW+1) so that
 *  sum[i+1][j+1] holds the total of src[0..i][0..j], with the first row and
 *  first column zeroed (see the comment block above for the index mapping).
 *
 *  @param sum   destination buffer, (srcW+1)*(srcH+1) uint32 entries
 *  @param srcW  source width in pixels
 *  @param srcH  source height in pixels
 *  @param src   source alpha values, rows srcRB bytes apart
 *  @param srcRB source row bytes; must be >= srcW
 */
static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
                             const uint8_t src[], int srcRB) {
    int sumW = srcW + 1;

    SkASSERT(srcRB >= srcW);
    // mod srcRB so we can apply it after each row
    srcRB -= srcW;

    int x, y;

    // zero out the top row and column
    memset(sum, 0, sumW * sizeof(sum[0]));
    sum += sumW;

    // special case first row: there is no row above, so each entry is just
    // the running total (X) of the pixels to its left.
    uint32_t X = 0;
    *sum++ = 0; // initialize the first column to 0
    for (x = srcW - 1; x >= 0; --x) {
        X = *src++ + X;
        *sum++ = X;
    }
    src += srcRB;

    // now do the rest of the rows using the recurrence
    //     sum[y][x] = src + left + top - topleft
    // where L carries the entry just written (left) and C carries the
    // above-row value read on the previous iteration (topleft).
    for (y = srcH - 1; y > 0; --y) {
        uint32_t L = 0;
        uint32_t C = 0;
        *sum++ = 0; // initialize the first column to 0

        // peel single pixels until src reaches 4-byte alignment (the file
        // header notes this was meant to enable word-at-a-time reads)
        for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
            uint32_t T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
        }

        // main body, unrolled 4x; each step is the same recurrence as above
        for (; x >= 4; x-=4) {
            uint32_t T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
            T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
            T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
            T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
        }

        // tail: at most 4 remaining pixels (x counts down to 0 inclusive)
        for (; x >= 0; --x) {
            uint32_t T = sum[-sumW];
            X = *src++ + L + T - C;
            *sum++ = X;
            L = X;
            C = T;
        }
        src += srcRB;
    }
}
103 :
104 : /**
105 : * This is the path for apply_kernel() to be taken when the kernel
106 : * is wider than the source image.
107 : */
108 0 : static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
109 : int sw, int sh) {
110 0 : SkASSERT(2*rx > sw);
111 :
112 0 : uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
113 :
114 0 : int sumStride = sw + 1;
115 :
116 0 : int dw = sw + 2*rx;
117 0 : int dh = sh + 2*ry;
118 :
119 0 : int prev_y = -2*ry;
120 0 : int next_y = 1;
121 :
122 0 : for (int y = 0; y < dh; y++) {
123 0 : int py = SkClampPos(prev_y) * sumStride;
124 0 : int ny = SkFastMin32(next_y, sh) * sumStride;
125 :
126 0 : int prev_x = -2*rx;
127 0 : int next_x = 1;
128 :
129 0 : for (int x = 0; x < dw; x++) {
130 0 : int px = SkClampPos(prev_x);
131 0 : int nx = SkFastMin32(next_x, sw);
132 :
133 0 : uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
134 0 : *dst++ = SkToU8(tmp * scale >> 24);
135 :
136 0 : prev_x += 1;
137 0 : next_x += 1;
138 : }
139 :
140 0 : prev_y += 1;
141 0 : next_y += 1;
142 : }
143 0 : }
144 : /**
145 : * sw and sh are the width and height of the src. Since the sum buffer
146 : * matches that, but has an extra row and col at the beginning (with zeros),
147 : * we can just use sw and sh as our "max" values for pinning coordinates
148 : * when sampling into sum[][]
149 : *
150 : * The inner loop is conceptually simple; we break it into several sections
151 : * to improve performance. Here's the original version:
152 : for (int x = 0; x < dw; x++) {
153 : int px = SkClampPos(prev_x);
154 : int nx = SkFastMin32(next_x, sw);
155 :
156 : uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
157 : *dst++ = SkToU8(tmp * scale >> 24);
158 :
159 : prev_x += 1;
160 : next_x += 1;
161 : }
162 : * The sections are:
163 : * left-hand section, where prev_x is clamped to 0
164 : * center section, where neither prev_x nor next_x is clamped
165 : * right-hand section, where next_x is clamped to sw
166 : * On some operating systems, the center section is unrolled for additional
167 : * speedup.
168 : */
/** Apply a (2*rx+1) x (2*ry+1) box kernel via the summed-area table `sum`
 *  (built by build_sum_buffer for a sw x sh source, stride sw+1), writing a
 *  (sw + 2*rx) x (sh + 2*ry) 8-bit result to dst.  The inner loop is split
 *  into left / center / right sections as described in the comment above.
 */
static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
                         int sw, int sh) {
    // if the kernel is wider than the source, every sample clamps; use the
    // simple fully-clamped path
    if (2*rx > sw) {
        kernel_clamped(dst, rx, ry, sum, sw, sh);
        return;
    }

    // reciprocal of the kernel area in 8.24 fixed point
    uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));

    int sumStride = sw + 1;

    // destination dimensions: source grown by the kernel radius on each side
    int dw = sw + 2*rx;
    int dh = sh + 2*ry;

    // table rows just above / below the kernel window, tracked incrementally
    int prev_y = -2*ry;
    int next_y = 1;

    // guarantees the center section is non-degenerate
    SkASSERT(2*rx <= dw - 2*rx);

    for (int y = 0; y < dh; y++) {
        int py = SkClampPos(prev_y) * sumStride;
        int ny = SkFastMin32(next_y, sh) * sumStride;

        int prev_x = -2*rx;
        int next_x = 1;
        int x = 0;

        // left-hand section: prev_x is known to clamp to 0
        for (; x < 2*rx; x++) {
            SkASSERT(prev_x <= 0);
            SkASSERT(next_x <= sw);

            int px = 0;
            int nx = next_x;

            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }

        // center section: no clamping needed, so precompute the four corner
        // indices once and just advance them each pixel
        int i0 = prev_x + py;
        int i1 = next_x + ny;
        int i2 = next_x + py;
        int i3 = prev_x + ny;

#if UNROLL_KERNEL_LOOP
        // 4x unrolled center (only enabled on platforms where it measured
        // faster; see the comment at the top of the file)
        for (; x < dw - 2*rx - 4; x += 4) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);
            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);
            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);
            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 4;
            next_x += 4;
        }
#endif

        // center remainder (or the entire center when not unrolling)
        for (; x < dw - 2*rx; x++) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }

        // right-hand section: next_x is known to clamp to sw
        for (; x < dw; x++) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x > sw);

            int px = prev_x;
            int nx = sw;

            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }

        prev_y += 1;
        next_y += 1;
    }
}
263 :
264 : /**
265 : * This is the path for apply_kernel_interp() to be taken when the kernel
266 : * is wider than the source image.
267 : */
/** Fully-clamped variant of apply_kernel_interp(), used when the kernel is
 *  wider than the source.  Blends a (2*rx+1) box kernel with a (2*rx-1) box
 *  kernel, weighted by outer_weight (0..255), to approximate a fractional
 *  blur radius.
 */
static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
        const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
    SkASSERT(2*rx > sw);

    // the inner (smaller) kernel gets the complement of the outer weight
    int inner_weight = 255 - outer_weight;

    // round these guys up if they're bigger than 127
    outer_weight += outer_weight >> 7;
    inner_weight += inner_weight >> 7;

    // per-kernel scale = weight / area, in fixed point (shifted out by >> 24
    // after the weighted sums are combined below)
    uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
    uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));

    int sumStride = sw + 1;

    // destination dimensions: source grown by the kernel radius on each side
    int dw = sw + 2*rx;
    int dh = sh + 2*ry;

    int prev_y = -2*ry;
    int next_y = 1;

    for (int y = 0; y < dh; y++) {
        // outer kernel rows, clamped into the sum buffer
        int py = SkClampPos(prev_y) * sumStride;
        int ny = SkFastMin32(next_y, sh) * sumStride;

        // inner kernel rows: one row narrower on each side
        int ipy = SkClampPos(prev_y + 1) * sumStride;
        int iny = SkClampMax(next_y - 1, sh) * sumStride;

        int prev_x = -2*rx;
        int next_x = 1;

        for (int x = 0; x < dw; x++) {
            int px = SkClampPos(prev_x);
            int nx = SkFastMin32(next_x, sw);

            // inner kernel columns: one column narrower on each side
            int ipx = SkClampPos(prev_x + 1);
            int inx = SkClampMax(next_x - 1, sw);

            // four-corner summed-area lookups for both kernels
            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
                               - sum[nx+py] - sum[px+ny];
            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
                               - sum[inx+ipy] - sum[ipx+iny];
            // blend the two kernels and shift out the fixed-point fraction
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 1;
            next_x += 1;
        }
        prev_y += 1;
        next_y += 1;
    }
}
320 :
321 : /**
322 : * sw and sh are the width and height of the src. Since the sum buffer
323 : * matches that, but has an extra row and col at the beginning (with zeros),
324 : * we can just use sw and sh as our "max" values for pinning coordinates
325 : * when sampling into sum[][]
326 : *
327 : * The inner loop is conceptually simple; we break it into several variants
328 : * to improve performance. Here's the original version:
329 : for (int x = 0; x < dw; x++) {
330 : int px = SkClampPos(prev_x);
331 : int nx = SkFastMin32(next_x, sw);
332 :
333 : int ipx = SkClampPos(prev_x + 1);
334 : int inx = SkClampMax(next_x - 1, sw);
335 :
336 : uint32_t outer_sum = sum[px+py] + sum[nx+ny]
337 : - sum[nx+py] - sum[px+ny];
338 : uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
339 : - sum[inx+ipy] - sum[ipx+iny];
340 : *dst++ = SkToU8((outer_sum * outer_scale
341 : + inner_sum * inner_scale) >> 24);
342 :
343 : prev_x += 1;
344 : next_x += 1;
345 : }
346 : * The sections are:
347 : * left-hand section, where prev_x is clamped to 0
348 : * center section, where neither prev_x nor next_x is clamped
349 : * right-hand section, where next_x is clamped to sw
350 : * On some operating systems, the center section is unrolled for additional
351 : * speedup.
352 : */
/** Like apply_kernel(), but blends a (2*rx+1) box kernel with a (2*rx-1) box
 *  kernel, weighted by outer_weight (0..255), to approximate a fractional
 *  blur radius.  Writes a (sw + 2*rx) x (sh + 2*ry) result to dst from the
 *  summed-area table `sum` (stride sw+1).  The inner loop is split into
 *  left / center / right sections as described in the comment above.
 */
static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
        const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
    SkASSERT(rx > 0 && ry > 0);
    SkASSERT(outer_weight <= 255);

    // if the kernel is wider than the source, every sample clamps; use the
    // simple fully-clamped path
    if (2*rx > sw) {
        kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);
        return;
    }

    // the inner (smaller) kernel gets the complement of the outer weight
    int inner_weight = 255 - outer_weight;

    // round these guys up if they're bigger than 127
    outer_weight += outer_weight >> 7;
    inner_weight += inner_weight >> 7;

    // per-kernel scale = weight / area, in fixed point (shifted out by >> 24
    // after the weighted sums are combined below)
    uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
    uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));

    int sumStride = sw + 1;

    // destination dimensions: source grown by the kernel radius on each side
    int dw = sw + 2*rx;
    int dh = sh + 2*ry;

    int prev_y = -2*ry;
    int next_y = 1;

    // guarantees the center section is non-degenerate
    SkASSERT(2*rx <= dw - 2*rx);

    for (int y = 0; y < dh; y++) {
        // outer kernel rows, clamped into the sum buffer
        int py = SkClampPos(prev_y) * sumStride;
        int ny = SkFastMin32(next_y, sh) * sumStride;

        // inner kernel rows: one row narrower on each side
        int ipy = SkClampPos(prev_y + 1) * sumStride;
        int iny = SkClampMax(next_y - 1, sh) * sumStride;

        int prev_x = -2*rx;
        int next_x = 1;
        int x = 0;

        // left-hand section: prev_x is known to clamp to 0
        for (; x < 2*rx; x++) {
            SkASSERT(prev_x < 0);
            SkASSERT(next_x <= sw);

            int px = 0;
            int nx = next_x;

            int ipx = 0;
            int inx = next_x - 1;

            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
                               - sum[nx+py] - sum[px+ny];
            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
                               - sum[inx+ipy] - sum[ipx+iny];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 1;
            next_x += 1;
        }

        // center section: no clamping needed, so precompute the eight corner
        // indices (outer i0..i3, inner i4..i7) and advance them each pixel
        int i0 = prev_x + py;
        int i1 = next_x + ny;
        int i2 = next_x + py;
        int i3 = prev_x + ny;
        int i4 = prev_x + 1 + ipy;
        int i5 = next_x - 1 + iny;
        int i6 = next_x - 1 + ipy;
        int i7 = prev_x + 1 + iny;

#if UNROLL_KERNEL_LOOP
        // 4x unrolled center (only enabled on platforms where it measured
        // faster; see the comment at the top of the file)
        for (; x < dw - 2*rx - 4; x += 4) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);
            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);
            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);
            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 4;
            next_x += 4;
        }
#endif

        // center remainder (or the entire center when not unrolling)
        for (; x < dw - 2*rx; x++) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 1;
            next_x += 1;
        }

        // right-hand section: next_x is known to clamp to sw
        for (; x < dw; x++) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x > sw);

            int px = prev_x;
            int nx = sw;

            int ipx = prev_x + 1;
            int inx = sw;

            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
                               - sum[nx+py] - sum[px+ny];
            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
                               - sum[inx+ipy] - sum[ipx+iny];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 1;
            next_x += 1;
        }

        prev_y += 1;
        next_y += 1;
    }
}
488 :
489 : #include "SkColorPriv.h"
490 :
491 0 : static void merge_src_with_blur(uint8_t dst[], int dstRB,
492 : const uint8_t src[], int srcRB,
493 : const uint8_t blur[], int blurRB,
494 : int sw, int sh) {
495 0 : dstRB -= sw;
496 0 : srcRB -= sw;
497 0 : blurRB -= sw;
498 0 : while (--sh >= 0) {
499 0 : for (int x = sw - 1; x >= 0; --x) {
500 0 : *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
501 0 : dst += 1;
502 0 : src += 1;
503 0 : blur += 1;
504 : }
505 0 : dst += dstRB;
506 0 : src += srcRB;
507 0 : blur += blurRB;
508 : }
509 0 : }
510 :
511 0 : static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
512 : const uint8_t src[], int srcRowBytes,
513 : int sw, int sh,
514 : SkBlurMask::Style style) {
515 : int x;
516 0 : while (--sh >= 0) {
517 0 : switch (style) {
518 : case SkBlurMask::kSolid_Style:
519 0 : for (x = sw - 1; x >= 0; --x) {
520 0 : int s = *src;
521 0 : int d = *dst;
522 0 : *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
523 0 : dst += 1;
524 0 : src += 1;
525 : }
526 0 : break;
527 : case SkBlurMask::kOuter_Style:
528 0 : for (x = sw - 1; x >= 0; --x) {
529 0 : if (*src) {
530 0 : *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
531 : }
532 0 : dst += 1;
533 0 : src += 1;
534 : }
535 0 : break;
536 : default:
537 0 : SkDEBUGFAIL("Unexpected blur style here");
538 0 : break;
539 : }
540 0 : dst += dstRowBytes - sw;
541 0 : src += srcRowBytes - sw;
542 : }
543 0 : }
544 :
545 : ///////////////////////////////////////////////////////////////////////////////
546 :
// we use a local function to wrap the class static method to work around
// a bug in gcc98
// Free-function wrapper for SkMask::FreeImage so it can be used as the
// cleanup callback in SkAutoTCallVProc below.
void SkMask_FreeImage(uint8_t* image);
void SkMask_FreeImage(uint8_t* image) {
    SkMask::FreeImage(image);
}
553 :
/** Produce a blurred version of the A8 mask `src` into `dst`.
 *
 *  @param dst     receives the blurred mask; bounds are grown by the total
 *                 blur padding, and dst->fImage is allocated when src has
 *                 pixels.  Caller owns the returned image.
 *  @param src     source mask; must be kA8_Format
 *  @param radius  blur radius (scalar; fractional radii are approximated by
 *                 interpolating between two integer kernel sizes)
 *  @param style   normal / solid / outer / inner compositing of the original
 *                 mask against the blur
 *  @param quality low = one box-blur pass, high = three passes (approximates
 *                 a Gaussian)
 *  @param margin  optional out-param: how far the bounds grew on each axis
 *  @return true on success, false for wrong format, zero radius, or
 *          an image too large to allocate
 */
bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
                      SkScalar radius, Style style, Quality quality,
                      SkIPoint* margin)
{
    if (src.fFormat != SkMask::kA8_Format) {
        return false;
    }

    // Force high quality off for small radii (performance)
    if (radius < SkIntToScalar(3)) quality = kLow_Quality;

    // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur
    int passCount = (quality == kHigh_Quality) ? 3 : 1;
    SkScalar passRadius = SkScalarDiv(radius, SkScalarSqrt(SkIntToScalar(passCount)));

    // rx is the integer (ceil) kernel radius; outer_weight encodes the
    // fractional part, used to blend the rx and rx-1 kernels
    int rx = SkScalarCeil(passRadius);
    int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);

    SkASSERT(rx >= 0);
    SkASSERT((unsigned)outer_weight <= 255);
    if (rx <= 0) {
        return false;
    }

    int ry = rx;    // only do square blur for now

    // total padding: each pass grows the mask by the radius on every side
    int padx = passCount * rx;
    int pady = passCount * ry;
    if (margin) {
        margin->set(padx, pady);
    }
    dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
        src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
    dst->fRowBytes = dst->fBounds.width();
    dst->fFormat = SkMask::kA8_Format;
    dst->fImage = NULL;

    if (src.fImage) {
        size_t dstSize = dst->computeImageSize();
        if (0 == dstSize) {
            return false;   // too big to allocate, abort
        }

        int sw = src.fBounds.width();
        int sh = src.fBounds.height();
        const uint8_t* sp = src.fImage;
        uint8_t* dp = SkMask::AllocImage(dstSize);

        // frees dp on any early return; detached on success below
        SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);

        // build the blurry destination
        {
            // size the sum buffer for the largest (final) pass; the source
            // grows by 2*rx / 2*ry per completed pass
            const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
            const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
            SkAutoTMalloc<uint32_t> storage(storageW * storageH);
            uint32_t* sumBuffer = storage.get();

            //pass1: sp is source, dp is destination
            build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
            // outer_weight == 255 means the radius was an exact integer, so
            // no kernel interpolation is needed
            if (outer_weight == 255) {
                apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
            } else {
                apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);
            }

            if (quality == kHigh_Quality) {
                //pass2: dp is source, tmpBuffer is destination
                int tmp_sw = sw + 2 * rx;
                int tmp_sh = sh + 2 * ry;
                SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
                build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
                if (outer_weight == 255)
                    apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
                else
                    apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
                                        tmp_sw, tmp_sh, outer_weight);

                //pass3: tmpBuffer is source, dp is destination
                tmp_sw += 2 * rx;
                tmp_sh += 2 * ry;
                build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
                if (outer_weight == 255)
                    apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
                else
                    apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
                                        outer_weight);
            }
        }

        dst->fImage = dp;
        // if need be, alloc the "real" dst (same size as src) and copy/merge
        // the blur into it (applying the src)
        if (style == kInner_Style) {
            // now we allocate the "real" dst, mirror the size of src
            size_t srcSize = src.computeImageSize();
            if (0 == srcSize) {
                return false;   // too big to allocate, abort
            }
            dst->fImage = SkMask::AllocImage(srcSize);
            // the blur image is padded; offset to the region aligned with src
            merge_src_with_blur(dst->fImage, src.fRowBytes,
                                sp, src.fRowBytes,
                                dp + passCount * (rx + ry * dst->fRowBytes),
                                dst->fRowBytes, sw, sh);
            SkMask::FreeImage(dp);
        } else if (style != kNormal_Style) {
            // composite the original mask back onto the blur, in place
            clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
                            dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
        }
        // success: hand ownership of dp (or its replacement) to the caller.
        // In the kInner_Style path dp was already freed above, so detaching
        // here also prevents a double free.
        (void)autoCall.detach();
    }

    if (style == kInner_Style) {
        dst->fBounds = src.fBounds; // restore trimmed bounds
        dst->fRowBytes = src.fRowBytes;
    }

    return true;
}
672 :
|