1 :
2 : /*
3 : * Copyright 2009 The Android Open Source Project
4 : *
5 : * Use of this source code is governed by a BSD-style license that can be
6 : * found in the LICENSE file.
7 : */
8 :
9 :
10 : #include "SkColorPriv.h"
11 :
12 : /*
13 : Filter_32_opaque
14 :
15 : There is no hard-n-fast rule that the filtering must produce
16 : exact results for the color components, but if the 4 incoming colors are
17 : all opaque, then the output color must also be opaque. Subsequent parts of
18 : the drawing pipeline may rely on this (e.g. which blitrow proc to use).
19 : */
20 :
21 : #if defined(__ARM_HAVE_NEON) && !defined(SK_CPU_BENDIAN)
// Bilinear blend of four 32-bit pixels using ARM NEON inline assembly.
//
// Each 8-bit channel of the result is
//     (a00*(16-x)*(16-y) + a01*x*(16-y) + a10*(16-x)*y + a11*x*y) >> 8
// and the 32-bit result is stored to *dst.
//
// The four weights always sum to 256, so a channel that is 0xFF in all four
// inputs comes out as 0xFF; this is how the "opaque in, opaque out"
// requirement described at the top of the file is met.
//
// NOTE(review): x and y are presumably 4-bit weights in [0, 15] (the portable
// variant asserts this); nothing in this function checks them.
// NOTE(review): "r4" is in the clobber list although no instruction below
// references it; confirm whether it is still needed.
22 : static inline void Filter_32_opaque_neon(unsigned x, unsigned y,
23 : SkPMColor a00, SkPMColor a01,
24 : SkPMColor a10, SkPMColor a11,
25 : SkPMColor *dst) {
26 : asm volatile(
27 : "vdup.8 d0, %[y] \n\t" // d0 = low byte of y in every 8-bit lane
28 : "vmov.u8 d16, #16 \n\t" // d16 = 16 in every 8-bit lane
29 : "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y in every 8-bit lane
30 :
31 : "vdup.32 d4, %[a00] \n\t" // d4 = [a00|a00]
32 : "vdup.32 d5, %[a10] \n\t" // d5 = [a10|a10]
33 : "vmov.32 d4[1], %[a01] \n\t" // d4 = [a01|a00] (top lane replaced by a01)
34 : "vmov.32 d5[1], %[a11] \n\t" // d5 = [a11|a10] (top lane replaced by a11)
35 :
36 : "vmull.u8 q3, d4, d1 \n\t" // q3 = [a01|a00] * (16-y), widened to 16 bits: d6 = a00 part, d7 = a01 part
37 : "vmull.u8 q0, d5, d0 \n\t" // q0 = [a11|a10] * y, widened to 16 bits: d0 = a10 part, d1 = a11 part (overwrites y and 16-y)
38 :
39 : "vdup.16 d5, %[x] \n\t" // d5 = x in every 16-bit lane (pixel data in d5 already consumed)
40 : "vmov.u16 d16, #16 \n\t" // d16 = 16 in every 16-bit lane
41 : "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x in every 16-bit lane
42 :
43 : "vmul.i16 d4, d7, d5 \n\t" // d4 = a01 * (16-y) * x
44 : "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * y * x
45 : "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-y) * (16-x)
46 : "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * y * (16-x)
47 : "vshrn.i16 d0, q2, #8 \n\t" // narrow q2 (d4,d5) >> 8 to bytes; low 32 bits of d0 = blended pixel, high 32 bits come from d5 and are unused
48 : "vst1.32 {d0[0]}, [%[dst]] \n\t" // store the low 32-bit lane of d0 to *dst
49 : :
50 : : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst)
51 : : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
52 : );
53 : }
54 :
// Bilinear blend of four 32-bit pixels followed by a uniform scale, using
// ARM NEON inline assembly.
//
// Each 8-bit channel is first blended as
//     c = (a00*(16-x)*(16-y) + a01*x*(16-y) + a10*(16-x)*y + a11*x*y) >> 8
// and then scaled as (c * scale) >> 8. The 32-bit result is stored to *dst.
//
// NOTE(review): x and y are presumably 4-bit weights in [0, 15] and scale is
// presumably in [0, 256] (the portable variant asserts both); nothing in this
// function checks them.
// NOTE(review): "r4" is in the clobber list although no instruction below
// references it; confirm whether it is still needed.
55 : static inline void Filter_32_alpha_neon(unsigned x, unsigned y,
56 : SkPMColor a00, SkPMColor a01,
57 : SkPMColor a10, SkPMColor a11,
58 : SkPMColor *dst, uint16_t scale) {
59 : asm volatile(
60 : "vdup.8 d0, %[y] \n\t" // d0 = low byte of y in every 8-bit lane
61 : "vmov.u8 d16, #16 \n\t" // d16 = 16 in every 8-bit lane
62 : "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y in every 8-bit lane
63 :
64 : "vdup.32 d4, %[a00] \n\t" // d4 = [a00|a00]
65 : "vdup.32 d5, %[a10] \n\t" // d5 = [a10|a10]
66 : "vmov.32 d4[1], %[a01] \n\t" // d4 = [a01|a00] (top lane replaced by a01)
67 : "vmov.32 d5[1], %[a11] \n\t" // d5 = [a11|a10] (top lane replaced by a11)
68 :
69 : "vmull.u8 q3, d4, d1 \n\t" // q3 = [a01|a00] * (16-y), widened to 16 bits: d6 = a00 part, d7 = a01 part
70 : "vmull.u8 q0, d5, d0 \n\t" // q0 = [a11|a10] * y, widened to 16 bits: d0 = a10 part, d1 = a11 part (overwrites y and 16-y)
71 :
72 : "vdup.16 d5, %[x] \n\t" // d5 = x in every 16-bit lane (pixel data in d5 already consumed)
73 : "vmov.u16 d16, #16 \n\t" // d16 = 16 in every 16-bit lane
74 : "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x in every 16-bit lane
75 :
76 : "vmul.i16 d4, d7, d5 \n\t" // d4 = a01 * (16-y) * x
77 : "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * y * x
78 : "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-y) * (16-x)
79 : "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * y * (16-x)
80 : "vdup.16 d3, %[scale] \n\t" // d3 = scale in every 16-bit lane (16-x no longer needed)
81 : "vshr.u16 d4, d4, #8 \n\t" // d4 = blended channels (sum >> 8), still one per 16-bit lane
82 : "vmul.i16 d4, d4, d3 \n\t" // d4 = blended channel * scale
83 : "vshrn.i16 d0, q2, #8 \n\t" // narrow q2 (d4,d5) >> 8 to bytes; low 32 bits of d0 = scaled pixel, high 32 bits come from d5 and are unused
84 : "vst1.32 {d0[0]}, [%[dst]] \n\t" // store the low 32-bit lane of d0 to *dst
85 : :
86 : : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst), [scale] "r" (scale)
87 : : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
88 : );
89 : }
90 : #define Filter_32_opaque Filter_32_opaque_neon
91 : #define Filter_32_alpha Filter_32_alpha_neon
92 : #else
93 0 : static inline void Filter_32_opaque_portable(unsigned x, unsigned y,
94 : SkPMColor a00, SkPMColor a01,
95 : SkPMColor a10, SkPMColor a11,
96 : SkPMColor* dstColor) {
97 0 : SkASSERT((unsigned)x <= 0xF);
98 0 : SkASSERT((unsigned)y <= 0xF);
99 :
100 0 : int xy = x * y;
101 0 : static const uint32_t mask = gMask_00FF00FF; //0xFF00FF;
102 :
103 0 : int scale = 256 - 16*y - 16*x + xy;
104 0 : uint32_t lo = (a00 & mask) * scale;
105 0 : uint32_t hi = ((a00 >> 8) & mask) * scale;
106 :
107 0 : scale = 16*x - xy;
108 0 : lo += (a01 & mask) * scale;
109 0 : hi += ((a01 >> 8) & mask) * scale;
110 :
111 0 : scale = 16*y - xy;
112 0 : lo += (a10 & mask) * scale;
113 0 : hi += ((a10 >> 8) & mask) * scale;
114 :
115 0 : lo += (a11 & mask) * xy;
116 0 : hi += ((a11 >> 8) & mask) * xy;
117 :
118 0 : *dstColor = ((lo >> 8) & mask) | (hi & ~mask);
119 0 : }
120 :
121 0 : static inline void Filter_32_alpha_portable(unsigned x, unsigned y,
122 : SkPMColor a00, SkPMColor a01,
123 : SkPMColor a10, SkPMColor a11,
124 : SkPMColor* dstColor,
125 : unsigned alphaScale) {
126 0 : SkASSERT((unsigned)x <= 0xF);
127 0 : SkASSERT((unsigned)y <= 0xF);
128 0 : SkASSERT(alphaScale <= 256);
129 :
130 0 : int xy = x * y;
131 0 : static const uint32_t mask = gMask_00FF00FF; //0xFF00FF;
132 :
133 0 : int scale = 256 - 16*y - 16*x + xy;
134 0 : uint32_t lo = (a00 & mask) * scale;
135 0 : uint32_t hi = ((a00 >> 8) & mask) * scale;
136 :
137 0 : scale = 16*x - xy;
138 0 : lo += (a01 & mask) * scale;
139 0 : hi += ((a01 >> 8) & mask) * scale;
140 :
141 0 : scale = 16*y - xy;
142 0 : lo += (a10 & mask) * scale;
143 0 : hi += ((a10 >> 8) & mask) * scale;
144 :
145 0 : lo += (a11 & mask) * xy;
146 0 : hi += ((a11 >> 8) & mask) * xy;
147 :
148 0 : lo = ((lo >> 8) & mask) * alphaScale;
149 0 : hi = ((hi >> 8) & mask) * alphaScale;
150 :
151 0 : *dstColor = ((lo >> 8) & mask) | (hi & ~mask);
152 0 : }
153 : #define Filter_32_opaque Filter_32_opaque_portable
154 : #define Filter_32_alpha Filter_32_alpha_portable
155 : #endif
156 :
|