1 : /* vim: set shiftwidth=2 tabstop=8 autoindent cindent expandtab: */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is SSE.h
16 : *
17 : * The Initial Developer of the Original Code is the Mozilla Foundation.
18 : * Portions created by the Initial Developer are Copyright (C) 2009
19 : * the Initial Developer. All Rights Reserved.
20 : *
21 : * Contributor(s):
22 : * L. David Baron <dbaron@dbaron.org>, Mozilla Corporation (original author)
23 : * Justin Lebar <justin.lebar@gmail.com>, Mozilla Corporation
24 : *
25 : * Alternatively, the contents of this file may be used under the terms of
26 : * either the GNU General Public License Version 2 or later (the "GPL"), or
27 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 : * in which case the provisions of the GPL or the LGPL are applicable instead
29 : * of those above. If you wish to allow use of your version of this file only
30 : * under the terms of either the GPL or the LGPL, and not to allow others to
31 : * use your version of this file under the terms of the MPL, indicate your
32 : * decision by deleting the provisions above and replace them with the notice
33 : * and other provisions required by the GPL or the LGPL. If you do not delete
34 : * the provisions above, a recipient may use your version of this file under
35 : * the terms of any one of the MPL, the GPL or the LGPL.
36 : *
37 : * ***** END LICENSE BLOCK ***** */
38 :
39 : /* compile-time and runtime tests for whether to use SSE instructions */
40 :
41 : #ifndef mozilla_SSE_h_
42 : #define mozilla_SSE_h_
43 :
44 : // for definition of NS_COM_GLUE
45 : #include "nscore.h"
46 :
47 : /**
48 : * The public interface of this header consists of a set of macros and
49 : * functions for detecting x86 CPU instruction set extensions.
50 : *
51 : * DETECTING ISA EXTENSIONS
52 : * ========================
53 : *
54 : * This header provides the following functions for determining whether the
55 : * current CPU supports a particular instruction set extension:
56 : *
57 : * mozilla::supports_mmx
58 : * mozilla::supports_sse
59 : * mozilla::supports_sse2
60 : * mozilla::supports_sse3
61 : * mozilla::supports_ssse3
62 : * mozilla::supports_sse4a
63 : * mozilla::supports_sse4_1
64 : * mozilla::supports_sse4_2
65 : *
66 : * If you're writing code using inline assembly, you should guard it with a
67 : * call to one of these functions. For instance:
68 : *
69 : * if (mozilla::supports_sse2()) {
70 : * asm(" ... ");
71 : * }
72 : * else {
73 : * ...
74 : * }
75 : *
76 : * Note that runtime detection relies on cpuid intrinsics only available in gcc
77 : * 4.3 or later and MSVC 8.0 (Visual C++ 2005) or later; with older compilers
78 : * these functions return false unless the extension is presumed at compile
79 : * time. (This could be fixed by replacing the code with inline assembly.)
80 : *
81 : *
82 : * USING INTRINSICS
83 : * ================
84 : *
85 : * This header also provides support for coding using CPU intrinsics.
86 : *
87 : * For each mozilla::supports_abc function, we define a MOZILLA_MAY_SUPPORT_ABC
88 : * macro which indicates that the target/compiler combination we're using is
89 : * compatible with the ABC extension. For instance, x86_64 with MSVC 2003 is
90 : * compatible with SSE2 but not SSE3, since although there exist x86_64 CPUs
91 : * with SSE3 support, MSVC 2003 only supports through SSE2.
92 : *
93 : * Until gcc fixes #pragma target [1] [2] or our x86 builds require SSE2,
94 : * you'll need to separate code using intrinsics into a file separate from your
95 : * regular code. Here's the recommended pattern:
96 : *
97 : * #ifdef MOZILLA_MAY_SUPPORT_ABC
98 : * namespace mozilla {
99 : * namespace ABC {
100 : * void foo();
101 : * }
102 : * }
103 : * #endif
104 : *
105 : * void foo() {
106 : * #ifdef MOZILLA_MAY_SUPPORT_ABC
107 : * if (mozilla::supports_abc()) {
108 : * mozilla::ABC::foo(); // in a separate file
109 : * return;
110 : * }
111 : * #endif
112 : *
113 : * foo_unvectorized();
114 : * }
115 : *
116 : * You'll need to define mozilla::ABC::foo() in a separate file and add the
117 : * -mabc flag when using gcc.
118 : *
119 : * [1] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39787
120 : * [2] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=41201
121 : *
122 : */
123 :
// Compiler/architecture selection.
//
// For each recognised toolchain this section defines:
//   - MOZILLA_PRESUME_<EXT> for every extension the build target is already
//     known to provide, and
//   - MOZILLA_SSE_HAVE_CPUID_DETECTION when the supports_*() functions later in
//     this header may read the sse_private::*_enabled flags.
//
// NOTE: the MOZILLA_PRESUME_* macros are defined as 1 in the __GNUC__ branch
// but with no value in the MSVC and Sun Studio branches, and
// MOZILLA_SSE_HAVE_CPUID_DETECTION never has a value. Test them with
// defined()/#ifdef, as this header does, never with a bare "#if MACRO".
//
// Branch 1: compilers defining __GNUC__, targeting x86 or x86_64. Each
// MOZILLA_PRESUME_* simply mirrors the compiler's matching __EXT__ macro.
124 : #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
125 :
126 : #ifdef __MMX__
127 : // It's ok to use MMX instructions based on the -march option (or
128 : // the default for x86_64 or for Intel Mac).
129 : #define MOZILLA_PRESUME_MMX 1
130 : #endif
131 : #ifdef __SSE__
132 : // It's ok to use SSE instructions based on the -march option (or
133 : // the default for x86_64 or for Intel Mac).
134 : #define MOZILLA_PRESUME_SSE 1
135 : #endif
136 : #ifdef __SSE2__
137 : // It's ok to use SSE2 instructions based on the -march option (or
138 : // the default for x86_64 or for Intel Mac).
139 : #define MOZILLA_PRESUME_SSE2 1
140 : #endif
141 : #ifdef __SSE3__
142 : // It's ok to use SSE3 instructions based on the -march option (or the
143 : // default for Intel Mac).
144 : #define MOZILLA_PRESUME_SSE3 1
145 : #endif
146 : #ifdef __SSSE3__
147 : // It's ok to use SSSE3 instructions based on the -march option.
148 : #define MOZILLA_PRESUME_SSSE3 1
149 : #endif
150 : #ifdef __SSE4A__
151 : // It's ok to use SSE4A instructions based on the -march option.
152 : #define MOZILLA_PRESUME_SSE4A 1
153 : #endif
154 : #ifdef __SSE4_1__
155 : // It's ok to use SSE4.1 instructions based on the -march option.
156 : #define MOZILLA_PRESUME_SSE4_1 1
157 : #endif
158 : #ifdef __SSE4_2__
159 : // It's ok to use SSE4.2 instructions based on the -march option.
160 : #define MOZILLA_PRESUME_SSE4_2 1
161 : #endif
162 :
// HAVE_CPUID_H is not defined anywhere in this header; presumably the build
// configuration sets it when <cpuid.h> is usable -- TODO confirm.
163 : #ifdef HAVE_CPUID_H
164 : #define MOZILLA_SSE_HAVE_CPUID_DETECTION
165 : #endif
166 :
// Branch 2: MSVC targeting x86 or AMD64.
167 : #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
168 :
169 : #if _MSC_VER >= 1400
170 : // MSVC 2005 or newer on x86 or amd64
171 : #define MOZILLA_SSE_HAVE_CPUID_DETECTION
172 : #endif
173 :
// When _M_IX86_FP is defined, its value decides what is presumed (>= 1 SSE,
// >= 2 SSE2). Otherwise, on AMD64, SSE and SSE2 are presumed unconditionally.
// No MSVC path presumes MMX or anything newer than SSE2.
174 : #if defined(_M_IX86_FP)
175 :
176 : #if _M_IX86_FP >= 1
177 : // It's ok to use SSE instructions based on the /arch option
178 : #define MOZILLA_PRESUME_SSE
179 : #endif
180 : #if _M_IX86_FP >= 2
181 : // It's ok to use SSE2 instructions based on the /arch option
182 : #define MOZILLA_PRESUME_SSE2
183 : #endif
184 :
185 : #elif defined(_M_AMD64)
186 : // MSVC for AMD64 doesn't support MMX, so don't presume it here.
187 :
188 : // SSE is always available on AMD64.
189 : #define MOZILLA_PRESUME_SSE
190 : // SSE2 is always available on AMD64.
191 : #define MOZILLA_PRESUME_SSE2
192 : #endif
193 :
// Branch 3: Sun Studio. Note the 32-bit test uses __i386 (no trailing
// underscores), unlike the __GNUC__ branch above.
194 : #elif defined(__SUNPRO_CC) && (defined(__i386) || defined(__x86_64__))
195 : // Sun Studio on x86 or amd64
196 :
// Unlike the other two branches, detection is enabled here without any
// version or configuration check.
197 : #define MOZILLA_SSE_HAVE_CPUID_DETECTION
198 :
199 : #if defined(__x86_64__)
200 : // MMX is always available on AMD64.
201 : #define MOZILLA_PRESUME_MMX
202 : // SSE is always available on AMD64.
203 : #define MOZILLA_PRESUME_SSE
204 : // SSE2 is always available on AMD64.
205 : #define MOZILLA_PRESUME_SSE2
206 : #endif
207 :
// Any other compiler/target defines none of the macros above, so every
// supports_*() function later in this header returns false.
208 : #endif
209 :
// Feature queries. Each supports_<ext>() below takes one of three forms,
// selected by the preprocessor:
//   1. MOZILLA_PRESUME_<EXT> is defined: returns the constant true.
//   2. otherwise, MOZILLA_SSE_HAVE_CPUID_DETECTION is defined: returns the
//      matching sse_private::<ext>_enabled flag.
//   3. otherwise: returns the constant false.
// MOZILLA_MAY_SUPPORT_<EXT> is defined as 1 in cases 1 and 2 (MMX has one
// exception, noted at its definition) and left undefined in case 3.
210 : namespace mozilla {
211 :
// Flags read by the case-2 form of the supports_*() functions. They are only
// declared here; their definitions and initialisation live outside this header
// (presumably filled in from cpuid at startup -- TODO confirm in the defining
// source file). A flag is declared only when its extension is not presumed,
// which is exactly when the case-2 function body needs it.
212 : namespace sse_private {
213 : #if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
214 : #if !defined(MOZILLA_PRESUME_MMX)
215 : extern bool NS_COM_GLUE mmx_enabled;
216 : #endif
217 : #if !defined(MOZILLA_PRESUME_SSE)
218 : extern bool NS_COM_GLUE sse_enabled;
219 : #endif
220 : #if !defined(MOZILLA_PRESUME_SSE2)
221 : extern bool NS_COM_GLUE sse2_enabled;
222 : #endif
223 : #if !defined(MOZILLA_PRESUME_SSE3)
224 : extern bool NS_COM_GLUE sse3_enabled;
225 : #endif
226 : #if !defined(MOZILLA_PRESUME_SSSE3)
227 : extern bool NS_COM_GLUE ssse3_enabled;
228 : #endif
229 : #if !defined(MOZILLA_PRESUME_SSE4A)
230 : extern bool NS_COM_GLUE sse4a_enabled;
231 : #endif
232 : #if !defined(MOZILLA_PRESUME_SSE4_1)
233 : extern bool NS_COM_GLUE sse4_1_enabled;
234 : #endif
235 : #if !defined(MOZILLA_PRESUME_SSE4_2)
236 : extern bool NS_COM_GLUE sse4_2_enabled;
237 : #endif
238 : #endif // MOZILLA_SSE_HAVE_CPUID_DETECTION
239 : } // namespace sse_private
240 :
// MMX. The only extension where case 2 does not always define
// MOZILLA_MAY_SUPPORT_*: on MSVC/AMD64 supports_mmx() still reads the flag,
// but the macro stays undefined so intrinsic code paths are compiled out.
241 : #if defined(MOZILLA_PRESUME_MMX)
242 : #define MOZILLA_MAY_SUPPORT_MMX 1
243 : inline bool supports_mmx() { return true; }
244 : #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
245 : #if !(defined(_MSC_VER) && defined(_M_AMD64))
246 : // Define MOZILLA_MAY_SUPPORT_MMX only if we're not on MSVC for
247 : // AMD64, since that compiler doesn't support MMX.
248 : #define MOZILLA_MAY_SUPPORT_MMX 1
249 : #endif
250 231 : inline bool supports_mmx() { return sse_private::mmx_enabled; }
251 : #else
252 : inline bool supports_mmx() { return false; }
253 : #endif
254 :
// SSE: three-way selection as described at the top of the namespace.
255 : #if defined(MOZILLA_PRESUME_SSE)
256 : #define MOZILLA_MAY_SUPPORT_SSE 1
257 : inline bool supports_sse() { return true; }
258 : #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
259 : #define MOZILLA_MAY_SUPPORT_SSE 1
260 231 : inline bool supports_sse() { return sse_private::sse_enabled; }
261 : #else
262 : inline bool supports_sse() { return false; }
263 : #endif
264 :
// SSE2: three-way selection as described at the top of the namespace.
265 : #if defined(MOZILLA_PRESUME_SSE2)
266 : #define MOZILLA_MAY_SUPPORT_SSE2 1
267 : inline bool supports_sse2() { return true; }
268 : #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
269 : #define MOZILLA_MAY_SUPPORT_SSE2 1
270 1414003 : inline bool supports_sse2() { return sse_private::sse2_enabled; }
271 : #else
272 : inline bool supports_sse2() { return false; }
273 : #endif
274 :
// SSE3: three-way selection as described at the top of the namespace.
275 : #if defined(MOZILLA_PRESUME_SSE3)
276 : #define MOZILLA_MAY_SUPPORT_SSE3 1
277 : inline bool supports_sse3() { return true; }
278 : #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
279 : #define MOZILLA_MAY_SUPPORT_SSE3 1
280 231 : inline bool supports_sse3() { return sse_private::sse3_enabled; }
281 : #else
282 : inline bool supports_sse3() { return false; }
283 : #endif
284 :
// SSSE3: three-way selection as described at the top of the namespace.
285 : #if defined(MOZILLA_PRESUME_SSSE3)
286 : #define MOZILLA_MAY_SUPPORT_SSSE3 1
287 : inline bool supports_ssse3() { return true; }
288 : #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
289 : #define MOZILLA_MAY_SUPPORT_SSSE3 1
290 231 : inline bool supports_ssse3() { return sse_private::ssse3_enabled; }
291 : #else
292 : inline bool supports_ssse3() { return false; }
293 : #endif
294 :
// SSE4a: three-way selection as described at the top of the namespace.
295 : #if defined(MOZILLA_PRESUME_SSE4A)
296 : #define MOZILLA_MAY_SUPPORT_SSE4A 1
297 : inline bool supports_sse4a() { return true; }
298 : #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
299 : #define MOZILLA_MAY_SUPPORT_SSE4A 1
300 231 : inline bool supports_sse4a() { return sse_private::sse4a_enabled; }
301 : #else
302 : inline bool supports_sse4a() { return false; }
303 : #endif
304 :
// SSE4.1: three-way selection as described at the top of the namespace.
305 : #if defined(MOZILLA_PRESUME_SSE4_1)
306 : #define MOZILLA_MAY_SUPPORT_SSE4_1 1
307 : inline bool supports_sse4_1() { return true; }
308 : #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
309 : #define MOZILLA_MAY_SUPPORT_SSE4_1 1
310 231 : inline bool supports_sse4_1() { return sse_private::sse4_1_enabled; }
311 : #else
312 : inline bool supports_sse4_1() { return false; }
313 : #endif
314 :
// SSE4.2: three-way selection as described at the top of the namespace.
315 : #if defined(MOZILLA_PRESUME_SSE4_2)
316 : #define MOZILLA_MAY_SUPPORT_SSE4_2 1
317 : inline bool supports_sse4_2() { return true; }
318 : #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
319 : #define MOZILLA_MAY_SUPPORT_SSE4_2 1
320 231 : inline bool supports_sse4_2() { return sse_private::sse4_2_enabled; }
321 : #else
322 : inline bool supports_sse4_2() { return false; }
323 : #endif
324 :
325 : } // namespace mozilla
326 :
327 : #endif /* !defined(mozilla_SSE_h_) */
|