1 : // This file should only be compiled if you're on x86 or x86_64. Additionally,
2 : // you'll need to compile this file with -msse2 if you're using gcc.
3 :
4 : #include <emmintrin.h>
5 : #include "nscore.h"
6 : #include "nsAlgorithm.h"
7 :
8 : namespace mozilla {
9 : namespace SSE2 {
10 :
// Returns true when all 16 bytes of |x| are zero.
static inline bool
is_zero (__m128i x)
{
  // Byte-wise compare against zero: every byte of |x| that equals zero
  // becomes 0xff in the result, every other byte becomes 0x00.
  const __m128i zeroBytes = _mm_cmpeq_epi8(x, _mm_setzero_si128());

  // Gather the top bit of each of the 16 bytes into the low 16 bits of an
  // int.  All 16 bits are set exactly when every byte compared equal to zero.
  const int zeroByteBits = _mm_movemask_epi8(zeroBytes);
  return zeroByteBits == 0xffff;
}
17 :
18 : PRInt32
19 16726 : FirstNon8Bit(const PRUnichar *str, const PRUnichar *end)
20 : {
21 16726 : const PRUint32 numUnicharsPerVector = 8;
22 :
23 : #if PR_BYTES_PER_WORD == 4
24 16726 : const size_t mask = 0xff00ff00;
25 16726 : const PRUint32 numUnicharsPerWord = 2;
26 : #elif PR_BYTES_PER_WORD == 8
27 : const size_t mask = 0xff00ff00ff00ff00;
28 : const PRUint32 numUnicharsPerWord = 4;
29 : #else
30 : #error Unknown platform!
31 : #endif
32 :
33 16726 : const PRInt32 len = end - str;
34 16726 : PRInt32 i = 0;
35 :
36 : // Align ourselves to a 16-byte boundary, as required by _mm_load_si128
37 : // (i.e. MOVDQA).
38 : PRInt32 alignLen =
39 16726 : NS_MIN(len, PRInt32(((-NS_PTR_TO_INT32(str)) & 0xf) / sizeof(PRUnichar)));
40 33243 : for (; i < alignLen; i++) {
41 16517 : if (str[i] > 255)
42 0 : return i;
43 : }
44 :
45 : // Check one XMM register (16 bytes) at a time.
46 16726 : const PRInt32 vectWalkEnd = ((len - i) / numUnicharsPerVector) * numUnicharsPerVector;
47 16726 : __m128i vectmask = _mm_set1_epi16(0xff00);
48 63478 : for(; i < vectWalkEnd; i += numUnicharsPerVector) {
49 46756 : const __m128i vect = *reinterpret_cast<const __m128i*>(str + i);
50 93512 : if (!is_zero(_mm_and_si128(vect, vectmask)))
51 4 : return i;
52 : }
53 :
54 : // Check one word at a time.
55 16722 : const PRInt32 wordWalkEnd = ((len - i) / numUnicharsPerWord) * numUnicharsPerWord;
56 24592 : for(; i < wordWalkEnd; i += numUnicharsPerWord) {
57 7872 : const size_t word = *reinterpret_cast<const size_t*>(str + i);
58 7872 : if (word & mask)
59 2 : return i;
60 : }
61 :
62 : // Take care of the remainder one character at a time.
63 52020 : for (; i < len; i++) {
64 35300 : if (str[i] > 255) {
65 0 : return i;
66 : }
67 : }
68 :
69 16720 : return -1;
70 : }
71 :
72 : } // namespace SSE2
73 : } // namespace mozilla
|