1 : /* ***** BEGIN LICENSE BLOCK *****
2 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 : *
4 : * The contents of this file are subject to the Mozilla Public License Version
5 : * 1.1 (the "License"); you may not use this file except in compliance with
6 : * the License. You may obtain a copy of the License at
7 : * http://www.mozilla.org/MPL/
8 : *
9 : * Software distributed under the License is distributed on an "AS IS" basis,
10 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 : * for the specific language governing rights and limitations under the
12 : * License.
13 : *
14 : * The Original Code is Mozilla Hyphenation Service.
15 : *
16 : * The Initial Developer of the Original Code is
17 : * Mozilla Foundation.
18 : * Portions created by the Initial Developer are Copyright (C) 2011
19 : * the Initial Developer. All Rights Reserved.
20 : *
21 : * Contributor(s):
22 : * Jonathan Kew <jfkthame@gmail.com>
23 : *
24 : * Alternatively, the contents of this file may be used under the terms of
25 : * either the GNU General Public License Version 2 or later (the "GPL"), or
26 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 : * in which case the provisions of the GPL or the LGPL are applicable instead
28 : * of those above. If you wish to allow use of your version of this file only
29 : * under the terms of either the GPL or the LGPL, and not to allow others to
30 : * use your version of this file under the terms of the MPL, indicate your
31 : * decision by deleting the provisions above and replace them with the notice
32 : * and other provisions required by the GPL or the LGPL. If you do not delete
33 : * the provisions above, a recipient may use your version of this file under
34 : * the terms of any one of the MPL, the GPL or the LGPL.
35 : *
36 : * ***** END LICENSE BLOCK ***** */
37 :
#include "nsHyphenator.h"
#include "nsIFile.h"
#include "nsUTF8Utils.h"
#include "nsUnicodeProperties.h"
#include "nsUnicharUtilCIID.h"
#include "nsIURI.h"

#include "hyphen.h"

#include <stdlib.h>
46 :
47 0 : nsHyphenator::nsHyphenator(nsIURI *aURI)
48 0 : : mDict(nsnull)
49 : {
50 0 : nsCString uriSpec;
51 0 : nsresult rv = aURI->GetSpec(uriSpec);
52 0 : if (NS_FAILED(rv)) {
53 : return;
54 : }
55 0 : mDict = hnj_hyphen_load(uriSpec.get());
56 : #ifdef DEBUG
57 0 : if (mDict) {
58 0 : printf("loaded hyphenation patterns from %s\n", uriSpec.get());
59 : }
60 : #endif
61 : }
62 :
63 0 : nsHyphenator::~nsHyphenator()
64 : {
65 0 : if (mDict != nsnull) {
66 0 : hnj_hyphen_free((HyphenDict*)mDict);
67 0 : mDict = nsnull;
68 : }
69 0 : }
70 :
71 : bool
72 0 : nsHyphenator::IsValid()
73 : {
74 0 : return (mDict != nsnull);
75 : }
76 :
77 : nsresult
78 0 : nsHyphenator::Hyphenate(const nsAString& aString,
79 : nsTArray<bool>& aHyphens)
80 : {
81 0 : if (!aHyphens.SetLength(aString.Length())) {
82 0 : return NS_ERROR_OUT_OF_MEMORY;
83 : }
84 0 : memset(aHyphens.Elements(), false, aHyphens.Length());
85 :
86 0 : bool inWord = false;
87 0 : PRUint32 wordStart = 0, wordLimit = 0;
88 : PRUint32 chLen;
89 0 : for (PRUint32 i = 0; i < aString.Length(); i += chLen) {
90 0 : PRUint32 ch = aString[i];
91 0 : chLen = 1;
92 :
93 0 : if (NS_IS_HIGH_SURROGATE(ch)) {
94 0 : if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i+1])) {
95 0 : ch = SURROGATE_TO_UCS4(ch, aString[i+1]);
96 0 : chLen = 2;
97 : } else {
98 0 : NS_WARNING("unpaired surrogate found during hyphenation");
99 : }
100 : }
101 :
102 0 : nsIUGenCategory::nsUGenCategory cat = mozilla::unicode::GetGenCategory(ch);
103 0 : if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) {
104 0 : if (!inWord) {
105 0 : inWord = true;
106 0 : wordStart = i;
107 : }
108 0 : wordLimit = i + chLen;
109 0 : if (i + chLen < aString.Length()) {
110 0 : continue;
111 : }
112 : }
113 :
114 0 : if (inWord) {
115 0 : const PRUnichar *begin = aString.BeginReading();
116 : NS_ConvertUTF16toUTF8 utf8(begin + wordStart,
117 0 : wordLimit - wordStart);
118 0 : nsAutoTArray<char,200> utf8hyphens;
119 0 : utf8hyphens.SetLength(utf8.Length() + 5);
120 0 : char **rep = nsnull;
121 0 : int *pos = nsnull;
122 0 : int *cut = nsnull;
123 : int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict,
124 0 : utf8.BeginReading(), utf8.Length(),
125 : utf8hyphens.Elements(), nsnull,
126 0 : &rep, &pos, &cut);
127 0 : if (!err) {
128 : // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer
129 : // from utf8 code unit indexing (which would match the utf8 input
130 : // string directly) to Unicode character indexing.
131 : // We then need to convert this to utf16 code unit offsets for Gecko.
132 0 : const char *hyphPtr = utf8hyphens.Elements();
133 0 : const PRUnichar *cur = begin + wordStart;
134 0 : const PRUnichar *end = begin + wordLimit;
135 0 : while (cur < end) {
136 0 : if (*hyphPtr & 0x01) {
137 0 : aHyphens[cur - begin] = true;
138 : }
139 0 : cur++;
140 0 : if (cur < end && NS_IS_LOW_SURROGATE(*cur) &&
141 0 : NS_IS_HIGH_SURROGATE(*(cur-1)))
142 : {
143 0 : cur++;
144 : }
145 0 : hyphPtr++;
146 : }
147 : }
148 : }
149 :
150 0 : inWord = false;
151 : }
152 :
153 0 : return NS_OK;
154 : }
|