1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 : * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
3 : * ***** BEGIN LICENSE BLOCK *****
4 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 : *
6 : * The contents of this file are subject to the Mozilla Public License Version
7 : * 1.1 (the "License"); you may not use this file except in compliance with
8 : * the License. You may obtain a copy of the License at
9 : * http://www.mozilla.org/MPL/
10 : *
11 : * Software distributed under the License is distributed on an "AS IS" basis,
12 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 : * for the specific language governing rights and limitations under the
14 : * License.
15 : *
16 : * The Original Code is unicode functions code.
17 : *
18 : * The Initial Developer of the Original Code is
19 : * Mozilla Corporation.
20 : * Portions created by the Initial Developer are Copyright (C) 2007
21 : * the Initial Developer. All Rights Reserved.
22 : *
23 : * This code is based off of icu.c from the sqlite code
24 : * whose original author is danielk1977
25 : *
26 : * Contributor(s):
27 : * Shawn Wilsher <me@shawnwilsher.com> (Original Author)
28 : *
29 : * Alternatively, the contents of this file may be used under the terms of
30 : * either the GNU General Public License Version 2 or later (the "GPL"), or
31 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
32 : * in which case the provisions of the GPL or the LGPL are applicable instead
33 : * of those above. If you wish to allow use of your version of this file only
34 : * under the terms of either the GPL or the LGPL, and not to allow others to
35 : * use your version of this file under the terms of the MPL, indicate your
36 : * decision by deleting the provisions above and replace them with the notice
37 : * and other provisions required by the GPL or the LGPL. If you do not delete
38 : * the provisions above, a recipient may use your version of this file under
39 : * the terms of any one of the MPL, the GPL or the LGPL.
40 : *
41 : * ***** END LICENSE BLOCK ***** */
42 :
43 : #include "mozilla/Util.h"
44 :
45 : #include "mozStorageSQLFunctions.h"
46 : #include "nsUnicharUtils.h"
47 :
48 : namespace mozilla {
49 : namespace storage {
50 :
51 : ////////////////////////////////////////////////////////////////////////////////
52 : //// Local Helper Functions
53 :
54 : namespace {
55 :
56 : /**
57 : * Performs the LIKE comparison of a string against a pattern. For more detail
58 : * see http://www.sqlite.org/lang_expr.html#like.
59 : *
60 : * @param aPatternItr
61 : * An iterator at the start of the pattern to check for.
62 : * @param aPatternEnd
63 : * An iterator at the end of the pattern to check for.
64 : * @param aStringItr
65 : * An iterator at the start of the string to check for the pattern.
66 : * @param aStringEnd
67 : * An iterator at the end of the string to check for the pattern.
68 : * @param aEscapeChar
69 : * The character to use for escaping symbols in the pattern.
70 : * @return 1 if the pattern is found, 0 otherwise.
71 : */
72 : int
73 373 : likeCompare(nsAString::const_iterator aPatternItr,
74 : nsAString::const_iterator aPatternEnd,
75 : nsAString::const_iterator aStringItr,
76 : nsAString::const_iterator aStringEnd,
77 : PRUnichar aEscapeChar)
78 : {
79 373 : const PRUnichar MATCH_ALL('%');
80 373 : const PRUnichar MATCH_ONE('_');
81 :
82 373 : bool lastWasEscape = false;
83 1168 : while (aPatternItr != aPatternEnd) {
84 : /**
85 : * What we do in here is take a look at each character from the input
86 : * pattern, and do something with it. There are 4 possibilities:
87 : * 1) character is an un-escaped match-all character
88 : * 2) character is an un-escaped match-one character
89 : * 3) character is an un-escaped escape character
90 : * 4) character is not any of the above
91 : */
92 765 : if (!lastWasEscape && *aPatternItr == MATCH_ALL) {
93 : // CASE 1
94 : /**
95 : * Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a
96 : * MATCH_ALL character. For each MATCH_ONE character, skip one character
97 : * in the pattern string.
98 : */
99 372 : while (*aPatternItr == MATCH_ALL || *aPatternItr == MATCH_ONE) {
100 124 : if (*aPatternItr == MATCH_ONE) {
101 : // If we've hit the end of the string we are testing, no match
102 0 : if (aStringItr == aStringEnd)
103 0 : return 0;
104 0 : aStringItr++;
105 : }
106 124 : aPatternItr++;
107 : }
108 :
109 : // If we've hit the end of the pattern string, match
110 124 : if (aPatternItr == aPatternEnd)
111 89 : return 1;
112 :
113 231 : while (aStringItr != aStringEnd) {
114 181 : if (likeCompare(aPatternItr, aPatternEnd, aStringItr, aStringEnd,
115 181 : aEscapeChar)) {
116 : // we've hit a match, so indicate this
117 20 : return 1;
118 : }
119 161 : aStringItr++;
120 : }
121 :
122 : // No match
123 15 : return 0;
124 : }
125 641 : else if (!lastWasEscape && *aPatternItr == MATCH_ONE) {
126 : // CASE 2
127 16 : if (aStringItr == aStringEnd) {
128 : // If we've hit the end of the string we are testing, no match
129 2 : return 0;
130 : }
131 14 : aStringItr++;
132 14 : lastWasEscape = false;
133 : }
134 625 : else if (!lastWasEscape && *aPatternItr == aEscapeChar) {
135 : // CASE 3
136 7 : lastWasEscape = true;
137 : }
138 : else {
139 : // CASE 4
140 618 : if (::ToUpperCase(*aStringItr) != ::ToUpperCase(*aPatternItr)) {
141 : // If we've hit a point where the strings don't match, there is no match
142 217 : return 0;
143 : }
144 401 : aStringItr++;
145 401 : lastWasEscape = false;
146 : }
147 :
148 422 : aPatternItr++;
149 : }
150 :
151 30 : return aStringItr == aStringEnd;
152 : }
153 :
/**
 * This class manages a dynamic array.  It can represent an array of any
 * reasonable size, but if the array is "N" elements or smaller, it will be
 * stored using fixed space inside the auto array itself.  If the auto array
 * is a local variable, this internal storage will be allocated cheaply on the
 * stack, similar to nsAutoString.  If a larger size is requested, the memory
 * will be dynamically allocated from the heap.  Since the destructor will
 * free any heap-allocated memory, client code doesn't need to care where the
 * memory came from.
 *
 * Instances cannot be copied: a copy would either alias the source object's
 * internal buffer or free the same heap block twice.
 */
template <class T, size_t N> class AutoArray
{

public:

  /**
   * Creates an array able to hold at least `size` elements.
   *
   * @param size
   *        The number of elements required.  Values up to and including N use
   *        the internal buffer; larger values allocate with new[].
   */
  explicit AutoArray(size_t size)
  : mBuffer(size <= N ? mAutoBuffer : new T[size])
  {
  }

  ~AutoArray()
  {
    // Only memory that came from new[] may be released.
    if (mBuffer != mAutoBuffer)
      delete[] mBuffer;
  }

  /**
   * Return the pointer to the allocated array.
   * @note The result can only be NULL if the operator new[] in use reports
   *       failure by returning NULL; callers in this file check for that.
   *
   * @return the pointer to the allocated array
   */
  T *get()
  {
    return mBuffer;
  }

private:
  // Copying is disabled (declared, intentionally never defined).
  AutoArray(const AutoArray &);
  AutoArray &operator=(const AutoArray &);

  T *mBuffer;         // Points to mAutoBuffer if we can use it, heap otherwise.
  T mAutoBuffer[N];   // The internal memory buffer that we use if we can.
};
195 :
196 : /**
197 : * Compute the Levenshtein Edit Distance between two strings.
198 : *
199 : * @param aStringS
200 : * a string
201 : * @param aStringT
202 : * another string
203 : * @param _result
204 : * an outparam that will receive the edit distance between the arguments
205 : * @return a Sqlite result code, e.g. SQLITE_OK, SQLITE_NOMEM, etc.
206 : */
207 : int
208 24 : levenshteinDistance(const nsAString &aStringS,
209 : const nsAString &aStringT,
210 : int *_result)
211 : {
212 : // Set the result to a non-sensical value in case we encounter an error.
213 24 : *_result = -1;
214 :
215 24 : const PRUint32 sLen = aStringS.Length();
216 24 : const PRUint32 tLen = aStringT.Length();
217 :
218 24 : if (sLen == 0) {
219 4 : *_result = tLen;
220 4 : return SQLITE_OK;
221 : }
222 20 : if (tLen == 0) {
223 2 : *_result = sLen;
224 2 : return SQLITE_OK;
225 : }
226 :
227 : // Notionally, Levenshtein Distance is computed in a matrix. If we
228 : // assume s = "span" and t = "spam", the matrix would look like this:
229 : // s -->
230 : // t s p a n
231 : // | 0 1 2 3 4
232 : // V s 1 * * * *
233 : // p 2 * * * *
234 : // a 3 * * * *
235 : // m 4 * * * *
236 : //
237 : // Note that the row width is sLen + 1 and the column height is tLen + 1,
238 : // where sLen is the length of the string "s" and tLen is the length of "t".
239 : // The first row and the first column are initialized as shown, and
240 : // the algorithm computes the remaining cells row-by-row, and
241 : // left-to-right within each row. The computation only requires that
242 : // we be able to see the current row and the previous one.
243 :
244 : // Allocate memory for two rows. Use AutoArray's to manage the memory
245 : // so we don't have to explicitly free it, and so we can avoid the expense
246 : // of memory allocations for relatively small strings.
247 36 : AutoArray<int, nsAutoString::kDefaultStorageSize> row1(sLen + 1);
248 36 : AutoArray<int, nsAutoString::kDefaultStorageSize> row2(sLen + 1);
249 :
250 : // Declare the raw pointers that will actually be used to access the memory.
251 18 : int *prevRow = row1.get();
252 18 : NS_ENSURE_TRUE(prevRow, SQLITE_NOMEM);
253 18 : int *currRow = row2.get();
254 18 : NS_ENSURE_TRUE(currRow, SQLITE_NOMEM);
255 :
256 : // Initialize the first row.
257 2136 : for (PRUint32 i = 0; i <= sLen; i++)
258 2118 : prevRow[i] = i;
259 :
260 18 : const PRUnichar *s = aStringS.BeginReading();
261 18 : const PRUnichar *t = aStringT.BeginReading();
262 :
263 : // Compute the empty cells in the "matrix" row-by-row, starting with
264 : // the second row.
265 2126 : for (PRUint32 ti = 1; ti <= tLen; ti++) {
266 :
267 : // Initialize the first cell in this row.
268 2108 : currRow[0] = ti;
269 :
270 : // Get the character from "t" that corresponds to this row.
271 2108 : const PRUnichar tch = t[ti - 1];
272 :
273 : // Compute the remaining cells in this row, left-to-right,
274 : // starting at the second column (and first character of "s").
275 2002790 : for (PRUint32 si = 1; si <= sLen; si++) {
276 :
277 : // Get the character from "s" that corresponds to this column,
278 : // compare it to the t-character, and compute the "cost".
279 2000682 : const PRUnichar sch = s[si - 1];
280 2000682 : int cost = (sch == tch) ? 0 : 1;
281 :
282 : // ............ We want to calculate the value of cell "d" from
283 : // ...ab....... the previously calculated (or initialized) cells
284 : // ...cd....... "a", "b", and "c", where d = min(a', b', c').
285 : // ............
286 2000682 : int aPrime = prevRow[si - 1] + cost;
287 2000682 : int bPrime = prevRow[si] + 1;
288 2000682 : int cPrime = currRow[si - 1] + 1;
289 2000682 : currRow[si] = NS_MIN(aPrime, NS_MIN(bPrime, cPrime));
290 : }
291 :
292 : // Advance to the next row. The current row becomes the previous
293 : // row and we recycle the old previous row as the new current row.
294 : // We don't need to re-initialize the new current row since we will
295 : // rewrite all of its cells anyway.
296 2108 : int *oldPrevRow = prevRow;
297 2108 : prevRow = currRow;
298 2108 : currRow = oldPrevRow;
299 : }
300 :
301 : // The final result is the value of the last cell in the last row.
302 : // Note that that's now in the "previous" row, since we just swapped them.
303 18 : *_result = prevRow[sLen];
304 18 : return SQLITE_OK;
305 : }
306 :
307 : // This struct is used only by registerFunctions below, but ISO C++98 forbids
308 : // instantiating a template dependent on a locally-defined type. Boo-urns!
309 : struct Functions {
310 : const char *zName;
311 : int nArg;
312 : int enc;
313 : void *pContext;
314 : void (*xFunc)(::sqlite3_context*, int, sqlite3_value**);
315 : };
316 :
317 : } // anonymous namespace
318 :
319 : ////////////////////////////////////////////////////////////////////////////////
320 : //// Exposed Functions
321 :
322 : int
323 3267 : registerFunctions(sqlite3 *aDB)
324 : {
325 : Functions functions[] = {
326 : {"lower",
327 : 1,
328 : SQLITE_UTF16,
329 : 0,
330 : caseFunction},
331 : {"lower",
332 : 1,
333 : SQLITE_UTF8,
334 : 0,
335 : caseFunction},
336 : {"upper",
337 : 1,
338 : SQLITE_UTF16,
339 : (void*)1,
340 : caseFunction},
341 : {"upper",
342 : 1,
343 : SQLITE_UTF8,
344 : (void*)1,
345 : caseFunction},
346 :
347 : {"like",
348 : 2,
349 : SQLITE_UTF16,
350 : 0,
351 : likeFunction},
352 : {"like",
353 : 2,
354 : SQLITE_UTF8,
355 : 0,
356 : likeFunction},
357 : {"like",
358 : 3,
359 : SQLITE_UTF16,
360 : 0,
361 : likeFunction},
362 : {"like",
363 : 3,
364 : SQLITE_UTF8,
365 : 0,
366 : likeFunction},
367 :
368 : {"levenshteinDistance",
369 : 2,
370 : SQLITE_UTF16,
371 : 0,
372 : levenshteinDistanceFunction},
373 : {"levenshteinDistance",
374 : 2,
375 : SQLITE_UTF8,
376 : 0,
377 : levenshteinDistanceFunction},
378 3267 : };
379 :
380 3267 : int rv = SQLITE_OK;
381 35937 : for (size_t i = 0; SQLITE_OK == rv && i < ArrayLength(functions); ++i) {
382 32670 : struct Functions *p = &functions[i];
383 : rv = ::sqlite3_create_function(aDB, p->zName, p->nArg, p->enc, p->pContext,
384 32670 : p->xFunc, NULL, NULL);
385 : }
386 :
387 3267 : return rv;
388 : }
389 :
390 : ////////////////////////////////////////////////////////////////////////////////
391 : //// SQL Functions
392 :
393 : void
394 223 : caseFunction(sqlite3_context *aCtx,
395 : int aArgc,
396 : sqlite3_value **aArgv)
397 : {
398 223 : NS_ASSERTION(1 == aArgc, "Invalid number of arguments!");
399 :
400 446 : nsAutoString data(static_cast<const PRUnichar *>(::sqlite3_value_text16(aArgv[0])));
401 223 : bool toUpper = ::sqlite3_user_data(aCtx) ? true : false;
402 :
403 223 : if (toUpper)
404 198 : ::ToUpperCase(data);
405 : else
406 25 : ::ToLowerCase(data);
407 :
408 : // Set the result.
409 223 : ::sqlite3_result_text16(aCtx, data.get(), -1, SQLITE_TRANSIENT);
410 223 : }
411 :
412 : /**
413 : * This implements the like() SQL function. This is used by the LIKE operator.
414 : * The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is
415 : * an escape character, say E, it is implemented as 'like(B, A, E)'.
416 : */
417 : void
418 192 : likeFunction(sqlite3_context *aCtx,
419 : int aArgc,
420 : sqlite3_value **aArgv)
421 : {
422 192 : NS_ASSERTION(2 == aArgc || 3 == aArgc, "Invalid number of arguments!");
423 :
424 192 : if (::sqlite3_value_bytes(aArgv[0]) > SQLITE_MAX_LIKE_PATTERN_LENGTH) {
425 : ::sqlite3_result_error(aCtx, "LIKE or GLOB pattern too complex",
426 0 : SQLITE_TOOBIG);
427 0 : return;
428 : }
429 :
430 192 : if (!::sqlite3_value_text16(aArgv[0]) || !::sqlite3_value_text16(aArgv[1]))
431 0 : return;
432 :
433 384 : nsDependentString A(static_cast<const PRUnichar *>(::sqlite3_value_text16(aArgv[1])));
434 384 : nsDependentString B(static_cast<const PRUnichar *>(::sqlite3_value_text16(aArgv[0])));
435 192 : NS_ASSERTION(!B.IsEmpty(), "LIKE string must not be null!");
436 :
437 192 : PRUnichar E = 0;
438 192 : if (3 == aArgc)
439 87 : E = static_cast<const PRUnichar *>(::sqlite3_value_text16(aArgv[2]))[0];
440 :
441 192 : nsAString::const_iterator itrString, endString;
442 192 : A.BeginReading(itrString);
443 192 : A.EndReading(endString);
444 192 : nsAString::const_iterator itrPattern, endPattern;
445 192 : B.BeginReading(itrPattern);
446 192 : B.EndReading(endPattern);
447 : ::sqlite3_result_int(aCtx, likeCompare(itrPattern, endPattern, itrString,
448 192 : endString, E));
449 : }
450 :
451 30 : void levenshteinDistanceFunction(sqlite3_context *aCtx,
452 : int aArgc,
453 : sqlite3_value **aArgv)
454 : {
455 30 : NS_ASSERTION(2 == aArgc, "Invalid number of arguments!");
456 :
457 : // If either argument is a SQL NULL, then return SQL NULL.
458 56 : if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL ||
459 26 : ::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) {
460 6 : ::sqlite3_result_null(aCtx);
461 6 : return;
462 : }
463 :
464 24 : int aLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(PRUnichar);
465 24 : const PRUnichar *a = static_cast<const PRUnichar *>(::sqlite3_value_text16(aArgv[0]));
466 :
467 24 : int bLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(PRUnichar);
468 24 : const PRUnichar *b = static_cast<const PRUnichar *>(::sqlite3_value_text16(aArgv[1]));
469 :
470 : // Compute the Levenshtein Distance, and return the result (or error).
471 24 : int distance = -1;
472 48 : const nsDependentString A(a, aLen);
473 48 : const nsDependentString B(b, bLen);
474 24 : int status = levenshteinDistance(A, B, &distance);
475 24 : if (status == SQLITE_OK) {
476 24 : ::sqlite3_result_int(aCtx, distance);
477 : }
478 0 : else if (status == SQLITE_NOMEM) {
479 0 : ::sqlite3_result_error_nomem(aCtx);
480 : }
481 : else {
482 0 : ::sqlite3_result_error(aCtx, "User function returned error code", -1);
483 : }
484 : }
485 :
486 : } // namespace storage
487 : } // namespace mozilla
|