LCOV - code coverage report
Current view: directory - netwerk/dns - punycode.c (source / functions)
Test: app.info
Date: 2012-06-02

               Found    Hit    Coverage
Lines:            87     82      94.3 %
Functions:         6      5      83.3 %

       1                 : /*
       2                 : punycode.c from RFC 3492
       3                 : http://www.nicemice.net/idn/
       4                 : Adam M. Costello
       5                 : http://www.nicemice.net/amc/
       6                 : 
       7                 : This is ANSI C code (C89) implementing Punycode (RFC 3492).
       8                 : 
       9                 : 
      10                 : C. Disclaimer and license
      11                 : 
      12                 :     Regarding this entire document or any portion of it (including
      13                 :     the pseudocode and C code), the author makes no guarantees and
      14                 :     is not responsible for any damage resulting from its use.  The
      15                 :     author grants irrevocable permission to anyone to use, modify,
      16                 :     and distribute it in any way that does not diminish the rights
      17                 :     of anyone else to use, modify, and distribute it, provided that
      18                 :     redistributed derivative works do not contain misleading author or
      19                 :     version information.  Derivative works need not be licensed under
      20                 :     similar terms.
      21                 : */
      22                 : 
      23                 : #include "punycode.h"
      24                 : 
      25                 : /**********************************************************/
      26                 : /* Implementation (would normally go in its own .c file): */
      27                 : 
      28                 : #include <string.h>
      29                 : 
      30                 : /*** Bootstring parameters for Punycode ***/
      31                 : 
      32                 : enum { base = 36, tmin = 1, tmax = 26, skew = 38, damp = 700,
      33                 :        initial_bias = 72, initial_n = 0x80, delimiter = 0x2D };
      34                 : 
      35                 : /* basic(cp) tests whether cp is a basic code point: */
      36                 : #define basic(cp) ((punycode_uint)(cp) < 0x80)
      37                 : 
      38                 : /* delim(cp) tests whether cp is a delimiter: */
      39                 : #define delim(cp) ((cp) == delimiter)
      40                 : 
      41                 : /* decode_digit(cp) returns the numeric value of a basic code */
      42                 : /* point (for use in representing integers) in the range 0 to */
      43                 : /* base-1, or base if cp does not represent a value.          */
      44                 : 
      45              19 : static punycode_uint decode_digit(punycode_uint cp)
      46                 : {
      47              34 :   return  cp - 48 < 10 ? cp - 22 :  cp - 65 < 26 ? cp - 65 :
      48              15 :           cp - 97 < 26 ? cp - 97 :  base;
      49                 : }
      50                 : 
      51                 : /* encode_digit(d,flag) returns the basic code point whose value      */
      52                 : /* (when used for representing integers) is d, which needs to be in   */
      53                 : /* the range 0 to base-1.  The lowercase form is used unless flag is  */
      54                 : /* nonzero, in which case the uppercase form is used.  The behavior   */
      55                 : /* is undefined if flag is nonzero and digit d has no uppercase form. */
      56                 : 
      57            2358 : static char encode_digit(punycode_uint d, int flag)
      58                 : {
      59            2358 :   return d + 22 + 75 * (d < 26) - ((flag != 0) << 5);
      60                 :   /*  0..25 map to ASCII a..z or A..Z */
      61                 :   /* 26..35 map to ASCII 0..9         */
      62                 : }
      63                 : 
      64                 : /* flagged(bcp) tests whether a basic code point is flagged */
      65                 : /* (uppercase).  The behavior is undefined if bcp is not a  */
      66                 : /* basic code point.                                        */
      67                 : 
      68                 : #define flagged(bcp) ((punycode_uint)(bcp) - 65 < 26)
      69                 : 
      70                 : /* encode_basic(bcp,flag) forces a basic code point to lowercase */
      71                 : /* if flag is zero, uppercase if flag is nonzero, and returns    */
      72                 : /* the resulting code point.  The code point is unchanged if it  */
      73                 : /* is caseless.  The behavior is undefined if bcp is not a basic */
      74                 : /* code point.                                                   */
      75                 : 
      76               0 : static char encode_basic(punycode_uint bcp, int flag)
      77                 : {
      78               0 :   bcp -= (bcp - 97 < 26) << 5;
      79               0 :   return bcp + ((!flag && (bcp - 65 < 26)) << 5);
      80                 : }
      81                 : 
      82                 : /*** Platform-specific constants ***/
      83                 : 
      84                 : /* maxint is the maximum value of a punycode_uint variable: */
      85                 : static const punycode_uint maxint = (punycode_uint) -1;
      86                 : /* Because maxint is unsigned, -1 becomes the maximum value. */
      87                 : 
      88                 : /*** Bias adaptation function ***/
      89                 : 
      90             977 : static punycode_uint adapt(
      91                 :   punycode_uint delta, punycode_uint numpoints, int firsttime )
      92                 : {
      93                 :   punycode_uint k;
      94                 : 
      95             977 :   delta = firsttime ? delta / damp : delta >> 1;
      96                 :   /* delta >> 1 is a faster way of doing delta / 2 */
      97             977 :   delta += delta / numpoints;
      98                 : 
      99            1041 :   for (k = 0;  delta > ((base - tmin) * tmax) / 2;  k += base) {
     100              64 :     delta /= base - tmin;
     101                 :   }
     102                 : 
     103             977 :   return k + (base - tmin + 1) * delta / (delta + skew);
     104                 : }
     105                 : 
     106                 : /*** Main encode function ***/
     107                 : 
     108             517 : enum punycode_status punycode_encode(
     109                 :   punycode_uint input_length,
     110                 :   const punycode_uint input[],
     111                 :   const unsigned char case_flags[],
     112                 :   punycode_uint *output_length,
     113                 :   char output[] )
     114                 : {
     115                 :   punycode_uint n, delta, h, b, out, max_out, bias, j, m, q, k, t;
     116                 : 
     117                 :   /* Initialize the state: */
     118                 : 
     119             517 :   n = initial_n;
     120             517 :   delta = out = 0;
     121             517 :   max_out = *output_length;
     122             517 :   bias = initial_bias;
     123                 : 
     124                 :   /* Handle the basic code points: */
     125                 : 
     126            3724 :   for (j = 0;  j < input_length;  ++j) {
     127            3207 :     if (basic(input[j])) {
     128            2236 :       if (max_out - out < 2) return punycode_big_output;
     129            4472 :       output[out++] =
     130            2236 :         case_flags ? encode_basic(input[j], case_flags[j]) : (char)input[j];
     131                 :     }
     132                 :     /* else if (input[j] < n) return punycode_bad_input; */
     133                 :     /* (not needed for Punycode with unsigned code points) */
     134                 :   }
     135                 : 
     136             517 :   h = b = out;
     137                 : 
     138                 :   /* h is the number of code points that have been handled, b is the  */
     139                 :   /* number of basic code points, and out is the number of characters */
     140                 :   /* that have been output.                                           */
     141                 : 
     142             517 :   if (b > 0) output[out++] = delimiter;
     143                 : 
     144                 :   /* Main encoding loop: */
     145                 : 
     146            1939 :   while (h < input_length) {
     147                 :     /* All non-basic code points < n have been     */
     148                 :     /* handled already.  Find the next larger one: */
     149                 : 
     150            6263 :     for (m = maxint, j = 0;  j < input_length;  ++j) {
     151                 :       /* if (basic(input[j])) continue; */
     152                 :       /* (not needed for Punycode) */
     153            5358 :       if (input[j] >= n && input[j] < m) m = input[j];
     154                 :     }
     155                 : 
     156                 :     /* Increase delta enough to advance the decoder's    */
     157                 :     /* <n,i> state to <m,0>, but guard against overflow: */
     158                 : 
     159             905 :     if (m - n > (maxint - delta) / (h + 1)) return punycode_overflow;
     160             905 :     delta += (m - n) * (h + 1);
     161             905 :     n = m;
     162                 : 
     163            6263 :     for (j = 0;  j < input_length;  ++j) {
     164                 :       /* Punycode does not need to check whether input[j] is basic: */
     165            5358 :       if (input[j] < n /* || basic(input[j]) */ ) {
     166            3423 :         if (++delta == 0) return punycode_overflow;
     167                 :       }
     168                 : 
     169            5358 :       if (input[j] == n) {
     170                 :         /* Represent delta as a generalized variable-length integer: */
     171                 : 
     172            2358 :         for (q = delta, k = base;  ;  k += base) {
     173            2358 :           if (out >= max_out) return punycode_big_output;
     174            3674 :           t = k <= bias /* + tmin */ ? tmin :     /* +tmin not needed */
     175            1316 :               k >= bias + tmax ? tmax : k - bias;
     176            2358 :           if (q < t) break;
     177            1387 :           output[out++] = encode_digit(t + (q - t) % (base - t), 0);
     178            1387 :           q = (q - t) / (base - t);
     179            1387 :         }
     180                 : 
     181             971 :         output[out++] = encode_digit(q, case_flags && case_flags[j]);
     182             971 :         bias = adapt(delta, h + 1, h == b);
     183             971 :         delta = 0;
     184             971 :         ++h;
     185                 :       }
     186                 :     }
     187                 : 
     188             905 :     ++delta, ++n;
     189                 :   }
     190                 : 
     191             517 :   *output_length = out;
     192             517 :   return punycode_success;
     193                 : }
     194                 : 
     195                 : /*** Main decode function ***/
     196                 : 
     197               6 : enum punycode_status punycode_decode(
     198                 :   punycode_uint input_length,
     199                 :   const char input[],
     200                 :   punycode_uint *output_length,
     201                 :   punycode_uint output[],
     202                 :   unsigned char case_flags[] )
     203                 : {
     204                 :   punycode_uint n, out, i, max_out, bias,
     205                 :                  b, j, in, oldi, w, k, digit, t;
     206                 : 
     207                 :   /* Initialize the state: */
     208                 : 
     209               6 :   n = initial_n;
     210               6 :   out = i = 0;
     211               6 :   max_out = *output_length;
     212               6 :   bias = initial_bias;
     213                 : 
     214                 :   /* Handle the basic code points:  Let b be the number of input code */
     215                 :   /* points before the last delimiter, or 0 if there is none, then    */
     216                 :   /* copy the first b code points to the output.                      */
     217                 : 
     218               6 :   for (b = j = 0;  j < input_length;  ++j) if (delim(input[j])) b = j;
     219               6 :   if (b > max_out) return punycode_big_output;
     220                 : 
     221              32 :   for (j = 0;  j < b;  ++j) {
     222              26 :     if (case_flags) case_flags[out] = flagged(input[j]);
     223              26 :     if (!basic(input[j])) return punycode_bad_input;
     224              26 :     output[out++] = input[j];
     225                 :   }
     226                 : 
     227                 :   /* Main decoding loop:  Start just after the last delimiter if any  */
     228                 :   /* basic code points were copied; start at the beginning otherwise. */
     229                 : 
     230              12 :   for (in = b > 0 ? b + 1 : 0;  in < input_length;  ++out) {
     231                 : 
     232                 :     /* in is the index of the next character to be consumed, and */
     233                 :     /* out is the number of code points in the output array.     */
     234                 : 
     235                 :     /* Decode a generalized variable-length integer into delta,  */
     236                 :     /* which gets added to i.  The overflow checking is easier   */
     237                 :     /* if we increase i as we go, then subtract off its starting */
     238                 :     /* value at the end to obtain delta.                         */
     239                 : 
     240              19 :     for (oldi = i, w = 1, k = base;  ;  k += base) {
     241              19 :       if (in >= input_length) return punycode_bad_input;
     242              19 :       digit = decode_digit(input[in++]);
     243              19 :       if (digit >= base) return punycode_bad_input;
     244              19 :       if (digit > (maxint - i) / w) return punycode_overflow;
     245              19 :       i += digit * w;
     246              26 :       t = k <= bias /* + tmin */ ? tmin :     /* +tmin not needed */
     247               7 :           k >= bias + tmax ? tmax : k - bias;
     248              19 :       if (digit < t) break;
     249              13 :       if (w > maxint / (base - t)) return punycode_overflow;
     250              13 :       w *= (base - t);
     251              13 :     }
     252                 : 
     253               6 :     bias = adapt(i - oldi, out + 1, oldi == 0);
     254                 : 
     255                 :     /* i was supposed to wrap around from out+1 to 0,   */
     256                 :     /* incrementing n each time, so we'll fix that now: */
     257                 : 
     258               6 :     if (i / (out + 1) > maxint - n) return punycode_overflow;
     259               6 :     n += i / (out + 1);
     260               6 :     i %= (out + 1);
     261                 : 
     262                 :     /* Insert n at position i of the output: */
     263                 : 
     264                 :     /* not needed for Punycode: */
     265                 :     /* if (decode_digit(n) <= base) return punycode_invalid_input; */
     266               6 :     if (out >= max_out) return punycode_big_output;
     267                 : 
     268               6 :     if (case_flags) {
     269               0 :       memmove(case_flags + i + 1, case_flags + i, out - i);
     270                 :       /* Case of last character determines uppercase flag: */
     271               0 :       case_flags[i] = flagged(input[in - 1]);
     272                 :     }
     273                 : 
     274               6 :     memmove(output + i + 1, output + i, (out - i) * sizeof *output);
     275               6 :     output[i++] = n;
     276                 :   }
     277                 : 
     278               6 :   *output_length = out;
     279               6 :   return punycode_success;
     280                 : }

Generated by: LCOV version 1.7