LCOV - code coverage report
Current view: directory - gfx/graphite2/src/inc - UtfCodec.h (source / functions) Found Hit Coverage
Test: app.info Lines: 44 0 0.0 %
Date: 2012-06-02 Functions: 44 0 0.0 %

       1                 : /*  GRAPHITE2 LICENSING
       2                 : 
       3                 :     Copyright 2010, SIL International
       4                 :     All rights reserved.
       5                 : 
       6                 :     This library is free software; you can redistribute it and/or modify
       7                 :     it under the terms of the GNU Lesser General Public License as published
       8                 :     by the Free Software Foundation; either version 2.1 of License, or
       9                 :     (at your option) any later version.
      10                 : 
      11                 :     This program is distributed in the hope that it will be useful,
      12                 :     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13                 :     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14                 :     Lesser General Public License for more details.
      15                 : 
      16                 :     You should also have received a copy of the GNU Lesser General Public
      17                 :     License along with this library in the file named "LICENSE".
      18                 :     If not, write to the Free Software Foundation, 51 Franklin Street,
      19                 :     Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
      20                 :     internet at http://www.fsf.org/licenses/lgpl.html.
      21                 : 
      22                 : Alternatively, the contents of this file may be used under the terms of the
      23                 : Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
      24                 : License, as published by the Free Software Foundation, either version 2
      25                 : of the License or (at your option) any later version.
      26                 : */
      27                 : #pragma once
      28                 : 
      29                 : #include <cstdlib>
      30                 : #include "inc/Main.h"
      31                 : 
      32                 : namespace graphite2 {
      33                 : 
      34                 : typedef uint32  uchar_t;
      35                 : 
      36                 : template <int N>
      37                 : struct _utf_codec
      38                 : {
      39                 :         typedef uchar_t codeunit_t;
      40                 : 
      41                 :         static void     put(codeunit_t * cp, const uchar_t , int8 & len) throw();
      42                 :         static uchar_t  get(const codeunit_t * cp, int8 & len) throw();
      43                 : };
      44                 : 
      45                 : 
      46                 : template <>
      47                 : struct _utf_codec<32>
      48                 : {
      49                 : private:
      50                 :         static const uchar_t    limit = 0x110000;
      51                 : public:
      52                 :         typedef uint32  codeunit_t;
      53                 : 
      54                 :         inline
      55               0 :         static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw()
      56                 :         {
      57               0 :                 *cp = usv; l = 1;
      58               0 :         }
      59                 : 
      60                 :         inline
      61               0 :         static uchar_t get(const codeunit_t * cp, int8 & l) throw()
      62                 :         {
      63               0 :                 if (cp[0] < limit)   { l = 1;  return cp[0]; }
      64               0 :                 else                            { l = -1; return 0xFFFD; }
      65                 :         }
      66                 : };
      67                 : 
      68                 : 
      69                 : template <>
      70                 : struct _utf_codec<16>
      71                 : {
      72                 : private:
      73                 :         static const int32      lead_offset              = 0xD800 - (0x10000 >> 10);
      74                 :         static const int32      surrogate_offset = 0x10000 - (0xD800 << 10) - 0xDC00;
      75                 : public:
      76                 :         typedef uint16  codeunit_t;
      77                 : 
      78                 :         inline
      79                 :         static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw()
      80                 :         {
      81                 :                 if (usv < 0x10000)   { l = 1; cp[0] = codeunit_t(usv); }
      82                 :                 else
      83                 :                 {
      84                 :                         cp[0] = codeunit_t(lead_offset + (usv >> 10));
      85                 :                         cp[1] = codeunit_t(0xDC00 + (usv & 0x3FF));
      86                 :                         l = 2;
      87                 :                 }
      88                 :         }
      89                 : 
      90                 :         inline
      91               0 :         static uchar_t get(const codeunit_t * cp, int8 & l) throw()
      92                 :         {
      93               0 :                 const uint32    uh = cp[0];
      94               0 :                 l = 1;
      95                 : 
      96               0 :                 if (0xD800 > uh || uh > 0xDFFF)   { return uh; }
      97               0 :                 const uint32 ul = cp[1];
      98               0 :                 if (uh > 0xDBFF || 0xDC00 > ul || ul > 0xDFFF) { l = -1; return 0xFFFD; }
      99               0 :                 ++l;
     100               0 :                 return (uh<<10) + ul + surrogate_offset;
     101                 :         }
     102                 : };
     103                 : 
     104                 : 
     105                 : template <>
     106                 : struct _utf_codec<8>
     107                 : {
     108                 : private:
     109                 :         static const int8 sz_lut[16];
     110                 :         static const byte mask_lut[5];
     111                 : 
     112                 : 
     113                 : public:
     114                 :         typedef uint8   codeunit_t;
     115                 : 
     116                 :         inline
     117               0 :         static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw()
     118                 :         {
     119               0 :                 if (usv < 0x80)              {l = 1; cp[0] = usv; return; }
     120               0 :         if (usv < 0x0800)    {l = 2; cp[0] = 0xC0 + (usv >> 6);  cp[1] = 0x80 + (usv & 0x3F); return; }
     121               0 :         if (usv < 0x10000)   {l = 3; cp[0] = 0xE0 + (usv >> 12); cp[1] = 0x80 + ((usv >> 6) & 0x3F);  cp[2] = 0x80 + (usv & 0x3F); return; }
     122               0 :         else                            {l = 4; cp[0] = 0xF0 + (usv >> 18); cp[1] = 0x80 + ((usv >> 12) & 0x3F); cp[2] = 0x80 + ((usv >> 6) & 0x3F); cp[3] = 0x80 + (usv & 0x3F); return; }
     123                 :         }
     124                 : 
     125                 :         inline
     126               0 :         static uchar_t get(const codeunit_t * cp, int8 & l) throw()
     127                 :         {
     128               0 :                 const int8 seq_sz = sz_lut[*cp >> 4];
     129               0 :                 uchar_t u = *cp & mask_lut[seq_sz];
     130               0 :                 l = 1;
     131               0 :                 bool toolong = false;
     132                 : 
     133               0 :                 switch(seq_sz) {
     134               0 :                         case 4:     u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong  = (u < 0x10);
     135               0 :                         case 3:     u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong |= (u < 0x20);
     136               0 :                         case 2:     u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong |= (u < 0x80);
     137               0 :                         case 1:         break;
     138               0 :                         case 0:     l = -1; return 0xFFFD;
     139                 :                 }
     140                 : 
     141               0 :                 if (l != seq_sz || toolong)
     142                 :                 {
     143               0 :                         l = -l;
     144               0 :                         return 0xFFFD;
     145                 :                 }
     146               0 :                 return u;
     147                 :         }
     148                 : };
     149                 : 
     150                 : 
     151                 : template <typename C>
     152                 : class _utf_iterator
     153                 : {
     154                 :         typedef _utf_codec<sizeof(C)*8>   codec;
     155                 : 
     156                 :         C                         * cp;
     157                 :         mutable int8    sl;
     158                 : 
     159                 : public:
     160                 :         typedef C                       codeunit_type;
     161                 :         typedef uchar_t         value_type;
     162                 :         typedef uchar_t   * pointer;
     163                 : 
     164                 :         class reference
     165                 :         {
     166                 :                 const _utf_iterator & _i;
     167                 : 
     168               0 :                 reference(const _utf_iterator & i): _i(i) {}
     169                 :         public:
     170               0 :                 operator value_type () const throw ()                                   { return codec::get(_i.cp, _i.sl); }
     171               0 :                 reference & operator = (const value_type usv) throw()       { codec::put(_i.cp, usv, _i.sl); return *this; }
     172                 : 
     173                 :                 friend class _utf_iterator;
     174                 :         };
     175                 : 
     176                 : 
     177               0 :         _utf_iterator(const void * us=0)        : cp(reinterpret_cast<C *>(const_cast<void *>(us))), sl(1) { }
     178                 : 
     179               0 :         _utf_iterator   & operator ++ ()    { cp += abs(sl); return *this; }
     180                 :         _utf_iterator   operator ++ (int)       { _utf_iterator tmp(*this); operator++(); return tmp; }
     181                 : 
     182               0 :         bool operator == (const _utf_iterator & rhs) const throw() { return cp >= rhs.cp; }
     183               0 :         bool operator != (const _utf_iterator & rhs) const throw() { return !operator==(rhs); }
     184                 : 
     185               0 :         reference       operator * () const throw() { return *this; }
     186                 :         pointer         operator ->() const throw() { return &operator *(); }
     187                 : 
     188               0 :         operator codeunit_type * () const throw() { return cp; }
     189                 : 
     190               0 :         bool error() const throw()      { return sl < 1; }
     191                 : };
     192                 : 
     193                 : template <typename C>
     194                 : struct utf
     195                 : {
     196                 :         typedef typename _utf_codec<sizeof(C)*8>::codeunit_t codeunit_t;
     197                 : 
     198                 :         typedef _utf_iterator<C>          iterator;
     199                 :         typedef _utf_iterator<const C>    const_iterator;
     200                 : };
     201                 : 
     202                 : 
     203                 : typedef utf<uint32>       utf32;
     204                 : typedef utf<uint16>       utf16;
     205                 : typedef utf<uint8>        utf8;
     206                 : 
     207                 : } // namespace graphite2

Generated by: LCOV version 1.7