/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include #include #include #include "convertsimple.hxx" #include "handleundefinedunicodetotextchar.hxx" #include "tenchelp.hxx" #define IMPL_MAX_REPLACECHAR 5 namespace { struct ImplReplaceCharData { sal_uInt16 const mnUniChar; sal_uInt16 const mnReplaceChar; }; static ImplReplaceCharData const aImplRepCharTab[] = { { 0x00A0, 0x0020 }, /* NO-BREAK-SPACE */ { 0x00A1, 0x0021 }, /* INVERTED EXCLAMATION MARK */ { 0x00B7, 0x0045 }, /* MIDDLE DOT */ { 0x00BF, 0x003F }, /* INVERTED QUESTION MARK */ { 0x00D7, 0x002A }, /* MULTIPLICATION SIGN */ { 0x00F7, 0x002F }, /* DIVISION SIGN */ { 0x2000, 0x0020 }, /* EN QUAD */ { 0x2001, 0x0020 }, /* EM QUAD */ { 0x2002, 0x0020 }, /* EN SPACE */ { 0x2003, 0x0020 }, /* EM SPACE */ { 0x2004, 0x0020 }, /* THREE-PER-EM SPACE */ { 0x2005, 0x0020 }, /* FOUR-PER-EM SPACE */ { 0x2006, 0x0020 }, /* SIX-PER-EM SPACE */ { 0x2007, 0x0020 }, /* FIGURE SPACE */ { 0x2008, 0x0020 }, /* PUNCTUATION SPACE */ { 0x2009, 0x0020 }, /* THIN SPACE */ { 0x200A, 0x0020 }, /* HAIR SPACE */ { 0x2010, 0x002D }, /* HYPHEN */ { 0x2011, 0x002D }, /* NON-BREAKING HYPHEN */ { 0x2012, 0x002D }, /* FIGURE DASH */ { 0x2013, 0x002D }, /* EN DASH */ { 0x2014, 0x002D }, /* EM DASH */ { 0x2015, 0x002D }, /* HORIZONTAL BAR */ { 0x2018, 0x0027 }, /* LEFT SINGLE QUOTATION MARK */ { 0x2019, 0x0027 }, /* RIGHT SINGLE QUOTATION MARK */ { 0x201A, 0x002C }, /* SINGLE LOW-9 QUOTATION MARK */ { 0x201B, 0x0027 }, /* SINGLE HIGH-RESERVED-9 QUOTATION MARK */ { 0x201C, 0x0022 }, /* LEFT DOUBLE QUOTATION MARK */ { 0x201D, 0x0022 }, /* RIGHT DOUBLE QUOTATION MARK */ { 0x201E, 0x0022 }, /* DOUBLE LOW-9 QUOTATION MARK */ { 0x201F, 0x0022 }, /* DOUBLE HIGH-RESERVED-9 QUOTATION MARK */ { 0x2022, 0x002D }, /* BULLET */ { 0x2023, 0x002D }, /* TRIANGULAR BULLET */ { 0x2024, 0x002D }, /* ONE DOT LEADER */ { 0x2027, 0x002D }, /* HYPHENATION POINT */ { 0x2028, 0x000A }, /* LINE SEPARATOR */ { 0x2029, 0x000D }, /* PARAGRAPH SEPARATOR */ { 0x2032, 0x0027 }, /* PRIME */ { 0x2033, 0x0022 }, /* DOUBLE PRIME */ { 0x2035, 0x0027 }, /* RESERVED PRIME */ { 0x2036, 0x0022 }, /* RESERVED DOUBLE PRIME */ { 0x2039, 0x003C }, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */ { 0x203A, 0x003E }, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */ { 0x2043, 0x002D }, /* HYPHEN BULLET */ { 0x2044, 0x002F }, /* FRACTION SLASH */ { 0x2160, 0x0049 }, /* ROMAN NUMERAL ONE */ { 0x2164, 0x0056 }, /* ROMAN NUMERAL FIVE */ { 0x2169, 0x0058 }, /* ROMAN NUMERAL TEN */ { 0x216C, 0x004C }, /* ROMAN NUMERAL FIFTY */ { 0x216D, 0x0043 }, /* ROMAN NUMERAL ONE HUNDRED */ { 0x216E, 0x0044 }, /* ROMAN NUMERAL FIVE HUNDRED */ { 0x216F, 0x004D }, /* ROMAN NUMERAL ONE THOUSAND */ { 0x2170, 0x0069 }, /* SMALL ROMAN NUMERAL ONE */ { 0x2174, 0x0076 }, /* SMALL ROMAN NUMERAL FIVE */ { 0x2179, 0x0078 }, /* SMALL ROMAN NUMERAL TEN */ { 0x217C, 0x006C }, /* SMALL ROMAN NUMERAL FIFTY */ { 0x217D, 0x0063 }, /* SMALL ROMAN NUMERAL ONE HUNDRED */ { 0x217E, 0x0064 }, /* SMALL ROMAN NUMERAL FIVE HUNDRED */ { 0x217F, 0x006D }, /* SMALL ROMAN NUMERAL ONE THOUSAND */ { 0x2215, 0x002F }, /* DIVISION SLASH */ { 0x2217, 0x002A }, /* ASTERISK OPERATOR */ { 0xFF00, 0x0020 }, /* FULLWIDTH ASCII FORMS */ { 0xFF01, 0x0021 }, /* FULLWIDTH ASCII FORMS */ { 0xFF02, 0x0022 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF03, 0x0023 }, /* FULLWIDTH ASCII FORMS */ { 0xFF04, 0x0024 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF05, 0x0025 }, /* FULLWIDTH ASCII FORMS */ { 0xFF06, 0x0026 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF07, 0x0027 }, /* FULLWIDTH ASCII FORMS */ { 0xFF08, 0x0028 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF09, 0x0029 }, /* FULLWIDTH ASCII FORMS */ { 0xFF0A, 0x002A }, /* FULLWIDTH ASCII FORMS*/ { 0xFF0B, 0x002B }, /* FULLWIDTH ASCII FORMS */ { 0xFF0C, 0x002C }, /* FULLWIDTH ASCII FORMS*/ { 0xFF0D, 0x002D }, /* FULLWIDTH ASCII FORMS */ { 0xFF0E, 0x002E }, /* FULLWIDTH ASCII FORMS*/ { 0xFF0F, 0x002F }, /* FULLWIDTH ASCII FORMS */ { 0xFF10, 0x0030 }, /* FULLWIDTH ASCII FORMS */ { 0xFF11, 0x0031 }, /* FULLWIDTH ASCII FORMS */ { 0xFF12, 0x0032 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF13, 0x0033 }, /* FULLWIDTH ASCII FORMS */ { 0xFF14, 0x0034 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF15, 0x0035 }, /* FULLWIDTH ASCII FORMS */ { 0xFF16, 0x0036 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF17, 0x0037 }, /* FULLWIDTH ASCII FORMS */ { 0xFF18, 0x0038 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF19, 0x0039 }, /* FULLWIDTH ASCII FORMS */ { 0xFF1A, 0x003A }, /* FULLWIDTH ASCII FORMS*/ { 0xFF1B, 0x003B }, /* FULLWIDTH ASCII FORMS */ { 0xFF1C, 0x003C }, /* FULLWIDTH ASCII FORMS*/ { 0xFF1D, 0x003D }, /* FULLWIDTH ASCII FORMS */ { 0xFF1E, 0x003E }, /* FULLWIDTH ASCII FORMS*/ { 0xFF1F, 0x003F }, /* FULLWIDTH ASCII FORMS */ { 0xFF20, 0x0040 }, /* FULLWIDTH ASCII FORMS */ { 0xFF21, 0x0041 }, /* FULLWIDTH ASCII FORMS */ { 0xFF22, 0x0042 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF23, 0x0043 }, /* FULLWIDTH ASCII FORMS */ { 0xFF24, 0x0044 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF25, 0x0045 }, /* FULLWIDTH ASCII FORMS */ { 0xFF26, 0x0046 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF27, 0x0047 }, /* FULLWIDTH ASCII FORMS */ { 0xFF28, 0x0048 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF29, 0x0049 }, /* FULLWIDTH ASCII FORMS */ { 0xFF2A, 0x004A }, /* FULLWIDTH ASCII FORMS*/ { 0xFF2B, 0x004B }, /* FULLWIDTH ASCII FORMS */ { 0xFF2C, 0x004C }, /* FULLWIDTH ASCII FORMS*/ { 0xFF2D, 0x004D }, /* FULLWIDTH ASCII FORMS */ { 0xFF2E, 0x004E }, /* FULLWIDTH ASCII FORMS*/ { 0xFF2F, 0x004F }, /* FULLWIDTH ASCII FORMS */ { 0xFF30, 0x0050 }, /* FULLWIDTH ASCII FORMS */ { 0xFF31, 0x0051 }, /* FULLWIDTH ASCII FORMS */ { 0xFF32, 0x0052 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF33, 0x0053 }, /* FULLWIDTH ASCII FORMS */ { 0xFF34, 0x0054 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF35, 0x0055 }, /* FULLWIDTH ASCII FORMS */ { 0xFF36, 0x0056 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF37, 0x0057 }, /* FULLWIDTH ASCII FORMS */ { 0xFF38, 0x0058 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF39, 0x0059 }, /* FULLWIDTH ASCII FORMS */ { 0xFF3A, 0x005A }, /* FULLWIDTH ASCII FORMS*/ { 0xFF3B, 0x005B }, /* FULLWIDTH ASCII FORMS */ { 0xFF3C, 0x005C }, /* FULLWIDTH ASCII FORMS*/ { 0xFF3D, 0x005D }, /* FULLWIDTH ASCII FORMS */ { 0xFF3E, 0x005E }, /* FULLWIDTH ASCII FORMS*/ { 0xFF3F, 0x005F }, /* FULLWIDTH ASCII FORMS */ { 0xFF40, 0x0060 }, /* FULLWIDTH ASCII FORMS */ { 0xFF41, 0x0061 }, /* FULLWIDTH ASCII FORMS */ { 0xFF42, 0x0062 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF43, 0x0063 }, /* FULLWIDTH ASCII FORMS */ { 0xFF44, 0x0064 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF45, 0x0065 }, /* FULLWIDTH ASCII FORMS */ { 0xFF46, 0x0066 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF47, 0x0067 }, /* FULLWIDTH ASCII FORMS */ { 0xFF48, 0x0068 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF49, 0x0069 }, /* FULLWIDTH ASCII FORMS */ { 0xFF4A, 0x006A }, /* FULLWIDTH ASCII FORMS*/ { 0xFF4B, 0x006B }, /* FULLWIDTH ASCII FORMS */ { 0xFF4C, 0x006C }, /* FULLWIDTH ASCII FORMS*/ { 0xFF4D, 0x006D }, /* FULLWIDTH ASCII FORMS */ { 0xFF4E, 0x006E }, /* FULLWIDTH ASCII FORMS*/ { 0xFF4F, 0x006F }, /* FULLWIDTH ASCII FORMS */ { 0xFF50, 0x0070 }, /* FULLWIDTH ASCII FORMS */ { 0xFF51, 0x0071 }, /* FULLWIDTH ASCII FORMS */ { 0xFF52, 0x0072 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF53, 0x0073 }, /* FULLWIDTH ASCII FORMS */ { 0xFF54, 0x0074 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF55, 0x0075 }, /* FULLWIDTH ASCII FORMS */ { 0xFF56, 0x0076 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF57, 0x0077 }, /* FULLWIDTH ASCII FORMS */ { 0xFF58, 0x0078 }, /* FULLWIDTH ASCII FORMS*/ { 0xFF59, 0x0079 }, /* FULLWIDTH ASCII FORMS */ { 0xFF5A, 0x007A }, /* FULLWIDTH ASCII FORMS*/ { 0xFF5B, 0x007B }, /* FULLWIDTH ASCII FORMS */ { 0xFF5C, 0x007C }, /* FULLWIDTH ASCII FORMS*/ { 0xFF5D, 0x007D }, /* FULLWIDTH ASCII FORMS */ { 0xFF5E, 0x007E }, /* FULLWIDTH ASCII FORMS*/ { 0xFF5F, 0x007F }, /* FULLWIDTH ASCII FORMS */ { 0xFF61, 0x3002 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF62, 0x300C }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF63, 0x300D }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF64, 0x3001 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF65, 0x30FB }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF66, 0x30F2 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF67, 0x30A1 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF68, 0x30A3 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF69, 0x30A5 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF6A, 0x30A7 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF6B, 0x30A9 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF6C, 0x30E3 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF6D, 0x30E5 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF6E, 0x30E7 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF6F, 0x30C3 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF70, 0x30FC }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF71, 0x30A2 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF72, 0x30A4 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF73, 0x30A6 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF74, 0x30A8 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF75, 0x30AA }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF76, 0x30AB }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF77, 0x30AD }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF78, 0x30AF }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF79, 0x30B1 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF7A, 0x30B3 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF7B, 0x30B5 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF7C, 0x30B7 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF7D, 0x30B9 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF7E, 0x30BB }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF7F, 0x30BD }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF80, 0x30BF }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF81, 0x30C1 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF82, 0x30C4 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF83, 0x30C6 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF84, 0x30C8 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF85, 0x30CA }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF86, 0x30CB }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF87, 0x30CC }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF88, 0x30CD }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF89, 0x30CE }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF8A, 0x30CF }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF8B, 0x30D2 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF8C, 0x30D5 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF8D, 0x30D8 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF8E, 0x30DB }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF8F, 0x30DE }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF90, 0x30DF }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF91, 0x30E0 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF92, 0x30E1 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF93, 0x30E2 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF94, 0x30E4 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF95, 0x30E6 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF96, 0x30E8 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF97, 0x30E9 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF98, 0x30EA }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF99, 0x30EB }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF9A, 0x30EC }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF9B, 0x30ED }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF9C, 0x30EF }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF9D, 0x30F3 }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF9E, 0x309B }, /* HALFWIDTH KATAKANA FORMS */ { 0xFF9F, 0x309C }, /* HALFWIDTH KATAKANA FORMS */ { 0xFFA0, 0x3164 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFA1, 0x3131 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFA2, 0x3132 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFA3, 0x3133 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFA4, 0x3134 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFA5, 0x3135 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFA6, 0x3136 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFA7, 0x3137 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFA8, 0x3138 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFA9, 0x3139 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFAA, 0x313A }, /* HALFWIDTH HANGUL FORMS */ { 0xFFAB, 0x313B }, /* HALFWIDTH HANGUL FORMS */ { 0xFFAC, 0x313C }, /* HALFWIDTH HANGUL FORMS */ { 0xFFAD, 0x313D }, /* HALFWIDTH HANGUL FORMS */ { 0xFFAE, 0x313E }, /* HALFWIDTH HANGUL FORMS */ { 0xFFAF, 0x313F }, /* HALFWIDTH HANGUL FORMS */ { 0xFFB0, 0x3140 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFB1, 0x3141 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFB2, 0x3142 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFB3, 0x3143 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFB4, 0x3144 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFB5, 0x3145 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFB6, 0x3146 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFB7, 0x3147 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFB8, 0x3148 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFB9, 0x3149 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFBA, 0x314A }, /* HALFWIDTH HANGUL FORMS */ { 0xFFBB, 0x314B }, /* HALFWIDTH HANGUL FORMS */ { 0xFFBC, 0x314C }, /* HALFWIDTH HANGUL FORMS */ { 0xFFBD, 0x314D }, /* HALFWIDTH HANGUL FORMS */ { 0xFFBE, 0x314E }, /* HALFWIDTH HANGUL FORMS */ { 0xFFC2, 0x314F }, /* HALFWIDTH HANGUL FORMS */ { 0xFFC3, 0x3150 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFC4, 0x3151 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFC5, 0x3152 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFC6, 0x3153 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFC7, 0x3154 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFCA, 0x3155 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFCB, 0x3156 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFCC, 0x3157 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFCD, 0x3158 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFCE, 0x3159 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFCF, 0x315A }, /* HALFWIDTH HANGUL FORMS */ { 0xFFD2, 0x315B }, /* HALFWIDTH HANGUL FORMS */ { 0xFFD3, 0x315C }, /* HALFWIDTH HANGUL FORMS */ { 0xFFD4, 0x315D }, /* HALFWIDTH HANGUL FORMS */ { 0xFFD5, 0x315E }, /* HALFWIDTH HANGUL FORMS */ { 0xFFD6, 0x315F }, /* HALFWIDTH HANGUL FORMS */ { 0xFFD7, 0x3160 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFDA, 0x3161 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFDB, 0x3162 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFDC, 0x3163 }, /* HALFWIDTH HANGUL FORMS */ { 0xFFE0, 0x00A2 }, /* FULLWIDTH CENT SIGN */ { 0xFFE1, 0x00A3 }, /* FULLWIDTH POUND SIGN */ { 0xFFE2, 0x00AC }, /* FULLWIDTH NOT SIGN */ { 0xFFE3, 0x00AF }, /* FULLWIDTH MACRON */ { 0xFFE4, 0x00A6 }, /* FULLWIDTH BROKEN BAR */ { 0xFFE5, 0x00A5 }, /* FULLWIDTH YEN SIGN */ { 0xFFE6, 0x20A9 }, /* FULLWIDTH WON SIGN */ { 0xFFE8, 0x2502 }, /* HALFWIDTH FORMS LIGHT VERTICAL */ { 0xFFE9, 0x2190 }, /* HALFWIDTH LEFTWARDS ARROW */ { 0xFFEA, 0x2191 }, /* HALFWIDTH UPWARDS ARROW */ { 0xFFEB, 0x2192 }, /* HALFWIDTH RIGHTWARDS ARROW */ { 0xFFEC, 0x2193 }, /* HALFWIDTH DOWNWARDS ARROW */ { 0xFFED, 0x25A0 }, /* HALFWIDTH BLACK SQUARE */ { 0xFFEE, 0x25CB }, /* HALFWIDTH WHITE CIRCLE */ { 0xFFFD, 0x003F } /* REPLACEMENT CHARACTER */ }; sal_uInt16 ImplGetReplaceChar( sal_Unicode c ) { sal_uInt16 nLow; sal_uInt16 nHigh; sal_uInt16 nMid; sal_uInt16 nCompareChar; const ImplReplaceCharData* pCharData; nLow = 0; nHigh = SAL_N_ELEMENTS( aImplRepCharTab ) - 1; do { nMid = (nLow+nHigh)/2; pCharData = aImplRepCharTab+nMid; nCompareChar = pCharData->mnUniChar; if ( c < nCompareChar ) { if ( !nMid ) break; nHigh = nMid-1; } else { if ( c > nCompareChar ) nLow = nMid+1; else return pCharData->mnReplaceChar; } } while ( nLow <= nHigh ); return 0; } struct ImplReplaceCharStrData { sal_uInt16 const mnUniChar; sal_uInt16 const maReplaceChars[IMPL_MAX_REPLACECHAR]; }; static ImplReplaceCharStrData const aImplRepCharStrTab[] = { { 0x00A9, { 0x0028, 0x0063, 0x0029, 0x0000, 0x0000 } }, /* COPYRIGHT SIGN */ { 0x00AB, { 0x003C, 0x003C, 0x0000, 0x0000, 0x0000 } }, /* LEFT-POINTING-DOUBLE ANGLE QUOTATION MARK */ { 0x0AE0, { 0x0028, 0x0072, 0x0029, 0x0000, 0x0000 } }, /* REGISTERED SIGN */ { 0x00BB, { 0x003E, 0x003E, 0x0000, 0x0000, 0x0000 } }, /* RIGHT-POINTING-DOUBLE ANGLE QUOTATION MARK */ { 0x00BC, { 0x0031, 0x002F, 0x0034, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE QUARTER */ { 0x00BD, { 0x0031, 0x002F, 0x0032, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE HALF */ { 0x00BE, { 0x0033, 0x002F, 0x0034, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE QUARTERS */ { 0x00C6, { 0x0041, 0x0045, 0x0000, 0x0000, 0x0000 } }, /* LATIN CAPITAL LETTER AE */ { 0x00E6, { 0x0061, 0x0065, 0x0000, 0x0000, 0x0000 } }, /* LATIN SMALL LETTER AE */ { 0x0152, { 0x004F, 0x0045, 0x0000, 0x0000, 0x0000 } }, /* LATIN CAPITAL LIGATURE OE */ { 0x0153, { 0x006F, 0x0065, 0x0000, 0x0000, 0x0000 } }, /* LATIN SMALL LIGATURE OE */ { 0x2025, { 0x002E, 0x002E, 0x0000, 0x0000, 0x0000 } }, /* TWO DOT LEADER */ { 0x2026, { 0x002E, 0x002E, 0x002E, 0x0000, 0x0000 } }, /* HORIZONTAL ELLIPSES */ { 0x2034, { 0x0027, 0x0027, 0x0027, 0x0000, 0x0000 } }, /* TRIPLE PRIME */ { 0x2037, { 0x0027, 0x0027, 0x0027, 0x0000, 0x0000 } }, /* REVERSED TRIPLE PRIME */ { 0x20AC, { 0x0045, 0x0055, 0x0052, 0x0000, 0x0000 } }, /* EURO SIGN */ { 0x2122, { 0x0028, 0x0074, 0x006D, 0x0029, 0x0000 } }, /* TRADE MARK SIGN */ { 0x2153, { 0x0031, 0x002F, 0x0033, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE THIRD */ { 0x2154, { 0x0032, 0x002F, 0x0033, 0x0000, 0x0000 } }, /* VULGAR FRACTION TWO THIRD */ { 0x2155, { 0x0031, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE FIFTH */ { 0x2156, { 0x0032, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION TWO FIFTH */ { 0x2157, { 0x0033, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE FIFTH */ { 0x2158, { 0x0034, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION FOUR FIFTH */ { 0x2159, { 0x0031, 0x002F, 0x0036, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE SIXTH */ { 0x215A, { 0x0035, 0x002F, 0x0036, 0x0000, 0x0000 } }, /* VULGAR FRACTION FIVE SIXTH */ { 0x215B, { 0x0031, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE EIGHTH */ { 0x215C, { 0x0033, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE EIGHTH */ { 0x215D, { 0x0035, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION FIVE EIGHTH */ { 0x215E, { 0x0037, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION SEVEN EIGHTH */ { 0x215F, { 0x0031, 0x002F, 0x0000, 0x0000, 0x0000 } }, /* FRACTION NUMERATOR ONE */ { 0x2161, { 0x0049, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL TWO */ { 0x2162, { 0x0049, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL THREE */ { 0x2163, { 0x0049, 0x0056, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL FOUR */ { 0x2165, { 0x0056, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL SIX */ { 0x2166, { 0x0056, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL SEVEN */ { 0x2168, { 0x0056, 0x0049, 0x0049, 0x0049, 0x0000 } }, /* ROMAN NUMERAL EIGHT */ { 0x2169, { 0x0049, 0x0058, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL NINE */ { 0x216A, { 0x0058, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL ELEVEN */ { 0x216B, { 0x0058, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL TWELVE */ { 0x2171, { 0x0069, 0x0069, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL TWO */ { 0x2172, { 0x0069, 0x0069, 0x0069, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL THREE */ { 0x2173, { 0x0069, 0x0076, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL FOUR */ { 0x2175, { 0x0076, 0x0069, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL SIX */ { 0x2176, { 0x0076, 0x0069, 0x0069, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL SEVEN */ { 0x2178, { 0x0076, 0x0069, 0x0069, 0x0069, 0x0000 } }, /* SMALL ROMAN NUMERAL EIGHT */ { 0x2179, { 0x0069, 0x0078, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL NINE */ { 0x217A, { 0x0078, 0x0069, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL ELEVEN */ { 0x217B, { 0x0058, 0x0069, 0x0069, 0x0000, 0x0000 } } /* SMALL ROMAN NUMERAL TWELVE */ }; const sal_uInt16* ImplGetReplaceString( sal_Unicode c ) { sal_uInt16 nLow; sal_uInt16 nHigh; sal_uInt16 nMid; sal_uInt16 nCompareChar; const ImplReplaceCharStrData* pCharData; nLow = 0; nHigh = SAL_N_ELEMENTS( aImplRepCharStrTab ) - 1; do { nMid = (nLow+nHigh)/2; pCharData = aImplRepCharStrTab+nMid; nCompareChar = pCharData->mnUniChar; if ( c < nCompareChar ) { if ( !nMid ) break; nHigh = nMid-1; } else { if ( c > nCompareChar ) nLow = nMid+1; else return pCharData->maReplaceChars; } } while ( nLow <= nHigh ); return nullptr; } // Writes 0--2 characters to dest: int ImplConvertUnicodeCharToChar( const ImplByteConvertData* pConvertData, sal_Unicode c, char * dest ) { const ImplUniCharTabData* pToCharExTab; if ( c < 0x80 ) { dest[0] = static_cast< char >(c); return 1; } if ( (c >= pConvertData->mnToCharStart1) && (c <= pConvertData->mnToCharEnd1) ) { dest[0] = static_cast< char >(pConvertData->mpToCharTab1[c-pConvertData->mnToCharStart1]); if ( dest[0] != 0 ) return 1; } else if ( (c >= pConvertData->mnToCharStart2) && (c <= pConvertData->mnToCharEnd2) ) { dest[0] = static_cast< char >(pConvertData->mpToCharTab2[c-pConvertData->mnToCharStart2]); if ( dest[0] != 0 ) return 1; } pToCharExTab = pConvertData->mpToCharExTab; if ( pToCharExTab ) { sal_uInt16 nLow; sal_uInt16 nHigh; sal_uInt16 nMid; sal_uInt16 nCompareChar; const ImplUniCharTabData* pCharExData; nLow = 0; nHigh = pConvertData->mnToCharExCount-1; do { nMid = (nLow+nHigh)/2; pCharExData = pToCharExTab+nMid; nCompareChar = pCharExData->mnUniChar; if ( c < nCompareChar ) { if ( !nMid ) break; nHigh = nMid-1; } else { if ( c > nCompareChar ) nLow = nMid+1; else { dest[0] = static_cast< char >(pCharExData->mnChar); if ( pCharExData->mnChar2 == 0 ) return 1; dest[1] = static_cast< char >(pCharExData->mnChar2); return 2; } } } while ( nLow <= nHigh ); } return 0; } } sal_Size sal::detail::textenc::convertCharToUnicode( void const * pData, SAL_UNUSED_PARAMETER void *, char const * pSrcBuf, sal_Size nSrcBytes, sal_Unicode * pDestBuf, sal_Size nDestChars, sal_uInt32 nFlags, sal_uInt32 * pInfo, sal_Size * pSrcCvtBytes) { sal_Unicode cConv; const ImplByteConvertData* pConvertData = static_cast(pData); sal_Unicode* pEndDestBuf; const char* pEndSrcBuf; *pInfo = 0; pEndDestBuf = pDestBuf+nDestChars; pEndSrcBuf = pSrcBuf+nSrcBytes; while ( pSrcBuf < pEndSrcBuf ) { unsigned char c = static_cast(*pSrcBuf); if ( c < 0x80 ) cConv = c; else { if ( (c >= pConvertData->mnToUniStart1) && (c <= pConvertData->mnToUniEnd1) ) cConv = pConvertData->mpToUniTab1[c-pConvertData->mnToUniStart1]; else if ( (c >= pConvertData->mnToUniStart2) && (c <= pConvertData->mnToUniEnd2) ) cConv = pConvertData->mpToUniTab2[c-pConvertData->mnToUniStart2]; else cConv = 0; if ( !cConv ) { *pInfo |= RTL_TEXTTOUNICODE_INFO_UNDEFINED; if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR ) { if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) { ++pSrcBuf; } *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; break; } if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE ) { pSrcBuf++; continue; } cConv = ImplGetUndefinedUnicodeChar(c, nFlags); } } if ( pDestBuf == pEndDestBuf ) { *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL; break; } *pDestBuf = cConv; pDestBuf++; pSrcBuf++; } *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); return (nDestChars - (pEndDestBuf-pDestBuf)); } sal_Size sal::detail::textenc::convertUnicodeToChar( void const * pData, SAL_UNUSED_PARAMETER void *, sal_Unicode const * pSrcBuf, sal_Size nSrcChars, char * pDestBuf, sal_Size nDestBytes, sal_uInt32 nFlags, sal_uInt32 * pInfo, sal_Size * pSrcCvtChars) { sal_Unicode c; const ImplByteConvertData* pConvertData = static_cast(pData); char* pEndDestBuf; const sal_Unicode* pEndSrcBuf; int i; int n; sal_uInt16 cTemp; char aTempBuf[IMPL_MAX_REPLACECHAR+2]; const sal_uInt16* pReplace; *pInfo = 0; pEndDestBuf = pDestBuf+nDestBytes; pEndSrcBuf = pSrcBuf+nSrcChars; while ( pSrcBuf < pEndSrcBuf ) { c = *pSrcBuf; if ( c < 0x80 ) { aTempBuf[0] = static_cast< char >(c); n = 1; } else { n = ImplConvertUnicodeCharToChar( pConvertData, c, aTempBuf ); if ( n == 0 ) { if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE ) { cTemp = ImplGetReplaceChar( c ); if ( cTemp ) n = ImplConvertUnicodeCharToChar( pConvertData, cTemp, aTempBuf ); } if ( n == 0 ) { if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR ) { pReplace = ImplGetReplaceString( c ); if ( pReplace ) { while ( *pReplace && (n < IMPL_MAX_REPLACECHAR) ) { i = ImplConvertUnicodeCharToChar( pConvertData, *pReplace, aTempBuf + n ); if ( i == 0 ) { n = 0; break; } pReplace++; n += i; } } } /* Handle undefined and surrogates characters */ /* (all surrogates characters are undefined) */ if ( n == 0 ) { if (sal::detail::textenc::handleUndefinedUnicodeToTextChar( &pSrcBuf, pEndSrcBuf, &pDestBuf, pEndDestBuf, nFlags, pInfo)) continue; break; } } } } if ( pEndDestBuf - pDestBuf < n ) { *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; break; } for ( i = 0; i < n; ++i ) *pDestBuf++ = aTempBuf[i]; pSrcBuf++; } *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); return (nDestBytes - (pEndDestBuf-pDestBuf)); } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */