summaryrefslogtreecommitdiff
path: root/sal/textenc/unichars.c
diff options
context:
space:
mode:
Diffstat (limited to 'sal/textenc/unichars.c')
-rw-r--r--sal/textenc/unichars.c136
1 files changed, 136 insertions, 0 deletions
diff --git a/sal/textenc/unichars.c b/sal/textenc/unichars.c
new file mode 100644
index 000000000000..454e057c7f84
--- /dev/null
+++ b/sal/textenc/unichars.c
@@ -0,0 +1,136 @@
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+#include "unichars.h"
+#include "osl/diagnose.h"
+#include "sal/types.h"
+
+int ImplIsNoncharacter(sal_uInt32 nUtf32)
+{
+ /* All code points that are noncharacters, as of Unicode 3.1.1. */
+ return (nUtf32 >= 0xFDD0 && nUtf32 <= 0xFDEF)
+ || (nUtf32 & 0xFFFF) >= 0xFFFE
+ || nUtf32 > 0x10FFFF;
+}
+
+int ImplIsControlOrFormat(sal_uInt32 nUtf32)
+{
+ /* All code points of <http://www.unicode.org/Public/UNIDATA/
+ UnicodeData.txt>, Version 3.1.1, that have a General Category of Cc
+ (Other, Control) or Cf (Other, Format).
+ */
+ return nUtf32 <= 0x001F
+ || (nUtf32 >= 0x007F && nUtf32 <= 0x009F)
+ || nUtf32 == 0x070F /* SYRIAC ABBREVIATION MARK */
+ || nUtf32 == 0x180B /* MONGOLIAN FREE VARIATION SELECTOR ONE */
+ || nUtf32 == 0x180C /* MONGOLIAN FREE VARIATION SELECTOR TWO */
+ || nUtf32 == 0x180D /* MONGOLIAN FREE VARIATION SELECTOR THREE */
+ || nUtf32 == 0x180E /* MONGOLIAN VOWEL SEPARATOR */
+ || nUtf32 == 0x200C /* ZERO WIDTH NON-JOINER */
+ || nUtf32 == 0x200D /* ZERO WIDTH JOINER */
+ || nUtf32 == 0x200E /* LEFT-TO-RIGHT MARK */
+ || nUtf32 == 0x200F /* RIGHT-TO-LEFT MARK */
+ || nUtf32 == 0x202A /* LEFT-TO-RIGHT EMBEDDING */
+ || nUtf32 == 0x202B /* RIGHT-TO-LEFT EMBEDDING */
+ || nUtf32 == 0x202C /* POP DIRECTIONAL FORMATTING */
+ || nUtf32 == 0x202D /* LEFT-TO-RIGHT OVERRIDE */
+ || nUtf32 == 0x202E /* RIGHT-TO-LEFT OVERRIDE */
+ || nUtf32 == 0x206A /* INHIBIT SYMMETRIC SWAPPING */
+ || nUtf32 == 0x206B /* ACTIVATE SYMMETRIC SWAPPING */
+ || nUtf32 == 0x206C /* INHIBIT ARABIC FORM SHAPING */
+ || nUtf32 == 0x206D /* ACTIVATE ARABIC FORM SHAPING */
+ || nUtf32 == 0x206E /* NATIONAL DIGIT SHAPES */
+ || nUtf32 == 0x206F /* NOMINAL DIGIT SHAPES */
+ || nUtf32 == 0xFEFF /* ZERO WIDTH NO-BREAK SPACE */
+ || nUtf32 == 0xFFF9 /* INTERLINEAR ANNOTATION ANCHOR */
+ || nUtf32 == 0xFFFA /* INTERLINEAR ANNOTATION SEPARATOR */
+ || nUtf32 == 0xFFFB /* INTERLINEAR ANNOTATION TERMINATOR */
+ || nUtf32 == 0x1D173 /* MUSICAL SYMBOL BEGIN BEAM */
+ || nUtf32 == 0x1D174 /* MUSICAL SYMBOL END BEAM */
+ || nUtf32 == 0x1D175 /* MUSICAL SYMBOL BEGIN TIE */
+ || nUtf32 == 0x1D176 /* MUSICAL SYMBOL END TIE */
+ || nUtf32 == 0x1D177 /* MUSICAL SYMBOL BEGIN SLUR */
+ || nUtf32 == 0x1D178 /* MUSICAL SYMBOL END SLUR */
+ || nUtf32 == 0x1D179 /* MUSICAL SYMBOL BEGIN PHRASE */
+ || nUtf32 == 0x1D17A /* MUSICAL SYMBOL END PHRASE */
+ || nUtf32 == 0xE0001 /* LANGUAGE TAG */
+ || (nUtf32 >= 0xE0020 && nUtf32 <= 0xE007F);
+}
+
+int ImplIsHighSurrogate(sal_uInt32 nUtf32)
+{
+ /* All code points that are high-surrogates, as of Unicode 3.1.1. */
+ return nUtf32 >= 0xD800 && nUtf32 <= 0xDBFF;
+}
+
+int ImplIsLowSurrogate(sal_uInt32 nUtf32)
+{
+ /* All code points that are low-surrogates, as of Unicode 3.1.1. */
+ return nUtf32 >= 0xDC00 && nUtf32 <= 0xDFFF;
+}
+
+int ImplIsPrivateUse(sal_uInt32 nUtf32)
+{
+ /* All code points of <http://www.unicode.org/Public/UNIDATA/
+ UnicodeData.txt>, Version 3.1.1, that have a General Category of Co
+ (Other, Private Use).
+ */
+ return (nUtf32 >= 0xE000 && nUtf32 <= 0xF8FF)
+ || (nUtf32 >= 0xF0000 && nUtf32 <= 0xFFFFD)
+ || (nUtf32 >= 0x100000 && nUtf32 <= 0x10FFFD);
+}
+
+int ImplIsZeroWidth(sal_uInt32 nUtf32)
+{
+ /* All code points of <http://www.unicode.org/Public/UNIDATA/
+ UnicodeData.txt>, Version 3.1.1, that have "ZERO WIDTH" in their
+ Character name.
+ */
+ return nUtf32 == 0x200B /* ZERO WIDTH SPACE */
+ || nUtf32 == 0x200C /* ZERO WIDTH NON-JOINER */
+ || nUtf32 == 0x200D /* ZERO WIDTH JOINER */
+ || nUtf32 == 0xFEFF; /* ZEOR WIDTH NO-BREAK SPACE */
+}
+
+sal_uInt32 ImplGetHighSurrogate(sal_uInt32 nUtf32)
+{
+ OSL_ENSURE(nUtf32 >= 0x10000, "specification violation");
+ return ((nUtf32 - 0x10000) >> 10) | 0xD800;
+}
+
+sal_uInt32 ImplGetLowSurrogate(sal_uInt32 nUtf32)
+{
+ OSL_ENSURE(nUtf32 >= 0x10000, "specification violation");
+ return ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00;
+}
+
+sal_uInt32 ImplCombineSurrogates(sal_uInt32 nHigh, sal_uInt32 nLow)
+{
+ OSL_ENSURE(ImplIsHighSurrogate(nHigh) && ImplIsLowSurrogate(nLow),
+ "specification violation");
+ return (((nHigh & 0x3FF) << 10) | (nLow & 0x3FF)) + 0x10000;
+}