tdf#121069, tdf#121469 migrate special characters

dbahsql: Decode UTF-8 characters stored as Unicode code point values (\uXXXX escapes) in the schema file.

Change-Id: I90db2345a6de9bee7aae8ae6a7c046a03eebc0a7
Reviewed-on: https://gerrit.libreoffice.org/67197
Tested-by: Jenkins
Reviewed-by: Tamás Bunth <btomi96@gmail.com>
(cherry picked from commit 2ec13411b7e7b18d1ea28faee9ad93e0b7b21dde)
Reviewed-on: https://gerrit.libreoffice.org/67351
Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
author: Tamas Bunth <tamas.bunth@collabora.co.uk> 2019-01-31 11:03:30 +0100
committer: Miklos Vajna <vmiklos@collabora.com> 2019-02-05 10:36:54 +0100
commit: 16710fab5248b21c1c2cb8b6a4eac3a7a2d64792 (patch)
tree: 2a88817349c5a616f16dbf5b704ca9d42b72e10c /dbaccess
parent: 2d4608ac3b958aaaff032725ded152a13b3bac7d (diff)
4 files changed, 45 insertions, 38 deletions
diff --git a/dbaccess/source/filter/hsqldb/createparser.cxx b/dbaccess/source/filter/hsqldb/createparser.cxx
index 77b1fbec84b2..e81de3e07654 100644
--- a/dbaccess/source/filter/hsqldb/createparser.cxx
+++ b/dbaccess/source/filter/hsqldb/createparser.cxx
@@ -28,42 +28,6 @@ using namespace css::sdbc;
 
 namespace
 {
-//Find ascii escaped unicode
-sal_Int32 lcl_IndexOfUnicode(const OString& rSource, const sal_Int32 nFrom = 0)
-{
-    const OString sHexDigits = "0123456789abcdefABCDEF";
-    sal_Int32 nIndex = rSource.indexOf("\\u", nFrom);
-    if (nIndex == -1)
-    {
-        return -1;
-    }
-    bool bIsUnicode = true;
-    for (short nDist = 2; nDist <= 5; ++nDist)
-    {
-        if (sHexDigits.indexOf(rSource[nIndex + nDist]) == -1)
-        {
-            bIsUnicode = false;
-        }
-    }
-    return bIsUnicode ? nIndex : -1;
-}
-
-//Convert ascii escaped unicode to utf-8
-OUString lcl_ConvertToUTF8(const OString& original)
-{
-    OString sResult = original;
-    sal_Int32 nIndex = lcl_IndexOfUnicode(sResult);
-    while (nIndex != -1 && nIndex < original.getLength())
-    {
-        const OString sHex = original.copy(nIndex + 2, 4);
-        const sal_Unicode cDec = static_cast<sal_Unicode>(strtol(sHex.getStr(), nullptr, 16));
-        const OString sNewChar = OString(&cDec, 1, RTL_TEXTENCODING_UTF8);
-        sResult = sResult.replaceAll("\\u" + sHex, sNewChar);
-        nIndex = lcl_IndexOfUnicode(original, nIndex + 1);
-    }
-    return OStringToOUString(sResult, RTL_TEXTENCODING_UTF8);
-}
-
 /// Returns substring of sSql from the first occurrence of '(' until the
 /// last occurrence of ')' (excluding the parenthesis)
 OUString lcl_getColumnPart(const OUString& sSql)
@@ -281,7 +245,6 @@ void CreateStmtParser::parseColumnPart(const OUString& sColumnPart)
         ColumnTypeParts typeParts = lcl_getColumnTypeParts(sFullTypeName);
 
         bool bCaseInsensitive = typeParts.typeName.indexOf("IGNORECASE") >= 0;
-        rColumnName = lcl_ConvertToUTF8(OUStringToOString(rColumnName, RTL_TEXTENCODING_UTF8));
         bool isPrimaryKey = lcl_isPrimaryKey(sColumn);
 
         if (isPrimaryKey)
diff --git a/dbaccess/source/filter/hsqldb/parseschema.cxx b/dbaccess/source/filter/hsqldb/parseschema.cxx
index beca3c24e17e..e04998c80f28 100644
--- a/dbaccess/source/filter/hsqldb/parseschema.cxx
+++ b/dbaccess/source/filter/hsqldb/parseschema.cxx
@@ -20,6 +20,7 @@
 #include "parseschema.hxx"
 #include "fbcreateparser.hxx"
 #include "fbalterparser.hxx"
+#include "utils.hxx"
 
 #include <com/sun/star/io/TextInputStream.hpp>
 #include <com/sun/star/embed/XStorage.hpp>
@@ -123,7 +124,8 @@ void SchemaParser::parseSchema()
     while (!xTextInput->isEOF())
     {
         // every line contains exactly one DDL statement
-        OUString sSql = xTextInput->readLine();
+        OUString sSql = utils::convertToUTF8(
+            OUStringToOString(xTextInput->readLine(), RTL_TEXTENCODING_UTF8));
 
         IndexStmtParser indexParser{ sSql };
         if (indexParser.isIndexStatement())
diff --git a/dbaccess/source/filter/hsqldb/utils.cxx b/dbaccess/source/filter/hsqldb/utils.cxx
index 8d6c49e348dc..dc869f51217b 100644
--- a/dbaccess/source/filter/hsqldb/utils.cxx
+++ b/dbaccess/source/filter/hsqldb/utils.cxx
@@ -28,6 +28,46 @@
 
 using namespace dbahsql;
 
+namespace
+{
+// Returns the index of the first "\u" found at or after nFrom if the four characters following it are all hex digits; returns -1 otherwise.
+sal_Int32 lcl_IndexOfUnicode(const OString& rSource, const sal_Int32 nFrom = 0)
+{ // NOTE(review): only the first "\u" is examined; if it is not a valid escape, -1 is returned without looking at later occurrences
+    const OString sHexDigits = "0123456789abcdefABCDEF";
+    sal_Int32 nIndex = rSource.indexOf("\\u", nFrom); // searches for a literal backslash followed by 'u'
+    if (nIndex == -1)
+    {
+        return -1; // no "\u" marker present
+    }
+    bool bIsUnicode = true;
+    for (short nDist = 2; nDist <= 5; ++nDist) // offsets 2..5 are the four characters right after "\u"
+    { // NOTE(review): nIndex + nDist is not checked against rSource.getLength(); a "\u" close to the end looks like it indexes past the last character -- TODO confirm and guard
+        if (sHexDigits.indexOf(rSource[nIndex + nDist]) == -1)
+        {
+            bIsUnicode = false; // one non-hex character rejects the candidate; the loop still runs to the end
+        }
+    }
+    return bIsUnicode ? nIndex : -1;
+}
+
+} // unnamed namespace
+
+// Replaces every ASCII "\uXXXX" escape (four hex digits) in `original` with the UTF-8 encoding of that code unit and returns the result as an OUString.
+OUString utils::convertToUTF8(const OString& original)
+{ // Indices are always taken from the unmodified `original`; the replacements are applied to the working copy sResult.
+    OString sResult = original;
+    sal_Int32 nIndex = lcl_IndexOfUnicode(sResult); // sResult still equals original here, so the index is valid for both
+    while (nIndex != -1 && nIndex < original.getLength())
+    {
+        const OString sHex = original.copy(nIndex + 2, 4); // the four hex digits after "\u"
+        const sal_Unicode cDec = static_cast<sal_Unicode>(strtol(sHex.getStr(), nullptr, 16)); // parse the digits as a base-16 number
+        const OString sNewChar = OString(&cDec, 1, RTL_TEXTENCODING_UTF8); // single code unit converted to UTF-8; NOTE(review): surrogate pairs are presumably not combined -- confirm
+        sResult = sResult.replaceAll("\\u" + sHex, sNewChar); // replaces all occurrences of this escape at once, so later repeats become no-ops
+        nIndex = lcl_IndexOfUnicode(original, nIndex + 1); // continue after the current escape; stops at the first "\u" that is not a valid escape
+    }
+    return OStringToOUString(sResult, RTL_TEXTENCODING_UTF8);
+}
+
 OUString utils::getTableNameFromStmt(const OUString& sSql)
 {
     auto stmtComponents = comphelper::string::split(sSql, sal_Unicode(u' '));
diff --git a/dbaccess/source/filter/hsqldb/utils.hxx b/dbaccess/source/filter/hsqldb/utils.hxx
index 02ccc3d2426b..b2d54fbc1a50 100644
--- a/dbaccess/source/filter/hsqldb/utils.hxx
+++ b/dbaccess/source/filter/hsqldb/utils.hxx
@@ -16,6 +16,8 @@ namespace dbahsql
 {
 namespace utils
 {
+OUString convertToUTF8(const OString& original);
+
 OUString getTableNameFromStmt(const OUString& sSql);
 
 void ensureFirebirdTableLength(const OUString& sName);
author	Tamas Bunth <tamas.bunth@collabora.co.uk>	2019-01-31 11:03:30 +0100
committer	Miklos Vajna <vmiklos@collabora.com>	2019-02-05 10:36:54 +0100
commit	16710fab5248b21c1c2cb8b6a4eac3a7a2d64792 (patch)
tree	2a88817349c5a616f16dbf5b704ca9d42b72e10c /dbaccess
parent	2d4608ac3b958aaaff032725ded152a13b3bac7d (diff)