summaryrefslogtreecommitdiff
path: root/sc
diff options
context:
space:
mode:
authorCzeber László Ádám <czeber.laszloadam@nisz.hu>2023-05-08 09:33:07 +0200
committerAndras Timar <andras.timar@collabora.com>2023-05-13 20:05:33 +0200
commit7c0f9682a1ddd61a85edab79dd2306e6de823327 (patch)
treeb371c0a11dcdc44f585f842dbfff446245ff787c /sc
parenta7741db7a30f7d734fffe016d98d9170de82d8b8 (diff)
tdf#152980 CSV import: Fix control character length in XLSX save
Converting from CSV to XLSX corrupts text that looks like an escaped control character. Only escape sequences with exactly 4 hexadecimal digits are allowed, i.e. the _x000D_ format, not _x0D_ for example. Replace the lcl_unEscapeUnicodeChars function with decodeXString. Delete the now-unused functions and cover multiple occurrences in the unit test. Change-Id: Id1d4bfcf7d27cf5005e7bea8e289303c5d9aca73 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151494 Reviewed-by: Eike Rathke <erack@redhat.com> Tested-by: Eike Rathke <erack@redhat.com> Signed-off-by: Xisco Fauli <xiscofauli@libreoffice.org> Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151562 Reviewed-by: Michael Stahl <michael.stahl@allotropia.de> Tested-by: Jenkins
Diffstat (limited to 'sc')
-rw-r--r--sc/qa/unit/data/csv/tdf152980.csv9
-rw-r--r--sc/qa/unit/subsequent_export_test2.cxx29
-rw-r--r--sc/source/filter/oox/richstring.cxx112
3 files changed, 39 insertions, 111 deletions
diff --git a/sc/qa/unit/data/csv/tdf152980.csv b/sc/qa/unit/data/csv/tdf152980.csv
new file mode 100644
index 000000000000..c5050b86d968
--- /dev/null
+++ b/sc/qa/unit/data/csv/tdf152980.csv
@@ -0,0 +1,9 @@
+"a_x1_b"
+"a_x01_b"
+"a_x001_b"
+"a_x0001_b"
+"a_xfoo b"
+"a b"
+"a
+b"
+"a b"
diff --git a/sc/qa/unit/subsequent_export_test2.cxx b/sc/qa/unit/subsequent_export_test2.cxx
index 56d7ac158151..d1920de3c3cb 100644
--- a/sc/qa/unit/subsequent_export_test2.cxx
+++ b/sc/qa/unit/subsequent_export_test2.cxx
@@ -193,6 +193,7 @@ public:
void testTotalsRowFunction();
void testAutofilterHiddenButton();
void testTdf119565();
+ void testTdf152980();
CPPUNIT_TEST_SUITE(ScExportTest2);
@@ -325,6 +326,7 @@ public:
CPPUNIT_TEST(testTotalsRowFunction);
CPPUNIT_TEST(testAutofilterHiddenButton);
CPPUNIT_TEST(testTdf119565);
+ CPPUNIT_TEST(testTdf152980);
CPPUNIT_TEST_SUITE_END();
};
@@ -2969,6 +2971,33 @@ void ScExportTest2::testTdf119565()
xShapeProps->getPropertyValue("LineJoint").get<drawing::LineJoint>());
}
+void ScExportTest2::testTdf152980()
+{
+ createScDoc("csv/tdf152980.csv");
+ ScDocShell* pDocSh = getScDocShell();
+ pDocSh->DoHardRecalc();
+ saveAndReload("Calc Office Open XML");
+ pDocSh = getScDocShell();
+ pDocSh->DoHardRecalc();
+
+ ScDocument* pDoc = getScDoc();
+
+ // - Expected: The part between a and b does not change
+ // - Actual : Only the characters a and b remain
+ CPPUNIT_ASSERT_EQUAL(OUString("a_x1_b"), pDoc->GetString(0, 0, 0));
+ CPPUNIT_ASSERT_EQUAL(OUString("a_x01_b"), pDoc->GetString(0, 1, 0));
+ CPPUNIT_ASSERT_EQUAL(OUString("a_x001_b"), pDoc->GetString(0, 2, 0));
+
+ // The character code does not change in both cases
+ CPPUNIT_ASSERT_EQUAL(OUString("a_x0001_b"), pDoc->GetString(0, 3, 0));
+
+ // The escape characters are handled correctly in both cases
+ CPPUNIT_ASSERT_EQUAL(OUString("a_xfoo\nb"), pDoc->GetString(0, 4, 0));
+ CPPUNIT_ASSERT_EQUAL(OUString("a\tb"), pDoc->GetString(0, 5, 0));
+ CPPUNIT_ASSERT_EQUAL(OUString("a\nb"), pDoc->GetString(0, 6, 0));
+ CPPUNIT_ASSERT_EQUAL(OUString("a\n\nb"), pDoc->GetString(0, 7, 0));
+}
+
CPPUNIT_TEST_SUITE_REGISTRATION(ScExportTest2);
CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sc/source/filter/oox/richstring.cxx b/sc/source/filter/oox/richstring.cxx
index bda5960e42b1..a1345179c19a 100644
--- a/sc/source/filter/oox/richstring.cxx
+++ b/sc/source/filter/oox/richstring.cxx
@@ -49,116 +49,6 @@ bool lclNeedsRichTextFormat( const oox::xls::Font* pFont )
return pFont && pFont->needsRichTextFormat();
}
-sal_Int32 lcl_getHexLetterValue(sal_Unicode nCode)
-{
- if (nCode >= '0' && nCode <= '9')
- return nCode - '0';
-
- if (nCode >= 'A' && nCode <= 'F')
- return nCode - 'A' + 10;
-
- if (nCode >= 'a' && nCode <= 'f')
- return nCode - 'a' + 10;
-
- return -1;
-}
-
-bool lcl_validEscape(sal_Unicode nCode)
-{
- // Valid XML chars that can be escaped (ignoring the restrictions) as in the OOX open spec
- // 2.1.1742 Part 1 Section 22.9.2.19, ST_Xstring (Escaped String)
- if (nCode == 0x000D || nCode == 0x000A || nCode == 0x0009 || nCode == 0x005F)
- return true;
-
- // Other valid XML chars in basic multilingual plane that cannot be escaped.
- if ((nCode >= 0x0020 && nCode <= 0xD7FF) || (nCode >= 0xE000 && nCode <= 0xFFFD))
- return false;
-
- return true;
-}
-
-OUString lcl_unEscapeUnicodeChars(const OUString& rSrc)
-{
- // Example: Escaped representation of unicode char 0x000D is _x000D_
-
- sal_Int32 nLen = rSrc.getLength();
- if (!nLen)
- return rSrc;
-
- sal_Int32 nStart = 0;
- bool bFound = false;
- const OUString aPrefix = "_x";
- sal_Int32 nPrefixStart = rSrc.indexOf(aPrefix, nStart);
-
- if (nPrefixStart == -1)
- return rSrc;
-
- OUStringBuffer aBuf(rSrc);
- sal_Int32 nOffset = 0; // index offset in aBuf w.r.t rSrc.
-
- do
- {
- sal_Int32 nEnd = -1;
- sal_Unicode nCode = 0;
- bool bFoundThis = false;
- for (sal_Int32 nIdx = 0; nIdx < 5; ++nIdx)
- {
- sal_Int32 nThisIdx = nPrefixStart + nIdx + 2;
- if (nThisIdx >= nLen)
- break;
-
- sal_Unicode nThisCode = rSrc[nThisIdx];
- sal_Int32 nLetter = lcl_getHexLetterValue(nThisCode);
-
- if (!nIdx && nLetter < 0)
- break;
-
- if (nLetter >= 0)
- {
- nCode = (nCode << 4) + static_cast<sal_Unicode>(nLetter);
- }
- else if (nThisCode == '_')
- {
- nEnd = nThisIdx + 1;
- bFoundThis = true;
- break;
- }
- else
- {
- break;
- }
- }
-
- if (bFoundThis)
- {
- // nEnd is already set inside the inner loop in this case.
- if (lcl_validEscape(nCode))
- {
- bFound = true;
- sal_Int32 nEscStrLen = nEnd - nPrefixStart;
- aBuf.remove(nPrefixStart - nOffset, nEscStrLen);
- aBuf.insert(nPrefixStart - nOffset, nCode);
-
- nOffset += nEscStrLen - 1;
- }
- }
- else
- {
- // Start the next search just after last "_x"
- nEnd = nPrefixStart + 2;
- }
-
- nStart = nEnd;
- nPrefixStart = rSrc.indexOf(aPrefix, nStart);
- }
- while (nPrefixStart != -1);
-
- if (bFound)
- return aBuf.makeStringAndClear();
-
- return rSrc;
-}
-
} // namespace
RichStringPortion::RichStringPortion() :
@@ -169,7 +59,7 @@ RichStringPortion::RichStringPortion() :
void RichStringPortion::setText( const OUString& rText )
{
- maText = lcl_unEscapeUnicodeChars(rText);
+ maText = AttributeConversion::decodeXString(rText);
}
FontRef const & RichStringPortion::createFont(const WorkbookHelper& rHelper)