summaryrefslogtreecommitdiff
path: root/sw
diff options
context:
space:
mode:
authorAdam Kovacs <christo161@gmail.com>2019-04-30 10:53:08 +0200
committerLászló Németh <nemeth@numbertext.org>2019-05-06 10:29:48 +0200
commitb9afb9959c31c3c57d0f2fe91107a92abfd82cdb (patch)
treeb986729226066797466d31afbc5b47234370ab21 /sw
parent0e86f0c79e29bde4a29ef77046621398b6bede63 (diff)
tdf#113483: DOCX: fix encoding of bookmarks with non-ASCII letters
Non-ASCII letters were stored using percent-encoding, resulting in broken bookmark names after export/import. For example, the word "Első" became the wrong "Els%C5%91". Now only the reserved ASCII characters are stored in percent-encoding. For example, the name "Első!" is stored in the following form: <w:bookmarkStart w:name="Első%21" w:id="0"/> <w:instrText> REF Első%21 \h </w:instrText> Change-Id: I65168e071b6baa12385c0aaa12d9f2ae4ccf9f98 Reviewed-on: https://gerrit.libreoffice.org/71299 Reviewed-by: László Németh <nemeth@numbertext.org> Tested-by: László Németh <nemeth@numbertext.org>
Diffstat (limited to 'sw')
-rw-r--r--sw/qa/extras/ooxmlexport/data/tdf113483_crossreflink_nonascii_bookmarkname.docxbin0 -> 13624 bytes
-rw-r--r--sw/qa/extras/ooxmlexport/ooxmlexport13.cxx10
-rw-r--r--sw/source/filter/ww8/docxattributeoutput.cxx9
3 files changed, 18 insertions, 1 deletions
diff --git a/sw/qa/extras/ooxmlexport/data/tdf113483_crossreflink_nonascii_bookmarkname.docx b/sw/qa/extras/ooxmlexport/data/tdf113483_crossreflink_nonascii_bookmarkname.docx
new file mode 100644
index 000000000000..ec129909bc01
--- /dev/null
+++ b/sw/qa/extras/ooxmlexport/data/tdf113483_crossreflink_nonascii_bookmarkname.docx
Binary files differ
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport13.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport13.cxx
index dfde7c2f399a..aae0813900ed 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport13.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport13.cxx
@@ -261,6 +261,16 @@ DECLARE_OOXMLIMPORT_TEST(testTdf123460, "tdf123460.docx")
CPPUNIT_ASSERT_EQUAL(true, bCaught);
}
+//tdf#113483: fix handling of non-ascii characters in bookmark names and instrText xml tags
+DECLARE_OOXMLEXPORT_TEST(testTdf113483, "tdf113483_crossreflink_nonascii_bookmarkname.docx")
+{
+ xmlDocPtr pXmlDoc = parseExport("word/document.xml");
+ if (!pXmlDoc)
+ return;
+ assertXPath(pXmlDoc, "/w:document/w:body/w:p[1]/w:bookmarkStart[1]", "name", OUString::fromUtf8("Els\u0151"));
+ assertXPathContent(pXmlDoc, "/w:document/w:body/w:p[5]/w:r[2]/w:instrText[1]", OUString::fromUtf8(" REF Els\u0151 \\h "));
+}
+
CPPUNIT_PLUGIN_IMPLEMENT();
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/filter/ww8/docxattributeoutput.cxx b/sw/source/filter/ww8/docxattributeoutput.cxx
index bc20d072a38c..d351101cf4a7 100644
--- a/sw/source/filter/ww8/docxattributeoutput.cxx
+++ b/sw/source/filter/ww8/docxattributeoutput.cxx
@@ -96,6 +96,7 @@
#include <svl/grabbagitem.hxx>
#include <sfx2/sfxbasemodel.hxx>
#include <tools/datetimeutils.hxx>
+#include <tools/urlobj.hxx>
#include <svl/whiter.hxx>
#include <rtl/tencinfo.h>
#include <sal/log.hxx>
@@ -1611,7 +1612,7 @@ void DocxAttributeOutput::DoWriteBookmarkTagStart(const OUString & bookmarkName)
{
m_pSerializer->singleElementNS(XML_w, XML_bookmarkStart,
FSNS(XML_w, XML_id), OString::number(m_nNextBookmarkId),
- FSNS(XML_w, XML_name), BookmarkToWord(bookmarkName).toUtf8());
+ FSNS(XML_w, XML_name), INetURLObject::decode(BookmarkToWord(bookmarkName), INetURLObject::DecodeMechanism::Unambiguous, RTL_TEXTENCODING_UTF8).toUtf8());
}
void DocxAttributeOutput::DoWriteBookmarkTagEnd(const OUString & bookmarkName)
@@ -1980,6 +1981,12 @@ void DocxAttributeOutput::CmdField_Impl( const SwTextNode* pNode, sal_Int32 nPos
sToken = sToken.replaceAll("NNNN", "dddd");
sToken = sToken.replaceAll("NN", "ddd");
}
+ //tdf#113483: fix non-ascii characters inside instrText xml tags
+ else if ( rInfos.eType == ww::eREF
+ || rInfos.eType == ww::ePAGEREF )
+ {
+ sToken = INetURLObject::decode(sToken, INetURLObject::DecodeMechanism::Unambiguous, RTL_TEXTENCODING_UTF8);
+ }
// Write the Field command
DoWriteCmd( sToken );