diff options
author | Adam Kovacs <christo161@gmail.com> | 2019-04-30 10:53:08 +0200 |
---|---|---|
committer | László Németh <nemeth@numbertext.org> | 2019-05-06 10:29:48 +0200 |
commit | b9afb9959c31c3c57d0f2fe91107a92abfd82cdb (patch) | |
tree | b986729226066797466d31afbc5b47234370ab21 /sw | |
parent | 0e86f0c79e29bde4a29ef77046621398b6bede63 (diff) |
tdf#113483: DOCX: fix encoding of bookmarks with non-ASCII letters
Non-ASCII letters were stored using percent-encoding, resulting in
broken bookmark names after export/import. For example, the word "Első"
became the wrong "Els%C5%91". Now only the reserved ASCII characters
are stored in percent-encoding.
For example, the name "Első!" is stored in the following form:
<w:bookmarkStart w:name="Első%21" w:id="0"/>
<w:instrText> REF Első%21 \h </w:instrText>
Change-Id: I65168e071b6baa12385c0aaa12d9f2ae4ccf9f98
Reviewed-on: https://gerrit.libreoffice.org/71299
Reviewed-by: László Németh <nemeth@numbertext.org>
Tested-by: László Németh <nemeth@numbertext.org>
Diffstat (limited to 'sw')
-rw-r--r-- | sw/qa/extras/ooxmlexport/data/tdf113483_crossreflink_nonascii_bookmarkname.docx | bin | 0 -> 13624 bytes | |||
-rw-r--r-- | sw/qa/extras/ooxmlexport/ooxmlexport13.cxx | 10 | ||||
-rw-r--r-- | sw/source/filter/ww8/docxattributeoutput.cxx | 9 |
3 files changed, 18 insertions, 1 deletions
diff --git a/sw/qa/extras/ooxmlexport/data/tdf113483_crossreflink_nonascii_bookmarkname.docx b/sw/qa/extras/ooxmlexport/data/tdf113483_crossreflink_nonascii_bookmarkname.docx Binary files differnew file mode 100644 index 000000000000..ec129909bc01 --- /dev/null +++ b/sw/qa/extras/ooxmlexport/data/tdf113483_crossreflink_nonascii_bookmarkname.docx diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport13.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport13.cxx index dfde7c2f399a..aae0813900ed 100644 --- a/sw/qa/extras/ooxmlexport/ooxmlexport13.cxx +++ b/sw/qa/extras/ooxmlexport/ooxmlexport13.cxx @@ -261,6 +261,16 @@ DECLARE_OOXMLIMPORT_TEST(testTdf123460, "tdf123460.docx") CPPUNIT_ASSERT_EQUAL(true, bCaught); } +//tdf#113483: fix handling of non-ascii characters in bookmark names and instrText xml tags +DECLARE_OOXMLEXPORT_TEST(testTdf113483, "tdf113483_crossreflink_nonascii_bookmarkname.docx") +{ + xmlDocPtr pXmlDoc = parseExport("word/document.xml"); + if (!pXmlDoc) + return; + assertXPath(pXmlDoc, "/w:document/w:body/w:p[1]/w:bookmarkStart[1]", "name", OUString::fromUtf8("Els\u0151")); + assertXPathContent(pXmlDoc, "/w:document/w:body/w:p[5]/w:r[2]/w:instrText[1]", OUString::fromUtf8(" REF Els\u0151 \\h ")); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sw/source/filter/ww8/docxattributeoutput.cxx b/sw/source/filter/ww8/docxattributeoutput.cxx index bc20d072a38c..d351101cf4a7 100644 --- a/sw/source/filter/ww8/docxattributeoutput.cxx +++ b/sw/source/filter/ww8/docxattributeoutput.cxx @@ -96,6 +96,7 @@ #include <svl/grabbagitem.hxx> #include <sfx2/sfxbasemodel.hxx> #include <tools/datetimeutils.hxx> +#include <tools/urlobj.hxx> #include <svl/whiter.hxx> #include <rtl/tencinfo.h> #include <sal/log.hxx> @@ -1611,7 +1612,7 @@ void DocxAttributeOutput::DoWriteBookmarkTagStart(const OUString & bookmarkName) { m_pSerializer->singleElementNS(XML_w, XML_bookmarkStart, FSNS(XML_w, XML_id), OString::number(m_nNextBookmarkId), - 
FSNS(XML_w, XML_name), BookmarkToWord(bookmarkName).toUtf8()); + FSNS(XML_w, XML_name), INetURLObject::decode(BookmarkToWord(bookmarkName), INetURLObject::DecodeMechanism::Unambiguous, RTL_TEXTENCODING_UTF8).toUtf8()); } void DocxAttributeOutput::DoWriteBookmarkTagEnd(const OUString & bookmarkName) @@ -1980,6 +1981,12 @@ void DocxAttributeOutput::CmdField_Impl( const SwTextNode* pNode, sal_Int32 nPos sToken = sToken.replaceAll("NNNN", "dddd"); sToken = sToken.replaceAll("NN", "ddd"); } + //tdf#113483: fix non-ascii characters inside instrText xml tags + else if ( rInfos.eType == ww::eREF + || rInfos.eType == ww::ePAGEREF ) + { + sToken = INetURLObject::decode(sToken, INetURLObject::DecodeMechanism::Unambiguous, RTL_TEXTENCODING_UTF8); + } // Write the Field command DoWriteCmd( sToken ); |