diff options
author | Stephan Bergmann <sbergman@redhat.com> | 2015-10-29 12:17:40 +0100 |
---|---|---|
committer | Stephan Bergmann <sbergman@redhat.com> | 2015-10-29 13:02:40 +0000 |
commit | a346dfccd7e342d776dd59eb3ed128557e22a1bf (patch) | |
tree | fd2b7dcb940e054bdc080afed3b52257c9e4ca84 | |
parent | b051510796dcf289edcd03737087176e53bbe4b8 (diff) |
tdf#70833: IDNA support when exporting hyperlinks to PDF
Any URLs using non-ASCII IDNA syntax need to be resolved to ASCII-only, as PDF
URI Action's URI needs to be "encoded in 7-bit ASCII."
Introduce URIHelper::resolveIdnaHost (svl/urihelper.hxx), which internally uses
icu::IDNA. This requires bumping the minimum --with-system-icu requirement from
4.2 to 4.6, which means ICU_RECLASSIFIED_CLOSE_PARENTHESIS is always true now.
Change-Id: I0e20d9a20ed2b869fba0cc7c969721411db590b3
Reviewed-on: https://gerrit.libreoffice.org/19669
Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
Tested-by: Stephan Bergmann <sbergman@redhat.com>
-rw-r--r-- | config_host.mk.in | 1 | ||||
-rw-r--r-- | configure.ac | 11 | ||||
-rw-r--r-- | i18npool/CustomTarget_breakiterator.mk | 3 | ||||
-rw-r--r-- | include/svl/urihelper.hxx | 17 | ||||
-rw-r--r-- | svl/Library_svl.mk | 2 | ||||
-rw-r--r-- | svl/qa/unit/test_URIHelper.cxx | 63 | ||||
-rw-r--r-- | svl/source/misc/urihelper.cxx | 68 | ||||
-rw-r--r-- | vcl/source/gdi/pdfwriter_impl.cxx | 11 |
8 files changed, 160 insertions, 16 deletions
diff --git a/config_host.mk.in b/config_host.mk.in index 8d54bade7a8f..5b2e6489f5d8 100644 --- a/config_host.mk.in +++ b/config_host.mk.in @@ -273,7 +273,6 @@ export ICU_CFLAGS=$(gb_SPACE)@ICU_CFLAGS@ export ICU_LIBS=$(gb_SPACE)@ICU_LIBS@ export ICU_MAJOR=@ICU_MAJOR@ export ICU_MINOR=@ICU_MINOR@ -export ICU_RECLASSIFIED_CLOSE_PARENTHESIS=@ICU_RECLASSIFIED_CLOSE_PARENTHESIS@ export ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER=@ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER@ export ICU_RECLASSIFIED_HEBREW_LETTER=@ICU_RECLASSIFIED_HEBREW_LETTER@ export ICU_RECLASSIFIED_PREPEND_SET_EMPTY=@ICU_RECLASSIFIED_PREPEND_SET_EMPTY@ diff --git a/configure.ac b/configure.ac index 50003a650224..4a7f6b357917 100644 --- a/configure.ac +++ b/configure.ac @@ -8949,7 +8949,6 @@ SYSTEM_GENCMN= ICU_MAJOR=56 ICU_MINOR=1 -ICU_RECLASSIFIED_CLOSE_PARENTHESIS="TRUE" ICU_RECLASSIFIED_PREPEND_SET_EMPTY="TRUE" ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER="TRUE" ICU_RECLASSIFIED_HEBREW_LETTER="TRUE" @@ -8974,10 +8973,10 @@ if test "$with_system_icu" = "yes"; then ICU_MAJOR=`echo $ICU_VERSION | cut -d"." -f1` ICU_MINOR=`echo $ICU_VERSION | cut -d"." 
-f2` - if test "$ICU_MAJOR" -ge "49" -o \( "$ICU_MAJOR" = "4" -a "$ICU_MINOR" -ge "2" \); then + if test "$ICU_MAJOR" -ge "49" -o \( "$ICU_MAJOR" = "4" -a "$ICU_MINOR" -ge "6" \); then AC_MSG_RESULT([OK, $ICU_VERSION]) else - AC_MSG_ERROR([not suitable, only >= 4.2 supported currently]) + AC_MSG_ERROR([not suitable, only >= 4.6 supported currently]) fi fi @@ -9013,11 +9012,6 @@ You can use --with-system-icu-for-build=force to use it anyway.]) if test -z "$SYSTEM_GENCMN"; then AC_MSG_ERROR([\'gencmn\' not found in \$PATH, install the icu development tool \'gencmn\']) fi - if test "$ICU_MAJOR" -ge "49" -o \( "$ICU_MAJOR" = "4" -a "$ICU_MINOR" -ge "4" \); then - ICU_RECLASSIFIED_CLOSE_PARENTHESIS="TRUE" - else - ICU_RECLASSIFIED_CLOSE_PARENTHESIS= - fi if test "$ICU_MAJOR" -ge "49"; then ICU_RECLASSIFIED_PREPEND_SET_EMPTY="TRUE" ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER="TRUE" @@ -9055,7 +9049,6 @@ AC_SUBST(SYSTEM_GENCCODE) AC_SUBST(SYSTEM_GENCMN) AC_SUBST(ICU_MAJOR) AC_SUBST(ICU_MINOR) -AC_SUBST(ICU_RECLASSIFIED_CLOSE_PARENTHESIS) AC_SUBST(ICU_RECLASSIFIED_PREPEND_SET_EMPTY) AC_SUBST(ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER) AC_SUBST(ICU_RECLASSIFIED_HEBREW_LETTER) diff --git a/i18npool/CustomTarget_breakiterator.mk b/i18npool/CustomTarget_breakiterator.mk index 4aaf2e5a710e..302ad464eb12 100644 --- a/i18npool/CustomTarget_breakiterator.mk +++ b/i18npool/CustomTarget_breakiterator.mk @@ -98,8 +98,7 @@ $(i18npool_BIDIR)/%.brk : $(i18npool_BIDIR)/%.txt $(call gb_ExternalExecutable_g # sed substitution... 
$(i18npool_BIDIR)/%.txt : \ $(SRCDIR)/i18npool/source/breakiterator/data/%.txt | $(i18npool_BIDIR)/.dir - sed -e ': dummy' \ - $(if $(ICU_RECLASSIFIED_CLOSE_PARENTHESIS),-e "s#\[:LineBreak = Close_Punctuation:\]#\[& \[:LineBreak = Close_Parenthesis:\]\]#") \ + sed -e "s#\[:LineBreak = Close_Punctuation:\]#\[& \[:LineBreak = Close_Parenthesis:\]\]#" \ $(if $(ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER),,\ -e '/\[:LineBreak = Conditional_Japanese_Starter:\]/d' \ -e 's# $$CJ##' \ diff --git a/include/svl/urihelper.hxx b/include/svl/urihelper.hxx index b784188461f8..5f7633033ffd 100644 --- a/include/svl/urihelper.hxx +++ b/include/svl/urihelper.hxx @@ -152,6 +152,23 @@ SVL_DLLPUBLIC OUString removePassword(OUString const & rURI, INetURLObject::EncodeMechanism eEncodeMechanism = INetURLObject::WAS_ENCODED, INetURLObject::DecodeMechanism eDecodeMechanism = INetURLObject::DECODE_TO_IURI, rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8); + +/** Resolve a URL's host component domain name in IDNA syntax to plain DNS + syntax. + + For details, see RFC 5890 "Internationalized Domain Names for Applications + (IDNA): Definitions and Document Framework." + + @param: url An arbitrary string, should be a URI. + + @return If the input matches the syntax of a hierarchical URL, and it has + a host component that matches the IDNA2008 domain name syntax, and that + domain name contains any U-labels, return a version of the input URL with + the host component resolved to plain DNS syntax. Otherwise, return the + input unchanged. 
+*/ +SVL_DLLPUBLIC OUString resolveIdnaHost(OUString const & url); + } #endif // INCLUDED_SVL_URIHELPER_HXX diff --git a/svl/Library_svl.mk b/svl/Library_svl.mk index db08af5f4b3a..3a7b00d1aaa0 100644 --- a/svl/Library_svl.mk +++ b/svl/Library_svl.mk @@ -21,6 +21,8 @@ $(eval $(call gb_Library_Library,svl)) $(eval $(call gb_Library_use_externals,svl,\ boost_headers \ + icu_headers \ + icuuc \ mdds_headers \ libxml2 \ )) diff --git a/svl/qa/unit/test_URIHelper.cxx b/svl/qa/unit/test_URIHelper.cxx index f27149b57a01..37d53e90ec6c 100644 --- a/svl/qa/unit/test_URIHelper.cxx +++ b/svl/qa/unit/test_URIHelper.cxx @@ -198,9 +198,12 @@ public: void testFindFirstURLInText(); + void testResolveIdnaHost(); + CPPUNIT_TEST_SUITE(Test); CPPUNIT_TEST(testNormalizedMakeRelative); CPPUNIT_TEST(testFindFirstURLInText); + CPPUNIT_TEST(testResolveIdnaHost); CPPUNIT_TEST(finish); CPPUNIT_TEST_SUITE_END(); @@ -423,6 +426,66 @@ void Test::testFindFirstURLInText() { } } +void Test::testResolveIdnaHost() { + OUString input; + + input.clear(); + CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input)); + + input = OUString::fromUtf8("Foo.M\xC3\xBCnchen.de"); + CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input)); + + input = OUString::fromUtf8("foo://Muenchen.de"); + CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input)); + + input = OUString::fromUtf8("foo://-M\xC3\xBCnchen.de"); + CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input)); + + input = OUString::fromUtf8("foo://M\xC3\xBCnchen-.de"); + CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input)); + + input = OUString::fromUtf8("foo://xn--M\xC3\xBCnchen.de"); + CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input)); + + input = OUString::fromUtf8("foo://xy--M\xC3\xBCnchen.de"); + CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input)); + + input = OUString::fromUtf8("foo://.M\xC3\xBCnchen.de"); + CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input)); + + input = 
OUString::fromUtf8("foo://-bar.M\xC3\xBCnchen.de"); + CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input)); + + input = OUString::fromUtf8("foo://bar-.M\xC3\xBCnchen.de"); + CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input)); + + input = OUString::fromUtf8("foo://xn--bar.M\xC3\xBCnchen.de"); + CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input)); + + input = OUString::fromUtf8("foo://xy--bar.M\xC3\xBCnchen.de"); + CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input)); + + CPPUNIT_ASSERT_EQUAL( + OUString::fromUtf8("foo://M\xC3\xBCnchen@xn--mnchen-3ya.de"), + URIHelper::resolveIdnaHost( + OUString::fromUtf8("foo://M\xC3\xBCnchen@M\xC3\xBCnchen.de"))); + + CPPUNIT_ASSERT_EQUAL( + OUString::fromUtf8("foo://xn--mnchen-3ya.de."), + URIHelper::resolveIdnaHost( + OUString::fromUtf8("foo://M\xC3\xBCnchen.de."))); + + CPPUNIT_ASSERT_EQUAL( + OUString::fromUtf8("Foo://bar@xn--mnchen-3ya.de:123/?bar#baz"), + URIHelper::resolveIdnaHost( + OUString::fromUtf8("Foo://bar@M\xC3\xBCnchen.de:123/?bar#baz"))); + + CPPUNIT_ASSERT_EQUAL( + OUString::fromUtf8("foo://xn--mnchen-3ya.de"), + URIHelper::resolveIdnaHost( + OUString::fromUtf8("foo://Mu\xCC\x88nchen.de"))); +} + css::uno::Reference< css::uno::XComponentContext > Test::m_context; CPPUNIT_TEST_SUITE_REGISTRATION(Test); diff --git a/svl/source/misc/urihelper.cxx b/svl/source/misc/urihelper.cxx index ab47bb6de3b6..bb5678a9291f 100644 --- a/svl/source/misc/urihelper.cxx +++ b/svl/source/misc/urihelper.cxx @@ -17,6 +17,10 @@ * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
*/ +#include <sal/config.h> + +#include <unicode/idna.h> + #include <svl/urihelper.hxx> #include <com/sun/star/ucb/Command.hpp> #include <com/sun/star/ucb/IllegalIdentifierException.hpp> @@ -725,4 +729,68 @@ OUString URIHelper::removePassword(OUString const & rURI, aObj.GetURLNoPass(eDecodeMechanism, eCharset); } +OUString URIHelper::resolveIdnaHost(OUString const & url) { + css::uno::Reference<css::uri::XUriReference> uri( + css::uri::UriReferenceFactory::create( + comphelper::getProcessComponentContext()) + ->parse(url)); + if (!(uri.is() && uri->hasAuthority())) { + return url; + } + auto auth(uri->getAuthority()); + sal_Int32 hostStart = auth.indexOf('@') + 1; + sal_Int32 hostEnd = auth.getLength() - 1; + while (hostEnd > hostStart && rtl::isAsciiDigit(auth[hostEnd])) { + --hostEnd; + } + if (!(hostEnd > hostStart && auth[hostEnd] == ':')) { + hostEnd = auth.getLength() - 1; + } + auto asciiOnly = true; + for (auto i = hostStart; i != hostEnd; ++i) { + if (!rtl::isAscii(auth[i])) { + asciiOnly = false; + break; + } + } + if (asciiOnly) { + // Avoid icu::IDNA case normalization in purely non-IDNA domain names: + return url; + } + UErrorCode e = U_ZERO_ERROR; + std::unique_ptr<icu::IDNA> idna( + icu::IDNA::createUTS46Instance( + (UIDNA_USE_STD3_RULES | UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ + | UIDNA_CHECK_CONTEXTO), + e)); + if (U_FAILURE(e)) { + SAL_WARN("vcl.gdi", "icu::IDNA::createUTS46Instance " << e); + return url; + } + icu::UnicodeString ascii; + icu::IDNAInfo info; + idna->nameToASCII( + icu::UnicodeString( + reinterpret_cast<UChar const *>(auth.getStr() + hostStart), + hostEnd - hostStart), + ascii, info, e); + if (U_FAILURE(e) || info.hasErrors()) { + return url; + } + OUStringBuffer buf(uri->getScheme()); + buf.append("://").append(auth.getStr(), hostStart); + buf.append( + reinterpret_cast<sal_Unicode const *>(ascii.getBuffer()), + ascii.length()); + buf.append(auth.getStr() + hostEnd, auth.getLength() - hostEnd) + .append(uri->getPath()); + if 
(uri->hasQuery()) { + buf.append('?').append(uri->getQuery()); + } + if (uri->hasFragment()) { + buf.append('#').append(uri->getFragment()); + } + return buf.makeStringAndClear(); +} + /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx index fcd04b44515a..ae009788aaa5 100644 --- a/vcl/source/gdi/pdfwriter_impl.cxx +++ b/vcl/source/gdi/pdfwriter_impl.cxx @@ -44,6 +44,7 @@ #include <rtl/crc.h> #include <rtl/digest.h> #include <rtl/ustrbuf.hxx> +#include <svl/urihelper.hxx> #include <tools/debug.hxx> #include <tools/fract.hxx> #include <tools/stream.hxx> @@ -4495,8 +4496,10 @@ we check in the following sequence: // are the correct one!! // extract target file type + auto url(URIHelper::resolveIdnaHost(rLink.m_aURL)); + INetURLObject aDocumentURL( m_aContext.BaseURL ); - INetURLObject aTargetURL( rLink.m_aURL ); + INetURLObject aTargetURL( url ); bool bSetGoToRMode = false; bool bTargetHasPDFExtension = false; INetProtocol eTargetProtocol = aTargetURL.GetProtocol(); @@ -4507,7 +4510,7 @@ we check in the following sequence: // getting the needed URL information from the current document path if( eTargetProtocol == INetProtocol::NotValid ) { - if( rLink.m_aURL.getLength() > 4 && rLink.m_aURL.startsWith("\\\\\\\\")) + if( url.getLength() > 4 && url.startsWith("\\\\\\\\")) { bIsUNCPath = true; } @@ -4516,7 +4519,7 @@ we check in the following sequence: INetURLObject aNewBase( aDocumentURL );//duplicate document URL aNewBase.removeSegment(); //remove last segment from it, obtaining the base URL of the //target document - aNewBase.insertName( rLink.m_aURL ); + aNewBase.insertName( url ); aTargetURL = aNewBase;//reassign the new target URL //recompute the target protocol, with the new URL //normal URL processing resumes @@ -4564,7 +4567,7 @@ we check in the following sequence: { aLine.append( "/Launch/Win<</F" ); // INetURLObject is not good with UNC paths, use original path - 
appendLiteralStringEncrypt( rLink.m_aURL, rLink.m_nObject, aLine, osl_getThreadTextEncoding() ); + appendLiteralStringEncrypt( url, rLink.m_nObject, aLine, osl_getThreadTextEncoding() ); aLine.append( ">>" ); } else |