From 08705b75ff8b5a10dc039a9aa1042e04a281729a Mon Sep 17 00:00:00 2001 From: Stephan Bergmann Date: Wed, 23 Sep 2020 12:32:08 +0200 Subject: These PDFium-provided strings are always in UTF-16LE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ...see the documentation of FPDFTextObj_GetText in workdir/UnpackedTarball/pdfium/public/fpdf_edit.h and of FPDFAnnot_GetStringValue in workdir/UnpackedTarball/pdfium/public/fpdf_annot.h. This appears to be broken ever since the code's introduction in 440cb3fb80d9fd356871eac410b9797f23433722 "pdf: add PDFiumTextPage and PDFiumPageObject + test" resp. 7e4dc3b1eabcb1993d4143c046a2f32fedc852ed "vcl: Add annotation reading to PDFiumLibrary c++ wrapper". It caused vcl_pdfium_library_test to fail on (big-endian) s390x with > vcl/qa/cppunit/PDFiumLibraryTest.cxx:141:PDFiumLibraryTest::testPageObjects equality assertion failed > - Expected: The quick, brown fox jumps over a lazy dog. DJs flock by when MTV ax quiz prog. Junk MTV quiz > - Actual : 吀栀攀 焀甀椀挀欀Ⰰ 戀爀漀眀渀 昀漀砀 樀甀洀瀀猀 漀瘀攀爀 愀 氀愀稀礀 搀漀最⸀ 䐀䨀猀 昀氀漀挀欀 戀礀 眀栀攀渀 䴀吀嘀 愀砀 焀甀椀稀 瀀爀漀最⸀ 䨀甀渀欀 䴀吀嘀 焀甀椀稀  and > vcl/qa/cppunit/PDFiumLibraryTest.cxx:192:PDFiumLibraryTest::testAnnotationsMadeInEvince > equality assertion failed > - Expected: quikee > - Actual : 焀甀椀欀攀攀 Change-Id: I6fb5bea43646d544b8c3bdf06a63a1ed3df9c07e Reviewed-on: https://gerrit.libreoffice.org/c/core/+/103243 Tested-by: Jenkins Reviewed-by: Stephan Bergmann --- vcl/source/pdf/PDFiumLibrary.cxx | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'vcl') diff --git a/vcl/source/pdf/PDFiumLibrary.cxx b/vcl/source/pdf/PDFiumLibrary.cxx index 024665efc7e2..360c88657161 100644 --- a/vcl/source/pdf/PDFiumLibrary.cxx +++ b/vcl/source/pdf/PDFiumLibrary.cxx @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -237,7 +238,16 @@ OUString PDFiumPageObject::getText(std::unique_ptr const& pTextP assert(nActualBytes % 2 == 0); nActualBytes /= 2; if (nActualBytes > 1) + { +#if defined OSL_BIGENDIAN + // The data returned by FPDFTextObj_GetText is documented to always be UTF-16LE: + for (int i = 0; i != nActualBytes; ++i) + { + pText[i] = OSL_SWAPWORD(pText[i]); + } +#endif sReturnText = OUString(pText.get()); + } return sReturnText; } @@ -432,7 +442,16 @@ OUString PDFiumAnnotation::getString(OString const& rKey) assert(nStringSize % 2 == 0); nStringSize /= 2; if (nStringSize > 0) + { +#if defined OSL_BIGENDIAN + // The data returned by FPDFAnnot_GetStringValue is documented to always be UTF-16LE: + for (unsigned long i = 0; i != nStringSize; ++i) + { + pText[i] = OSL_SWAPWORD(pText[i]); + } +#endif rString = OUString(pText.get()); + } } return rString; } -- cgit