diff options
8 files changed, 172 insertions, 253 deletions
diff --git a/external/pdfium/0001-Add-FPDFTextObj_GetText-API.patch.1 b/external/pdfium/0001-Add-FPDFTextObj_GetText-API.patch.1 new file mode 100644 index 000000000000..22926462cdac --- /dev/null +++ b/external/pdfium/0001-Add-FPDFTextObj_GetText-API.patch.1 @@ -0,0 +1,164 @@ +From 3bee9c60f013b8b7e99c39ee35699d132b330334 Mon Sep 17 00:00:00 2001 +Date: Tue, 7 Aug 2018 21:45:34 +0000 +Subject: [PATCH] Add FPDFTextObj_GetText() API + +Generalize CPDF_TextPage::GetTextByRect(), so that it's possible to get +the text from a text page using a predicate, that way we can easily +get the text that belongs to single text object as well. + +Change-Id: Ia457af0f41184694dc1481709be72b35685bce7f +Reviewed-on: https://pdfium-review.googlesource.com/39530 +Reviewed-by: Henrique Nakashima <hnakashima@chromium.org> +Reviewed-by: Lei Zhang <thestig@chromium.org> +Commit-Queue: Lei Zhang <thestig@chromium.org> +--- + core/fpdftext/cpdf_textpage.cpp | 18 +++++++++++++-- + core/fpdftext/cpdf_textpage.h | 4 ++++ + fpdfsdk/fpdf_edittext.cpp | 18 +++++++++++++++ + fpdfsdk/fpdf_text_embeddertest.cpp | 45 ++++++++++++++++++++++++++++++++++++++ + fpdfsdk/fpdf_view_c_api_test.c | 1 + + public/fpdf_edit.h | 20 +++++++++++++++++ + 6 files changed, 104 insertions(+), 2 deletions(-) + +diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp +index 289416043..ed7f36fb6 100644 +--- a/core/fpdftext/cpdf_textpage.cpp ++++ b/core/fpdftext/cpdf_textpage.cpp +@@ -426,7 +426,8 @@ int CPDF_TextPage::GetIndexAtPos(const CFX_PointF& point, + return pos < nCount ? pos : NearPos; + } + +-WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { ++WideString CPDF_TextPage::GetTextByPredicate( ++ const std::function<bool(const PAGECHAR_INFO&)>& predicate) const { + if (!m_bIsParsed) + return WideString(); + +@@ -435,7 +436,7 @@ WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { + bool IsAddLineFeed = false; + WideString strText; + for (const auto& charinfo : m_CharList) { +- if (IsRectIntersect(rect, charinfo.m_CharBox)) { ++ if (predicate(charinfo)) { + if (fabs(posy - charinfo.m_Origin.y) > 0 && !IsContainPreChar && + IsAddLineFeed) { + posy = charinfo.m_Origin.y; +@@ -460,6 +461,19 @@ WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { + return strText; + } + ++WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { ++ return GetTextByPredicate([&rect](const PAGECHAR_INFO& charinfo) { ++ return IsRectIntersect(rect, charinfo.m_CharBox); ++ }); ++} ++ ++WideString CPDF_TextPage::GetTextByObject( ++ const CPDF_TextObject* pTextObj) const { ++ return GetTextByPredicate([pTextObj](const PAGECHAR_INFO& charinfo) { ++ return charinfo.m_pTextObj == pTextObj; ++ }); ++} ++ + void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const { + if (!m_bIsParsed || !pdfium::IndexInBounds(m_CharList, index)) + return; +diff --git a/core/fpdftext/cpdf_textpage.h b/core/fpdftext/cpdf_textpage.h +index 36d01854f..90b45bd96 100644 +--- a/core/fpdftext/cpdf_textpage.h ++++ b/core/fpdftext/cpdf_textpage.h +@@ -8,6 +8,7 @@ + #define CORE_FPDFTEXT_CPDF_TEXTPAGE_H_ + + #include <deque> ++#include <functional> + #include <vector> + + #include "core/fpdfapi/page/cpdf_pageobjectlist.h" +@@ -97,6 +98,7 @@ class CPDF_TextPage { + std::vector<CFX_FloatRect> GetRectArray(int start, int nCount) const; + int GetIndexAtPos(const CFX_PointF& point, const CFX_SizeF& tolerance) const; + WideString GetTextByRect(const CFX_FloatRect& rect) const; ++ WideString GetTextByObject(const CPDF_TextObject* pTextObj) const; + + // Returns string with the text from |m_TextBuf| that are covered by the input + // range. |start| and |count| are in terms of the |m_CharIndex|, so the range +@@ -151,6 +153,8 @@ class CPDF_TextPage { + TextOrientation FindTextlineFlowOrientation() const; + void AppendGeneratedCharacter(wchar_t unicode, const CFX_Matrix& formMatrix); + void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend); ++ WideString GetTextByPredicate( ++ const std::function<bool(const PAGECHAR_INFO&)>& predicate) const; + + UnownedPtr<const CPDF_Page> const m_pPage; + std::vector<uint16_t> m_CharIndex; +diff --git a/fpdfsdk/fpdf_edittext.cpp b/fpdfsdk/fpdf_edittext.cpp +index 6aa44b3b2..2773763b9 100644 +--- a/fpdfsdk/fpdf_edittext.cpp ++++ b/fpdfsdk/fpdf_edittext.cpp +@@ -22,6 +22,7 @@ + #include "core/fpdfapi/parser/cpdf_number.h" + #include "core/fpdfapi/parser/cpdf_reference.h" + #include "core/fpdfapi/parser/cpdf_stream.h" ++#include "core/fpdftext/cpdf_textpage.h" + #include "core/fxcrt/fx_extension.h" + #include "core/fxge/cfx_fontmgr.h" + #include "core/fxge/fx_font.h" +@@ -564,6 +565,23 @@ FPDFTextObj_GetFontName(FPDF_PAGEOBJECT text, + return dwStringLen; + } + ++FPDF_EXPORT unsigned long FPDF_CALLCONV ++FPDFTextObj_GetText(FPDF_PAGEOBJECT text_object, ++ FPDF_TEXTPAGE text_page, ++ void* buffer, ++ unsigned long length) { ++ CPDF_TextObject* pTextObj = CPDFTextObjectFromFPDFPageObject(text_object); ++ if (!pTextObj) ++ return 0; ++ ++ CPDF_TextPage* pTextPage = CPDFTextPageFromFPDFTextPage(text_page); ++ if (!pTextPage) ++ return 0; ++ ++ WideString text = pTextPage->GetTextByObject(pTextObj); ++ return Utf16EncodeMaybeCopyAndReturnLength(text, buffer, length); ++} ++ + FPDF_EXPORT void FPDF_CALLCONV FPDFFont_Close(FPDF_FONT font) { + CPDF_Font* pFont = CPDFFontFromFPDFFont(font); + if (!pFont) +diff --git a/public/fpdf_edit.h b/public/fpdf_edit.h +index 4d5aa9c48..83fedba90 100644 +--- a/public/fpdf_edit.h ++++ b/public/fpdf_edit.h +@@ -1274,6 +1274,26 @@ FPDFTextObj_GetFontName(FPDF_PAGEOBJECT text, + void* buffer, + unsigned long length); + ++// Experimental API. ++// Get the text of a text object. ++// ++// text_object - the handle to the text object. ++// text_page - the handle to the text page. ++// buffer - the address of a buffer that receives the text. ++// length - the size, in bytes, of |buffer|. ++// ++// Returns the number of bytes in the text (including the trailing NUL ++// character) on success, 0 on error. ++// ++// Regardless of the platform, the |buffer| is always in UTF16-LE encoding. ++// If |length| is less than the returned length, or |buffer| is NULL, |buffer| ++// will not be modified. ++FPDF_EXPORT unsigned long FPDF_CALLCONV ++FPDFTextObj_GetText(FPDF_PAGEOBJECT text_object, ++ FPDF_TEXTPAGE text_page, ++ void* buffer, ++ unsigned long length); ++ + // Experimental API. + // Get number of page objects inside |form_object|. + // +-- +2.16.4 + diff --git a/external/pdfium/0002-svx-more-accurate-PDF-text-importing.patch.2 b/external/pdfium/0002-svx-more-accurate-PDF-text-importing.patch.2 deleted file mode 100644 index 6288dccba6b4..000000000000 --- a/external/pdfium/0002-svx-more-accurate-PDF-text-importing.patch.2 +++ /dev/null @@ -1,69 +0,0 @@ -From 5f83d0a3fac4f8ccef457c03b74433ffd7b12e2a Mon Sep 17 00:00:00 2001 -From: Ashod Nakashian <ashod.nakashian@collabora.co.uk> -Date: Tue, 5 Jun 2018 11:28:30 +0200 -Subject: [PATCH 02/14] svx: more accurate PDF text importing - ---- - pdfium/fpdfsdk/fpdf_editpage.cpp | 84 ++++++++++++++++++++++++++++++++++++++++ - pdfium/public/fpdf_edit.h | 36 +++++++++++++++++ - 2 files changed, 120 insertions(+) - -diff --git a/pdfium/fpdfsdk/fpdf_editpage.cpp b/pdfium/fpdfsdk/fpdf_editpage.cpp -index 912df63..3244943 100644 ---- a/pdfium/fpdfsdk/fpdf_editpage.cpp -+++ b/pdfium/fpdfsdk/fpdf_editpage.cpp -@@ -13,6 +13,7 @@ - - #include "constants/page_object.h" - #include "core/fpdfapi/edit/cpdf_pagecontentgenerator.h" -+#include "core/fpdfapi/font/cpdf_font.h" - #include "core/fpdfapi/page/cpdf_form.h" - #include "core/fpdfapi/page/cpdf_formobject.h" - #include "core/fpdfapi/page/cpdf_imageobject.h" -@@ -26,6 +27,7 @@ - #include "core/fpdfapi/parser/cpdf_string.h" - #include "core/fpdfdoc/cpdf_annot.h" - #include "core/fpdfdoc/cpdf_annotlist.h" -+#include "core/fpdfapi/page/cpdf_textobject.h" - #include "fpdfsdk/cpdfsdk_helpers.h" - #include "public/fpdf_formfill.h" - #include "third_party/base/logging.h" -@@ -457,6 +459,16 @@ FPDFPageObj_Transform(FPDF_PAGEOBJECT page_object, - pPageObj->Transform(matrix); - } - -+FPDF_EXPORT int FPDF_CALLCONV -+FPDFTextObj_CountChars(FPDF_PAGEOBJECT text_object) -+{ -+ if (!text_object) -+ return 0; -+ -+ CPDF_TextObject* pTxtObj = static_cast<CPDF_TextObject*>(text_object); -+ return pTxtObj->CountChars(); -+} -+ - FPDF_EXPORT void FPDF_CALLCONV - FPDFPageObj_SetBlendMode(FPDF_PAGEOBJECT page_object, - FPDF_BYTESTRING blend_mode) { -diff --git a/pdfium/public/fpdf_edit.h b/pdfium/public/fpdf_edit.h ---- a/pdfium/public/fpdf_edit.h -+++ b/pdfium/public/fpdf_edit.h -@@ -1152,6 +1152,15 @@ FPDFFormObj_CountObjects(FPDF_PAGEOBJECT form_object); - FPDF_EXPORT FPDF_PAGEOBJECT FPDF_CALLCONV - FPDFFormObj_GetObject(FPDF_PAGEOBJECT form_object, unsigned long index); - -+// Get the number of characters from a text object. -+// -+// text_object - Handle of text object returned by FPDFPageObj_NewTextObj -+// or FPDFPageObj_NewTextObjEx. -+// Return Value: -+// A character count in the text object. -+FPDF_EXPORT int FPDF_CALLCONV -+FPDFTextObj_CountChars(FPDF_PAGEOBJECT text_object); -+ - #ifdef __cplusplus - } // extern "C" - #endif // __cplusplus --- -2.16.3 - diff --git a/external/pdfium/0004-svx-support-PDF-text-color.patch.2 b/external/pdfium/0004-svx-support-PDF-text-color.patch.2 index 5f492007da3b..cffb8fb80530 100644 --- a/external/pdfium/0004-svx-support-PDF-text-color.patch.2 +++ b/external/pdfium/0004-svx-support-PDF-text-color.patch.2 @@ -25,19 +25,6 @@ index d93ecfc..13362cf 100644 ByteString CFXByteStringFromFPDFWideString(FPDF_WIDESTRING wide_string); #ifdef PDF_ENABLE_XFA -diff --git a/pdfium/fpdfsdk/fpdf_editpage.cpp b/pdfium/fpdfsdk/fpdf_editpage.cpp -index 3244943..f8e2418 100644 ---- a/pdfium/fpdfsdk/fpdf_editpage.cpp -+++ b/pdfium/fpdfsdk/fpdf_editpage.cpp -@@ -443,7 +443,7 @@ FPDFTextObj_CountChars(FPDF_PAGEOBJECT text_object) - if (!text_object) - return 0; - -- CPDF_TextObject* pTxtObj = static_cast<CPDF_TextObject*>(text_object); -+ CPDF_TextObject* pTxtObj = CPDFTextObjectFromFPDFPageObject(text_object); - return pTxtObj->CountChars(); - } - diff --git a/pdfium/fpdfsdk/fpdf_edittext.cpp b/pdfium/fpdfsdk/fpdf_edittext.cpp index c38873faa..aa3287ef4 100644 --- a/pdfium/fpdfsdk/fpdf_edittext.cpp diff --git a/external/pdfium/0011-svx-correctly-possition-form-objects-from-PDF.patch.2 b/external/pdfium/0011-svx-correctly-possition-form-objects-from-PDF.patch.2 index 3849de8b7c24..db6057899bc7 100644 --- a/external/pdfium/0011-svx-correctly-possition-form-objects-from-PDF.patch.2 +++ b/external/pdfium/0011-svx-correctly-possition-form-objects-from-PDF.patch.2 @@ -61,9 +61,9 @@ diff --git a/pdfium/public/fpdf_edit.h b/pdfium/public/fpdf_edit.h index ca76954..f249e64 100644 --- a/pdfium/public/fpdf_edit.h +++ b/pdfium/public/fpdf_edit.h -@@ -1161,6 +1161,24 @@ FPDFFormObj_GetObject(FPDF_PAGEOBJECT form_object, unsigned long index); - FPDF_EXPORT int FPDF_CALLCONV - FPDFTextObj_CountChars(FPDF_PAGEOBJECT text_object); +@@ -1190,6 +1190,24 @@ FPDFFormObj_CountObjects(FPDF_PAGEOBJECT form_object); + FPDF_EXPORT FPDF_PAGEOBJECT FPDF_CALLCONV + FPDFFormObj_GetObject(FPDF_PAGEOBJECT form_object, unsigned long index); +// Get the matrix of a particular form object. +// diff --git a/external/pdfium/0012-svx-import-processed-PDF-text.patch.2 b/external/pdfium/0012-svx-import-processed-PDF-text.patch.2 deleted file mode 100644 index 23629184603f..000000000000 --- a/external/pdfium/0012-svx-import-processed-PDF-text.patch.2 +++ /dev/null @@ -1,148 +0,0 @@ -From 7e8ecec81f102993e3fe73256415dcf049c09e29 Mon Sep 17 00:00:00 2001 -From: Ashod Nakashian <ashod.nakashian@collabora.co.uk> -Date: Tue, 5 Jun 2018 11:35:39 +0200 -Subject: [PATCH 12/14] svx: import processed PDF text - ---- - pdfium/core/fpdftext/cpdf_textpage.cpp | 29 ++++++++++++++++++++++++ - pdfium/core/fpdftext/cpdf_textpage.h | 2 ++ - pdfium/fpdfsdk/fpdf_editpage.cpp | 41 ++++++++++++++++++++++++++++++++++ - pdfium/public/fpdf_edit.h | 13 +++++++++++ - 4 files changed, 85 insertions(+) - -diff --git a/pdfium/core/fpdftext/cpdf_textpage.cpp b/pdfium/core/fpdftext/cpdf_textpage.cpp -index 5690698..4d7c48a 100644 ---- a/pdfium/core/fpdftext/cpdf_textpage.cpp -+++ b/pdfium/core/fpdftext/cpdf_textpage.cpp -@@ -1464,3 +1464,32 @@ Optional<PAGECHAR_INFO> CPDF_TextPage::GenerateCharInfo(wchar_t unicode) { - info.m_Origin.x, info.m_Origin.y); - return info; - } -+ -+WideString CPDF_TextPage::GetTextObjectText(CPDF_TextObject* pTextObj) -+{ -+ if (!m_bIsParsed) -+ return WideString(); -+ -+ float posy = 0; -+ bool IsContainPreChar = false; -+ bool IsAddLineFeed = false; -+ WideString strText; -+ for (const auto& charinfo : m_CharList) { -+ if (charinfo.m_pTextObj == pTextObj) { -+ IsContainPreChar = true; -+ IsAddLineFeed = false; -+ if (charinfo.m_Unicode) -+ strText += charinfo.m_Unicode; -+ } else if (charinfo.m_Unicode == 32) { -+ if (IsContainPreChar && charinfo.m_Unicode) { -+ strText += charinfo.m_Unicode; -+ IsContainPreChar = false; -+ IsAddLineFeed = false; -+ } -+ } else { -+ IsContainPreChar = false; -+ IsAddLineFeed = true; -+ } -+ } -+ return strText; -+} -diff --git a/pdfium/core/fpdftext/cpdf_textpage.h b/pdfium/core/fpdftext/cpdf_textpage.h -index 43a0312..7d5d5ec 100644 ---- a/pdfium/core/fpdftext/cpdf_textpage.h -+++ b/pdfium/core/fpdftext/cpdf_textpage.h -@@ -105,6 +105,8 @@ class CPDF_TextPage { - WideString GetPageText(int start, int count) const; - WideString GetAllPageText() const { return GetPageText(0, CountChars()); } - -+ WideString GetTextObjectText(CPDF_TextObject* pTextObj); -+ - int CountRects(int start, int nCount); - bool GetRect(int rectIndex, CFX_FloatRect* pRect) const; - -diff --git a/pdfium/fpdfsdk/fpdf_editpage.cpp b/pdfium/fpdfsdk/fpdf_editpage.cpp -index f4a1688..f34d3b5 100644 ---- a/pdfium/fpdfsdk/fpdf_editpage.cpp -+++ b/pdfium/fpdfsdk/fpdf_editpage.cpp -@@ -28,6 +28,7 @@ - #include "core/fpdfdoc/cpdf_annot.h" - #include "core/fpdfdoc/cpdf_annotlist.h" - #include "core/fpdfapi/page/cpdf_textobject.h" -+#include "core/fpdftext/cpdf_textpage.h" - #include "fpdfsdk/cpdfsdk_helpers.h" - #include "public/fpdf_formfill.h" - #include "third_party/base/logging.h" -@@ -668,6 +669,46 @@ FPDFPageObj_SetLineCap(FPDF_PAGEOBJECT page_object, int line_cap) { - return true; - } - -+FPDF_EXPORT int FPDF_CALLCONV -+FPDFTextObj_GetTextProcessed(FPDF_PAGEOBJECT text_object, -+ FPDF_TEXTPAGE page, -+ int char_start, -+ int char_count, -+ unsigned short* result) -+{ -+ if (!page || !text_object || char_start < 0 || char_count < 0 || !result) -+ return 0; -+ -+ CPDF_TextObject* pTxtObj = CPDFTextObjectFromFPDFPageObject(text_object); -+ CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(page); -+ int char_available = textpage->CountChars() - char_start; -+ if (char_available <= 0) -+ return 0; -+ -+ char_count = std::min(char_count, char_available); -+ if (char_count == 0) { -+ // Writing out "", which has a character count of 1 due to the NUL. -+ *result = '\0'; -+ return 1; -+ } -+ -+ WideString str = textpage->GetTextObjectText(pTxtObj); -+ -+ if (str.GetLength() > static_cast<size_t>(char_count)) -+ str = str.Left(static_cast<size_t>(char_count)); -+ -+ // UFT16LE_Encode doesn't handle surrogate pairs properly, so it is expected -+ // the number of items to stay the same. -+ ByteString byte_str = str.UTF16LE_Encode(); -+ size_t byte_str_len = byte_str.GetLength(); -+ constexpr size_t kBytesPerCharacter = sizeof(unsigned short); -+ int ret_count = byte_str_len / kBytesPerCharacter; -+ -+ ASSERT(ret_count <= char_count + 1); // +1 to account for the NUL terminator. -+ memcpy(result, byte_str.GetBuffer(byte_str_len), byte_str_len); -+ return ret_count; -+} -+ - FPDF_EXPORT int FPDF_CALLCONV - FPDFFormObj_CountObjects(FPDF_PAGEOBJECT page_object) { - const CPDF_PageObjectList* pObjectList = -diff --git a/pdfium/public/fpdf_edit.h b/pdfium/public/fpdf_edit.h -index f249e64..e14b2a5 100644 ---- a/pdfium/public/fpdf_edit.h -+++ b/pdfium/public/fpdf_edit.h -@@ -1152,6 +1152,19 @@ FPDFFormObj_CountObjects(FPDF_PAGEOBJECT form_object); - FPDF_EXPORT FPDF_PAGEOBJECT FPDF_CALLCONV - FPDFFormObj_GetObject(FPDF_PAGEOBJECT form_object, unsigned long index); - -+// Get the processed text of a text object. -+// -+// text_object - Handle of text object returned by FPDFPageObj_NewTextObj -+// or FPDFPageObj_NewTextObjEx. -+// Return Value: -+// The number of characters (not bytes) written in result. -+FPDF_EXPORT int FPDF_CALLCONV -+FPDFTextObj_GetTextProcessed(FPDF_PAGEOBJECT text_object, -+ FPDF_TEXTPAGE page, -+ int char_start, -+ int char_count, -+ unsigned short* result); -+ - // Get the number of characters from a text object. - // - // text_object - Handle of text object returned by FPDFPageObj_NewTextObj --- -2.16.3 - diff --git a/external/pdfium/0014-svx-update-PDFium-patch-and-code.patch.2 b/external/pdfium/0014-svx-update-PDFium-patch-and-code.patch.2 index f664c80232a5..3c53a2772e05 100644 --- a/external/pdfium/0014-svx-update-PDFium-patch-and-code.patch.2 +++ b/external/pdfium/0014-svx-update-PDFium-patch-and-code.patch.2 @@ -36,19 +36,6 @@ index 3f400c7..968b84a 100644 } FPDF_EXPORT unsigned long FPDF_CALLCONV -diff --git a/pdfium/fpdfsdk/fpdf_editpage.cpp b/pdfium/fpdfsdk/fpdf_editpage.cpp -index 29c8b01..a52e1a9 100644 ---- a/pdfium/fpdfsdk/fpdf_editpage.cpp -+++ b/pdfium/fpdfsdk/fpdf_editpage.cpp -@@ -688,7 +688,7 @@ FPDFTextObj_GetTextProcessed(FPDF_PAGEOBJECT text_object, - int ret_count = byte_str_len / kBytesPerCharacter; - - ASSERT(ret_count <= char_count + 1); // +1 to account for the NUL terminator. -- memcpy(result, byte_str.GetBuffer(byte_str_len), byte_str_len); -+ memcpy(result, byte_str.GetBuffer(byte_str_len).data(), byte_str_len); - return ret_count; - } - -- 2.16.3 diff --git a/external/pdfium/UnpackedTarball_pdfium.mk b/external/pdfium/UnpackedTarball_pdfium.mk index d5295106db4b..0357fdfd4ec1 100644 --- a/external/pdfium/UnpackedTarball_pdfium.mk +++ b/external/pdfium/UnpackedTarball_pdfium.mk @@ -23,11 +23,11 @@ pdfium_patches += 0001-Add-FPDFFormObj_CountObjects-API.patch.1 pdfium_patches += 0001-Add-FPDFFormObj_GetObject-API.patch.1 # Backport of <https://pdfium-review.googlesource.com/38870>. pdfium_patches += 0001-Add-FPDFText_GetFontName-API.patch.1 -pdfium_patches += 0002-svx-more-accurate-PDF-text-importing.patch.2 +# Backport of <https://pdfium-review.googlesource.com/39530>. +pdfium_patches += 0001-Add-FPDFTextObj_GetText-API.patch.1 pdfium_patches += 0003-svx-import-PDF-images-as-BGRA.patch.2 pdfium_patches += 0004-svx-support-PDF-text-color.patch.2 pdfium_patches += 0011-svx-correctly-possition-form-objects-from-PDF.patch.2 -pdfium_patches += 0012-svx-import-processed-PDF-text.patch.2 pdfium_patches += 0014-svx-update-PDFium-patch-and-code.patch.2 $(eval $(call gb_UnpackedTarball_UnpackedTarball,pdfium)) diff --git a/svx/source/svdraw/svdpdf.cxx b/svx/source/svdraw/svdpdf.cxx index 88e157b696fb..e0a394d61a2c 100644 --- a/svx/source/svdraw/svdpdf.cxx +++ b/svx/source/svdraw/svdpdf.cxx @@ -823,12 +823,10 @@ void ImpSdrPdfImport::ImportText(FPDF_PAGEOBJECT pPageObject, FPDF_TEXTPAGE pTex aTextMatrix.Transform(left, right, top, bottom); const tools::Rectangle aRect = PointsToLogic(left, right, top, bottom); - const int nChars = FPDFTextObj_CountChars(pPageObject) * 2; - std::unique_ptr<sal_Unicode[]> pText(new sal_Unicode[nChars + 1]); // + terminating null + const int nChars = FPDFTextObj_GetText(pPageObject, pTextPage, nullptr, 0); + std::unique_ptr<sal_Unicode[]> pText(new sal_Unicode[nChars]); - unsigned short* pShortText = reinterpret_cast<unsigned short*>(pText.get()); - const int nActualChars - = FPDFTextObj_GetTextProcessed(pPageObject, pTextPage, 0, nChars, pShortText); + const int nActualChars = FPDFTextObj_GetText(pPageObject, pTextPage, pText.get(), nChars); if (nActualChars <= 0) { return; |