summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tomaž Vajngerl <tomaz.vajngerl@collabora.co.uk> 2020-06-28 10:12:17 +0200
committer: Tomaž Vajngerl <quikee@gmail.com> 2020-06-29 22:34:23 +0200
commit: 64b7feb7f0e0d3f29625c73e9790b2f152e34c09 (patch)
tree: 07195408409f4da400211bb2f7c91d6a65bad3bc
parent: 9cc54e9bc1219fcaea87ca35eb93b0e79325a7ac (diff)
pdf: add PDFiumTextPage and PDFiumPageObject + test
Also use it in ImpSdrPdfImport.

Change-Id: I6d353ef60d036c3516448e64a50b25a9befd5db8
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/97364
Tested-by: Jenkins
Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
(cherry picked from commit 440cb3fb80d9fd356871eac410b9797f23433722)
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/97449
Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice@gmail.com>
-rw-r--r-- include/vcl/filter/PDFiumLibrary.hxx | 40
-rw-r--r-- svx/source/svdraw/svdpdf.cxx | 11
-rw-r--r-- vcl/qa/cppunit/PDFiumLibraryTest.cxx | 37
-rw-r--r-- vcl/source/pdf/PDFiumLibrary.cxx | 62
4 files changed, 142 insertions, 8 deletions
diff --git a/include/vcl/filter/PDFiumLibrary.hxx b/include/vcl/filter/PDFiumLibrary.hxx
index 35826097e45e..501f964f395d 100644
--- a/include/vcl/filter/PDFiumLibrary.hxx
+++ b/include/vcl/filter/PDFiumLibrary.hxx
@@ -69,6 +69,41 @@ public:
std::unique_ptr<PDFiumAnnotation> getLinked(OString const& rKey);
};
+class PDFiumTextPage;
+
+class VCL_DLLPUBLIC PDFiumPageObject final
+{
+private:
+ FPDF_PAGEOBJECT mpPageObject;
+
+ PDFiumPageObject(const PDFiumPageObject&) = delete;
+ PDFiumPageObject& operator=(const PDFiumPageObject&) = delete;
+
+public:
+ PDFiumPageObject(FPDF_PAGEOBJECT pPageObject);
+ ~PDFiumPageObject();
+
+ FPDF_PAGEOBJECT getPointer() { return mpPageObject; }
+
+ int getType();
+ OUString getText(std::unique_ptr<PDFiumTextPage> const& pTextPage);
+};
+
+class VCL_DLLPUBLIC PDFiumTextPage final
+{
+private:
+ FPDF_TEXTPAGE mpTextPage;
+
+ PDFiumTextPage(const PDFiumTextPage&) = delete;
+ PDFiumTextPage& operator=(const PDFiumTextPage&) = delete;
+
+public:
+ PDFiumTextPage(FPDF_TEXTPAGE pTextPage);
+ ~PDFiumTextPage();
+
+ FPDF_TEXTPAGE getPointer() { return mpTextPage; }
+};
+
class VCL_DLLPUBLIC PDFiumPage final
{
private:
@@ -92,10 +127,15 @@ public:
FPDF_PAGE getPointer() { return mpPage; }
+ int getObjectCount();
+ std::unique_ptr<PDFiumPageObject> getObject(int nIndex);
+
int getAnnotationCount();
int getAnnotationIndex(std::unique_ptr<PDFiumAnnotation> const& rAnnotation);
std::unique_ptr<PDFiumAnnotation> getAnnotation(int nIndex);
+
+ std::unique_ptr<PDFiumTextPage> getTextPage();
};
class VCL_DLLPUBLIC PDFiumDocument final
diff --git a/svx/source/svdraw/svdpdf.cxx b/svx/source/svdraw/svdpdf.cxx
index ca8226b68ce5..50e94a6f983b 100644
--- a/svx/source/svdraw/svdpdf.cxx
+++ b/svx/source/svdraw/svdpdf.cxx
@@ -176,17 +176,16 @@ void ImpSdrPdfImport::DoObjects(SvdProgressInfo* pProgrInfo, sal_uInt32* pAction
SetupPageScale(dPageWidth, dPageHeight);
// Load the page text to extract it when we get text elements.
- FPDF_TEXTPAGE pTextPage = FPDFText_LoadPage(pPdfPage->getPointer());
+ auto pTextPage = pPdfPage->getTextPage();
- const int nPageObjectCount = FPDFPage_CountObjects(pPdfPage->getPointer());
+ const int nPageObjectCount = pPdfPage->getObjectCount();
if (pProgrInfo)
pProgrInfo->SetActionCount(nPageObjectCount);
for (int nPageObjectIndex = 0; nPageObjectIndex < nPageObjectCount; ++nPageObjectIndex)
{
- FPDF_PAGEOBJECT pPageObject
- = FPDFPage_GetObject(pPdfPage->getPointer(), nPageObjectIndex);
- ImportPdfObject(pPageObject, pTextPage, nPageObjectIndex);
+ auto pPageObject = pPdfPage->getObject(nPageObjectIndex);
+ ImportPdfObject(pPageObject->getPointer(), pTextPage->getPointer(), nPageObjectIndex);
if (pProgrInfo && pActionsToReport)
{
(*pActionsToReport)++;
@@ -200,8 +199,6 @@ void ImpSdrPdfImport::DoObjects(SvdProgressInfo* pProgrInfo, sal_uInt32* pAction
}
}
}
-
- FPDFText_ClosePage(pTextPage);
}
}
diff --git a/vcl/qa/cppunit/PDFiumLibraryTest.cxx b/vcl/qa/cppunit/PDFiumLibraryTest.cxx
index 61b3981731f6..9c0c92607b14 100644
--- a/vcl/qa/cppunit/PDFiumLibraryTest.cxx
+++ b/vcl/qa/cppunit/PDFiumLibraryTest.cxx
@@ -35,6 +35,7 @@ class PDFiumLibraryTest : public test::BootstrapFixtureBase
void testDocument();
void testPages();
+ void testPageObjects();
void testAnnotationsMadeInEvince();
void testAnnotationsMadeInAcrobat();
void testTools();
@@ -42,6 +43,7 @@ class PDFiumLibraryTest : public test::BootstrapFixtureBase
CPPUNIT_TEST_SUITE(PDFiumLibraryTest);
CPPUNIT_TEST(testDocument);
CPPUNIT_TEST(testPages);
+ CPPUNIT_TEST(testPageObjects);
CPPUNIT_TEST(testAnnotationsMadeInEvince);
CPPUNIT_TEST(testAnnotationsMadeInAcrobat);
CPPUNIT_TEST(testTools);
@@ -74,12 +76,35 @@ void PDFiumLibraryTest::testDocument()
auto aSize = pDocument->getPageSize(0);
CPPUNIT_ASSERT_EQUAL(612.0, aSize.getX());
CPPUNIT_ASSERT_EQUAL(792.0, aSize.getY());
+}
+
+void PDFiumLibraryTest::testPages()
+{
+ OUString aURL = getFullUrl("Pangram.pdf");
+ SvFileStream aStream(aURL, StreamMode::READ);
+ GraphicFilter& rGraphicFilter = GraphicFilter::GetGraphicFilter();
+ Graphic aGraphic = rGraphicFilter.ImportUnloadedGraphic(aStream);
+ aGraphic.makeAvailable();
+
+ auto pVectorGraphicData = aGraphic.getVectorGraphicData();
+ CPPUNIT_ASSERT(pVectorGraphicData);
+ CPPUNIT_ASSERT_EQUAL(VectorGraphicDataType::Pdf,
+ pVectorGraphicData->getVectorGraphicDataType());
+
+ const void* pData = pVectorGraphicData->getVectorGraphicDataArray().getConstArray();
+ int nLength = pVectorGraphicData->getVectorGraphicDataArrayLength();
+
+ auto pPdfium = vcl::pdf::PDFiumLibrary::get();
+ auto pDocument = pPdfium->openDocument(pData, nLength);
+ CPPUNIT_ASSERT(pDocument);
+
+ CPPUNIT_ASSERT_EQUAL(1, pDocument->getPageCount());
auto pPage = pDocument->openPage(0);
CPPUNIT_ASSERT(pPage);
}
-void PDFiumLibraryTest::testPages()
+void PDFiumLibraryTest::testPageObjects()
{
OUString aURL = getFullUrl("Pangram.pdf");
SvFileStream aStream(aURL, StreamMode::READ);
@@ -103,6 +128,16 @@ void PDFiumLibraryTest::testPages()
auto pPage = pDocument->openPage(0);
CPPUNIT_ASSERT(pPage);
+
+ CPPUNIT_ASSERT_EQUAL(12, pPage->getObjectCount());
+
+ auto pPageObject = pPage->getObject(0);
+ auto pTextPage = pPage->getTextPage();
+
+ CPPUNIT_ASSERT_EQUAL(1, pPageObject->getType());
+ CPPUNIT_ASSERT_EQUAL(OUString("The quick, brown fox jumps over a lazy dog. DJs flock by when "
+ "MTV ax quiz prog. Junk MTV quiz "),
+ pPageObject->getText(pTextPage));
}
void PDFiumLibraryTest::testAnnotationsMadeInEvince()
diff --git a/vcl/source/pdf/PDFiumLibrary.cxx b/vcl/source/pdf/PDFiumLibrary.cxx
index cad2296eeea9..af13f8ec8fbe 100644
--- a/vcl/source/pdf/PDFiumLibrary.cxx
+++ b/vcl/source/pdf/PDFiumLibrary.cxx
@@ -15,6 +15,7 @@
#include <vcl/filter/PDFiumLibrary.hxx>
#include <fpdf_annot.h>
#include <fpdf_edit.h>
+#include <fpdf_text.h>
namespace vcl::pdf
{
@@ -167,6 +168,19 @@ basegfx::B2DSize PDFiumDocument::getPageSize(int nIndex)
int PDFiumDocument::getPageCount() { return FPDF_GetPageCount(mpPdfDocument); }
+int PDFiumPage::getObjectCount() { return FPDFPage_CountObjects(mpPage); }
+
+std::unique_ptr<PDFiumPageObject> PDFiumPage::getObject(int nIndex)
+{
+ std::unique_ptr<PDFiumPageObject> pPDFiumPageObject;
+ FPDF_PAGEOBJECT pPageObject = FPDFPage_GetObject(mpPage, nIndex);
+ if (pPageObject)
+ {
+ pPDFiumPageObject = std::make_unique<PDFiumPageObject>(pPageObject);
+ }
+ return pPDFiumPageObject;
+}
+
int PDFiumPage::getAnnotationCount() { return FPDFPage_GetAnnotCount(mpPage); }
int PDFiumPage::getAnnotationIndex(std::unique_ptr<PDFiumAnnotation> const& rAnnotation)
@@ -185,6 +199,42 @@ std::unique_ptr<PDFiumAnnotation> PDFiumPage::getAnnotation(int nIndex)
return pPDFiumAnnotation;
}
+std::unique_ptr<PDFiumTextPage> PDFiumPage::getTextPage()
+{
+ std::unique_ptr<PDFiumTextPage> pPDFiumTextPage;
+ FPDF_TEXTPAGE pTextPage = FPDFText_LoadPage(mpPage);
+ if (pTextPage)
+ {
+ pPDFiumTextPage = std::make_unique<PDFiumTextPage>(pTextPage);
+ }
+ return pPDFiumTextPage;
+}
+
+PDFiumPageObject::PDFiumPageObject(FPDF_PAGEOBJECT pPageObject)
+ : mpPageObject(pPageObject)
+{
+}
+
+PDFiumPageObject::~PDFiumPageObject() {}
+
+OUString PDFiumPageObject::getText(std::unique_ptr<PDFiumTextPage> const& pTextPage)
+{
+ OUString sReturnText;
+
+ const int nBytes = FPDFTextObj_GetText(mpPageObject, pTextPage->getPointer(), nullptr, 0);
+
+ std::unique_ptr<sal_Unicode[]> pText(new sal_Unicode[nBytes]);
+
+ const int nActualBytes
+ = FPDFTextObj_GetText(mpPageObject, pTextPage->getPointer(), pText.get(), nBytes);
+ if (nActualBytes > 2)
+ sReturnText = OUString(pText.get());
+
+ return sReturnText;
+}
+
+int PDFiumPageObject::getType() { return FPDFPageObj_GetType(mpPageObject); }
+
PDFiumAnnotation::PDFiumAnnotation(FPDF_ANNOTATION pAnnotation)
: mpAnnotation(pAnnotation)
{
@@ -239,6 +289,18 @@ std::unique_ptr<PDFiumAnnotation> PDFiumAnnotation::getLinked(OString const& rKe
}
return pPDFiumAnnotation;
}
+
+PDFiumTextPage::PDFiumTextPage(FPDF_TEXTPAGE pTextPage)
+ : mpTextPage(pTextPage)
+{
+}
+
+PDFiumTextPage::~PDFiumTextPage()
+{
+ if (mpTextPage)
+ FPDFText_ClosePage(mpTextPage);
+}
+
} // end vcl::pdf
#endif // HAVE_FEATURE_PDFIUM