summaryrefslogtreecommitdiff
path: root/vcl
diff options
context:
space:
mode:
author: Tomaž Vajngerl <tomaz.vajngerl@collabora.co.uk> 2020-06-28 10:12:17 +0200
committer: Tomaž Vajngerl <quikee@gmail.com> 2020-06-29 14:35:37 +0200
commit: 440cb3fb80d9fd356871eac410b9797f23433722 (patch)
tree: d9c21ba1bfaeef1a5a4f895af72d94b5d31dc82b /vcl
parent: 34745b022d0c58e262c7ad3bfd103e769b2cdd18 (diff)
pdf: add PDFiumTextPage and PDFiumPageObject + test
Also use it in ImpSdrPdfImport.

Change-Id: I6d353ef60d036c3516448e64a50b25a9befd5db8
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/97364
Tested-by: Jenkins
Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
Diffstat (limited to 'vcl')
-rw-r--r-- vcl/qa/cppunit/PDFiumLibraryTest.cxx | 37
-rw-r--r-- vcl/source/pdf/PDFiumLibrary.cxx | 62
2 files changed, 98 insertions, 1 deletion
diff --git a/vcl/qa/cppunit/PDFiumLibraryTest.cxx b/vcl/qa/cppunit/PDFiumLibraryTest.cxx
index 61b3981731f6..9c0c92607b14 100644
--- a/vcl/qa/cppunit/PDFiumLibraryTest.cxx
+++ b/vcl/qa/cppunit/PDFiumLibraryTest.cxx
@@ -35,6 +35,7 @@ class PDFiumLibraryTest : public test::BootstrapFixtureBase
void testDocument();
void testPages();
+ void testPageObjects();
void testAnnotationsMadeInEvince();
void testAnnotationsMadeInAcrobat();
void testTools();
@@ -42,6 +43,7 @@ class PDFiumLibraryTest : public test::BootstrapFixtureBase
CPPUNIT_TEST_SUITE(PDFiumLibraryTest);
CPPUNIT_TEST(testDocument);
CPPUNIT_TEST(testPages);
+ CPPUNIT_TEST(testPageObjects);
CPPUNIT_TEST(testAnnotationsMadeInEvince);
CPPUNIT_TEST(testAnnotationsMadeInAcrobat);
CPPUNIT_TEST(testTools);
@@ -74,12 +76,35 @@ void PDFiumLibraryTest::testDocument()
auto aSize = pDocument->getPageSize(0);
CPPUNIT_ASSERT_EQUAL(612.0, aSize.getX());
CPPUNIT_ASSERT_EQUAL(792.0, aSize.getY());
+}
+
+// Loads "Pangram.pdf" as an unloaded graphic, opens its raw data as a PDFium
+// document, and checks that it has exactly one page and that page 0 opens.
+void PDFiumLibraryTest::testPages()
+{
+ OUString aURL = getFullUrl("Pangram.pdf");
+ SvFileStream aStream(aURL, StreamMode::READ);
+ GraphicFilter& rGraphicFilter = GraphicFilter::GetGraphicFilter();
+ Graphic aGraphic = rGraphicFilter.ImportUnloadedGraphic(aStream);
+ aGraphic.makeAvailable();
+
+ // The imported graphic must carry vector graphic data of type Pdf.
+ auto pVectorGraphicData = aGraphic.getVectorGraphicData();
+ CPPUNIT_ASSERT(pVectorGraphicData);
+ CPPUNIT_ASSERT_EQUAL(VectorGraphicDataType::Pdf,
+ pVectorGraphicData->getVectorGraphicDataType());
+
+ // Hand the data array and its length to PDFium.
+ const void* pData = pVectorGraphicData->getVectorGraphicDataArray().getConstArray();
+ int nLength = pVectorGraphicData->getVectorGraphicDataArrayLength();
+
+ auto pPdfium = vcl::pdf::PDFiumLibrary::get();
+ auto pDocument = pPdfium->openDocument(pData, nLength);
+ CPPUNIT_ASSERT(pDocument);
+
+ CPPUNIT_ASSERT_EQUAL(1, pDocument->getPageCount());
auto pPage = pDocument->openPage(0);
CPPUNIT_ASSERT(pPage);
}
-void PDFiumLibraryTest::testPages()
+void PDFiumLibraryTest::testPageObjects()
{
OUString aURL = getFullUrl("Pangram.pdf");
SvFileStream aStream(aURL, StreamMode::READ);
@@ -103,6 +128,16 @@ void PDFiumLibraryTest::testPages()
auto pPage = pDocument->openPage(0);
CPPUNIT_ASSERT(pPage);
+
+ CPPUNIT_ASSERT_EQUAL(12, pPage->getObjectCount());
+
+ auto pPageObject = pPage->getObject(0);
+ auto pTextPage = pPage->getTextPage();
+
+ CPPUNIT_ASSERT_EQUAL(1, pPageObject->getType());
+ CPPUNIT_ASSERT_EQUAL(OUString("The quick, brown fox jumps over a lazy dog. DJs flock by when "
+ "MTV ax quiz prog. Junk MTV quiz "),
+ pPageObject->getText(pTextPage));
}
void PDFiumLibraryTest::testAnnotationsMadeInEvince()
diff --git a/vcl/source/pdf/PDFiumLibrary.cxx b/vcl/source/pdf/PDFiumLibrary.cxx
index 7e723c56bf88..92d0cf84a201 100644
--- a/vcl/source/pdf/PDFiumLibrary.cxx
+++ b/vcl/source/pdf/PDFiumLibrary.cxx
@@ -15,6 +15,7 @@
#include <vcl/filter/PDFiumLibrary.hxx>
#include <fpdf_annot.h>
#include <fpdf_edit.h>
+#include <fpdf_text.h>
namespace vcl::pdf
{
@@ -166,6 +167,19 @@ basegfx::B2DSize PDFiumDocument::getPageSize(int nIndex)
int PDFiumDocument::getPageCount() { return FPDF_GetPageCount(mpPdfDocument); }
+// Returns the value FPDFPage_CountObjects reports for the wrapped page.
+int PDFiumPage::getObjectCount()
+{
+    const int nObjectCount = FPDFPage_CountObjects(mpPage);
+    return nObjectCount;
+}
+
+// Wraps the page object at nIndex in a PDFiumPageObject.
+// Returns an empty pointer when FPDFPage_GetObject yields no object.
+std::unique_ptr<PDFiumPageObject> PDFiumPage::getObject(int nIndex)
+{
+    FPDF_PAGEOBJECT pRawObject = FPDFPage_GetObject(mpPage, nIndex);
+    if (!pRawObject)
+        return std::unique_ptr<PDFiumPageObject>();
+
+    return std::make_unique<PDFiumPageObject>(pRawObject);
+}
+
int PDFiumPage::getAnnotationCount() { return FPDFPage_GetAnnotCount(mpPage); }
int PDFiumPage::getAnnotationIndex(std::unique_ptr<PDFiumAnnotation> const& rAnnotation)
@@ -184,6 +198,42 @@ std::unique_ptr<PDFiumAnnotation> PDFiumPage::getAnnotation(int nIndex)
return pPDFiumAnnotation;
}
+// Loads the text page for the wrapped page via FPDFText_LoadPage.
+// Returns an empty pointer when loading yields no text page; otherwise the
+// returned PDFiumTextPage closes the handle in its destructor.
+std::unique_ptr<PDFiumTextPage> PDFiumPage::getTextPage()
+{
+    FPDF_TEXTPAGE pRawTextPage = FPDFText_LoadPage(mpPage);
+    if (!pRawTextPage)
+        return std::unique_ptr<PDFiumTextPage>();
+
+    return std::make_unique<PDFiumTextPage>(pRawTextPage);
+}
+
+// Stores the given page object handle; nothing else is done at construction.
+PDFiumPageObject::PDFiumPageObject(FPDF_PAGEOBJECT pPageObject)
+    : mpPageObject{ pPageObject }
+{
+}
+
+// The handle is not released here (presumably it stays owned by the page -
+// TODO confirm).
+PDFiumPageObject::~PDFiumPageObject() = default;
+
+// Returns the text of this page object, looked up through the given text page.
+//
+// pTextPage: text page of the page this object belongs to; may be empty, as
+//            PDFiumPage::getTextPage() returns an empty pointer on failure.
+// Returns an empty string when there is no text page, when PDFium reports no
+// text, or when the two size queries disagree.
+OUString PDFiumPageObject::getText(std::unique_ptr<PDFiumTextPage> const& pTextPage)
+{
+    OUString sReturnText;
+
+    // Without a text page there is nothing to query (and nothing to dereference).
+    if (!pTextPage)
+        return sReturnText;
+
+    // First call without a buffer asks for the required size. Per the PDFium
+    // header this is a byte count of UTF-16LE data including the terminator.
+    const int nBytes = FPDFTextObj_GetText(mpPageObject, pTextPage->getPointer(), nullptr, 0);
+    if (nBytes <= 2)
+        return sReturnText; // error, or only the terminator
+
+    // Convert bytes to UTF-16 code units (rounded up) and keep one extra,
+    // zero-initialised unit so the buffer is always NUL-terminated.
+    const int nUnits = (nBytes + 1) / 2;
+    std::unique_ptr<sal_Unicode[]> pText(new sal_Unicode[nUnits + 1]());
+
+    const int nActualBytes
+        = FPDFTextObj_GetText(mpPageObject, pTextPage->getPointer(), pText.get(), nBytes);
+
+    // Only trust the buffer if PDFium wrote no more than we offered; otherwise
+    // it was left untouched.
+    if (nActualBytes > 2 && nActualBytes <= nBytes)
+        sReturnText = OUString(pText.get());
+
+    return sReturnText;
+}
+
+// Returns the value FPDFPageObj_GetType reports for the wrapped page object.
+int PDFiumPageObject::getType()
+{
+    const int nType = FPDFPageObj_GetType(mpPageObject);
+    return nType;
+}
+
PDFiumAnnotation::PDFiumAnnotation(FPDF_ANNOTATION pAnnotation)
: mpAnnotation(pAnnotation)
{
@@ -238,6 +288,18 @@ std::unique_ptr<PDFiumAnnotation> PDFiumAnnotation::getLinked(OString const& rKe
}
return pPDFiumAnnotation;
}
+
+// Stores the given text page handle; the destructor closes it.
+PDFiumTextPage::PDFiumTextPage(FPDF_TEXTPAGE pTextPage)
+    : mpTextPage{ pTextPage }
+{
+}
+
+// Closes the wrapped text page through FPDFText_ClosePage, if there is one.
+PDFiumTextPage::~PDFiumTextPage()
+{
+    if (!mpTextPage)
+        return;
+
+    FPDFText_ClosePage(mpTextPage);
+}
+
} // end vcl::pdf
#endif // HAVE_FEATURE_PDFIUM