diff options
| field | value | date |
|---|---|---|
| author | Tomaž Vajngerl <tomaz.vajngerl@collabora.co.uk> | 2020-06-28 10:12:17 +0200 |
| committer | Tomaž Vajngerl <quikee@gmail.com> | 2020-06-29 14:35:37 +0200 |
| commit | 440cb3fb80d9fd356871eac410b9797f23433722 (patch) | |
| tree | d9c21ba1bfaeef1a5a4f895af72d94b5d31dc82b /vcl | |
| parent | 34745b022d0c58e262c7ad3bfd103e769b2cdd18 (diff) | |
pdf: add PDFiumTextPage and PDFiumPageObject + test
Also use it in ImpSdrPdfImport.
Change-Id: I6d353ef60d036c3516448e64a50b25a9befd5db8
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/97364
Tested-by: Jenkins
Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
Diffstat (limited to 'vcl')
-rw-r--r-- | vcl/qa/cppunit/PDFiumLibraryTest.cxx | 37 | ||||
-rw-r--r-- | vcl/source/pdf/PDFiumLibrary.cxx | 62 |
2 files changed, 98 insertions, 1 deletions
diff --git a/vcl/qa/cppunit/PDFiumLibraryTest.cxx b/vcl/qa/cppunit/PDFiumLibraryTest.cxx index 61b3981731f6..9c0c92607b14 100644 --- a/vcl/qa/cppunit/PDFiumLibraryTest.cxx +++ b/vcl/qa/cppunit/PDFiumLibraryTest.cxx @@ -35,6 +35,7 @@ class PDFiumLibraryTest : public test::BootstrapFixtureBase void testDocument(); void testPages(); + void testPageObjects(); void testAnnotationsMadeInEvince(); void testAnnotationsMadeInAcrobat(); void testTools(); @@ -42,6 +43,7 @@ class PDFiumLibraryTest : public test::BootstrapFixtureBase CPPUNIT_TEST_SUITE(PDFiumLibraryTest); CPPUNIT_TEST(testDocument); CPPUNIT_TEST(testPages); + CPPUNIT_TEST(testPageObjects); CPPUNIT_TEST(testAnnotationsMadeInEvince); CPPUNIT_TEST(testAnnotationsMadeInAcrobat); CPPUNIT_TEST(testTools); @@ -74,12 +76,35 @@ void PDFiumLibraryTest::testDocument() auto aSize = pDocument->getPageSize(0); CPPUNIT_ASSERT_EQUAL(612.0, aSize.getX()); CPPUNIT_ASSERT_EQUAL(792.0, aSize.getY()); +} + +void PDFiumLibraryTest::testPages() +{ + OUString aURL = getFullUrl("Pangram.pdf"); + SvFileStream aStream(aURL, StreamMode::READ); + GraphicFilter& rGraphicFilter = GraphicFilter::GetGraphicFilter(); + Graphic aGraphic = rGraphicFilter.ImportUnloadedGraphic(aStream); + aGraphic.makeAvailable(); + + auto pVectorGraphicData = aGraphic.getVectorGraphicData(); + CPPUNIT_ASSERT(pVectorGraphicData); + CPPUNIT_ASSERT_EQUAL(VectorGraphicDataType::Pdf, + pVectorGraphicData->getVectorGraphicDataType()); + + const void* pData = pVectorGraphicData->getVectorGraphicDataArray().getConstArray(); + int nLength = pVectorGraphicData->getVectorGraphicDataArrayLength(); + + auto pPdfium = vcl::pdf::PDFiumLibrary::get(); + auto pDocument = pPdfium->openDocument(pData, nLength); + CPPUNIT_ASSERT(pDocument); + + CPPUNIT_ASSERT_EQUAL(1, pDocument->getPageCount()); auto pPage = pDocument->openPage(0); CPPUNIT_ASSERT(pPage); } -void PDFiumLibraryTest::testPages() +void PDFiumLibraryTest::testPageObjects() { OUString aURL = getFullUrl("Pangram.pdf"); 
SvFileStream aStream(aURL, StreamMode::READ); @@ -103,6 +128,16 @@ void PDFiumLibraryTest::testPages() auto pPage = pDocument->openPage(0); CPPUNIT_ASSERT(pPage); + + CPPUNIT_ASSERT_EQUAL(12, pPage->getObjectCount()); + + auto pPageObject = pPage->getObject(0); + auto pTextPage = pPage->getTextPage(); + + CPPUNIT_ASSERT_EQUAL(1, pPageObject->getType()); + CPPUNIT_ASSERT_EQUAL(OUString("The quick, brown fox jumps over a lazy dog. DJs flock by when " + "MTV ax quiz prog. Junk MTV quiz "), + pPageObject->getText(pTextPage)); } void PDFiumLibraryTest::testAnnotationsMadeInEvince() diff --git a/vcl/source/pdf/PDFiumLibrary.cxx b/vcl/source/pdf/PDFiumLibrary.cxx index 7e723c56bf88..92d0cf84a201 100644 --- a/vcl/source/pdf/PDFiumLibrary.cxx +++ b/vcl/source/pdf/PDFiumLibrary.cxx @@ -15,6 +15,7 @@ #include <vcl/filter/PDFiumLibrary.hxx> #include <fpdf_annot.h> #include <fpdf_edit.h> +#include <fpdf_text.h> namespace vcl::pdf { @@ -166,6 +167,19 @@ basegfx::B2DSize PDFiumDocument::getPageSize(int nIndex) int PDFiumDocument::getPageCount() { return FPDF_GetPageCount(mpPdfDocument); } +int PDFiumPage::getObjectCount() { return FPDFPage_CountObjects(mpPage); } + +std::unique_ptr<PDFiumPageObject> PDFiumPage::getObject(int nIndex) +{ + std::unique_ptr<PDFiumPageObject> pPDFiumPageObject; + FPDF_PAGEOBJECT pPageObject = FPDFPage_GetObject(mpPage, nIndex); + if (pPageObject) + { + pPDFiumPageObject = std::make_unique<PDFiumPageObject>(pPageObject); + } + return pPDFiumPageObject; +} + int PDFiumPage::getAnnotationCount() { return FPDFPage_GetAnnotCount(mpPage); } int PDFiumPage::getAnnotationIndex(std::unique_ptr<PDFiumAnnotation> const& rAnnotation) @@ -184,6 +198,42 @@ std::unique_ptr<PDFiumAnnotation> PDFiumPage::getAnnotation(int nIndex) return pPDFiumAnnotation; } +std::unique_ptr<PDFiumTextPage> PDFiumPage::getTextPage() +{ + std::unique_ptr<PDFiumTextPage> pPDFiumTextPage; + FPDF_TEXTPAGE pTextPage = FPDFText_LoadPage(mpPage); + if (pTextPage) + { + pPDFiumTextPage = 
std::make_unique<PDFiumTextPage>(pTextPage); + } + return pPDFiumTextPage; +} + +PDFiumPageObject::PDFiumPageObject(FPDF_PAGEOBJECT pPageObject) + : mpPageObject(pPageObject) +{ +} + +PDFiumPageObject::~PDFiumPageObject() {} + +OUString PDFiumPageObject::getText(std::unique_ptr<PDFiumTextPage> const& pTextPage) +{ + OUString sReturnText; + + const int nBytes = FPDFTextObj_GetText(mpPageObject, pTextPage->getPointer(), nullptr, 0); + + std::unique_ptr<sal_Unicode[]> pText(new sal_Unicode[nBytes]); + + const int nActualBytes + = FPDFTextObj_GetText(mpPageObject, pTextPage->getPointer(), pText.get(), nBytes); + if (nActualBytes > 2) + sReturnText = OUString(pText.get()); + + return sReturnText; +} + +int PDFiumPageObject::getType() { return FPDFPageObj_GetType(mpPageObject); } + PDFiumAnnotation::PDFiumAnnotation(FPDF_ANNOTATION pAnnotation) : mpAnnotation(pAnnotation) { @@ -238,6 +288,18 @@ std::unique_ptr<PDFiumAnnotation> PDFiumAnnotation::getLinked(OString const& rKe } return pPDFiumAnnotation; } + +PDFiumTextPage::PDFiumTextPage(FPDF_TEXTPAGE pTextPage) + : mpTextPage(pTextPage) +{ +} + +PDFiumTextPage::~PDFiumTextPage() +{ + if (mpTextPage) + FPDFText_ClosePage(mpTextPage); +} + } // end vcl::pdf #endif // HAVE_FEATURE_PDFIUM |