diff options
author | Tomaž Vajngerl <tomaz.vajngerl@collabora.co.uk> | 2020-05-07 22:01:22 +0200 |
---|---|---|
committer | Tomaž Vajngerl <quikee@gmail.com> | 2020-06-01 10:43:46 +0200 |
commit | efba780d6155317b592b6f5f73945a7851ec4d3b (patch) | |
tree | 94c513fa2d639fc9f9fff7cabb9a974501694424 /vcl | |
parent | 1ee221ad65ff5e3a725e80777406ac7f94ff3a72 (diff) |
vcl: VectorGraphicSearch - for searching text inside PDF
Change-Id: Iee940a3927330c8739774ff3c1af15998f89193b
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/95254
Tested-by: Tomaž Vajngerl <quikee@gmail.com>
Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
Diffstat (limited to 'vcl')
-rw-r--r-- | vcl/CppunitTest_vcl_graphic_test.mk | 7 | ||||
-rw-r--r-- | vcl/Library_vcl.mk | 1 | ||||
-rw-r--r-- | vcl/qa/cppunit/VectorGraphicSearchTest.cxx | 50 | ||||
-rw-r--r-- | vcl/qa/cppunit/data/Pangram.pdf | bin | 0 -> 16880 bytes | |||
-rw-r--r-- | vcl/source/graphic/VectorGraphicSearch.cxx | 168 |
5 files changed, 223 insertions, 3 deletions
```diff
diff --git a/vcl/CppunitTest_vcl_graphic_test.mk b/vcl/CppunitTest_vcl_graphic_test.mk
index 353d054e1ba7..2f2c61735ef8 100644
--- a/vcl/CppunitTest_vcl_graphic_test.mk
+++ b/vcl/CppunitTest_vcl_graphic_test.mk
@@ -14,11 +14,12 @@ $(eval $(call gb_CppunitTest_add_exception_objects,vcl_graphic_test, \
     vcl/qa/cppunit/GraphicDescriptorTest \
     vcl/qa/cppunit/GraphicFormatDetectorTest \
     vcl/qa/cppunit/GraphicNativeMetadataTest \
+    vcl/qa/cppunit/VectorGraphicSearchTest \
 ))
 
-$(eval $(call gb_CppunitTest_use_externals,vcl_graphic_test,\
-	boost_headers \
-	glm_headers \
+$(eval $(call gb_CppunitTest_use_externals,vcl_graphic_test, \
+    boost_headers \
+    $(if $(filter PDFIUM,$(BUILD_TYPE)),pdfium) \
 ))
 ifeq ($(TLS),NSS)
 $(eval $(call gb_CppunitTest_use_externals,vcl_graphic_test,\
diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk
index 0e861442218f..ccbe52b2902c 100644
--- a/vcl/Library_vcl.mk
+++ b/vcl/Library_vcl.mk
@@ -330,6 +330,7 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\
     vcl/source/graphic/UnoGraphicObject \
     vcl/source/graphic/UnoGraphicProvider \
     vcl/source/graphic/UnoGraphicTransformer \
+    vcl/source/graphic/VectorGraphicSearch \
     vcl/source/bitmap/bitmap \
     vcl/source/bitmap/bitmapfilter \
     vcl/source/bitmap/BitmapAlphaClampFilter \
diff --git a/vcl/qa/cppunit/VectorGraphicSearchTest.cxx b/vcl/qa/cppunit/VectorGraphicSearchTest.cxx
new file mode 100644
index 000000000000..0ed21ccf9e26
--- /dev/null
+++ b/vcl/qa/cppunit/VectorGraphicSearchTest.cxx
@@ -0,0 +1,50 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <cppunit/TestAssert.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <unotest/bootstrapfixturebase.hxx>
+#include <unotest/directories.hxx>
+
+#include <vcl/VectorGraphicSearch.hxx>
+#include <vcl/graph.hxx>
+#include <vcl/graphicfilter.hxx>
+#include <tools/stream.hxx>
+
+class VectorGraphicSearchTest : public test::BootstrapFixtureBase
+{
+    OUString getFullUrl(const OUString& sFileName)
+    {
+        return m_directories.getURLFromSrc("/vcl/qa/cppunit/data/") + sFileName;
+    }
+
+    void test();
+
+    CPPUNIT_TEST_SUITE(VectorGraphicSearchTest);
+    CPPUNIT_TEST(test);
+    CPPUNIT_TEST_SUITE_END();
+};
+
+void VectorGraphicSearchTest::test()
+{
+    OUString aURL = getFullUrl("Pangram.pdf");
+    SvFileStream aStream(aURL, StreamMode::READ);
+    GraphicFilter& rGraphicFilter = GraphicFilter::GetGraphicFilter();
+    Graphic aGraphic = rGraphicFilter.ImportUnloadedGraphic(aStream);
+    aGraphic.makeAvailable();
+
+    VectorGraphicSearch aSearch(aGraphic);
+    CPPUNIT_ASSERT_EQUAL(true, aSearch.search("lazy"));
+    CPPUNIT_ASSERT_EQUAL(true, aSearch.next());
+    CPPUNIT_ASSERT_EQUAL(34, aSearch.index());
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION(VectorGraphicSearchTest);
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/qa/cppunit/data/Pangram.pdf b/vcl/qa/cppunit/data/Pangram.pdf
Binary files differ
new file mode 100644
index 000000000000..0714fda4e4dd
--- /dev/null
+++ b/vcl/qa/cppunit/data/Pangram.pdf
diff --git a/vcl/source/graphic/VectorGraphicSearch.cxx b/vcl/source/graphic/VectorGraphicSearch.cxx
new file mode 100644
index 000000000000..864c65f2dda2
--- /dev/null
+++ b/vcl/source/graphic/VectorGraphicSearch.cxx
@@ -0,0 +1,168 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include <sal/config.h>
+#include <vcl/VectorGraphicSearch.hxx>
+
+#include <fpdf_text.h>
+
+class SearchContext
+{
+public:
+    bool bInitialized = false;
+
+    FPDF_DOCUMENT mpPdfDocument;
+    sal_Int32 mnPageIndex;
+    FPDF_PAGE mpPage;
+    FPDF_TEXTPAGE mpTextPage;
+    OUString maSearchString;
+    FPDF_SCHHANDLE mpSearchHandle;
+
+    SearchContext(FPDF_DOCUMENT pPdfDocument, sal_Int32 nPageIndex, OUString const& rSearchString)
+        : mpPdfDocument(pPdfDocument)
+        , mnPageIndex(nPageIndex)
+        , mpPage(nullptr)
+        , mpTextPage(nullptr)
+        , maSearchString(rSearchString)
+        , mpSearchHandle(nullptr)
+    {
+    }
+
+    ~SearchContext()
+    {
+        if (mpSearchHandle)
+            FPDFText_FindClose(mpSearchHandle);
+        if (mpTextPage)
+            FPDFText_ClosePage(mpTextPage);
+        if (mpPage)
+            FPDF_ClosePage(mpPage);
+    }
+
+    bool initialize()
+    {
+        if (!mpPdfDocument)
+            return false;
+        mpPage = FPDF_LoadPage(mpPdfDocument, mnPageIndex);
+        if (!mpPage)
+            return false;
+        mpTextPage = FPDFText_LoadPage(mpPage);
+        if (!mpTextPage)
+            return false;
+
+        FPDF_WIDESTRING pString = reinterpret_cast<FPDF_WIDESTRING>(maSearchString.getStr());
+        mpSearchHandle = FPDFText_FindStart(mpTextPage, pString, 0, 0);
+
+        return mpSearchHandle != nullptr;
+    }
+
+    bool next()
+    {
+        if (mpSearchHandle)
+            return FPDFText_FindNext(mpSearchHandle);
+        return false;
+    }
+
+    int index()
+    {
+        if (mpSearchHandle)
+            return FPDFText_GetSchResultIndex(mpSearchHandle);
+        return -1;
+    }
+};
+
+VectorGraphicSearch::VectorGraphicSearch(Graphic const& rGraphic)
+    : maGraphic(rGraphic)
+    , mpPdfDocument(nullptr)
+{
+    FPDF_LIBRARY_CONFIG aConfig;
+    aConfig.version = 2;
+    aConfig.m_pUserFontPaths = nullptr;
+    aConfig.m_pIsolate = nullptr;
+    aConfig.m_v8EmbedderSlot = 0;
+    FPDF_InitLibraryWithConfig(&aConfig);
+}
+
+VectorGraphicSearch::~VectorGraphicSearch()
+{
+    mpSearchContext.reset();
+
+    if (mpPdfDocument)
+        FPDF_CloseDocument(mpPdfDocument);
+    FPDF_DestroyLibrary();
+}
+
+bool VectorGraphicSearch::search(OUString const& rSearchString)
+{
+    auto pData = maGraphic.getVectorGraphicData();
+
+    if (pData && pData->getVectorGraphicDataType() == VectorGraphicDataType::Pdf)
+    {
+        return searchPDF(pData, rSearchString);
+    }
+    return false;
+}
+
+bool VectorGraphicSearch::searchPDF(std::shared_ptr<VectorGraphicData> const& rData,
+                                    OUString const& rSearchString)
+{
+    if (rSearchString.isEmpty())
+        return false;
+
+    mpPdfDocument
+        = FPDF_LoadMemDocument(rData->getVectorGraphicDataArray().getConstArray(),
+                               rData->getVectorGraphicDataArrayLength(), /*password=*/nullptr);
+
+    if (!mpPdfDocument)
+    {
+        //TODO: Handle failure to load.
+        switch (FPDF_GetLastError())
+        {
+            case FPDF_ERR_SUCCESS:
+                break;
+            case FPDF_ERR_UNKNOWN:
+                break;
+            case FPDF_ERR_FILE:
+                break;
+            case FPDF_ERR_FORMAT:
+                break;
+            case FPDF_ERR_PASSWORD:
+                break;
+            case FPDF_ERR_SECURITY:
+                break;
+            case FPDF_ERR_PAGE:
+                break;
+            default:
+                break;
+        }
+        return false;
+    }
+
+    sal_Int32 nPageIndex = std::max(rData->getPageIndex(), 0);
+
+    mpSearchContext.reset(new SearchContext(mpPdfDocument, nPageIndex, rSearchString));
+
+    return mpSearchContext->initialize();
+}
+
+bool VectorGraphicSearch::next()
+{
+    if (mpSearchContext)
+        return mpSearchContext->next();
+    return false;
+}
+
+int VectorGraphicSearch::index()
+{
+    if (mpSearchContext)
+        return mpSearchContext->index();
+    return -1;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
```