summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tomaž Vajngerl <tomaz.vajngerl@collabora.co.uk> 2020-05-07 22:01:22 +0200
committer Tomaž Vajngerl <quikee@gmail.com> 2020-06-01 10:43:46 +0200
commit efba780d6155317b592b6f5f73945a7851ec4d3b (patch)
tree 94c513fa2d639fc9f9fff7cabb9a974501694424
parent 1ee221ad65ff5e3a725e80777406ac7f94ff3a72 (diff)
vcl: VectorGraphicSearch - for searching text inside PDF
Change-Id: Iee940a3927330c8739774ff3c1af15998f89193b
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/95254
Tested-by: Tomaž Vajngerl <quikee@gmail.com>
Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
-rw-r--r-- include/vcl/VectorGraphicSearch.hxx 39
-rw-r--r-- vcl/CppunitTest_vcl_graphic_test.mk 7
-rw-r--r-- vcl/Library_vcl.mk 1
-rw-r--r-- vcl/qa/cppunit/VectorGraphicSearchTest.cxx 50
-rw-r--r-- vcl/qa/cppunit/data/Pangram.pdf bin 0 -> 16880 bytes
-rw-r--r-- vcl/source/graphic/VectorGraphicSearch.cxx 168
6 files changed, 262 insertions, 3 deletions
diff --git a/include/vcl/VectorGraphicSearch.hxx b/include/vcl/VectorGraphicSearch.hxx
new file mode 100644
index 000000000000..3411d0a931e6
--- /dev/null
+++ b/include/vcl/VectorGraphicSearch.hxx
@@ -0,0 +1,39 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#pragma once
+
+#include <vcl/graph.hxx>
+#include <vcl/vectorgraphicdata.hxx>
+#include <vcl/dllapi.h>
+
+#include <fpdf_doc.h>
+
+#include <memory>
+
+class SearchContext;
+
+/**
+ * Searches for text inside the vector graphic data of a Graphic.
+ *
+ * Only PDF vector graphic data is handled; search() returns false for any
+ * other kind of graphic. After a successful search(), call next() to advance
+ * to a match and index() to read the position reported for that match.
+ */
+class VCL_DLLPUBLIC VectorGraphicSearch final
+{
+private:
+ // Copy of the graphic passed to the constructor; its vector data is what gets searched.
+ Graphic maGraphic;
+ // PDFium document handle; set by searchPDF() and closed in the destructor.
+ FPDF_DOCUMENT mpPdfDocument;
+ // State of the current search; empty until searchPDF() creates it.
+ std::unique_ptr<SearchContext> mpSearchContext;
+ // Loads rData as a PDF document and starts a search for rSearchString in it.
+ bool searchPDF(std::shared_ptr<VectorGraphicData> const& rData, OUString const& rSearchString);
+
+public:
+ VectorGraphicSearch(Graphic const& rGraphic);
+ ~VectorGraphicSearch();
+ // Starts a search for rSearchString. Returns false if the graphic has no PDF
+ // vector data, the string is empty, or the search could not be set up.
+ bool search(OUString const& rSearchString);
+ // Advances to the next match. Returns false if no search has been started,
+ // otherwise whatever PDFium's FPDFText_FindNext reports.
+ bool next();
+ // Returns the index PDFium reports for the current match
+ // (FPDFText_GetSchResultIndex), or -1 if no search has been started.
+ int index();
+};
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/CppunitTest_vcl_graphic_test.mk b/vcl/CppunitTest_vcl_graphic_test.mk
index 353d054e1ba7..2f2c61735ef8 100644
--- a/vcl/CppunitTest_vcl_graphic_test.mk
+++ b/vcl/CppunitTest_vcl_graphic_test.mk
@@ -14,11 +14,12 @@ $(eval $(call gb_CppunitTest_add_exception_objects,vcl_graphic_test, \
vcl/qa/cppunit/GraphicDescriptorTest \
vcl/qa/cppunit/GraphicFormatDetectorTest \
vcl/qa/cppunit/GraphicNativeMetadataTest \
+ vcl/qa/cppunit/VectorGraphicSearchTest \
))
-$(eval $(call gb_CppunitTest_use_externals,vcl_graphic_test,\
- boost_headers \
- glm_headers \
+$(eval $(call gb_CppunitTest_use_externals,vcl_graphic_test, \
+ boost_headers \
+ $(if $(filter PDFIUM,$(BUILD_TYPE)),pdfium) \
))
ifeq ($(TLS),NSS)
$(eval $(call gb_CppunitTest_use_externals,vcl_graphic_test,\
diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk
index 0e861442218f..ccbe52b2902c 100644
--- a/vcl/Library_vcl.mk
+++ b/vcl/Library_vcl.mk
@@ -330,6 +330,7 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\
vcl/source/graphic/UnoGraphicObject \
vcl/source/graphic/UnoGraphicProvider \
vcl/source/graphic/UnoGraphicTransformer \
+ vcl/source/graphic/VectorGraphicSearch \
vcl/source/bitmap/bitmap \
vcl/source/bitmap/bitmapfilter \
vcl/source/bitmap/BitmapAlphaClampFilter \
diff --git a/vcl/qa/cppunit/VectorGraphicSearchTest.cxx b/vcl/qa/cppunit/VectorGraphicSearchTest.cxx
new file mode 100644
index 000000000000..0ed21ccf9e26
--- /dev/null
+++ b/vcl/qa/cppunit/VectorGraphicSearchTest.cxx
@@ -0,0 +1,50 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <cppunit/TestAssert.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <unotest/bootstrapfixturebase.hxx>
+#include <unotest/directories.hxx>
+
+#include <vcl/VectorGraphicSearch.hxx>
+#include <vcl/graph.hxx>
+#include <vcl/graphicfilter.hxx>
+#include <tools/stream.hxx>
+
+// Test fixture for VectorGraphicSearch, based on test::BootstrapFixtureBase.
+class VectorGraphicSearchTest : public test::BootstrapFixtureBase
+{
+ // Builds the URL of a test data file located under vcl/qa/cppunit/data/
+ // in the source tree.
+ OUString getFullUrl(const OUString& sFileName)
+ {
+ return m_directories.getURLFromSrc("/vcl/qa/cppunit/data/") + sFileName;
+ }
+
+ // Searches Pangram.pdf for a word and checks the reported match index.
+ void test();
+
+ // Registers test() with the cppunit suite for this fixture.
+ CPPUNIT_TEST_SUITE(VectorGraphicSearchTest);
+ CPPUNIT_TEST(test);
+ CPPUNIT_TEST_SUITE_END();
+};
+
+// Loads Pangram.pdf as a graphic, searches it for "lazy" and verifies that
+// the first match is reported at index 34.
+void VectorGraphicSearchTest::test()
+{
+    // Import the PDF as a not-yet-loaded graphic, then force it to load.
+    SvFileStream aPdfStream(getFullUrl("Pangram.pdf"), StreamMode::READ);
+    Graphic aPdfGraphic = GraphicFilter::GetGraphicFilter().ImportUnloadedGraphic(aPdfStream);
+    aPdfGraphic.makeAvailable();
+
+    VectorGraphicSearch aSearcher(aPdfGraphic);
+
+    // The search must start, find a match, and report where it is.
+    CPPUNIT_ASSERT_EQUAL(true, aSearcher.search("lazy"));
+    CPPUNIT_ASSERT_EQUAL(true, aSearcher.next());
+    CPPUNIT_ASSERT_EQUAL(34, aSearcher.index());
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION(VectorGraphicSearchTest);
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/qa/cppunit/data/Pangram.pdf b/vcl/qa/cppunit/data/Pangram.pdf
new file mode 100644
index 000000000000..0714fda4e4dd
--- /dev/null
+++ b/vcl/qa/cppunit/data/Pangram.pdf
Binary files differ
diff --git a/vcl/source/graphic/VectorGraphicSearch.cxx b/vcl/source/graphic/VectorGraphicSearch.cxx
new file mode 100644
index 000000000000..864c65f2dda2
--- /dev/null
+++ b/vcl/source/graphic/VectorGraphicSearch.cxx
@@ -0,0 +1,168 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include <sal/config.h>
+
+#include <algorithm>
+
+#include <vcl/VectorGraphicSearch.hxx>
+
+#include <fpdf_text.h>
+
+/**
+ * Bundles the PDFium handles used by one text search on one page of a PDF
+ * document and releases them on destruction.
+ *
+ * The document handle is only borrowed: it is never closed here, so the
+ * owner must keep the document open for as long as this object lives.
+ */
+class SearchContext
+{
+public:
+    FPDF_DOCUMENT mpPdfDocument;
+    sal_Int32 mnPageIndex;
+    FPDF_PAGE mpPage = nullptr;
+    FPDF_TEXTPAGE mpTextPage = nullptr;
+    OUString maSearchString;
+    FPDF_SCHHANDLE mpSearchHandle = nullptr;
+
+    /**
+     * Stores the search parameters; no PDFium call is made until initialize().
+     *
+     * @param pPdfDocument  document to search (borrowed, may be null)
+     * @param nPageIndex    index of the page to search, passed to FPDF_LoadPage
+     * @param rSearchString text to look for
+     */
+    SearchContext(FPDF_DOCUMENT pPdfDocument, sal_Int32 nPageIndex, OUString const& rSearchString)
+        : mpPdfDocument(pPdfDocument)
+        , mnPageIndex(nPageIndex)
+        , maSearchString(rSearchString)
+    {
+    }
+
+    // The handles below are closed exactly once in the destructor; a copy
+    // would close them a second time, so copying is disabled.
+    SearchContext(SearchContext const&) = delete;
+    SearchContext& operator=(SearchContext const&) = delete;
+
+    /** Closes the handles in reverse order of acquisition. */
+    ~SearchContext()
+    {
+        if (mpSearchHandle)
+            FPDFText_FindClose(mpSearchHandle);
+        if (mpTextPage)
+            FPDFText_ClosePage(mpTextPage);
+        if (mpPage)
+            FPDF_ClosePage(mpPage);
+    }
+
+    /**
+     * Loads the page and its text page and starts the search.
+     *
+     * @return true if a search handle was obtained; false if the document is
+     *         null or any PDFium call returned a null handle. Handles that
+     *         were acquired before a failure are released by the destructor.
+     */
+    bool initialize()
+    {
+        if (!mpPdfDocument)
+            return false;
+
+        mpPage = FPDF_LoadPage(mpPdfDocument, mnPageIndex);
+        if (!mpPage)
+            return false;
+
+        mpTextPage = FPDFText_LoadPage(mpPage);
+        if (!mpTextPage)
+            return false;
+
+        // NOTE(review): hands the OUString buffer to PDFium as-is; this assumes
+        // the buffer matches PDFium's FPDF_WIDESTRING layout (UTF-16LE) - TODO
+        // confirm for big-endian hosts.
+        FPDF_WIDESTRING pString = reinterpret_cast<FPDF_WIDESTRING>(maSearchString.getStr());
+        // Flags 0 and start index 0 are passed, as in the original call.
+        mpSearchHandle = FPDFText_FindStart(mpTextPage, pString, 0, 0);
+
+        return mpSearchHandle != nullptr;
+    }
+
+    /**
+     * Advances to the next match.
+     *
+     * @return false if the search was never started, otherwise the result of
+     *         FPDFText_FindNext converted to bool.
+     */
+    bool next() { return mpSearchHandle && FPDFText_FindNext(mpSearchHandle); }
+
+    /**
+     * @return the value of FPDFText_GetSchResultIndex for the current match,
+     *         or -1 if the search was never started.
+     */
+    int index() const
+    {
+        if (!mpSearchHandle)
+            return -1;
+        return FPDFText_GetSchResultIndex(mpSearchHandle);
+    }
+};
+
+// Keeps a copy of the graphic to search and initializes the PDFium library.
+// No document is loaded until a search is started.
+VectorGraphicSearch::VectorGraphicSearch(Graphic const& rGraphic)
+    : maGraphic(rGraphic)
+    , mpPdfDocument(nullptr)
+{
+    // Version 2 configuration: no user font paths and no V8 embedding.
+    FPDF_LIBRARY_CONFIG aLibraryConfig;
+    aLibraryConfig.m_v8EmbedderSlot = 0;
+    aLibraryConfig.m_pIsolate = nullptr;
+    aLibraryConfig.m_pUserFontPaths = nullptr;
+    aLibraryConfig.version = 2;
+
+    FPDF_InitLibraryWithConfig(&aLibraryConfig);
+}
+
+// Releases the search state, the PDF document and finally the PDFium library,
+// in that order.
+VectorGraphicSearch::~VectorGraphicSearch()
+{
+ // Destroy the search context first: its destructor closes the search, text
+ // page and page handles, which were obtained from mpPdfDocument.
+ mpSearchContext.reset();
+
+ if (mpPdfDocument)
+ FPDF_CloseDocument(mpPdfDocument);
+ // NOTE(review): this shuts PDFium down unconditionally, paired with the
+ // init call in the constructor. Presumably this also affects any other
+ // PDFium user alive at the same time - verify.
+ FPDF_DestroyLibrary();
+}
+
+// Starts a search for rSearchString in the graphic's vector data.
+// Returns false when the graphic has no vector data or the data is not PDF;
+// otherwise returns the result of searchPDF().
+bool VectorGraphicSearch::search(OUString const& rSearchString)
+{
+    auto pVectorData = maGraphic.getVectorGraphicData();
+
+    // Nothing to search unless the graphic carries PDF vector data.
+    if (!pVectorData)
+        return false;
+    if (pVectorData->getVectorGraphicDataType() != VectorGraphicDataType::Pdf)
+        return false;
+
+    return searchPDF(pVectorData, rSearchString);
+}
+
+// Loads the PDF data in rData as a PDFium document and starts a search for
+// rSearchString on the page given by rData->getPageIndex() (negative values
+// are clamped to 0).
+//
+// Returns false if rSearchString is empty (any earlier search is left
+// untouched), if the document cannot be loaded, or if the search cannot be
+// initialized. Returns true once the search has been started; use next() and
+// index() to walk the matches.
+bool VectorGraphicSearch::searchPDF(std::shared_ptr<VectorGraphicData> const& rData,
+                                    OUString const& rSearchString)
+{
+    if (rSearchString.isEmpty())
+        return false;
+
+    // A previous search may still hold a document. Release its search context
+    // first (it owns page handles of that document), then the document itself,
+    // so that repeated searches do not leak the earlier document handle.
+    mpSearchContext.reset();
+    if (mpPdfDocument)
+    {
+        FPDF_CloseDocument(mpPdfDocument);
+        mpPdfDocument = nullptr;
+    }
+
+    // NOTE(review): the buffer is handed to PDFium without copying here;
+    // presumably it has to stay alive while the document is open - rData is
+    // obtained from maGraphic by search(); verify.
+    mpPdfDocument
+        = FPDF_LoadMemDocument(rData->getVectorGraphicDataArray().getConstArray(),
+                               rData->getVectorGraphicDataArrayLength(), /*password=*/nullptr);
+
+    if (!mpPdfDocument)
+    {
+        //TODO: Handle failure to load. FPDF_GetLastError() reports the reason:
+        // FPDF_ERR_SUCCESS, FPDF_ERR_UNKNOWN, FPDF_ERR_FILE, FPDF_ERR_FORMAT,
+        // FPDF_ERR_PASSWORD, FPDF_ERR_SECURITY or FPDF_ERR_PAGE.
+        return false;
+    }
+
+    sal_Int32 nPageIndex = std::max(rData->getPageIndex(), 0);
+
+    mpSearchContext = std::make_unique<SearchContext>(mpPdfDocument, nPageIndex, rSearchString);
+
+    return mpSearchContext->initialize();
+}
+
+// Advances the current search to its next match.
+// Returns false when no search has been started.
+bool VectorGraphicSearch::next()
+{
+    return mpSearchContext ? mpSearchContext->next() : false;
+}
+
+// Returns the index reported by the current search for its current match,
+// or -1 when no search has been started.
+int VectorGraphicSearch::index()
+{
+    if (!mpSearchContext)
+        return -1;
+
+    return mpSearchContext->index();
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */