diff options
author | Ashod Nakashian <ashod.nakashian@collabora.co.uk> | 2018-04-13 20:26:11 -0400 |
---|---|---|
committer | Jan Holesovsky <kendy@collabora.com> | 2018-06-07 10:45:24 +0200 |
commit | fab3ca134f687fed35d9abcd2a11e654cd84a1db (patch) | |
tree | c5dac8fc45c62ec0fd35c0758908e6e4bc2375e6 | |
parent | 2f72dcaae544f5a8103120264086919c55936ba8 (diff) |
svx: improved text importing from PDF
Change-Id: I9a2fc2c8511655c1aa362c1a03a5e82ae3ba697e
(cherry picked from commit d057cf3d9184cc5d96af9c957411911f5e788f4d)
-rw-r--r-- | external/pdfium/0007-svx-improved-text-importing-from-PDF.patch.2 | 62 |
-rw-r--r-- | external/pdfium/UnpackedTarball_pdfium.mk | 1 |
-rw-r--r-- | svx/source/svdraw/svdpdf.cxx | 84 |
3 files changed, 108 insertions, 39 deletions
diff --git a/external/pdfium/0007-svx-improved-text-importing-from-PDF.patch.2 b/external/pdfium/0007-svx-improved-text-importing-from-PDF.patch.2 new file mode 100644 index 000000000000..e7afda576d15 --- /dev/null +++ b/external/pdfium/0007-svx-improved-text-importing-from-PDF.patch.2 @@ -0,0 +1,62 @@ +From 87f3da183a87f3ff5df854971a0c3bc2134ecd61 Mon Sep 17 00:00:00 2001 +From: Ashod Nakashian <ashod.nakashian@collabora.co.uk> +Date: Tue, 5 Jun 2018 11:31:35 +0200 +Subject: [PATCH 07/14] svx: improved text importing from PDF + +--- + pdfium/fpdfsdk/fpdf_editpage.cpp | 9 +++++++-- + pdfium/public/fpdf_edit.h | 6 +++++- + 2 files changed, 12 insertions(+), 3 deletions(-) + +diff --git a/pdfium/fpdfsdk/fpdf_editpage.cpp b/pdfium/fpdfsdk/fpdf_editpage.cpp +index f8e2418..2249e8e 100644 +--- a/pdfium/fpdfsdk/fpdf_editpage.cpp ++++ b/pdfium/fpdfsdk/fpdf_editpage.cpp +@@ -652,8 +652,11 @@ FPDFTextObj_GetMatrix(FPDF_PAGEOBJECT text_object, + double* a, + double* b, + double* c, +- double* d) { +- if (!text_object) ++ double* d, ++ double* e, ++ double* f) ++{ ++ if (!text_object || !a || !b || !c || !d || !e || !f) + return; + + CPDF_TextObject* pTxtObj = CPDFTextObjectFromFPDFPageObject(text_object); +@@ -662,6 +665,8 @@ FPDFTextObj_GetMatrix(FPDF_PAGEOBJECT text_object, + *b = matrix.b; + *c = matrix.c; + *d = matrix.d; ++ *e = matrix.e; ++ *f = matrix.f; + } + + FPDF_EXPORT int FPDF_CALLCONV +diff --git a/pdfium/public/fpdf_edit.h b/pdfium/public/fpdf_edit.h +index 89ec8cf..fc906f4 100644 +--- a/pdfium/public/fpdf_edit.h ++++ b/pdfium/public/fpdf_edit.h +@@ -1038,12 +1038,16 @@ FPDFTextObj_GetFontSize(FPDF_PAGEOBJECT text_object); + // b - Pointer to a double value receiving coefficient "b" of the matrix. + // c - Pointer to a double value receiving coefficient "c" of the matrix. + // d - Pointer to a double value receiving coefficient "d" of the matrix. ++// e - Pointer to a double value receiving coefficient "e" of the matrix. 
++// f - Pointer to a double value receiving coefficient "f" of the matrix. + FPDF_EXPORT void FPDF_CALLCONV + FPDFTextObj_GetMatrix(FPDF_PAGEOBJECT text_object, + double* a, + double* b, + double* c, +- double* d); ++ double* d, ++ double* e, ++ double* f); + + // Get the unicode of a special character in a text object. + // +-- +2.16.3 + diff --git a/external/pdfium/UnpackedTarball_pdfium.mk b/external/pdfium/UnpackedTarball_pdfium.mk index af7841dcd120..f0ee740ddf94 100644 --- a/external/pdfium/UnpackedTarball_pdfium.mk +++ b/external/pdfium/UnpackedTarball_pdfium.mk @@ -20,6 +20,7 @@ pdfium_patches += 0003-svx-import-PDF-images-as-BGRA.patch.2 pdfium_patches += 0004-svx-support-PDF-text-color.patch.2 pdfium_patches += 0005-svx-support-Paths-in-PDFs-while-importing.patch.2 pdfium_patches += 0006-svx-improve-path-importing-from-PDF.patch.2 +pdfium_patches += 0007-svx-improved-text-importing-from-PDF.patch.2 $(eval $(call gb_UnpackedTarball_UnpackedTarball,pdfium)) diff --git a/svx/source/svdraw/svdpdf.cxx b/svx/source/svdraw/svdpdf.cxx index 04e5f3f7f48e..7ef3d8614e7a 100644 --- a/svx/source/svdraw/svdpdf.cxx +++ b/svx/source/svdraw/svdpdf.cxx @@ -103,6 +103,8 @@ static inline long lcl_ToLogic(double value) const long out = OutputDevice::LogicToLogic(in, MapUnit::MapPixel, MapUnit::Map100thMM); return out / 100; } + +static inline double sqrt2(double a, double b) { return sqrt(a * a + b * b); } } struct FPDFBitmapDeleter @@ -1024,53 +1026,56 @@ void ImpSdrPdfImport::ImportText(FPDF_PAGEOBJECT pPageObject, int nPageObjectInd SAL_WARN("sd.filter", "FAILED to get TEXT bounds"); } - SAL_WARN("sd.filter", "Got TEXT bounds left: " << left << ", right: " << right - << ", top: " << top << ", bottom: " << bottom); - tools::Rectangle aRect = PointsToLogic(left, right, top, bottom); + if (left == right || top == bottom) + { + SAL_WARN("sd.filter", "Skipping empty TEXT #" << nPageObjectIndex << " left: " << left + << ", right: " << right << ", top: " << top + << ", bottom: " 
<< bottom); + return; + } - double dFontScale = 1.0; - geometry::Matrix2D aMatrix; - FPDFTextObj_GetMatrix(pPageObject, &aMatrix.m00, &aMatrix.m01, &aMatrix.m10, &aMatrix.m11); - if (aMatrix.m00 != aMatrix.m11 || aMatrix.m00 <= 0) + const int nChars = FPDFTextObj_CountChars(pPageObject); + std::unique_ptr<sal_Unicode[]> pText(new sal_Unicode[nChars + 1]); // + terminating null + + unsigned short* pShortText = reinterpret_cast<unsigned short*>(pText.get()); + const int nActualChars = FPDFTextObj_GetText(pPageObject, 0, nChars, pShortText); + if (nActualChars <= 0) { - SAL_WARN("sd.filter", "Bogus font scale matrix (" << aMatrix.m00 << ',' << aMatrix.m11 - << "), will use heuristic height of " - << aRect.GetHeight() << "."); - dFontScale = aRect.GetHeight(); + SAL_WARN("sd.filter", "Got not TEXT"); + return; } - else - dFontScale = aMatrix.m00; - double dFontSize = FPDFTextObj_GetFontSize(pPageObject); - SAL_WARN("sd.filter", "Got Font Size: " << dFontSize); - dFontSize *= dFontScale; - SAL_WARN("sd.filter", "Got Font Size Scaled: " << dFontSize); - dFontSize = lcl_PointToPixel(dFontSize); - SAL_WARN("sd.filter", "Got Font Pixel Size: " << dFontSize); - dFontSize = lcl_ToLogic(dFontSize); - SAL_WARN("sd.filter", "Got Font Logic Size: " << dFontSize); + OUString sText(pText.get(), nActualChars); + SAL_WARN("sd.filter", "Got Text (" << nChars << "): [" << sText << "]."); + + double a, b, c, d, e, f; + FPDFTextObj_GetMatrix(pPageObject, &a, &b, &c, &d, &e, &f); + SAL_WARN("sd.filter", "Got font scale matrix (" << a << ", " << b << ", " << c << ", " << d + << ", " << e << ", " << f << ')'); + Point aPos = PointsToLogic(e, f); + SAL_WARN("sd.filter", "Got TEXT origin: " << aPos); + + const double dFontSize = FPDFTextObj_GetFontSize(pPageObject); + double dFontSizeH = fabs(sqrt2(a, c) * dFontSize); + double dFontSizeV = fabs(sqrt2(b, d) * dFontSize); + SAL_WARN("sd.filter", "Got Font Size: " << dFontSize << ", Scaled Font Size H: " << dFontSizeH + << ", V: " << 
dFontSizeV); + dFontSizeH = lcl_PointToPixel(dFontSizeH); + dFontSizeV = lcl_PointToPixel(dFontSizeV); + SAL_WARN("sd.filter", "Got Pixel Font Size H: " << dFontSizeH << ", V: " << dFontSizeV); + dFontSizeH = lcl_ToLogic(dFontSizeH); + dFontSizeV = lcl_ToLogic(dFontSizeV); + SAL_WARN("sd.filter", "Got Logic Font Size H: " << dFontSizeH << ", V: " << dFontSizeV); unsigned int nR, nG, nB, nA; if (FPDFTextObj_GetStrokeColor(pPageObject, &nR, &nG, &nB, &nA)) mpVD->SetTextColor(Color(nR, nG, nB)); vcl::Font aFnt = mpVD->GetFont(); - aFnt.SetFontSize(Size(dFontSize, dFontSize)); + aFnt.SetFontSize(Size(dFontSizeH, dFontSizeV)); mpVD->SetFont(aFnt); - const int nChars = FPDFTextObj_CountChars(pPageObject); - std::unique_ptr<sal_Unicode[]> pText(new sal_Unicode[nChars + 1]); // + terminating null - - unsigned short* pShortText = reinterpret_cast<unsigned short*>(pText.get()); - const int nActualChars = FPDFTextObj_GetText(pPageObject, 0, nChars, pShortText); - OUString sText(pText.get(), nActualChars); - - // for (int nChar = 0; nChar < nChars; ++nChar) - // pText[nChar] = static_cast<sal_Unicode>(FPDFTextObj_GetUnicode(pPageObject, nChar)); - // OUString sText(pText.get(), nChars); - SAL_WARN("sd.filter", "Got Text (" << nChars << "): [" << sText << "]."); - - ImportText(aRect.TopLeft(), sText); + ImportText(aPos, sText); } void ImpSdrPdfImport::ImportText(const Point& rPos, const OUString& rStr) @@ -1083,11 +1088,12 @@ void ImpSdrPdfImport::ImportText(const Point& rPos, const OUString& rStr) sal_Int32 nTextWidth = static_cast<sal_Int32>(mpVD->GetTextWidth(rStr) * mfScaleX); sal_Int32 nTextHeight = static_cast<sal_Int32>(mpVD->GetTextHeight() * mfScaleY); - SAL_WARN("sd.filter", "TextWidth: " << nTextWidth << ", TextHeight: " << nTextHeight); + SAL_WARN("sd.filter", + "Unscaled text size: " << mpVD->GetTextWidth(rStr) << 'x' << mpVD->GetTextHeight() + << ", Scaled: " << nTextWidth << 'x' << nTextHeight); Point aPos(FRound(rPos.X() * mfScaleX + maOfs.X()), FRound(rPos.Y() 
* mfScaleY + maOfs.Y())); Size aSize(nTextWidth, nTextHeight); - SAL_WARN("sd.filter", "Text Pos: " << aPos << ", Size: " << aSize); if (eAlg == ALIGN_BASELINE) aPos.AdjustY(-(FRound(aFontMetric.GetAscent() * mfScaleY))); @@ -1325,7 +1331,7 @@ void ImpSdrPdfImport::ImportPath(FPDF_PAGEOBJECT pPageObject, int nPageObjectInd } const basegfx::B2DHomMatrix aTransform( - basegfx::tools::createScaleTranslateB2DHomMatrix(mfScaleX, mfScaleY, maOfs.X(), maOfs.Y())); + basegfx::utils::createScaleTranslateB2DHomMatrix(mfScaleX, mfScaleY, maOfs.X(), maOfs.Y())); aPoly.transform(aTransform); float fWidth = 1; @@ -1353,7 +1359,7 @@ void ImpSdrPdfImport::ImportPath(FPDF_PAGEOBJECT pPageObject, int nPageObjectInd // if(!mbLastObjWasPolyWithoutLine || !CheckLastPolyLineAndFillMerge(basegfx::B2DPolyPolygon(aSource))) aPoly.setClosed(true); // TODO: Review - SdrPathObj* pPath = new SdrPathObj(OBJ_POLY, basegfx::B2DPolyPolygon(aPoly)); + SdrPathObj* pPath = new SdrPathObj(*mpModel, OBJ_POLY, basegfx::B2DPolyPolygon(aPoly)); SetAttributes(pPath); InsertObj(pPath, false); } |