From 68e74bdf63e992666016c790e8e4cfd5b28d6abe Mon Sep 17 00:00:00 2001 From: Michael Warner Date: Fri, 3 Jul 2020 10:18:33 -0400 Subject: tdf133647 tdf123386 tdf123389 Improved .docx table formula import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Converts table formula syntax from MS Word to LibreOffice. This version uses the list separator of the document for the formula regexen; however, it does not convert the decimal or list separators in the case where the person opening the document is using a different locale from the author. Change-Id: I9600a0bea060a76705a7ad6b051ed4fdd50b9d40 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/98614 Tested-by: Jenkins Tested-by: László Németh Reviewed-by: László Németh --- sw/qa/extras/ooxmlimport/data/tdf123386.docx | Bin 0 -> 14928 bytes sw/qa/extras/ooxmlimport/data/tdf123389.docx | Bin 0 -> 13649 bytes sw/qa/extras/ooxmlimport/data/tdf133647.docx | Bin 0 -> 14536 bytes .../extras/ooxmlimport/data/tdf133647_unicode.docx | Bin 0 -> 20920 bytes sw/qa/extras/ooxmlimport/ooxmlimport.cxx | 155 +++++++++++++++++++++ writerfilter/Library_writerfilter.mk | 4 +- writerfilter/source/dmapper/DomainMapper_Impl.cxx | 71 +++++++++- writerfilter/source/dmapper/DomainMapper_Impl.hxx | 3 +- writerfilter/source/dmapper/SettingsTable.cxx | 17 +++ writerfilter/source/dmapper/SettingsTable.hxx | 3 + 10 files changed, 250 insertions(+), 3 deletions(-) create mode 100644 sw/qa/extras/ooxmlimport/data/tdf123386.docx create mode 100644 sw/qa/extras/ooxmlimport/data/tdf123389.docx create mode 100644 sw/qa/extras/ooxmlimport/data/tdf133647.docx create mode 100644 sw/qa/extras/ooxmlimport/data/tdf133647_unicode.docx diff --git a/sw/qa/extras/ooxmlimport/data/tdf123386.docx b/sw/qa/extras/ooxmlimport/data/tdf123386.docx new file mode 100644 index 000000000000..1278068ddedf Binary files /dev/null and b/sw/qa/extras/ooxmlimport/data/tdf123386.docx differ diff --git 
a/sw/qa/extras/ooxmlimport/data/tdf123389.docx b/sw/qa/extras/ooxmlimport/data/tdf123389.docx new file mode 100644 index 000000000000..4245464b820d Binary files /dev/null and b/sw/qa/extras/ooxmlimport/data/tdf123389.docx differ diff --git a/sw/qa/extras/ooxmlimport/data/tdf133647.docx b/sw/qa/extras/ooxmlimport/data/tdf133647.docx new file mode 100644 index 000000000000..fb525446c7fc Binary files /dev/null and b/sw/qa/extras/ooxmlimport/data/tdf133647.docx differ diff --git a/sw/qa/extras/ooxmlimport/data/tdf133647_unicode.docx b/sw/qa/extras/ooxmlimport/data/tdf133647_unicode.docx new file mode 100644 index 000000000000..d5749f89de53 Binary files /dev/null and b/sw/qa/extras/ooxmlimport/data/tdf133647_unicode.docx differ diff --git a/sw/qa/extras/ooxmlimport/ooxmlimport.cxx b/sw/qa/extras/ooxmlimport/ooxmlimport.cxx index 1f534cb2fbbf..7079fd8bfd19 100644 --- a/sw/qa/extras/ooxmlimport/ooxmlimport.cxx +++ b/sw/qa/extras/ooxmlimport/ooxmlimport.cxx @@ -796,6 +796,161 @@ DECLARE_OOXMLIMPORT_TEST(testTdf105975formula, "tdf105975.docx") CPPUNIT_ASSERT_EQUAL(OUString("25"), xEnumerationAccess->getPresentation(false).trim()); } +DECLARE_OOXMLIMPORT_TEST(testTdf133647, "tdf133647.docx") +{ + /* Tests that argument lists, cell references, and cell ranges are translated correctly + * when importing table formulae from MS Word */ + uno::Reference xTextFieldsSupplier(mxComponent, uno::UNO_QUERY); + uno::Reference xFieldsAccess(xTextFieldsSupplier->getTextFields()); + uno::Reference xFields(xFieldsAccess->createEnumeration()); + + if( !xFields->hasMoreElements() ) { + CPPUNIT_ASSERT(false); + return; + } + + uno::Reference xEnumerationAccess1(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString("SUM(1|2|3)"), xEnumerationAccess1->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("6"), xEnumerationAccess1->getPresentation(false).trim()); + + uno::Reference xEnumerationAccess2(xFields->nextElement(), uno::UNO_QUERY); + 
CPPUNIT_ASSERT_EQUAL(OUString("sum(|)"), xEnumerationAccess2->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("3"), xEnumerationAccess2->getPresentation(false).trim()); + + uno::Reference xEnumerationAccess3(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString("(SUM(|5)*(2+7))*(3+SUM(1|))"), xEnumerationAccess3->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("432"), xEnumerationAccess3->getPresentation(false).trim()); + + uno::Reference xEnumerationAccess4(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString("1+(SUM(1|2))"), xEnumerationAccess4->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("4"), xEnumerationAccess4->getPresentation(false).trim()); + + uno::Reference xEnumerationAccess5(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString("3*(2+SUM()+7)"), xEnumerationAccess5->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("45"), xEnumerationAccess5->getPresentation(false).trim()); + + uno::Reference xEnumerationAccess6(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString("(1+2)*SUM(|)"), xEnumerationAccess6->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("21"), xEnumerationAccess6->getPresentation(false).trim()); + + uno::Reference xEnumerationAccess7(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString("SUM(|5||6)"), xEnumerationAccess7->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("17"), xEnumerationAccess7->getPresentation(false).trim()); + + uno::Reference xEnumerationAccess8(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString("SUM()"), xEnumerationAccess8->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("7"), xEnumerationAccess8->getPresentation(false).trim()); + + uno::Reference xEnumerationAccess9(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString("SUM(|)"), 
xEnumerationAccess9->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("3"), xEnumerationAccess9->getPresentation(false).trim()); +} + +DECLARE_OOXMLIMPORT_TEST(testTdf123386, "tdf123386.docx") +{ + /* Tests that comparison operators and the logical functions AND(), OR(), NOT() and DEFINED() + * are translated correctly when importing table formulae from MS Word */ + uno::Reference xTextFieldsSupplier(mxComponent, uno::UNO_QUERY); + uno::Reference xFieldsAccess(xTextFieldsSupplier->getTextFields()); + uno::Reference xFields(xFieldsAccess->createEnumeration()); + + if( !xFields->hasMoreElements() ) { + CPPUNIT_ASSERT(false); + return; + } + + uno::Reference xEnumerationAccess1(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString(" L 2"), xEnumerationAccess1->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("1"), xEnumerationAccess1->getPresentation(false).trim()); + + /* Ensures non-cell references passed to DEFINED() are preserved. + * Doesn't test the display string because LO doesn't support DEFINED(). 
*/ + uno::Reference xEnumerationAccess10(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString("((1) AND (DEFINED(ABC1)))"), xEnumerationAccess10->getPresentation(true).trim()); + + uno::Reference xEnumerationAccess9(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString("NOT(TRUE)"), xEnumerationAccess9->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("0"), xEnumerationAccess9->getPresentation(false).trim()); + + uno::Reference xEnumerationAccess8(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString("((TRUE) OR (FALSE))"), xEnumerationAccess8->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("1"), xEnumerationAccess8->getPresentation(false).trim()); + + uno::Reference xEnumerationAccess7(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString("(( EQ 1) OR ( EQ 2))"), xEnumerationAccess7->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("1"), xEnumerationAccess7->getPresentation(false).trim()); + + uno::Reference xEnumerationAccess6(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString("((( L 1)) AND (( NEQ 2)))"), xEnumerationAccess6->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("0"), xEnumerationAccess6->getPresentation(false).trim()); + + uno::Reference xEnumerationAccess5(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString("(( EQ 1) AND ( EQ 2))"), xEnumerationAccess5->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("1"), xEnumerationAccess5->getPresentation(false).trim()); + + uno::Reference xEnumerationAccess4(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString(" NEQ 3"), xEnumerationAccess4->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("1"), xEnumerationAccess4->getPresentation(false).trim()); + + uno::Reference xEnumerationAccess3(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString(" EQ 3"), 
xEnumerationAccess3->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("1"), xEnumerationAccess3->getPresentation(false).trim()); + + uno::Reference xEnumerationAccess2(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString(" G 1"), xEnumerationAccess2->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("1"), xEnumerationAccess2->getPresentation(false).trim()); + +} + +DECLARE_OOXMLIMPORT_TEST(testTdf133647_unicode, "tdf133647_unicode.docx") +{ + /* Tests that non-ASCII characters in formulas are preserved when importing from MS Word */ + uno::Reference xTextFieldsSupplier(mxComponent, uno::UNO_QUERY); + uno::Reference xFieldsAccess(xTextFieldsSupplier->getTextFields()); + uno::Reference xFields(xFieldsAccess->createEnumeration()); + + if( !xFields->hasMoreElements() ) { + CPPUNIT_ASSERT(false); + return; + } + + xFields->nextElement(); + xFields->nextElement(); + xFields->nextElement(); + + uno::Reference xEnumerationAccess1(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString(u"defined(預期結果)"), xEnumerationAccess1->getPresentation(true).trim()); + + uno::Reference xEnumerationAccess2(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString(u"defined(نتيجةمتوقعة)"), xEnumerationAccess2->getPresentation(true).trim()); + + uno::Reference xEnumerationAccess3(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString(u"defined(ExpectedResult)"), xEnumerationAccess3->getPresentation(true).trim()); +} + +DECLARE_OOXMLIMPORT_TEST(testTdf123389, "tdf123389.docx") +{ + /* Tests that calls to ROUND() are translated to the infix ROUND operator + * when importing table formulae from MS Word */ + uno::Reference xTextFieldsSupplier(mxComponent, uno::UNO_QUERY); + uno::Reference xFieldsAccess(xTextFieldsSupplier->getTextFields()); + uno::Reference xFields(xFieldsAccess->createEnumeration()); + + if( !xFields->hasMoreElements() ) { + CPPUNIT_ASSERT(false); + return; + } + + 
uno::Reference xEnumerationAccess1(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString("((2.345) ROUND (1))"), xEnumerationAccess1->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("2.3"), xEnumerationAccess1->getPresentation(false).trim()); + + uno::Reference xEnumerationAccess2(xFields->nextElement(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString("(() ROUND (2))"), xEnumerationAccess2->getPresentation(true).trim()); + CPPUNIT_ASSERT_EQUAL(OUString("2.35"), xEnumerationAccess2->getPresentation(false).trim()); +} + + DECLARE_OOXMLIMPORT_TEST(testTdf107784, "tdf107784.docx") { // Make sure the field displays the citation's title and not the identifier diff --git a/writerfilter/Library_writerfilter.mk b/writerfilter/Library_writerfilter.mk index 61e62d66e10d..cbda03e9dcdd 100644 --- a/writerfilter/Library_writerfilter.mk +++ b/writerfilter/Library_writerfilter.mk @@ -51,7 +51,9 @@ $(eval $(call gb_Library_use_libraries,writerfilter,\ $(eval $(call gb_Library_use_externals,writerfilter,\ boost_headers \ - icu_headers \ + icui18n \ + icuuc \ + icu_headers \ libxml2 \ )) diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx index e33a6d753364..4e8afddc8d50 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx @@ -88,6 +88,8 @@ #include #include #include +#include +#include #include #include @@ -100,6 +102,9 @@ #include #include +#include +#include + using namespace ::com::sun::star; using namespace oox; namespace writerfilter::dmapper{ @@ -4196,6 +4201,70 @@ void DomainMapper_Impl::handleFieldAsk } } +/** + * Converts a Microsoft Word field formula into LibreOffice syntax + * @param input The Microsoft Word field formula, with no leading '=' sign + * @return An equivalent LibreOffice field formula + */ +OUString DomainMapper_Impl::convertFieldFormula(const OUString& input) { + + OUString listSeparator = 
m_pSettingsTable->GetListSeparator(); + + /* Replace comparison operators with the equivalent LO keyword operators. Space-delimited forms are replaced first so they get no extra padding, and two-character operators before one-character ones so they are not split. */ + OUString changed = input.replaceAll(" <> ", " NEQ "); + changed = changed.replaceAll(" <= ", " LEQ "); + changed = changed.replaceAll(" >= ", " GEQ "); + changed = changed.replaceAll(" = " , " EQ "); + changed = changed.replaceAll(" < " , " L "); + changed = changed.replaceAll(" > " , " G "); + + changed = changed.replaceAll("<>", " NEQ "); + changed = changed.replaceAll("<=", " LEQ "); + changed = changed.replaceAll(">=", " GEQ "); + changed = changed.replaceAll("=" , " EQ "); + changed = changed.replaceAll("<" , " L "); + changed = changed.replaceAll(">" , " G "); + + /* Replace function calls with infix keywords for AND(), OR(), and ROUND(). Nothing needs to be + * done for NOT(). This simple regex will work properly with most common cases. However, it may + * not work correctly when the arguments are nested subcalls to other functions, like + * ROUND(MIN(1,2),MAX(3,4)). See TDF#134765. NOTE(review): listSeparator is concatenated into the pattern unescaped, so a separator that is a regex metacharacter (or empty) would change or break the pattern; also, status is only checked in the rmatch3 loop below, the replaceAll() results are used unchecked. */ + icu::ErrorCode status; + icu::UnicodeString usInput(changed.getStr()); + const uint32_t rMatcherFlags = UREGEX_CASE_INSENSITIVE; + OUString regex = "\\b(AND|OR|ROUND)\\s*\\(\\s*([^" + listSeparator + "]+)\\s*" + listSeparator + "\\s*([^)]+)\\s*\\)"; + icu::UnicodeString usRegex(regex.getStr()); + icu::RegexMatcher rmatch1(usRegex, usInput, rMatcherFlags, status); + usInput = rmatch1.replaceAll(icu::UnicodeString("(($2) $1 ($3))"), status); + + /* Assumes any remaining list separators separate arguments to functions that accept lists + * (SUM, MIN, MAX, MEAN, etc.) */ + usInput.findAndReplace(icu::UnicodeString(listSeparator.getStr()), "|"); + + /* Surround single cell references with angle brackets. + * If a function name ending in a digit is ever added, this regex will need to be revisited. 
*/ + icu::RegexMatcher rmatch2("\\b([A-Z]{1,3}[0-9]+)\\b(?![(])", usInput, rMatcherFlags, status); + usInput = rmatch2.replaceAll(icu::UnicodeString("<$1>"), status); + + /* Cell references must be upper case: upper-case every bracketed reference (the matcher is case-insensitive) */ + icu::RegexMatcher rmatch3("<[a-z]{1,3}[0-9]+>", usInput, rMatcherFlags, status); + icu::UnicodeString replacedCellRefs; + while (rmatch3.find(status) && status.isSuccess()) { + rmatch3.appendReplacement(replacedCellRefs, rmatch3.group(status).toUpper(), status); + } + rmatch3.appendTail(replacedCellRefs); + + /* Fix up cell ranges: merge two bracketed references joined by ':' into a single bracketed range */ + icu::RegexMatcher rmatch4("<([A-Z]{1,3}[0-9]+)>:<([A-Z]{1,3}[0-9]+)>", replacedCellRefs, rMatcherFlags, status); + usInput = rmatch4.replaceAll(icu::UnicodeString("<$1:$2>"), status); + + /* Fix up user defined names: remove the brackets added above from the argument of DEFINED() */ + icu::RegexMatcher rmatch5("DEFINED\\s*\\(<([A-Z]+[0-9]+)>\\)", usInput, rMatcherFlags, status); + usInput = rmatch5.replaceAll(icu::UnicodeString("DEFINED($1)"), status); + + return OUString(usInput.getTerminatedBuffer()); +} + void DomainMapper_Impl::handleFieldFormula (const FieldContextPtr& pContext, uno::Reference< beans::XPropertySet > const& xFieldProperties) @@ -4215,7 +4284,7 @@ void DomainMapper_Impl::handleFieldFormula return; // we don't copy the = symbol from the command - OUString formula = command.copy(1); + OUString formula = convertFieldFormula(command.copy(1)); xFieldProperties->setPropertyValue(getPropertyName(PROP_CONTENT), uno::makeAny(formula)); xFieldProperties->setPropertyValue(getPropertyName(PROP_NUMBER_FORMAT), uno::makeAny(sal_Int32(0))); diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.hxx b/writerfilter/source/dmapper/DomainMapper_Impl.hxx index 2597f6877f34..54a0f6c7ac25 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.hxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.hxx @@ -818,7 +818,8 @@ public: (const FieldContextPtr& pContext, css::uno::Reference< css::uno::XInterface > & xFieldInterface, css::uno::Reference< css::beans::XPropertySet > const& 
xFieldProperties); - static void handleFieldFormula + OUString convertFieldFormula(const OUString& input); + void handleFieldFormula (const FieldContextPtr& pContext, css::uno::Reference< css::beans::XPropertySet > const& xFieldProperties); void handleAutoNum diff --git a/writerfilter/source/dmapper/SettingsTable.cxx b/writerfilter/source/dmapper/SettingsTable.cxx index 5ac2400cd493..2042ade7dc66 100644 --- a/writerfilter/source/dmapper/SettingsTable.cxx +++ b/writerfilter/source/dmapper/SettingsTable.cxx @@ -265,6 +265,8 @@ struct SettingsTable_Impl bool m_bReadOnly; bool m_bDisplayBackgroundShape; bool m_bNoLeading = false; + OUString m_sDecimalSymbol; + OUString m_sListSeparator; uno::Sequence m_pThemeFontLangProps; @@ -303,6 +305,8 @@ struct SettingsTable_Impl , m_sRedlineProtectionKey() , m_bReadOnly(false) , m_bDisplayBackgroundShape(false) + , m_sDecimalSymbol(".") + , m_sListSeparator(",") , m_pThemeFontLangProps(3) , m_pCurrentCompatSetting(3) {} @@ -479,8 +483,10 @@ void SettingsTable::lcl_sprm(Sprm& rSprm) case NS_ooxml::LN_CT_Settings_doNotIncludeSubdocsInStats: // 92554; // Do Not Include Content in Text Boxes, Footnotes, and Endnotes in Document Statistics) break; case NS_ooxml::LN_CT_Settings_decimalSymbol: // 92562; + m_pImpl->m_sDecimalSymbol = pValue->getString(); break; case NS_ooxml::LN_CT_Settings_listSeparator: // 92563; + m_pImpl->m_sListSeparator = pValue->getString(); break; case NS_ooxml::LN_CT_Settings_rsids: // 92549; revision save Ids - probably not necessary break; @@ -687,6 +693,17 @@ sal_Int16 SettingsTable::GetHypenationZone() const return m_pImpl->m_nHyphenationZone; } +OUString SettingsTable::GetDecimalSymbol() const +{ + return m_pImpl->m_sDecimalSymbol; +} + +OUString SettingsTable::GetListSeparator() const +{ + return m_pImpl->m_sListSeparator; +} + + uno::Sequence const & SettingsTable::GetThemeFontLangProperties() const { return m_pImpl->m_pThemeFontLangProps; diff --git a/writerfilter/source/dmapper/SettingsTable.hxx 
b/writerfilter/source/dmapper/SettingsTable.hxx index d91db71ab1f2..2ff62576a980 100644 --- a/writerfilter/source/dmapper/SettingsTable.hxx +++ b/writerfilter/source/dmapper/SettingsTable.hxx @@ -78,6 +78,9 @@ class SettingsTable : public LoggedProperties, public LoggedTable bool GetNoHyphenateCaps() const; sal_Int16 GetHypenationZone() const; + OUString GetDecimalSymbol() const; + OUString GetListSeparator() const; + css::uno::Sequence const & GetThemeFontLangProperties() const; css::uno::Sequence GetCompatSettings() const; -- cgit