diff options
author | Michael Warner <michael.warner.ut+libreoffice@gmail.com> | 2020-07-03 10:18:33 -0400 |
---|---|---|
committer | László Németh <nemeth@numbertext.org> | 2020-08-17 11:15:25 +0200 |
commit | 68e74bdf63e992666016c790e8e4cfd5b28d6abe (patch) | |
tree | 903d1bfbe952d5ff6c800d49a2329d1eb2153bf1 /writerfilter | |
parent | 3297c44c14d088c83bf729457e3d4ca629dc283c (diff) |
tdf133647 tdf123386 tdf123389 Improved .docx table formula import
Converts table formula syntax from MS Word to LibreOffice.
This version uses the document's list separator when building the
formula regular expressions; however, it does not convert the decimal
or list separators when the person opening the document uses a
different locale from the author.
Change-Id: I9600a0bea060a76705a7ad6b051ed4fdd50b9d40
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/98614
Tested-by: Jenkins
Tested-by: László Németh <nemeth@numbertext.org>
Reviewed-by: László Németh <nemeth@numbertext.org>
Diffstat (limited to 'writerfilter')
-rw-r--r-- | writerfilter/Library_writerfilter.mk | 4 | ||||
-rw-r--r-- | writerfilter/source/dmapper/DomainMapper_Impl.cxx | 71 | ||||
-rw-r--r-- | writerfilter/source/dmapper/DomainMapper_Impl.hxx | 3 | ||||
-rw-r--r-- | writerfilter/source/dmapper/SettingsTable.cxx | 17 | ||||
-rw-r--r-- | writerfilter/source/dmapper/SettingsTable.hxx | 3 |
5 files changed, 95 insertions, 3 deletions
diff --git a/writerfilter/Library_writerfilter.mk b/writerfilter/Library_writerfilter.mk index 61e62d66e10d..cbda03e9dcdd 100644 --- a/writerfilter/Library_writerfilter.mk +++ b/writerfilter/Library_writerfilter.mk @@ -51,7 +51,9 @@ $(eval $(call gb_Library_use_libraries,writerfilter,\ $(eval $(call gb_Library_use_externals,writerfilter,\ boost_headers \ - icu_headers \ + icui18n \ + icuuc \ + icu_headers \ libxml2 \ )) diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx index e33a6d753364..4e8afddc8d50 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx @@ -88,6 +88,8 @@ #include <map> #include <tuple> #include <unordered_map> +#include <regex> +#include <algorithm> #include <officecfg/Office/Common.hxx> #include <filter/msfilter/util.hxx> @@ -100,6 +102,9 @@ #include <tools/diagnose_ex.h> #include <sal/log.hxx> +#include <unicode/errorcode.h> +#include <unicode/regex.h> + using namespace ::com::sun::star; using namespace oox; namespace writerfilter::dmapper{ @@ -4196,6 +4201,70 @@ void DomainMapper_Impl::handleFieldAsk } } +/** + * Converts a Microsoft Word field formula into LibreOffice syntax + * @param input The Microsoft Word field formula, with no leading '=' sign + * @return An equivalent LibreOffice field formula + */ +OUString DomainMapper_Impl::convertFieldFormula(const OUString& input) { + + OUString listSeparator = m_pSettingsTable->GetListSeparator(); + + /* Replace logical condition functions with LO equivalent operators */ + OUString changed = input.replaceAll(" <> ", " NEQ "); + changed = changed.replaceAll(" <= ", " LEQ "); + changed = changed.replaceAll(" >= ", " GEQ "); + changed = changed.replaceAll(" = " , " EQ "); + changed = changed.replaceAll(" < " , " L "); + changed = changed.replaceAll(" > " , " G "); + + changed = changed.replaceAll("<>", " NEQ "); + changed = changed.replaceAll("<=", " LEQ "); + changed = 
changed.replaceAll(">=", " GEQ "); + changed = changed.replaceAll("=" , " EQ "); + changed = changed.replaceAll("<" , " L "); + changed = changed.replaceAll(">" , " G "); + + /* Replace function calls with infix keywords for AND(), OR(), and ROUND(). Nothing needs to be + * done for NOT(). This simple regex will work properly with most common cases. However, it may + * not work correctly when the arguments are nested subcalls to other functions, like + * ROUND(MIN(1,2),MAX(3,4)). See TDF#134765. */ + icu::ErrorCode status; + icu::UnicodeString usInput(changed.getStr()); + const uint32_t rMatcherFlags = UREGEX_CASE_INSENSITIVE; + OUString regex = "\\b(AND|OR|ROUND)\\s*\\(\\s*([^" + listSeparator + "]+)\\s*" + listSeparator + "\\s*([^)]+)\\s*\\)"; + icu::UnicodeString usRegex(regex.getStr()); + icu::RegexMatcher rmatch1(usRegex, usInput, rMatcherFlags, status); + usInput = rmatch1.replaceAll(icu::UnicodeString("(($2) $1 ($3))"), status); + + /* Assumes any remaining list separators separate arguments to functions that accept lists + * (SUM, MIN, MAX, MEAN, etc.) */ + usInput.findAndReplace(icu::UnicodeString(listSeparator.getStr()), "|"); + + /* Surround single cell references with angle brackets. + * If there is ever added a function name that ends with a digit, this regex will need to be revisited. 
*/ + icu::RegexMatcher rmatch2("\\b([A-Z]{1,3}[0-9]+)\\b(?![(])", usInput, rMatcherFlags, status); + usInput = rmatch2.replaceAll(icu::UnicodeString("<$1>"), status); + + /* Cell references must be upper case */ + icu::RegexMatcher rmatch3("<[a-z]{1,3}[0-9]+>", usInput, rMatcherFlags, status); + icu::UnicodeString replacedCellRefs; + while (rmatch3.find(status) && status.isSuccess()) { + rmatch3.appendReplacement(replacedCellRefs, rmatch3.group(status).toUpper(), status); + } + rmatch3.appendTail(replacedCellRefs); + + /* Fix up cell ranges */ + icu::RegexMatcher rmatch4("<([A-Z]{1,3}[0-9]+)>:<([A-Z]{1,3}[0-9]+)>", replacedCellRefs, rMatcherFlags, status); + usInput = rmatch4.replaceAll(icu::UnicodeString("<$1:$2>"), status); + + /* Fix up user defined names */ + icu::RegexMatcher rmatch5("DEFINED\\s*\\(<([A-Z]+[0-9]+)>\\)", usInput, rMatcherFlags, status); + usInput = rmatch5.replaceAll(icu::UnicodeString("DEFINED($1)"), status); + + return OUString(usInput.getTerminatedBuffer()); +} + void DomainMapper_Impl::handleFieldFormula (const FieldContextPtr& pContext, uno::Reference< beans::XPropertySet > const& xFieldProperties) @@ -4215,7 +4284,7 @@ void DomainMapper_Impl::handleFieldFormula return; // we don't copy the = symbol from the command - OUString formula = command.copy(1); + OUString formula = convertFieldFormula(command.copy(1)); xFieldProperties->setPropertyValue(getPropertyName(PROP_CONTENT), uno::makeAny(formula)); xFieldProperties->setPropertyValue(getPropertyName(PROP_NUMBER_FORMAT), uno::makeAny(sal_Int32(0))); diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.hxx b/writerfilter/source/dmapper/DomainMapper_Impl.hxx index 2597f6877f34..54a0f6c7ac25 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.hxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.hxx @@ -818,7 +818,8 @@ public: (const FieldContextPtr& pContext, css::uno::Reference< css::uno::XInterface > & xFieldInterface, css::uno::Reference< css::beans::XPropertySet > const& 
xFieldProperties); - static void handleFieldFormula + OUString convertFieldFormula(const OUString& input); + void handleFieldFormula (const FieldContextPtr& pContext, css::uno::Reference< css::beans::XPropertySet > const& xFieldProperties); void handleAutoNum diff --git a/writerfilter/source/dmapper/SettingsTable.cxx b/writerfilter/source/dmapper/SettingsTable.cxx index 5ac2400cd493..2042ade7dc66 100644 --- a/writerfilter/source/dmapper/SettingsTable.cxx +++ b/writerfilter/source/dmapper/SettingsTable.cxx @@ -265,6 +265,8 @@ struct SettingsTable_Impl bool m_bReadOnly; bool m_bDisplayBackgroundShape; bool m_bNoLeading = false; + OUString m_sDecimalSymbol; + OUString m_sListSeparator; uno::Sequence<beans::PropertyValue> m_pThemeFontLangProps; @@ -303,6 +305,8 @@ struct SettingsTable_Impl , m_sRedlineProtectionKey() , m_bReadOnly(false) , m_bDisplayBackgroundShape(false) + , m_sDecimalSymbol(".") + , m_sListSeparator(",") , m_pThemeFontLangProps(3) , m_pCurrentCompatSetting(3) {} @@ -479,8 +483,10 @@ void SettingsTable::lcl_sprm(Sprm& rSprm) case NS_ooxml::LN_CT_Settings_doNotIncludeSubdocsInStats: // 92554; // Do Not Include Content in Text Boxes, Footnotes, and Endnotes in Document Statistics) break; case NS_ooxml::LN_CT_Settings_decimalSymbol: // 92562; + m_pImpl->m_sDecimalSymbol = pValue->getString(); break; case NS_ooxml::LN_CT_Settings_listSeparator: // 92563; + m_pImpl->m_sListSeparator = pValue->getString(); break; case NS_ooxml::LN_CT_Settings_rsids: // 92549; revision save Ids - probably not necessary break; @@ -687,6 +693,17 @@ sal_Int16 SettingsTable::GetHypenationZone() const return m_pImpl->m_nHyphenationZone; } +OUString SettingsTable::GetDecimalSymbol() const +{ + return m_pImpl->m_sDecimalSymbol; +} + +OUString SettingsTable::GetListSeparator() const +{ + return m_pImpl->m_sListSeparator; +} + + uno::Sequence<beans::PropertyValue> const & SettingsTable::GetThemeFontLangProperties() const { return m_pImpl->m_pThemeFontLangProps; diff --git 
a/writerfilter/source/dmapper/SettingsTable.hxx b/writerfilter/source/dmapper/SettingsTable.hxx index d91db71ab1f2..2ff62576a980 100644 --- a/writerfilter/source/dmapper/SettingsTable.hxx +++ b/writerfilter/source/dmapper/SettingsTable.hxx @@ -78,6 +78,9 @@ class SettingsTable : public LoggedProperties, public LoggedTable bool GetNoHyphenateCaps() const; sal_Int16 GetHypenationZone() const; + OUString GetDecimalSymbol() const; + OUString GetListSeparator() const; + css::uno::Sequence<css::beans::PropertyValue> const & GetThemeFontLangProperties() const; css::uno::Sequence<css::beans::PropertyValue> GetCompatSettings() const; |