diff options
author | Eike Rathke <erack@redhat.com> | 2021-07-28 17:31:56 +0200 |
---|---|---|
committer | Eike Rathke <erack@redhat.com> | 2021-07-28 18:56:29 +0200 |
commit | 516318113f0bd2b3c658aba9b285165e63a280e2 (patch) | |
tree | 3e33e570b0d62b36afa95045999d115fe005d126 /sc | |
parent | 24b06b9c6bdb777dff385b0fbfc81d55d3d013a1 (diff) |
Resolves: tdf#76310 Preserve whitespace TAB, CR, LF in formula expressions
The whitespace characters allowed in ODFF and OOXML are:
U+0020 SPACE
U+0009 CHARACTER TABULATION
U+000A LINE FEED
U+000D CARRIAGE RETURN
Line feed and carriage return look a bit odd in the Function Wizard when
they are part of a function's argument, but they work. Once a formula is
edited, however, each CR is converted to LF; this probably already happens
in EditEngine, but it was not investigated.
Change-Id: I6278f6be48872e0710a3d74212db391dda249ed2
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/119635
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
Diffstat (limited to 'sc')
-rw-r--r-- | sc/inc/compiler.hxx | 20 | ||||
-rw-r--r-- | sc/source/core/tool/compiler.cxx | 78 | ||||
-rw-r--r-- | sc/source/core/tool/parclass.cxx | 1 | ||||
-rw-r--r-- | sc/source/core/tool/token.cxx | 18 | ||||
-rw-r--r-- | sc/source/filter/excel/xeformula.cxx | 8 | ||||
-rw-r--r-- | sc/source/filter/excel/xlformula.cxx | 5 | ||||
-rw-r--r-- | sc/source/ui/app/inputhdl.cxx | 2 | ||||
-rw-r--r-- | sc/source/ui/unoobj/tokenuno.cxx | 13 | ||||
-rw-r--r-- | sc/source/ui/view/viewfunc.cxx | 2 |
9 files changed, 120 insertions, 27 deletions
diff --git a/sc/inc/compiler.hxx b/sc/inc/compiler.hxx index d8935c7f7545..17e258dc3805 100644 --- a/sc/inc/compiler.hxx +++ b/sc/inc/compiler.hxx @@ -108,6 +108,10 @@ public: union { double nValue; struct { + sal_uInt8 nCount; + sal_Unicode cChar; + } whitespace; + struct { sal_uInt8 cByte; formula::ParamClass eInForceArray; } sbyte; @@ -326,7 +330,21 @@ private: bool ToUpperAsciiOrI18nIsAscii( OUString& rUpper, const OUString& rOrg ) const; virtual void SetError(FormulaError nError) override; - sal_Int32 NextSymbol(bool bInArray); + + struct Whitespace final + { + sal_Int32 nCount; + sal_Unicode cChar; + + Whitespace() : nCount(0), cChar(0x20) {} + void reset( sal_Unicode c ) { nCount = 0; cChar = c; } + }; + + static void addWhitespace( std::vector<ScCompiler::Whitespace> & rvSpaces, + ScCompiler::Whitespace & rSpace, sal_Unicode c, sal_Int32 n = 1 ); + + std::vector<Whitespace> NextSymbol(bool bInArray); + bool IsValue( const OUString& ); bool IsOpCode( const OUString&, bool bInArray ); bool IsOpCode2( const OUString& ); diff --git a/sc/source/core/tool/compiler.cxx b/sc/source/core/tool/compiler.cxx index 0d1dc9d52aa6..83eb2f4ab7db 100644 --- a/sc/source/core/tool/compiler.cxx +++ b/sc/source/core/tool/compiler.cxx @@ -336,11 +336,8 @@ ScCompiler::Convention::Convention( FormulaGrammar::AddressConvention eConv ) for (i = 0; i < 128; i++) t[i] = ScCharFlags::Illegal; -// tdf#56036: Allow tabs/newlines in imported formulas (for now simply treat them as (and convert to) space) -// TODO: tdf#76310: allow saving newlines as is (as per OpenFormula specification v.1.2, clause 5.14 "Whitespace") -// This is compliant with the OASIS decision (see https://issues.oasis-open.org/browse/OFFICE-701) -// Also, this would enable correct roundtrip from/to OOXML without losing tabs/newlines -// This requires saving actual space characters in ocSpaces token, using them in UI and saving +// Allow tabs/newlines. 
+// Allow saving whitespace as is (as per OpenFormula specification v.1.2, clause 5.14 "Whitespace"). /* tab */ t[ 9] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep; /* lf */ t[10] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep; /* cr */ t[13] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep; @@ -2067,6 +2064,19 @@ static bool lcl_isUnicodeIgnoreAscii( const sal_Unicode* p1, const char* p2, siz return true; } +// static +void ScCompiler::addWhitespace( std::vector<ScCompiler::Whitespace> & rvSpaces, + ScCompiler::Whitespace & rSpace, sal_Unicode c, sal_Int32 n ) +{ + if (rSpace.cChar != c) + { + if (rSpace.cChar && rSpace.nCount > 0) + rvSpaces.emplace_back(rSpace); + rSpace.reset(c); + } + rSpace.nCount += n; +} + // NextSymbol // Parses the formula into separate symbols for further processing. @@ -2104,8 +2114,9 @@ static bool lcl_isUnicodeIgnoreAscii( const sal_Unicode* p1, const char* p2, siz // | other | Symbol=Symbol+char | GetString //---------------+-------------------+-----------------------+--------------- -sal_Int32 ScCompiler::NextSymbol(bool bInArray) +std::vector<ScCompiler::Whitespace> ScCompiler::NextSymbol(bool bInArray) { + std::vector<Whitespace> vSpaces; cSymbol[MAXSTRLEN] = 0; // end sal_Unicode* pSym = cSymbol; const sal_Unicode* const pStart = aFormula.getStr(); @@ -2116,7 +2127,7 @@ sal_Int32 ScCompiler::NextSymbol(bool bInArray) bool bQuote = false; mnRangeOpPosInSymbol = -1; ScanState eState = ssGetChar; - sal_Int32 nSpaces = 0; + Whitespace aSpace; sal_Unicode cSep = mxSymbols->getSymbolChar( ocSep); sal_Unicode cArrayColSep = mxSymbols->getSymbolChar( ocArrayColSep); sal_Unicode cArrayRowSep = mxSymbols->getSymbolChar( ocArrayRowSep); @@ -2129,6 +2140,7 @@ sal_Int32 ScCompiler::NextSymbol(bool bInArray) int nDecSeps = 0; bool bAutoIntersection = false; + size_t nAutoIntersectionSpacesPos = 0; int nRefInName = 0; bool bErrorConstantHadSlash = false; 
mnPredetectedReference = 0; @@ -2187,7 +2199,12 @@ Label_MaskStateMachine: if (!bAutoIntersection) { ++pSrc; - nSpaces += 2; // must match the character count + // Add 2 because it must match the character count + // for bi18n. + addWhitespace( vSpaces, aSpace, 0x20, 2); + // Position of Whitespace where it will be added to + // vector. + nAutoIntersectionSpacesPos = vSpaces.size(); bAutoIntersection = true; } else @@ -2267,7 +2284,7 @@ Label_MaskStateMachine: } else if( nMask & ScCharFlags::CharDontCare ) { - nSpaces++; + addWhitespace( vSpaces, aSpace, c); } else if( nMask & ScCharFlags::CharIdent ) { // try to get a simple ASCII identifier before calling @@ -2731,10 +2748,15 @@ Label_MaskStateMachine: cLast = c; c = *pSrc; } + + if (aSpace.nCount && aSpace.cChar) + vSpaces.emplace_back(aSpace); + if ( bi18n ) { const sal_Int32 nOldSrcPos = nSrcPos; - nSrcPos = nSrcPos + nSpaces; + for (const auto& r : vSpaces) + nSrcPos += r.nCount; // If group separator is not a possible operator and not one of any // separators then it may be parsed away in numbers. This is // specifically the case with NO-BREAK SPACE, which actually triggers @@ -2835,9 +2857,9 @@ Label_MaskStateMachine: } if ( bAutoCorrect ) aCorrectedSymbol = OUString(cSymbol, pSym - cSymbol); - if (bAutoIntersection && nSpaces > 1) - --nSpaces; // replace '!!' with only one space - return nSpaces; + if (bAutoIntersection && vSpaces[nAutoIntersectionSpacesPos].nCount > 1) + --vSpaces[nAutoIntersectionSpacesPos].nCount; // replace '!!' 
with only one space + return vSpaces; } // Convert symbol to token @@ -4246,7 +4268,7 @@ bool ScCompiler::NextNewToken( bool bInArray ) } bool bAllowBooleans = bInArray; - sal_Int32 nSpaces = NextSymbol(bInArray); + const std::vector<Whitespace> & vSpaces = NextSymbol(bInArray); if (!cSymbol[0]) { @@ -4266,15 +4288,31 @@ bool ScCompiler::NextNewToken( bool bInArray ) return false; } - if( nSpaces ) + if (!vSpaces.empty()) { ScRawToken aToken; - aToken.SetOpCode( ocSpaces ); - aToken.sbyte.cByte = static_cast<sal_uInt8>( std::min<sal_Int32>(nSpaces, 255) ); - if( !static_cast<ScTokenArray*>(pArr)->AddRawToken( aToken ) ) + for (const auto& rSpace : vSpaces) { - SetError(FormulaError::CodeOverflow); - return false; + if (rSpace.cChar == 0x20) + { + // For now keep this a FormulaByteToken for the nasty + // significant whitespace intersection. This probably can be + // changed to a FormulaSpaceToken but then other places may + // need to be adapted. + aToken.SetOpCode( ocSpaces ); + aToken.sbyte.cByte = static_cast<sal_uInt8>( std::min<sal_Int32>(rSpace.nCount, 255) ); + } + else + { + aToken.SetOpCode( ocWhitespace ); + aToken.whitespace.nCount = static_cast<sal_uInt8>( std::min<sal_Int32>(rSpace.nCount, 255) ); + aToken.whitespace.cChar = rSpace.cChar; + } + if (!static_cast<ScTokenArray*>(pArr)->AddRawToken( aToken )) + { + SetError(FormulaError::CodeOverflow); + return false; + } } } diff --git a/sc/source/core/tool/parclass.cxx b/sc/source/core/tool/parclass.cxx index 8dd39016cb48..6c560b07b42f 100644 --- a/sc/source/core/tool/parclass.cxx +++ b/sc/source/core/tool/parclass.cxx @@ -74,6 +74,7 @@ const ScParameterClassification::RawData ScParameterClassification::pRawData[] = { ocSep, {{ Bounds }, 0, Bounds }}, { ocSkip, {{ Bounds }, 0, Bounds }}, { ocSpaces, {{ Bounds }, 0, Bounds }}, + { ocWhitespace, {{ Bounds }, 0, Bounds }}, { ocStop, {{ Bounds }, 0, Bounds }}, { ocStringXML, {{ Bounds }, 0, Bounds }}, { ocTableRef, {{ Bounds }, 0, Value }}, // or Reference? 
diff --git a/sc/source/core/tool/token.cxx b/sc/source/core/tool/token.cxx index 04355b86a8ec..f17cd9ac27ad 100644 --- a/sc/source/core/tool/token.cxx +++ b/sc/source/core/tool/token.cxx @@ -235,6 +235,11 @@ void ScRawToken::SetOpCode( OpCode e ) case ocTableRefClose: eType = svSep; break; + case ocWhitespace: + eType = svByte; + whitespace.nCount = 1; + whitespace.cChar = 0x20; + break; default: eType = svByte; sbyte.cByte = 0; @@ -349,7 +354,10 @@ FormulaToken* ScRawToken::CreateToken(ScSheetLimits& rLimits) const switch ( GetType() ) { case svByte : - return new FormulaByteToken( eOp, sbyte.cByte, sbyte.eInForceArray ); + if (eOp == ocWhitespace) + return new FormulaSpaceToken( whitespace.nCount, whitespace.cChar ); + else + return new FormulaByteToken( eOp, sbyte.cByte, sbyte.eInForceArray ); case svDouble : IF_NOT_OPCODE_ERROR( ocPush, FormulaDoubleToken); return new FormulaDoubleToken( nValue ); @@ -1652,6 +1660,7 @@ void ScTokenArray::CheckToken( const FormulaToken& r ) case ocMissing: case ocBad: case ocSpaces: + case ocWhitespace: case ocSkip: case ocPercentSign: case ocErrNull: @@ -2089,6 +2098,7 @@ FormulaToken* ScTokenArray::MergeArray( ) break; case ocSpaces : + case ocWhitespace : // ignore spaces --nPrevRowSep; // shorten this row by 1 break; @@ -5136,12 +5146,18 @@ OUString ScTokenArray::CreateString( sc::TokenStringContext& rCxt, const ScAddre { const FormulaToken* pToken = *p; OpCode eOp = pToken->GetOpCode(); + /* FIXME: why does this ignore the count of spaces? */ if (eOp == ocSpaces) { // TODO : Handle intersection operator '!!'. 
aBuf.append(' '); continue; } + else if (eOp == ocWhitespace) + { + aBuf.append( pToken->GetChar()); + continue; + } if (eOp < rCxt.mxOpCodeMap->getSymbolCount()) aBuf.append(rCxt.mxOpCodeMap->getSymbol(eOp)); diff --git a/sc/source/filter/excel/xeformula.cxx b/sc/source/filter/excel/xeformula.cxx index f2edeffb263c..f829529ca0db 100644 --- a/sc/source/filter/excel/xeformula.cxx +++ b/sc/source/filter/excel/xeformula.cxx @@ -826,9 +826,13 @@ const FormulaToken* XclExpFmlaCompImpl::PeekNextRawToken() const bool XclExpFmlaCompImpl::GetNextToken( XclExpScToken& rTokData ) { rTokData.mpScToken = GetNextRawToken(); - rTokData.mnSpaces = (rTokData.GetOpCode() == ocSpaces) ? rTokData.mpScToken->GetByte() : 0; - while( rTokData.GetOpCode() == ocSpaces ) + rTokData.mnSpaces = 0; + /* TODO: handle ocWhitespace characters? */ + while (rTokData.GetOpCode() == ocSpaces || rTokData.GetOpCode() == ocWhitespace) + { + rTokData.mnSpaces += rTokData.mpScToken->GetByte(); rTokData.mpScToken = GetNextRawToken(); + } return rTokData.Is(); } diff --git a/sc/source/filter/excel/xlformula.cxx b/sc/source/filter/excel/xlformula.cxx index 1f974f47b38b..e2e082ac2651 100644 --- a/sc/source/filter/excel/xlformula.cxx +++ b/sc/source/filter/excel/xlformula.cxx @@ -867,8 +867,11 @@ void XclTokenArrayIterator::NextRawToken() void XclTokenArrayIterator::SkipSpaces() { if( mbSkipSpaces ) - while( Is() && ((*this)->GetOpCode() == ocSpaces) ) + { + OpCode eOp; + while( Is() && (((eOp = (*this)->GetOpCode()) == ocSpaces) || eOp == ocWhitespace) ) NextRawToken(); + } } // strings and string lists --------------------------------------------------- diff --git a/sc/source/ui/app/inputhdl.cxx b/sc/source/ui/app/inputhdl.cxx index f01f93d46d70..b3e644fc8620 100644 --- a/sc/source/ui/app/inputhdl.cxx +++ b/sc/source/ui/app/inputhdl.cxx @@ -654,7 +654,7 @@ void ScInputHandler::DeleteRangeFinder() static OUString GetEditText(const EditEngine* pEng) { - return ScEditUtil::GetSpaceDelimitedString(*pEng); + 
return ScEditUtil::GetMultilineString(*pEng); } static void lcl_RemoveTabs(OUString& rStr) diff --git a/sc/source/ui/unoobj/tokenuno.cxx b/sc/source/ui/unoobj/tokenuno.cxx index 33f005fff8d6..b07a04e12b04 100644 --- a/sc/source/ui/unoobj/tokenuno.cxx +++ b/sc/source/ui/unoobj/tokenuno.cxx @@ -32,6 +32,7 @@ #include <svl/itemprop.hxx> #include <vcl/svapp.hxx> +#include <comphelper/string.hxx> #include <miscuno.hxx> #include <convuno.hxx> @@ -388,6 +389,18 @@ void ScTokenConversion::ConvertToTokenSequence( const ScDocument& rDoc, // Only the count of spaces is stored as "long". Parameter count is ignored. if ( eOpCode == ocSpaces ) rAPI.Data <<= static_cast<sal_Int32>(rToken.GetByte()); + else if (eOpCode == ocWhitespace) + { + // Convention is one character repeated. + if (rToken.GetByte() == 1) + rAPI.Data <<= OUString( rToken.GetChar()); + else + { + OUStringBuffer aBuf( rToken.GetByte()); + comphelper::string::padToLength( aBuf, rToken.GetByte(), rToken.GetChar()); + rAPI.Data <<= aBuf.makeStringAndClear(); + } + } else rAPI.Data.clear(); // no data break; diff --git a/sc/source/ui/view/viewfunc.cxx b/sc/source/ui/view/viewfunc.cxx index fbe8a0b3719c..4e5e149c9755 100644 --- a/sc/source/ui/view/viewfunc.cxx +++ b/sc/source/ui/view/viewfunc.cxx @@ -677,7 +677,7 @@ void ScViewFunc::EnterData( SCCOL nCol, SCROW nRow, SCTAB nTab, } // #i97726# always get text for "repeat" of undo action - aString = ScEditUtil::GetSpaceDelimitedString(aEngine); + aString = ScEditUtil::GetMultilineString(aEngine); // undo |