diff options
author | Eike Rathke <erack@redhat.com> | 2021-07-28 17:31:56 +0200 |
---|---|---|
committer | Eike Rathke <erack@redhat.com> | 2021-07-28 18:56:29 +0200 |
commit | 516318113f0bd2b3c658aba9b285165e63a280e2 (patch) | |
tree | 3e33e570b0d62b36afa95045999d115fe005d126 | |
parent | 24b06b9c6bdb777dff385b0fbfc81d55d3d013a1 (diff) |
Resolves: tdf#76310 Preserve whitespace TAB, CR, LF in formula expressions
Allowed whitespace characters in ODFF and OOXML are:
U+0020 SPACE
U+0009 CHARACTER TABULATION
U+000A LINE FEED
U+000D CARRIAGE RETURN
Line feed and carriage return look a bit odd in the Function Wizard when
they are part of a function's argument, but they work. Once a formula is
edited, however, CRs are converted to LFs, probably already in EditEngine;
this was not investigated further.
Change-Id: I6278f6be48872e0710a3d74212db391dda249ed2
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/119635
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
-rw-r--r-- | formula/source/core/api/FormulaCompiler.cxx | 29 | ||||
-rw-r--r-- | formula/source/core/api/token.cxx | 39 | ||||
-rw-r--r-- | formula/source/ui/dlg/formula.cxx | 8 | ||||
-rw-r--r-- | include/formula/compiler.hxx | 41 | ||||
-rw-r--r-- | include/formula/opcode.hxx | 2 | ||||
-rw-r--r-- | include/formula/token.hxx | 20 | ||||
-rw-r--r-- | offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl | 24 | ||||
-rw-r--r-- | sc/inc/compiler.hxx | 20 | ||||
-rw-r--r-- | sc/source/core/tool/compiler.cxx | 78 | ||||
-rw-r--r-- | sc/source/core/tool/parclass.cxx | 1 | ||||
-rw-r--r-- | sc/source/core/tool/token.cxx | 18 | ||||
-rw-r--r-- | sc/source/filter/excel/xeformula.cxx | 8 | ||||
-rw-r--r-- | sc/source/filter/excel/xlformula.cxx | 5 | ||||
-rw-r--r-- | sc/source/ui/app/inputhdl.cxx | 2 | ||||
-rw-r--r-- | sc/source/ui/unoobj/tokenuno.cxx | 13 | ||||
-rw-r--r-- | sc/source/ui/view/viewfunc.cxx | 2 |
16 files changed, 240 insertions, 70 deletions
diff --git a/formula/source/core/api/FormulaCompiler.cxx b/formula/source/core/api/FormulaCompiler.cxx index be5ce09d132f..f7174807f0f4 100644 --- a/formula/source/core/api/FormulaCompiler.cxx +++ b/formula/source/core/api/FormulaCompiler.cxx @@ -475,7 +475,8 @@ uno::Sequence< sheet::FormulaOpCodeMapEntry > FormulaCompiler::OpCodeMap::create { FormulaMapGroupSpecialOffset::DB_AREA , ocDBArea } , /* TODO: { FormulaMapGroupSpecialOffset::TABLE_REF , ocTableRef } , */ { FormulaMapGroupSpecialOffset::MACRO , ocMacro } , - { FormulaMapGroupSpecialOffset::COL_ROW_NAME , ocColRowName } + { FormulaMapGroupSpecialOffset::COL_ROW_NAME , ocColRowName } , + { FormulaMapGroupSpecialOffset::WHITESPACE , ocWhitespace } }; const size_t nCount = SAL_N_ELEMENTS(aMap); // Preallocate vector elements. @@ -1267,14 +1268,18 @@ bool FormulaCompiler::GetToken() nWasColRowName = 1; else nWasColRowName = 0; + OpCode eTmpOp; mpToken = maArrIterator.Next(); - while( mpToken && mpToken->GetOpCode() == ocSpaces ) + while (mpToken && ((eTmpOp = mpToken->GetOpCode()) == ocSpaces || eTmpOp == ocWhitespace)) { - // For significant whitespace remember last ocSpaces token. Usually - // there's only one even for multiple spaces. - pSpacesToken = mpToken; - if ( nWasColRowName ) - nWasColRowName++; + if (eTmpOp == ocSpaces) + { + // For significant whitespace remember last ocSpaces token. + // Usually there's only one even for multiple spaces. 
+ pSpacesToken = mpToken; + if ( nWasColRowName ) + nWasColRowName++; + } if ( bAutoCorrect && !pStack ) CreateStringFromToken( aCorrectedFormula, mpToken.get() ); mpToken = maArrIterator.Next(); @@ -2272,10 +2277,10 @@ const FormulaToken* FormulaCompiler::CreateStringFromToken( OUStringBuffer& rBuf if( bSpaces ) rBuffer.append( ' '); - if( eOp == ocSpaces ) + if (eOp == ocSpaces || eOp == ocWhitespace) { bool bWriteSpaces = true; - if (mxSymbols->isODFF()) + if (eOp == ocSpaces && mxSymbols->isODFF()) { const FormulaToken* p = maArrIterator.PeekPrevNoSpaces(); bool bIntersectionOp = (p && p->GetOpCode() == ocColRowName); @@ -2316,7 +2321,10 @@ const FormulaToken* FormulaCompiler::CreateStringFromToken( OUStringBuffer& rBuf sal_uInt8 n = t->GetByte(); for ( sal_uInt8 j=0; j<n; ++j ) { - rBuffer.append( ' '); + if (eOp == ocWhitespace) + rBuffer.append( t->GetChar()); + else + rBuffer.append( ' '); } } } @@ -2403,6 +2411,7 @@ const FormulaToken* FormulaCompiler::CreateStringFromToken( OUStringBuffer& rBuf case ocPush: case ocRange: case ocSpaces: + case ocWhitespace: break; default: nLevel = 0; diff --git a/formula/source/core/api/token.cxx b/formula/source/core/api/token.cxx index 0af1f63f0e5e..c5b69acf2c90 100644 --- a/formula/source/core/api/token.cxx +++ b/formula/source/core/api/token.cxx @@ -244,6 +244,13 @@ void FormulaToken::SetSheet( sal_Int16 ) assert( !"virtual dummy called" ); } +sal_Unicode FormulaToken::GetChar() const +{ + // This Get is worth an assert. 
+ assert( !"virtual dummy called" ); + return 0; +} + short* FormulaToken::GetJump() const { SAL_WARN( "formula.core", "FormulaToken::GetJump: virtual dummy called" ); @@ -348,6 +355,15 @@ bool FormulaToken::TextEqual( const FormulaToken& rToken ) const // real implementations of virtual functions +sal_uInt8 FormulaSpaceToken::GetByte() const { return nByte; } +sal_Unicode FormulaSpaceToken::GetChar() const { return cChar; } +bool FormulaSpaceToken::operator==( const FormulaToken& r ) const +{ + return FormulaToken::operator==( r ) && nByte == r.GetByte() && + cChar == r.GetChar(); +} + + sal_uInt8 FormulaByteToken::GetByte() const { return nByte; } void FormulaByteToken::SetByte( sal_uInt8 n ) { nByte = n; } ParamClass FormulaByteToken::GetInForceArray() const { return eInForceArray; } @@ -425,6 +441,13 @@ bool FormulaTokenArray::AddFormulaToken( AddStringXML( aStrVal ); else if ( eOpCode == ocExternal || eOpCode == ocMacro ) Add( new formula::FormulaExternalToken( eOpCode, aStrVal ) ); + else if ( eOpCode == ocWhitespace ) + { + // Simply ignore empty string. + // Convention is one character repeated. 
+ if (!aStrVal.isEmpty()) + Add( new formula::FormulaSpaceToken( static_cast<sal_uInt8>(aStrVal.getLength()), aStrVal[0])); + } else bError = true; // unexpected string: don't know what to do with it } @@ -1472,17 +1495,21 @@ FormulaTokenArray * FormulaTokenArray::RewriteMissing( const MissingConvention & return pNewArr; } +namespace { +inline bool isWhitespace( OpCode eOp ) { return eOp == ocSpaces || eOp == ocWhitespace; } +} + bool FormulaTokenArray::MayReferenceFollow() { if ( pCode && nLen > 0 ) { // ignore trailing spaces sal_uInt16 i = nLen - 1; - while ( i > 0 && pCode[i]->GetOpCode() == SC_OPCODE_SPACES ) + while (i > 0 && isWhitespace( pCode[i]->GetOpCode())) { --i; } - if ( i > 0 || pCode[i]->GetOpCode() != SC_OPCODE_SPACES ) + if (i > 0 || !isWhitespace( pCode[i]->GetOpCode())) { OpCode eOp = pCode[i]->GetOpCode(); if ( (SC_OPCODE_START_BIN_OP <= eOp && eOp < SC_OPCODE_STOP_BIN_OP ) || @@ -1756,7 +1783,7 @@ FormulaToken* FormulaTokenArrayPlainIterator::NextNoSpaces() { if( mpFTA->GetArray() ) { - while( (mnIndex < mpFTA->GetLen()) && (mpFTA->GetArray()[ mnIndex ]->GetOpCode() == ocSpaces) ) + while ((mnIndex < mpFTA->GetLen()) && isWhitespace( mpFTA->GetArray()[ mnIndex ]->GetOpCode())) ++mnIndex; if( mnIndex < mpFTA->GetLen() ) return mpFTA->GetArray()[ mnIndex++ ]; @@ -1793,7 +1820,7 @@ FormulaToken* FormulaTokenArrayPlainIterator::PeekNextNoSpaces() const if( mpFTA->GetArray() && mnIndex < mpFTA->GetLen() ) { sal_uInt16 j = mnIndex; - while ( j < mpFTA->GetLen() && mpFTA->GetArray()[j]->GetOpCode() == ocSpaces ) + while (j < mpFTA->GetLen() && isWhitespace( mpFTA->GetArray()[j]->GetOpCode())) j++; if ( j < mpFTA->GetLen() ) return mpFTA->GetArray()[ j ]; @@ -1809,9 +1836,9 @@ FormulaToken* FormulaTokenArrayPlainIterator::PeekPrevNoSpaces() const if( mpFTA->GetArray() && mnIndex > 1 ) { sal_uInt16 j = mnIndex - 2; - while ( mpFTA->GetArray()[j]->GetOpCode() == ocSpaces && j > 0 ) + while (isWhitespace( mpFTA->GetArray()[j]->GetOpCode()) && j > 0 ) 
j--; - if ( j > 0 || mpFTA->GetArray()[j]->GetOpCode() != ocSpaces ) + if (j > 0 || !isWhitespace( mpFTA->GetArray()[j]->GetOpCode())) return mpFTA->GetArray()[ j ]; else return nullptr; diff --git a/formula/source/ui/dlg/formula.cxx b/formula/source/ui/dlg/formula.cxx index 81931d8d586b..36b59d5eb0ec 100644 --- a/formula/source/ui/dlg/formula.cxx +++ b/formula/source/ui/dlg/formula.cxx @@ -389,6 +389,9 @@ sal_Int32 FormulaDlg_Impl::GetFunctionPos(sal_Int32 nPos) sal_Int32 nOldTokPos = 1; sal_Int32 nPrevFuncPos = 1; short nBracketCount = 0; + const sal_Int32 nOpPush = m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::PUSH].Token.OpCode; + const sal_Int32 nOpSpaces = m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::SPACES].Token.OpCode; + const sal_Int32 nOpWhitespace = m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::WHITESPACE].Token.OpCode; while ( pIter != pEnd ) { const sal_Int32 eOp = pIter->OpCode; @@ -401,8 +404,7 @@ sal_Int32 FormulaDlg_Impl::GetFunctionPos(sal_Int32 nPos) m_xBtnMatrix->set_active(true); } - if (eOp == m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::PUSH].Token.OpCode || - eOp == m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::SPACES].Token.OpCode) + if (eOp == nOpPush || eOp == nOpSpaces || eOp == nOpWhitespace) { const sal_Int32 n1 = nTokPos < 0 ? -1 : aFormString.indexOf( sep, nTokPos); const sal_Int32 n2 = nTokPos < 0 ? 
-1 : aFormString.indexOf( ')', nTokPos); @@ -444,7 +446,7 @@ sal_Int32 FormulaDlg_Impl::GetFunctionPos(sal_Int32 nPos) m_pFunctionOpCodesEnd, [&eOp](const sheet::FormulaOpCodeMapEntry& aEntry) { return aEntry.Token.OpCode == eOp; }); - if ( bIsFunction && m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::SPACES].Token.OpCode != eOp ) + if ( bIsFunction && nOpSpaces != eOp && nOpWhitespace != eOp ) { nPrevFuncPos = nFuncPos; nFuncPos = nOldTokPos; diff --git a/include/formula/compiler.hxx b/include/formula/compiler.hxx index baf3e23f6161..fcf7326d3e0f 100644 --- a/include/formula/compiler.hxx +++ b/include/formula/compiler.hxx @@ -40,26 +40,27 @@ #define SC_OPCODE_BAD 14 #define SC_OPCODE_STRINGXML 15 #define SC_OPCODE_SPACES 16 -#define SC_OPCODE_MAT_REF 17 -#define SC_OPCODE_DB_AREA 18 /* additional access operators */ -#define SC_OPCODE_TABLE_REF 19 -#define SC_OPCODE_MACRO 20 -#define SC_OPCODE_COL_ROW_NAME 21 -#define SC_OPCODE_COL_ROW_NAME_AUTO 22 -#define SC_OPCODE_PERCENT_SIGN 23 /* operator _follows_ value */ -#define SC_OPCODE_ARRAY_OPEN 24 -#define SC_OPCODE_ARRAY_CLOSE 25 -#define SC_OPCODE_ARRAY_ROW_SEP 26 -#define SC_OPCODE_ARRAY_COL_SEP 27 /* some convs use sep != col_sep */ -#define SC_OPCODE_TABLE_REF_OPEN 28 -#define SC_OPCODE_TABLE_REF_CLOSE 29 -#define SC_OPCODE_TABLE_REF_ITEM_ALL 30 -#define SC_OPCODE_TABLE_REF_ITEM_HEADERS 31 -#define SC_OPCODE_TABLE_REF_ITEM_DATA 32 -#define SC_OPCODE_TABLE_REF_ITEM_TOTALS 33 -#define SC_OPCODE_TABLE_REF_ITEM_THIS_ROW 34 -#define SC_OPCODE_STOP_DIV 35 -#define SC_OPCODE_SKIP 36 /* used to skip raw tokens during string compilation */ +#define SC_OPCODE_WHITESPACE 17 +#define SC_OPCODE_MAT_REF 18 +#define SC_OPCODE_DB_AREA 19 /* additional access operators */ +#define SC_OPCODE_TABLE_REF 20 +#define SC_OPCODE_MACRO 21 +#define SC_OPCODE_COL_ROW_NAME 22 +#define SC_OPCODE_COL_ROW_NAME_AUTO 23 +#define SC_OPCODE_PERCENT_SIGN 24 /* operator _follows_ value */ +#define SC_OPCODE_ARRAY_OPEN 25 +#define 
SC_OPCODE_ARRAY_CLOSE 26 +#define SC_OPCODE_ARRAY_ROW_SEP 27 +#define SC_OPCODE_ARRAY_COL_SEP 28 /* some convs use sep != col_sep */ +#define SC_OPCODE_TABLE_REF_OPEN 29 +#define SC_OPCODE_TABLE_REF_CLOSE 30 +#define SC_OPCODE_TABLE_REF_ITEM_ALL 31 +#define SC_OPCODE_TABLE_REF_ITEM_HEADERS 32 +#define SC_OPCODE_TABLE_REF_ITEM_DATA 33 +#define SC_OPCODE_TABLE_REF_ITEM_TOTALS 34 +#define SC_OPCODE_TABLE_REF_ITEM_THIS_ROW 35 +#define SC_OPCODE_STOP_DIV 36 +#define SC_OPCODE_SKIP 37 /* used to skip raw tokens during string compilation */ /*** error constants #... ***/ #define SC_OPCODE_START_ERRORS 40 diff --git a/include/formula/opcode.hxx b/include/formula/opcode.hxx index 3123e8f3fa38..d92ae0b1d41d 100644 --- a/include/formula/opcode.hxx +++ b/include/formula/opcode.hxx @@ -53,6 +53,7 @@ enum OpCode : sal_uInt16 ocBad = SC_OPCODE_BAD, ocStringXML = SC_OPCODE_STRINGXML, ocSpaces = SC_OPCODE_SPACES, + ocWhitespace = SC_OPCODE_WHITESPACE, ocMatRef = SC_OPCODE_MAT_REF, ocTableRefItemAll = SC_OPCODE_TABLE_REF_ITEM_ALL, ocTableRefItemHeaders = SC_OPCODE_TABLE_REF_ITEM_HEADERS, @@ -545,6 +546,7 @@ inline std::string OpCodeEnumToString(OpCode eCode) case ocBad: return "Bad"; case ocStringXML: return "StringXML"; case ocSpaces: return "Spaces"; + case ocWhitespace: return "Whitespace"; case ocMatRef: return "MatRef"; case ocTableRefItemAll: return "TableRefItemAll"; case ocTableRefItemHeaders: return "TableRefItemHeaders"; diff --git a/include/formula/token.hxx b/include/formula/token.hxx index 3fa00e89339f..77bf3eeb90ea 100644 --- a/include/formula/token.hxx +++ b/include/formula/token.hxx @@ -187,6 +187,7 @@ public: virtual void SetIndex( sal_uInt16 n ); virtual sal_Int16 GetSheet() const; virtual void SetSheet( sal_Int16 n ); + virtual sal_Unicode GetChar() const; virtual short* GetJump() const; virtual const OUString& GetExternal() const; virtual FormulaToken* GetFAPOrigToken() const; @@ -225,6 +226,25 @@ inline void intrusive_ptr_release(const FormulaToken* p) 
p->DecRef(); } +class FORMULA_DLLPUBLIC FormulaSpaceToken : public FormulaToken +{ +private: + sal_uInt8 nByte; + sal_Unicode cChar; +public: + FormulaSpaceToken( sal_uInt8 n, sal_Unicode c ) : + FormulaToken( svByte, ocWhitespace ), + nByte( n ), cChar( c ) {} + FormulaSpaceToken( const FormulaSpaceToken& r ) : + FormulaToken( r ), + nByte( r.nByte ), cChar( r.cChar ) {} + + virtual FormulaToken* Clone() const override { return new FormulaSpaceToken(*this); } + virtual sal_uInt8 GetByte() const override; + virtual sal_Unicode GetChar() const override; + virtual bool operator==( const FormulaToken& rToken ) const override; +}; + class FORMULA_DLLPUBLIC FormulaByteToken : public FormulaToken { private: diff --git a/offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl b/offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl index 89c21dca4328..4cb2699e9af6 100644 --- a/offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl +++ b/offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl @@ -140,10 +140,6 @@ constants FormulaMapGroupSpecialOffset <p>The FormulaToken::Data member shall contain a positive integer value of type `long` specifying the number of space characters.</p> - - <p>Attention: This may change in next versions to support other - characters than simple space characters (e.g. line feeds, horizontal - tabulators, non-breakable spaces).</p> */ const long SPACES = 8; @@ -176,6 +172,26 @@ constants FormulaMapGroupSpecialOffset const long COL_ROW_NAME = 12; + /** Formula tokens containing the op-code obtained from this offset + describe whitespace characters within the string representation of a + formula. + + <p>Whitespace characters in formulas are used for readability and do + not affect the result of the formula.</p> + + <p>The FormulaToken::Data member shall contain a + `string` of one (repeated) whitespace character. 
The length of + the string determines the number of repetitions.</p> + + <p>Allowed whitespace characters are SPACE (U+0020), CHARACTER + TABULATION (U+0009), LINE FEED (U+000A), and CARRIAGE RETURN + (U+000D). See also ODF v1.3 OpenFormula 5.14 Whitespace.</p> + + @since LibreOffice 7.3 + */ + const long WHITESPACE = 13; + + }; diff --git a/sc/inc/compiler.hxx b/sc/inc/compiler.hxx index d8935c7f7545..17e258dc3805 100644 --- a/sc/inc/compiler.hxx +++ b/sc/inc/compiler.hxx @@ -108,6 +108,10 @@ public: union { double nValue; struct { + sal_uInt8 nCount; + sal_Unicode cChar; + } whitespace; + struct { sal_uInt8 cByte; formula::ParamClass eInForceArray; } sbyte; @@ -326,7 +330,21 @@ private: bool ToUpperAsciiOrI18nIsAscii( OUString& rUpper, const OUString& rOrg ) const; virtual void SetError(FormulaError nError) override; - sal_Int32 NextSymbol(bool bInArray); + + struct Whitespace final + { + sal_Int32 nCount; + sal_Unicode cChar; + + Whitespace() : nCount(0), cChar(0x20) {} + void reset( sal_Unicode c ) { nCount = 0; cChar = c; } + }; + + static void addWhitespace( std::vector<ScCompiler::Whitespace> & rvSpaces, + ScCompiler::Whitespace & rSpace, sal_Unicode c, sal_Int32 n = 1 ); + + std::vector<Whitespace> NextSymbol(bool bInArray); + bool IsValue( const OUString& ); bool IsOpCode( const OUString&, bool bInArray ); bool IsOpCode2( const OUString& ); diff --git a/sc/source/core/tool/compiler.cxx b/sc/source/core/tool/compiler.cxx index 0d1dc9d52aa6..83eb2f4ab7db 100644 --- a/sc/source/core/tool/compiler.cxx +++ b/sc/source/core/tool/compiler.cxx @@ -336,11 +336,8 @@ ScCompiler::Convention::Convention( FormulaGrammar::AddressConvention eConv ) for (i = 0; i < 128; i++) t[i] = ScCharFlags::Illegal; -// tdf#56036: Allow tabs/newlines in imported formulas (for now simply treat them as (and convert to) space) -// TODO: tdf#76310: allow saving newlines as is (as per OpenFormula specification v.1.2, clause 5.14 "Whitespace") -// This is compliant with the OASIS decision 
(see https://issues.oasis-open.org/browse/OFFICE-701) -// Also, this would enable correct roundtrip from/to OOXML without losing tabs/newlines -// This requires saving actual space characters in ocSpaces token, using them in UI and saving +// Allow tabs/newlines. +// Allow saving whitespace as is (as per OpenFormula specification v.1.2, clause 5.14 "Whitespace"). /* tab */ t[ 9] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep; /* lf */ t[10] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep; /* cr */ t[13] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep; @@ -2067,6 +2064,19 @@ static bool lcl_isUnicodeIgnoreAscii( const sal_Unicode* p1, const char* p2, siz return true; } +// static +void ScCompiler::addWhitespace( std::vector<ScCompiler::Whitespace> & rvSpaces, + ScCompiler::Whitespace & rSpace, sal_Unicode c, sal_Int32 n ) +{ + if (rSpace.cChar != c) + { + if (rSpace.cChar && rSpace.nCount > 0) + rvSpaces.emplace_back(rSpace); + rSpace.reset(c); + } + rSpace.nCount += n; +} + // NextSymbol // Parses the formula into separate symbols for further processing. 
@@ -2104,8 +2114,9 @@ static bool lcl_isUnicodeIgnoreAscii( const sal_Unicode* p1, const char* p2, siz // | other | Symbol=Symbol+char | GetString //---------------+-------------------+-----------------------+--------------- -sal_Int32 ScCompiler::NextSymbol(bool bInArray) +std::vector<ScCompiler::Whitespace> ScCompiler::NextSymbol(bool bInArray) { + std::vector<Whitespace> vSpaces; cSymbol[MAXSTRLEN] = 0; // end sal_Unicode* pSym = cSymbol; const sal_Unicode* const pStart = aFormula.getStr(); @@ -2116,7 +2127,7 @@ sal_Int32 ScCompiler::NextSymbol(bool bInArray) bool bQuote = false; mnRangeOpPosInSymbol = -1; ScanState eState = ssGetChar; - sal_Int32 nSpaces = 0; + Whitespace aSpace; sal_Unicode cSep = mxSymbols->getSymbolChar( ocSep); sal_Unicode cArrayColSep = mxSymbols->getSymbolChar( ocArrayColSep); sal_Unicode cArrayRowSep = mxSymbols->getSymbolChar( ocArrayRowSep); @@ -2129,6 +2140,7 @@ sal_Int32 ScCompiler::NextSymbol(bool bInArray) int nDecSeps = 0; bool bAutoIntersection = false; + size_t nAutoIntersectionSpacesPos = 0; int nRefInName = 0; bool bErrorConstantHadSlash = false; mnPredetectedReference = 0; @@ -2187,7 +2199,12 @@ Label_MaskStateMachine: if (!bAutoIntersection) { ++pSrc; - nSpaces += 2; // must match the character count + // Add 2 because it must match the character count + // for bi18n. + addWhitespace( vSpaces, aSpace, 0x20, 2); + // Position of Whitespace where it will be added to + // vector. 
+ nAutoIntersectionSpacesPos = vSpaces.size(); bAutoIntersection = true; } else @@ -2267,7 +2284,7 @@ Label_MaskStateMachine: } else if( nMask & ScCharFlags::CharDontCare ) { - nSpaces++; + addWhitespace( vSpaces, aSpace, c); } else if( nMask & ScCharFlags::CharIdent ) { // try to get a simple ASCII identifier before calling @@ -2731,10 +2748,15 @@ Label_MaskStateMachine: cLast = c; c = *pSrc; } + + if (aSpace.nCount && aSpace.cChar) + vSpaces.emplace_back(aSpace); + if ( bi18n ) { const sal_Int32 nOldSrcPos = nSrcPos; - nSrcPos = nSrcPos + nSpaces; + for (const auto& r : vSpaces) + nSrcPos += r.nCount; // If group separator is not a possible operator and not one of any // separators then it may be parsed away in numbers. This is // specifically the case with NO-BREAK SPACE, which actually triggers @@ -2835,9 +2857,9 @@ Label_MaskStateMachine: } if ( bAutoCorrect ) aCorrectedSymbol = OUString(cSymbol, pSym - cSymbol); - if (bAutoIntersection && nSpaces > 1) - --nSpaces; // replace '!!' with only one space - return nSpaces; + if (bAutoIntersection && vSpaces[nAutoIntersectionSpacesPos].nCount > 1) + --vSpaces[nAutoIntersectionSpacesPos].nCount; // replace '!!' 
with only one space + return vSpaces; } // Convert symbol to token @@ -4246,7 +4268,7 @@ bool ScCompiler::NextNewToken( bool bInArray ) } bool bAllowBooleans = bInArray; - sal_Int32 nSpaces = NextSymbol(bInArray); + const std::vector<Whitespace> & vSpaces = NextSymbol(bInArray); if (!cSymbol[0]) { @@ -4266,15 +4288,31 @@ bool ScCompiler::NextNewToken( bool bInArray ) return false; } - if( nSpaces ) + if (!vSpaces.empty()) { ScRawToken aToken; - aToken.SetOpCode( ocSpaces ); - aToken.sbyte.cByte = static_cast<sal_uInt8>( std::min<sal_Int32>(nSpaces, 255) ); - if( !static_cast<ScTokenArray*>(pArr)->AddRawToken( aToken ) ) + for (const auto& rSpace : vSpaces) { - SetError(FormulaError::CodeOverflow); - return false; + if (rSpace.cChar == 0x20) + { + // For now keep this a FormulaByteToken for the nasty + // significant whitespace intersection. This probably can be + // changed to a FormulaSpaceToken but then other places may + // need to be adapted. + aToken.SetOpCode( ocSpaces ); + aToken.sbyte.cByte = static_cast<sal_uInt8>( std::min<sal_Int32>(rSpace.nCount, 255) ); + } + else + { + aToken.SetOpCode( ocWhitespace ); + aToken.whitespace.nCount = static_cast<sal_uInt8>( std::min<sal_Int32>(rSpace.nCount, 255) ); + aToken.whitespace.cChar = rSpace.cChar; + } + if (!static_cast<ScTokenArray*>(pArr)->AddRawToken( aToken )) + { + SetError(FormulaError::CodeOverflow); + return false; + } } } diff --git a/sc/source/core/tool/parclass.cxx b/sc/source/core/tool/parclass.cxx index 8dd39016cb48..6c560b07b42f 100644 --- a/sc/source/core/tool/parclass.cxx +++ b/sc/source/core/tool/parclass.cxx @@ -74,6 +74,7 @@ const ScParameterClassification::RawData ScParameterClassification::pRawData[] = { ocSep, {{ Bounds }, 0, Bounds }}, { ocSkip, {{ Bounds }, 0, Bounds }}, { ocSpaces, {{ Bounds }, 0, Bounds }}, + { ocWhitespace, {{ Bounds }, 0, Bounds }}, { ocStop, {{ Bounds }, 0, Bounds }}, { ocStringXML, {{ Bounds }, 0, Bounds }}, { ocTableRef, {{ Bounds }, 0, Value }}, // or Reference? 
diff --git a/sc/source/core/tool/token.cxx b/sc/source/core/tool/token.cxx index 04355b86a8ec..f17cd9ac27ad 100644 --- a/sc/source/core/tool/token.cxx +++ b/sc/source/core/tool/token.cxx @@ -235,6 +235,11 @@ void ScRawToken::SetOpCode( OpCode e ) case ocTableRefClose: eType = svSep; break; + case ocWhitespace: + eType = svByte; + whitespace.nCount = 1; + whitespace.cChar = 0x20; + break; default: eType = svByte; sbyte.cByte = 0; @@ -349,7 +354,10 @@ FormulaToken* ScRawToken::CreateToken(ScSheetLimits& rLimits) const switch ( GetType() ) { case svByte : - return new FormulaByteToken( eOp, sbyte.cByte, sbyte.eInForceArray ); + if (eOp == ocWhitespace) + return new FormulaSpaceToken( whitespace.nCount, whitespace.cChar ); + else + return new FormulaByteToken( eOp, sbyte.cByte, sbyte.eInForceArray ); case svDouble : IF_NOT_OPCODE_ERROR( ocPush, FormulaDoubleToken); return new FormulaDoubleToken( nValue ); @@ -1652,6 +1660,7 @@ void ScTokenArray::CheckToken( const FormulaToken& r ) case ocMissing: case ocBad: case ocSpaces: + case ocWhitespace: case ocSkip: case ocPercentSign: case ocErrNull: @@ -2089,6 +2098,7 @@ FormulaToken* ScTokenArray::MergeArray( ) break; case ocSpaces : + case ocWhitespace : // ignore spaces --nPrevRowSep; // shorten this row by 1 break; @@ -5136,12 +5146,18 @@ OUString ScTokenArray::CreateString( sc::TokenStringContext& rCxt, const ScAddre { const FormulaToken* pToken = *p; OpCode eOp = pToken->GetOpCode(); + /* FIXME: why does this ignore the count of spaces? */ if (eOp == ocSpaces) { // TODO : Handle intersection operator '!!'. 
aBuf.append(' '); continue; } + else if (eOp == ocWhitespace) + { + aBuf.append( pToken->GetChar()); + continue; + } if (eOp < rCxt.mxOpCodeMap->getSymbolCount()) aBuf.append(rCxt.mxOpCodeMap->getSymbol(eOp)); diff --git a/sc/source/filter/excel/xeformula.cxx b/sc/source/filter/excel/xeformula.cxx index f2edeffb263c..f829529ca0db 100644 --- a/sc/source/filter/excel/xeformula.cxx +++ b/sc/source/filter/excel/xeformula.cxx @@ -826,9 +826,13 @@ const FormulaToken* XclExpFmlaCompImpl::PeekNextRawToken() const bool XclExpFmlaCompImpl::GetNextToken( XclExpScToken& rTokData ) { rTokData.mpScToken = GetNextRawToken(); - rTokData.mnSpaces = (rTokData.GetOpCode() == ocSpaces) ? rTokData.mpScToken->GetByte() : 0; - while( rTokData.GetOpCode() == ocSpaces ) + rTokData.mnSpaces = 0; + /* TODO: handle ocWhitespace characters? */ + while (rTokData.GetOpCode() == ocSpaces || rTokData.GetOpCode() == ocWhitespace) + { + rTokData.mnSpaces += rTokData.mpScToken->GetByte(); rTokData.mpScToken = GetNextRawToken(); + } return rTokData.Is(); } diff --git a/sc/source/filter/excel/xlformula.cxx b/sc/source/filter/excel/xlformula.cxx index 1f974f47b38b..e2e082ac2651 100644 --- a/sc/source/filter/excel/xlformula.cxx +++ b/sc/source/filter/excel/xlformula.cxx @@ -867,8 +867,11 @@ void XclTokenArrayIterator::NextRawToken() void XclTokenArrayIterator::SkipSpaces() { if( mbSkipSpaces ) - while( Is() && ((*this)->GetOpCode() == ocSpaces) ) + { + OpCode eOp; + while( Is() && (((eOp = (*this)->GetOpCode()) == ocSpaces) || eOp == ocWhitespace) ) NextRawToken(); + } } // strings and string lists --------------------------------------------------- diff --git a/sc/source/ui/app/inputhdl.cxx b/sc/source/ui/app/inputhdl.cxx index f01f93d46d70..b3e644fc8620 100644 --- a/sc/source/ui/app/inputhdl.cxx +++ b/sc/source/ui/app/inputhdl.cxx @@ -654,7 +654,7 @@ void ScInputHandler::DeleteRangeFinder() static OUString GetEditText(const EditEngine* pEng) { - return ScEditUtil::GetSpaceDelimitedString(*pEng); + 
return ScEditUtil::GetMultilineString(*pEng); } static void lcl_RemoveTabs(OUString& rStr) diff --git a/sc/source/ui/unoobj/tokenuno.cxx b/sc/source/ui/unoobj/tokenuno.cxx index 33f005fff8d6..b07a04e12b04 100644 --- a/sc/source/ui/unoobj/tokenuno.cxx +++ b/sc/source/ui/unoobj/tokenuno.cxx @@ -32,6 +32,7 @@ #include <svl/itemprop.hxx> #include <vcl/svapp.hxx> +#include <comphelper/string.hxx> #include <miscuno.hxx> #include <convuno.hxx> @@ -388,6 +389,18 @@ void ScTokenConversion::ConvertToTokenSequence( const ScDocument& rDoc, // Only the count of spaces is stored as "long". Parameter count is ignored. if ( eOpCode == ocSpaces ) rAPI.Data <<= static_cast<sal_Int32>(rToken.GetByte()); + else if (eOpCode == ocWhitespace) + { + // Convention is one character repeated. + if (rToken.GetByte() == 1) + rAPI.Data <<= OUString( rToken.GetChar()); + else + { + OUStringBuffer aBuf( rToken.GetByte()); + comphelper::string::padToLength( aBuf, rToken.GetByte(), rToken.GetChar()); + rAPI.Data <<= aBuf.makeStringAndClear(); + } + } else rAPI.Data.clear(); // no data break; diff --git a/sc/source/ui/view/viewfunc.cxx b/sc/source/ui/view/viewfunc.cxx index fbe8a0b3719c..4e5e149c9755 100644 --- a/sc/source/ui/view/viewfunc.cxx +++ b/sc/source/ui/view/viewfunc.cxx @@ -677,7 +677,7 @@ void ScViewFunc::EnterData( SCCOL nCol, SCROW nRow, SCTAB nTab, } // #i97726# always get text for "repeat" of undo action - aString = ScEditUtil::GetSpaceDelimitedString(aEngine); + aString = ScEditUtil::GetMultilineString(aEngine); // undo |