diff options
Diffstat (limited to 'sc/source/core/tool/compiler.cxx')
-rw-r--r-- | sc/source/core/tool/compiler.cxx | 78 |
1 files changed, 58 insertions, 20 deletions
diff --git a/sc/source/core/tool/compiler.cxx b/sc/source/core/tool/compiler.cxx index 0d1dc9d52aa6..83eb2f4ab7db 100644 --- a/sc/source/core/tool/compiler.cxx +++ b/sc/source/core/tool/compiler.cxx @@ -336,11 +336,8 @@ ScCompiler::Convention::Convention( FormulaGrammar::AddressConvention eConv ) for (i = 0; i < 128; i++) t[i] = ScCharFlags::Illegal; -// tdf#56036: Allow tabs/newlines in imported formulas (for now simply treat them as (and convert to) space) -// TODO: tdf#76310: allow saving newlines as is (as per OpenFormula specification v.1.2, clause 5.14 "Whitespace") -// This is compliant with the OASIS decision (see https://issues.oasis-open.org/browse/OFFICE-701) -// Also, this would enable correct roundtrip from/to OOXML without losing tabs/newlines -// This requires saving actual space characters in ocSpaces token, using them in UI and saving +// Allow tabs/newlines. +// Allow saving whitespace as is (as per OpenFormula specification v.1.2, clause 5.14 "Whitespace"). /* tab */ t[ 9] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep; /* lf */ t[10] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep; /* cr */ t[13] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep; @@ -2067,6 +2064,19 @@ static bool lcl_isUnicodeIgnoreAscii( const sal_Unicode* p1, const char* p2, siz return true; } +// static +void ScCompiler::addWhitespace( std::vector<ScCompiler::Whitespace> & rvSpaces, + ScCompiler::Whitespace & rSpace, sal_Unicode c, sal_Int32 n ) +{ + if (rSpace.cChar != c) + { + if (rSpace.cChar && rSpace.nCount > 0) + rvSpaces.emplace_back(rSpace); + rSpace.reset(c); + } + rSpace.nCount += n; +} + // NextSymbol // Parses the formula into separate symbols for further processing. @@ -2104,8 +2114,9 @@ static bool lcl_isUnicodeIgnoreAscii( const sal_Unicode* p1, const char* p2, siz // | other | Symbol=Symbol+char | GetString //---------------+-------------------+-----------------------+--------------- -sal_Int32 ScCompiler::NextSymbol(bool bInArray) +std::vector<ScCompiler::Whitespace> ScCompiler::NextSymbol(bool bInArray) { + std::vector<Whitespace> vSpaces; cSymbol[MAXSTRLEN] = 0; // end sal_Unicode* pSym = cSymbol; const sal_Unicode* const pStart = aFormula.getStr(); @@ -2116,7 +2127,7 @@ sal_Int32 ScCompiler::NextSymbol(bool bInArray) bool bQuote = false; mnRangeOpPosInSymbol = -1; ScanState eState = ssGetChar; - sal_Int32 nSpaces = 0; + Whitespace aSpace; sal_Unicode cSep = mxSymbols->getSymbolChar( ocSep); sal_Unicode cArrayColSep = mxSymbols->getSymbolChar( ocArrayColSep); sal_Unicode cArrayRowSep = mxSymbols->getSymbolChar( ocArrayRowSep); @@ -2129,6 +2140,7 @@ sal_Int32 ScCompiler::NextSymbol(bool bInArray) int nDecSeps = 0; bool bAutoIntersection = false; + size_t nAutoIntersectionSpacesPos = 0; int nRefInName = 0; bool bErrorConstantHadSlash = false; mnPredetectedReference = 0; @@ -2187,7 +2199,12 @@ Label_MaskStateMachine: if (!bAutoIntersection) { ++pSrc; - nSpaces += 2; // must match the character count + // Add 2 because it must match the character count + // for bi18n. + addWhitespace( vSpaces, aSpace, 0x20, 2); + // Position of Whitespace where it will be added to + // vector. + nAutoIntersectionSpacesPos = vSpaces.size(); bAutoIntersection = true; } else @@ -2267,7 +2284,7 @@ Label_MaskStateMachine: } else if( nMask & ScCharFlags::CharDontCare ) { - nSpaces++; + addWhitespace( vSpaces, aSpace, c); } else if( nMask & ScCharFlags::CharIdent ) { // try to get a simple ASCII identifier before calling @@ -2731,10 +2748,15 @@ Label_MaskStateMachine: cLast = c; c = *pSrc; } + + if (aSpace.nCount && aSpace.cChar) + vSpaces.emplace_back(aSpace); + if ( bi18n ) { const sal_Int32 nOldSrcPos = nSrcPos; - nSrcPos = nSrcPos + nSpaces; + for (const auto& r : vSpaces) + nSrcPos += r.nCount; // If group separator is not a possible operator and not one of any // separators then it may be parsed away in numbers. This is // specifically the case with NO-BREAK SPACE, which actually triggers @@ -2835,9 +2857,9 @@ Label_MaskStateMachine: } if ( bAutoCorrect ) aCorrectedSymbol = OUString(cSymbol, pSym - cSymbol); - if (bAutoIntersection && nSpaces > 1) - --nSpaces; // replace '!!' with only one space - return nSpaces; + if (bAutoIntersection && vSpaces[nAutoIntersectionSpacesPos].nCount > 1) + --vSpaces[nAutoIntersectionSpacesPos].nCount; // replace '!!' with only one space + return vSpaces; } // Convert symbol to token @@ -4246,7 +4268,7 @@ bool ScCompiler::NextNewToken( bool bInArray ) } bool bAllowBooleans = bInArray; - sal_Int32 nSpaces = NextSymbol(bInArray); + const std::vector<Whitespace> & vSpaces = NextSymbol(bInArray); if (!cSymbol[0]) { @@ -4266,15 +4288,31 @@ bool ScCompiler::NextNewToken( bool bInArray ) return false; } - if( nSpaces ) + if (!vSpaces.empty()) { ScRawToken aToken; - aToken.SetOpCode( ocSpaces ); - aToken.sbyte.cByte = static_cast<sal_uInt8>( std::min<sal_Int32>(nSpaces, 255) ); - if( !static_cast<ScTokenArray*>(pArr)->AddRawToken( aToken ) ) + for (const auto& rSpace : vSpaces) { - SetError(FormulaError::CodeOverflow); - return false; + if (rSpace.cChar == 0x20) + { + // For now keep this a FormulaByteToken for the nasty + // significant whitespace intersection. This probably can be + // changed to a FormulaSpaceToken but then other places may + // need to be adapted. + aToken.SetOpCode( ocSpaces ); + aToken.sbyte.cByte = static_cast<sal_uInt8>( std::min<sal_Int32>(rSpace.nCount, 255) ); + } + else + { + aToken.SetOpCode( ocWhitespace ); + aToken.whitespace.nCount = static_cast<sal_uInt8>( std::min<sal_Int32>(rSpace.nCount, 255) ); + aToken.whitespace.cChar = rSpace.cChar; + } + if (!static_cast<ScTokenArray*>(pArr)->AddRawToken( aToken )) + { + SetError(FormulaError::CodeOverflow); + return false; + } } } |