diff options
author | Frédéric Wang <fred.wang@free.fr> | 2013-06-24 23:14:42 +0200 |
---|---|---|
committer | Fridrich Strba <fridrich@documentfoundation.org> | 2013-06-28 09:52:08 +0000 |
commit | 16a0d06f900027401716ddaba25e5c8998562b2d (patch) | |
tree | b175de21dc8c5a46324846529ee2cc9016e12c6b /starmath/source | |
parent | 2ca754cba469533b1ed160b2fadbbe035cf1db34 (diff) |
fdo#55853 - improve parsing of concatenated numbers and identifiers.
Change-Id: Ic1cce4cec27f4ca5c3be1daf50888bef788cd6f7
Reviewed-on: https://gerrit.libreoffice.org/4494
Reviewed-by: Fridrich Strba <fridrich@documentfoundation.org>
Tested-by: Fridrich Strba <fridrich@documentfoundation.org>
Diffstat (limited to 'starmath/source')
-rw-r--r-- | starmath/source/parse.cxx | 128 |
1 files changed, 90 insertions, 38 deletions
diff --git a/starmath/source/parse.cxx b/starmath/source/parse.cxx index 5a0098b82a4a..cc35a6698775 100644 --- a/starmath/source/parse.cxx +++ b/starmath/source/parse.cxx @@ -374,12 +374,12 @@ void SmParser::Replace( sal_uInt16 nPos, sal_uInt16 nLen, const String &rText ) // First character may be any alphabetic const sal_Int32 coStartFlags = - KParseTokens::ANY_LETTER_OR_NUMBER | + KParseTokens::ANY_LETTER | KParseTokens::IGNORE_LEADING_WS; -// Continuing characters may be any alphanumeric or dot. +// Continuing characters may be any alphabetic const sal_Int32 coContFlags = - ((coStartFlags | KParseTokens::ASC_DOT) & ~KParseTokens::IGNORE_LEADING_WS) + (coStartFlags & ~KParseTokens::IGNORE_LEADING_WS) | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING; // First character for numbers, may be any numeric or dot @@ -389,7 +389,7 @@ const sal_Int32 coNumStartFlags = KParseTokens::IGNORE_LEADING_WS; // Continuing characters for numbers, may be any numeric or dot. const sal_Int32 coNumContFlags = - (coNumStartFlags | KParseTokens::ASC_DOT) & ~KParseTokens::IGNORE_LEADING_WS; + coNumStartFlags & ~KParseTokens::IGNORE_LEADING_WS; void SmParser::NextToken() { @@ -399,7 +399,6 @@ void SmParser::NextToken() ParseResult aRes; xub_StrLen nRealStart; bool bCont; - bool bNumStart = false; CharClass aCC(SM_MOD()->GetSysLocale().GetLanguageTag()); do { @@ -408,29 +407,23 @@ void SmParser::NextToken() aCC.getType( m_aBufferString, m_nBufferIndex )) ++m_nBufferIndex; - sal_Int32 nStartFlags = coStartFlags; - sal_Int32 nContFlags = coContFlags; - sal_Unicode cFirstChar = m_aBufferString.GetChar( m_nBufferIndex ); - aRes = aCC.parseAnyToken( m_aBufferString, m_nBufferIndex, - nStartFlags, aEmptyStr, - nContFlags, aEmptyStr ); - - // #i45779# parse numbers correctly - // i.e. independent from the locale setting. - // (note that #i11752# remains fixed) - if ((aRes.TokenType & KParseType::IDENTNAME) && CharClass::isAsciiDigit( cFirstChar )) + // Try to parse a number. 
This should be independent from the locale + // setting, so temporarily set the language to English. + // See https://issues.apache.org/ooo/show_bug.cgi?id=45779 + LanguageTag aOldLoc(aCC.getLanguageTag()); + aCC.setLanguageTag(LanguageTag(m_aDotLoc)); + aRes = aCC.parsePredefinedToken(KParseType::ASC_NUMBER, + m_aBufferString, m_nBufferIndex, + coNumStartFlags, aEmptyStr, + coNumContFlags, aEmptyStr); + aCC.setLanguageTag(aOldLoc); + + if (aRes.TokenType == 0) { - ParseResult aTmpRes; - LanguageTag aOldLoc( aCC.getLanguageTag() ); - aCC.setLanguageTag( LanguageTag( m_aDotLoc )); - aTmpRes = aCC.parsePredefinedToken( - KParseType::ASC_NUMBER, - m_aBufferString, m_nBufferIndex, - KParseTokens::ASC_DIGIT, aEmptyStr, - KParseTokens::ASC_DIGIT | KParseTokens::ASC_DOT, aEmptyStr ); - aCC.setLanguageTag( aOldLoc ); - if (aTmpRes.TokenType & KParseType::ASC_NUMBER) - aRes.TokenType = aTmpRes.TokenType; + // Try again with the default token parsing. + aRes = aCC.parseAnyToken(m_aBufferString, m_nBufferIndex, + coStartFlags, aEmptyStr, + coContFlags, aEmptyStr); } nRealStart = m_nBufferIndex + sal::static_int_cast< xub_StrLen >(aRes.LeadingWhiteSpace); @@ -476,8 +469,7 @@ void SmParser::NextToken() m_aCurToken.nLevel = 0; m_aCurToken.aText = ""; } - else if ((aRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER)) - || (bNumStart && (aRes.TokenType & KParseType::IDENTNAME))) + else if (aRes.TokenType & KParseType::ANY_NUMBER) { sal_Int32 n = aRes.EndPos - nRealStart; OSL_ENSURE( n >= 0, "length < 0" ); @@ -1277,7 +1269,7 @@ void SmParser::SubSup(sal_uLong nActiveGroup) Relation(); } else - Term(); + Term(true); switch (eType) { case TRSUB : nIndex = (int) RSUB; break; @@ -1321,7 +1313,7 @@ void SmParser::OpSubSup() void SmParser::Power() { // get body for sub- supscripts on top of stack - Term(); + Term(false); SubSup(TGPOWER); } @@ -1349,7 +1341,7 @@ void SmParser::Blank() } -void SmParser::Term() +void SmParser::Term(bool bGroupNumberIdent) { switch 
(m_aCurToken.eType) { @@ -1369,7 +1361,7 @@ void SmParser::Term() if (m_aCurToken.eType != TLGROUP) { m_aNodeStack.pop(); // get rid of the 'no space' node pushed above - Term(); + Term(false); } else { @@ -1411,16 +1403,76 @@ void SmParser::Term() m_aNodeStack.push(new SmTextNode(m_aCurToken, FNT_TEXT)); NextToken(); break; - case TIDENT : case TCHARACTER : m_aNodeStack.push(new SmTextNode(m_aCurToken, FNT_VARIABLE)); NextToken(); break; + case TIDENT : case TNUMBER : - m_aNodeStack.push(new SmTextNode(m_aCurToken, FNT_NUMBER)); - NextToken(); + { + m_aNodeStack.push(new SmTextNode(m_aCurToken, + m_aCurToken.eType == TNUMBER ? + FNT_NUMBER : + FNT_VARIABLE)); + if (!bGroupNumberIdent) + { + NextToken(); + } + else + { + // Some people want to be able to write "x_2n" for "x_{2n}" + // although e.g. LaTeX or AsciiMath interpret that as "x_2 n". + // The tokenizer skips whitespaces so we need some additional + // work to distinguish from "x_2 n". + // See https://issues.apache.org/ooo/show_bug.cgi?id=11752 and + // https://www.libreoffice.org/bugzilla/show_bug.cgi?id=55853 + xub_StrLen nBufLen = m_aBufferString.Len(); + CharClass aCC(SM_MOD()->GetSysLocale().GetLanguageTag()); + sal_uInt16 nTokens = 1; + + // We need to be careful to call NextToken() only after having + // tested for a whitespace separator (otherwise it will be + // skipped!) + bool moveToNextToken = true; + while (m_nBufferIndex < nBufLen && + aCC.getType(m_aBufferString, m_nBufferIndex) != + UnicodeType::SPACE_SEPARATOR) + { + NextToken(); + if (m_aCurToken.eType != TNUMBER && + m_aCurToken.eType != TIDENT) + { + // Neither a number nor an identifier. We just moved to + // the next token, so no need to do that again. + moveToNextToken = false; + break; + } + m_aNodeStack.push(new SmTextNode(m_aCurToken, + m_aCurToken.eType == + TNUMBER ? 
+ FNT_NUMBER : + FNT_VARIABLE)); + nTokens++; + } + if (moveToNextToken) NextToken(); + if (nTokens > 1) + { + // We have several concatenated identifiers and numbers. + // Let's group them into one SmExpressionNode. + SmNodeArray nodeArray; + nodeArray.resize(nTokens); + while (nTokens > 0) + { + nodeArray[nTokens-1] = lcl_popOrZero(m_aNodeStack); + nTokens--; + } + SmExpressionNode* pNode = new SmExpressionNode(SmToken()); + pNode->SetSubNodes(nodeArray); + m_aNodeStack.push(pNode); + } + } break; - + } case TLEFTARROW : case TRIGHTARROW : case TUPARROW : @@ -1541,7 +1593,7 @@ void SmParser::Term() SmNode *pFunc = lcl_popOrZero(m_aNodeStack); if (m_aCurToken.eType == TLPARENT) - { Term(); + { Term(false); } else { Align(); |