summary | refs | log | tree | commit | diff
path: root/writerfilter/source/rtftok/rtftokenizer.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'writerfilter/source/rtftok/rtftokenizer.cxx')
-rw-r--r-- writerfilter/source/rtftok/rtftokenizer.cxx 330
1 files changed, 0 insertions, 330 deletions
diff --git a/writerfilter/source/rtftok/rtftokenizer.cxx b/writerfilter/source/rtftok/rtftokenizer.cxx
deleted file mode 100644
index 420c6d36f0c3..000000000000
--- a/writerfilter/source/rtftok/rtftokenizer.cxx
+++ /dev/null
@@ -1,330 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
-
-#include "rtftokenizer.hxx"
-#include <o3tl/string_view.hxx>
-#include <tools/stream.hxx>
-#include <svx/dialmgr.hxx>
-#include <svx/strings.hrc>
-#include <rtl/strbuf.hxx>
-#include <rtl/character.hxx>
-#include <sal/log.hxx>
-#include "rtfskipdestination.hxx"
-#include <com/sun/star/io/BufferSizeExceededException.hpp>
-#include <com/sun/star/task/XStatusIndicator.hpp>
-#include <filter/msfilter/rtfutil.hxx>
-
-using namespace com::sun::star;
-
-namespace writerfilter::rtftok
-{
-// Keyword -> symbol lookup table shared by all tokenizers; it is filled by
-// the first RTFTokenizer that gets constructed.
-std::unordered_map<OString, RTFSymbol> RTFTokenizer::s_aRTFControlWords;
-// Set once s_aRTFControlWords has been populated.
-bool RTFTokenizer::s_bControlWordsInitialised;
-// Sorted copy of aRTFMathControlWords, searched by lookupMathKeyword().
-std::vector<RTFMathSymbol> RTFTokenizer::s_aRTFMathControlWords;
-// Set once s_aRTFMathControlWords has been copied and sorted.
-bool RTFTokenizer::s_bMathControlWordsSorted;
-
-// Stores the listener, the input stream and the status indicator, and makes
-// sure the keyword tables shared by all tokenizer instances have been built.
-RTFTokenizer::RTFTokenizer(RTFListener& rImport, SvStream* pInStream,
-                           uno::Reference<task::XStatusIndicator> const& xStatusIndicator)
-    : m_rImport(rImport)
-    , m_pInStream(pInStream)
-    , m_xStatusIndicator(xStatusIndicator)
-    , m_nGroup(0)
-    , m_nLineNumber(0)
-    , m_nLineStartPos(0)
-    , m_nGroupStart(0)
-{
-    // Fill the keyword -> symbol map only once; later instances reuse it.
-    if (!s_bControlWordsInitialised)
-    {
-        s_bControlWordsInitialised = true;
-        for (int nIndex = 0; nIndex < nRTFControlWords; ++nIndex)
-        {
-            auto const& rEntry = aRTFControlWords[nIndex];
-            s_aRTFControlWords.emplace(OString(rEntry.GetKeyword()), rEntry);
-        }
-    }
-
-    // Copy the math keywords and sort them only once, so that
-    // lookupMathKeyword() can binary-search the result.
-    if (!s_bMathControlWordsSorted)
-    {
-        s_bMathControlWordsSorted = true;
-        s_aRTFMathControlWords.assign(aRTFMathControlWords,
-                                      aRTFMathControlWords + nRTFMathControlWords);
-        std::sort(s_aRTFMathControlWords.begin(), s_aRTFMathControlWords.end());
-    }
-}
-
-// Defaulted: the destructor has no hand-written body.
-RTFTokenizer::~RTFTokenizer() = default;
-
-// Main tokenizer loop: reads the stream one character at a time and forwards
-// group starts/ends, keywords and plain characters to the listener.
-//
-// Returns RTFError::OK when a '}' leaves m_nGroup at 0, or when the stream
-// ends with m_nGroup == 0. Returns GROUP_UNDER / GROUP_OVER when m_nGroup is
-// negative / still positive, CHAR_OVER for text outside of any group,
-// HEX_INVALID when a hex digit cannot be converted, and otherwise the first
-// non-OK result reported by the listener or by resolveKeyword().
-RTFError RTFTokenizer::resolveParse()
-{
-    SAL_INFO("writerfilter.rtf", __func__);
-
-    // State of a pending hex escape: value collected so far and the number
-    // of characters that still belong to it.
-    int nHexValue = 0;
-    int nHexCharsLeft = 2;
-
-    // Progress reporting: the indicator is only updated after more than
-    // nPercentSize bytes were consumed since the previous update.
-    std::size_t nPercentSize = 0;
-    sal_uInt64 nLastPos = 0;
-    if (m_xStatusIndicator.is())
-    {
-        OUString sDocLoad(SvxResId(RID_SVXSTR_DOC_LOAD));
-
-        sal_uInt64 const nStartPos = Strm().Tell();
-        sal_uInt64 const nEndPos = nStartPos + Strm().remainingSize();
-        m_xStatusIndicator->start(sDocLoad, nEndPos);
-        nPercentSize = nEndPos / 100;
-
-        nLastPos = nStartPos;
-        m_xStatusIndicator->setValue(nLastPos);
-    }
-
-    char cRead;
-    RTFError eStatus;
-    for (;;)
-    {
-        Strm().ReadChar(cRead);
-        if (Strm().eof())
-            break;
-
-        //SAL_INFO("writerfilter", __func__ << ": parsing character '" << cRead << "'");
-
-        sal_uInt64 const nCurrentPos = Strm().Tell();
-        if (m_xStatusIndicator.is() && nCurrentPos > (nLastPos + nPercentSize))
-        {
-            nLastPos = nCurrentPos;
-            m_xStatusIndicator->setValue(nLastPos);
-        }
-
-        if (m_nGroup < 0)
-            return RTFError::GROUP_UNDER;
-
-        // Binary data inside a group goes to the listener untouched.
-        if (m_nGroup > 0 && m_rImport.getInternalState() == RTFInternalState::BIN)
-        {
-            eStatus = m_rImport.resolveChars(cRead);
-            if (eStatus != RTFError::OK)
-                return eStatus;
-            continue;
-        }
-
-        switch (cRead)
-        {
-            case '{':
-                // Tell() is already past the brace, hence the -1.
-                m_nGroupStart = Strm().Tell() - 1;
-                eStatus = m_rImport.pushState();
-                if (eStatus != RTFError::OK)
-                    return eStatus;
-                break;
-            case '}':
-                eStatus = m_rImport.popState();
-                if (eStatus != RTFError::OK)
-                    return eStatus;
-                if (m_nGroup == 0)
-                {
-                    // The outermost group was closed: parsing is finished.
-                    if (m_rImport.isSubstream())
-                        m_rImport.finishSubstream();
-                    return RTFError::OK;
-                }
-                break;
-            case '\\':
-                eStatus = resolveKeyword();
-                if (eStatus != RTFError::OK)
-                    return eStatus;
-                break;
-            case 0x0d:
-                // ignore this
-                break;
-            case 0x0a:
-                // Bookkeeping for getPosition().
-                m_nLineNumber++;
-                m_nLineStartPos = nCurrentPos;
-                break;
-            default:
-            {
-                if (m_nGroup == 0)
-                    return RTFError::CHAR_OVER;
-
-                if (m_rImport.getInternalState() == RTFInternalState::NORMAL)
-                {
-                    eStatus = m_rImport.resolveChars(cRead);
-                    if (eStatus != RTFError::OK)
-                        return eStatus;
-                    break;
-                }
-
-                SAL_INFO("writerfilter.rtf", __func__ << ": hex internal state");
-                // Assume that \'<number><junk> means \'0<number>.
-                bool const bLowerHex = cRead >= 'a' && cRead <= 'f';
-                bool const bUpperHex = cRead >= 'A' && cRead <= 'F';
-                if (bLowerHex || bUpperHex
-                    || rtl::isAsciiDigit(static_cast<unsigned char>(cRead)))
-                {
-                    sal_Int8 const nDigit = msfilter::rtfutil::AsHex(cRead);
-                    if (nDigit == -1)
-                        return RTFError::HEX_INVALID;
-                    nHexValue = (nHexValue << 4) + nDigit;
-                }
-
-                // Junk characters still count towards the length of the escape.
-                if (--nHexCharsLeft == 0)
-                {
-                    eStatus = m_rImport.resolveChars(nHexValue);
-                    if (eStatus != RTFError::OK)
-                        return eStatus;
-                    nHexCharsLeft = 2;
-                    nHexValue = 0;
-                    m_rImport.setInternalState(RTFInternalState::NORMAL);
-                }
-                break;
-            }
-        }
-    }
-
-    // End of stream: the group counter has to be balanced.
-    if (m_nGroup < 0)
-        return RTFError::GROUP_UNDER;
-    return m_nGroup > 0 ? RTFError::GROUP_OVER : RTFError::OK;
-}
-
-// Increases the group nesting counter by one.
-void RTFTokenizer::pushGroup()
-{
-    ++m_nGroup;
-}
-
-// Decreases the group nesting counter by one.
-void RTFTokenizer::popGroup()
-{
-    --m_nGroup;
-}
-
-// Reads what follows a backslash: either a one-character control symbol, or
-// a control word made of ASCII letters with an optional (possibly negative)
-// decimal parameter. The result is passed on to dispatchKeyword().
-//
-// Returns RTFError::UNEXPECTED_EOF when the stream ends right after the
-// backslash or right after a '-'; throws io::BufferSizeExceededException
-// when the control word has more than 32 letters.
-RTFError RTFTokenizer::resolveKeyword()
-{
-    char cNext;
-
-    Strm().ReadChar(cNext);
-    if (Strm().eof())
-        return RTFError::UNEXPECTED_EOF;
-
-    if (!rtl::isAsciiAlpha(static_cast<unsigned char>(cNext)))
-    {
-        // control symbols aren't followed by a space, so we can return here
-        // without doing any SeekRel()
-        return dispatchKeyword(OString(cNext), false, 0);
-    }
-
-    // cNext is a letter at this point, so the body runs at least once.
-    OStringBuffer aBuf(32);
-    do
-    {
-        aBuf.append(cNext);
-        if (aBuf.getLength() > 32)
-            // See RTF spec v1.9.1, page 7
-            // A control word's name cannot be longer than 32 letters.
-            throw io::BufferSizeExceededException();
-        Strm().ReadChar(cNext);
-        if (Strm().eof())
-        {
-            // Treat the end of the stream like the delimiting space.
-            cNext = ' ';
-            break;
-        }
-    } while (rtl::isAsciiAlpha(static_cast<unsigned char>(cNext)));
-
-    // in case we'll have a parameter, that will be negative
-    bool const bNeg = (cNext == '-');
-    if (bNeg)
-    {
-        Strm().ReadChar(cNext);
-        if (Strm().eof())
-            return RTFError::UNEXPECTED_EOF;
-    }
-
-    bool const bParam = rtl::isAsciiDigit(static_cast<unsigned char>(cNext));
-    int nParam = 0;
-    if (bParam)
-    {
-        // we have a parameter
-        OStringBuffer aDigits;
-        do
-        {
-            aDigits.append(cNext);
-            Strm().ReadChar(cNext);
-            if (Strm().eof())
-            {
-                cNext = ' ';
-                break;
-            }
-        } while (rtl::isAsciiDigit(static_cast<unsigned char>(cNext)));
-
-        int const nMagnitude = o3tl::toInt32(aDigits);
-        nParam = bNeg ? -nMagnitude : nMagnitude;
-    }
-
-    // A single space is part of the keyword; anything else is put back.
-    if (cNext != ' ')
-        Strm().SeekRel(-1);
-
-    return dispatchKeyword(aBuf.makeStringAndClear(), bParam, nParam);
-}
-
-// Binary-searches the sorted math keyword table for an entry equivalent to
-// rSymbol (according to operator<). On success rSymbol is overwritten with
-// the table entry and true is returned; otherwise rSymbol is left untouched
-// and false is returned.
-bool RTFTokenizer::lookupMathKeyword(RTFMathSymbol& rSymbol)
-{
-    auto const itEnd = s_aRTFMathControlWords.end();
-    auto const itCandidate = std::lower_bound(s_aRTFMathControlWords.begin(), itEnd, rSymbol);
-
-    // lower_bound only guarantees "not less than"; rule out "greater than".
-    bool const bFound = itCandidate != itEnd && !(rSymbol < *itCandidate);
-    if (bFound)
-        rSymbol = *itCandidate;
-    return bFound;
-}
-
-// Looks rKeyword up in the control word table and forwards it to the
-// listener callback matching its control type.
-//
-// rKeyword: keyword text without the leading backslash.
-// bParam:   whether a numeric parameter was present.
-// nParam:   the parameter value; only meaningful if bParam is true.
-//
-// Returns RTFError::OK for skipped and unknown keywords, otherwise the
-// result of the listener callback.
-RTFError RTFTokenizer::dispatchKeyword(OString const& rKeyword, bool bParam, int nParam)
-{
-    if (m_rImport.getDestination() == Destination::SKIP)
-    {
-        // skip binary data explicitly, to not trip over rtf markup
-        // control characters
-        if (rKeyword == "bin" && nParam > 0)
-            Strm().SeekRel(nParam);
-        return RTFError::OK;
-    }
-
-    SAL_INFO("writerfilter.rtf", __func__ << ": keyword '\\" << rKeyword << "' with param? "
-                                          << (bParam ? 1 : 0) << " param val: '"
-                                          << (bParam ? nParam : 0) << "'");
-
-    auto const itSymbol = s_aRTFControlWords.find(rKeyword);
-    if (itSymbol == s_aRTFControlWords.end())
-    {
-        SAL_INFO("writerfilter.rtf", __func__ << ": unknown keyword '\\" << rKeyword << "'");
-        RTFSkipDestination aSkip(m_rImport);
-        aSkip.setParsed(false);
-        return RTFError::OK;
-    }
-
-    RTFSymbol const& rSymbol = itSymbol->second;
-    switch (rSymbol.GetControlType())
-    {
-        case RTFControlType::FLAG:
-            // flags ignore any parameter by definition
-            return m_rImport.dispatchFlag(rSymbol.GetIndex());
-        case RTFControlType::DESTINATION:
-            // same for destinations
-            return m_rImport.dispatchDestination(rSymbol.GetIndex());
-        case RTFControlType::SYMBOL:
-            // and symbols
-            return m_rImport.dispatchSymbol(rSymbol.GetIndex());
-        case RTFControlType::TOGGLE:
-            return m_rImport.dispatchToggle(rSymbol.GetIndex(), bParam, nParam);
-        case RTFControlType::VALUE:
-            // Without an explicit parameter the keyword's default applies.
-            if (!bParam)
-                nParam = rSymbol.GetDefValue();
-            return m_rImport.dispatchValue(rSymbol.GetIndex(), nParam);
-    }
-
-    // Reached only for a control type not handled above.
-    return RTFError::OK;
-}
-
-// Formats the current stream position as "<line>,<offset>": both numbers
-// are 1-based, and the offset is counted from the position recorded after
-// the most recent 0x0a.
-OUString RTFTokenizer::getPosition()
-{
-    auto const nLine = m_nLineNumber + 1;
-    auto const nOffset = Strm().Tell() - m_nLineStartPos + 1;
-    return OUString::number(nLine) + "," + OUString::number(nOffset);
-}
-
-} // namespace writerfilter::rtftok
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */