summaryrefslogtreecommitdiff
path: root/sc/source
diff options
context:
space:
mode:
author Kohei Yoshida <kohei.yoshida@collabora.com> 2013-12-30 12:16:35 -0500
committer Kohei Yoshida <kohei.yoshida@collabora.com> 2013-12-30 12:33:28 -0500
commit 9a623cdca281a682d39b423aefac392c2cc22cf7 (patch)
tree d8c7222f148818b163b40f1d9302f821542415c8 /sc/source
parent 3ccb783be184102075fe1f9814f05e85d4968c32 (diff)
Parse CSV lines in the reader thread.
Change-Id: I6329a0e6e6fa6576df2ed473482d558bfd6cce08
Diffstat (limited to 'sc/source')
-rw-r--r-- sc/source/core/tool/stringutil.cxx 150
-rw-r--r-- sc/source/ui/docshell/datastream.cxx 209
-rw-r--r-- sc/source/ui/inc/datastream.hxx 37
3 files changed, 293 insertions, 103 deletions
diff --git a/sc/source/core/tool/stringutil.cxx b/sc/source/core/tool/stringutil.cxx
index e711bcba40d4..5bdc2c2fe752 100644
--- a/sc/source/core/tool/stringutil.cxx
+++ b/sc/source/core/tool/stringutil.cxx
@@ -18,11 +18,13 @@
*/
#include "stringutil.hxx"
-#include "rtl/ustrbuf.hxx"
-#include "rtl/math.hxx"
#include "global.hxx"
#include "svl/zforlist.hxx"
+#include <rtl/ustrbuf.hxx>
+#include <rtl/strbuf.hxx>
+#include <rtl/math.hxx>
+
ScSetStringParam::ScSetStringParam() :
mpNumFormatter(NULL),
mbDetectNumberFormat(true),
@@ -194,6 +196,150 @@ bool ScStringUtil::parseSimpleNumber(
return true;
}
+bool ScStringUtil::parseSimpleNumber(
+ const char* p, size_t n, char dsep, char gsep, double& rVal)
+{
+ // Checks that the n bytes at p form a simple number (rules below), then
+ // converts the collected characters with rtl::math::stringToDouble().
+ // Almost the entire pre-check exists only for the check of grouped digits;
+ // without it we could call stringToDouble() just after exchanging ascii space
+ // with non-breaking space, and detect its NaN/Inf results via rtl::math::isFinite().
+
+ /* TODO: The grouped digits check isn't even valid for locales that do not
+ * group in thousands ... e.g. Indian locales. But that's something also
+ * the number scanner doesn't implement yet, only the formatter. */
+
+ OStringBuffer aBuf; // characters handed to stringToDouble(); group separators are not copied
+
+ size_t i = 0;
+ const char* pLast = p + (n-1); // NOTE(review): n-1 wraps when n == 0; pLast is not read then (i == n returns first)
+ sal_Int32 nPosDSep = -1, nPosGSep = -1; // index of the decimal / latest group separator, -1 if none
+ sal_uInt32 nDigitCount = 0; // digits seen since the last separator or exponent character
+ sal_Int32 nPosExponent = -1; // index of the exponent character, -1 if none
+
+ // Skip preceding spaces.
+ for (i = 0; i < n; ++i, ++p)
+ {
+ char c = *p;
+ if (c != ' ')
+ // first non-space character; p now points at it.
+ break;
+ }
+
+ if (i == n)
+ // the whole string is space (or n was 0). Fail.
+ return false;
+
+ n -= i; // Subtract the length of the preceding spaces.
+
+ // Walk pLast back over trailing spaces, shrinking n to the trimmed length.
+ for (; p != pLast; --pLast, --n)
+ {
+ char c = *pLast;
+ if (c != ' ')
+ // Non space character. Exit.
+ break;
+ }
+
+ for (i = 0; i < n; ++i, ++p) // from here on i is relative to the first non-space character
+ {
+ char c = *p;
+
+ if ('0' <= c && c <= '9')
+ {
+ // this is a digit.
+ aBuf.append(c);
+ ++nDigitCount;
+ }
+ else if (c == dsep)
+ {
+ // this is a decimal separator.
+
+ if (nPosDSep >= 0)
+ // a second decimal separator -> not a valid number.
+ return false;
+
+ if (nPosGSep >= 0 && i - nPosGSep != 4)
+ // a group separator was seen, so the decimal separator must sit
+ // exactly 4 positions (3 digits) after it.
+ return false;
+
+ nPosDSep = i;
+ nPosGSep = -1;
+ aBuf.append(c);
+ nDigitCount = 0;
+ }
+ else if (c == gsep)
+ {
+ // this is a group (thousand) separator.
+
+ if (i == 0)
+ // not allowed as the first character.
+ return false;
+
+ if (nPosDSep >= 0)
+ // not allowed after the decimal separator.
+ return false;
+
+ if (nPosGSep >= 0 && nDigitCount != 3)
+ // must be exactly 3 digits since the last group separator.
+ return false;
+
+ if (nPosExponent >= 0)
+ // not allowed in exponent.
+ return false;
+
+ nPosGSep = i;
+ nDigitCount = 0;
+ }
+ else if (c == '-' || c == '+')
+ {
+ // A sign must be the first character if it's given, or immediately
+ // follow the exponent character if present.
+ if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
+ aBuf.append(c);
+ else
+ return false;
+ }
+ else if (c == 'E' || c == 'e')
+ {
+ // this is an exponent designator.
+
+ if (nPosExponent >= 0)
+ // Only one exponent allowed.
+ return false;
+
+ if (nPosGSep >= 0 && nDigitCount != 3)
+ // must be exactly 3 digits since the last group separator.
+ return false;
+
+ aBuf.append(c);
+ nPosExponent = i;
+ nPosDSep = -1; // separator tracking starts over for the exponent part
+ nPosGSep = -1;
+ nDigitCount = 0;
+ }
+ else
+ return false; // any other character makes this a non-number
+ }
+
+ // finished scanning; validate the final digit group.
+
+ if (nPosGSep >= 0 && nDigitCount != 3)
+ // must be exactly 3 digits since the last group separator.
+ return false;
+
+ rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
+ sal_Int32 nParseEnd = 0;
+ OString aString( aBuf.makeStringAndClear());
+ rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd); // rVal is written even if we return false below
+ if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
+ // Not a valid number or not entire string consumed.
+ return false;
+
+ return true;
+}
+
sal_Int32 ScStringUtil::GetQuotedTokenCount(const OUString &rIn, const OUString& rQuotedPairs, sal_Unicode cTok )
{
assert( !(rQuotedPairs.getLength()%2) );
diff --git a/sc/source/ui/docshell/datastream.cxx b/sc/source/ui/docshell/datastream.cxx
index 1b7f0d233f43..a21cba402d4b 100644
--- a/sc/source/ui/docshell/datastream.cxx
+++ b/sc/source/ui/docshell/datastream.cxx
@@ -56,6 +56,8 @@ double datastream_get_time(int nIdx)
return fTimes[ nIdx ];
}
+namespace {
+
inline double getNow()
{
TimeValue now;
@@ -63,6 +65,50 @@ inline double getNow()
return static_cast<double>(now.Seconds) + static_cast<double>(now.Nanosec) / 1000000000.0;
}
+#if ENABLE_ORCUS
+
+class CSVHandler // orcus csv_parser handler for one line. NOTE(review): only defined under ENABLE_ORCUS, yet the reader thread's read loop uses it and maConfig with no visible guard - confirm non-orcus builds
+{
+ DataStream::Line& mrLine; // line whose maCells receives the parsed cells
+ size_t mnColCount; // maximum number of cells kept per line
+ size_t mnCols; // cells stored so far
+ const char* mpLineHead; // start of mrLine.maLine's buffer, taken at construction
+
+public:
+ CSVHandler( DataStream::Line& rLine, size_t nColCount ) :
+ mrLine(rLine), mnColCount(nColCount), mnCols(0), mpLineHead(rLine.maLine.getStr()) {}
+
+ void begin_parse() {}
+ void end_parse() {}
+ void begin_row() {}
+ void end_row() {}
+
+ void cell(const char* p, size_t n) // stores one cell as a number or as an offset/size into the line
+ {
+ if (mnCols >= mnColCount) // cells beyond the column count are dropped
+ return;
+
+ DataStream::Cell aCell;
+ if (ScStringUtil::parseSimpleNumber(p, n, '.', ',', aCell.mfValue))
+ {
+ aCell.mbValue = true;
+ }
+ else
+ {
+ aCell.mbValue = false;
+ aCell.maStr.Pos = std::distance(mpLineHead, p); // NOTE(review): assumes p points into mrLine.maLine's buffer - confirm the parser never passes a separate buffer (e.g. for quoted text)
+ aCell.maStr.Size = n;
+ }
+ mrLine.maCells.push_back(aCell);
+
+ ++mnCols;
+ }
+};
+
+#endif
+
+}
+
namespace datastreams {
class CallerThread : public salhelper::Thread
@@ -96,7 +142,7 @@ private:
}
};
-void emptyLineQueue( std::queue<LinesList*>& rQueue )
+void emptyLineQueue( std::queue<DataStream::LinesType*>& rQueue )
{
while (!rQueue.empty())
{
@@ -108,22 +154,34 @@ void emptyLineQueue( std::queue<LinesList*>& rQueue )
class ReaderThread : public salhelper::Thread
{
SvStream *mpStream;
+ size_t mnColCount;
bool mbTerminate;
osl::Mutex maMtxTerminate;
- std::queue<LinesList* > maPendingLines;
- std::queue<LinesList* > maUsedLines;
+ std::queue<DataStream::LinesType*> maPendingLines;
+ std::queue<DataStream::LinesType*> maUsedLines;
osl::Mutex maMtxLines;
osl::Condition maCondReadStream;
osl::Condition maCondConsume;
+#if ENABLE_ORCUS
+ orcus::csv_parser_config maConfig;
+#endif
+
public:
- ReaderThread(SvStream *pData):
+ ReaderThread(SvStream *pData, size_t nColCount):
Thread("ReaderThread"),
mpStream(pData),
- mbTerminate(false) {}
+ mnColCount(nColCount),
+ mbTerminate(false)
+ {
+#if ENABLE_ORCUS
+ maConfig.delimiters.push_back(',');
+ maConfig.text_qualifier = '"';
+#endif
+ }
virtual ~ReaderThread()
{
@@ -156,9 +214,9 @@ public:
maCondConsume.reset();
}
- LinesList* popNewLines()
+ DataStream::LinesType* popNewLines()
{
- LinesList* pLines = maPendingLines.front();
+ DataStream::LinesType* pLines = maPendingLines.front();
maPendingLines.pop();
return pLines;
}
@@ -174,7 +232,7 @@ public:
return !maPendingLines.empty();
}
- void pushUsedLines( LinesList* pLines )
+ void pushUsedLines( DataStream::LinesType* pLines )
{
maUsedLines.push(pLines);
}
@@ -189,7 +247,7 @@ private:
{
while (!isTerminateRequested())
{
- LinesList* pLines = NULL;
+ DataStream::LinesType* pLines = NULL;
osl::ResettableMutexGuard aGuard(maMtxLines);
if (!maUsedLines.empty())
@@ -202,12 +260,20 @@ private:
else
{
aGuard.clear(); // unlock
- pLines = new LinesList(10);
+ pLines = new DataStream::LinesType(10);
}
// Read & store new lines from stream.
- for (size_t i = 0; i < pLines->size(); ++i)
- mpStream->ReadLine( pLines->at(i) );
+ for (size_t i = 0, n = pLines->size(); i < n; ++i)
+ {
+ DataStream::Line& rLine = (*pLines)[i];
+ rLine.maCells.clear();
+ mpStream->ReadLine(rLine.maLine);
+
+ CSVHandler aHdl(rLine, mnColCount);
+ orcus::csv_parser<CSVHandler> parser(rLine.maLine.getStr(), rLine.maLine.getLength(), aHdl, maConfig);
+ parser.parse();
+ }
aGuard.reset(); // lock
while (!isTerminateRequested() && maPendingLines.size() >= 8)
@@ -228,6 +294,19 @@ private:
}
+DataStream::Cell::Cell() : mfValue(0.0), mbValue(true) {} // default: numeric cell holding 0.0
+
+DataStream::Cell::Cell( const Cell& r ) : mbValue(r.mbValue) // copies only the member selected by r.mbValue
+{
+ if (r.mbValue)
+ mfValue = r.mfValue; // numeric cell
+ else
+ {
+ maStr.Pos = r.maStr.Pos; // string cell: copy the position/size pair
+ maStr.Size = r.maStr.Size;
+ }
+}
+
void DataStream::MakeToolbarVisible()
{
css::uno::Reference< css::frame::XFrame > xFrame =
@@ -312,13 +391,13 @@ DataStream::~DataStream()
delete mpLines;
}
-OString DataStream::ConsumeLine()
+DataStream::Line DataStream::ConsumeLine()
{
if (!mpLines || mnLinesCount >= mpLines->size())
{
mnLinesCount = 0;
if (mxReaderThread->isTerminateRequested())
- return OString();
+ return Line();
osl::ResettableMutexGuard aGuard(mxReaderThread->getLinesMutex());
if (mpLines)
@@ -402,7 +481,7 @@ void DataStream::StartImport()
pStream = new SvScriptStream(msURL);
else
pStream = new SvFileStream(msURL, STREAM_READ);
- mxReaderThread = new datastreams::ReaderThread( pStream );
+ mxReaderThread = new datastreams::ReaderThread(pStream, maStartRange.aEnd.Col() - maStartRange.aStart.Col() + 1);
mxReaderThread->launch();
}
mbRunning = true;
@@ -476,79 +555,10 @@ void DataStream::MoveData()
#if ENABLE_ORCUS
-namespace {
-
-struct StrVal
-{
- ScAddress maPos;
- OUString maStr;
-
- StrVal( const ScAddress& rPos, const OUString& rStr ) : maPos(rPos), maStr(rStr) {}
-};
-
-struct NumVal
-{
- ScAddress maPos;
- double mfVal;
-
- NumVal( const ScAddress& rPos, double fVal ) : maPos(rPos), mfVal(fVal) {}
-};
-
-typedef std::vector<StrVal> StrValArray;
-typedef std::vector<NumVal> NumValArray;
-
-/**
- * This handler handles a single line CSV input.
- */
-class CSVHandler
-{
- ScAddress maPos;
- SCCOL mnEndCol;
-
- StrValArray maStrs;
- NumValArray maNums;
-
-public:
- CSVHandler( const ScAddress& rPos, SCCOL nEndCol ) : maPos(rPos), mnEndCol(nEndCol) {}
-
- void begin_parse() {}
- void end_parse() {}
- void begin_row() {}
- void end_row() {}
-
- void cell(const char* p, size_t n)
- {
- if (maPos.Col() <= mnEndCol)
- {
- OUString aStr(p, n, RTL_TEXTENCODING_UTF8);
- double fVal;
- if (ScStringUtil::parseSimpleNumber(aStr, '.', ',', fVal))
- maNums.push_back(NumVal(maPos, fVal));
- else
- maStrs.push_back(StrVal(maPos, aStr));
- }
- maPos.IncCol();
- }
-
- const StrValArray& getStrs() const { return maStrs; }
- const NumValArray& getNums() const { return maNums; }
-};
-
-}
-
void DataStream::Text2Doc()
{
- OString aLine = ConsumeLine();
- orcus::csv_parser_config aConfig;
- aConfig.delimiters.push_back(',');
- aConfig.text_qualifier = '"';
- CSVHandler aHdl(ScAddress(maStartRange.aStart.Col(), mnCurRow, maStartRange.aStart.Tab()), maStartRange.aEnd.Col());
- orcus::csv_parser<CSVHandler> parser(aLine.getStr(), aLine.getLength(), aHdl, aConfig);
- parser.parse();
-
- const StrValArray& rStrs = aHdl.getStrs();
- const NumValArray& rNums = aHdl.getNums();
- if (rStrs.empty() && rNums.empty() && mbRefreshOnEmptyLine)
+ Line aLine = ConsumeLine();
+ if (aLine.maCells.empty() && mbRefreshOnEmptyLine)
{
// Empty line detected. Trigger refresh and discard it.
Refresh();
@@ -559,15 +569,24 @@ void DataStream::Text2Doc()
MoveData();
{
- StrValArray::const_iterator it = rStrs.begin(), itEnd = rStrs.end();
- for (; it != itEnd; ++it)
- maDocAccess.setStringCell(it->maPos, it->maStr);
- }
-
- {
- NumValArray::const_iterator it = rNums.begin(), itEnd = rNums.end();
- for (; it != itEnd; ++it)
- maDocAccess.setNumericCell(it->maPos, it->mfVal);
+ std::vector<Cell>::const_iterator it = aLine.maCells.begin(), itEnd = aLine.maCells.end();
+ SCCOL nCol = maStartRange.aStart.Col();
+ const char* pLineHead = aLine.maLine.getStr();
+ for (; it != itEnd; ++it, ++nCol)
+ {
+ const Cell& rCell = *it;
+ if (rCell.mbValue)
+ {
+ maDocAccess.setNumericCell(
+ ScAddress(nCol, mnCurRow, maStartRange.aStart.Tab()), rCell.mfValue);
+ }
+ else
+ {
+ maDocAccess.setStringCell(
+ ScAddress(nCol, mnCurRow, maStartRange.aStart.Tab()),
+ OUString(pLineHead+rCell.maStr.Pos, rCell.maStr.Size, RTL_TEXTENCODING_UTF8));
+ }
+ }
}
fTimes[ DEBUG_TIME_IMPORT ] = getNow() - fStart;
diff --git a/sc/source/ui/inc/datastream.hxx b/sc/source/ui/inc/datastream.hxx
index 5a4d8cd444b7..5af2dc7c29a6 100644
--- a/sc/source/ui/inc/datastream.hxx
+++ b/sc/source/ui/inc/datastream.hxx
@@ -33,15 +33,37 @@ namespace datastreams {
class ReaderThread;
}
-typedef std::vector<OString> LinesList;
class DataStream : boost::noncopyable
{
- OString ConsumeLine();
- void MoveData();
- void Text2Doc();
-
public:
+ struct Cell // one parsed field of a line: either a number or a reference to text inside the line
+ {
+ struct Str // locates a string field within Line::maLine
+ {
+ size_t Pos; // offset of the field from the start of the line buffer
+ size_t Size; // length of the field as reported by the parser
+ };
+
+ union
+ {
+ Str maStr; // meaningful when mbValue is false
+ double mfValue; // meaningful when mbValue is true
+ };
+
+ bool mbValue; // true: numeric cell (mfValue); false: string cell (maStr)
+
+ Cell();
+ Cell( const Cell& r );
+ };
+
+ struct Line // one raw input line together with its parsed cells
+ {
+ OString maLine; // the line as read from the stream; string cells index into it
+ std::vector<Cell> maCells; // cells parsed from maLine, in column order
+ };
+ typedef std::vector<Line> LinesType; // batch of lines handed between the reader thread and DataStream
+
enum MoveType { NO_MOVE, RANGE_DOWN, MOVE_DOWN, MOVE_UP };
enum { SCRIPT_STREAM = 1, VALUES_IN_LINE = 2 };
@@ -75,6 +97,9 @@ public:
void SetRefreshOnEmptyLine( bool bVal );
private:
+ Line ConsumeLine();
+ void MoveData();
+ void Text2Doc();
void Refresh();
private:
@@ -89,7 +114,7 @@ private:
bool mbRunning;
bool mbValuesInLine;
bool mbRefreshOnEmptyLine;
- LinesList* mpLines;
+ LinesType* mpLines;
size_t mnLinesCount;
size_t mnLinesSinceRefresh;
double mfLastRefreshTime;