summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- sc/source/ui/dbgui/scuiasciiopt.cxx 4
-rw-r--r-- sc/source/ui/docshell/impex.cxx 85
-rw-r--r-- sc/source/ui/inc/impex.hxx 53
-rw-r--r-- tools/inc/tools/stream.hxx 54
-rw-r--r-- tools/source/stream/stream.cxx 87
5 files changed, 138 insertions, 145 deletions
diff --git a/sc/source/ui/dbgui/scuiasciiopt.cxx b/sc/source/ui/dbgui/scuiasciiopt.cxx
index fdb1fa88121e..834d0cc17f9e 100644
--- a/sc/source/ui/dbgui/scuiasciiopt.cxx
+++ b/sc/source/ui/dbgui/scuiasciiopt.cxx
@@ -476,7 +476,7 @@ bool ScImportAsciiDlg::GetLine( sal_uLong nLine, String &rText )
bRet = false;
break;
}
- mpDatStream->ReadCsvLine( rText, !bFixed, maFieldSeparators,
+ ReadCsvLine(*mpDatStream, rText, !bFixed, maFieldSeparators,
mcTextSep);
mnStreamPos = mpDatStream->Tell();
mpRowPosArray[++mnRowPosCount] = mnStreamPos;
@@ -494,7 +494,7 @@ bool ScImportAsciiDlg::GetLine( sal_uLong nLine, String &rText )
else
{
Seek( mpRowPosArray[nLine]);
- mpDatStream->ReadCsvLine( rText, !bFixed, maFieldSeparators, mcTextSep);
+ ReadCsvLine(*mpDatStream, rText, !bFixed, maFieldSeparators, mcTextSep);
mnStreamPos = mpDatStream->Tell();
}
diff --git a/sc/source/ui/docshell/impex.cxx b/sc/source/ui/docshell/impex.cxx
index a10ab3906e1a..b7922b3840e7 100644
--- a/sc/source/ui/docshell/impex.cxx
+++ b/sc/source/ui/docshell/impex.cxx
@@ -1180,7 +1180,7 @@ sal_Bool ScImportExport::ExtText2Doc( SvStream& rStrm )
while(--nSkipLines>0)
{
- rStrm.ReadCsvLine( aLine, !bFixed, rSeps, cStr); // content is ignored
+ ReadCsvLine(rStrm, aLine, !bFixed, rSeps, cStr); // content is ignored
if ( rStrm.IsEof() )
break;
}
@@ -1203,7 +1203,7 @@ sal_Bool ScImportExport::ExtText2Doc( SvStream& rStrm )
{
for( ;; )
{
- rStrm.ReadCsvLine( aLine, !bFixed, rSeps, cStr);
+ ReadCsvLine(rStrm, aLine, !bFixed, rSeps, cStr);
if ( rStrm.IsEof() )
break;
@@ -2109,4 +2109,85 @@ ScFormatFilterPlugin &ScFormatFilter::Get()
return *plugin;
}
+// Returns a pointer to the first occurrence of c in pStr, or 0 if there is none.
+// Precondition: pStr is non-NULL and points to a 0-terminated array.
+inline const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr,
+ sal_Unicode c )
+{
+ while (*pStr) // stops at the terminator, so c == 0 is never matched
+ {
+ if (*pStr == c)
+ return pStr;
+ ++pStr;
+ }
+ return 0; // terminator reached without a match
+}
+
+void ReadCsvLine(SvStream &rStream, String& rStr, sal_Bool bEmbeddedLineBreak,
+ const String& rFieldSeparators, sal_Unicode cFieldQuote,
+ sal_Bool bAllowBackslashEscape)
+{
+ rStream.ReadUniOrByteStringLine(rStr, rStream.GetStreamCharSet()); // first physical line; returned unchanged unless bEmbeddedLineBreak
+
+ if (bEmbeddedLineBreak)
+ {
+ const sal_Unicode* pSeps = rFieldSeparators.GetBuffer();
+
+ // See if the separator(s) include tab.
+ bool bTabSep = lcl_UnicodeStrChr(pSeps, '\t') != NULL;
+
+ xub_StrLen nLastOffset = 0; // index in rStr at which the next scan pass starts
+ xub_StrLen nQuotes = 0; // 0: no quoted field opened yet; odd: a quoted field is open
+ while (!rStream.IsEof() && rStr.Len() < STRING_MAXLEN) // one pass per physical line appended
+ {
+ bool bBackslashEscaped = false; // reset at the start of every scan pass
+ const sal_Unicode *p, *pStart;
+ p = pStart = rStr.GetBuffer();
+ p += nLastOffset; // skip the text already scanned by earlier passes
+ while (*p)
+ {
+ if (nQuotes)
+ {
+ if (bTabSep && *p == '\t' && (nQuotes % 2) != 0)
+ {
+ // When tab-delimited, tab char ends quoted sequence
+ // even if we haven't reached the end quote. Doing
+ // this helps keep mal-formed rows from damaging
+ // other, well-formed rows.
+ nQuotes = 0;
+ break; // leaves the scan; the parity check below then ends the line
+ }
+
+ if (*p == cFieldQuote && !bBackslashEscaped)
+ ++nQuotes; // counted at any position once nQuotes != 0
+ else if (bAllowBackslashEscape)
+ {
+ if (*p == '\\')
+ bBackslashEscaped = !bBackslashEscaped; // a second backslash cancels the first
+ else
+ bBackslashEscaped = false;
+ }
+ }
+ else if (*p == cFieldQuote && (p == pStart ||
+ lcl_UnicodeStrChr( pSeps, p[-1])))
+ nQuotes = 1; // opening quote at line start or right after a separator
+ // A quote character inside a field content does not start
+ // a quote. NOTE(review): only enforced while nQuotes == 0; confirm intent.
+ ++p;
+ }
+
+ if (nQuotes % 2 == 0)
+ break; // no quoted field left open: the logical line is complete
+ else
+ {
+ nLastOffset = rStr.Len(); // next pass resumes at the LF appended below
+ String aNext;
+ rStream.ReadUniOrByteStringLine(aNext, rStream.GetStreamCharSet());
+ rStr += sal_Unicode(_LF);
+ rStr += aNext;
+ }
+ }
+ }
+}
+
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/ui/inc/impex.hxx b/sc/source/ui/inc/impex.hxx
index 302cf4e5c627..8ccfdd96403e 100644
--- a/sc/source/ui/inc/impex.hxx
+++ b/sc/source/ui/inc/impex.hxx
@@ -189,6 +189,59 @@ public:
}
};
+/** Read a CSV (comma separated values) data line using
+ ReadUniOrByteStringLine().
+
+ @param rStream
+ The stream the line is read from. Its GetStreamCharSet() is
+ passed to every ReadUniOrByteStringLine() call.
+
+ @param rStr
+ Receives the line read. If bEmbeddedLineBreak==sal_True,
+ continuation lines are appended to it, each preceded by a
+ line feed.
+
+ @param bEmbeddedLineBreak
+ If sal_True and a line-break occurs inside a field of data,
+ a line feed LF '\n' and the next line are appended. Repeats
+ until a line-break is not in a field. A field is determined
+ by delimiting rFieldSeparators and optionally surrounded by
+ a pair of cFieldQuote characters. For a line-break to be
+ within a field, the field content MUST be surrounded by
+ cFieldQuote characters, and the opening cFieldQuote MUST be
+ at the very start of a line or follow right behind a field
+ separator with no extra characters in between. Anything,
+ including field separators and escaped quotes (by doubling
+ them, or preceding them with a backslash if
+ bAllowBackslashEscape==sal_True) may appear in a quoted
+ field.
+
+ If bEmbeddedLineBreak==sal_False, nothing is parsed and the
+ string returned is simply one ReadUniOrByteStringLine().
+
+ @param rFieldSeparators
+ A list of characters that each may act as a field separator.
+
+ @param cFieldQuote
+ The quote character used.
+
+ @param bAllowBackslashEscape
+ If sal_True, an embedded quote character inside a quoted
+ field may also be escaped with a preceding backslash.
+ Normally, quotes are escaped by doubling them.
+
+ Note: this function returns nothing; check Stream::good() on
+ rStream to detect IO problems during read.
+
+ @ATTENTION
+ Note that the string returned may be truncated even inside
+ a quoted field if STRING_MAXLEN was reached. There
+ currently is no way to exactly determine the conditions,
+ whether this was at a line end, or whether open quotes
+ would have closed the field before the line end, as even a
+ ReadUniOrByteStringLine() may return prematurely but the
+ stream was positioned ahead until the real end of line.
+ Additionally, due to character encoding conversions, string
+ length and bytes read don't necessarily match, and
+ resyncing to a previous position matching the string's
+ length isn't always possible. As a result, a logical line
+ with embedded line breaks and more than STRING_MAXLEN
+ characters will be spoiled, and a subsequent ReadCsvLine()
+ may start under false preconditions.
+ */
+SC_DLLPUBLIC void ReadCsvLine(SvStream &rStream, String& rStr, sal_Bool bEmbeddedLineBreak,
+ const String& rFieldSeparators, sal_Unicode cFieldQuote,
+ sal_Bool bAllowBackslashEscape = sal_False);
#endif
diff --git a/tools/inc/tools/stream.hxx b/tools/inc/tools/stream.hxx
index 85d5f124adc9..4f06610e2f00 100644
--- a/tools/inc/tools/stream.hxx
+++ b/tools/inc/tools/stream.hxx
@@ -432,60 +432,6 @@ public:
sal_Bool WriteUniOrByteChar( sal_Unicode ch )
{ return WriteUniOrByteChar( ch, GetStreamCharSet() ); }
- /** Read a CSV (comma separated values) data line using
- ReadUniOrByteStringLine().
-
- @param bEmbeddedLineBreak
- If sal_True and a line-break occurs inside a field of data,
- a line feed LF '\n' and the next line are appended. Repeats
- until a line-break is not in a field. A field is determined
- by delimiting rFieldSeparators and optionally surrounded by
- a pair of cFieldQuote characters. For a line-break to be
- within a field, the field content MUST be surrounded by
- cFieldQuote characters, and the opening cFieldQuote MUST be
- at the very start of a line or follow right behind a field
- separator with no extra characters in between. Anything,
- including field separators and escaped quotes (by doubling
- them, or preceding them with a backslash if
- bAllowBackslashEscape==sal_True) may appear in a quoted
- field.
-
- If bEmbeddedLineBreak==sal_False, nothing is parsed and the
- string returned is simply one ReadUniOrByteStringLine().
-
- @param rFieldSeparators
- A list of characters that each may act as a field separator.
-
- @param cFieldQuote
- The quote character used.
-
- @param bAllowBackslashEscape
- If sal_True, an embedded quote character inside a quoted
- field may also be escaped with a preceding backslash.
- Normally, quotes are escaped by doubling them.
-
- check Stream::good() to detect IO problems during read
-
- @ATTENTION
- Note that the string returned may be truncated even inside
- a quoted field if STRING_MAXLEN was reached. There
- currently is no way to exactly determine the conditions,
- whether this was at a line end, or whether open quotes
- would have closed the field before the line end, as even a
- ReadUniOrByteStringLine() may return prematurely but the
- stream was positioned ahead until the real end of line.
- Additionally, due to character encoding conversions, string
- length and bytes read don't necessarily match, and
- resyncing to a previous position matching the string's
- length isn't always possible. As a result, a logical line
- with embedded line breaks and more than STRING_MAXLEN
- characters will be spoiled, and a subsequent ReadCsvLine()
- may start under false preconditions.
- */
- void ReadCsvLine( String& rStr, sal_Bool bEmbeddedLineBreak,
- const String& rFieldSeparators, sal_Unicode cFieldQuote,
- sal_Bool bAllowBackslashEscape = sal_False);
-
void SetBufferSize( sal_uInt16 nBufSize );
sal_uInt16 GetBufferSize() const { return nBufSize; }
diff --git a/tools/source/stream/stream.cxx b/tools/source/stream/stream.cxx
index 8c0efb66c6f2..fea6025e5263 100644
--- a/tools/source/stream/stream.cxx
+++ b/tools/source/stream/stream.cxx
@@ -1019,93 +1019,6 @@ sal_Bool SvStream::StartReadingUnicodeText( rtl_TextEncoding eReadBomCharSet )
/*************************************************************************
|*
-|* Stream::ReadCsvLine()
-|*
-*************************************************************************/
-
-// Precondition: pStr is guaranteed to be non-NULL and points to a 0-terminated
-// array.
-inline const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr,
- sal_Unicode c )
-{
- while (*pStr)
- {
- if (*pStr == c)
- return pStr;
- ++pStr;
- }
- return 0;
-}
-
-void SvStream::ReadCsvLine( String& rStr, sal_Bool bEmbeddedLineBreak,
- const String& rFieldSeparators, sal_Unicode cFieldQuote,
- sal_Bool bAllowBackslashEscape)
-{
- ReadUniOrByteStringLine(rStr, GetStreamCharSet());
-
- if (bEmbeddedLineBreak)
- {
- const sal_Unicode* pSeps = rFieldSeparators.GetBuffer();
-
- // See if the separator(s) include tab.
- bool bTabSep = lcl_UnicodeStrChr(pSeps, '\t') != NULL;
-
- xub_StrLen nLastOffset = 0;
- xub_StrLen nQuotes = 0;
- while (!IsEof() && rStr.Len() < STRING_MAXLEN)
- {
- bool bBackslashEscaped = false;
- const sal_Unicode *p, *pStart;
- p = pStart = rStr.GetBuffer();
- p += nLastOffset;
- while (*p)
- {
- if (nQuotes)
- {
- if (bTabSep && *p == '\t' && (nQuotes % 2) != 0)
- {
- // When tab-delimited, tab char ends quoted sequence
- // even if we haven't reached the end quote. Doing
- // this helps keep mal-formed rows from damaging
- // other, well-formed rows.
- nQuotes = 0;
- break;
- }
-
- if (*p == cFieldQuote && !bBackslashEscaped)
- ++nQuotes;
- else if (bAllowBackslashEscape)
- {
- if (*p == '\\')
- bBackslashEscaped = !bBackslashEscaped;
- else
- bBackslashEscaped = false;
- }
- }
- else if (*p == cFieldQuote && (p == pStart ||
- lcl_UnicodeStrChr( pSeps, p[-1])))
- nQuotes = 1;
- // A quote character inside a field content does not start
- // a quote.
- ++p;
- }
-
- if (nQuotes % 2 == 0)
- break;
- else
- {
- nLastOffset = rStr.Len();
- String aNext;
- ReadUniOrByteStringLine(aNext, GetStreamCharSet());
- rStr += sal_Unicode(_LF);
- rStr += aNext;
- }
- }
- }
-}
-
-/*************************************************************************
-|*
|* Stream::SeekRel()
|*
*************************************************************************/