diff options
author | Eike Rathke <erack@redhat.com> | 2012-04-10 18:30:07 +0200 |
---|---|---|
committer | Eike Rathke <erack@redhat.com> | 2012-04-10 19:32:09 +0200 |
commit | 8cd05e9cf1152b21528c6f1a5bda3d949dc49791 (patch) | |
tree | 0f37b5ff5447c3d6088b64a01d75e4801cdab847 /tools | |
parent | bf0629e09d176555aaa10f60061b206103cc0295 (diff) |
resolved fdo#48501 enable line size >64k in SvStream::Read*Line()
CSV and other text formats may contain lines longer than 64k, which so far were
truncated due to limitations in ByteString/UniString/String, even when a line
consists of several fields that are each shorter than 64k.
Introduced additional SvStream methods that read into rtl::OString and
rtl::OUString, and changed SvStream::ReadUniOrByteStringLine() to fill only an
rtl::OUString.
Made Calc CSV import use those.
Diffstat (limited to 'tools')
-rw-r--r-- | tools/inc/tools/stream.hxx | 67 | ||||
-rw-r--r-- | tools/source/stream/stream.cxx | 51 |
2 files changed, 99 insertions, 19 deletions
diff --git a/tools/inc/tools/stream.hxx b/tools/inc/tools/stream.hxx index 5b5c443ac354..dc4505a06a35 100644 --- a/tools/inc/tools/stream.hxx +++ b/tools/inc/tools/stream.hxx @@ -380,9 +380,37 @@ public: // next Tell() <= nSize sal_Bool SetStreamSize( sal_Size nSize ); - sal_Bool ReadLine( rtl::OString& rStr ); + /** Read a line of bytes. + + @param nMaxBytesToRead + Maximum of bytes to read, if line is longer it will be + truncated. + + NOTE that the default is one character less than + STRING_MAXLEN to prevent problems after conversion to + String that may be lurking in various places doing + something like + for (sal_uInt16 i=0; i < aString.Len(); ++i) + causing endless loops ... + */ + sal_Bool ReadLine( rtl::OString& rStr, sal_Int32 nMaxBytesToRead = 0xFFFE ); sal_Bool WriteLine( const rtl::OString& rStr ); + /** Read a line of bytes. + + @param nMaxBytesToRead + Maximum of bytes to read, if line is longer it will be + truncated. + + NOTE that the default is one character less than + STRING_MAXLEN to prevent problems after conversion to + String that may be lurking in various places doing + something like + for (sal_uInt16 i=0; i < aString.Len(); ++i) + causing endless loops ... + */ + sal_Bool ReadByteStringLine( rtl::OUString& rStr, rtl_TextEncoding eSrcCharSet, + sal_Int32 nMaxBytesToRead = 0xFFFE ); sal_Bool ReadByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSet ); sal_Bool WriteByteStringLine( const String& rStr, rtl_TextEncoding eDestCharSet ); @@ -403,17 +431,44 @@ public: */ sal_Bool StartReadingUnicodeText( rtl_TextEncoding eReadBomCharSet ); - /// Read a line of Unicode - sal_Bool ReadUniStringLine( String& rStr ); + /** Read a line of Unicode. + + @param nMaxCodepointsToRead + Maximum of codepoints (UCS-2 or UTF-16 pairs, not + bytes) to read, if line is longer it will be truncated. 
+ + NOTE that the default is one character less than + STRING_MAXLEN to prevent problems after conversion to + String that may be lurking in various places doing + something like + for (sal_uInt16 i=0; i < aString.Len(); ++i) + causing endless loops ... + */ + sal_Bool ReadUniStringLine( rtl::OUString& rStr, sal_Int32 nMaxCodepointsToRead = 0xFFFE ); /// Read a 32bit length prefixed sequence of utf-16 if eSrcCharSet==RTL_TEXTENCODING_UNICODE, /// otherwise read a 16bit length prefixed sequence of bytes and convert from eSrcCharSet rtl::OUString ReadUniOrByteString(rtl_TextEncoding eSrcCharSet); /// Write a 32bit length prefixed sequence of utf-16 if eSrcCharSet==RTL_TEXTENCODING_UNICODE, /// otherwise convert to eSrcCharSet and write a 16bit length prefixed sequence of bytes SvStream& WriteUniOrByteString( const rtl::OUString& rStr, rtl_TextEncoding eDestCharSet ); - /// Read a line of Unicode if eSrcCharSet==RTL_TEXTENCODING_UNICODE, - /// otherwise read a line of Bytecode and convert from eSrcCharSet - sal_Bool ReadUniOrByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSet ); + + /** Read a line of Unicode if eSrcCharSet==RTL_TEXTENCODING_UNICODE, + otherwise read a line of Bytecode and convert from eSrcCharSet + + @param nMaxCodepointsToRead + Maximum of codepoints (2 bytes if Unicode, bytes if not + Unicode) to read, if line is longer it will be + truncated. + + NOTE that the default is one character less than + STRING_MAXLEN to prevent problems after conversion to + String that may be lurking in various places doing + something like + for (sal_uInt16 i=0; i < aString.Len(); ++i) + causing endless loops ... 
+ */ + sal_Bool ReadUniOrByteStringLine( rtl::OUString& rStr, rtl_TextEncoding eSrcCharSet, + sal_Int32 nMaxCodepointsToRead = 0xFFFE ); /// Write a sequence of Unicode characters if eDestCharSet==RTL_TEXTENCODING_UNICODE, /// otherwise write a sequence of Bytecodes converted to eDestCharSet sal_Bool WriteUnicodeOrByteText( const String& rStr, rtl_TextEncoding eDestCharSet ); diff --git a/tools/source/stream/stream.cxx b/tools/source/stream/stream.cxx index 1da4096a92c7..96cabc266c34 100644 --- a/tools/source/stream/stream.cxx +++ b/tools/source/stream/stream.cxx @@ -647,6 +647,15 @@ void SvStream::ResetError() |* *************************************************************************/ +sal_Bool SvStream::ReadByteStringLine( rtl::OUString& rStr, rtl_TextEncoding eSrcCharSet, + sal_Int32 nMaxBytesToRead ) +{ + rtl::OString aStr; + sal_Bool bRet = ReadLine( aStr, nMaxBytesToRead); + rStr = rtl::OStringToOUString(aStr, eSrcCharSet); + return bRet; +} + sal_Bool SvStream::ReadByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSet ) { rtl::OString aStr; @@ -655,7 +664,7 @@ sal_Bool SvStream::ReadByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSe return bRet; } -sal_Bool SvStream::ReadLine(rtl::OString& rStr) +sal_Bool SvStream::ReadLine( rtl::OString& rStr, sal_Int32 nMaxBytesToRead ) { sal_Char buf[256+1]; sal_Bool bEnd = sal_False; @@ -663,7 +672,7 @@ sal_Bool SvStream::ReadLine(rtl::OString& rStr) sal_Char c = 0; sal_Size nTotalLen = 0; - rtl::OStringBuffer aBuf; + rtl::OStringBuffer aBuf(4096); while( !bEnd && !GetError() ) // !!! nicht auf EOF testen, // !!! weil wir blockweise // !!! 
lesen @@ -695,8 +704,15 @@ sal_Bool SvStream::ReadLine(rtl::OString& rStr) buf[n] = c; ++n; } - aBuf.append(buf, n); nTotalLen += j; + if (nTotalLen > static_cast<sal_Size>(nMaxBytesToRead)) + { + n -= nTotalLen - nMaxBytesToRead; + nTotalLen = nMaxBytesToRead; + bEnd = sal_True; + } + if ( n ) + aBuf.append(buf, n); } if ( !bEnd && !GetError() && aBuf.getLength() ) @@ -723,7 +739,7 @@ sal_Bool SvStream::ReadLine(rtl::OString& rStr) return bEnd; } -sal_Bool SvStream::ReadUniStringLine( String& rStr ) +sal_Bool SvStream::ReadUniStringLine( rtl::OUString& rStr, sal_Int32 nMaxCodepointsToRead ) { sal_Unicode buf[256+1]; sal_Bool bEnd = sal_False; @@ -733,7 +749,7 @@ sal_Bool SvStream::ReadUniStringLine( String& rStr ) DBG_ASSERT( sizeof(sal_Unicode) == sizeof(sal_uInt16), "ReadUniStringLine: swapping sizeof(sal_Unicode) not implemented" ); - rStr.Erase(); + rtl::OUStringBuffer aBuf(4096); while( !bEnd && !GetError() ) // !!! nicht auf EOF testen, // !!! weil wir blockweise // !!! lesen @@ -742,10 +758,11 @@ sal_Bool SvStream::ReadUniStringLine( String& rStr ) nLen /= sizeof(sal_Unicode); if ( !nLen ) { - if ( rStr.Len() == 0 ) + if ( aBuf.getLength() == 0 ) { // der allererste Blockread hat fehlgeschlagen -> Abflug bIsEof = sal_True; + rStr = rtl::OUString(); return sal_False; } else @@ -774,12 +791,18 @@ sal_Bool SvStream::ReadUniStringLine( String& rStr ) ++n; } } - if ( n ) - rStr.Append( buf, n ); nTotalLen += j; + if (nTotalLen > static_cast<sal_Size>(nMaxCodepointsToRead)) + { + n -= nTotalLen - nMaxCodepointsToRead; + nTotalLen = nMaxCodepointsToRead; + bEnd = sal_True; + } + if ( n ) + aBuf.append( buf, n ); } - if ( !bEnd && !GetError() && rStr.Len() ) + if ( !bEnd && !GetError() && aBuf.getLength() ) bEnd = sal_True; nOldFilePos += nTotalLen * sizeof(sal_Unicode); @@ -799,20 +822,22 @@ sal_Bool SvStream::ReadUniStringLine( String& rStr ) if ( bEnd ) bIsEof = sal_False; + rStr = aBuf.makeStringAndClear(); return bEnd; } -sal_Bool 
SvStream::ReadUniOrByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSet ) +sal_Bool SvStream::ReadUniOrByteStringLine( rtl::OUString& rStr, rtl_TextEncoding eSrcCharSet, + sal_Int32 nMaxCodepointsToRead ) { if ( eSrcCharSet == RTL_TEXTENCODING_UNICODE ) - return ReadUniStringLine( rStr ); + return ReadUniStringLine( rStr, nMaxCodepointsToRead ); else - return ReadByteStringLine( rStr, eSrcCharSet ); + return ReadByteStringLine( rStr, eSrcCharSet, nMaxCodepointsToRead ); } rtl::OString read_zeroTerminated_uInt8s_ToOString(SvStream& rStream) { - rtl::OStringBuffer aOutput; + rtl::OStringBuffer aOutput(256); sal_Char buf[ 256 + 1 ]; sal_Bool bEnd = sal_False; |