1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#pragma once
#include <o3tl/deleter.hxx>
#include <sot/formats.hxx>
#include <address.hxx>
#include <tools/stream.hxx>
#include <com/sun/star/uno/Any.hxx>
class ScDocShell;
class ScDocument;
class ScAsciiOptions;
/**
 * Options that steer how the content of multi-line cells is converted when
 * exporting to certain lossy formats (csv, for example).
 *
 * All three settings can be passed to the constructor; every argument has a
 * default, so the struct is also default-constructible.
 */
struct ScExportTextOptions
{
    /// Selects the treatment of line breaks inside a cell.
    /// NOTE(review): the enumerator names suggest "convert to the system line
    /// end", "convert to a blank" and "leave untouched"; the actual handling
    /// lives in the exporter, which is not part of this header.
    enum NewlineConversion
    {
        ToSystem,
        ToSpace,
        None
    };

    /**
     * @param eNewlineConversion   stored in meNewlineConversion (default ToSystem).
     * @param cSeparatorConvertTo  stored in mcSeparatorConvertTo (default 0).
     * @param bAddQuotes           stored in mbAddQuotes (default false).
     */
    ScExportTextOptions( NewlineConversion eNewlineConversion = ToSystem,
                         sal_Unicode cSeparatorConvertTo = 0,
                         bool bAddQuotes = false )
        : meNewlineConversion{ eNewlineConversion }
        , mcSeparatorConvertTo{ cSeparatorConvertTo }
        , mbAddQuotes{ bAddQuotes }
    {
    }

    /// Requested line-break conversion.
    NewlineConversion meNewlineConversion;
    /// Character that a separator is converted to.
    sal_Unicode mcSeparatorConvertTo;
    /// Quote flag as handed to the constructor.
    /// NOTE(review): presumably "surround exported text with quotes" — confirm in the exporter.
    bool mbAddQuotes;
};
class SC_DLLPUBLIC ScImportExport
{
ScDocShell* pDocSh;
ScDocument& rDoc;
std::unique_ptr<ScDocument, o3tl::default_delete<ScDocument>> pUndoDoc;
ScRange aRange;
OUString aStreamPath;
OUString aNonConvertibleChars;
OUString maFilterOptions;
sal_uInt32 nSizeLimit;
SCROW nMaxImportRow;
sal_Unicode cSep; // Separator
sal_Unicode cStr; // String Delimiter
bool bFormulas; // Formula in Text?
bool bIncludeFiltered; // include filtered rows? (default true)
bool bAll; // no selection
bool bSingle; // Single selection
bool bUndo; // with Undo?
bool bOverflowRow; // too many rows
bool bOverflowCol; // too many columns
bool bOverflowCell; // too much data for a cell
bool mbApi;
bool mbImportBroadcast; // whether or not to broadcast after data import.
bool mbOverwriting; // Whether we could be overwriting existing values (paste).
// In this case we cannot use the insert optimization, but we
// do not need to broadcast after the import.
bool mbIncludeBOM; // Whether to include a byte-order-mark in the output.
ScExportTextOptions mExportTextOptions;
std::unique_ptr<ScAsciiOptions> pExtOptions; // extended options
bool StartPaste(); // Protect check, set up Undo
void EndPaste(bool bAutoRowHeight = true); // Undo/Redo actions, Repaint
bool Doc2Text( SvStream& );
bool Text2Doc( SvStream& );
bool Doc2Sylk( SvStream& );
bool Sylk2Doc( SvStream& );
bool Doc2HTML( SvStream&, const OUString& );
bool Doc2RTF( SvStream& );
bool Doc2Dif( SvStream& );
bool Dif2Doc( SvStream& );
bool ExtText2Doc( SvStream& ); // with pExtOptions
bool RTF2Doc( SvStream&, const OUString& rBaseURL );
bool HTML2Doc( SvStream&, const OUString& rBaseURL );
public:
ScImportExport( ScDocument& ); // the whole document
ScImportExport( ScDocument&, const OUString& ); // Range/cell input
ScImportExport( ScDocument&, const ScAddress& );
ScImportExport( ScDocument&, const ScRange& );
~ScImportExport() COVERITY_NOEXCEPT_FALSE;
void SetExtOptions( const ScAsciiOptions& rOpt );
void SetFilterOptions( const OUString& rFilterOptions );
bool IsRef() const { return !bAll; }
const ScRange& GetRange() const { return aRange; }
static void EmbeddedNullTreatment( OUString & rStr );
static bool IsFormatSupported( SotClipboardFormatId nFormat );
static const sal_Unicode* ScanNextFieldFromString( const sal_Unicode* p,
OUString& rField, sal_Unicode cStr, const sal_Unicode* pSeps,
bool bMergeSeps, bool& rbIsQuoted, bool& rbOverflowCell, bool bRemoveSpace );
static void WriteUnicodeOrByteString( SvStream& rStrm, std::u16string_view rString, bool bZero = false );
/** ScImportExport::CountVisualWidth
Count the width of string visually ( in multiple of western characters), considering CJK
ideographs and CJK symbols (U+3000-U+303F) as twice the width of western characters.
@param rStr the string.
@param nIdx the starting index, index is incremented for each counted character.
@param nMaxWidth the maximum width to count.
@return the sum of the width of counted characters.
**/
static sal_Int32 CountVisualWidth(const OUString& rStr, sal_Int32& nIdx, sal_Int32 nMaxWidth);
/** ScImportExport::CountVisualWidth
@return the sum of the visual width of the whole string.
**/
static sal_Int32 CountVisualWidth(const OUString& rStr);
//! only if stream is only used in own (!) memory
static void SetNoEndianSwap( SvStream& rStrm );
void SetSeparator( sal_Unicode c ) { cSep = c; }
void SetDelimiter( sal_Unicode c ) { cStr = c; }
void SetFormulas( bool b ) { bFormulas = b; }
void SetIncludeFiltered( bool b ) { bIncludeFiltered = b; }
void SetStreamPath( const OUString& rPath ) { aStreamPath = rPath; }
bool ImportString( const OUString&, SotClipboardFormatId );
bool ExportString( OUString&, SotClipboardFormatId );
bool ExportByteString( OString&, rtl_TextEncoding, SotClipboardFormatId );
bool ImportStream( SvStream&, const OUString& rBaseURL, SotClipboardFormatId );
bool ExportStream( SvStream&, const OUString& rBaseURL, SotClipboardFormatId );
bool ExportData( std::u16string_view rMimeType,
css::uno::Any & rValue );
// after import
bool IsOverflowRow() const { return bOverflowRow; }
bool IsOverflowCol() const { return bOverflowCol; }
bool IsOverflowCell() const { return bOverflowCell; }
bool IsOverflow() const { return bOverflowRow || bOverflowCol || bOverflowCell; }
const OUString& GetNonConvertibleChars() const { return aNonConvertibleChars; }
void SetApi( bool bApi ) { mbApi = bApi; }
void SetImportBroadcast( bool b ) { mbImportBroadcast = b; }
void SetOverwriting( const bool bOverwriting ) { mbOverwriting = bOverwriting; }
void SetExportTextOptions( const ScExportTextOptions& options ) { mExportTextOptions = options; }
bool GetIncludeBOM() const { return mbIncludeBOM; }
};
/**
 * Helper class for importing clipboard strings as streams.
 *
 * Derives from SvMemoryStream and is constructed from the OUString that is
 * to be imported.
 *
 * NOTE(review): whether the stream copies rStr's characters or merely refers
 * to its buffer is decided by the constructor definition, which is not part
 * of this header — confirm the lifetime requirements on rStr there.
 */
class ScImportStringStream : public SvMemoryStream
{
public:
ScImportStringStream(const OUString& rStr);
};
/** Read a CSV (comma separated values) data line using
    ReadUniOrByteStringLine().

    @param rStream
        The stream the line(s) are read from.

    @param bEmbeddedLineBreak
        If TRUE and a line-break occurs inside a field of data,
        a line feed LF '\n' and the next line are appended. This repeats
        until a line-break is no longer inside a field. A field is
        determined by the delimiting rFieldSeparators and is optionally
        surrounded by a pair of cFieldQuote characters. For a line-break
        to be within a field, the field content MUST be surrounded by
        cFieldQuote characters, and the opening cFieldQuote MUST be
        at the very start of a line or follow right behind a field
        separator with no extra characters in between. The only exception
        are blanks, which are tolerated here although that contradicts
        RFC 4180. Anything, including field separators and escaped quotes
        (escaped by doubling them), may appear in a quoted field.

        If bEmbeddedLineBreak==FALSE, nothing is parsed and the
        string returned is simply one ReadUniOrByteStringLine().

    @param rFieldSeparators
        A list of characters that each may act as a field separator.
        If rcDetectSep was 0 and a separator is detected, then it is
        appended to rFieldSeparators.

    @param cFieldQuote
        The quote character used.

    @param rcDetectSep
        If 0, then attempt to detect a possible separator if
        rFieldSeparators doesn't include it already. This can be necessary
        because of the "accept broken misquoted CSV fields" feature, which
        tries to ignore trailing blanks after a quoted field and, if no
        separator follows, continues to add content to the field, assuming
        the single double quote was in error.
        It is also necessary if the only possible separator was not selected
        and not included in rFieldSeparators and a line starts with a quoted
        field, in which case appending lines is tried until end of file.
        If a separator is detected, it is added to rFieldSeparators and the
        line is reread with the new separators.

    @param nMaxSourceLines
        Maximum number of source lines to read and combine into one logical
        line for the embedded new line purpose. Should be limited for the
        preview dialog, because otherwise having only non-matching
        separators selected would lead to trying to concatenate lines until
        the end of the file.
        If 0, there is no limit other than the internal arbitrary limit on
        the resulting line length.

    @return
        The line that was read; see @ATTENTION below regarding truncation.
        Check Stream::good() (on rStream) to detect IO problems during
        the read.

    @ATTENTION
        Note that the string returned may be truncated even inside
        a quoted field if some (arbitrary) maximum length was reached.
        There currently is no way to exactly determine the conditions,
        whether this was at a line end, or whether open quotes
        would have closed the field before the line end, as even a
        ReadUniOrByteStringLine() may return prematurely but the
        stream was positioned ahead until the real end of line.
        Additionally, due to character encoding conversions, string
        length and bytes read don't necessarily match, and
        resyncing to a previous position matching the string's
        length isn't always possible. As a result, a logical line
        with embedded line breaks and more than the maximum length
        characters will be spoiled, and a subsequent ReadCsvLine()
        may start under false preconditions.
  */
SC_DLLPUBLIC OUString ReadCsvLine( SvStream &rStream, bool bEmbeddedLineBreak,
OUString& rFieldSeparators, sal_Unicode cFieldQuote, sal_Unicode& rcDetectSep,
sal_uInt32 nMaxSourceLines = 0 );
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|