summaryrefslogtreecommitdiff
path: root/sc/source/ui/inc/impex.hxx
blob: 7084839943abc4fa2d968dc77a6071133a0ac4f4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#pragma once

#include <o3tl/deleter.hxx>
#include <sot/formats.hxx>
#include <address.hxx>
#include <tools/stream.hxx>

#include <com/sun/star/uno/Any.hxx>

class ScDocShell;
class ScDocument;
class ScAsciiOptions;

/**
 * Text export settings for certain lossy output formats (such as csv).
 *
 * They describe how line breaks inside multi-line cells are converted on
 * export, and additionally carry a replacement character for separators
 * and a quoting flag.
 */
struct ScExportTextOptions
{
    /// Available treatments of a newline embedded in a cell.
    enum NewlineConversion { ToSystem, ToSpace, None };

    /// All arguments are optional; the defaults are ToSystem, 0 and false.
    ScExportTextOptions( NewlineConversion eNewlineConversion = ToSystem,
                         sal_Unicode cSeparatorConvertTo = 0,
                         bool bAddQuotes = false )
        : meNewlineConversion( eNewlineConversion )
        , mcSeparatorConvertTo( cSeparatorConvertTo )
        , mbAddQuotes( bAddQuotes )
    {
    }

    NewlineConversion meNewlineConversion;  ///< Selected newline treatment.
    sal_Unicode mcSeparatorConvertTo;       ///< Convert separator to this character.
    bool mbAddQuotes;                       ///< NOTE(review): presumably requests quoting of exported text - confirm in impex.cxx.
};

/**
 * Import/export helper bound to one ScDocument and, depending on the
 * constructor used, to the whole document, a range or a single address.
 *
 * Judging by the private converter declarations below it deals with plain
 * text (optionally driven by extended ScAsciiOptions), SYLK, DIF, HTML and
 * RTF. Callers configure separator, string delimiter and further flags via
 * the setters and then call one of the public Import or Export entry points,
 * passing a SotClipboardFormatId. After an import the IsOverflow...()
 * accessors expose the overflow flags.
 */
class SC_DLLPUBLIC ScImportExport
{
    ScDocShell* pDocSh;                 // NOTE(review): presumably the shell owning rDoc - confirm in impex.cxx
    ScDocument& rDoc;                   // every constructor takes the ScDocument&
    std::unique_ptr<ScDocument, o3tl::default_delete<ScDocument>> pUndoDoc; // NOTE(review): presumably used for Undo around StartPaste()/EndPaste() - confirm
    ScRange     aRange;                 // returned by GetRange()
    OUString    aStreamPath;            // set by SetStreamPath()
    OUString    aNonConvertibleChars;   // returned by GetNonConvertibleChars()
    OUString    maFilterOptions;        // NOTE(review): presumably stored by SetFilterOptions() - confirm
    sal_uInt32  nSizeLimit;
    SCROW       nMaxImportRow;
    sal_Unicode cSep;                   // Separator
    sal_Unicode cStr;                   // String Delimiter
    bool        bFormulas;              // Formula in Text?
    bool        bIncludeFiltered;       // include filtered rows? (default true)
    bool        bAll;                   // no selection
    bool        bSingle;                // Single selection
    bool        bUndo;                  // with Undo?
    bool        bOverflowRow;           // too many rows
    bool        bOverflowCol;           // too many columns
    bool        bOverflowCell;          // too much data for a cell
    bool        mbApi;                  // set by SetApi()
    bool        mbImportBroadcast; // whether or not to broadcast after data import.
    bool        mbOverwriting;  // Whether we could be overwriting existing values (paste).
                                // In this case we cannot use the insert optimization, but we
                                // do not need to broadcast after the import.
    bool        mbIncludeBOM; // Whether to include a byte-order-mark in the output.
    ScExportTextOptions mExportTextOptions; // set by SetExportTextOptions()

    std::unique_ptr<ScAsciiOptions> pExtOptions;        // extended options

    bool StartPaste();                  // Protect check, set up Undo
    void EndPaste(bool bAutoRowHeight = true);                    // Undo/Redo actions, Repaint

    // Per-format converters working on an SvStream. By their names the
    // Doc2... functions write document content to the stream and the
    // ...2Doc functions read the stream into the document; the bool result
    // is presumably a success flag (confirm in impex.cxx).
    bool Doc2Text( SvStream& );
    bool Text2Doc( SvStream& );
    bool Doc2Sylk( SvStream& );
    bool Sylk2Doc( SvStream& );
    bool Doc2HTML( SvStream&, const OUString& );
    bool Doc2RTF( SvStream& );
    bool Doc2Dif( SvStream& );
    bool Dif2Doc( SvStream& );
    bool ExtText2Doc( SvStream& );      // with pExtOptions
    bool RTF2Doc( SvStream&, const OUString& rBaseURL );
    bool HTML2Doc( SvStream&, const OUString& rBaseURL );

public:
    ScImportExport( ScDocument& );                  // the whole document
    ScImportExport( ScDocument&, const OUString& );   // Range/cell input
    ScImportExport( ScDocument&, const ScAddress& );
    ScImportExport( ScDocument&, const ScRange& );
   ~ScImportExport() COVERITY_NOEXCEPT_FALSE;

    void SetExtOptions( const ScAsciiOptions& rOpt );
    void SetFilterOptions( const OUString& rFilterOptions );
    // True unless the "no selection" flag bAll is set.
    bool IsRef() const       { return !bAll; }

    const ScRange& GetRange() const { return aRange; }

    static void EmbeddedNullTreatment( OUString & rStr );

    static bool  IsFormatSupported( SotClipboardFormatId nFormat );
    // Scans one field starting at p into rField and returns a pointer into
    // the same buffer. NOTE(review): presumably the position following the
    // field; exact quoting/separator rules live in the implementation.
    static const sal_Unicode* ScanNextFieldFromString( const sal_Unicode* p,
            OUString& rField, sal_Unicode cStr, const sal_Unicode* pSeps,
            bool bMergeSeps, bool& rbIsQuoted, bool& rbOverflowCell, bool bRemoveSpace );
    static  void    WriteUnicodeOrByteString( SvStream& rStrm, std::u16string_view rString, bool bZero = false );

    /** ScImportExport::CountVisualWidth
        Count the width of string visually ( in multiple of western characters), considering CJK
        ideographs and CJK symbols (U+3000-U+303F) as twice the width of western characters.
        @param rStr the string.
        @param nIdx the starting index, index is incremented for each counted character.
        @param nMaxWidth the maximum width to count.
        @return the sum of the width of counted characters.
    **/
    static sal_Int32 CountVisualWidth(const OUString& rStr, sal_Int32& nIdx, sal_Int32 nMaxWidth);

    /** ScImportExport::CountVisualWidth
        @return the sum of the visual width of the whole string.
    **/
    static sal_Int32 CountVisualWidth(const OUString& rStr);

    //! only if stream is only used in own (!) memory
    static  void    SetNoEndianSwap( SvStream& rStrm );

    // Plain setters for the conversion parameters held in the members above.
    void SetSeparator( sal_Unicode c ) { cSep = c; }
    void SetDelimiter( sal_Unicode c ) { cStr = c; }
    void SetFormulas( bool b ) { bFormulas = b; }
    void SetIncludeFiltered( bool b ) { bIncludeFiltered = b; }

    void            SetStreamPath( const OUString& rPath ) { aStreamPath = rPath; }

    // Public entry points; the SotClipboardFormatId argument names the data
    // format to read or write.
    bool ImportString( const OUString&, SotClipboardFormatId );
    bool ExportString( OUString&, SotClipboardFormatId );
    bool ExportByteString( OString&, rtl_TextEncoding, SotClipboardFormatId );

    bool ImportStream( SvStream&, const OUString& rBaseURL, SotClipboardFormatId );
    bool ExportStream( SvStream&, const OUString& rBaseURL, SotClipboardFormatId );

    bool ExportData( std::u16string_view rMimeType,
                     css::uno::Any & rValue  );

    // after import
    bool IsOverflowRow() const { return bOverflowRow; }
    bool IsOverflowCol() const { return bOverflowCol; }
    bool IsOverflowCell() const { return bOverflowCell; }
    // True if any of the three overflow flags is set.
    bool IsOverflow() const { return bOverflowRow || bOverflowCol || bOverflowCell; }

    const OUString& GetNonConvertibleChars() const { return aNonConvertibleChars; }

    void SetApi( bool bApi ) { mbApi = bApi; }
    void SetImportBroadcast( bool b ) { mbImportBroadcast = b; }
    void SetOverwriting( const bool bOverwriting ) { mbOverwriting = bOverwriting; }
    void SetExportTextOptions( const ScExportTextOptions& options ) { mExportTextOptions = options; }

    // No public setter for mbIncludeBOM is declared in this class.
    bool GetIncludeBOM() const { return mbIncludeBOM; }
};

// Helper class for importing clipboard strings as streams.
// It is an SvMemoryStream that is constructed from an OUString.
// NOTE(review): how the string's buffer is mapped into the stream (copy vs.
// direct view, encoding settings) is decided in the constructor's
// implementation, which is not part of this header - confirm there.
class ScImportStringStream : public SvMemoryStream
{
public:
    ScImportStringStream(const OUString& rStr);
};

/** Read a CSV (comma separated values) data line using
    ReadUniOrByteStringLine().

    @param rStream
    The stream to read from. Check rStream.good() after the call to detect
    IO problems during read.

    @param bEmbeddedLineBreak
    If TRUE and a line-break occurs inside a field of data,
    a line feed LF '\n' and the next line are appended. Repeats
    until a line-break is not in a field. A field is determined
    by delimiting rFieldSeparators and optionally surrounded by
    a pair of cFieldQuote characters. For a line-break to be
    within a field, the field content MUST be surrounded by
    cFieldQuote characters, and the opening cFieldQuote MUST be
    at the very start of a line or follow right behind a field
    separator with no extra characters in between. The only
    exception are blanks, which are tolerated there, contrary to
    RFC 4180. Anything, including field separators and escaped
    quotes (by doubling them), may appear in a quoted field.

    If bEmbeddedLineBreak==FALSE, nothing is parsed and the
    string returned is simply one ReadUniOrByteStringLine().

    @param rFieldSeparators
    A list of characters that each may act as a field separator.
    If rcDetectSep was 0 and a separator is detected then it is appended to
    rFieldSeparators.

    @param cFieldQuote
    The quote character used.

    @param rcDetectSep
    If 0 then attempt to detect a possible separator if
    rFieldSeparators doesn't include it already. This can be necessary because
    of the "accept broken misquoted CSV fields" feature that tries to ignore
    trailing blanks after a quoted field and, if no separator follows, continues
    to add content to the field assuming the single double quote was in error.
    It is also necessary if the only possible separator was not selected and
    not included in rFieldSeparators and a line starts with a quoted field, in
    which case appending lines is tried until end of file.
    If a separator is detected it is added to rFieldSeparators and the
    line is reread with the new separators.

    @param nMaxSourceLines
    Maximum number of source lines to read and combine into one logical line
    for embedded new line purposes. Should be limited for the preview dialog,
    because otherwise having only non-matching separators selected would lead
    to trying to concatenate lines until the end of the file.
    If 0, there is no limit other than the internal, arbitrary limit on the
    resulting line length.

    @return
    The line read. With bEmbeddedLineBreak==TRUE this may consist of several
    source lines joined by LF, see above.

    @attention
    Note that the string returned may be truncated even inside
    a quoted field if some (arbitrary) maximum length was reached.
    There currently is no way to exactly determine the conditions,
    whether this was at a line end, or whether open quotes
    would have closed the field before the line end, as even a
    ReadUniOrByteStringLine() may return prematurely but the
    stream was positioned ahead until the real end of line.
    Additionally, due to character encoding conversions, string
    length and bytes read don't necessarily match, and
    resyncing to a previous position matching the string's
    length isn't always possible. As a result, a logical line
    with embedded line breaks and more than the maximum length
    characters will be spoiled, and a subsequent ReadCsvLine()
    may start under false preconditions.

  */
SC_DLLPUBLIC OUString ReadCsvLine( SvStream &rStream, bool bEmbeddedLineBreak,
        OUString& rFieldSeparators, sal_Unicode cFieldQuote, sal_Unicode& rcDetectSep,
        sal_uInt32 nMaxSourceLines = 0 );

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */