summaryrefslogtreecommitdiff
path: root/sc
diff options
context:
space:
mode:
authorEike Rathke <erack@redhat.com>2021-08-29 18:23:35 +0200
committerEike Rathke <erack@redhat.com>2021-08-29 22:09:29 +0200
commit451e4abb5377f6d923860e9adfce82d46f31e049 (patch)
tree8169fe9944b94fd2ec5577f9306c7ab56d3c3ff4 /sc
parent182971bd9fa8c9057d1478af15a14b2b6357a2a4 (diff)
Resolves: tdf#117868 CSV: support sep=; and "sep=;" separator setting
When reading CSV, the separator (any BMP character) is taken from an initial sep=; or "sep=;" single field if that is the only content of the row. The quoted form is preserved as (unquoted) cell content; for the unquoted form the separator is discarded, as contextually it is a real field separator. When writing CSV, the content of an existing single top left cell, if that is the only cell in the row, is adapted to the current separator (any BMP character) and written in the quoted form "sep=;" (if quotes / text delimiters aren't set empty ...), always using the ASCII '"' double quote character. Change-Id: I854477bd0f9d1cafaa51a2130b616292347519cf Reviewed-on: https://gerrit.libreoffice.org/c/core/+/121232 Reviewed-by: Eike Rathke <erack@redhat.com> Tested-by: Jenkins
Diffstat (limited to 'sc')
-rw-r--r--sc/source/ui/dbgui/scuiasciiopt.cxx89
-rw-r--r--sc/source/ui/docshell/docsh.cxx43
-rw-r--r--sc/source/ui/inc/scuiasciiopt.hxx5
3 files changed, 117 insertions, 20 deletions
diff --git a/sc/source/ui/dbgui/scuiasciiopt.cxx b/sc/source/ui/dbgui/scuiasciiopt.cxx
index 3ed0b05cf677..7390786c5cd9 100644
--- a/sc/source/ui/dbgui/scuiasciiopt.cxx
+++ b/sc/source/ui/dbgui/scuiasciiopt.cxx
@@ -381,14 +381,6 @@ ScImportAsciiDlg::ScImportAsciiDlg(weld::Window* pParent, const OUString& aDatNa
if (nFromRow != 1)
mxNfRow->set_value(nFromRow);
- if ( bIsTSV )
- mxCkbTab->set_active(true);
- else
- SetSeparators(); // Set Separators in the dialog from maFieldSeparators (empty are not set)
-
- // Get Separators from the dialog (empty are set from default)
- maFieldSeparators = GetSeparators();
-
// Clipboard is always Unicode, else detect.
rtl_TextEncoding ePreselectUnicode = (meCall == SC_IMPORTFILE ?
RTL_TEXTENCODING_DONTKNOW : RTL_TEXTENCODING_UNICODE);
@@ -438,6 +430,40 @@ ScImportAsciiDlg::ScImportAsciiDlg(weld::Window* pParent, const OUString& aDatNa
mnStreamPos = mpDatStream->Tell();
}
+ if (bIsTSV)
+ SetSeparators('\t');
+ else
+ {
+ // Some MS-Excel convention is the first line containing the field
+ // separator as "sep=|" (without quotes and any field separator
+ // character). The second possibility seems to be it is present *with*
+ // quotes so it shows up as cell content *including* the separator and
+ // can be preserved during round trips. Check for an exact match of
+ // any such and set separator.
+ /* TODO: it is debatable whether the unquoted form should rather be
+ * treated special to actually include the separator in the field data.
+ * Currently it does not. */
+ sal_Unicode cSep = 0;
+ OUString aLine;
+ // Try to read one more character, if more than 7 it can't be an exact
+ // match of any.
+ mpDatStream->ReadUniOrByteStringLine( aLine, mpDatStream->GetStreamCharSet(), 8);
+ mpDatStream->Seek(mnStreamPos);
+ if (aLine.getLength() == 8)
+ ; // nothing
+ else if (aLine.getLength() == 5 && aLine.startsWithIgnoreAsciiCase("sep="))
+ cSep = aLine[4];
+ else if (aLine.getLength() == 7 && aLine[6] == '"' && aLine.startsWithIgnoreAsciiCase("\"sep="))
+ cSep = aLine[5];
+
+ // Set Separators in the dialog from maFieldSeparators (empty are not
+ // set) or an optionally defined by file content field separator.
+ SetSeparators(cSep);
+ }
+
+ // Get Separators from the dialog (empty are set from default)
+ maFieldSeparators = GetSeparators();
+
mxNfRow->connect_value_changed( LINK( this, ScImportAsciiDlg, FirstRowHdl ) );
// *** Separator characters ***
@@ -650,19 +676,46 @@ void ScImportAsciiDlg::SaveParameters()
mxCkbSkipEmptyCells->get_active(), mxCkbRemoveSpace->get_active(), meCall );
}
-void ScImportAsciiDlg::SetSeparators()
+void ScImportAsciiDlg::SetSeparators( sal_Unicode cSep )
{
- for (sal_Int32 i = 0; i < maFieldSeparators.getLength(); ++i)
+ if (cSep)
{
- switch (maFieldSeparators[i])
+ // Exclusively set a separator, maFieldSeparators needs not be
+ // modified, it's obtained by GetSeparators() after this call.
+ constexpr sal_Unicode aSeps[] = { '\t', ';', ',', ' ' };
+ for (const sal_Unicode c : aSeps)
{
- case '\t': mxCkbTab->set_active(true); break;
- case ';': mxCkbSemicolon->set_active(true); break;
- case ',': mxCkbComma->set_active(true); break;
- case ' ': mxCkbSpace->set_active(true); break;
- default:
- mxCkbOther->set_active(true);
- mxEdOther->set_text(mxEdOther->get_text() + OUStringChar(maFieldSeparators[i]));
+ const bool bSet = (c == cSep);
+ switch (c)
+ {
+ case '\t': mxCkbTab->set_active(bSet); break;
+ case ';': mxCkbSemicolon->set_active(bSet); break;
+ case ',': mxCkbComma->set_active(bSet); break;
+ case ' ': mxCkbSpace->set_active(bSet); break;
+ }
+ if (bSet)
+ cSep = 0;
+ }
+ if (cSep)
+ {
+ mxCkbOther->set_active(true);
+ mxEdOther->set_text(OUStringChar(cSep));
+ }
+ }
+ else
+ {
+ for (sal_Int32 i = 0; i < maFieldSeparators.getLength(); ++i)
+ {
+ switch (maFieldSeparators[i])
+ {
+ case '\t': mxCkbTab->set_active(true); break;
+ case ';': mxCkbSemicolon->set_active(true); break;
+ case ',': mxCkbComma->set_active(true); break;
+ case ' ': mxCkbSpace->set_active(true); break;
+ default:
+ mxCkbOther->set_active(true);
+ mxEdOther->set_text(mxEdOther->get_text() + OUStringChar(maFieldSeparators[i]));
+ }
}
}
}
diff --git a/sc/source/ui/docshell/docsh.cxx b/sc/source/ui/docshell/docsh.cxx
index 5801636d3cc2..47d8adc1843b 100644
--- a/sc/source/ui/docshell/docsh.cxx
+++ b/sc/source/ui/docshell/docsh.cxx
@@ -1968,6 +1968,49 @@ void ScDocShell::AsciiSave( SvStream& rStream, const ScImportOptions& rAsciiOpt,
SCCOL nCol;
SCROW nRow;
+
+ // Treat the top left cell separator "sep=" special.
+ // Here nStartRow == 0 && nStartCol == 0
+ if (!bFixedWidth && cDelim != 0)
+ {
+ // First row iterator.
+ ScHorizontalCellIterator aIter( m_aDocument, nTab, nStartCol, nStartRow, nEndCol, nStartRow);
+ ScRefCellValue* pCell;
+ // Must be first column and all following cells on this row must be
+ // empty to fiddle with "sep=".
+ if ((pCell = aIter.GetNext( nCol, nRow)) != nullptr && nCol == nStartCol && !aIter.GetNext( nCol, nRow))
+ {
+ if (pCell->meType == CELLTYPE_STRING)
+ {
+ aString = pCell->mpString->getString();
+ if (aString.getLength() <= 5 && aString.startsWithIgnoreAsciiCase("sep="))
+ {
+ // Cell content is /^sep=.?$/ so write current separator.
+ // Force the quote character to '"' regardless what is set
+ // for export because that is the only one recognized on
+ // import.
+ aString = "sep=" + OUStringChar(cDelim);
+ if (cStrDelim != 0)
+ rStream.WriteUniOrByteChar( '"', eCharSet);
+ if (eCharSet == RTL_TEXTENCODING_UNICODE)
+ {
+ write_uInt16s_FromOUString( rStream, aString);
+ }
+ else
+ {
+ OString aStrEnc = OUStringToOString( aString, eCharSet);
+ // write byte encoded
+ rStream.WriteBytes( aStrEnc.getStr(), aStrEnc.getLength());
+ }
+ if (cStrDelim != 0)
+ rStream.WriteUniOrByteChar( '"', eCharSet);
+ endlub( rStream );
+ ++nStartRow;
+ }
+ }
+ }
+ }
+
SCCOL nNextCol = nStartCol;
SCROW nNextRow = nStartRow;
SCCOL nEmptyCol;
diff --git a/sc/source/ui/inc/scuiasciiopt.hxx b/sc/source/ui/inc/scuiasciiopt.hxx
index eae2f2f06bc0..2bb7e23252b1 100644
--- a/sc/source/ui/inc/scuiasciiopt.hxx
+++ b/sc/source/ui/inc/scuiasciiopt.hxx
@@ -91,8 +91,9 @@ public:
private:
/** Sets the selected char set data to meCharSet and mbCharSetSystem. */
void SetSelectedCharSet();
- /** Set separators in ui from maFieldSeparators */
- void SetSeparators();
+ /** Set separators in ui from maFieldSeparators or an optionally defined
+ separator. */
+ void SetSeparators( sal_Unicode cSep );
/** Returns all separator characters in a string. */
OUString GetSeparators() const;