summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Hung <marklh9@gmail.com>2021-01-05 20:26:51 +0800
committerMark Hung <marklh9@gmail.com>2021-01-09 05:13:55 +0100
commit621c189173b35ac7f5ce4c578f57045479c63ab6 (patch)
treeacc1c75c57021e341841d31c29e15414f464a19d
parent65b3a5cbc7e018b4e3f4f61228bf9334fd9a956e (diff)
tdf#104927 consider character width for CSV import
dialog in fixed width mode. Positions and column widths in csvgrid.cxx are multiplied by the character width to get the X coordinate. Positions are calculated based on the string length, disregarding the fact that characters may have different visual widths. Most CJK ideographs and symbols are designed so that their width is generally twice that of the western characters in the same font. This patch implements ScImportExport::CountVisualWidth() to count the visual width, converts the positions so that strings containing CJK ideographs render at the right place, and separates the string based on the calculated visual width instead of the number of sal_Unicode chars. Change-Id: Ic5c1ec219820cf4e1d6c554d5eaccca9f8210ec6 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/108802 Tested-by: Jenkins Reviewed-by: Mark Hung <marklh9@gmail.com>
-rw-r--r--sc/source/ui/dbgui/csvgrid.cxx14
-rw-r--r--sc/source/ui/docshell/impex.cxx44
-rw-r--r--sc/source/ui/inc/impex.hxx15
3 files changed, 64 insertions, 9 deletions
diff --git a/sc/source/ui/dbgui/csvgrid.cxx b/sc/source/ui/dbgui/csvgrid.cxx
index 1fd2cea6de69..13b8f671c3e8 100644
--- a/sc/source/ui/dbgui/csvgrid.cxx
+++ b/sc/source/ui/dbgui/csvgrid.cxx
@@ -789,7 +789,7 @@ void ScCsvGrid::ImplSetTextLineSep(
/* TODO: signal overflow somewhere in UI */
// update column width
- sal_Int32 nWidth = std::max( CSV_MINCOLWIDTH, aCellText.getLength() + 1 );
+ sal_Int32 nWidth = std::max( CSV_MINCOLWIDTH, ScImportExport::CountVisualWidth( aCellText ) + 1 );
if( IsValidColumn( nColIx ) )
{
// expand existing column
@@ -826,9 +826,9 @@ void ScCsvGrid::ImplSetTextLineFix( sal_Int32 nLine, const OUString& rTextLine )
{
if( nLine < GetFirstVisLine() ) return;
- sal_Int32 nChars = rTextLine.getLength();
- if( nChars > GetPosCount() )
- Execute( CSVCMD_SETPOSCOUNT, nChars );
+ sal_Int32 nWidth = ScImportExport::CountVisualWidth( rTextLine );
+ if( nWidth > GetPosCount() )
+ Execute( CSVCMD_SETPOSCOUNT, nWidth );
sal_uInt32 nLineIx = nLine - GetFirstVisLine();
while( maTexts.size() <= nLineIx )
@@ -842,9 +842,11 @@ void ScCsvGrid::ImplSetTextLineFix( sal_Int32 nLine, const OUString& rTextLine )
for( sal_uInt32 nColIx = 0; (nColIx < nColCount) && (nStrIx < nStrLen); ++nColIx )
{
sal_Int32 nColWidth = GetColumnWidth( nColIx );
- sal_Int32 nLen = std::min( std::min( nColWidth, CSV_MAXSTRLEN ), nStrLen - nStrIx);
+ sal_Int32 nLastIx = nStrIx;
+ ScImportExport::CountVisualWidth( rTextLine, nLastIx, nColWidth );
+ sal_Int32 nLen = std::min( CSV_MAXSTRLEN, nLastIx - nStrIx );
rStrVec.push_back( rTextLine.copy( nStrIx, nLen ) );
- nStrIx = nStrIx + nColWidth;
+ nStrIx = nStrIx + nLen;
}
InvalidateGfx();
}
diff --git a/sc/source/ui/docshell/impex.cxx b/sc/source/ui/docshell/impex.cxx
index 54fc6ea0765e..4835a75edabe 100644
--- a/sc/source/ui/docshell/impex.cxx
+++ b/sc/source/ui/docshell/impex.cxx
@@ -19,6 +19,7 @@
#include <comphelper/processfactory.hxx>
#include <i18nlangtag/languagetag.hxx>
+#include <i18nutil/unicode.hxx>
#include <sot/formats.hxx>
#include <sfx2/mieclip.hxx>
#include <com/sun/star/i18n/CalendarFieldIndex.hpp>
@@ -541,6 +542,36 @@ void ScImportExport::WriteUnicodeOrByteEndl( SvStream& rStrm )
endl( rStrm );
}
+sal_Int32 ScImportExport::CountVisualWidth(const OUString& rStr, sal_Int32& nIdx, sal_Int32 nMaxWidth)
+{
+ sal_Int32 nWidth = 0;
+ while(nIdx < rStr.getLength() && nWidth < nMaxWidth)
+ {
+ sal_uInt32 nCode = rStr.iterateCodePoints(&nIdx);
+
+ if (unicode::isCJKIVSCharacter(nCode) || (nCode >= 0x3000 && nCode <= 0x303F))
+ nWidth += 2;
+ else if (!unicode::isIVSSelector(nCode))
+ nWidth += 1;
+ }
+
+ if (nIdx < rStr.getLength())
+ {
+ sal_Int32 nTmpIdx = nIdx;
+ sal_uInt32 nCode = rStr.iterateCodePoints(&nTmpIdx);
+
+ if (unicode::isIVSSelector(nCode))
+ nIdx = nTmpIdx;
+ }
+ return nWidth;
+}
+
+sal_Int32 ScImportExport::CountVisualWidth(const OUString& rStr)
+{
+ sal_Int32 nIdx = 0;
+ return CountVisualWidth(rStr, nIdx, SAL_MAX_INT32);
+}
+
void ScImportExport::SetNoEndianSwap( SvStream& rStrm )
{
#ifdef OSL_BIGENDIAN
@@ -1400,6 +1431,7 @@ bool ScImportExport::ExtText2Doc( SvStream& rStrm )
bool bMultiLine = false;
if ( bFixed ) // Fixed line length
{
+ sal_Int32 nStartIdx = 0;
// Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an
// overflow if there is really data following to be put behind
// the last column, which doesn't happen if info is
@@ -1413,10 +1445,14 @@ bool ScImportExport::ExtText2Doc( SvStream& rStrm )
bOverflowCol = true; // display warning on import
else if (!bDetermineRange)
{
- sal_Int32 nStart = pColStart[i];
- sal_Int32 nNext = ( i+1 < nInfoCount ) ? pColStart[i+1] : nLineLen;
+ sal_Int32 nNextIdx = nStartIdx;
+ if ( i + 1 < nInfoCount )
+ CountVisualWidth( aLine, nNextIdx, pColStart[i+1] - pColStart[i] );
+ else
+ nNextIdx = nLineLen;
+
bool bIsQuoted = false;
- aCell = lcl_GetFixed( aLine, nStart, nNext, bIsQuoted, bOverflowCell );
+ aCell = lcl_GetFixed( aLine, nStartIdx, nNextIdx, bIsQuoted, bOverflowCell );
if (bIsQuoted && bQuotedAsText)
nFmt = SC_COL_TEXT;
@@ -1424,6 +1460,8 @@ bool ScImportExport::ExtText2Doc( SvStream& rStrm )
aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
&aNumFormatter, bDetectNumFormat, bSkipEmptyCells, aTransliteration, aCalendar,
pEnglishTransliteration.get(), pEnglishCalendar.get());
+
+ nStartIdx = nNextIdx;
}
++nCol;
}
diff --git a/sc/source/ui/inc/impex.hxx b/sc/source/ui/inc/impex.hxx
index b60fbe6d8f7c..090b17911578 100644
--- a/sc/source/ui/inc/impex.hxx
+++ b/sc/source/ui/inc/impex.hxx
@@ -110,6 +110,21 @@ public:
static void WriteUnicodeOrByteString( SvStream& rStrm, const OUString& rString, bool bZero = false );
static void WriteUnicodeOrByteEndl( SvStream& rStrm );
+ /** ScImportExport::CountVisualWidth
+ Count the width of string visually ( in multiple of western characters), considering CJK
+ ideographs and CJK symbols (U+3000-U+303F) as twice the width of western characters.
+ @param rStr the string.
+ @param nIdx the starting index, index is incremented for each counted character.
+ @param nMaxWidth the maximum width to count.
+ @return the sum of the width of counted characters.
+ **/
+ static sal_Int32 CountVisualWidth(const OUString& rStr, sal_Int32& nIdx, sal_Int32 nMaxWidth);
+
+ /** ScImportExport::CountVisualWidth
+ @return the sum of the visual width of the whole string.
+ **/
+ static sal_Int32 CountVisualWidth(const OUString& rStr);
+
//! only if stream is only used in own (!) memory
static void SetNoEndianSwap( SvStream& rStrm );