diff options
author | Mark Hung <marklh9@gmail.com> | 2021-01-05 20:26:51 +0800 |
---|---|---|
committer | Mark Hung <marklh9@gmail.com> | 2021-01-09 05:13:55 +0100 |
commit | 621c189173b35ac7f5ce4c578f57045479c63ab6 (patch) | |
tree | acc1c75c57021e341841d31c29e15414f464a19d | |
parent | 65b3a5cbc7e018b4e3f4f61228bf9334fd9a956e (diff) |
tdf#104927 consider character width for CSV import
dialog in fixed width mode. Positions and column widths
in csvgrid.cxx are multiplied by the character width to
get the X coordinate. Positions are calculated based
on the string length, disregarding the fact that
characters may have different visual widths.
Most CJK ideographs and symbols are generally designed so
that their width is twice that of western characters in
the same font.
This patch implements ScImportExport::CountVisualWidth()
to count the visual width of a string, converts positions
so that strings containing CJK ideographs render at the
right place, and splits the string based on the
calculated visual width instead of the number of
sal_Unicode chars.
Change-Id: Ic5c1ec219820cf4e1d6c554d5eaccca9f8210ec6
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/108802
Tested-by: Jenkins
Reviewed-by: Mark Hung <marklh9@gmail.com>
-rw-r--r-- | sc/source/ui/dbgui/csvgrid.cxx | 14 | ||||
-rw-r--r-- | sc/source/ui/docshell/impex.cxx | 44 | ||||
-rw-r--r-- | sc/source/ui/inc/impex.hxx | 15 |
3 files changed, 64 insertions, 9 deletions
diff --git a/sc/source/ui/dbgui/csvgrid.cxx b/sc/source/ui/dbgui/csvgrid.cxx index 1fd2cea6de69..13b8f671c3e8 100644 --- a/sc/source/ui/dbgui/csvgrid.cxx +++ b/sc/source/ui/dbgui/csvgrid.cxx @@ -789,7 +789,7 @@ void ScCsvGrid::ImplSetTextLineSep( /* TODO: signal overflow somewhere in UI */ // update column width - sal_Int32 nWidth = std::max( CSV_MINCOLWIDTH, aCellText.getLength() + 1 ); + sal_Int32 nWidth = std::max( CSV_MINCOLWIDTH, ScImportExport::CountVisualWidth( aCellText ) + 1 ); if( IsValidColumn( nColIx ) ) { // expand existing column @@ -826,9 +826,9 @@ void ScCsvGrid::ImplSetTextLineFix( sal_Int32 nLine, const OUString& rTextLine ) { if( nLine < GetFirstVisLine() ) return; - sal_Int32 nChars = rTextLine.getLength(); - if( nChars > GetPosCount() ) - Execute( CSVCMD_SETPOSCOUNT, nChars ); + sal_Int32 nWidth = ScImportExport::CountVisualWidth( rTextLine ); + if( nWidth > GetPosCount() ) + Execute( CSVCMD_SETPOSCOUNT, nWidth ); sal_uInt32 nLineIx = nLine - GetFirstVisLine(); while( maTexts.size() <= nLineIx ) @@ -842,9 +842,11 @@ void ScCsvGrid::ImplSetTextLineFix( sal_Int32 nLine, const OUString& rTextLine ) for( sal_uInt32 nColIx = 0; (nColIx < nColCount) && (nStrIx < nStrLen); ++nColIx ) { sal_Int32 nColWidth = GetColumnWidth( nColIx ); - sal_Int32 nLen = std::min( std::min( nColWidth, CSV_MAXSTRLEN ), nStrLen - nStrIx); + sal_Int32 nLastIx = nStrIx; + ScImportExport::CountVisualWidth( rTextLine, nLastIx, nColWidth ); + sal_Int32 nLen = std::min( CSV_MAXSTRLEN, nLastIx - nStrIx ); rStrVec.push_back( rTextLine.copy( nStrIx, nLen ) ); - nStrIx = nStrIx + nColWidth; + nStrIx = nStrIx + nLen; } InvalidateGfx(); } diff --git a/sc/source/ui/docshell/impex.cxx b/sc/source/ui/docshell/impex.cxx index 54fc6ea0765e..4835a75edabe 100644 --- a/sc/source/ui/docshell/impex.cxx +++ b/sc/source/ui/docshell/impex.cxx @@ -19,6 +19,7 @@ #include <comphelper/processfactory.hxx> #include <i18nlangtag/languagetag.hxx> +#include <i18nutil/unicode.hxx> #include <sot/formats.hxx> 
#include <sfx2/mieclip.hxx> #include <com/sun/star/i18n/CalendarFieldIndex.hpp> @@ -541,6 +542,36 @@ void ScImportExport::WriteUnicodeOrByteEndl( SvStream& rStrm ) endl( rStrm ); } +sal_Int32 ScImportExport::CountVisualWidth(const OUString& rStr, sal_Int32& nIdx, sal_Int32 nMaxWidth) +{ + sal_Int32 nWidth = 0; + while(nIdx < rStr.getLength() && nWidth < nMaxWidth) + { + sal_uInt32 nCode = rStr.iterateCodePoints(&nIdx); + + if (unicode::isCJKIVSCharacter(nCode) || (nCode >= 0x3000 && nCode <= 0x303F)) + nWidth += 2; + else if (!unicode::isIVSSelector(nCode)) + nWidth += 1; + } + + if (nIdx < rStr.getLength()) + { + sal_Int32 nTmpIdx = nIdx; + sal_uInt32 nCode = rStr.iterateCodePoints(&nTmpIdx); + + if (unicode::isIVSSelector(nCode)) + nIdx = nTmpIdx; + } + return nWidth; +} + +sal_Int32 ScImportExport::CountVisualWidth(const OUString& rStr) +{ + sal_Int32 nIdx = 0; + return CountVisualWidth(rStr, nIdx, SAL_MAX_INT32); +} + void ScImportExport::SetNoEndianSwap( SvStream& rStrm ) { #ifdef OSL_BIGENDIAN @@ -1400,6 +1431,7 @@ bool ScImportExport::ExtText2Doc( SvStream& rStrm ) bool bMultiLine = false; if ( bFixed ) // Fixed line length { + sal_Int32 nStartIdx = 0; // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an // overflow if there is really data following to be put behind // the last column, which doesn't happen if info is @@ -1413,10 +1445,14 @@ bool ScImportExport::ExtText2Doc( SvStream& rStrm ) bOverflowCol = true; // display warning on import else if (!bDetermineRange) { - sal_Int32 nStart = pColStart[i]; - sal_Int32 nNext = ( i+1 < nInfoCount ) ? 
pColStart[i+1] : nLineLen; + sal_Int32 nNextIdx = nStartIdx; + if ( i + 1 < nInfoCount ) + CountVisualWidth( aLine, nNextIdx, pColStart[i+1] - pColStart[i] ); + else + nNextIdx = nLineLen; + bool bIsQuoted = false; - aCell = lcl_GetFixed( aLine, nStart, nNext, bIsQuoted, bOverflowCell ); + aCell = lcl_GetFixed( aLine, nStartIdx, nNextIdx, bIsQuoted, bOverflowCell ); if (bIsQuoted && bQuotedAsText) nFmt = SC_COL_TEXT; @@ -1424,6 +1460,8 @@ bool ScImportExport::ExtText2Doc( SvStream& rStrm ) aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt, &aNumFormatter, bDetectNumFormat, bSkipEmptyCells, aTransliteration, aCalendar, pEnglishTransliteration.get(), pEnglishCalendar.get()); + + nStartIdx = nNextIdx; } ++nCol; } diff --git a/sc/source/ui/inc/impex.hxx b/sc/source/ui/inc/impex.hxx index b60fbe6d8f7c..090b17911578 100644 --- a/sc/source/ui/inc/impex.hxx +++ b/sc/source/ui/inc/impex.hxx @@ -110,6 +110,21 @@ public: static void WriteUnicodeOrByteString( SvStream& rStrm, const OUString& rString, bool bZero = false ); static void WriteUnicodeOrByteEndl( SvStream& rStrm ); + /** ScImportExport::CountVisualWidth + Count the width of string visually ( in multiple of western characters), considering CJK + ideographs and CJK symbols (U+3000-U+303F) as twice the width of western characters. + @param rStr the string. + @param nIdx the starting index, index is incremented for each counted character. + @param nMaxWidth the maximum width to count. + @return the sum of the width of counted characters. + **/ + static sal_Int32 CountVisualWidth(const OUString& rStr, sal_Int32& nIdx, sal_Int32 nMaxWidth); + + /** ScImportExport::CountVisualWidth + @return the sum of the viusal width of the whole string. + **/ + static sal_Int32 CountVisualWidth(const OUString& rStr); + //! only if stream is only used in own (!) memory static void SetNoEndianSwap( SvStream& rStrm ); |