summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Kaganski <mike.kaganski@collabora.com> 2024-11-05 16:08:50 +0500
committer Xisco Fauli <xiscofauli@libreoffice.org> 2024-11-18 12:43:10 +0100
commit 05272dc1972b3e1e4f85a7738728315a41922933 (patch)
tree 6a52e9427d392051660fd13aed85253cc4de4a74
parent a68e25937f719e4eed938969e458b126d44748e2 (diff)
tdf#122716: take encoding defined for font into account
Before this, non-ASCII characters representable in Windows-1252 were exported to RTF without Unicode markup, regardless of the font-defined charset; on import to Writer (and other compliant RTF readers), this 8-bit markup was then interpreted using the font's charset, producing different characters. Change-Id: I2032930b6585287fde3eb3b5e6abed0298d29330 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/176048 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com> (cherry picked from commit 0c1ae785e3fb3a800f6b7743a03245dca6c01f14) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/176071 Reviewed-by: Xisco Fauli <xiscofauli@libreoffice.org>
-rw-r--r--editeng/source/editeng/impedit4.cxx36
-rw-r--r--sc/qa/unit/copy_paste_test.cxx28
-rw-r--r--sc/qa/unit/data/xlsx/tdf122716_font_with_charset.xlsxbin0 -> 8551 bytes
3 files changed, 47 insertions, 17 deletions
diff --git a/editeng/source/editeng/impedit4.cxx b/editeng/source/editeng/impedit4.cxx
index 968c1d44ada5..37e0cb7cbd5d 100644
--- a/editeng/source/editeng/impedit4.cxx
+++ b/editeng/source/editeng/impedit4.cxx
@@ -274,6 +274,15 @@ void ImpEditEngine::WriteXML(SvStream& rOutput, const EditSelection& rSel)
SvxWriteXML( *GetEditEnginePtr(), rOutput, aESel );
}
+static size_t GetFontIndex(const SfxPoolItem& rItem, // item to look up in the font table
+ const std::vector<std::unique_ptr<SvxFontItem>>& rFontTable) // table that is scanned front to back
+{
+ for (size_t i = 0; i < rFontTable.size(); ++i)
+ if (*rFontTable[i] == rItem)
+ return i; // index of the first entry that compares equal to rItem
+ return 0; // no entry matched (or the table is empty): falls back to 0, which a caller cannot tell apart from a real match at index 0
+}
+
ErrCode ImpEditEngine::WriteRTF( SvStream& rOutput, EditSelection aSel, bool bClipboard )
{
assert( IsUpdateLayout() && "WriteRTF for UpdateMode = sal_False!" );
@@ -364,10 +373,6 @@ ErrCode ImpEditEngine::WriteRTF( SvStream& rOutput, EditSelection aSel, bool bCl
rtl_TextEncoding eChrSet = pFontItem->GetCharSet();
// tdf#47679 OpenSymbol is not encoded in Symbol Encoding
- // and anyway we always attempt to write as eDestEnc
- // of RTL_TEXTENCODING_MS_1252 and pay no attention
- // on export what encoding we claim to use for these
- // fonts.
if (IsOpenSymbol(pFontItem->GetFamilyName()))
{
SAL_WARN_IF(eChrSet == RTL_TEXTENCODING_SYMBOL, "editeng", "OpenSymbol should not have charset of RTL_TEXTENCODING_SYMBOL in new documents");
@@ -670,10 +675,17 @@ ErrCode ImpEditEngine::WriteRTF( SvStream& rOutput, EditSelection aSel, bool bCl
aAttribItems.Clear();
sal_uInt16 nScriptTypeI18N = GetI18NScriptType( EditPaM( pNode, nIndex+1 ) );
SvtScriptType nScriptType = SvtLanguageOptions::FromI18NToSvtScriptType(nScriptTypeI18N);
+ rtl_TextEncoding actEncoding = eDestEnc;
if ( !n || IsScriptChange( EditPaM( pNode, nIndex ) ) )
{
SfxItemSet aAttribs = GetAttribs( nNode, nIndex+1, nIndex+1 );
- aAttribItems.Insert( &aAttribs.Get( GetScriptItemId( EE_CHAR_FONTINFO, nScriptType ) ) );
+ auto& item = aAttribs.Get(GetScriptItemId(EE_CHAR_FONTINFO, nScriptType));
+ aAttribItems.Insert(&item);
+ // The actual encoding that RTF uses for the portion is defined by the font
+ if (auto i = GetFontIndex(item, aFontTable);
+ i < aFontTable.size()
+ && aFontTable[i]->GetCharSet() != RTL_TEXTENCODING_DONTKNOW)
+ actEncoding = aFontTable[i]->GetCharSet();
aAttribItems.Insert( &aAttribs.Get( GetScriptItemId( EE_CHAR_FONTHEIGHT, nScriptType ) ) );
aAttribItems.Insert( &aAttribs.Get( GetScriptItemId( EE_CHAR_WEIGHT, nScriptType ) ) );
aAttribItems.Insert( &aAttribs.Get( GetScriptItemId( EE_CHAR_ITALIC, nScriptType ) ) );
@@ -694,7 +706,7 @@ ErrCode ImpEditEngine::WriteRTF( SvStream& rOutput, EditSelection aSel, bool bCl
nE = nEndPos;
OUString aRTFStr = EditDoc::GetParaAsString( pNode, nS, nE);
- RTFOutFuncs::Out_String( rOutput, aRTFStr, eDestEnc );
+ RTFOutFuncs::Out_String(rOutput, aRTFStr, actEncoding);
rOutput.WriteChar( '}' );
}
if ( bFinishPortion )
@@ -830,18 +842,8 @@ void ImpEditEngine::WriteItemAsRTF( const SfxPoolItem& rItem, SvStream& rOutput,
case EE_CHAR_FONTINFO_CJK:
case EE_CHAR_FONTINFO_CTL:
{
- sal_uInt32 n = 0;
- for (size_t i = 0; i < rFontTable.size(); ++i)
- {
- if (*rFontTable[i] == rItem)
- {
- n = i;
- break;
- }
- }
-
rOutput.WriteOString( OOO_STRING_SVTOOLS_RTF_F );
- rOutput.WriteNumberAsString( n );
+ rOutput.WriteNumberAsString(GetFontIndex(rItem, rFontTable));
}
break;
case EE_CHAR_FONTHEIGHT:
diff --git a/sc/qa/unit/copy_paste_test.cxx b/sc/qa/unit/copy_paste_test.cxx
index 2e49de627d8f..8315f053f388 100644
--- a/sc/qa/unit/copy_paste_test.cxx
+++ b/sc/qa/unit/copy_paste_test.cxx
@@ -45,6 +45,7 @@ public:
void tdf113500_autofillMixed();
void tdf137625_autofillMergedUserlist();
void tdf137624_autofillMergedMixed();
+ void tdf122716_rtf_portion_encoding();
CPPUNIT_TEST_SUITE(ScCopyPasteTest);
CPPUNIT_TEST(testCopyPasteXLS);
@@ -62,6 +63,7 @@ public:
CPPUNIT_TEST(tdf113500_autofillMixed);
CPPUNIT_TEST(tdf137625_autofillMergedUserlist);
CPPUNIT_TEST(tdf137624_autofillMergedMixed);
+ CPPUNIT_TEST(tdf122716_rtf_portion_encoding);
CPPUNIT_TEST_SUITE_END();
private:
@@ -865,6 +867,32 @@ void ScCopyPasteTest::tdf137624_autofillMergedMixed()
}
}
+void ScCopyPasteTest::tdf122716_rtf_portion_encoding()
+{
+ // Given a document with an explicitly defined "204" (Russian) charset for a font,
+ // and a cell having contents of "Šampūnas", which has character "Š" representable
+ // in Windows-1252 (RTF default), but not in Windows-1251 (i.e. charset 204):
+ loadFromFile(u"xlsx/tdf122716_font_with_charset.xlsx");
+ ScModelObj* pModelObj = comphelper::getFromUnoTunnel<ScModelObj>(mxComponent);
+ // Obtain a transferable, similar to what happens on copy to clipboard:
+ auto xTransferable = pModelObj->getSelection();
+ // Get the RTF data:
+ auto rtf_any = xTransferable->getTransferData({ u"text/rtf"_ustr, {}, {} });
+ css::uno::Sequence<sal_Int8> rtf_bytes;
+ CPPUNIT_ASSERT(rtf_any >>= rtf_bytes); // the test fails here if the data cannot be extracted as a byte sequence
+ OString rtf_string(reinterpret_cast<const char*>(rtf_bytes.getConstArray()),
+ rtf_bytes.getLength());
+ // Check that the font with charset was actually emitted
+ CPPUNIT_ASSERT(rtf_string.indexOf("\\fcharset204 Liberation Sans;") >= 0);
+ // Make sure that Unicode markup is emitted for the non-ASCII characters.
+ // Without the fix, "\u352" wasn't there, because the export was using Windows-1252
+ // encoding unconditionally, even though the exported font defined a different one.
+ // Unicode markup was therefore emitted only for characters not representable in
+ // Windows-1252, and "Š" got exported as "\'8a". On import to Writer, font
+ // encoding was used, and "\'8a" was interpreted as a Cyrillic alphabet character.
+ CPPUNIT_ASSERT(rtf_string.indexOf("\\u352\\'3famp\\u363\\'3fnas") >= 0); // 352 = U+0160 "Š", 363 = U+016B "ū"; each is followed by \'3f, i.e. byte 0x3F ('?')
+}
+
ScCopyPasteTest::ScCopyPasteTest()
: UnoApiTest(u"/sc/qa/unit/data/"_ustr)
{
diff --git a/sc/qa/unit/data/xlsx/tdf122716_font_with_charset.xlsx b/sc/qa/unit/data/xlsx/tdf122716_font_with_charset.xlsx
new file mode 100644
index 000000000000..6c2326e3ed28
--- /dev/null
+++ b/sc/qa/unit/data/xlsx/tdf122716_font_with_charset.xlsx
Binary files differ