From 1a9a77f84cac68bd5374df3e9ee4df88dc87a0ac Mon Sep 17 00:00:00 2001 From: Mike Kaganski Date: Thu, 1 Sep 2016 01:02:11 +1000 Subject: Related: tdf#74299: use OEM encoding for ancient DXF This patch analyses DXF version data to heuristically detect which files use OEM text encoding (DOS versions of AutoCAD). Also, it uses LO's currently selected default document language instead of the system locale, so that the user can change it to match the language of the DXF and get a correct result. Change-Id: I3aec7e24f3d7efca35d0dd1acca8b277a076f210 Reviewed-on: https://gerrit.libreoffice.org/28556 Tested-by: Jenkins Reviewed-by: Michael Stahl --- filter/Library_gie.mk | 66 ++++++++++++++------------- filter/source/graphicfilter/idxf/dxfreprd.cxx | 64 +++++++++++++++++++++++--- filter/source/graphicfilter/idxf/dxfreprd.hxx | 16 +++---- 3 files changed, 98 insertions(+), 48 deletions(-) diff --git a/filter/Library_gie.mk b/filter/Library_gie.mk index d39504b23495..2c00c75e2ea6 100644 --- a/filter/Library_gie.mk +++ b/filter/Library_gie.mk @@ -23,46 +23,50 @@ $(eval $(call gb_Library_use_external,gie,boost_headers)) $(eval $(call gb_Library_use_sdk_api,gie)) +$(eval $(call gb_Library_use_custom_headers,gie,\ + officecfg/registry \ +)) + $(eval $(call gb_Library_use_libraries,gie,\ - basegfx \ + basegfx \ comphelper \ i18nlangtag \ svt \ - vcl \ + vcl \ utl \ - tl \ + tl \ tk \ - cppu \ - sal \ - $(gb_UWINAPI) \ + cppu \ + sal \ + $(gb_UWINAPI) \ )) $(eval $(call gb_Library_add_exception_objects,gie,\ - filter/source/graphicfilter/egif/egif \ - filter/source/graphicfilter/etiff/etiff \ - filter/source/graphicfilter/eps/eps \ - filter/source/graphicfilter/egif/giflzwc \ - filter/source/graphicfilter/itiff/ccidecom \ - filter/source/graphicfilter/itiff/itiff \ - filter/source/graphicfilter/itiff/lzwdecom \ - filter/source/graphicfilter/ipict/ipict \ - filter/source/graphicfilter/ipict/shape \ - filter/source/graphicfilter/ipcx/ipcx \ - filter/source/graphicfilter/iras/iras \ - 
filter/source/graphicfilter/itga/itga \ - filter/source/graphicfilter/ieps/ieps \ - filter/source/graphicfilter/ipsd/ipsd \ - filter/source/graphicfilter/ipbm/ipbm \ - filter/source/graphicfilter/ios2met/ios2met \ - filter/source/graphicfilter/idxf/dxf2mtf \ - filter/source/graphicfilter/idxf/dxfblkrd \ - filter/source/graphicfilter/idxf/dxfentrd \ - filter/source/graphicfilter/idxf/dxfgrprd \ - filter/source/graphicfilter/idxf/dxfreprd \ - filter/source/graphicfilter/idxf/dxftblrd \ - filter/source/graphicfilter/idxf/dxfvec \ - filter/source/graphicfilter/idxf/idxf \ - filter/source/graphicfilter/ipcd/ipcd \ + filter/source/graphicfilter/egif/egif \ + filter/source/graphicfilter/etiff/etiff \ + filter/source/graphicfilter/eps/eps \ + filter/source/graphicfilter/egif/giflzwc \ + filter/source/graphicfilter/itiff/ccidecom \ + filter/source/graphicfilter/itiff/itiff \ + filter/source/graphicfilter/itiff/lzwdecom \ + filter/source/graphicfilter/ipict/ipict \ + filter/source/graphicfilter/ipict/shape \ + filter/source/graphicfilter/ipcx/ipcx \ + filter/source/graphicfilter/iras/iras \ + filter/source/graphicfilter/itga/itga \ + filter/source/graphicfilter/ieps/ieps \ + filter/source/graphicfilter/ipsd/ipsd \ + filter/source/graphicfilter/ipbm/ipbm \ + filter/source/graphicfilter/ios2met/ios2met \ + filter/source/graphicfilter/idxf/dxf2mtf \ + filter/source/graphicfilter/idxf/dxfblkrd \ + filter/source/graphicfilter/idxf/dxfentrd \ + filter/source/graphicfilter/idxf/dxfgrprd \ + filter/source/graphicfilter/idxf/dxfreprd \ + filter/source/graphicfilter/idxf/dxftblrd \ + filter/source/graphicfilter/idxf/dxfvec \ + filter/source/graphicfilter/idxf/idxf \ + filter/source/graphicfilter/ipcd/ipcd \ )) # vim: set noet sw=4 ts=4: diff --git a/filter/source/graphicfilter/idxf/dxfreprd.cxx b/filter/source/graphicfilter/idxf/dxfreprd.cxx index 6a80a9686db6..838e2702bffe 100644 --- a/filter/source/graphicfilter/idxf/dxfreprd.cxx +++ b/filter/source/graphicfilter/idxf/dxfreprd.cxx 
@@ -19,8 +19,11 @@ #include -#include +#include "dxfreprd.hxx" #include "osl/nlsupport.h" +#include "officecfg/Setup.hxx" +#include "officecfg/Office/Linguistic.hxx" +#include "unotools/wincodepage.hxx" //------------------DXFBoundingBox-------------------------------------------- @@ -128,9 +131,8 @@ void DXFPalette::SetColor(sal_uInt8 nIndex, sal_uInt8 nRed, sal_uInt8 nGreen, sa DXFRepresentation::DXFRepresentation() - : bUseUTF8(false) + : mEnc(RTL_TEXTENCODING_DONTKNOW) { - setTextEncoding(osl_getTextEncodingFromLocale(nullptr)); // Use default encoding if none specified setGlobalLineTypeScale(1.0); } @@ -139,6 +141,24 @@ DXFRepresentation::~DXFRepresentation() { } +namespace { + +OUString getLODefaultLanguage() +{ + OUString result(officecfg::Office::Linguistic::General::DefaultLocale::get()); + if (result.isEmpty()) + result = officecfg::Setup::L10N::ooSetupSystemLocale::get(); + return result; +} + +} + +rtl_TextEncoding DXFRepresentation::getTextEncoding() const +{ + return (isTextEncodingSet()) ? + mEnc : + osl_getTextEncodingFromLocale(nullptr); // Use default encoding if none specified +} bool DXFRepresentation::Read( SvStream & rIStream, sal_uInt16 /*nMinPercent*/, sal_uInt16 /*nMaxPercent*/) { @@ -196,18 +216,48 @@ void DXFRepresentation::ReadHeader(DXFGroupReader & rDGR) { if (!rDGR.Read(1)) continue; - if (rDGR.GetS() >= "AC1021") - bUseUTF8 = true; + // Versions of AutoCAD up to Release 12 (inclusive, AC1009) + // were DOS software and used OEM encoding for storing strings. + // Release 13 (AC1012) had both DOS and Windows variants. + // Its Windows variant, and later releases used ANSI encodings for + // strings (up to version 2006, which was the last one to do so). + // Later versions (2007+, AC1021+) use UTF-8 for that. + // Other (non-Autodesk) implementations may have used different + // encodings for storing to corresponding formats, but there's + // no way to know that. 
+ // See http://autodesk.blogs.com/between_the_lines/autocad-release-history.html + if ((rDGR.GetS() <= "AC1009") || (rDGR.GetS() == "AC2.22") || (rDGR.GetS() == "AC2.21") || (rDGR.GetS() == "AC2.10") || + (rDGR.GetS() == "AC1.50") || (rDGR.GetS() == "AC1.40") || (rDGR.GetS() == "AC1.2") || (rDGR.GetS() == "MC0.0")) + { + // Set OEM encoding for old DOS formats + // only if the encoding is not set yet + // e.g. by previous $DWGCODEPAGE + if (!isTextEncodingSet()) + setTextEncoding(utl_getWinTextEncodingFromLangStr(getLODefaultLanguage().toUtf8().getStr(), true)); + } + else if (rDGR.GetS() >= "AC1021") + setTextEncoding(RTL_TEXTENCODING_UTF8); + else + { + // Set ANSI encoding for old Windows formats + // only if the encoding is not set yet + // e.g. by previous $DWGCODEPAGE + if (!isTextEncodingSet()) + setTextEncoding(utl_getWinTextEncodingFromLangStr(getLODefaultLanguage().toUtf8().getStr())); + } } else if (rDGR.GetS() == "$DWGCODEPAGE") { if (!rDGR.Read(3)) continue; + // If we already use UTF8, then don't update encoding anymore + if (isTextEncodingUTF8()) + continue; // FIXME: we really need a whole table of // $DWGCODEPAGE to encodings mappings - if ( (rDGR.GetS().equalsIgnoreAsciiCase("ANSI_932")) || - (rDGR.GetS().equalsIgnoreAsciiCase("DOS932")) ) + else if ( (rDGR.GetS().equalsIgnoreAsciiCase("ANSI_932")) || + (rDGR.GetS().equalsIgnoreAsciiCase("DOS932")) ) { setTextEncoding(RTL_TEXTENCODING_MS_932); } diff --git a/filter/source/graphicfilter/idxf/dxfreprd.hxx b/filter/source/graphicfilter/idxf/dxfreprd.hxx index a7ad7531f975..66cc747b41cd 100644 --- a/filter/source/graphicfilter/idxf/dxfreprd.hxx +++ b/filter/source/graphicfilter/idxf/dxfreprd.hxx @@ -90,28 +90,28 @@ public: rtl_TextEncoding mEnc; // $DWGCODEPAGE - bool bUseUTF8; // for AC1021 and higher - double mfGlobalLineTypeScale; // $LTSCALE DXFRepresentation(); ~DXFRepresentation(); rtl_TextEncoding getTextEncoding() const; - void setTextEncoding(rtl_TextEncoding aEnc); + void 
setTextEncoding(rtl_TextEncoding aEnc) { mEnc = aEnc; } OUString ToOUString(const OString& s) const; - double getGlobalLineTypeScale() const; - void setGlobalLineTypeScale(double fGlobalLineTypeScale); + double getGlobalLineTypeScale() const { return mfGlobalLineTypeScale; } + void setGlobalLineTypeScale(double fGlobalLineTypeScale) { mfGlobalLineTypeScale = fGlobalLineTypeScale; } bool Read( SvStream & rIStream, sal_uInt16 nMinPercent, sal_uInt16 nMaxPercent); // Reads complete DXF file. private: - void ReadHeader(DXFGroupReader & rDGR); void CalcBoundingBox(const DXFEntities & rEntities, DXFBoundingBox & rBox); + + bool isTextEncodingSet() const { return mEnc != RTL_TEXTENCODING_DONTKNOW; } + bool isTextEncodingUTF8() const { return mEnc == RTL_TEXTENCODING_UTF8; } }; @@ -121,10 +121,6 @@ private: inline sal_uInt8 DXFPalette::GetRed(sal_uInt8 nIndex) const { return pRed[nIndex]; } inline sal_uInt8 DXFPalette::GetGreen(sal_uInt8 nIndex) const { return pGreen[nIndex]; } inline sal_uInt8 DXFPalette::GetBlue(sal_uInt8 nIndex) const { return pBlue[nIndex]; } -inline rtl_TextEncoding DXFRepresentation::getTextEncoding() const { return bUseUTF8 ? RTL_TEXTENCODING_UTF8 : mEnc; } -inline void DXFRepresentation::setTextEncoding(rtl_TextEncoding aEnc) { mEnc = aEnc; } -inline double DXFRepresentation::getGlobalLineTypeScale() const { return mfGlobalLineTypeScale; } -inline void DXFRepresentation::setGlobalLineTypeScale(double fGlobalLineTypeScale) { mfGlobalLineTypeScale = fGlobalLineTypeScale; } #endif -- cgit