diff options
author | Stephan Bergmann <sbergman@redhat.com> | 2017-02-21 08:22:19 +0100 |
---|---|---|
committer | Stephan Bergmann <sbergman@redhat.com> | 2017-02-21 08:22:19 +0100 |
commit | fb896ebc7e47f4a63c5c4dd1415a8b4d73c0c93f (patch) | |
tree | 711e5c8d9e1ca2b1410a1c4ecfe9a395db4cbd9c /tools | |
parent | 0f6e813827fed1baefe6001b6e9655b0aeedcc46 (diff) |
Remove wrong and broken encoding from INetMIMEMessage::SetHeaderField_Impl
INetMIMEEncodedWordOutputSink had been intended to encode (non-ASCII) content of
a "free-form text" header field as per RFC 2047. It used a heuristic trying to
detect already encoded words (=?...?...?...?=) in the input, to pass them
through unmodified. (Arguably, it could just as well have encoded them,
assuming they were meant to be genuine input.) However, that heuristic had a
bug ever since 8ab086b6cc054501bfbf7ef6fa509c393691e860 "initial import", going
from STATE_FIRST_EQUALS to STATE_FIRST_EQUALS instead of STATE_FIRST_QUESTION,
rendering the heuristic detection logic effectively unused.
Instead of fixing the bug, 6e12729f715f142140d220dc7d3b28a4a0657016 "remove
unused enumerator from EncodedWordState" and
b8d8fb3f0cf4a961bbff54523eaca1a4f8179c7a "convert EncodedWordState to scoped
enum" crippled the code further by removing any reputedly unused cases.
But the only remaining use of INetMIMEEncodedWordOutputSink is in
INetMIMEMessage::SetHeaderField_Impl, encoding MIME-Version,
Content-Transfer-Encoding, Content-Type, and Content-Disposition header fields. And none of
those headers have any "free-form text" content that should be encoded as per
RFC 2047. The first two have fixed ASCII-only content ("1.0" and "7bit",
"8bit", etc., respectively), while the latter two have structured content that
may contain parameters of arbitrary content, which must be encoded according to
RFC 2231 (but currently isn't).
And the only place where such arbitrary-content parameters are generated is in
the two calls to SetContentDisposition in
forms/source/component/DatabaseForm.cxx. (The calls to SetContentType there and
in tools/source/inet/ itself are all known to have unproblematic ASCII-only
content.) So mark those two places with TODOs about the missing encoding (which
had been missing since forever) and, in INetMIMEMessage::SetHeaderField_Impl,
liberally convert the content to 8-bit via OUString::toUtf8 for now.
Change-Id: I4b2a219b396953b219ca66441a5227157a35951f
Diffstat (limited to 'tools')
-rw-r--r-- | tools/source/inet/inetmime.cxx | 1084 | ||||
-rw-r--r-- | tools/source/inet/inetmsg.cxx | 7 |
2 files changed, 1 insertions, 1090 deletions
diff --git a/tools/source/inet/inetmime.cxx b/tools/source/inet/inetmime.cxx index 122358b1a9f4..8fc5a4515a73 100644 --- a/tools/source/inet/inetmime.cxx +++ b/tools/source/inet/inetmime.cxx @@ -18,12 +18,10 @@ */ #include <cstddef> -#include <forward_list> #include <limits> #include <memory> #include <osl/diagnose.h> -#include <rtl/alloc.h> #include <rtl/ustring.hxx> #include <rtl/strbuf.hxx> #include <rtl/tencinfo.h> @@ -81,14 +79,9 @@ sal_Unicode const * scanParameters(sal_Unicode const * pBegin, INetContentTypeParameterList * pParameters); -inline rtl_TextEncoding translateToMIME(rtl_TextEncoding - eEncoding); - inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding eEncoding); -const sal_Char * getCharsetName(rtl_TextEncoding eEncoding); - rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin, const sal_Char * pEnd); @@ -104,9 +97,6 @@ sal_Char * convertFromUnicode(const sal_Unicode * pBegin, rtl_TextEncoding eEncoding, sal_Size & rSize); -inline void writeEscapeSequence(INetMIMEOutputSink & rSink, - sal_uInt32 nChar); - void writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar); bool translateUTF8Char(const sal_Char *& rBegin, @@ -151,16 +141,6 @@ inline bool startsWithLineFolding(const sal_Unicode * pBegin, && isWhiteSpace(pBegin[2]); // CR, LF } -inline rtl_TextEncoding translateToMIME(rtl_TextEncoding eEncoding) -{ -#if defined(_WIN32) - return eEncoding == RTL_TEXTENCODING_MS_1252 ? 
- RTL_TEXTENCODING_ISO_8859_1 : eEncoding; -#else // WNT - return eEncoding; -#endif // WNT -} - inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding eEncoding) { @@ -273,14 +253,6 @@ inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer, return pBuffer; } -inline void writeEscapeSequence(INetMIMEOutputSink & rSink, - sal_uInt32 nChar) -{ - DBG_ASSERT(nChar <= 0xFF, "writeEscapeSequence(): Bad char"); - rSink << '=' << sal_uInt8(INetMIME::getHexDigit(nChar >> 4)) - << sal_uInt8(INetMIME::getHexDigit(nChar & 15)); -} - void writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar) { // See RFC 2279 for a discussion of UTF-8. @@ -389,59 +361,9 @@ bool translateUTF8Char(const sal_Char *& rBegin, return true; } -class Charset -{ - rtl_TextEncoding m_eEncoding; - const sal_uInt32 * m_pRanges; - -public: - inline Charset(rtl_TextEncoding eTheEncoding, - const sal_uInt32 * pTheRanges); - - rtl_TextEncoding getEncoding() const { return m_eEncoding; } - - bool contains(sal_uInt32 nChar) const; -}; - -inline Charset::Charset(rtl_TextEncoding eTheEncoding, - const sal_uInt32 * pTheRanges): - m_eEncoding(eTheEncoding), - m_pRanges(pTheRanges) -{ - DBG_ASSERT(m_pRanges, "Charset::Charset(): Bad ranges"); -} - void appendISO88591(OUString & rText, sal_Char const * pBegin, sal_Char const * pEnd); -class INetMIMECharsetList_Impl -{ - struct Node - { - Charset m_aCharset; - bool m_bDisabled; - - explicit Node(const Charset & rCharset) - :m_aCharset(rCharset), m_bDisabled(false) - {} - }; - - std::forward_list<Node> m_aList; - -public: - void prepend(const Charset & rCharset) - { - m_aList.emplace_front(rCharset); - } - - void includes(sal_uInt32 nChar); - - rtl_TextEncoding getPreferredEncoding(rtl_TextEncoding eDefault) - const; - - void reset(); -}; - struct Parameter { Parameter * m_pNext; @@ -495,19 +417,6 @@ inline ParameterList::~ParameterList() bool parseParameters(ParameterList const & rInput, INetContentTypeParameterList * pOutput); -// Charset - -bool 
Charset::contains(sal_uInt32 nChar) const -{ - for (const sal_uInt32 * p = m_pRanges;;) - { - if (nChar < *p++) - return false; - if (nChar <= *p++) - return true; - } -} - // appendISO88591 void appendISO88591(OUString & rText, sal_Char const * pBegin, @@ -520,30 +429,6 @@ void appendISO88591(OUString & rText, sal_Char const * pBegin, rText += OUString(pBuffer.get(), nLength); } -// INetMIMECharsetList_Impl - -void INetMIMECharsetList_Impl::includes(sal_uInt32 nChar) -{ - for (Node& rNode : m_aList) - if (!(rNode.m_bDisabled || rNode.m_aCharset.contains(nChar))) - rNode.m_bDisabled = true; -} - -rtl_TextEncoding INetMIMECharsetList_Impl::getPreferredEncoding(rtl_TextEncoding eDefault) - const -{ - for (const Node& rNode : m_aList) - if (!rNode.m_bDisabled) - return rNode.m_aCharset.getEncoding(); - return eDefault; -} - -void INetMIMECharsetList_Impl::reset() -{ - for (Node& rNode : m_aList) - rNode.m_bDisabled = false; -} - // ParameterList Parameter ** ParameterList::find(const OString& rAttribute, @@ -663,942 +548,6 @@ bool parseParameters(ParameterList const & rInput, return true; } -INetMIMECharsetList_Impl * -createPreferredCharsetList(rtl_TextEncoding eEncoding) -{ - static const sal_uInt32 aUSASCIIRanges[] = { 0, 0x7F, sal_uInt32(-1) }; - - static const sal_uInt32 aISO88591Ranges[] = { 0, 0xFF, sal_uInt32(-1) }; - // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT> version - // 1.0 of 1999 July 27 - - static const sal_uInt32 aISO88592Ranges[] - = { 0, 0xA0, 0xA4, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xB0, 0xB0, - 0xB4, 0xB4, 0xB8, 0xB8, 0xC1, 0xC2, 0xC4, 0xC4, 0xC7, 0xC7, - 0xC9, 0xC9, 0xCB, 0xCB, 0xCD, 0xCE, 0xD3, 0xD4, 0xD6, 0xD7, - 0xDA, 0xDA, 0xDC, 0xDD, 0xDF, 0xDF, 0xE1, 0xE2, 0xE4, 0xE4, - 0xE7, 0xE7, 0xE9, 0xE9, 0xEB, 0xEB, 0xED, 0xEE, 0xF3, 0xF4, - 0xF6, 0xF7, 0xFA, 0xFA, 0xFC, 0xFD, 0x102, 0x107, 0x10C, 0x111, - 0x118, 0x11B, 0x139, 0x13A, 0x13D, 0x13E, 0x141, 0x144, - 0x147, 0x148, 0x150, 0x151, 0x154, 0x155, 0x158, 0x15B, - 0x15E, 0x165, 
0x16E, 0x171, 0x179, 0x17E, 0x2C7, 0x2C7, - 0x2D8, 0x2D9, 0x2DB, 0x2DB, 0x2DD, 0x2DD, sal_uInt32(-1) }; - // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT> version - // 1.0 of 1999 July 27 - - static const sal_uInt32 aISO88593Ranges[] - = { 0, 0xA0, 0xA3, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xB0, 0xB0, - 0xB2, 0xB5, 0xB7, 0xB8, 0xBD, 0xBD, 0xC0, 0xC2, 0xC4, 0xC4, - 0xC7, 0xCF, 0xD1, 0xD4, 0xD6, 0xD7, 0xD9, 0xDC, 0xDF, 0xE2, - 0xE4, 0xE4, 0xE7, 0xEF, 0xF1, 0xF4, 0xF6, 0xF7, 0xF9, 0xFC, - 0x108, 0x10B, 0x11C, 0x121, 0x124, 0x127, 0x130, 0x131, - 0x134, 0x135, 0x15C, 0x15F, 0x16C, 0x16D, 0x17B, 0x17C, - 0x2D8, 0x2D9, sal_uInt32(-1) }; - // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-3.TXT> version - // 1.0 of 1999 July 27 - - static const sal_uInt32 aISO88594Ranges[] - = { 0, 0xA0, 0xA4, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xAF, 0xB0, - 0xB4, 0xB4, 0xB8, 0xB8, 0xC1, 0xC6, 0xC9, 0xC9, 0xCB, 0xCB, - 0xCD, 0xCE, 0xD4, 0xD8, 0xDA, 0xDC, 0xDF, 0xDF, 0xE1, 0xE6, - 0xE9, 0xE9, 0xEB, 0xEB, 0xED, 0xEE, 0xF4, 0xF8, 0xFA, 0xFC, - 0x100, 0x101, 0x104, 0x105, 0x10C, 0x10D, 0x110, 0x113, - 0x116, 0x119, 0x122, 0x123, 0x128, 0x12B, 0x12E, 0x12F, - 0x136, 0x138, 0x13B, 0x13C, 0x145, 0x146, 0x14A, 0x14D, - 0x156, 0x157, 0x160, 0x161, 0x166, 0x16B, 0x172, 0x173, - 0x17D, 0x17E, 0x2C7, 0x2C7, 0x2D9, 0x2D9, 0x2DB, 0x2DB, - sal_uInt32(-1) }; - // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-4.TXT> version - // 1.0 of 1999 July 27 - - static const sal_uInt32 aISO88595Ranges[] - = { 0, 0xA0, 0xA7, 0xA7, 0xAD, 0xAD, 0x401, 0x40C, 0x40E, 0x44F, - 0x451, 0x45C, 0x45E, 0x45F, 0x2116, 0x2116, sal_uInt32(-1) }; - // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-5.TXT> version - // 1.0 of 1999 July 27 - - static const sal_uInt32 aISO88596Ranges[] - = { 0, 0xA0, 0xA4, 0xA4, 0xAD, 0xAD, 0x60C, 0x60C, 0x61B, 0x61B, - 0x61F, 0x61F, 0x621, 0x63A, 0x640, 0x652, sal_uInt32(-1) }; - // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-6.TXT> version - // 1.0 of 1999 July 27 - - 
static const sal_uInt32 aISO88597Ranges[] - = { 0, 0xA0, 0xA3, 0xA3, 0xA6, 0xA9, 0xAB, 0xAD, 0xB0, 0xB3, - 0xB7, 0xB7, 0xBB, 0xBB, 0xBD, 0xBD, 0x384, 0x386, 0x388, 0x38A, - 0x38C, 0x38C, 0x38E, 0x3A1, 0x3A3, 0x3CE, 0x2015, 0x2015, - 0x2018, 0x2019, sal_uInt32(-1) }; - // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT> version - // 1.0 of 1999 July 27 - - static const sal_uInt32 aISO88598Ranges[] - = { 0, 0xA0, 0xA2, 0xA9, 0xAB, 0xB9, 0xBB, 0xBE, 0xD7, 0xD7, - 0xF7, 0xF7, 0x5D0, 0x5EA, 0x200E, 0x200F, 0x2017, 0x2017, - sal_uInt32(-1) }; - // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-8.TXT> version - // 1.1 of 2000-Jan-03 - - static const sal_uInt32 aISO88599Ranges[] - = { 0, 0xCF, 0xD1, 0xDC, 0xDF, 0xEF, 0xF1, 0xFC, 0xFF, 0xFF, - 0x11E, 0x11F, 0x130, 0x131, 0x15E, 0x15F, sal_uInt32(-1) }; - // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-9.TXT> version - // 1.0 of 1999 July 27 - - static const sal_uInt32 aISO885910Ranges[] - = { 0, 0xA0, 0xA7, 0xA7, 0xAD, 0xAD, 0xB0, 0xB0, 0xB7, 0xB7, - 0xC1, 0xC6, 0xC9, 0xC9, 0xCB, 0xCB, 0xCD, 0xD0, 0xD3, 0xD6, - 0xD8, 0xD8, 0xDA, 0xDF, 0xE1, 0xE6, 0xE9, 0xE9, 0xEB, 0xEB, - 0xED, 0xF0, 0xF3, 0xF6, 0xF8, 0xF8, 0xFA, 0xFE, 0x100, 0x101, - 0x104, 0x105, 0x10C, 0x10D, 0x110, 0x113, 0x116, 0x119, - 0x122, 0x123, 0x128, 0x12B, 0x12E, 0x12F, 0x136, 0x138, - 0x13B, 0x13C, 0x145, 0x146, 0x14A, 0x14D, 0x160, 0x161, - 0x166, 0x16B, 0x172, 0x173, 0x17D, 0x17E, 0x2015, 0x2015, - sal_uInt32(-1) }; - // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-10.TXT> version - // 1.1 of 1999 October 11 - - static const sal_uInt32 aISO885913Ranges[] - = { 0, 0xA0, 0xA2, 0xA4, 0xA6, 0xA7, 0xA9, 0xA9, 0xAB, 0xAE, - 0xB0, 0xB3, 0xB5, 0xB7, 0xB9, 0xB9, 0xBB, 0xBE, 0xC4, 0xC6, - 0xC9, 0xC9, 0xD3, 0xD3, 0xD5, 0xD8, 0xDC, 0xDC, 0xDF, 0xDF, - 0xE4, 0xE6, 0xE9, 0xE9, 0xF3, 0xF3, 0xF5, 0xF8, 0xFC, 0xFC, - 0x100, 0x101, 0x104, 0x107, 0x10C, 0x10D, 0x112, 0x113, - 0x116, 0x119, 0x122, 0x123, 0x12A, 0x12B, 0x12E, 0x12F, - 0x136, 0x137, 
0x13B, 0x13C, 0x141, 0x146, 0x14C, 0x14D, - 0x156, 0x157, 0x15A, 0x15B, 0x160, 0x161, 0x16A, 0x16B, - 0x172, 0x173, 0x179, 0x17E, 0x2019, 0x2019, 0x201C, 0x201E, - sal_uInt32(-1) }; - // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-13.TXT> version - // 1.0 of 1999 July 27 - - static const sal_uInt32 aISO885914Ranges[] - = { 0, 0xA0, 0xA3, 0xA3, 0xA7, 0xA7, 0xA9, 0xA9, 0xAD, 0xAE, - 0xB6, 0xB6, 0xC0, 0xCF, 0xD1, 0xD6, 0xD8, 0xDD, 0xDF, 0xEF, - 0xF1, 0xF6, 0xF8, 0xFD, 0xFF, 0xFF, 0x10A, 0x10B, 0x120, 0x121, - 0x174, 0x178, 0x1E02, 0x1E03, 0x1E0A, 0x1E0B, 0x1E1E, 0x1E1F, - 0x1E40, 0x1E41, 0x1E56, 0x1E57, 0x1E60, 0x1E61, 0x1E6A, 0x1E6B, - 0x1E80, 0x1E85, 0x1EF2, 0x1EF3, sal_uInt32(-1) }; - // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-14.TXT> version - // 1.0 of 1999 July 27 - - static const sal_uInt32 aISO885915Ranges[] - = { 0, 0xA3, 0xA5, 0xA5, 0xA7, 0xA7, 0xA9, 0xB3, 0xB5, 0xB7, - 0xB9, 0xBB, 0xBF, 0xFF, 0x152, 0x153, 0x160, 0x161, 0x178, 0x178, - 0x17D, 0x17E, 0x20AC, 0x20AC, sal_uInt32(-1) }; - // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT> version - // 1.0 of 1999 July 27 - - static const sal_uInt32 aKOI8RRanges[] - = { 0, 0x7F, 0xA0, 0xA0, 0xA9, 0xA9, 0xB0, 0xB0, 0xB2, 0xB2, - 0xB7, 0xB7, 0xF7, 0xF7, 0x401, 0x401, 0x410, 0x44F, 0x451, 0x451, - 0x2219, 0x221A, 0x2248, 0x2248, 0x2264, 0x2265, 0x2320, 0x2321, - 0x2500, 0x2500, 0x2502, 0x2502, 0x250C, 0x250C, 0x2510, 0x2510, - 0x2514, 0x2514, 0x2518, 0x2518, 0x251C, 0x251C, 0x2524, 0x2524, - 0x252C, 0x252C, 0x2534, 0x2534, 0x253C, 0x253C, 0x2550, 0x256C, - 0x2580, 0x2580, 0x2584, 0x2584, 0x2588, 0x2588, 0x258C, 0x258C, - 0x2590, 0x2593, 0x25A0, 0x25A0, sal_uInt32(-1) }; - // <ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT> - // version 1.0 of 18 August 1999 - -#if defined(_WIN32) - static const sal_uInt32 aWindows1252Ranges[] - = { 0, 0x7F, 0xA0, 0xFF, 0x152, 0x153, 0x160, 0x161, 0x178, 0x178, - 0x17D, 0x17E, 0x192, 0x192, 0x2C6, 0x2C6, 0x2DC, 0x2DC, - 0x2013, 0x2014, 
0x2018, 0x201A, 0x201C, 0x201E, 0x2020, 0x2022, - 0x2026, 0x2026, 0x2030, 0x2030, 0x2039, 0x203A, 0x20AC, 0x20AC, - 0x2122, 0x2122, sal_uInt32(-1) }; - // <ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/ - // CP1252.TXT> version 2.01 of 04/15/98 -#endif // WNT - - INetMIMECharsetList_Impl * pList = new INetMIMECharsetList_Impl; - switch (eEncoding) - { - case RTL_TEXTENCODING_MS_1252: -#if defined(_WIN32) - pList->prepend(Charset(RTL_TEXTENCODING_MS_1252, - aWindows1252Ranges)); - break; -#endif // WNT - case RTL_TEXTENCODING_ISO_8859_1: - case RTL_TEXTENCODING_UTF7: - case RTL_TEXTENCODING_UTF8: - break; - - case RTL_TEXTENCODING_ISO_8859_2: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_2, - aISO88592Ranges)); - break; - - case RTL_TEXTENCODING_ISO_8859_3: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_3, - aISO88593Ranges)); - break; - - case RTL_TEXTENCODING_ISO_8859_4: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_4, - aISO88594Ranges)); - break; - - case RTL_TEXTENCODING_ISO_8859_5: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5, - aISO88595Ranges)); - break; - - case RTL_TEXTENCODING_ISO_8859_6: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_6, - aISO88596Ranges)); - break; - - case RTL_TEXTENCODING_ISO_8859_7: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_7, - aISO88597Ranges)); - break; - - case RTL_TEXTENCODING_ISO_8859_8: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_8, - aISO88598Ranges)); - break; - - case RTL_TEXTENCODING_ISO_8859_9: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_9, - aISO88599Ranges)); - break; - - case RTL_TEXTENCODING_ISO_8859_10: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_10, - aISO885910Ranges)); - break; - - case RTL_TEXTENCODING_ISO_8859_13: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_13, - aISO885913Ranges)); - break; - - case RTL_TEXTENCODING_ISO_8859_14: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_14, - aISO885914Ranges)); - break; - - case 
RTL_TEXTENCODING_ISO_8859_15: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_15, - aISO885915Ranges)); - break; - - case RTL_TEXTENCODING_MS_1250: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_2, - aISO88592Ranges)); - break; - - case RTL_TEXTENCODING_MS_1251: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5, - aISO88595Ranges)); - break; - - case RTL_TEXTENCODING_MS_1253: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_7, - aISO88597Ranges)); - break; - - case RTL_TEXTENCODING_MS_1254: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_9, - aISO88599Ranges)); - break; - - case RTL_TEXTENCODING_MS_1255: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_8, - aISO88598Ranges)); - break; - - case RTL_TEXTENCODING_MS_1256: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_6, - aISO88596Ranges)); - break; - - case RTL_TEXTENCODING_MS_1257: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_4, - aISO88594Ranges)); - break; - - case RTL_TEXTENCODING_KOI8_R: - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5, - aISO88595Ranges)); - pList->prepend(Charset(RTL_TEXTENCODING_KOI8_R, aKOI8RRanges)); - break; - - default: //@@@ more cases are missing! 
- OSL_FAIL("createPreferredCharsetList():" - " Unsupported encoding"); - break; - } - pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_1, aISO88591Ranges)); - pList->prepend(Charset(RTL_TEXTENCODING_ASCII_US, aUSASCIIRanges)); - return pList; -} - -class INetMIMEEncodedWordOutputSink -{ -private: - enum { BUFFER_SIZE = 256 }; - - enum Coding { CODING_NONE, CODING_ENCODED, CODING_ENCODED_TERMINATED }; - - enum class WordState { INITIAL, FIRST_EQUALS, - SECOND_QUESTION, ENCODING, - THIRD_QUESTION, ENCODED_TEXT, - FOURTH_QUESTION, SECOND_EQUALS, - BAD }; - - INetMIMEOutputSink & m_rSink; - sal_uInt32 m_nExtraSpaces; - INetMIMECharsetList_Impl * m_pEncodingList; - sal_Unicode * m_pBuffer; - sal_uInt32 m_nBufferSize; - sal_Unicode * m_pBufferEnd; - Coding m_ePrevCoding; - rtl_TextEncoding m_ePrevMIMEEncoding; - Coding m_eCoding; - WordState m_eEncodedWordState; - - void finish(bool bWriteTrailer); - -public: - inline INetMIMEEncodedWordOutputSink(INetMIMEOutputSink & rTheSink, - rtl_TextEncoding ePreferredEncoding); - - ~INetMIMEEncodedWordOutputSink(); - - INetMIMEEncodedWordOutputSink & writeChar(sal_uInt32 nChar); - - inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd); - - inline void flush(); -}; - -inline INetMIMEEncodedWordOutputSink::INetMIMEEncodedWordOutputSink( - INetMIMEOutputSink & rTheSink, rtl_TextEncoding ePreferredEncoding): - m_rSink(rTheSink), - m_nExtraSpaces(0), - m_pEncodingList(createPreferredCharsetList(ePreferredEncoding)), - m_ePrevCoding(CODING_NONE), - m_ePrevMIMEEncoding(RTL_TEXTENCODING_DONTKNOW), - m_eCoding(CODING_NONE), - m_eEncodedWordState(WordState::INITIAL) -{ - m_nBufferSize = BUFFER_SIZE; - m_pBuffer = static_cast< sal_Unicode * >(rtl_allocateMemory( - m_nBufferSize - * sizeof (sal_Unicode))); - m_pBufferEnd = m_pBuffer; -} - - -inline void INetMIMEEncodedWordOutputSink::write(const sal_Unicode * pBegin, - const sal_Unicode * pEnd) -{ - DBG_ASSERT(pBegin && pBegin <= pEnd, - 
"INetMIMEEncodedWordOutputSink::write(): Bad sequence"); - - while (pBegin != pEnd) - writeChar(*pBegin++); -} - -inline void INetMIMEEncodedWordOutputSink::flush() -{ - finish(true); -} - -static const bool aEscape[128] - = { true, // 0x00 - true, // 0x01 - true, // 0x02 - true, // 0x03 - true, // 0x04 - true, // 0x05 - true, // 0x06 - true, // 0x07 - true, // 0x08 - true, // 0x09 - true, // 0x0A - true, // 0x0B - true, // 0x0C - true, // 0x0D - true, // 0x0E - true, // 0x0F - true, // 0x10 - true, // 0x11 - true, // 0x12 - true, // 0x13 - true, // 0x14 - true, // 0x15 - true, // 0x16 - true, // 0x17 - true, // 0x18 - true, // 0x19 - true, // 0x1A - true, // 0x1B - true, // 0x1C - true, // 0x1D - true, // 0x1E - true, // 0x1F - false, // ' ' - false, // '!' - false, // '"' - false, // '#' - false, // '$' - false, // '%' - false, // '&' - false, // ''' - false, // '(' - false, // ')' - false, // '*' - false, // '+' - false, // ',' - false, // '-' - false, // '.' - false, // '/' - false, // '0' - false, // '1' - false, // '2' - false, // '3' - false, // '4' - false, // '5' - false, // '6' - false, // '7' - false, // '8' - false, // '9' - false, // ':' - false, // ';' - false, // '<' - true, // '=' - false, // '>' - true, // '?' 
- false, // '@' - false, // 'A' - false, // 'B' - false, // 'C' - false, // 'D' - false, // 'E' - false, // 'F' - false, // 'G' - false, // 'H' - false, // 'I' - false, // 'J' - false, // 'K' - false, // 'L' - false, // 'M' - false, // 'N' - false, // 'O' - false, // 'P' - false, // 'Q' - false, // 'R' - false, // 'S' - false, // 'T' - false, // 'U' - false, // 'V' - false, // 'W' - false, // 'X' - false, // 'Y' - false, // 'Z' - false, // '[' - false, // '\' - false, // ']' - false, // '^' - true, // '_' - false, // '`' - false, // 'a' - false, // 'b' - false, // 'c' - false, // 'd' - false, // 'e' - false, // 'f' - false, // 'g' - false, // 'h' - false, // 'i' - false, // 'j' - false, // 'k' - false, // 'l' - false, // 'm' - false, // 'n' - false, // 'o' - false, // 'p' - false, // 'q' - false, // 'r' - false, // 's' - false, // 't' - false, // 'u' - false, // 'v' - false, // 'w' - false, // 'x' - false, // 'y' - false, // 'z' - false, // '{' - false, // '|' - false, // '}' - false, // '~' - true }; // DEL - -inline bool -needsEncodedWordEscape(sal_uInt32 nChar) -{ - return !rtl::isAscii(nChar) || aEscape[nChar]; -} - -void INetMIMEEncodedWordOutputSink::finish(bool bWriteTrailer) -{ - if (m_eEncodedWordState == WordState::SECOND_EQUALS) - { - // If the text is already an encoded word, copy it verbatim: - switch (m_ePrevCoding) - { - case CODING_NONE: - while (m_nExtraSpaces-- > 0) - { - m_rSink << ' '; - } - break; - - case CODING_ENCODED: - { - while (m_nExtraSpaces-- > 0) - { - m_rSink << '_'; - } - m_rSink << "?="; - SAL_FALLTHROUGH; - } - case CODING_ENCODED_TERMINATED: - m_rSink << ' '; - break; - } - m_rSink.write(m_pBuffer, m_pBufferEnd); - m_eCoding = CODING_ENCODED_TERMINATED; - } - else - { - switch (m_eCoding) - { - case CODING_NONE: - switch (m_ePrevCoding) - { - case CODING_ENCODED: - m_rSink << "?="; - break; - - default: - break; - } - while (m_nExtraSpaces-- > 0) - { - m_rSink << ' '; - } - m_rSink.write(m_pBuffer, m_pBufferEnd); - break; - - 
case CODING_ENCODED: - { - rtl_TextEncoding eCharsetEncoding - = m_pEncodingList-> - getPreferredEncoding(RTL_TEXTENCODING_UTF8); - rtl_TextEncoding eMIMEEncoding - = translateToMIME(eCharsetEncoding); - - const sal_Char * pCharsetName - = getCharsetName(eMIMEEncoding); - - switch (m_ePrevCoding) - { - case CODING_NONE: - while (m_nExtraSpaces-- > 0) - { - m_rSink << ' '; - } - m_rSink << "=?" << pCharsetName << "?Q?"; - break; - - case CODING_ENCODED: - if (m_ePrevMIMEEncoding != eMIMEEncoding) - { - m_rSink << "?= =?" << pCharsetName << "?Q?"; - } - while (m_nExtraSpaces-- > 0) - { - m_rSink << '_'; - } - break; - - case CODING_ENCODED_TERMINATED: - m_rSink << " =?" << pCharsetName << "?Q?"; - while (m_nExtraSpaces-- > 0) - { - m_rSink << '_'; - } - break; - } - - // The non UTF-8 code will only work for stateless single byte - // character encodings: - if (eMIMEEncoding == RTL_TEXTENCODING_UTF8) - { - for (sal_Unicode const * p = m_pBuffer; - p != m_pBufferEnd;) - { - sal_uInt32 nUTF32 - = INetMIME::getUTF32Character(p, m_pBufferEnd); - bool bEscape = needsEncodedWordEscape(nUTF32); - if (bEscape) - { - DBG_ASSERT( - rtl::isUnicodeCodePoint(nUTF32), - "INetMIMEEncodedWordOutputSink::finish():" - " Bad char"); - if (nUTF32 < 0x80) - writeEscapeSequence(m_rSink, - nUTF32); - else if (nUTF32 < 0x800) - { - writeEscapeSequence(m_rSink, - (nUTF32 >> 6) - | 0xC0); - writeEscapeSequence(m_rSink, - (nUTF32 & 0x3F) - | 0x80); - } - else if (nUTF32 < 0x10000) - { - writeEscapeSequence(m_rSink, - (nUTF32 >> 12) - | 0xE0); - writeEscapeSequence(m_rSink, - ((nUTF32 >> 6) - & 0x3F) - | 0x80); - writeEscapeSequence(m_rSink, - (nUTF32 & 0x3F) - | 0x80); - } - else - { - writeEscapeSequence(m_rSink, - (nUTF32 >> 18) - | 0xF0); - writeEscapeSequence(m_rSink, - ((nUTF32 >> 12) - & 0x3F) - | 0x80); - writeEscapeSequence(m_rSink, - ((nUTF32 >> 6) - & 0x3F) - | 0x80); - writeEscapeSequence(m_rSink, - (nUTF32 & 0x3F) - | 0x80); - } - } - else - m_rSink << sal_Char(nUTF32); - } - } - 
else - { - sal_Char * pTargetBuffer = nullptr; - sal_Size nTargetSize = 0; - rtl_UnicodeToTextConverter hConverter - = rtl_createUnicodeToTextConverter(eCharsetEncoding); - rtl_UnicodeToTextContext hContext - = rtl_createUnicodeToTextContext(hConverter); - for (sal_Size nBufferSize = m_pBufferEnd - m_pBuffer;; - nBufferSize += nBufferSize / 3 + 1) - { - pTargetBuffer = new sal_Char[nBufferSize]; - sal_uInt32 nInfo; - sal_Size nSrcCvtBytes; - nTargetSize - = rtl_convertUnicodeToText( - hConverter, hContext, m_pBuffer, - m_pBufferEnd - m_pBuffer, pTargetBuffer, - nBufferSize, - RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE - | RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE, - &nInfo, &nSrcCvtBytes); - if (!(nInfo - & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) - break; - delete[] pTargetBuffer; - pTargetBuffer = nullptr; - rtl_resetUnicodeToTextContext(hConverter, hContext); - } - rtl_destroyUnicodeToTextContext(hConverter, hContext); - rtl_destroyUnicodeToTextConverter(hConverter); - for (sal_Size k = 0; k < nTargetSize; ++k) - { - sal_uInt32 nUCS4 = static_cast<unsigned char>(pTargetBuffer[k]); - bool bEscape = needsEncodedWordEscape(nUCS4); - if (bEscape) - writeEscapeSequence(m_rSink, nUCS4); - else - m_rSink << sal_Char(nUCS4); - } - delete[] pTargetBuffer; - } - - if (bWriteTrailer) - { - m_rSink << "?="; - m_eCoding = CODING_ENCODED_TERMINATED; - } - - m_ePrevMIMEEncoding = eMIMEEncoding; - break; - } - - default: - OSL_ASSERT(false); - break; - } - } - - m_nExtraSpaces = 0; - m_pEncodingList->reset(); - m_pBufferEnd = m_pBuffer; - m_ePrevCoding = m_eCoding; - m_eCoding = CODING_NONE; - m_eEncodedWordState = WordState::INITIAL; -} - -INetMIMEEncodedWordOutputSink::~INetMIMEEncodedWordOutputSink() -{ - rtl_freeMemory(m_pBuffer); - delete m_pEncodingList; -} - -INetMIMEEncodedWordOutputSink & -INetMIMEEncodedWordOutputSink::writeChar(sal_uInt32 nChar) -{ - if (nChar == ' ') - { - if (m_pBufferEnd != m_pBuffer) - finish(false); - ++m_nExtraSpaces; - } - else - { - // Check for 
an already encoded word: - switch (m_eEncodedWordState) - { - case WordState::INITIAL: - if (nChar == '=') - m_eEncodedWordState = WordState::FIRST_EQUALS; - else - m_eEncodedWordState = WordState::BAD; - break; - - case WordState::FIRST_EQUALS: - if (nChar == '?') - m_eEncodedWordState = WordState::FIRST_EQUALS; - else - m_eEncodedWordState = WordState::BAD; - break; - - - case WordState::SECOND_QUESTION: - if (nChar == 'B' || nChar == 'Q' - || nChar == 'b' || nChar == 'q') - m_eEncodedWordState = WordState::ENCODING; - else - m_eEncodedWordState = WordState::BAD; - break; - - case WordState::ENCODING: - if (nChar == '?') - m_eEncodedWordState = WordState::THIRD_QUESTION; - else - m_eEncodedWordState = WordState::BAD; - break; - - case WordState::THIRD_QUESTION: - if (INetMIME::isVisible(nChar) && nChar != '?') - m_eEncodedWordState = WordState::ENCODED_TEXT; - else - m_eEncodedWordState = WordState::BAD; - break; - - case WordState::ENCODED_TEXT: - if (nChar == '?') - m_eEncodedWordState = WordState::FOURTH_QUESTION; - else if (!INetMIME::isVisible(nChar)) - m_eEncodedWordState = WordState::BAD; - break; - - case WordState::FOURTH_QUESTION: - if (nChar == '=') - m_eEncodedWordState = WordState::SECOND_EQUALS; - else - m_eEncodedWordState = WordState::BAD; - break; - - case WordState::SECOND_EQUALS: - m_eEncodedWordState = WordState::BAD; - break; - - case WordState::BAD: - break; - } - - // Update encoding: - m_pEncodingList->includes(nChar); - - // Update coding: - static const bool aMinimal[128] - = { true, // 0x00 - true, // 0x01 - true, // 0x02 - true, // 0x03 - true, // 0x04 - true, // 0x05 - true, // 0x06 - true, // 0x07 - true, // 0x08 - true, // 0x09 - true, // 0x0A - true, // 0x0B - true, // 0x0C - true, // 0x0D - true, // 0x0E - true, // 0x0F - true, // 0x10 - true, // 0x11 - true, // 0x12 - true, // 0x13 - true, // 0x14 - true, // 0x15 - true, // 0x16 - true, // 0x17 - true, // 0x18 - true, // 0x19 - true, // 0x1A - true, // 0x1B - true, // 0x1C - 
true, // 0x1D - true, // 0x1E - true, // 0x1F - false, // ' ' - false, // '!' - false, // '"' - false, // '#' - false, // '$' - false, // '%' - false, // '&' - false, // ''' - false, // '(' - false, // ')' - false, // '*' - false, // '+' - false, // ',' - false, // '-' - false, // '.' - false, // '/' - false, // '0' - false, // '1' - false, // '2' - false, // '3' - false, // '4' - false, // '5' - false, // '6' - false, // '7' - false, // '8' - false, // '9' - false, // ':' - false, // ';' - false, // '<' - false, // '=' - false, // '>' - false, // '?' - false, // '@' - false, // 'A' - false, // 'B' - false, // 'C' - false, // 'D' - false, // 'E' - false, // 'F' - false, // 'G' - false, // 'H' - false, // 'I' - false, // 'J' - false, // 'K' - false, // 'L' - false, // 'M' - false, // 'N' - false, // 'O' - false, // 'P' - false, // 'Q' - false, // 'R' - false, // 'S' - false, // 'T' - false, // 'U' - false, // 'V' - false, // 'W' - false, // 'X' - false, // 'Y' - false, // 'Z' - false, // '[' - false, // '\' - false, // ']' - false, // '^' - false, // '_' - false, // '`' - false, // 'a' - false, // 'b' - false, // 'c' - false, // 'd' - false, // 'e' - false, // 'f' - false, // 'g' - false, // 'h' - false, // 'i' - false, // 'j' - false, // 'k' - false, // 'l' - false, // 'm' - false, // 'n' - false, // 'o' - false, // 'p' - false, // 'q' - false, // 'r' - false, // 's' - false, // 't' - false, // 'u' - false, // 'v' - false, // 'w' - false, // 'x' - false, // 'y' - false, // 'z' - false, // '{' - false, // '|' - false, // '}' - false, // '~' - true }; // DEL - Coding eNewCoding = !rtl::isAscii(nChar) ? CODING_ENCODED : - aMinimal[nChar] ? 
CODING_ENCODED : CODING_NONE; - if (eNewCoding > m_eCoding) - m_eCoding = eNewCoding; - - // Append to buffer: - if (sal_uInt32(m_pBufferEnd - m_pBuffer) == m_nBufferSize) - { - m_pBuffer - = static_cast< sal_Unicode * >( - rtl_reallocateMemory(m_pBuffer, - (m_nBufferSize + BUFFER_SIZE) - * sizeof (sal_Unicode))); - m_pBufferEnd = m_pBuffer + m_nBufferSize; - m_nBufferSize += BUFFER_SIZE; - } - *m_pBufferEnd++ = sal_Unicode(nChar); - } - return *this; -} - bool isTokenChar(sal_uInt32 nChar) { static const bool aMap[128] @@ -1934,29 +883,6 @@ sal_Unicode const * scanParameters(sal_Unicode const * pBegin, return parseParameters(aList, pParameters) ? pParameterBegin : pBegin; } -const sal_Char * getCharsetName(rtl_TextEncoding eEncoding) -{ - if (rtl_isOctetTextEncoding(eEncoding)) - { - char const * p = rtl_getMimeCharsetFromTextEncoding(eEncoding); - DBG_ASSERT(p, "getCharsetName(): Unsupported encoding"); - return p; - } - else - switch (eEncoding) - { - case RTL_TEXTENCODING_UCS4: - return "ISO-10646-UCS-4"; - - case RTL_TEXTENCODING_UCS2: - return "ISO-10646-UCS-2"; - - default: - OSL_FAIL("getCharsetName(): Unsupported encoding"); - return nullptr; - } -} - bool equalIgnoreCase(const sal_Char * pBegin1, const sal_Char * pEnd1, const sal_Char * pString2) @@ -2311,16 +1237,6 @@ sal_Unicode const * INetMIME::scanContentType( } // static -void INetMIME::writeHeaderFieldBody(INetMIMEOutputSink & rSink, - const OUString& rBody, - rtl_TextEncoding ePreferredEncoding) -{ - INetMIMEEncodedWordOutputSink aOutput(rSink, ePreferredEncoding); - aOutput.write(rBody.getStr(), rBody.getStr() + rBody.getLength()); - aOutput.flush(); -} - -// static OUString INetMIME::decodeHeaderFieldBody(const OString& rBody) { // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old diff --git a/tools/source/inet/inetmsg.cxx b/tools/source/inet/inetmsg.cxx index 7208b5e57891..f4fd59cc0749 100644 --- a/tools/source/inet/inetmsg.cxx +++ b/tools/source/inet/inetmsg.cxx @@ -18,9 +18,7 
@@ */ #include <sal/types.h> -#include <osl/thread.h> #include <tools/datetime.hxx> -#include <tools/inetmime.hxx> #include <tools/inetmsg.hxx> #include <tools/contnr.hxx> #include <rtl/instance.hxx> @@ -34,11 +32,8 @@ void INetMIMEMessage::SetHeaderField_Impl ( const OUString &rValue, sal_uIntPtr &rnIndex) { - INetMIMEOutputSink aSink; - INetMIME::writeHeaderFieldBody ( - aSink, rValue, osl_getThreadTextEncoding()); SetHeaderField_Impl ( - INetMessageHeader (rName, aSink.takeBuffer()), rnIndex); + INetMessageHeader (rName, rValue.toUtf8()), rnIndex); } /* ParseDateField and local helper functions. |