/************************************************************************* * * $RCSfile: pkguri.cxx,v $ * * $Revision: 1.5 $ * * last change: $Author: th $ $Date: 2001-05-11 09:14:45 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses * * - GNU Lesser General Public License Version 2.1 * - Sun Industry Standards Source License Version 1.1 * * Sun Microsystems Inc., October, 2000 * * GNU Lesser General Public License Version 2.1 * ============================================= * Copyright 2000 by Sun Microsystems, Inc. * 901 San Antonio Road, Palo Alto, CA 94303, USA * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License version 2.1, as published by the Free Software Foundation. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA * * * Sun Industry Standards Source License Version 1.1 * ================================================= * The contents of this file are subject to the Sun Industry Standards * Source License Version 1.1 (the "License"); You may not use this file * except in compliance with the License. You may obtain a copy of the * License at http://www.openoffice.org/license.html. * * Software provided under this License is provided on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. * See the License for the specific provisions governing your rights and * obligations concerning the Software. * * The Initial Developer of the Original Code is: Sun Microsystems, Inc. * * Copyright: 2000 by Sun Microsystems, Inc. * * All Rights Reserved. * * Contributor(s): Kai Sommerfeld ( kso@sun.com ) * * ************************************************************************/ /************************************************************************** TODO ************************************************************************** *************************************************************************/ #ifndef _RTL_USTRBUF_HXX_ #include <rtl/ustrbuf.hxx> #endif #ifndef _VOS_DIAGNOSE_HXX_ #include <vos/diagnose.hxx> #endif #ifndef _PKGURI_HXX #include "pkguri.hxx" #endif using namespace package_ucp; using namespace rtl; //========================================================================= //========================================================================= // // PackageUri Implementation. // //========================================================================= //========================================================================= static void normalize( OUString& rURL ) { sal_Int32 nPos = 0; do { nPos = rURL.indexOf( '%', nPos ); if ( nPos != -1 ) { if ( nPos < ( rURL.getLength() - 2 ) ) { OUString aTmp = rURL.copy( nPos + 1, 2 ); rURL = rURL.replaceAt( nPos + 1, 2, aTmp.toAsciiUpperCase() ); nPos++; } } } while ( nPos != -1 ); } //========================================================================= void PackageUri::init() const { // Already inited? if ( m_aUri.getLength() && !m_aPath.getLength() ) { // Note: Maybe it's a re-init, setUri only resets m_aPath! m_aPackage = m_aParentUri = m_aName = OUString(); if ( m_aUri.getLength() && ( m_aUri.compareToAscii( PACKAGE_URL_SCHEME, PACKAGE_URL_SCHEME_LENGTH ) == 0 ) ) { sal_Int32 nStart = PACKAGE_URL_SCHEME_LENGTH + 3; sal_Int32 nEnd = m_aUri.lastIndexOf( '/' ); if ( nEnd == ( m_aUri.getLength() - 1 ) ) { // Remove trailing slash. m_aUri = m_aUri.copy( 0, nEnd ); } nEnd = m_aUri.indexOf( '/', nStart ); if ( nEnd == -1 ) { // root folder. OUString aNormPackage = m_aUri.copy( nStart ); normalize( aNormPackage ); m_aUri = m_aUri.replaceAt( nStart, m_aUri.getLength() - nStart, aNormPackage ); m_aPackage = decodeSegment( aNormPackage ); m_aPath = rtl::OUString::createFromAscii( "/" ); } else { OUString aNormPackage = m_aUri.copy( nStart, nEnd - nStart ); normalize( aNormPackage ); m_aUri = m_aUri.replaceAt( nStart, nEnd - nStart, aNormPackage ); m_aPackage = decodeSegment( aNormPackage ); // m_aPath = m_aUri.copy( nEnd ); m_aPath = m_aUri.copy( nEnd + 1 ); sal_Int32 nLastSlash = m_aUri.lastIndexOf( '/' ); if ( nLastSlash != -1 ) { m_aParentUri = m_aUri.copy( 0, nLastSlash ); m_aName = m_aUri.copy( nLastSlash + 1 ); } } // success } else { // error, but remember that we did a init(). m_aPath = rtl::OUString::createFromAscii( "/" ); } } } //========================================================================= // // URI encode/decode stuff. // //========================================================================= inline bool isUSASCII(sal_uInt32 nChar) { return nChar <= 0x7F; } inline bool isVisible(sal_uInt32 nChar) { return nChar >= '!' && nChar <= '~'; } inline bool isDigit(sal_uInt32 nChar) { return nChar >= '0' && nChar <= '9'; } inline int getHexWeight(sal_uInt32 nChar) { return isDigit(nChar) ? int(nChar - '0') : nChar >= 'A' && nChar <= 'F' ? int(nChar - 'A' + 10) : nChar >= 'a' && nChar <= 'f' ? int(nChar - 'a' + 10) : -1; } inline bool isHighSurrogate(sal_uInt32 nUTF16) { return nUTF16 >= 0xD800 && nUTF16 <= 0xDBFF; } inline bool isLowSurrogate(sal_uInt32 nUTF16) { return nUTF16 >= 0xDC00 && nUTF16 <= 0xDFFF; } inline sal_uInt32 getUTF32Character(sal_Unicode const *& rBegin, sal_Unicode const * pEnd) { VOS_ASSERT(rBegin && rBegin < pEnd); if (rBegin + 1 < pEnd && rBegin[0] >= 0xD800 && rBegin[0] <= 0xDBFF && rBegin[1] >= 0xDC00 && rBegin[1] <= 0xDFFF) { sal_uInt32 nUTF32 = sal_uInt32(*rBegin++ & 0x3FF) << 10; return (nUTF32 | (*rBegin++ & 0x3FF)) + 0x10000; } else return *rBegin++; } sal_uInt32 getHexDigit(int nWeight) { VOS_ASSERT(nWeight >= 0 && nWeight < 16); static sal_Char const aDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; return aDigits[nWeight]; } enum EncodeMechanism { ENCODE_ALL, WAS_ENCODED, NOT_CANONIC }; enum DecodeMechanism { NO_DECODE, DECODE_TO_IURI, DECODE_WITH_CHARSET }; enum Part { PART_OBSOLETE_NORMAL = 0x001, // Obsolete, do not use! PART_OBSOLETE_FILE = 0x002, // Obsolete, do not use! PART_OBSOLETE_PARAM = 0x004, // Obsolete, do not use! PART_USER_PASSWORD = 0x008, PART_IMAP_ACHAR = 0x010, PART_VIM = 0x020, PART_HOST_EXTRA = 0x040, PART_FPATH = 0x080, PART_AUTHORITY = 0x100, PART_PATH_SEGMENTS_EXTRA = 0x200, PART_REL_SEGMENT_EXTRA = 0x400, PART_URIC = 0x800, PART_HTTP_PATH = 0x1000, PART_FILE_SEGMENT_EXTRA = 0x2000, // Obsolete, do not use! PART_MESSAGE_ID = 0x4000, PART_MESSAGE_ID_PATH = 0x8000, PART_MAILTO = 0x10000, PART_PATH_BEFORE_QUERY = 0x20000, PART_PCHAR = 0x40000, PART_FRAGMENT = 0x80000, // Obsolete, do not use! PART_VISIBLE = 0x100000, max_part = 0x80000000 // Do not use! Only there to allow compatible changes in the future. }; enum EscapeType { ESCAPE_NO, ESCAPE_OCTET, ESCAPE_UTF32 }; inline void appendEscape(rtl::OUStringBuffer & rTheText, sal_Char cEscapePrefix, sal_uInt32 nOctet) { rTheText.append(sal_Unicode(cEscapePrefix)); rTheText.append(sal_Unicode(getHexDigit(int(nOctet >> 4)))); rTheText.append(sal_Unicode(getHexDigit(int(nOctet & 15)))); } inline bool mustEncode(sal_uInt32 nUTF32, Part ePart) { enum { pA = PART_OBSOLETE_NORMAL, pB = PART_OBSOLETE_FILE, pC = PART_OBSOLETE_PARAM, pD = PART_USER_PASSWORD, pE = PART_IMAP_ACHAR, pF = PART_VIM, pG = PART_HOST_EXTRA, pH = PART_FPATH, pI = PART_AUTHORITY, pJ = PART_PATH_SEGMENTS_EXTRA, pK = PART_REL_SEGMENT_EXTRA, pL = PART_URIC, pM = PART_HTTP_PATH, pN = PART_FILE_SEGMENT_EXTRA, pO = PART_MESSAGE_ID, pP = PART_MESSAGE_ID_PATH, pQ = PART_MAILTO, pR = PART_PATH_BEFORE_QUERY, pS = PART_PCHAR, pT = PART_FRAGMENT, pU = PART_VISIBLE }; static sal_uInt32 const aMap[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */ 0, /* ! */ pC+pD+pE +pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* " */ pU, /* # */ pU, /* $ */ pD+pE +pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* % */ pU, /* & */ pA+pB+pC+pD+pE +pH+pI+pJ+pK+pL+pM+pN+pO+pP +pR+pS+pT+pU, /* ' */ pD+pE +pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* ( */ pD+pE +pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* ) */ pD+pE +pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* * */ pA+pB+pC+pD+pE +pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* + */ pA+pB+pC+pD+pE +pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* , */ pA+pB+pC+pD+pE +pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* - */ pA+pB+pC+pD+pE +pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* . */ pA+pB+pC+pD+pE +pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* / */ pA+pB+pC +pH +pJ +pL+pM +pP+pQ+pR +pT+pU, /* 0 */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* 1 */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* 2 */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* 3 */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* 4 */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* 5 */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* 6 */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* 7 */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* 8 */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* 9 */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* : */ pB+pC +pH+pI+pJ +pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* ; */ pC+pD +pI+pJ+pK+pL+pM +pO+pP+pQ+pR +pT+pU, /* < */ pC +pO+pP +pU, /* = */ pA+pB+pC+pD+pE +pH+pI+pJ+pK+pL+pM+pN +pR+pS+pT+pU, /* > */ pC +pO+pP +pU, /* ? */ pC +pL +pT+pU, /* @ */ pC +pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* A */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* B */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* C */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* D */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* E */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* F */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* G */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* H */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* I */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* J */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* K */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* L */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* M */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* N */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* O */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* P */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* Q */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* R */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* S */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* T */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* U */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* V */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* W */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* X */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* Y */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* Z */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* [ */ pL +pU, /* \ */ pB +pU, /* ] */ pL +pU, /* ^ */ pU, /* _ */ pA+pB+pC+pD+pE +pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* ` */ +pU, /* a */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* b */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* c */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* d */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* e */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* f */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* g */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* h */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* i */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* j */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* k */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* l */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* m */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* n */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* o */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* p */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* q */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* r */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* s */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* t */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* u */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* v */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* w */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* x */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* y */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* z */ pA+pB+pC+pD+pE+pF+pG+pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* { */ +pU, /* | */ pB+pC +pN +pT+pU, /* } */ +pU, /* ~ */ pA+pB+pC+pD+pE +pH+pI+pJ+pK+pL+pM+pN+pO+pP+pQ+pR+pS+pT+pU, /* */ 0 }; return !isUSASCII(nUTF32) || !(aMap[nUTF32] & ePart); } void appendUCS4Escape(rtl::OUStringBuffer & rTheText, sal_Char cEscapePrefix, sal_uInt32 nUCS4) { VOS_ASSERT(nUCS4 < 0x80000000); if (nUCS4 < 0x80) appendEscape(rTheText, cEscapePrefix, nUCS4); else if (nUCS4 < 0x800) { appendEscape(rTheText, cEscapePrefix, nUCS4 >> 6 | 0xC0); appendEscape(rTheText, cEscapePrefix, nUCS4 & 0x3F | 0x80); } else if (nUCS4 < 0x10000) { appendEscape(rTheText, cEscapePrefix, nUCS4 >> 12 | 0xE0); appendEscape(rTheText, cEscapePrefix, nUCS4 >> 6 & 0x3F | 0x80); appendEscape(rTheText, cEscapePrefix, nUCS4 & 0x3F | 0x80); } else if (nUCS4 < 0x200000) { appendEscape(rTheText, cEscapePrefix, nUCS4 >> 18 | 0xF0); appendEscape(rTheText, cEscapePrefix, nUCS4 >> 12 & 0x3F | 0x80); appendEscape(rTheText, cEscapePrefix, nUCS4 >> 6 & 0x3F | 0x80); appendEscape(rTheText, cEscapePrefix, nUCS4 & 0x3F | 0x80); } else if (nUCS4 < 0x4000000) { appendEscape(rTheText, cEscapePrefix, nUCS4 >> 24 | 0xF8); appendEscape(rTheText, cEscapePrefix, nUCS4 >> 18 & 0x3F | 0x80); appendEscape(rTheText, cEscapePrefix, nUCS4 >> 12 & 0x3F | 0x80); appendEscape(rTheText, cEscapePrefix, nUCS4 >> 6 & 0x3F | 0x80); appendEscape(rTheText, cEscapePrefix, nUCS4 & 0x3F | 0x80); } else { appendEscape(rTheText, cEscapePrefix, nUCS4 >> 30 | 0xFC); appendEscape(rTheText, cEscapePrefix, nUCS4 >> 24 & 0x3F | 0x80); appendEscape(rTheText, cEscapePrefix, nUCS4 >> 18 & 0x3F | 0x80); appendEscape(rTheText, cEscapePrefix, nUCS4 >> 12 & 0x3F | 0x80); appendEscape(rTheText, cEscapePrefix, nUCS4 >> 6 & 0x3F | 0x80); appendEscape(rTheText, cEscapePrefix, nUCS4 & 0x3F | 0x80); } } void appendUCS4(rtl::OUStringBuffer & rTheText, sal_uInt32 nUCS4, EscapeType eEscapeType, bool bOctets, Part ePart, sal_Char cEscapePrefix, rtl_TextEncoding eCharset, bool bKeepVisibleEscapes) { bool bEscape; rtl_TextEncoding eTargetCharset; switch (eEscapeType) { case ESCAPE_NO: if (mustEncode(nUCS4, ePart)) { bEscape = true; eTargetCharset = bOctets ? RTL_TEXTENCODING_ISO_8859_1 : RTL_TEXTENCODING_UTF8; } else bEscape = false; break; case ESCAPE_OCTET: bEscape = true; eTargetCharset = RTL_TEXTENCODING_ISO_8859_1; break; case ESCAPE_UTF32: if (mustEncode(nUCS4, ePart)) { bEscape = true; eTargetCharset = eCharset; } else if (bKeepVisibleEscapes && isVisible(nUCS4)) { bEscape = true; eTargetCharset = RTL_TEXTENCODING_ASCII_US; } else bEscape = false; break; } if (bEscape) switch (eTargetCharset) { default: VOS_ASSERT(false); case RTL_TEXTENCODING_ASCII_US: case RTL_TEXTENCODING_ISO_8859_1: appendEscape(rTheText, cEscapePrefix, nUCS4); break; case RTL_TEXTENCODING_UTF8: appendUCS4Escape(rTheText, cEscapePrefix, nUCS4); break; } else rTheText.append(sal_Unicode(nUCS4)); } sal_uInt32 getUTF32(sal_Unicode const *& rBegin, sal_Unicode const * pEnd, bool bOctets, sal_Char cEscapePrefix, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, EscapeType & rEscapeType) { VOS_ASSERT(rBegin < pEnd); sal_uInt32 nUTF32 = bOctets ? *rBegin++ : getUTF32Character(rBegin, pEnd); switch (eMechanism) { case ENCODE_ALL: rEscapeType = ESCAPE_NO; break; case WAS_ENCODED: { int nWeight1; int nWeight2; if (nUTF32 == cEscapePrefix && rBegin + 1 < pEnd && (nWeight1 = getHexWeight(rBegin[0])) >= 0 && (nWeight2 = getHexWeight(rBegin[1])) >= 0) { rBegin += 2; nUTF32 = nWeight1 << 4 | nWeight2; switch (eCharset) { default: VOS_ASSERT(false); case RTL_TEXTENCODING_ASCII_US: rEscapeType = isUSASCII(nUTF32) ? ESCAPE_UTF32 : ESCAPE_OCTET; break; case RTL_TEXTENCODING_ISO_8859_1: rEscapeType = ESCAPE_UTF32; break; case RTL_TEXTENCODING_UTF8: if (isUSASCII(nUTF32)) rEscapeType = ESCAPE_UTF32; else { if (nUTF32 >= 0xC0 && nUTF32 <= 0xF4) { sal_uInt32 nEncoded; int nShift; sal_uInt32 nMin; if (nUTF32 <= 0xDF) { nEncoded = (nUTF32 & 0x1F) << 6; nShift = 0; nMin = 0x80; } else if (nUTF32 <= 0xEF) { nEncoded = (nUTF32 & 0x0F) << 12; nShift = 6; nMin = 0x800; } else { nEncoded = (nUTF32 & 0x07) << 18; nShift = 12; nMin = 0x10000; } sal_Unicode const * p = rBegin; bool bUTF8 = true; for (;;) { if (p + 2 >= pEnd || p[0] != cEscapePrefix || (nWeight1 = getHexWeight(p[1])) < 0 || (nWeight2 = getHexWeight(p[2])) < 0 || nWeight1 < 8) { bUTF8 = false; break; } p += 3; nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift; if (nShift == 0) break; nShift -= 6; } if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded) && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF) { rBegin = p; nUTF32 = nEncoded; rEscapeType = ESCAPE_UTF32; break; } } rEscapeType = ESCAPE_OCTET; } break; } } else rEscapeType = ESCAPE_NO; break; } case NOT_CANONIC: { int nWeight1; int nWeight2; if (nUTF32 == cEscapePrefix && rBegin + 1 < pEnd && ((nWeight1 = getHexWeight(rBegin[0])) >= 0) && ((nWeight2 = getHexWeight(rBegin[1])) >= 0)) { rBegin += 2; nUTF32 = nWeight1 << 4 | nWeight2; rEscapeType = ESCAPE_OCTET; } else rEscapeType = ESCAPE_NO; break; } } return nUTF32; } static rtl::OUString encodeText(sal_Unicode const * pBegin, sal_Unicode const * pEnd, bool bOctets, Part ePart, sal_Char cEscapePrefix, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bKeepVisibleEscapes) { rtl::OUStringBuffer aResult; while (pBegin != pEnd) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, bOctets, cEscapePrefix, eMechanism, eCharset, eEscapeType); appendUCS4(aResult, nUTF32, eEscapeType, bOctets, ePart, cEscapePrefix, eCharset, bKeepVisibleEscapes); } return aResult.makeStringAndClear(); } static rtl::OUString decode(sal_Unicode const * pBegin, sal_Unicode const * pEnd, sal_Char cEscapePrefix, DecodeMechanism eMechanism, rtl_TextEncoding eCharset) { switch (eMechanism) { case NO_DECODE: return rtl::OUString(pBegin, pEnd - pBegin); case DECODE_TO_IURI: eCharset = RTL_TEXTENCODING_UTF8; break; } rtl::OUStringBuffer aResult; while (pBegin < pEnd) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, false, cEscapePrefix, WAS_ENCODED, eCharset, eEscapeType); switch (eEscapeType) { case ESCAPE_NO: aResult.append(sal_Unicode(nUTF32)); break; case ESCAPE_OCTET: appendEscape(aResult, cEscapePrefix, nUTF32); break; case ESCAPE_UTF32: if (eMechanism == DECODE_TO_IURI && isUSASCII(nUTF32)) appendEscape(aResult, cEscapePrefix, nUTF32); else aResult.append(sal_Unicode(nUTF32)); break; } } return aResult.makeStringAndClear(); } //========================================================================= // static rtl::OUString PackageUri::decodeSegment( const rtl::OUString& rSource ) { return decode(rSource.getStr(), rSource.getStr() + rSource.getLength(), '%', DECODE_WITH_CHARSET, RTL_TEXTENCODING_UTF8); } /* //============================================================================ // static rtl::OUString PackageUri::encodeURL( const rtl::OUString& rSource ) { rtl::OUStringBuffer aResult; sal_Unicode const * pBegin = rSource.getStr(); sal_Unicode const * pEnd = pBegin + rSource.getLength(); sal_Unicode const * p = pBegin; while (p != pEnd && *p++ != ':'); aResult.append(pBegin, p - pBegin); while (p != pEnd) { EscapeType eEscapeType; sal_uInt32 nUTF32 = getUTF32(p, pEnd, false, '%', WAS_ENCODED, RTL_TEXTENCODING_UTF8, eEscapeType); if (nUTF32 == '/' && eEscapeType == ESCAPE_NO) aResult.append(sal_Unicode(nUTF32)); else appendUCS4(aResult, nUTF32, eEscapeType, false, PART_PCHAR, '%', RTL_TEXTENCODING_UTF8, false); } return aResult.makeStringAndClear(); } //========================================================================= // static rtl::OUString PackageUri::encodeSegment( const rtl::OUString& rSource ) { return encodeText(rSource.getStr(), rSource.getStr() + rSource.getLength(), false, PART_PCHAR, '%', ENCODE_ALL, RTL_TEXTENCODING_UTF8, false); } */