/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#include "surrogates.hxx"

#include "osl/diagnose.h"
#include "rtl/character.hxx"
#include "rtl/strbuf.hxx"
#include "rtl/textenc.h"
#include "rtl/textcvt.h"
#include "rtl/uri.h"
#include "rtl/ustrbuf.h"
#include "rtl/ustrbuf.hxx"
#include "rtl/ustring.h"
#include "rtl/ustring.hxx"
#include "sal/types.h"
#include "sal/macros.h"

#include <algorithm>
#include <cassert>
#include <cstddef>

namespace {

// Number of entries in one character-class table; only code points below
// this bound can ever be classified as valid (see isValid).
std::size_t const nCharClassSize = 128;

sal_Unicode const cEscapePrefix = 0x25; // '%'

// Thin wrapper around SAL_RTL_IS_HIGH_SURROGATE.
inline bool isHighSurrogate(sal_uInt32 nUtf16)
{
    return SAL_RTL_IS_HIGH_SURROGATE(nUtf16);
}

// Thin wrapper around SAL_RTL_IS_LOW_SURROGATE.
inline bool isLowSurrogate(sal_uInt32 nUtf16)
{
    return SAL_RTL_IS_LOW_SURROGATE(nUtf16);
}

// Thin wrapper around SAL_RTL_COMBINE_SURROGATES.
inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
{
    return SAL_RTL_COMBINE_SURROGATES(high, low);
}

// Returns the numeric value (0--15) of an ASCII hexadecimal digit, accepting
// both upper- and lower-case letters, or -1 if nUtf32 is not a hex digit.
inline int getHexWeight(sal_uInt32 nUtf32)
{
    return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
               static_cast< int >(nUtf32 - 0x30) :
           nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
               static_cast< int >(nUtf32 - 0x41 + 10) :
           nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
               static_cast< int >(nUtf32 - 0x61 + 10) :
           -1; // not a hex digit
}

// Returns true iff nUtf32 lies within the table range and is flagged in the
// given character class; a true result therefore implies nUtf32 <= 0x7F.
inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
{
    return nUtf32 < nCharClassSize && pCharClass[nUtf32];
}

// Appends a single UTF-16 code unit at the end of the string buffer.
inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
                         sal_Unicode cChar)
{
    rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
}

// How readUcs4 obtained the value it returned:
//  EscapeNo     the value was read literally (no escape sequence involved);
//  EscapeChar   one or more escape sequences were decoded to a character;
//  EscapeOctet  a single escape sequence was read whose octet could not be
//               decoded to a character; the returned value is that octet.
enum EscapeType
{
    EscapeNo,
    EscapeChar,
    EscapeOctet
};

/* Read any of the following:

   - sequence of escape sequences representing character from eCharset,
     translated to single UCS4 character; or

   - pair of UTF-16 surrogates, translated to single UCS4 character; or

   - single UTF-16 character, extended to UCS4 character.

   *pBegin must point at a readable code unit (i.e., *pBegin < pEnd); it is
   advanced past everything that was consumed.  Escape sequences are only
   recognized if bEncoded is true.  *pType is always set on return.
 */
sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
                    bool bEncoded, rtl_TextEncoding eCharset,
                    EscapeType * pType)
{
    sal_uInt32 nChar = *(*pBegin)++;
    int nWeight1;
    int nWeight2;
    if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
        && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
        && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
    {
        // A well-formed "%XX" escape; nChar becomes the escaped octet:
        *pBegin += 2;
        nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
        if (nChar <= 0x7F)
            *pType = EscapeChar;
        else if (eCharset == RTL_TEXTENCODING_UTF8)
        {
            if (nChar >= 0xC0 && nChar <= 0xF4)
            {
                // Lead octet of a 2-, 3-, or 4-octet UTF-8 sequence; nMin is
                // the smallest value that may legally use that length (to
                // reject overlong encodings):
                sal_uInt32 nEncoded;
                int nShift;
                sal_uInt32 nMin;
                if (nChar <= 0xDF)
                {
                    nEncoded = (nChar & 0x1F) << 6;
                    nShift = 0;
                    nMin = 0x80;
                }
                else if (nChar <= 0xEF)
                {
                    nEncoded = (nChar & 0x0F) << 12;
                    nShift = 6;
                    nMin = 0x800;
                }
                else
                {
                    nEncoded = (nChar & 0x07) << 18;
                    nShift = 12;
                    nMin = 0x10000;
                }
                // Look ahead (without committing *pBegin yet) for the
                // required number of escaped continuation octets, each of
                // which must have a high nibble in the range 8--11:
                sal_Unicode const * p = *pBegin;
                bool bUTF8 = true;
                for (; nShift >= 0; nShift -= 6)
                {
                    if (pEnd - p < 3 || p[0] != cEscapePrefix
                        || (nWeight1 = getHexWeight(p[1])) < 8
                        || nWeight1 > 11
                        || (nWeight2 = getHexWeight(p[2])) < 0)
                    {
                        bUTF8 = false;
                        break;
                    }
                    p += 3;
                    nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
                }
                if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
                    && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
                {
                    *pBegin = p;
                    *pType = EscapeChar;
                    return nEncoded;
                }
            }
            // Not a valid UTF-8 sequence; report just the first octet:
            *pType = EscapeOctet;
        }
        else
        {
            // Collect octets in aBuf and retry the conversion until the
            // converter yields a character or reports an error.
            // NOTE(review): the converter handle is used without a null
            // check -- confirm rtl_createTextToUnicodeConverter cannot fail
            // for the charsets callers pass in.
            rtl::OStringBuffer aBuf;
            aBuf.append(static_cast< char >(nChar));
            rtl_TextToUnicodeConverter aConverter
                = rtl_createTextToUnicodeConverter(eCharset);
            sal_Unicode const * p = *pBegin;
            for (;;)
            {
                sal_Unicode aDst[2];
                sal_uInt32 nInfo;
                sal_Size nConverted;
                sal_Size nDstSize = rtl_convertTextToUnicode(
                    aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
                    SAL_N_ELEMENTS( aDst ),
                    (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
                     | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
                     | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
                    &nInfo, &nConverted);
                if (nInfo == 0)
                {
                    assert(
                        nConverted
                        == sal::static_int_cast< sal_uInt32 >(
                            aBuf.getLength()));
                    rtl_destroyTextToUnicodeConverter(aConverter);
                    *pBegin = p;
                    *pType = EscapeChar;
                    assert(
                        nDstSize == 1
                        || (nDstSize == 2 && isHighSurrogate(aDst[0])
                            && isLowSurrogate(aDst[1])));
                    return nDstSize == 1
                        ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
                }
                else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
                         && pEnd - p >= 3 && p[0] == cEscapePrefix
                         && (nWeight1 = getHexWeight(p[1])) >= 0
                         && (nWeight2 = getHexWeight(p[2])) >= 0)
                {
                    // Converter needs more input and another escaped octet
                    // follows:
                    p += 3;
                    aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
                }
                else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
                         && p != pEnd && *p <= 0x7F)
                {
                    // Converter needs more input and an unescaped ASCII
                    // character follows; use it as the next octet:
                    aBuf.append(static_cast< char >(*p++));
                }
                else
                {
                    assert(
                        (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
                        == 0);
                    break;
                }
            }
            rtl_destroyTextToUnicodeConverter(aConverter);
            *pType = EscapeOctet;
        }
        return nChar;
    }
    else
    {
        *pType = EscapeNo;
        return isHighSurrogate(nChar) && *pBegin < pEnd
               && isLowSurrogate(**pBegin) ?
                   combineSurrogates(nChar, *(*pBegin)++) : nChar;
    }
}

// Appends the code point nUtf32 to the buffer as one UTF-16 code unit, or as
// a surrogate pair if it lies above 0xFFFF.
void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
{
    assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
    if (nUtf32 <= 0xFFFF)
    {
        writeUnicode(
            pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
    }
    else
    {
        nUtf32 -= 0x10000;
        writeUnicode(
            pBuffer, pCapacity,
            static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
        writeUnicode(
            pBuffer, pCapacity,
            static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
    }
}

// Appends the escape sequence "%XX" for nOctet, using upper-case hex digits.
void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
                      sal_uInt32 nOctet)
{
    assert(nOctet <= 0xFF); // bad octet

    static sal_Unicode const aHex[16]
        = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
            0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 };
            /* '0'--'9', 'A'--'F' */

    writeUnicode(pBuffer, pCapacity, cEscapePrefix);
    writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
    writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
}

// Appends nUtf32 as a sequence of escaped octets in eCharset.  If the
// character cannot be converted to eCharset, returns false when bStrict is
// set, and otherwise appends the character unescaped.  Returns true in all
// other cases.
bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
                     sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
{
    assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
    if (eCharset == RTL_TEXTENCODING_UTF8)
    {
        // UTF-8 is encoded by hand, one to four octets:
        if (nUtf32 < 0x80)
            writeEscapeOctet(pBuffer, pCapacity, nUtf32);
        else if (nUtf32 < 0x800)
        {
            writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
            writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
        }
        else if (nUtf32 < 0x10000)
        {
            writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
            writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
            writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
        }
        else
        {
            writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
            writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
            writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
            writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
        }
    }
    else
    {
        // NOTE(review): the converter handle is used without a null check --
        // confirm rtl_createUnicodeToTextConverter cannot fail here.
        rtl_UnicodeToTextConverter aConverter
            = rtl_createUnicodeToTextConverter(eCharset);
        sal_Unicode aSrc[2];
        sal_Size nSrcSize;
        if (nUtf32 <= 0xFFFF)
        {
            aSrc[0] = static_cast< sal_Unicode >(nUtf32);
            nSrcSize = 1;
        }
        else
        {
            aSrc[0] = static_cast< sal_Unicode >(
                ((nUtf32 - 0x10000) >> 10) | 0xD800);
            aSrc[1] = static_cast< sal_Unicode >(
                ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
            nSrcSize = 2;
        }
        sal_Char aDst[32]; // FIXME  random value
        sal_uInt32 nInfo;
        sal_Size nConverted;
        sal_Size nDstSize = rtl_convertUnicodeToText(
            aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
            RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
            | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
            | RTL_UNICODETOTEXT_FLAGS_FLUSH,
            &nInfo, &nConverted);
        assert((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
        rtl_destroyUnicodeToTextConverter(aConverter);
        if (nInfo == 0)
        {
            assert(nConverted == nSrcSize); // bad rtl_convertUnicodeToText
            for (sal_Size i = 0; i < nDstSize; ++i)
                writeEscapeOctet(pBuffer, pCapacity,
                                 static_cast< unsigned char >(aDst[i]));
                    // FIXME  all octets are escaped, even if there is no need
        }
        else
        {
            if (bStrict)
            {
                return false;
            }
            else
            {
                writeUcs4(pBuffer, pCapacity, nUtf32);
            }
        }
    }
    return true;
}

// A half-open range [pBegin, pEnd) into a URI reference; a component that is
// not present has a null pBegin.
struct Component
{
    sal_Unicode const * pBegin;
    sal_Unicode const * pEnd;

    inline Component(): pBegin(0), pEnd(0) {}

    inline bool isPresent() const { return pBegin != 0; }

    inline sal_Int32 getLength() const;
};

inline sal_Int32 Component::getLength() const
{
    assert(isPresent()); // taking length of non-present component
    return static_cast< sal_Int32 >(pEnd - pBegin);
}

// The five components of a URI reference as located by parseUriRef.  The
// recorded ranges include the delimiters: scheme ends with its ':',
// authority starts with "//", query starts with '?', fragment starts
// with '#'.
struct Components
{
    Component aScheme;
    Component aAuthority;
    Component aPath;
    Component aQuery;
    Component aFragment;
};

// Splits pUriRef into its components.  The path component is always set
// (possibly empty); the others are only set if present in the input.
void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
{
    // This algorithm is liberal and accepts various forms of illegal input.

    sal_Unicode const * pBegin = pUriRef->buffer;
    sal_Unicode const * pEnd = pBegin + pUriRef->length;
    sal_Unicode const * pPos = pBegin;

    // Scheme: a letter followed by letters, digits, '+', '-', '.', up to and
    // including a ':':
    if (pPos != pEnd && rtl::isAsciiAlpha(*pPos))
    {
        for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
        {
            if (*p == ':')
            {
                pComponents->aScheme.pBegin = pBegin;
                pComponents->aScheme.pEnd = ++p;
                pPos = p;
                break;
            }
            else if (!rtl::isAsciiAlphanumeric(*p) && *p != '+' && *p != '-'
                     && *p != '.')
            {
                break;
            }
        }
    }

    // Authority: "//" up to the next '/', '?', or '#':
    if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
    {
        pComponents->aAuthority.pBegin = pPos;
        pPos += 2;
        while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
            ++pPos;
        pComponents->aAuthority.pEnd = pPos;
    }

    // Path: everything up to the next '?' or '#':
    pComponents->aPath.pBegin = pPos;
    while (pPos != pEnd && *pPos != '?' && * pPos != '#')
        ++pPos;
    pComponents->aPath.pEnd = pPos;

    // Query: '?' up to the next '#':
    if (pPos != pEnd && *pPos == '?')
    {
        pComponents->aQuery.pBegin = pPos++;
        while (pPos != pEnd && * pPos != '#')
            ++pPos;
        pComponents->aQuery.pEnd = pPos;
    }

    // Fragment: '#' up to the end:
    if (pPos != pEnd)
    {
        assert(*pPos == '#');
        pComponents->aFragment.pBegin = pPos;
        pComponents->aFragment.pEnd = pEnd;
    }
}

// Appends the path [pathBegin, pathEnd) to buffer, dropping "." segments and
// resolving ".." segments against what has already been written.  A ".."
// never truncates the buffer below index bufferStart.  precedingSlash states
// whether the input is to be treated as if a '/' preceded pathBegin.
void appendPath(
    rtl::OUStringBuffer & buffer, sal_Int32 bufferStart, bool precedingSlash,
    sal_Unicode const * pathBegin, sal_Unicode const * pathEnd)
{
    while (precedingSlash || pathBegin != pathEnd)
    {
        // Isolate the next segment [pathBegin, p):
        sal_Unicode const * p = pathBegin;
        while (p != pathEnd && *p != '/')
        {
            ++p;
        }
        std::size_t n = p - pathBegin;
        if (n == 1 && pathBegin[0] == '.')
        {
            // input begins with "." -> remove from input (and done):
            //  i.e., !precedingSlash -> !precedingSlash
            // input begins with "./" -> remove from input:
            //  i.e., !precedingSlash -> !precedingSlash
            // input begins with "/." -> replace with "/" in input (and not yet
            // done):
            //  i.e., precedingSlash -> precedingSlash
            // input begins with "/./" -> replace with "/" in input:
            //  i.e., precedingSlash -> precedingSlash
        }
        else if (n == 2 && pathBegin[0] == '.' && pathBegin[1] == '.')
        {
            // input begins with ".." -> remove from input (and done):
            //  i.e., !precedingSlash -> !precedingSlash
            // input begins with "../" -> remove from input
            //  i.e., !precedingSlash -> !precedingSlash
            // input begins with "/.." -> replace with "/" in input, and shrink
            // output (and not yet done):
            //  i.e., precedingSlash -> precedingSlash
            // input begins with "/../" -> replace with "/" in input, and shrink
            // output:
            //  i.e., precedingSlash -> precedingSlash
            if (precedingSlash)
            {
                // Cut the output back to its last '/' at or after
                // bufferStart (or to bufferStart itself if there is none):
                buffer.truncate(
                    bufferStart
                    + std::max(
                        rtl_ustr_lastIndexOfChar_WithLength(
                            buffer.getStr() + bufferStart,
                            buffer.getLength() - bufferStart, '/'),
                        0));
            }
        }
        else
        {
            if (precedingSlash)
            {
                buffer.append('/');
            }
            buffer.append(pathBegin, n);
            precedingSlash = p != pathEnd;
        }
        // Skip the segment and the '/' that terminated it, if any:
        pathBegin = p + (p == pathEnd ? 0 : 1);
    }
}

}

// Returns the table of nCharClassSize flags for the given character class;
// entry i is nonzero iff ASCII character i belongs to the class.  The tables
// appear in the order None, Uric, UricNoSlash, RelSegment, RegName, Userinfo,
// Pchar, UnoParamValue and are indexed directly by eCharClass.
sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
    SAL_THROW_EXTERN_C()
{
    static sal_Bool const aCharClass[][nCharClassSize]
    = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /*pqrstuvwxyz{|}~ */
       },
       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
       },
       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
       },
       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
       },
       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
       },
       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
       },
       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
       },
       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
       }};
    assert(
        (eCharClass >= 0
         && (sal::static_int_cast< std::size_t >(eCharClass)
             < SAL_N_ELEMENTS(aCharClass)))); // bad eCharClass
    return aCharClass[eCharClass];
}

// Encodes pText into *pResult: characters flagged in pCharClass are copied
// verbatim, all others are written as escape sequences in eCharset.  For the
// "keep/check escapes" mechanisms, escape sequences already present in the
// input are recognized rather than having their '%' escaped again.  For the
// strict mechanisms, *pResult is set to the empty string if a character
// cannot be converted to eCharset.
void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
                            rtl_UriEncodeMechanism eMechanism,
                            rtl_TextEncoding eCharset, rtl_uString ** pResult)
    SAL_THROW_EXTERN_C()
{
    assert(!pCharClass[0x25]); // make sure the percent sign is encoded...

    sal_Unicode const * p = pText->buffer;
    sal_Unicode const * pEnd = p + pText->length;
    sal_Int32 nCapacity = pText->length;
    rtl_uString_new_WithLength(pResult, nCapacity);
    while (p < pEnd)
    {
        EscapeType eType;
        sal_uInt32 nUtf32 = readUcs4(
            &p, pEnd,
            (eMechanism == rtl_UriEncodeKeepEscapes
             || eMechanism == rtl_UriEncodeCheckEscapes
             || eMechanism == rtl_UriEncodeStrictKeepEscapes),
            eCharset, &eType);
        switch (eType)
        {
        case EscapeNo:
            if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
                writeUnicode(pResult, &nCapacity,
                             static_cast< sal_Unicode >(nUtf32));
            else if (!writeEscapeChar(
                         pResult, &nCapacity, nUtf32, eCharset,
                         (eMechanism == rtl_UriEncodeStrict
                          || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
            {
                rtl_uString_new(pResult);
                return;
            }
            break;

        case EscapeChar:
            // Only rtl_UriEncodeCheckEscapes unescapes characters that are
            // allowed to appear literally; otherwise re-escape:
            if (eMechanism == rtl_UriEncodeCheckEscapes
                && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
                writeUnicode(pResult, &nCapacity,
                             static_cast< sal_Unicode >(nUtf32));
            else if (!writeEscapeChar(
                         pResult, &nCapacity, nUtf32, eCharset,
                         (eMechanism == rtl_UriEncodeStrict
                          || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
            {
                rtl_uString_new(pResult);
                return;
            }
            break;

        case EscapeOctet:
            writeEscapeOctet(pResult, &nCapacity, nUtf32);
            break;
        }
    }
    *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
}

// Decodes the escape sequences in pText into *pResult according to
// eMechanism.  rtl_UriDecodeNone copies the input unchanged.
// rtl_UriDecodeToIuri forces UTF-8 and keeps characters <= 0x7F escaped.
// rtl_UriDecodeStrict sets *pResult to the empty string if an escape
// sequence cannot be decoded to a character; the other mechanisms leave such
// sequences escaped.
void SAL_CALL rtl_uriDecode(rtl_uString * pText,
                            rtl_UriDecodeMechanism eMechanism,
                            rtl_TextEncoding eCharset, rtl_uString ** pResult)
    SAL_THROW_EXTERN_C()
{
    switch (eMechanism)
    {
    case rtl_UriDecodeNone:
        rtl_uString_assign(pResult, pText);
        break;

    case rtl_UriDecodeToIuri:
        eCharset = RTL_TEXTENCODING_UTF8;
        // fall through
    default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
        {
            sal_Unicode const * p = pText->buffer;
            sal_Unicode const * pEnd = p + pText->length;
            sal_Int32 nCapacity = pText->length;
            rtl_uString_new_WithLength(pResult, nCapacity);
            while (p < pEnd)
            {
                EscapeType eType;
                sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
                switch (eType)
                {
                case EscapeChar:
                    if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
                    {
                        writeEscapeOctet(pResult, &nCapacity, nUtf32);
                        break;
                    }
                    // fall through
                case EscapeNo:
                    writeUcs4(pResult, &nCapacity, nUtf32);
                    break;

                case EscapeOctet:
                    if (eMechanism == rtl_UriDecodeStrict)
                    {
                        rtl_uString_new(pResult);
                        return;
                    }
                    writeEscapeOctet(pResult, &nCapacity, nUtf32);
                    break;
                }
            }
            *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
        }
        break;
    }
}

// Resolves pRelUriRef against pBaseUriRef and stores the result in *pResult.
// Returns false, with a message in *pException, if the base is needed (the
// relative reference has no scheme) but does not itself start with a scheme;
// returns true otherwise.
sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
                                         rtl_uString * pRelUriRef,
                                         rtl_uString ** pResult,
                                         rtl_uString ** pException)
    SAL_THROW_EXTERN_C()
{
    // Use the strict parser algorithm from RFC 3986, section 5.2, to turn the
    // relative URI into an absolute one:
    rtl::OUStringBuffer aBuffer;
    Components aRelComponents;
    parseUriRef(pRelUriRef, &aRelComponents);
    if (aRelComponents.aScheme.isPresent())
    {
        // Already absolute: take scheme, authority, path (with dot segments
        // removed), and query from the reference itself:
        aBuffer.append(aRelComponents.aScheme.pBegin,
                       aRelComponents.aScheme.getLength());
        if (aRelComponents.aAuthority.isPresent())
            aBuffer.append(aRelComponents.aAuthority.pBegin,
                           aRelComponents.aAuthority.getLength());
        appendPath(
            aBuffer, aBuffer.getLength(), false, aRelComponents.aPath.pBegin,
            aRelComponents.aPath.pEnd);
        if (aRelComponents.aQuery.isPresent())
            aBuffer.append(aRelComponents.aQuery.pBegin,
                           aRelComponents.aQuery.getLength());
    }
    else
    {
        Components aBaseComponents;
        parseUriRef(pBaseUriRef, &aBaseComponents);
        if (!aBaseComponents.aScheme.isPresent())
        {
            rtl_uString_assign(
                pException,
                (rtl::OUString(
                    "<" + rtl::OUString(pBaseUriRef)
                    + "> does not start with a scheme component")
                 .pData));
            return false;
        }
        aBuffer.append(aBaseComponents.aScheme.pBegin,
                       aBaseComponents.aScheme.getLength());
        if (aRelComponents.aAuthority.isPresent())
        {
            // Network-path reference: only the scheme comes from the base:
            aBuffer.append(aRelComponents.aAuthority.pBegin,
                           aRelComponents.aAuthority.getLength());
            appendPath(
                aBuffer, aBuffer.getLength(), false,
                aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
            if (aRelComponents.aQuery.isPresent())
                aBuffer.append(aRelComponents.aQuery.pBegin,
                               aRelComponents.aQuery.getLength());
        }
        else
        {
            if (aBaseComponents.aAuthority.isPresent())
                aBuffer.append(aBaseComponents.aAuthority.pBegin,
                               aBaseComponents.aAuthority.getLength());
            if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd)
            {
                // Empty relative path: keep the base path as is, and use the
                // base query unless the reference brings its own:
                aBuffer.append(aBaseComponents.aPath.pBegin,
                               aBaseComponents.aPath.getLength());
                if (aRelComponents.aQuery.isPresent())
                    aBuffer.append(aRelComponents.aQuery.pBegin,
                                   aRelComponents.aQuery.getLength());
                else if (aBaseComponents.aQuery.isPresent())
                    aBuffer.append(aBaseComponents.aQuery.pBegin,
                                   aBaseComponents.aQuery.getLength());
            }
            else
            {
                if (aRelComponents.aPath.pBegin != aRelComponents.aPath.pEnd
                    && *aRelComponents.aPath.pBegin == '/')
                    // Absolute-path reference replaces the base path:
                    appendPath(
                        aBuffer, aBuffer.getLength(), false,
                        aRelComponents.aPath.pBegin,
                        aRelComponents.aPath.pEnd);
                else if (aBaseComponents.aAuthority.isPresent()
                         && aBaseComponents.aPath.pBegin
                            == aBaseComponents.aPath.pEnd)
                    // Base has an authority and an empty path: merge as
                    // "/" + relative path:
                    appendPath(
                        aBuffer, aBuffer.getLength(), true,
                        aRelComponents.aPath.pBegin,
                        aRelComponents.aPath.pEnd);
                else
                {
                    // Merge: base path up to (excluding) its last '/', then
                    // the relative path; both appends share the same
                    // bufferStart n so that ".." in the relative path can
                    // remove segments contributed by the base path:
                    sal_Int32 n = aBuffer.getLength();
                    sal_Int32 i = rtl_ustr_lastIndexOfChar_WithLength(
                        aBaseComponents.aPath.pBegin,
                        aBaseComponents.aPath.getLength(), '/');
                    if (i >= 0)
                    {
                        appendPath(
                            aBuffer, n, false, aBaseComponents.aPath.pBegin,
                            aBaseComponents.aPath.pBegin + i);
                    }
                    appendPath(
                        aBuffer, n, i >= 0, aRelComponents.aPath.pBegin,
                        aRelComponents.aPath.pEnd);
                }
                if (aRelComponents.aQuery.isPresent())
                    aBuffer.append(aRelComponents.aQuery.pBegin,
                                   aRelComponents.aQuery.getLength());
            }
        }
    }
    // The fragment always comes from the relative reference:
    if (aRelComponents.aFragment.isPresent())
        aBuffer.append(aRelComponents.aFragment.pBegin,
                       aRelComponents.aFragment.getLength());
    rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
    return true;
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */