diff options
author | Stephan Bergmann <sb@openoffice.org> | 2000-10-16 05:56:05 +0000 |
---|---|---|
committer | Stephan Bergmann <sb@openoffice.org> | 2000-10-16 05:56:05 +0000 |
commit | e6916ea36ff70b994623ab9ce7072ab44883909d (patch) | |
tree | 9ba26274e63f0b0b63f98c6467cd5c26dfdc3063 /tools/source | |
parent | 1b511873f0d9b7beee10f210efd0f597fd12bcdb (diff) |
Removed INetURLObject::FindFirstURLInText() (now replaced by
URIHelper::FindFirstURLInText()).
Diffstat (limited to 'tools/source')
-rw-r--r-- | tools/source/fsys/urlobj.cxx | 257 |
1 file changed, 2 insertions, 255 deletions
diff --git a/tools/source/fsys/urlobj.cxx b/tools/source/fsys/urlobj.cxx index 533092f0c8bf..6256d9e0279a 100644 --- a/tools/source/fsys/urlobj.cxx +++ b/tools/source/fsys/urlobj.cxx @@ -2,9 +2,9 @@ * * $RCSfile: urlobj.cxx,v $ * - * $Revision: 1.2 $ + * $Revision: 1.3 $ * - * last change: $Author: sb $ $Date: 2000-10-16 06:50:00 $ + * last change: $Author: sb $ $Date: 2000-10-16 06:56:05 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -78,9 +78,6 @@ #ifndef TOOLS_INETMIME_HXX #include <inetmime.hxx> #endif -#ifndef _TOOLS_INTN_HXX -#include <intn.hxx> -#endif namespace unnamed_tools_urlobj {} using namespace unnamed_tools_urlobj; // unnamed namespaces don't work well yet... @@ -3017,256 +3014,6 @@ bool INetURLObject::ConcatData(INetProtocol eTheScheme, } //============================================================================ -namespace unnamed_tools_urlobj { - -inline bool isWLetter(International const & rInternational, sal_Unicode c) -{ - return rInternational.IsAlphaNumeric(c) || c == '$' || c == '%' - || c == '&' || c == '-' || c == '/' || c == '@' || c == '\\'; -} - -inline bool checkWChar(International const & rInternational, - sal_Unicode const * p, sal_Unicode const *& rEnd, - bool bBackslash = false, bool bPipe = false) -{ - sal_Unicode c = *p; - if (rInternational.IsAlphaNumeric(c) || c == '/' - || c == '\\' && bBackslash || c == '|' && bPipe) - { - rEnd = p + 1; - return true; - } - else - return !mustEncode(c, INetURLObject::PART_URIC) || c == '%'; -} - -} - -// static -UniString INetURLObject::FindFirstURLInText(UniString const & rText, - xub_StrLen & rBegin, - xub_StrLen & rEnd, - International const & - rInternational, - UniString const *, - EncodeMechanism eMechanism, - rtl_TextEncoding eCharset) -{ - if (!(rBegin <= rEnd && rEnd <= rText.Len())) - return UniString(); - - sal_Unicode const * pBegin = rText.GetBuffer() + rBegin; - sal_Unicode const * pEnd = rText.GetBuffer() + 
rEnd; - - // Search for the first (longest possible) substring of [pBegin..pEnd[ - // that matches any of the following productions. "\W" stands for a word - // break, i.e., the begin or end of the block of text, or a character that - // is neither a letter nor a digit (according to rInternational). The - // productions use the auxiliary rules - // - // domain = label *("." label) - // label = alphanum [*(alphanum / "-") alphanum] - // alphanum = ALPHA / DIGIT - // IPv6reference = "[" IPv6address "]" - // IPv6address = hexpart [":" IPv4address] - // IPv4address = 1*3DIGIT 3("." 1*3DIGIT) - // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq]) - // hexseq = hex4 *(":" hex4) - // hex4 = 1*4HEXDIG - // wchar = <any uric character (ignoring the escaped rule), or "%", or - // a letter or digit (according to rInternational)> - // - // 1st Production (file): - // \W "file:" 1*(wchar / "\" / "|") ["#" 1*wchar] \W - // - // 2nd Production (known scheme): - // \W <one of the known schemes, ignoring case> ":" 1*wchar - // ["#" 1*wchar] \W - // - // 3rd Production (mailto): - // \W domain "@" domain \W - // - // 4th Production (ftp): - // \W "ftp" 2*("." label) ["/" *wchar] ["#" 1*wchar] \W - // - // 5th Production (http): - // \W label 2*("." 
label) ["/" *wchar] ["#" 1*wchar] \W - // - // 6th Production (file): - // \W "//" (domain / IPv6reference) ["/" *wchar] ["#" 1*wchar] \W - // - // 7th Production (Unix file): - // \W "/" 1*wchar \W - // - // 8th Production (UNC file): - // \W "\\" domain ["\" *(wchar / "\")] \W - // - // 9th Production (Unix-like DOS file): - // \W ALPHA ":/" *wchar \W - // - // 10th Production (DOS file): - // \W ALPHA ":\" *(wchar / "\") \W - - for (sal_Unicode const * pPos = pBegin; pPos != pEnd;) - if ((INetMIME::isAlpha(*pPos) || *pPos == '/' || *pPos == '\\') - && (pPos == pBegin || !isWLetter(rInternational, pPos[-1]))) - { - sal_Unicode const * pURIEnd = 0; - - if (INetMIME::isAlpha(*pPos)) - { - sal_Unicode const * p = pPos; - sal_Unicode const * pPrefixEnd = p; - PrefixInfo const * pPrefix = getPrefix(pPrefixEnd, pEnd); - if (pPrefix) // 1st, 2nd - { - while (*p++ != ':'); - pPrefixEnd = p; - if (pPrefix->m_eScheme == INET_PROT_FILE) - while (p != pEnd - && checkWChar(rInternational, p, pURIEnd, true, - true)) - ++p; - else - while (p != pEnd - && checkWChar(rInternational, p, pURIEnd)) - ++p; - if (p != pPrefixEnd && *p == '#') - { - ++p; - while (p != pEnd - && checkWChar(rInternational, p, pURIEnd)) - ++p; - } - } - else if (pEnd - p >= 3 && p[1] == ':' && p[2] == '/') // 9th - { - p += 3; - pURIEnd = p; - while (p != pEnd - && checkWChar(rInternational, p, pURIEnd)) - ++p; - } - else if (pEnd - p >= 3 && p[1] == ':' && p[2] == '\\') // 10th - { - p += 3; - pURIEnd = p; - while (p != pEnd - && checkWChar(rInternational, p, pURIEnd, true)) - ++p; - } - else - { - sal_uInt32 nLabels = scanDomain(p, pEnd, false); - if (nLabels > 0 && p != pEnd && *p == '@') // 3rd - { - ++p; - if (scanDomain(p, pEnd, false) > 0) - pURIEnd = p; - } - else if (nLabels >= 3) // 4th, 5th - { - pURIEnd = p; - if (p != pEnd && *p == '/') - { - pURIEnd = ++p; - while (p != pEnd - && checkWChar(rInternational, p, pURIEnd)) - ++p; - } - if (p != pEnd && *p == '#') - { - ++p; - while (p != pEnd - 
&& checkWChar(rInternational, p, pURIEnd)) - ++p; - } - } - } - } - else if (*pPos == '/') - { - sal_Unicode const * p = pPos; - if (pEnd - p >= 2) - if (p[1] == '/') // 6th - { - p += 2; - if (scanDomain(p, pEnd, false) > 0 - || scanIPv6reference(p, pEnd, false)) - { - pURIEnd = p; - if (p != pEnd && *p == '/') - { - pURIEnd = ++p; - while (p != pEnd - && checkWChar(rInternational, p, - pURIEnd)) - ++p; - } - if (p != pEnd && *p == '#') - { - ++p; - while (p != pEnd - && checkWChar(rInternational, p, - pURIEnd)) - ++p; - } - } - } - else // 7th - { - ++p; - while (p != pEnd - && checkWChar(rInternational, p, pURIEnd)) - ++p; - } - } - else if (*pPos == '\\') // 8th - { - sal_Unicode const * p = pPos; - if (pEnd - p >= 2 && p[1] == '\\') - { - p += 2; - if (scanDomain(p, pEnd, false) > 0) - { - pURIEnd = p; - if (p != pEnd && *p == '\\') - { - pURIEnd = ++p; - while (p != pEnd - && checkWChar(rInternational, p, pURIEnd, - true)) - ++p; - } - } - } - } - - if (pURIEnd - && (pURIEnd == pEnd || !isWLetter(rInternational, *pURIEnd))) - { - INetURLObject aURI(UniString(pPos, pURIEnd - pPos), - INET_PROT_HTTP, eMechanism, eCharset); - if (!aURI.HasError()) - { - rBegin += pPos - pBegin; - rEnd -= pEnd - pURIEnd; - return aURI.GetMainURL(); - } - } - - ++pPos; - while (pPos != pEnd && isWLetter(rInternational, *pPos)) - ++pPos; - } - else - ++pPos; - - rBegin = rEnd; - return UniString(); -} - -//============================================================================ UniString INetURLObject::getExternalURL(DecodeMechanism eMechanism, rtl_TextEncoding eCharset) const { |