summary | refs | log | tree | commit | diff
path: root/tools/source
diff options
context:
space:
mode:
author Stephan Bergmann <sb@openoffice.org> 2000-10-16 05:56:05 +0000
committer Stephan Bergmann <sb@openoffice.org> 2000-10-16 05:56:05 +0000
commit e6916ea36ff70b994623ab9ce7072ab44883909d (patch)
tree 9ba26274e63f0b0b63f98c6467cd5c26dfdc3063 /tools/source
parent 1b511873f0d9b7beee10f210efd0f597fd12bcdb (diff)
Removed INetURLObject::FindFirstURLInText() (now replaced by
URIHelper::FindFirstURLInText()).
Diffstat (limited to 'tools/source')
-rw-r--r-- tools/source/fsys/urlobj.cxx 257
1 files changed, 2 insertions, 255 deletions
diff --git a/tools/source/fsys/urlobj.cxx b/tools/source/fsys/urlobj.cxx
index 533092f0c8bf..6256d9e0279a 100644
--- a/tools/source/fsys/urlobj.cxx
+++ b/tools/source/fsys/urlobj.cxx
@@ -2,9 +2,9 @@
*
* $RCSfile: urlobj.cxx,v $
*
- * $Revision: 1.2 $
+ * $Revision: 1.3 $
*
- * last change: $Author: sb $ $Date: 2000-10-16 06:50:00 $
+ * last change: $Author: sb $ $Date: 2000-10-16 06:56:05 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
@@ -78,9 +78,6 @@
#ifndef TOOLS_INETMIME_HXX
#include <inetmime.hxx>
#endif
-#ifndef _TOOLS_INTN_HXX
-#include <intn.hxx>
-#endif
namespace unnamed_tools_urlobj {} using namespace unnamed_tools_urlobj;
// unnamed namespaces don't work well yet...
@@ -3017,256 +3014,6 @@ bool INetURLObject::ConcatData(INetProtocol eTheScheme,
}
//============================================================================
-namespace unnamed_tools_urlobj {
-
-inline bool isWLetter(International const & rInternational, sal_Unicode c)
-{
- return rInternational.IsAlphaNumeric(c) || c == '$' || c == '%'
- || c == '&' || c == '-' || c == '/' || c == '@' || c == '\\';
-}
-
-inline bool checkWChar(International const & rInternational,
- sal_Unicode const * p, sal_Unicode const *& rEnd,
- bool bBackslash = false, bool bPipe = false)
-{
- sal_Unicode c = *p;
- if (rInternational.IsAlphaNumeric(c) || c == '/'
- || c == '\\' && bBackslash || c == '|' && bPipe)
- {
- rEnd = p + 1;
- return true;
- }
- else
- return !mustEncode(c, INetURLObject::PART_URIC) || c == '%';
-}
-
-}
-
-// static
-UniString INetURLObject::FindFirstURLInText(UniString const & rText,
- xub_StrLen & rBegin,
- xub_StrLen & rEnd,
- International const &
- rInternational,
- UniString const *,
- EncodeMechanism eMechanism,
- rtl_TextEncoding eCharset)
-{
- if (!(rBegin <= rEnd && rEnd <= rText.Len()))
- return UniString();
-
- sal_Unicode const * pBegin = rText.GetBuffer() + rBegin;
- sal_Unicode const * pEnd = rText.GetBuffer() + rEnd;
-
- // Search for the first (longest possible) substring of [pBegin..pEnd[
- // that matches any of the following productions. "\W" stands for a word
- // break, i.e., the begin or end of the block of text, or a character that
- // is neither a letter nor a digit (according to rInternational). The
- // productions use the auxiliary rules
- //
- // domain = label *("." label)
- // label = alphanum [*(alphanum / "-") alphanum]
- // alphanum = ALPHA / DIGIT
- // IPv6reference = "[" IPv6address "]"
- // IPv6address = hexpart [":" IPv4address]
- // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
- // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
- // hexseq = hex4 *(":" hex4)
- // hex4 = 1*4HEXDIG
- // wchar = <any uric character (ignoring the escaped rule), or "%", or
- // a letter or digit (according to rInternational)>
- //
- // 1st Production (file):
- // \W "file:" 1*(wchar / "\" / "|") ["#" 1*wchar] \W
- //
- // 2nd Production (known scheme):
- // \W <one of the known schemes, ignoring case> ":" 1*wchar
- // ["#" 1*wchar] \W
- //
- // 3rd Production (mailto):
- // \W domain "@" domain \W
- //
- // 4th Production (ftp):
- // \W "ftp" 2*("." label) ["/" *wchar] ["#" 1*wchar] \W
- //
- // 5th Production (http):
- // \W label 2*("." label) ["/" *wchar] ["#" 1*wchar] \W
- //
- // 6th Production (file):
- // \W "//" (domain / IPv6reference) ["/" *wchar] ["#" 1*wchar] \W
- //
- // 7th Production (Unix file):
- // \W "/" 1*wchar \W
- //
- // 8th Production (UNC file):
- // \W "\\" domain ["\" *(wchar / "\")] \W
- //
- // 9th Production (Unix-like DOS file):
- // \W ALPHA ":/" *wchar \W
- //
- // 10th Production (DOS file):
- // \W ALPHA ":\" *(wchar / "\") \W
-
- for (sal_Unicode const * pPos = pBegin; pPos != pEnd;)
- if ((INetMIME::isAlpha(*pPos) || *pPos == '/' || *pPos == '\\')
- && (pPos == pBegin || !isWLetter(rInternational, pPos[-1])))
- {
- sal_Unicode const * pURIEnd = 0;
-
- if (INetMIME::isAlpha(*pPos))
- {
- sal_Unicode const * p = pPos;
- sal_Unicode const * pPrefixEnd = p;
- PrefixInfo const * pPrefix = getPrefix(pPrefixEnd, pEnd);
- if (pPrefix) // 1st, 2nd
- {
- while (*p++ != ':');
- pPrefixEnd = p;
- if (pPrefix->m_eScheme == INET_PROT_FILE)
- while (p != pEnd
- && checkWChar(rInternational, p, pURIEnd, true,
- true))
- ++p;
- else
- while (p != pEnd
- && checkWChar(rInternational, p, pURIEnd))
- ++p;
- if (p != pPrefixEnd && *p == '#')
- {
- ++p;
- while (p != pEnd
- && checkWChar(rInternational, p, pURIEnd))
- ++p;
- }
- }
- else if (pEnd - p >= 3 && p[1] == ':' && p[2] == '/') // 9th
- {
- p += 3;
- pURIEnd = p;
- while (p != pEnd
- && checkWChar(rInternational, p, pURIEnd))
- ++p;
- }
- else if (pEnd - p >= 3 && p[1] == ':' && p[2] == '\\') // 10th
- {
- p += 3;
- pURIEnd = p;
- while (p != pEnd
- && checkWChar(rInternational, p, pURIEnd, true))
- ++p;
- }
- else
- {
- sal_uInt32 nLabels = scanDomain(p, pEnd, false);
- if (nLabels > 0 && p != pEnd && *p == '@') // 3rd
- {
- ++p;
- if (scanDomain(p, pEnd, false) > 0)
- pURIEnd = p;
- }
- else if (nLabels >= 3) // 4th, 5th
- {
- pURIEnd = p;
- if (p != pEnd && *p == '/')
- {
- pURIEnd = ++p;
- while (p != pEnd
- && checkWChar(rInternational, p, pURIEnd))
- ++p;
- }
- if (p != pEnd && *p == '#')
- {
- ++p;
- while (p != pEnd
- && checkWChar(rInternational, p, pURIEnd))
- ++p;
- }
- }
- }
- }
- else if (*pPos == '/')
- {
- sal_Unicode const * p = pPos;
- if (pEnd - p >= 2)
- if (p[1] == '/') // 6th
- {
- p += 2;
- if (scanDomain(p, pEnd, false) > 0
- || scanIPv6reference(p, pEnd, false))
- {
- pURIEnd = p;
- if (p != pEnd && *p == '/')
- {
- pURIEnd = ++p;
- while (p != pEnd
- && checkWChar(rInternational, p,
- pURIEnd))
- ++p;
- }
- if (p != pEnd && *p == '#')
- {
- ++p;
- while (p != pEnd
- && checkWChar(rInternational, p,
- pURIEnd))
- ++p;
- }
- }
- }
- else // 7th
- {
- ++p;
- while (p != pEnd
- && checkWChar(rInternational, p, pURIEnd))
- ++p;
- }
- }
- else if (*pPos == '\\') // 8th
- {
- sal_Unicode const * p = pPos;
- if (pEnd - p >= 2 && p[1] == '\\')
- {
- p += 2;
- if (scanDomain(p, pEnd, false) > 0)
- {
- pURIEnd = p;
- if (p != pEnd && *p == '\\')
- {
- pURIEnd = ++p;
- while (p != pEnd
- && checkWChar(rInternational, p, pURIEnd,
- true))
- ++p;
- }
- }
- }
- }
-
- if (pURIEnd
- && (pURIEnd == pEnd || !isWLetter(rInternational, *pURIEnd)))
- {
- INetURLObject aURI(UniString(pPos, pURIEnd - pPos),
- INET_PROT_HTTP, eMechanism, eCharset);
- if (!aURI.HasError())
- {
- rBegin += pPos - pBegin;
- rEnd -= pEnd - pURIEnd;
- return aURI.GetMainURL();
- }
- }
-
- ++pPos;
- while (pPos != pEnd && isWLetter(rInternational, *pPos))
- ++pPos;
- }
- else
- ++pPos;
-
- rBegin = rEnd;
- return UniString();
-}
-
-//============================================================================
UniString INetURLObject::getExternalURL(DecodeMechanism eMechanism,
rtl_TextEncoding eCharset) const
{