INTEGRATION: CWS aquavcl01 (1.1.2); FILE ADDED

2007/06/20 19:06:51 pl 1.1.2.2: fixed a constness issue 2007/06/14 12:06:18 tra 1.1.2.1: Work in progress, checked in to have it saved in the repos.
author: Rüdiger Timm <rt@openoffice.org> 2007-07-05 08:09:56 +0000
committer: Rüdiger Timm <rt@openoffice.org> 2007-07-05 08:09:56 +0000
commit: c9b5346c23da090886b5019c048823ea078575fe (patch)
tree: 8bf29c22c195d38eb8f5cc55680526e5e499cb83 /dtrans
parent: 8b2897125ea44d56f481797a4fb6d0446bfbd0ca (diff)
2 files changed, 167 insertions, 0 deletions
diff --git a/dtrans/source/aqua/HtmlFmtFlt.cxx b/dtrans/source/aqua/HtmlFmtFlt.cxx
new file mode 100644
index 000000000000..3f558b0a5b4f
--- /dev/null
+++ b/dtrans/source/aqua/HtmlFmtFlt.cxx
@@ -0,0 +1,147 @@
+#include "HtmlFmtFlt.hxx"
+
+#include <rtl/string.h>
+
+#include <algorithm>
+#include <iomanip>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <boost/assert.hpp>
+
+using namespace com::sun::star::uno;
+
+//------------------------------------------------------------------------------
+// converts the openoffice text/html clipboard format to the HTML Format
+// well known under MS Windows
+// the MS HTML Format has a header before the real html data
+//
+// Version:1.0      Version number of the clipboard. Starting version is 0.9
+// StartHTML:       Byte count from the beginning of the clipboard to the start
+//                  of the context, or -1 if no context
+// EndHTML:         Byte count from the beginning of the clipboard to the end
+//                  of the context, or -1 if no context
+// StartFragment:   Byte count from the beginning of the clipboard to the
+//                  start of the fragment
+// EndFragment:     Byte count from the beginning of the clipboard to the
+//                  end of the fragment
+// StartSelection:  Byte count from the beginning of the clipboard to the
+//                  start of the selection
+// EndSelection:    Byte count from the beginning of the clipboard to the
+//                  end of the selection
+//
+// StartSelection and EndSelection are optional
+// The fragment should be preceded and followed by the HTML comments
+// <!--StartFragment--> and <!--EndFragment--> (no space between "!--" and
+// the text)
+//------------------------------------------------------------------------------
+
+namespace // private
+{
+// Writes one header line of the form "<key><value>\r\n" where the value is
+// printed in decimal and left-padded with zeros to at least ten digits.
+void WriteOffsetField(std::ostream& out, const char* key, size_t value)
+{
+    out << key;
+    out << std::setw(10) << std::setfill('0') << std::dec << value;
+    out << "\r\n";
+}
+
+// Builds the textual header that precedes the HTML data in the MS Windows
+// "HTML Format" clipboard flavour.
+//
+// All four arguments are byte offsets counted from the beginning of the
+// clipboard data (i.e. from the first byte of this header). Because every
+// offset is padded to ten digits, the header has the same length for any
+// offsets that fit into ten decimal digits.
+std::string GetHtmlFormatHeader(size_t startHtml, size_t endHtml, size_t startFragment, size_t endFragment)
+{
+    std::ostringstream header;
+
+    header << "Version:1.0" << "\r\n";
+    WriteOffsetField(header, "StartHTML:", startHtml);
+    WriteOffsetField(header, "EndHTML:", endHtml);
+    WriteOffsetField(header, "StartFragment:", startFragment);
+    WriteOffsetField(header, "EndFragment:", endFragment);
+
+    return header.str();
+}
+
+} // namespace private
+
+
+// The office always writes the opening and closing html tags in upper case
+// and without spaces; neither tag takes parameters, so the complete tag
+// text can be searched for.
+const std::string TAG_HTML("<HTML>");
+const std::string TAG_END_HTML("</HTML>");
+
+// The body tag may carry parameters (e.g. <BODY param>, see #92840#), so
+// only the tag prefix is stored here and the closing '>' has to be located
+// separately.
+const std::string TAG_BODY("<BODY");
+const std::string TAG_END_BODY("</BODY");
+
+namespace // private
+{
+// Turns the result of a std::string search into a clipboard byte offset:
+// returns `found + adjustment` when the search succeeded and `fallback` when
+// it returned std::string::npos. This keeps a missing tag from wrapping
+// around in unsigned arithmetic and producing a bogus offset.
+std::string::size_type ToClipboardOffset(
+    std::string::size_type found,
+    std::string::size_type adjustment,
+    std::string::size_type fallback)
+{
+    return (found != std::string::npos) ? found + adjustment : fallback;
+}
+} // namespace private
+
+// Converts office text/html clipboard data into the MS Windows "HTML Format":
+// the header produced by GetHtmlFormatHeader() followed by the unchanged
+// HTML text and one terminating '\0' byte.
+//
+// aTextHtml  the raw text/html bytes; it is only read.
+//
+// Returns the converted byte sequence, or an empty sequence if aTextHtml is
+// empty.
+//
+// If one of the <HTML>, </HTML>, <BODY ...> or </BODY tags cannot be found,
+// the corresponding offset falls back to the beginning (start offsets) or
+// the end (end offsets) of the HTML data instead of an arbitrary value.
+Sequence<sal_Int8> SAL_CALL TextHtmlToHTMLFormat(Sequence<sal_Int8>& aTextHtml)
+{
+    OSL_ASSERT(aTextHtml.getLength() > 0);
+
+    if (!(aTextHtml.getLength() > 0))
+        return Sequence<sal_Int8>();
+
+    // Build the header once with dummy offsets to learn its length. The
+    // offsets are zero-padded to ten digits, so the real header built below
+    // has the same length as long as every offset fits into ten digits.
+    const size_t lHtmlFormatHeader = GetHtmlFormatHeader(0, 0, 0, 0).length();
+
+    const sal_Char* pText = reinterpret_cast<const sal_Char*>(aTextHtml.getConstArray());
+    const std::string textHtml(pText, pText + aTextHtml.getLength());
+
+    // Offsets of the first and one-past-the-last byte of the HTML data,
+    // used whenever a tag is missing.
+    const std::string::size_type nDataStart = lHtmlFormatHeader;
+    const std::string::size_type nDataEnd = lHtmlFormatHeader + textHtml.length();
+
+    // we start one before '<HTML>' Word 2000 does also so
+    const std::string::size_type nStartHtml =
+        ToClipboardOffset(textHtml.find(TAG_HTML), lHtmlFormatHeader - 1, nDataStart);
+
+    // our SOffice 5.2 wants 2 behind </HTML>?
+    const std::string::size_type nEndHtml =
+        ToClipboardOffset(textHtml.find(TAG_END_HTML),
+                          lHtmlFormatHeader + TAG_END_HTML.length() + 1,
+                          nDataEnd);
+
+    // The body tag may have parameters so we need to search for the
+    // closing '>' manually e.g. <BODY param> #92840#
+    const std::string::size_type nBodyTag = textHtml.find(TAG_BODY);
+    const std::string::size_type nBodyTagClose =
+        (nBodyTag != std::string::npos) ? textHtml.find('>', nBodyTag) : std::string::npos;
+
+    // The fragment starts right behind the '>' of the body tag and ends
+    // where the closing body tag begins.
+    const std::string::size_type nStartFragment =
+        ToClipboardOffset(nBodyTagClose, lHtmlFormatHeader + 1, nDataStart);
+    const std::string::size_type nEndFragment =
+        ToClipboardOffset(textHtml.find(TAG_END_BODY), lHtmlFormatHeader, nDataEnd);
+
+    std::string htmlFormat = GetHtmlFormatHeader(nStartHtml, nEndHtml, nStartFragment, nEndFragment);
+    htmlFormat += textHtml;
+
+    // One extra byte for the trailing '\0'; zeroing the whole buffer first
+    // guarantees that this byte is 0 after the copy below.
+    Sequence<sal_Int8> byteSequence(static_cast<sal_Int32>(htmlFormat.length() + 1));
+    rtl_zeroMemory(byteSequence.getArray(), byteSequence.getLength());
+
+    rtl_copyMemory(
+        static_cast<void*>(byteSequence.getArray()),
+        static_cast<const void*>(htmlFormat.c_str()),
+        htmlFormat.length());
+
+    return byteSequence;
+}
+
+const char* HtmlStartTag = "<html";
+
+namespace // private
+{
+// Lower-cases the ASCII letters 'A'..'Z'; every other character is returned
+// unchanged. Independent of the current locale.
+inline char AsciiToLower(char c)
+{
+    return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
+}
+
+// Character predicate for a case-insensitive (ASCII only) std::search.
+bool EqualsIgnoreAsciiCase(char lhs, char rhs)
+{
+    return AsciiToLower(lhs) == AsciiToLower(rhs);
+}
+} // namespace private
+
+// Strips the MS Windows "HTML Format" header from clipboard data and returns
+// the plain HTML.
+//
+// aHTMLFormat  the clipboard bytes; expected to satisfy isHTMLFormat().
+//
+// Returns the bytes from the first "<html" (matched case-insensitively)
+// up to the end of the data, without a trailing '\0' terminator if one is
+// present. If the data is empty or contains no "<html", the input is
+// returned unchanged.
+//
+// The search is limited to the bytes of the sequence (and stops at the first
+// '\0', as a C string search would), so data that is not NUL-terminated is
+// never read past its end. The input is only read, never modified.
+Sequence<sal_Int8> HTMLFormatToTextHtml(const Sequence<sal_Int8>& aHTMLFormat)
+{
+  BOOST_ASSERT(isHTMLFormat(aHTMLFormat) && "No HTML Format provided");
+
+  const sal_Int32 totalLen = aHTMLFormat.getLength();
+
+  // Nothing to search in; hand back what has been provided
+  if (totalLen <= 0)
+    {
+    return aHTMLFormat;
+    }
+
+  const sal_Char* dataStart = reinterpret_cast<const sal_Char*>(aHTMLFormat.getConstArray());
+  const sal_Char* bufferEnd = dataStart + totalLen;
+
+  // Only the text in front of the first '\0' (or the whole buffer if there
+  // is none) is searched for the start tag
+  const sal_Char* textEnd = std::find(dataStart, bufferEnd, '\0');
+
+  const sal_Char* tagBegin = HtmlStartTag;
+  const sal_Char* tagEnd = tagBegin + std::char_traits<char>::length(tagBegin);
+
+  // std::search returns its 'last' argument when there is no match
+  const sal_Char* htmlStartTag = std::search(dataStart, textEnd, tagBegin, tagEnd, EqualsIgnoreAsciiCase);
+  const bool tagFound = (htmlStartTag != textEnd);
+
+  BOOST_ASSERT(tagFound && "Seems to be no HTML at all");
+
+  // It doesn't seem to be HTML? Well then simply return what has been
+  // provided in non-debug builds
+  if (!tagFound)
+    {
+    return aHTMLFormat;
+    }
+
+  // Drop the terminating '\0' of the clipboard data, but only if the last
+  // byte really is one; otherwise a byte of the HTML would get lost
+  const sal_Char* dataEnd = bufferEnd;
+  if (*(bufferEnd - 1) == '\0')
+    {
+    --dataEnd;
+    }
+
+  const sal_Int32 len = static_cast<sal_Int32>(dataEnd - htmlStartTag);
+  Sequence<sal_Int8> plainHtmlData(len);
+
+  rtl_copyMemory(static_cast<void*>(plainHtmlData.getArray()), htmlStartTag, len);
+
+  return plainHtmlData;
+}
+
+/* Lightweight format detection: only the leading bytes of the sequence are
+   compared (ignoring ASCII case) with the text every MS Office Html format
+   header starts with. Should this turn out to be too unreliable, the check
+   can be made stricter.
+*/
+const char HtmlFormatStart[] = "Version:";
+int HtmlFormatStartLen = (sizeof(HtmlFormatStart) - 1);
+
+/* Returns true if aHtmlSequence holds at least HtmlFormatStartLen bytes and
+   those leading bytes equal HtmlFormatStart when ASCII case is ignored;
+   returns false otherwise.
+*/
+bool isHTMLFormat(const Sequence<sal_Int8>& aHtmlSequence)
+{
+  const bool longEnough = !(aHtmlSequence.getLength() < HtmlFormatStartLen);
+
+  if (longEnough)
+    {
+    const sal_Char* leadingBytes = reinterpret_cast<const sal_Char*>(aHtmlSequence.getConstArray());
+    const sal_Int32 cmp = rtl_str_compareIgnoreAsciiCase_WithLength(HtmlFormatStart,
+                                                                    HtmlFormatStartLen,
+                                                                    leadingBytes,
+                                                                    HtmlFormatStartLen);
+    return cmp == 0;
+    }
+
+  return false;
+}
diff --git a/dtrans/source/aqua/HtmlFmtFlt.hxx b/dtrans/source/aqua/HtmlFmtFlt.hxx
new file mode 100644
index 000000000000..49f0cc70590c
--- /dev/null
+++ b/dtrans/source/aqua/HtmlFmtFlt.hxx
@@ -0,0 +1,20 @@
+#ifndef INCLUDED_HTMLFMTFLT_HXX
+#define INCLUDED_HTMLFMTFLT_HXX
+
+#include <com/sun/star/uno/Sequence.hxx>
+
+/* Transform plain HTML into the format expected by MS Office.
+
+   The result consists of the MS "HTML Format" header (Version, StartHTML,
+   EndHTML, StartFragment, EndFragment), followed by the bytes of aTextHtml
+   and one terminating '\0' byte.
+
+   @param aTextHtml the plain text/html bytes to convert.
+
+   @returns the converted data, or an empty sequence if aTextHtml is empty.
+ */
+com::sun::star::uno::Sequence<sal_Int8> TextHtmlToHTMLFormat(com::sun::star::uno::Sequence<sal_Int8>& aTextHtml);
+
+/* Transform the MS Office HTML format into plain HTML.
+
+   Everything in front of the first "<html" (searched for case-insensitively)
+   is removed.
+
+   @param aHTMLFormat the data in MS Office HTML format, see isHTMLFormat.
+
+   @returns the plain HTML; if no "<html" can be found the provided data is
+   returned unchanged.
+ */
+com::sun::star::uno::Sequence<sal_Int8> HTMLFormatToTextHtml(const com::sun::star::uno::Sequence<sal_Int8>& aHTMLFormat);
+
+/* Detects whether the given byte sequence contains the MS Office Html format.
+
+   Only the beginning of the sequence is inspected: it has to start with the
+   text "Version:", compared without regard to ASCII case.
+
+   @param aHtmlSequence the byte sequence to inspect.
+
+   @returns true if the MS Office Html format has been detected, false
+   otherwise (including sequences too short to hold the start text).
+ */
+bool isHTMLFormat (const com::sun::star::uno::Sequence<sal_Int8>& aHtmlSequence);
+
+#endif
author	Rüdiger Timm <rt@openoffice.org>	2007-07-05 08:09:56 +0000
committer	Rüdiger Timm <rt@openoffice.org>	2007-07-05 08:09:56 +0000
commit	c9b5346c23da090886b5019c048823ea078575fe (patch)
tree	8bf29c22c195d38eb8f5cc55680526e5e499cb83 /dtrans
parent	8b2897125ea44d56f481797a4fb6d0446bfbd0ca (diff)