diff options
author | Rüdiger Timm <rt@openoffice.org> | 2007-07-05 08:09:56 +0000 |
---|---|---|
committer | Rüdiger Timm <rt@openoffice.org> | 2007-07-05 08:09:56 +0000 |
commit | c9b5346c23da090886b5019c048823ea078575fe (patch) | |
tree | 8bf29c22c195d38eb8f5cc55680526e5e499cb83 /dtrans | |
parent | 8b2897125ea44d56f481797a4fb6d0446bfbd0ca (diff) |
INTEGRATION: CWS aquavcl01 (1.1.2); FILE ADDED
2007/06/20 19:06:51 pl 1.1.2.2: fixed a constness issue
2007/06/14 12:06:18 tra 1.1.2.1: Work in progress, checked in to have it saved in the repos.
Diffstat (limited to 'dtrans')
-rw-r--r-- | dtrans/source/aqua/HtmlFmtFlt.cxx | 147 | ||||
-rw-r--r-- | dtrans/source/aqua/HtmlFmtFlt.hxx | 20 |
2 files changed, 167 insertions, 0 deletions
diff --git a/dtrans/source/aqua/HtmlFmtFlt.cxx b/dtrans/source/aqua/HtmlFmtFlt.cxx new file mode 100644 index 000000000000..3f558b0a5b4f --- /dev/null +++ b/dtrans/source/aqua/HtmlFmtFlt.cxx @@ -0,0 +1,147 @@ +#include "HtmlFmtFlt.hxx" + +#include <rtl/string.h> + +#include <string> +#include <sstream> +#include <vector> +#include <iomanip> + +#include <boost/assert.hpp> + +using namespace com::sun::star::uno; + +//------------------------------------------------------------------------------ +// converts the openoffice text/html clipboard format to the HTML Format +// well known under MS Windows +// the MS HTML Format has a header before the real html data +// +// Version:1.0 Version number of the clipboard. Staring is 0.9 +// StartHTML: Byte count from the beginning of the clipboard to the start +// of the context, or -1 if no context +// EndHTML: Byte count from the beginning of the clipboard to the end +// of the context, or -1 if no context +// StartFragment: Byte count from the beginning of the clipboard to the +// start of the fragment +// EndFragment: Byte count from the beginning of the clipboard to the +// end of the fragment +// StartSelection: Byte count from the beginning of the clipboard to the +// start of the selection +// EndSelection: Byte count from the beginning of the clipboard to the +// end of the selection +// +// StartSelection and EndSelection are optional +// The fragment should be preceded and followed by the HTML comments +// <!--StartFragment--> and <!--EndFragment--> (no space between !-- and the +// text +//------------------------------------------------------------------------------ + +namespace // private +{ +std::string GetHtmlFormatHeader(size_t startHtml, size_t endHtml, size_t startFragment, size_t endFragment) +{ + std::ostringstream htmlHeader; + htmlHeader << "Version:1.0" << '\r' << '\n'; + htmlHeader << "StartHTML:" << std::setw(10) << std::setfill('0') << std::dec << startHtml << '\r' << '\n'; + htmlHeader << "EndHTML:" << std::setw(10) << std::setfill('0') << std::dec << endHtml << '\r' << '\n'; + htmlHeader << "StartFragment:" << std::setw(10) << std::setfill('0') << std::dec << startFragment << '\r' << '\n'; + htmlHeader << "EndFragment:" << std::setw(10) << std::setfill('0') << std::dec << endFragment << '\r' << '\n'; + return htmlHeader.str(); +} + +} // namespace private + + +// the office allways writes the start and end html tag in upper cases and +// without spaces both tags don't allow parameters +const std::string TAG_HTML = std::string("<HTML>"); +const std::string TAG_END_HTML = std::string("</HTML>"); + +// The body tag may have parameters so we need to search for the +// closing '>' manually e.g. <BODY param> #92840# +const std::string TAG_BODY = std::string("<BODY"); +const std::string TAG_END_BODY = std::string("</BODY"); + +Sequence<sal_Int8> SAL_CALL TextHtmlToHTMLFormat(Sequence<sal_Int8>& aTextHtml) +{ + OSL_ASSERT(aTextHtml.getLength() > 0); + + if (!(aTextHtml.getLength() > 0)) + return Sequence<sal_Int8>(); + + // fill the buffer with dummy values to calc the exact length + std::string dummyHtmlHeader = GetHtmlFormatHeader(0, 0, 0, 0); + size_t lHtmlFormatHeader = dummyHtmlHeader.length(); + + std::string textHtml( + reinterpret_cast<const sal_Char*>(aTextHtml.getConstArray()), + reinterpret_cast<const sal_Char*>(aTextHtml.getConstArray()) + aTextHtml.getLength()); + + std::string::size_type nStartHtml = textHtml.find(TAG_HTML) + lHtmlFormatHeader - 1; // we start one before '<HTML>' Word 2000 does also so + std::string::size_type nEndHtml = textHtml.find(TAG_END_HTML) + lHtmlFormatHeader + TAG_END_HTML.length() + 1; // our SOffice 5.2 wants 2 behind </HTML>? + + // The body tag may have parameters so we need to search for the + // closing '>' manually e.g. <BODY param> #92840# + std::string::size_type nStartFragment = textHtml.find(">", textHtml.find(TAG_BODY)) + lHtmlFormatHeader + 1; + std::string::size_type nEndFragment = textHtml.find(TAG_END_BODY) + lHtmlFormatHeader; + + std::string htmlFormat = GetHtmlFormatHeader(nStartHtml, nEndHtml, nStartFragment, nEndFragment); + htmlFormat += textHtml; + + Sequence<sal_Int8> byteSequence(htmlFormat.length() + 1); // space the trailing '\0' + rtl_zeroMemory(byteSequence.getArray(), byteSequence.getLength()); + + rtl_copyMemory( + static_cast<void*>(byteSequence.getArray()), + static_cast<const void*>(htmlFormat.c_str()), + htmlFormat.length()); + + return byteSequence; +} + +const char* HtmlStartTag = "<html"; + +Sequence<sal_Int8> HTMLFormatToTextHtml(const Sequence<sal_Int8>& aHTMLFormat) +{ + BOOST_ASSERT(isHTMLFormat(aHTMLFormat) && "No HTML Format provided"); + + Sequence<sal_Int8>& nonconstHTMLFormatRef = const_cast< Sequence<sal_Int8>& >(aHTMLFormat); + sal_Char* dataStart = reinterpret_cast<sal_Char*>(nonconstHTMLFormatRef.getArray()); + sal_Char* dataEnd = dataStart + nonconstHTMLFormatRef.getLength() - 1; + const sal_Char* htmlStartTag = strcasestr(dataStart, HtmlStartTag); + + BOOST_ASSERT(htmlStartTag && "Seems to be no HTML at all"); + + // It doesn't seem to be HTML? Well then simply return what has been + // provided in non-debug builds + if (htmlStartTag == NULL) + { + return aHTMLFormat; + } + + sal_Int32 len = dataEnd - htmlStartTag; + Sequence<sal_Int8> plainHtmlData(len); + + rtl_copyMemory(static_cast<void*>(plainHtmlData.getArray()), htmlStartTag, len); + + return plainHtmlData; +} + +/* A simple format detection. We are just comparing the first few bytes + of the provided byte sequence to see whether or not it is the MS + Office Html format. If it shows that this is not reliable enough we + can improve this +*/ +const char HtmlFormatStart[] = "Version:"; +int HtmlFormatStartLen = (sizeof(HtmlFormatStart) - 1); + +bool isHTMLFormat(const Sequence<sal_Int8>& aHtmlSequence) +{ + if (aHtmlSequence.getLength() < HtmlFormatStartLen) + return false; + + return rtl_str_compareIgnoreAsciiCase_WithLength(HtmlFormatStart, + HtmlFormatStartLen, + reinterpret_cast<const sal_Char*>(aHtmlSequence.getConstArray()), + HtmlFormatStartLen) == 0; +} diff --git a/dtrans/source/aqua/HtmlFmtFlt.hxx b/dtrans/source/aqua/HtmlFmtFlt.hxx new file mode 100644 index 000000000000..49f0cc70590c --- /dev/null +++ b/dtrans/source/aqua/HtmlFmtFlt.hxx @@ -0,0 +1,20 @@ +#ifndef INCLUDED_HTMLFMTFLT_HXX +#define INCLUDED_HTMLFMTFLT_HXX + +#include <com/sun/star/uno/Sequence.hxx> + +/* Transform plain HTML into the format expected by MS Office. + */ +com::sun::star::uno::Sequence<sal_Int8> TextHtmlToHTMLFormat(com::sun::star::uno::Sequence<sal_Int8>& aTextHtml); + +/* Transform the MS Office HTML format into plain HTML. + */ +com::sun::star::uno::Sequence<sal_Int8> HTMLFormatToTextHtml(const com::sun::star::uno::Sequence<sal_Int8>& aHTMLFormat); + +/* Detects whether the given byte sequence contains the MS Office Html format. + + @returns True if the MS Office Html format will be detected False otherwise. + */ +bool isHTMLFormat (const com::sun::star::uno::Sequence<sal_Int8>& aHtmlSequence); + +#endif |