diff options
author | Zolnai Tamás <zolnaitamas2000@gmail.com> | 2013-03-31 20:11:57 +0200 |
---|---|---|
committer | Zolnai Tamás <zolnaitamas2000@gmail.com> | 2013-03-31 20:25:13 +0200 |
commit | ce51bf1a6ef36bbd1eea751add342cae6f1004d2 (patch) | |
tree | fedd6cb9efdd0a90306316bda580ace87b78b0b4 /l10ntools | |
parent | 6ea8d4a55c3693d75da32af7e9a40a79bac99fa7 (diff) |
Make the transformation of help strings a bit cleaner
*Do not escape tags and double quotes in tags,
but find tags (ICU regexp) when merging and
use this information to make strings valid.
*Define a new Quot function for helpex,
which works with icu UnicodeCharacter.
*Move tag search to xmlparse.cxx and use ICU
only in helpex.
*QuotHTML does not unescape, it just replaces XML characters.
(Unescaping is also useless in uimerge.cxx.)
*Move UnQuotHTML() to helper.
(It was used in xmlparse.cxx and cfgmerge.cxx.)
*Use UnQuotHTML() in uimerge.cxx too.
Change-Id: Ice8940ef69279709a1c5d84c6ae1b0d62a71ca76
Diffstat (limited to 'l10ntools')
-rw-r--r-- | l10ntools/Executable_cfgex.mk | 3 | ||||
-rw-r--r-- | l10ntools/Executable_localize.mk | 2 | ||||
-rw-r--r-- | l10ntools/Executable_propex.mk | 2 | ||||
-rw-r--r-- | l10ntools/Executable_stringex.mk | 3 | ||||
-rw-r--r-- | l10ntools/Executable_transex3.mk | 3 | ||||
-rw-r--r-- | l10ntools/Executable_treex.mk | 3 | ||||
-rw-r--r-- | l10ntools/Executable_uiex.mk | 3 | ||||
-rw-r--r-- | l10ntools/Executable_ulfex.mk | 2 | ||||
-rw-r--r-- | l10ntools/StaticLibrary_transex.mk | 1 | ||||
-rw-r--r-- | l10ntools/inc/helper.hxx | 3 | ||||
-rw-r--r-- | l10ntools/inc/xmlparse.hxx | 8 | ||||
-rw-r--r-- | l10ntools/source/cfgmerge.cxx | 32 | ||||
-rw-r--r-- | l10ntools/source/helper.cxx | 59 | ||||
-rw-r--r-- | l10ntools/source/helpmerge.cxx | 6 | ||||
-rwxr-xr-x | l10ntools/source/po.cxx | 82 | ||||
-rw-r--r-- | l10ntools/source/uimerge.cxx | 2 | ||||
-rw-r--r-- | l10ntools/source/xmlparse.cxx | 157 |
17 files changed, 167 insertions, 204 deletions
diff --git a/l10ntools/Executable_cfgex.mk b/l10ntools/Executable_cfgex.mk index 7d23fa4891d8..0083d9505698 100644 --- a/l10ntools/Executable_cfgex.mk +++ b/l10ntools/Executable_cfgex.mk @@ -49,8 +49,7 @@ $(eval $(call gb_Executable_add_exception_objects,cfgex,\ $(eval $(call gb_Executable_use_externals,cfgex,\ boost_headers \ - icuuc \ - icui18n \ + libxml2 \ )) # vim:set noet sw=4 ts=4: diff --git a/l10ntools/Executable_localize.mk b/l10ntools/Executable_localize.mk index 5f3cc628bc52..b673289d9d4e 100644 --- a/l10ntools/Executable_localize.mk +++ b/l10ntools/Executable_localize.mk @@ -45,8 +45,6 @@ $(eval $(call gb_Executable_add_exception_objects,localize,\ $(eval $(call gb_Executable_use_externals,localize,\ boost_headers \ - icuuc \ - icui18n \ )) # vim:set noet sw=4 ts=4: diff --git a/l10ntools/Executable_propex.mk b/l10ntools/Executable_propex.mk index f98706f6810f..c5bb06ddbdd4 100644 --- a/l10ntools/Executable_propex.mk +++ b/l10ntools/Executable_propex.mk @@ -30,8 +30,6 @@ $(eval $(call gb_Executable_add_exception_objects,propex,\ $(eval $(call gb_Executable_use_externals,propex,\ boost_headers \ - icuuc \ - icui18n \ )) # vim: set noet sw=4 ts=4: diff --git a/l10ntools/Executable_stringex.mk b/l10ntools/Executable_stringex.mk index 9545dd582e73..4f9edfde592a 100644 --- a/l10ntools/Executable_stringex.mk +++ b/l10ntools/Executable_stringex.mk @@ -31,9 +31,6 @@ $(eval $(call gb_Executable_add_exception_objects,stringex,\ $(eval $(call gb_Executable_use_externals,stringex,\ boost_headers \ libxml2 \ - icuuc \ - icui18n \ - icu_headers \ )) # vim: set noet sw=4 ts=4: diff --git a/l10ntools/Executable_transex3.mk b/l10ntools/Executable_transex3.mk index 5178cd58b2be..b43bc89e45bb 100644 --- a/l10ntools/Executable_transex3.mk +++ b/l10ntools/Executable_transex3.mk @@ -51,9 +51,6 @@ $(eval $(call gb_Executable_add_exception_objects,transex3,\ $(eval $(call gb_Executable_use_externals,transex3,\ boost_headers \ - icuuc \ - icui18n \ - icu_headers \ libxml2 \ )) 
diff --git a/l10ntools/Executable_treex.mk b/l10ntools/Executable_treex.mk index c8ff48c72275..976dc1398743 100644 --- a/l10ntools/Executable_treex.mk +++ b/l10ntools/Executable_treex.mk @@ -31,9 +31,6 @@ $(eval $(call gb_Executable_add_exception_objects,treex,\ $(eval $(call gb_Executable_use_externals,treex,\ boost_headers \ libxml2 \ - icuuc \ - icui18n \ - icu_headers \ )) # vim: set noet sw=4 ts=4: diff --git a/l10ntools/Executable_uiex.mk b/l10ntools/Executable_uiex.mk index 0c142c00f85d..064b2078479e 100644 --- a/l10ntools/Executable_uiex.mk +++ b/l10ntools/Executable_uiex.mk @@ -31,9 +31,6 @@ $(eval $(call gb_Executable_use_externals,uiex,\ libxml2 \ libxslt \ boost_headers \ - icuuc \ - icui18n \ - icu_headers \ )) # vim: set noet sw=4 ts=4: diff --git a/l10ntools/Executable_ulfex.mk b/l10ntools/Executable_ulfex.mk index cd9d86eaf548..bd7b7a5c1877 100644 --- a/l10ntools/Executable_ulfex.mk +++ b/l10ntools/Executable_ulfex.mk @@ -46,8 +46,6 @@ $(eval $(call gb_Executable_add_exception_objects,ulfex,\ $(eval $(call gb_Executable_use_externals,ulfex,\ boost_headers \ - icuuc \ - icui18n \ )) # vim:set noet sw=4 ts=4: diff --git a/l10ntools/StaticLibrary_transex.mk b/l10ntools/StaticLibrary_transex.mk index ed25591719a6..cc65614b1039 100644 --- a/l10ntools/StaticLibrary_transex.mk +++ b/l10ntools/StaticLibrary_transex.mk @@ -39,7 +39,6 @@ $(eval $(call gb_StaticLibrary_set_include,transex,\ $(eval $(call gb_StaticLibrary_use_externals,transex,\ boost_headers \ - icu_headers \ libxml2 \ )) diff --git a/l10ntools/inc/helper.hxx b/l10ntools/inc/helper.hxx index 4dd2dd137937..1292f44ec957 100644 --- a/l10ntools/inc/helper.hxx +++ b/l10ntools/inc/helper.hxx @@ -25,7 +25,8 @@ namespace helper { -OString QuotHTML(const rtl::OString &rString); +OString QuotHTML( const OString &rString ); +OString UnQuotHTML( const OString& rString ); bool isWellFormedXML( OString const & text ); diff --git a/l10ntools/inc/xmlparse.hxx b/l10ntools/inc/xmlparse.hxx index 
7a758727c5f6..55867d3f70ab 100644 --- a/l10ntools/inc/xmlparse.hxx +++ b/l10ntools/inc/xmlparse.hxx @@ -229,11 +229,11 @@ protected: class XMLUtil{ public: - /// Quot the XML characters and replace \n \t - static void QuotHTML( rtl::OUString &rString ); + /// Quot the XML characters + static OUString QuotHTML( const OUString& rString ); - /// UnQuot the XML characters and restore \n \t - static void UnQuotHTML ( rtl::OUString &rString ); + /// UnQuot the XML characters + static OUString UnQuotHTML( const OUString &rString ); }; diff --git a/l10ntools/source/cfgmerge.cxx b/l10ntools/source/cfgmerge.cxx index 695620004e5e..fd9474a184ce 100644 --- a/l10ntools/source/cfgmerge.cxx +++ b/l10ntools/source/cfgmerge.cxx @@ -27,6 +27,7 @@ #include "boost/scoped_ptr.hpp" #include "rtl/strbuf.hxx" +#include "helper.hxx" #include "export.hxx" #include "cfgmerge.hxx" #include "tokens.h" @@ -136,33 +137,6 @@ static OString lcl_QuoteHTML( const OString& rString ) return sReturn.makeStringAndClear(); } -static OString lcl_UnquoteHTML( const OString& rString ) -{ - rtl::OStringBuffer sReturn; - for (sal_Int32 i = 0; i != rString.getLength();) { - if (rString.match("&", i)) { - sReturn.append('&'); - i += RTL_CONSTASCII_LENGTH("&"); - } else if (rString.match("<", i)) { - sReturn.append('<'); - i += RTL_CONSTASCII_LENGTH("<"); - } else if (rString.match(">", i)) { - sReturn.append('>'); - i += RTL_CONSTASCII_LENGTH(">"); - } else if (rString.match(""", i)) { - sReturn.append('"'); - i += RTL_CONSTASCII_LENGTH("""); - } else if (rString.match("'", i)) { - sReturn.append('\''); - i += RTL_CONSTASCII_LENGTH("'"); - } else { - sReturn.append(rString[i]); - ++i; - } - } - return sReturn.makeStringAndClear(); -} - } // anonymous namespace // @@ -489,7 +463,7 @@ void CfgExport::WorkOnResourceEnd() if ( sText.isEmpty()) sText = sFallback; - sText = lcl_UnquoteHTML( sText ); + sText = helper::UnQuotHTML( sText ); common::writePoEntry( "Cfgex", pOutputStream, sPath, pStackData->sResTyp, @@ 
-504,7 +478,7 @@ void CfgExport::WorkOnText( const rtl::OString &rIsoLang ) { - if( rIsoLang.getLength() ) rText = lcl_UnquoteHTML( rText ); + if( rIsoLang.getLength() ) rText = helper::UnQuotHTML( rText ); } diff --git a/l10ntools/source/helper.cxx b/l10ntools/source/helper.cxx index cbcf6d1572fb..08a256013bcd 100644 --- a/l10ntools/source/helper.cxx +++ b/l10ntools/source/helper.cxx @@ -11,45 +11,58 @@ namespace helper { -rtl::OString QuotHTML(const rtl::OString &rString) +OString QuotHTML(const OString &rString) { - rtl::OStringBuffer sReturn; - for (sal_Int32 i = 0; i < rString.getLength(); ++i) { - switch (rString[i]) { - case '\\': - if (i < rString.getLength()) { - switch (rString[i + 1]) { - case '"': - case '<': - case '>': - case '\\': - ++i; - break; - } - } - // fall through - default: - sReturn.append(rString[i]); - break; - + OStringBuffer sReturn; + for (sal_Int32 i = 0; i < rString.getLength(); ++i) + { + switch (rString[i]) + { case '<': sReturn.append("<"); break; - case '>': sReturn.append(">"); break; - case '"': sReturn.append("""); break; - case '&': if (rString.match("&", i)) sReturn.append('&'); else sReturn.append("&"); break; + default: + sReturn.append(rString[i]); + break; + } + } + return sReturn.makeStringAndClear(); +} + +OString UnQuotHTML( const OString& rString ) +{ + OStringBuffer sReturn; + for (sal_Int32 i = 0; i != rString.getLength();) { + if (rString.match("&", i)) { + sReturn.append('&'); + i += RTL_CONSTASCII_LENGTH("&"); + } else if (rString.match("<", i)) { + sReturn.append('<'); + i += RTL_CONSTASCII_LENGTH("<"); + } else if (rString.match(">", i)) { + sReturn.append('>'); + i += RTL_CONSTASCII_LENGTH(">"); + } else if (rString.match(""", i)) { + sReturn.append('"'); + i += RTL_CONSTASCII_LENGTH("""); + } else if (rString.match("'", i)) { + sReturn.append('\''); + i += RTL_CONSTASCII_LENGTH("'"); + } else { + sReturn.append(rString[i]); + ++i; } } return sReturn.makeStringAndClear(); diff --git 
a/l10ntools/source/helpmerge.cxx b/l10ntools/source/helpmerge.cxx index 60ae8ed21962..98e81651569f 100644 --- a/l10ntools/source/helpmerge.cxx +++ b/l10ntools/source/helpmerge.cxx @@ -242,10 +242,10 @@ void HelpParser::ProcessHelp( LangHashMap* aLangHM , const rtl::OString& sCur , nPreSpaces++; pEntrys->GetText( sNewText, STRING_TYP_TEXT, sCur , true ); OUString sNewdata; - if (helper::isWellFormedXML(helper::QuotHTML(sNewText))) + OUString sTemp = OStringToOUString(sNewText, RTL_TEXTENCODING_UTF8); + if (helper::isWellFormedXML(OUStringToOString(XMLUtil::QuotHTML(sTemp),RTL_TEXTENCODING_UTF8))) { - sNewdata = sSourceText.copy(0,nPreSpaces) + - rtl::OStringToOUString(sNewText, RTL_TEXTENCODING_UTF8); + sNewdata = sSourceText.copy(0,nPreSpaces) + sTemp; } else { diff --git a/l10ntools/source/po.cxx b/l10ntools/source/po.cxx index 36e6ebb6f2ce..bbfe0633bab6 100755 --- a/l10ntools/source/po.cxx +++ b/l10ntools/source/po.cxx @@ -17,15 +17,12 @@ #include <string> #include <boost/crc.hpp> -#include <unicode/regex.h> #include "po.hxx" #define POESCAPED OString("\\n\\t\\r\\\\\\\"") #define POUNESCAPED OString("\n\t\r\\\"") -using namespace U_ICU_NAMESPACE; - /** Container of po entry Provide all file operations related to LibreOffice specific @@ -282,92 +279,17 @@ namespace const OString& rText,const bool bHelpText = false ) { if ( bHelpText ) - return lcl_UnEscapeText(rText,"\\<\\>\\\"\\\\","<>\"\\"); + return rText; else return lcl_UnEscapeText(rText,"\\n\\t\\r","\n\t\r"); } - //Find all special tag in a string using a regular expression - static void lcl_FindAllTag( - const OString& rText,std::vector<OString>& o_vFoundTags ) - { - - UErrorCode nIcuErr = U_ZERO_ERROR; - static const sal_uInt32 nSearchFlags = - UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE; - OUString sLocaleText( OStringToOUString(rText,RTL_TEXTENCODING_UTF8) ); - static const OUString sPattern( - "<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>"); - static const UnicodeString sSearchPat( - 
reinterpret_cast<const UChar*>(sPattern.getStr()), - sPattern.getLength() ); - UnicodeString sSource( - reinterpret_cast<const UChar*>( - sLocaleText.getStr()), sLocaleText.getLength() ); - - RegexMatcher aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr ); - aRegexMatcher.reset( sSource ); - int64_t nStartPos = 0; - while( aRegexMatcher.find(nStartPos, nIcuErr) && - nIcuErr == U_ZERO_ERROR ) - { - UnicodeString sMatch = - aRegexMatcher.group(nIcuErr); - o_vFoundTags.push_back( - OUStringToOString( - OUString( - reinterpret_cast<const sal_Unicode*>( - sMatch.getBuffer()),sMatch.length()), - RTL_TEXTENCODING_UTF8)); - nStartPos = aRegexMatcher.start(nIcuErr)+1; - } - } - - //Escape special tags - static OString lcl_EscapeTags( const OString& rText ) - { - typedef std::vector<OString> StrVec_t; - static const OString vInitializer[] = { - "ahelp", "link", "item", "emph", "defaultinline", - "switchinline", "caseinline", "variable", - "bookmark_value", "image", "embedvar", "alt" }; - static const StrVec_t vTagsForEscape( vInitializer, - vInitializer + sizeof(vInitializer) / sizeof(vInitializer[0]) ); - StrVec_t vFoundTags; - lcl_FindAllTag(rText,vFoundTags); - OString sResult = rText; - for(StrVec_t::const_iterator pFound = vFoundTags.begin(); - pFound != vFoundTags.end(); ++pFound) - { - bool bEscapeThis = false; - for(StrVec_t::const_iterator pEscape = vTagsForEscape.begin(); - pEscape != vTagsForEscape.end(); ++pEscape) - { - if (pFound->startsWith("<" + *pEscape) || - *pFound == "</" + *pEscape + ">") - { - bEscapeThis = true; - break; - } - } - if( bEscapeThis || *pFound=="<br/>" || - *pFound =="<help-id-missing/>") - { - OString sToReplace = "\\<" + - pFound->copy(1,pFound->getLength()-2). 
- replaceAll("\"","\\\"") + "\\>"; - sResult = sResult.replaceAll(*pFound, sToReplace); - } - } - return sResult; - } - //Escape to get merge string static OString lcl_EscapeMergeText( const OString& rText,const bool bHelpText = false ) { if ( bHelpText ) - return lcl_EscapeTags(rText.replaceAll("\\","\\\\")); + return rText; else return lcl_EscapeText(rText,"\n\t\r","\\n\\t\\r"); } diff --git a/l10ntools/source/uimerge.cxx b/l10ntools/source/uimerge.cxx index 76fc9ef8b400..7bebc61293cc 100644 --- a/l10ntools/source/uimerge.cxx +++ b/l10ntools/source/uimerge.cxx @@ -64,7 +64,7 @@ int extractTranslations() vIDs.push_back(helper::xmlStrToOString(content)); xmlFree(content); } - OString sText = helper::xmlStrToOString(xmlNodeGetContent(nodeLevel2)); + OString sText = helper::UnQuotHTML(helper::xmlStrToOString(xmlNodeGetContent(nodeLevel2))); common::writePoEntry( "Uiex", aPOStream, sInputFileName, vIDs[0], (vIDs.size()>=2) ? vIDs[1] : OString(), diff --git a/l10ntools/source/xmlparse.cxx b/l10ntools/source/xmlparse.cxx index 8ba715d48f62..c76d5a60ddc7 100644 --- a/l10ntools/source/xmlparse.cxx +++ b/l10ntools/source/xmlparse.cxx @@ -20,6 +20,7 @@ #include <iterator> /* std::iterator*/ +#include <cassert> #include <stdio.h> #include <sal/alloca.h> @@ -32,7 +33,9 @@ #include <osl/thread.hxx> #include <osl/process.h> #include <rtl/strbuf.hxx> +#include <unicode/regex.h> +using namespace U_ICU_NAMESPACE; using namespace std; using namespace osl; @@ -195,12 +198,10 @@ sal_Bool XMLFile::Write( ofstream &rStream , XMLNode *pCur ) for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ) { rStream << " "; rtl::OUString sData( (*pElement->GetAttributeList())[ j ]->GetName() ); - XMLUtil::QuotHTML( sData ); - WriteString( rStream , sData ); + WriteString( rStream , XMLUtil::QuotHTML( sData ) ); rStream << "=\""; sData = (*pElement->GetAttributeList())[ j ]->GetValue(); - XMLUtil::QuotHTML( sData ); - WriteString( rStream , sData ); + WriteString( rStream , 
XMLUtil::QuotHTML( sData ) ); rStream << "\""; } if ( !pElement->GetChildList()) @@ -218,8 +219,7 @@ sal_Bool XMLFile::Write( ofstream &rStream , XMLNode *pCur ) case XML_NODE_TYPE_DATA: { XMLData *pData = ( XMLData * ) pCur; rtl::OUString sData( pData->GetData()); - XMLUtil::QuotHTML( sData ); - WriteString( rStream, sData ); + WriteString( rStream, XMLUtil::QuotHTML( sData ) ); } break; case XML_NODE_TYPE_COMMENT: { @@ -717,7 +717,7 @@ void XMLElement::Print(XMLNode *pCur, OUStringBuffer& buffer , bool rootelement XMLElement *pElement = ( XMLElement * ) pCur; if( !pElement->GetName().equalsIgnoreAsciiCase("comment") ){ - buffer.append( OUString("\\<") ); + buffer.append( OUString("<") ); buffer.append( pElement->GetName() ); if ( pElement->GetAttributeList()){ for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ){ @@ -727,24 +727,24 @@ void XMLElement::Print(XMLNode *pCur, OUStringBuffer& buffer , bool rootelement buffer.append( OUString(" ") ); buffer.append( aAttrName ); buffer.append( OUString("=") ); - buffer.append( OUString("\\\"") ); + buffer.append( OUString("\"") ); buffer.append( (*pElement->GetAttributeList())[ j ]->GetValue() ); - buffer.append( OUString("\\\"") ); + buffer.append( OUString("\"") ); } } } if ( !pElement->GetChildList()) - buffer.append( OUString("/\\>") ); + buffer.append( OUString("/>") ); else { - buffer.append( OUString("\\>") ); + buffer.append( OUString(">") ); XMLChildNode* tmp=NULL; for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ){ tmp = (*pElement->GetChildList())[ k ]; Print( tmp, buffer , false); } - buffer.append( OUString("\\</") ); + buffer.append( OUString("</") ); buffer.append( pElement->GetName() ); - buffer.append( OUString("\\>") ); + buffer.append( OUString(">") ); } } } @@ -1172,41 +1172,114 @@ XMLFile *SimpleXMLParser::Execute( const rtl::OUString &rFileName, XMLFile* pXML return pXMLFile; } +namespace +{ -void XMLUtil::QuotHTML( OUString &rString ) +static icu::UnicodeString 
lcl_QuotRange( + const icu::UnicodeString& rString, const sal_Int32 nStart, + const sal_Int32 nEnd, bool bInsideTag = false ) { - const OString sString(OUStringToOString(rString, RTL_TEXTENCODING_UTF8)); - rString = OStringToOUString(helper::QuotHTML( sString ), RTL_TEXTENCODING_UTF8); + icu::UnicodeString sReturn; + assert( nStart > 0 && nStart < rString.length() ); + assert( nEnd > 0 && nEnd < rString.length() ); + for (sal_Int32 i = nStart; i <= nEnd; ++i) + { + switch (rString[i]) + { + case '<': + sReturn.append("<"); + break; + case '>': + sReturn.append(">"); + break; + case '"': + if( !bInsideTag ) + sReturn.append("""); + else + sReturn.append(rString[i]); + break; + case '&': + if (rString.startsWith("&", i, 5)) + sReturn.append('&'); + else + sReturn.append("&"); + break; + default: + sReturn.append(rString[i]); + break; + } + } + return sReturn; } -void XMLUtil::UnQuotHTML( rtl::OUString &rString ){ - rtl::OStringBuffer sReturn; - rtl::OString sString(rtl::OUStringToOString(rString, RTL_TEXTENCODING_UTF8)); - for (sal_Int32 i = 0; i != sString.getLength();) { - if (sString[i] == '\\') { - sReturn.append(RTL_CONSTASCII_STRINGPARAM("\\\\")); - ++i; - } else if (sString.match("&", i)) { - sReturn.append('&'); - i += RTL_CONSTASCII_LENGTH("&"); - } else if (sString.match("<", i)) { - sReturn.append('<'); - i += RTL_CONSTASCII_LENGTH("<"); - } else if (sString.match(">", i)) { - sReturn.append('>'); - i += RTL_CONSTASCII_LENGTH(">"); - } else if (sString.match(""", i)) { - sReturn.append('"'); - i += RTL_CONSTASCII_LENGTH("""); - } else if (sString.match("'", i)) { - sReturn.append('\''); - i += RTL_CONSTASCII_LENGTH("'"); - } else { - sReturn.append(sString[i]); - ++i; +static bool lcl_isTag( const icu::UnicodeString& rString ) +{ + const int nSize = 12; + static const icu::UnicodeString vTags[nSize] = { + "ahelp", "link", "item", "emph", "defaultinline", + "switchinline", "caseinline", "variable", + "bookmark_value", "image", "embedvar", "alt" }; + + for( 
int nIndex = 0; nIndex < nSize; ++nIndex ) + { + if( rString.startsWith("<" + vTags[nIndex]) || + rString == "</" + vTags[nIndex] + ">" ) + return true; + } + + return rString == "<br/>" || rString =="<help-id-missing/>"; +} + +} /// anonymous namespace + +OUString XMLUtil::QuotHTML( const OUString &rString ) +{ + if( rString.trim().isEmpty() ) + return rString; + UErrorCode nIcuErr = U_ZERO_ERROR; + static const sal_uInt32 nSearchFlags = + UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE; + static const OUString sPattern( + "<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>"); + static const UnicodeString sSearchPat( + reinterpret_cast<const UChar*>(sPattern.getStr()), + sPattern.getLength() ); + + icu::UnicodeString sSource( + reinterpret_cast<const UChar*>( + rString.getStr()), rString.getLength() ); + + RegexMatcher aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr ); + aRegexMatcher.reset( sSource ); + + icu::UnicodeString sReturn; + int32_t nEndPos = 0; + int32_t nStartPos = 0; + while( aRegexMatcher.find(nStartPos, nIcuErr) && nIcuErr == U_ZERO_ERROR ) + { + nStartPos = aRegexMatcher.start(nIcuErr); + sReturn.append(lcl_QuotRange(sSource, nEndPos, nStartPos-1)); + nEndPos = aRegexMatcher.end(nIcuErr); + icu::UnicodeString sMatch = aRegexMatcher.group(nIcuErr); + if( lcl_isTag(sMatch) ) + { + sReturn.append("<"); + sReturn.append(lcl_QuotRange(sSource, nStartPos+1, nEndPos-2, true)); + sReturn.append(">"); } + else + sReturn.append(lcl_QuotRange(sSource, nStartPos, nEndPos-1)); + ++nStartPos; } - rString = rtl::OStringToOUString(sReturn.makeStringAndClear(), RTL_TEXTENCODING_UTF8); + sReturn.append(lcl_QuotRange(sSource, nEndPos, sSource.length()-1)); + sReturn.append('\0'); + return OUString(reinterpret_cast<const sal_Unicode*>(sReturn.getBuffer())); +} + +OUString XMLUtil::UnQuotHTML( const OUString& rString ) +{ + const OString sString(OUStringToOString(rString, RTL_TEXTENCODING_UTF8)); + return OStringToOUString(helper::UnQuotHTML(sString), RTL_TEXTENCODING_UTF8); 
} /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |