summaryrefslogtreecommitdiff
path: root/l10ntools/source
diff options
context:
space:
mode:
authorZolnai Tamás <zolnaitamas2000@gmail.com>2013-03-31 20:11:57 +0200
committerZolnai Tamás <zolnaitamas2000@gmail.com>2013-03-31 20:25:13 +0200
commitce51bf1a6ef36bbd1eea751add342cae6f1004d2 (patch)
treefedd6cb9efdd0a90306316bda580ace87b78b0b4 /l10ntools/source
parent6ea8d4a55c3693d75da32af7e9a40a79bac99fa7 (diff)
Make a bit cleaner transformation of help strings
*Do not escape tags and double quotes in tags; instead, find tags (icu regexp) when merging and use this information to make strings valid. *Define a new Quot function for helpex, which works with icu Unicode strings. *Move tag search to xmlparse.cxx and use icu only in helpex. *QuotHTML no longer unescapes, it just replaces XML characters. (Unescaping is also useless in uimerge.cxx.) *Move UnQuotHTML() to helper. (It was used in xmlparse.cxx and cfgmerge.cxx.) *Use UnQuotHTML() in uimerge.cxx too. Change-Id: Ice8940ef69279709a1c5d84c6ae1b0d62a71ca76
Diffstat (limited to 'l10ntools/source')
-rw-r--r--l10ntools/source/cfgmerge.cxx32
-rw-r--r--l10ntools/source/helper.cxx59
-rw-r--r--l10ntools/source/helpmerge.cxx6
-rwxr-xr-xl10ntools/source/po.cxx82
-rw-r--r--l10ntools/source/uimerge.cxx2
-rw-r--r--l10ntools/source/xmlparse.cxx157
6 files changed, 160 insertions, 178 deletions
diff --git a/l10ntools/source/cfgmerge.cxx b/l10ntools/source/cfgmerge.cxx
index 695620004e5e..fd9474a184ce 100644
--- a/l10ntools/source/cfgmerge.cxx
+++ b/l10ntools/source/cfgmerge.cxx
@@ -27,6 +27,7 @@
#include "boost/scoped_ptr.hpp"
#include "rtl/strbuf.hxx"
+#include "helper.hxx"
#include "export.hxx"
#include "cfgmerge.hxx"
#include "tokens.h"
@@ -136,33 +137,6 @@ static OString lcl_QuoteHTML( const OString& rString )
return sReturn.makeStringAndClear();
}
-static OString lcl_UnquoteHTML( const OString& rString )
-{
- rtl::OStringBuffer sReturn;
- for (sal_Int32 i = 0; i != rString.getLength();) {
- if (rString.match("&amp;", i)) {
- sReturn.append('&');
- i += RTL_CONSTASCII_LENGTH("&amp;");
- } else if (rString.match("&lt;", i)) {
- sReturn.append('<');
- i += RTL_CONSTASCII_LENGTH("&lt;");
- } else if (rString.match("&gt;", i)) {
- sReturn.append('>');
- i += RTL_CONSTASCII_LENGTH("&gt;");
- } else if (rString.match("&quot;", i)) {
- sReturn.append('"');
- i += RTL_CONSTASCII_LENGTH("&quot;");
- } else if (rString.match("&apos;", i)) {
- sReturn.append('\'');
- i += RTL_CONSTASCII_LENGTH("&apos;");
- } else {
- sReturn.append(rString[i]);
- ++i;
- }
- }
- return sReturn.makeStringAndClear();
-}
-
} // anonymous namespace
//
@@ -489,7 +463,7 @@ void CfgExport::WorkOnResourceEnd()
if ( sText.isEmpty())
sText = sFallback;
- sText = lcl_UnquoteHTML( sText );
+ sText = helper::UnQuotHTML( sText );
common::writePoEntry(
"Cfgex", pOutputStream, sPath, pStackData->sResTyp,
@@ -504,7 +478,7 @@ void CfgExport::WorkOnText(
const rtl::OString &rIsoLang
)
{
- if( rIsoLang.getLength() ) rText = lcl_UnquoteHTML( rText );
+ if( rIsoLang.getLength() ) rText = helper::UnQuotHTML( rText );
}
diff --git a/l10ntools/source/helper.cxx b/l10ntools/source/helper.cxx
index cbcf6d1572fb..08a256013bcd 100644
--- a/l10ntools/source/helper.cxx
+++ b/l10ntools/source/helper.cxx
@@ -11,45 +11,58 @@
namespace helper {
-rtl::OString QuotHTML(const rtl::OString &rString)
+OString QuotHTML(const OString &rString)
{
- rtl::OStringBuffer sReturn;
- for (sal_Int32 i = 0; i < rString.getLength(); ++i) {
- switch (rString[i]) {
- case '\\':
- if (i < rString.getLength()) {
- switch (rString[i + 1]) {
- case '"':
- case '<':
- case '>':
- case '\\':
- ++i;
- break;
- }
- }
- // fall through
- default:
- sReturn.append(rString[i]);
- break;
-
+ OStringBuffer sReturn;
+ for (sal_Int32 i = 0; i < rString.getLength(); ++i)
+ {
+ switch (rString[i])
+ {
case '<':
sReturn.append("&lt;");
break;
-
case '>':
sReturn.append("&gt;");
break;
-
case '"':
sReturn.append("&quot;");
break;
-
case '&':
if (rString.match("&amp;", i))
sReturn.append('&');
else
sReturn.append("&amp;");
break;
+ default:
+ sReturn.append(rString[i]);
+ break;
+ }
+ }
+ return sReturn.makeStringAndClear();
+}
+
+OString UnQuotHTML( const OString& rString )
+{
+ OStringBuffer sReturn;
+ for (sal_Int32 i = 0; i != rString.getLength();) {
+ if (rString.match("&amp;", i)) {
+ sReturn.append('&');
+ i += RTL_CONSTASCII_LENGTH("&amp;");
+ } else if (rString.match("&lt;", i)) {
+ sReturn.append('<');
+ i += RTL_CONSTASCII_LENGTH("&lt;");
+ } else if (rString.match("&gt;", i)) {
+ sReturn.append('>');
+ i += RTL_CONSTASCII_LENGTH("&gt;");
+ } else if (rString.match("&quot;", i)) {
+ sReturn.append('"');
+ i += RTL_CONSTASCII_LENGTH("&quot;");
+ } else if (rString.match("&apos;", i)) {
+ sReturn.append('\'');
+ i += RTL_CONSTASCII_LENGTH("&apos;");
+ } else {
+ sReturn.append(rString[i]);
+ ++i;
}
}
return sReturn.makeStringAndClear();
diff --git a/l10ntools/source/helpmerge.cxx b/l10ntools/source/helpmerge.cxx
index 60ae8ed21962..98e81651569f 100644
--- a/l10ntools/source/helpmerge.cxx
+++ b/l10ntools/source/helpmerge.cxx
@@ -242,10 +242,10 @@ void HelpParser::ProcessHelp( LangHashMap* aLangHM , const rtl::OString& sCur ,
nPreSpaces++;
pEntrys->GetText( sNewText, STRING_TYP_TEXT, sCur , true );
OUString sNewdata;
- if (helper::isWellFormedXML(helper::QuotHTML(sNewText)))
+ OUString sTemp = OStringToOUString(sNewText, RTL_TEXTENCODING_UTF8);
+ if (helper::isWellFormedXML(OUStringToOString(XMLUtil::QuotHTML(sTemp),RTL_TEXTENCODING_UTF8)))
{
- sNewdata = sSourceText.copy(0,nPreSpaces) +
- rtl::OStringToOUString(sNewText, RTL_TEXTENCODING_UTF8);
+ sNewdata = sSourceText.copy(0,nPreSpaces) + sTemp;
}
else
{
diff --git a/l10ntools/source/po.cxx b/l10ntools/source/po.cxx
index 36e6ebb6f2ce..bbfe0633bab6 100755
--- a/l10ntools/source/po.cxx
+++ b/l10ntools/source/po.cxx
@@ -17,15 +17,12 @@
#include <string>
#include <boost/crc.hpp>
-#include <unicode/regex.h>
#include "po.hxx"
#define POESCAPED OString("\\n\\t\\r\\\\\\\"")
#define POUNESCAPED OString("\n\t\r\\\"")
-using namespace U_ICU_NAMESPACE;
-
/** Container of po entry
Provide all file operations related to LibreOffice specific
@@ -282,92 +279,17 @@ namespace
const OString& rText,const bool bHelpText = false )
{
if ( bHelpText )
- return lcl_UnEscapeText(rText,"\\<\\>\\\"\\\\","<>\"\\");
+ return rText;
else
return lcl_UnEscapeText(rText,"\\n\\t\\r","\n\t\r");
}
- //Find all special tag in a string using a regular expression
- static void lcl_FindAllTag(
- const OString& rText,std::vector<OString>& o_vFoundTags )
- {
-
- UErrorCode nIcuErr = U_ZERO_ERROR;
- static const sal_uInt32 nSearchFlags =
- UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE;
- OUString sLocaleText( OStringToOUString(rText,RTL_TEXTENCODING_UTF8) );
- static const OUString sPattern(
- "<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>");
- static const UnicodeString sSearchPat(
- reinterpret_cast<const UChar*>(sPattern.getStr()),
- sPattern.getLength() );
- UnicodeString sSource(
- reinterpret_cast<const UChar*>(
- sLocaleText.getStr()), sLocaleText.getLength() );
-
- RegexMatcher aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr );
- aRegexMatcher.reset( sSource );
- int64_t nStartPos = 0;
- while( aRegexMatcher.find(nStartPos, nIcuErr) &&
- nIcuErr == U_ZERO_ERROR )
- {
- UnicodeString sMatch =
- aRegexMatcher.group(nIcuErr);
- o_vFoundTags.push_back(
- OUStringToOString(
- OUString(
- reinterpret_cast<const sal_Unicode*>(
- sMatch.getBuffer()),sMatch.length()),
- RTL_TEXTENCODING_UTF8));
- nStartPos = aRegexMatcher.start(nIcuErr)+1;
- }
- }
-
- //Escape special tags
- static OString lcl_EscapeTags( const OString& rText )
- {
- typedef std::vector<OString> StrVec_t;
- static const OString vInitializer[] = {
- "ahelp", "link", "item", "emph", "defaultinline",
- "switchinline", "caseinline", "variable",
- "bookmark_value", "image", "embedvar", "alt" };
- static const StrVec_t vTagsForEscape( vInitializer,
- vInitializer + sizeof(vInitializer) / sizeof(vInitializer[0]) );
- StrVec_t vFoundTags;
- lcl_FindAllTag(rText,vFoundTags);
- OString sResult = rText;
- for(StrVec_t::const_iterator pFound = vFoundTags.begin();
- pFound != vFoundTags.end(); ++pFound)
- {
- bool bEscapeThis = false;
- for(StrVec_t::const_iterator pEscape = vTagsForEscape.begin();
- pEscape != vTagsForEscape.end(); ++pEscape)
- {
- if (pFound->startsWith("<" + *pEscape) ||
- *pFound == "</" + *pEscape + ">")
- {
- bEscapeThis = true;
- break;
- }
- }
- if( bEscapeThis || *pFound=="<br/>" ||
- *pFound =="<help-id-missing/>")
- {
- OString sToReplace = "\\<" +
- pFound->copy(1,pFound->getLength()-2).
- replaceAll("\"","\\\"") + "\\>";
- sResult = sResult.replaceAll(*pFound, sToReplace);
- }
- }
- return sResult;
- }
-
//Escape to get merge string
static OString lcl_EscapeMergeText(
const OString& rText,const bool bHelpText = false )
{
if ( bHelpText )
- return lcl_EscapeTags(rText.replaceAll("\\","\\\\"));
+ return rText;
else
return lcl_EscapeText(rText,"\n\t\r","\\n\\t\\r");
}
diff --git a/l10ntools/source/uimerge.cxx b/l10ntools/source/uimerge.cxx
index 76fc9ef8b400..7bebc61293cc 100644
--- a/l10ntools/source/uimerge.cxx
+++ b/l10ntools/source/uimerge.cxx
@@ -64,7 +64,7 @@ int extractTranslations()
vIDs.push_back(helper::xmlStrToOString(content));
xmlFree(content);
}
- OString sText = helper::xmlStrToOString(xmlNodeGetContent(nodeLevel2));
+ OString sText = helper::UnQuotHTML(helper::xmlStrToOString(xmlNodeGetContent(nodeLevel2)));
common::writePoEntry(
"Uiex", aPOStream, sInputFileName, vIDs[0],
(vIDs.size()>=2) ? vIDs[1] : OString(),
diff --git a/l10ntools/source/xmlparse.cxx b/l10ntools/source/xmlparse.cxx
index 8ba715d48f62..c76d5a60ddc7 100644
--- a/l10ntools/source/xmlparse.cxx
+++ b/l10ntools/source/xmlparse.cxx
@@ -20,6 +20,7 @@
#include <iterator> /* std::iterator*/
+#include <cassert>
#include <stdio.h>
#include <sal/alloca.h>
@@ -32,7 +33,9 @@
#include <osl/thread.hxx>
#include <osl/process.h>
#include <rtl/strbuf.hxx>
+#include <unicode/regex.h>
+using namespace U_ICU_NAMESPACE;
using namespace std;
using namespace osl;
@@ -195,12 +198,10 @@ sal_Bool XMLFile::Write( ofstream &rStream , XMLNode *pCur )
for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ) {
rStream << " ";
rtl::OUString sData( (*pElement->GetAttributeList())[ j ]->GetName() );
- XMLUtil::QuotHTML( sData );
- WriteString( rStream , sData );
+ WriteString( rStream , XMLUtil::QuotHTML( sData ) );
rStream << "=\"";
sData = (*pElement->GetAttributeList())[ j ]->GetValue();
- XMLUtil::QuotHTML( sData );
- WriteString( rStream , sData );
+ WriteString( rStream , XMLUtil::QuotHTML( sData ) );
rStream << "\"";
}
if ( !pElement->GetChildList())
@@ -218,8 +219,7 @@ sal_Bool XMLFile::Write( ofstream &rStream , XMLNode *pCur )
case XML_NODE_TYPE_DATA: {
XMLData *pData = ( XMLData * ) pCur;
rtl::OUString sData( pData->GetData());
- XMLUtil::QuotHTML( sData );
- WriteString( rStream, sData );
+ WriteString( rStream, XMLUtil::QuotHTML( sData ) );
}
break;
case XML_NODE_TYPE_COMMENT: {
@@ -717,7 +717,7 @@ void XMLElement::Print(XMLNode *pCur, OUStringBuffer& buffer , bool rootelement
XMLElement *pElement = ( XMLElement * ) pCur;
if( !pElement->GetName().equalsIgnoreAsciiCase("comment") ){
- buffer.append( OUString("\\<") );
+ buffer.append( OUString("<") );
buffer.append( pElement->GetName() );
if ( pElement->GetAttributeList()){
for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ){
@@ -727,24 +727,24 @@ void XMLElement::Print(XMLNode *pCur, OUStringBuffer& buffer , bool rootelement
buffer.append( OUString(" ") );
buffer.append( aAttrName );
buffer.append( OUString("=") );
- buffer.append( OUString("\\\"") );
+ buffer.append( OUString("\"") );
buffer.append( (*pElement->GetAttributeList())[ j ]->GetValue() );
- buffer.append( OUString("\\\"") );
+ buffer.append( OUString("\"") );
}
}
}
if ( !pElement->GetChildList())
- buffer.append( OUString("/\\>") );
+ buffer.append( OUString("/>") );
else {
- buffer.append( OUString("\\>") );
+ buffer.append( OUString(">") );
XMLChildNode* tmp=NULL;
for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ){
tmp = (*pElement->GetChildList())[ k ];
Print( tmp, buffer , false);
}
- buffer.append( OUString("\\</") );
+ buffer.append( OUString("</") );
buffer.append( pElement->GetName() );
- buffer.append( OUString("\\>") );
+ buffer.append( OUString(">") );
}
}
}
@@ -1172,41 +1172,114 @@ XMLFile *SimpleXMLParser::Execute( const rtl::OUString &rFileName, XMLFile* pXML
return pXMLFile;
}
+namespace
+{
-void XMLUtil::QuotHTML( OUString &rString )
+static icu::UnicodeString lcl_QuotRange(
+ const icu::UnicodeString& rString, const sal_Int32 nStart,
+ const sal_Int32 nEnd, bool bInsideTag = false )
{
- const OString sString(OUStringToOString(rString, RTL_TEXTENCODING_UTF8));
- rString = OStringToOUString(helper::QuotHTML( sString ), RTL_TEXTENCODING_UTF8);
+ icu::UnicodeString sReturn;
+ assert( nStart > 0 && nStart < rString.length() );
+ assert( nEnd > 0 && nEnd < rString.length() );
+ for (sal_Int32 i = nStart; i <= nEnd; ++i)
+ {
+ switch (rString[i])
+ {
+ case '<':
+ sReturn.append("&lt;");
+ break;
+ case '>':
+ sReturn.append("&gt;");
+ break;
+ case '"':
+ if( !bInsideTag )
+ sReturn.append("&quot;");
+ else
+ sReturn.append(rString[i]);
+ break;
+ case '&':
+ if (rString.startsWith("&amp;", i, 5))
+ sReturn.append('&');
+ else
+ sReturn.append("&amp;");
+ break;
+ default:
+ sReturn.append(rString[i]);
+ break;
+ }
+ }
+ return sReturn;
}
-void XMLUtil::UnQuotHTML( rtl::OUString &rString ){
- rtl::OStringBuffer sReturn;
- rtl::OString sString(rtl::OUStringToOString(rString, RTL_TEXTENCODING_UTF8));
- for (sal_Int32 i = 0; i != sString.getLength();) {
- if (sString[i] == '\\') {
- sReturn.append(RTL_CONSTASCII_STRINGPARAM("\\\\"));
- ++i;
- } else if (sString.match("&amp;", i)) {
- sReturn.append('&');
- i += RTL_CONSTASCII_LENGTH("&amp;");
- } else if (sString.match("&lt;", i)) {
- sReturn.append('<');
- i += RTL_CONSTASCII_LENGTH("&lt;");
- } else if (sString.match("&gt;", i)) {
- sReturn.append('>');
- i += RTL_CONSTASCII_LENGTH("&gt;");
- } else if (sString.match("&quot;", i)) {
- sReturn.append('"');
- i += RTL_CONSTASCII_LENGTH("&quot;");
- } else if (sString.match("&apos;", i)) {
- sReturn.append('\'');
- i += RTL_CONSTASCII_LENGTH("&apos;");
- } else {
- sReturn.append(sString[i]);
- ++i;
+static bool lcl_isTag( const icu::UnicodeString& rString )
+{
+ const int nSize = 12;
+ static const icu::UnicodeString vTags[nSize] = {
+ "ahelp", "link", "item", "emph", "defaultinline",
+ "switchinline", "caseinline", "variable",
+ "bookmark_value", "image", "embedvar", "alt" };
+
+ for( int nIndex = 0; nIndex < nSize; ++nIndex )
+ {
+ if( rString.startsWith("<" + vTags[nIndex]) ||
+ rString == "</" + vTags[nIndex] + ">" )
+ return true;
+ }
+
+ return rString == "<br/>" || rString =="<help-id-missing/>";
+}
+
+} /// anonymous namespace
+
+OUString XMLUtil::QuotHTML( const OUString &rString )
+{
+ if( rString.trim().isEmpty() )
+ return rString;
+ UErrorCode nIcuErr = U_ZERO_ERROR;
+ static const sal_uInt32 nSearchFlags =
+ UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE;
+ static const OUString sPattern(
+ "<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>");
+ static const UnicodeString sSearchPat(
+ reinterpret_cast<const UChar*>(sPattern.getStr()),
+ sPattern.getLength() );
+
+ icu::UnicodeString sSource(
+ reinterpret_cast<const UChar*>(
+ rString.getStr()), rString.getLength() );
+
+ RegexMatcher aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr );
+ aRegexMatcher.reset( sSource );
+
+ icu::UnicodeString sReturn;
+ int32_t nEndPos = 0;
+ int32_t nStartPos = 0;
+ while( aRegexMatcher.find(nStartPos, nIcuErr) && nIcuErr == U_ZERO_ERROR )
+ {
+ nStartPos = aRegexMatcher.start(nIcuErr);
+ sReturn.append(lcl_QuotRange(sSource, nEndPos, nStartPos-1));
+ nEndPos = aRegexMatcher.end(nIcuErr);
+ icu::UnicodeString sMatch = aRegexMatcher.group(nIcuErr);
+ if( lcl_isTag(sMatch) )
+ {
+ sReturn.append("<");
+ sReturn.append(lcl_QuotRange(sSource, nStartPos+1, nEndPos-2, true));
+ sReturn.append(">");
}
+ else
+ sReturn.append(lcl_QuotRange(sSource, nStartPos, nEndPos-1));
+ ++nStartPos;
}
- rString = rtl::OStringToOUString(sReturn.makeStringAndClear(), RTL_TEXTENCODING_UTF8);
+ sReturn.append(lcl_QuotRange(sSource, nEndPos, sSource.length()-1));
+ sReturn.append('\0');
+ return OUString(reinterpret_cast<const sal_Unicode*>(sReturn.getBuffer()));
+}
+
+OUString XMLUtil::UnQuotHTML( const OUString& rString )
+{
+ const OString sString(OUStringToOString(rString, RTL_TEXTENCODING_UTF8));
+ return OStringToOUString(helper::UnQuotHTML(sString), RTL_TEXTENCODING_UTF8);
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */