diff options
author | Norbert Thiebaud <nthiebaud@gmail.com> | 2012-12-09 00:55:32 -0600 |
---|---|---|
committer | Andras Timar <atimar@suse.com> | 2012-12-11 15:54:44 +0000 |
commit | f1335171987272b66c2e1e0b02cddb28d44502ec (patch) | |
tree | 8ee00899faf91bef00388a0b1d66479eef4c4644 /l10ntools | |
parent | d449869617a3e07aef483437158ca7f634ee7263 (diff) |
performance tuning of helpex
callgrind (on text/swriter/guide translated in 'fr')
before Ir = 889,687,925
after Ir = 406,372,177 (405,668,292 w/o the 'native' memory allocator)
time confirms:
before 0.288/0.272/0.016
after 0.146/0.131/0.015
the outputs before and after were diffed, and all generated xhp files
are identical.
All improvements were done while still using the 'normal' sal and C++ API.
There is still some low-hanging fruit, like XMLUtil::QuotHTML
(probably 20-30 million Ir to shave there)
The destruction of XMLElements is also singularly high
(11% of the remaining Ir count)
But the bulk of what is left is OString management (alloc/acquire/release/
free/copy), and I/O using streams
Change-Id: Ia316c953cd4bd46fc33a58a0c10f26b0ffa042c2
Reviewed-on: https://gerrit.libreoffice.org/1262
Tested-by: Norbert Thiebaud <nthiebaud@gmail.com>
Reviewed-by: Andras Timar <atimar@suse.com>
Tested-by: Andras Timar <atimar@suse.com>
Diffstat (limited to 'l10ntools')
-rw-r--r-- | l10ntools/inc/xmlparse.hxx | 70 | ||||
-rw-r--r-- | l10ntools/source/common.hxx | 27 | ||||
-rw-r--r-- | l10ntools/source/helpex.cxx | 2 | ||||
-rw-r--r-- | l10ntools/source/helpmerge.cxx | 87 | ||||
-rw-r--r-- | l10ntools/source/merge.cxx | 27 | ||||
-rw-r--r-- | l10ntools/source/po.cxx | 237 | ||||
-rw-r--r-- | l10ntools/source/xmlparse.cxx | 552 |
7 files changed, 712 insertions, 290 deletions
diff --git a/l10ntools/inc/xmlparse.hxx b/l10ntools/inc/xmlparse.hxx index 7a758727c5f6..0df7880b92e1 100644 --- a/l10ntools/inc/xmlparse.hxx +++ b/l10ntools/inc/xmlparse.hxx @@ -31,7 +31,7 @@ #include <expat.h> #include <rtl/ustring.hxx> -#include <rtl/ustrbuf.hxx> +#include <rtl/strbuf.hxx> #include "boost/unordered_map.hpp" #include "export.hxx" @@ -56,21 +56,21 @@ using namespace std; class XMLAttribute { private: - rtl::OUString sName; - rtl::OUString sValue; + rtl::OString sName; + rtl::OString sValue; public: /// creates an attribute XMLAttribute( - const rtl::OUString &rName, // attributes name - const rtl::OUString &rValue // attributes data + const rtl::OString &rName, // attributes name + const rtl::OString &rValue // attributes data ) : sName( rName ), sValue( rValue ) {} - rtl::OUString GetName() const { return sName; } - rtl::OUString GetValue() const { return sValue; } + rtl::OString GetName() const { return sName; } + rtl::OString GetValue() const { return sValue; } - void setValue(const rtl::OUString &rValue){sValue=rValue;} + void setValue(const rtl::OString &rValue){sValue=rValue;} /// returns true if two attributes are equal and have the same value sal_Bool IsEqual( @@ -183,7 +183,7 @@ class XMLFile : public XMLParentNode { public: XMLFile( - const rtl::OUString &rFileName // the file name, empty if created from memory stream + const rtl::OString &rFileName // the file name, empty if created from memory stream ); XMLFile( const XMLFile& obj ) ; ~XMLFile(); @@ -203,18 +203,18 @@ public: virtual sal_uInt16 GetNodeType(); /// returns file name - rtl::OUString GetName() { return sFileName; } - void SetName( const rtl::OUString &rFilename ) { sFileName = rFilename; } + rtl::OString GetName() { return sFileName; } + void SetName( const rtl::OString &rFilename ) { sFileName = rFilename; } const std::vector<rtl::OString> getOrder(){ return order; } protected: // writes a string as UTF8 with dos line ends to a given stream - void WriteString( ofstream 
&rStream, const rtl::OUString &sString ); + void WriteString( ofstream &rStream, const rtl::OString &sString ); void InsertL10NElement( XMLElement* pElement); // DATA - rtl::OUString sFileName; + rtl::OString sFileName; const rtl::OString ID, OLDREF, XML_LANG; @@ -230,10 +230,10 @@ class XMLUtil{ public: /// Quot the XML characters and replace \n \t - static void QuotHTML( rtl::OUString &rString ); + static void QuotHTML( rtl::OString &rString ); /// UnQuot the XML characters and restore \n \t - static void UnQuotHTML ( rtl::OUString &rString ); + static void UnQuotHTML ( rtl::OString &rString ); }; @@ -245,7 +245,7 @@ public: class XMLElement : public XMLParentNode { private: - rtl::OUString sElementName; + rtl::OString sElementName; XMLAttributeList *pAttributes; rtl::OString project, filename, @@ -256,12 +256,12 @@ private: int nPos; protected: - void Print(XMLNode *pCur, OUStringBuffer& buffer , bool rootelement); + void Print(XMLNode *pCur, OStringBuffer& buffer , bool rootelement); public: /// create a element node XMLElement(){} XMLElement( - const rtl::OUString &rName, // the element name + const rtl::OString &rName, // the element name XMLParentNode *Parent // parent node of this element ): XMLParentNode( Parent ), sElementName( rName ), @@ -283,18 +283,18 @@ public: virtual sal_uInt16 GetNodeType(); /// returns element name - rtl::OUString GetName() { return sElementName; } + rtl::OString GetName() { return sElementName; } /// returns list of attributes of this element XMLAttributeList *GetAttributeList() { return pAttributes; } /// adds a new attribute to this element, typically used by parser - void AddAttribute( const rtl::OUString &rAttribute, const rtl::OUString &rValue ); + void AddAttribute( const rtl::OString &rAttribute, const rtl::OString &rValue ); - void ChangeLanguageTag( const rtl::OUString &rValue ); + void ChangeLanguageTag( const rtl::OString &rValue ); // Return a Unicode String representation of this object - OUString ToOUString(); + 
OString ToOString(); void SetProject ( rtl::OString const & prj ){ project = prj; } void SetFileName ( rtl::OString const & fn ){ filename = fn; } @@ -322,18 +322,18 @@ public: class XMLData : public XMLChildNode { private: - rtl::OUString sData; + rtl::OString sData; bool isNewCreated; public: /// create a data node XMLData( - const rtl::OUString &rData, // the initial data + const rtl::OString &rData, // the initial data XMLParentNode *Parent // the parent node of this data, typically a element node ) : XMLChildNode( Parent ), sData( rData ) , isNewCreated ( false ){} XMLData( - const rtl::OUString &rData, // the initial data + const rtl::OString &rData, // the initial data XMLParentNode *Parent, // the parent node of this data, typically a element node bool newCreated ) @@ -345,12 +345,12 @@ public: virtual sal_uInt16 GetNodeType(); /// returns the data - rtl::OUString GetData() { return sData; } + rtl::OString GetData() { return sData; } bool isNew() { return isNewCreated; } /// adds new character data to the existing one void AddData( - const rtl::OUString &rData // the new data + const rtl::OString &rData // the new data ); @@ -364,12 +364,12 @@ public: class XMLComment : public XMLChildNode { private: - rtl::OUString sComment; + rtl::OString sComment; public: /// create a comment node XMLComment( - const rtl::OUString &rComment, // the comment + const rtl::OString &rComment, // the comment XMLParentNode *Parent // the parent node of this comemnt, typically a element node ) : XMLChildNode( Parent ), sComment( rComment ) {} @@ -381,7 +381,7 @@ public: XMLComment& operator=(const XMLComment& obj); /// returns the comment - rtl::OUString GetComment() { return sComment; } + rtl::OString GetComment() { return sComment; } }; //------------------------------------------------------------------------- @@ -391,12 +391,12 @@ public: class XMLDefault : public XMLChildNode { private: - rtl::OUString sDefault; + rtl::OString sDefault; public: /// create a comment node 
XMLDefault( - const rtl::OUString &rDefault, // the comment + const rtl::OString &rDefault, // the comment XMLParentNode *Parent // the parent node of this comemnt, typically a element node ) : XMLChildNode( Parent ), sDefault( rDefault ) {} @@ -409,7 +409,7 @@ public: virtual sal_uInt16 GetNodeType(); /// returns the comment - rtl::OUString GetDefault() { return sDefault; } + rtl::OString GetDefault() { return sDefault; } }; //------------------------------------------------------------------------- @@ -420,7 +420,7 @@ struct XMLError { XML_Error eCode; // the error code std::size_t nLine; // error line number std::size_t nColumn; // error column number - rtl::OUString sMessage; // readable error message + rtl::OString sMessage; // readable error message }; //------------------------------------------------------------------------- @@ -460,7 +460,7 @@ public: /// parse a file, returns NULL on criticall errors XMLFile *Execute( - const rtl::OUString &rFileName, // the file name + const rtl::OString &rFileName, // the file name XMLFile *pXMLFileIn // the XMLFile ); diff --git a/l10ntools/source/common.hxx b/l10ntools/source/common.hxx index 0129c6a7c34e..da45286eee68 100644 --- a/l10ntools/source/common.hxx +++ b/l10ntools/source/common.hxx @@ -69,6 +69,33 @@ inline rtl::OUString pathnameToAbsoluteUrl(rtl::OUString const & pathname) { return url; } +inline rtl::OUString pathnameToAbsoluteUrl(rtl::OString const & pathname) +{ + rtl::OUString url; + if (osl::FileBase::getFileURLFromSystemPath(OStringToOUString(pathname, RTL_TEXTENCODING_UTF8) , url) + != osl::FileBase::E_None) + { + std::cerr << "Error: Cannot convert input pathname to URL\n"; + std::exit(EXIT_FAILURE); + } + static rtl::OUString cwd; + if(cwd.isEmpty()) + { + if (osl_getProcessWorkingDir(&cwd.pData) != osl_Process_E_None) + { + std::cerr << "Error: Cannot determine cwd\n"; + std::exit(EXIT_FAILURE); + } + } + if (osl::FileBase::getAbsoluteFileURL(cwd, url, url) + != osl::FileBase::E_None) + { + 
std::cerr << "Error: Cannot convert input URL to absolute URL\n"; + std::exit(EXIT_FAILURE); + } + return url; +} + inline rtl::OString pathnameToken(char const * pathname, char const * root) { rtl::OUString full; if (!rtl_convertStringToUString( diff --git a/l10ntools/source/helpex.cxx b/l10ntools/source/helpex.cxx index 21a8b9d3e1b7..cd99d06fa6f5 100644 --- a/l10ntools/source/helpex.cxx +++ b/l10ntools/source/helpex.cxx @@ -118,7 +118,7 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) { hasNoError = aParser.CreateSDF( aArgs.m_sOutputFile, aArgs.m_sPrj, aArgs.m_sPrjRoot, - aArgs.m_sInputFile, new XMLFile( OUString('0') ), "help" ); + aArgs.m_sInputFile, new XMLFile( OString('0') ), "help" ); } if( hasNoError ) diff --git a/l10ntools/source/helpmerge.cxx b/l10ntools/source/helpmerge.cxx index 317378544928..d598bfa1f969 100644 --- a/l10ntools/source/helpmerge.cxx +++ b/l10ntools/source/helpmerge.cxx @@ -78,20 +78,16 @@ bool HelpParser::CreateSDF( const rtl::OString &rSDFFile_in, const rtl::OString &rPrj_in,const rtl::OString &rRoot_in, const rtl::OString &sHelpFile, XMLFile *pXmlFile, const rtl::OString &rGsi1){ SimpleXMLParser aParser; - rtl::OUString sXmlFile( - rtl::OStringToOUString(sHelpFile, RTL_TEXTENCODING_ASCII_US)); //TODO: explicit BOM handling? - std::auto_ptr <XMLFile> file ( aParser.Execute( sXmlFile, pXmlFile ) ); + std::auto_ptr <XMLFile> file ( aParser.Execute( sHelpFile, pXmlFile ) ); if(file.get() == NULL) { printf( "%s: %s\n", sHelpFile.getStr(), - (rtl::OUStringToOString( - aParser.GetError().sMessage, RTL_TEXTENCODING_ASCII_US). 
- getStr())); + aParser.GetError().sMessage.getStr()); exit(-1); } file->Extract(); @@ -113,10 +109,7 @@ bool HelpParser::CreateSDF( LangHashMap* pElem; XMLElement* pXMLElement = NULL; - OUStringBuffer sBuffer; - const OUString sOUPrj( rPrj_in.getStr() , rPrj_in.getLength() , RTL_TEXTENCODING_ASCII_US ); - const OUString sOUActFileName(sActFileName.getStr() , sActFileName.getLength() , RTL_TEXTENCODING_ASCII_US ); - const OUString sOUGsi1( rGsi1.getStr() , rGsi1.getLength() , RTL_TEXTENCODING_ASCII_US ); + OStringBuffer sBuffer; Export::InitLanguages( false ); std::vector<rtl::OString> aLanguages = Export::GetLanguages(); @@ -138,34 +131,33 @@ bool HelpParser::CreateSDF( if( pXMLElement != NULL ) { - OUString data( - pXMLElement->ToOUString(). + OString data( + pXMLElement->ToOString(). replaceAll( - rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("\n")), - rtl::OUString()). + rtl::OString("\n"), + rtl::OString()). replaceAll( - rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("\t")), - rtl::OUString()).trim()); - sBuffer.append( sOUPrj ); - sBuffer.append('\t'); - if ( !rRoot_in.isEmpty()) - sBuffer.append( sOUActFileName ); - sBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("\t0\t")); - sBuffer.append( sOUGsi1 ); //"help"; - sBuffer.append('\t'); - rtl::OString sID = posm->first; // ID - sBuffer.append( rtl::OStringToOUString( sID, RTL_TEXTENCODING_UTF8 ) ); - sBuffer.append('\t'); - rtl::OString sOldRef = pXMLElement->GetOldref(); // oldref - sBuffer.append( rtl::OStringToOUString(sOldRef, RTL_TEXTENCODING_UTF8 ) ); - sBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("\t\t\t0\t")); - sBuffer.append( rtl::OStringToOUString( sCur, RTL_TEXTENCODING_UTF8 ) ); - sBuffer.append('\t'); - sBuffer.append( data ); - sBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("\t\t\t\t")); - rtl::OString sOut(rtl::OUStringToOString(sBuffer.makeStringAndClear().getStr() , RTL_TEXTENCODING_UTF8)); + rtl::OString("\t"), + rtl::OString()).trim()); if( !data.isEmpty() ) - aSDFStream << sOut.getStr() << '\n'; 
+ { + sBuffer.append( rPrj_in ); + sBuffer.append("\t"); + if ( !rRoot_in.isEmpty()) + sBuffer.append( sActFileName ); + sBuffer.append( "\t0\t"); + sBuffer.append( rGsi1 ); //"help"; + sBuffer.append( "\t"); + sBuffer.append( posm->first ); + sBuffer.append( "\t"); + sBuffer.append( pXMLElement->GetOldref()); + sBuffer.append( "\t\t\t0\t"); + sBuffer.append( sCur); + sBuffer.append('\t'); + sBuffer.append( data ); + sBuffer.append( "\t\t\t\t"); + aSDFStream << sBuffer.makeStringAndClear().getStr() << '\n'; + } pXMLElement=NULL; } else @@ -188,11 +180,9 @@ bool HelpParser::Merge( const rtl::OString &rSDFFile, const rtl::OString &rDesti SimpleXMLParser aParser; - rtl::OUString sXmlFile( - rtl::OStringToOUString(sHelpFile, RTL_TEXTENCODING_ASCII_US)); //TODO: explicit BOM handling? - XMLFile* xmlfile = ( aParser.Execute( sXmlFile, new XMLFile( rtl::OUString('0') ) ) ); + XMLFile* xmlfile = ( aParser.Execute( sHelpFile, new XMLFile( rtl::OString('0') ) ) ); hasNoError = MergeSingleFile( xmlfile , aMergeDataFile , rLanguage , rDestinationFile ); delete xmlfile; return hasNoError; @@ -203,7 +193,7 @@ bool HelpParser::MergeSingleFile( XMLFile* file , MergeDataFile& aMergeDataFile { file->Extract(); - XMLHashMap* aXMLStrHM = file->GetStrings(); + XMLHashMap* aXMLStrHM = file->GetStrings(); LangHashMap* aLangHM; static ResData pResData( "","",""); pResData.sResTyp = "help"; @@ -255,14 +245,14 @@ void HelpParser::ProcessHelp( LangHashMap* aLangHM , const rtl::OString& sCur , if( pEntrys != NULL) { rtl::OString sNewText; - rtl::OUString sSourceText( - pXMLElement->ToOUString(). + rtl::OString sSourceText( + pXMLElement->ToOString(). replaceAll( - rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("\n")), - rtl::OUString()). + rtl::OString("\n"), + rtl::OString()). 
replaceAll( - rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("\t")), - rtl::OUString())); + rtl::OString("\t"), + rtl::OString())); // re-add spaces to the beginning of translated string, // important for indentation of Basic code examples sal_Int32 nPreSpaces = 0; @@ -270,11 +260,11 @@ void HelpParser::ProcessHelp( LangHashMap* aLangHM , const rtl::OString& sCur , while ( (nPreSpaces < nLen) && (*(sSourceText.getStr()+nPreSpaces) == ' ') ) nPreSpaces++; pEntrys->GetText( sNewText, STRING_TYP_TEXT, sCur , true ); - OUString sNewdata; + OString sNewdata; if (helper::isWellFormedXML(helper::QuotHTML(sNewText))) { sNewdata = sSourceText.copy(0,nPreSpaces) + - rtl::OStringToOUString(sNewText, RTL_TEXTENCODING_UTF8); + sNewText; } else { @@ -297,8 +287,7 @@ void HelpParser::ProcessHelp( LangHashMap* aLangHM , const rtl::OString& sCur , pResData->sGId.getStr(), pResData->sId.getStr(), pResData->sResTyp.getStr()); } - pXMLElement->ChangeLanguageTag( - rtl::OStringToOUString(sCur, RTL_TEXTENCODING_ASCII_US)); + pXMLElement->ChangeLanguageTag(sCur); } } diff --git a/l10ntools/source/merge.cxx b/l10ntools/source/merge.cxx index 4f065fb2ef7e..d7c5353f929b 100644 --- a/l10ntools/source/merge.cxx +++ b/l10ntools/source/merge.cxx @@ -29,13 +29,18 @@ namespace { - static ::rtl::OString lcl_NormalizeFilename(const ::rtl::OString& rFilename) + static sal_Int32 lcl_BasenameIndex(const OString& rFilename) { - return rFilename.copy( - std::max( - rFilename.lastIndexOf( '\\' ), - rFilename.lastIndexOf( '/' ))+1); - }; + sal_Int32 index; + for(index = rFilename.getLength() - 1; index >= 0 ; --index) + { + if(rFilename[index] == '/' || rFilename[index] == '\\') + { + break; + } + } + return index + 1; + } static bool lcl_ReadPoChecked( PoEntry& o_rPoEntry, PoIfstream& rPoFile, @@ -165,7 +170,7 @@ MergeDataFile::MergeDataFile( while( !aInputStream.eof() ) { const OString sHack("HACK"); - const OString sFileName( lcl_NormalizeFilename(rFile) ); + const OString sFileName( rFile.getStr() + 
lcl_BasenameIndex(rFile) ); const bool bReadAll = sFileName.isEmpty(); const OString sPoFileName(sPoFile.data(), sPoFile.length()); PoIfstream aPoInput; @@ -379,13 +384,7 @@ rtl::OString MergeDataFile::CreateKey(const rtl::OString& rTYP, const rtl::OStri const rtl::OString& rLID, const rtl::OString& rFilename, bool bCaseSensitive) { static const ::rtl::OString sStroke('-'); - ::rtl::OString sKey( rTYP ); - sKey += sStroke; - sKey += rGID; - sKey += sStroke; - sKey += rLID; - sKey += sStroke; - sKey += lcl_NormalizeFilename(rFilename); + ::rtl::OString sKey = rTYP + "-" + rGID + "-" + rLID + "-" + (rFilename.getStr() + lcl_BasenameIndex(rFilename) ); OSL_TRACE("created key: %s", sKey.getStr()); if(bCaseSensitive) return sKey; // officecfg case sensitive identifier diff --git a/l10ntools/source/po.cxx b/l10ntools/source/po.cxx index c59a4f8a4e31..918a76878a8c 100644 --- a/l10ntools/source/po.cxx +++ b/l10ntools/source/po.cxx @@ -8,6 +8,7 @@ */ #include <rtl/ustring.hxx> +#include <rtl/strbuf.hxx> #include <cstring> #include <ctime> @@ -107,21 +108,217 @@ namespace } //Unescape text - static OString lcl_UnEscapeText(const OString& rText, - const OString& rEscaped = POESCAPED, - const OString& rUnEscaped = POUNESCAPED) + static OString lcl_UnEscapeText(const OString& rText) { - assert( rEscaped.getLength() == 2*rUnEscaped.getLength() ); - OString sResult = rText; - int nCount = 0; - for(sal_Int32 nIndex=0; nIndex<rText.getLength()-1; ++nIndex) + sal_Int32 index; + for(index = 0 ; index < rText.getLength() - 1; ++index) { - sal_Int32 nActChar = rEscaped.indexOf(rText.copy(nIndex,2)); - if(nActChar % 2 == 0) - sResult = sResult.replaceAt((nIndex++)-(nCount++),2, - rUnEscaped.copy(nActChar/2,1)); + if(rText[index] == '\\') + { + switch(rText[index + 1]) + { + case '\\': + case 'n': + case 'r': + case 't': + case '"': + OStringBuffer sBuff(rText); + const sal_Char* in = sBuff.getStr() + index; + sal_Char* out = &sBuff[index]; + while(*in) + { + if(*in == '\\') + { + 
switch(in[1]) + { + case '\\': + *out++ = '\\'; + in += 2; + break; + case 'n': + *out++ = '\n'; + in += 2; + break; + case 'r': + *out++ = '\r'; + in += 2; + break; + case 't': + *out++ = '\t'; + in += 2; + break; + case '"': + *out++ = '"'; + in += 2; + break; + default: + *out++ = *in++; + break; + } + } + else + { + *out++ = *in++; + } + } + *out = 0; + sBuff.setLength((out - sBuff.getStr())); + return sBuff.makeStringAndClear(); + } + } } - return sResult; + return rText; + } + + static OString lcl_UnEscapeTextBlanks(const OString& rText) + { + sal_Int32 index; + for(index = 0 ; index < rText.getLength() - 1; ++index) + { + if(rText[index] == '\\') + { + switch(rText[index + 1]) + { + case 'n': + case 'r': + case 't': + OStringBuffer sBuff(rText); + const sal_Char* in = sBuff.getStr() + index; + sal_Char* out = &sBuff[index]; + while(*in) + { + if(*in == '\\') + { + switch(in[1]) + { + case 'n': + *out++ = '\n'; + in += 2; + break; + case 'r': + *out++ = '\r'; + in += 2; + break; + case 't': + *out++ = '\t'; + in += 2; + break; + default: + *out++ = *in++; + break; + } + } + else + { + *out++ = *in++; + } + } + *out = 0; + sBuff.setLength((out - sBuff.getStr())); + return sBuff.makeStringAndClear(); + } + } + } + return rText; + } + + static OString lcl_EscapeTextBlanks(const OString& rText) + { + sal_Int32 index; + for(index = 0 ; index < rText.getLength() - 1; ++index) + { + switch(rText[index]) + { + case '\n': + case '\r': + case '\t': + OStringBuffer sBuff(rText); + const sal_Char* in = rText.getStr() + index; + sal_Char* out = &sBuff[index]; + while(*in) + { + switch(in[1]) + { + case '\n': + *out++ = '\\'; + *out++ = 'n'; + break; + case '\r': + *out++ = '\\'; + *out++ = 'r'; + break; + case 't': + *out++ = '\\'; + *out++ = 'r'; + break; + default: + *out++ = *in++; + break; + } + } + *out = 0; + sBuff.setLength((out - sBuff.getStr())); + return sBuff.makeStringAndClear(); + } + } + return rText; + } + + static OString lcl_UnEscapeTextHelp(const 
OString& rText) + { + sal_Int32 index; + for(index = 0 ; index < rText.getLength() - 1; ++index) + { + if(rText[index] == '\\') + { + switch(rText[index + 1]) + { + case '<': + case '>': + case '"': + case '\\': + OStringBuffer sBuff(rText); + const sal_Char* in = sBuff.getStr() + index; + sal_Char* out = &sBuff[index]; + while(*in) + { + if(*in == '\\') + { + switch(in[1]) + { + case '<': + *out++ = '<'; + in += 2; + break; + case '>': + *out++ = '>'; + in += 2; + break; + case '"': + *out++ = '"'; + in += 2; + break; + case '\\': + *out++ = '\\'; + in += 2; + break; + default: + *out++ = *in++; + break; + } + } + else + { + *out++ = *in++; + } + } + *out = 0; + sBuff.setLength((out - sBuff.getStr())); + return sBuff.makeStringAndClear(); + } + } + } + return rText; } //Convert a normal string to msg/po output string @@ -301,9 +498,9 @@ namespace const OString& rText,const bool bHelpText = false ) { if ( bHelpText ) - return lcl_UnEscapeText(rText,"\\<\\>\\\"\\\\","<>\"\\"); + return lcl_UnEscapeTextHelp(rText); else - return lcl_UnEscapeText(rText,"\\n\\t\\r","\n\t\r"); + return lcl_UnEscapeTextBlanks(rText); } //Find all special tag in a string using a regular expression @@ -311,18 +508,18 @@ namespace const OString& rText,std::vector<OString>& o_vFoundTags ) { - UErrorCode nIcuErr = U_ZERO_ERROR; - sal_uInt32 nSearchFlags = UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE; + static UErrorCode nIcuErr = U_ZERO_ERROR; + static sal_uInt32 nSearchFlags = UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE; OUString sLocaleText( OStringToOUString(rText,RTL_TEXTENCODING_UTF8) ); - OUString sPattern("<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>"); - UnicodeString sSearchPat( + static OUString sPattern("<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>"); + static UnicodeString sSearchPat( reinterpret_cast<const UChar*>( sPattern.getStr()), sPattern.getLength() ); UnicodeString sSource( reinterpret_cast<const UChar*>( sLocaleText.getStr()), sLocaleText.getLength() ); - RegexMatcher 
aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr ); + static RegexMatcher aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr ); aRegexMatcher.reset( sSource ); int64_t nStartPos = 0; while( aRegexMatcher.find(nStartPos, nIcuErr) && @@ -386,7 +583,7 @@ namespace if ( bHelpText ) return lcl_EscapeTags(rText.replaceAll("\\","\\\\")); else - return lcl_EscapeText(rText,"\n\t\r","\\n\\t\\r"); + return lcl_EscapeTextBlanks(rText); } } diff --git a/l10ntools/source/xmlparse.cxx b/l10ntools/source/xmlparse.cxx index dc2e3d639606..2a74fdb26519 100644 --- a/l10ntools/source/xmlparse.cxx +++ b/l10ntools/source/xmlparse.cxx @@ -72,7 +72,8 @@ XMLChildNode& XMLChildNode::operator=(const XMLChildNode& obj){ XMLParentNode::~XMLParentNode() /*****************************************************************************/ { - if( pChildList ){ + if( pChildList ) + { RemoveAndDeleteAllChildren(); delete pChildList; pChildList = NULL; @@ -168,9 +169,8 @@ void XMLFile::Write( rtl::OString const &aFilename ) s.close(); } -void XMLFile::WriteString( ofstream &rStream, const rtl::OUString &sString ) +void XMLFile::WriteString( ofstream &rStream, const rtl::OString &sText ) { - rtl::OString sText(rtl::OUStringToOString(sString, RTL_TEXTENCODING_UTF8)); rStream << sText.getStr(); } @@ -193,7 +193,7 @@ sal_Bool XMLFile::Write( ofstream &rStream , XMLNode *pCur ) if ( pElement->GetAttributeList()) for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ) { rStream << " "; - rtl::OUString sData( (*pElement->GetAttributeList())[ j ]->GetName() ); + rtl::OString sData( (*pElement->GetAttributeList())[ j ]->GetName() ); XMLUtil::QuotHTML( sData ); WriteString( rStream , sData ); rStream << "=\""; @@ -216,7 +216,7 @@ sal_Bool XMLFile::Write( ofstream &rStream , XMLNode *pCur ) break; case XML_NODE_TYPE_DATA: { XMLData *pData = ( XMLData * ) pCur; - rtl::OUString sData( pData->GetData()); + rtl::OString sData( pData->GetData()); XMLUtil::QuotHTML( sData ); WriteString( rStream, sData ); } @@ 
-255,19 +255,17 @@ void XMLFile::Print( XMLNode *pCur, sal_uInt16 nLevel ) case XML_NODE_TYPE_ELEMENT: { XMLElement *pElement = ( XMLElement * ) pCur; - fprintf( stdout, "<%s", rtl::OUStringToOString(pElement->GetName(), RTL_TEXTENCODING_UTF8).getStr()); + fprintf( stdout, "<%s", pElement->GetName().getStr()); if ( pElement->GetAttributeList()) { for (size_t j = 0; j < pElement->GetAttributeList()->size(); ++j) { - rtl::OString aAttrName(rtl::OUStringToOString((*pElement->GetAttributeList())[j]->GetName(), - RTL_TEXTENCODING_UTF8)); + rtl::OString aAttrName((*pElement->GetAttributeList())[j]->GetName()); if (!aAttrName.equalsIgnoreAsciiCase(XML_LANG)) { fprintf( stdout, " %s=\"%s\"", aAttrName.getStr(), - rtl::OUStringToOString( (*pElement->GetAttributeList())[ j ]->GetValue(), - RTL_TEXTENCODING_UTF8).getStr()); + (*pElement->GetAttributeList())[ j ]->GetValue().getStr()); } } } @@ -277,24 +275,23 @@ void XMLFile::Print( XMLNode *pCur, sal_uInt16 nLevel ) fprintf( stdout, ">" ); for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ) Print( (*pElement->GetChildList())[ k ], nLevel + 1 ); - fprintf( stdout, "</%s>", rtl::OUStringToOString(pElement->GetName(), RTL_TEXTENCODING_UTF8).getStr()); + fprintf( stdout, "</%s>", pElement->GetName().getStr()); } } break; case XML_NODE_TYPE_DATA: { XMLData *pData = ( XMLData * ) pCur; - rtl::OUString sData = pData->GetData(); - fprintf( stdout, "%s", rtl::OUStringToOString(sData, RTL_TEXTENCODING_UTF8).getStr()); + fprintf( stdout, "%s", pData->GetData().getStr()); } break; case XML_NODE_TYPE_COMMENT: { XMLComment *pComment = ( XMLComment * ) pCur; - fprintf( stdout, "<!--%s-->", rtl::OUStringToOString(pComment->GetComment(), RTL_TEXTENCODING_UTF8).getStr()); + fprintf( stdout, "<!--%s-->", pComment->GetComment().getStr()); } break; case XML_NODE_TYPE_DEFAULT: { XMLDefault *pDefault = ( XMLDefault * ) pCur; - fprintf( stdout, "%s", rtl::OUStringToOString(pDefault->GetDefault(), RTL_TEXTENCODING_UTF8).getStr()); + 
fprintf( stdout, "%s", pDefault->GetDefault().getStr()); } break; } @@ -312,7 +309,7 @@ XMLFile::~XMLFile() } } /*****************************************************************************/ -XMLFile::XMLFile( const rtl::OUString &rFileName ) // the file name, empty if created from memory stream +XMLFile::XMLFile( const rtl::OString &rFileName ) // the file name, empty if created from memory stream /*****************************************************************************/ : XMLParentNode( NULL ), sFileName ( rFileName ), @@ -349,18 +346,21 @@ void XMLFile::Extract( XMLFile *pCur ) /*****************************************************************************/ void XMLFile::InsertL10NElement( XMLElement* pElement ){ /*****************************************************************************/ - rtl::OString tmpStr,id,oldref,language(""); + rtl::OString id,oldref,language(""); LangHashMap* elem; if( pElement->GetAttributeList() != NULL ){ for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ) { - tmpStr=rtl::OUStringToOString((*pElement->GetAttributeList())[ j ]->GetName(), RTL_TEXTENCODING_UTF8); - if (tmpStr == ID) { // Get the "id" Attribute - id = rtl::OUStringToOString((*pElement->GetAttributeList())[ j ]->GetValue(),RTL_TEXTENCODING_UTF8); + if ((*pElement->GetAttributeList())[ j ]->GetName() == ID) + { + // Get the "id" Attribute + id = (*pElement->GetAttributeList())[ j ]->GetValue(); } - if (tmpStr == XML_LANG) { // Get the "xml-lang" Attribute - language = rtl::OUStringToOString((*pElement->GetAttributeList())[j]->GetValue(),RTL_TEXTENCODING_UTF8); + if ((*pElement->GetAttributeList())[ j ]->GetName() == XML_LANG) + { + // Get the "xml-lang" Attribute + language = (*pElement->GetAttributeList())[j]->GetValue(); } } @@ -381,7 +381,8 @@ void XMLFile::InsertL10NElement( XMLElement* pElement ){ elem=pos->second; if ( (*elem)[ language ] ) { - fprintf(stdout,"Error: Duplicated entry. 
ID = %s LANG = %s in File %s\n", id.getStr(), language.getStr(), rtl::OUStringToOString(sFileName, RTL_TEXTENCODING_ASCII_US).getStr() ); + fprintf(stdout,"Error: Duplicated entry. ID = %s LANG = %s in File %s\n", id.getStr(), language.getStr(), + sFileName.getStr() ); exit( -1 ); } (*elem)[ language ]=pElement; @@ -457,24 +458,30 @@ void XMLFile::SearchL10NElements( XMLParentNode *pCur , int pos) break; case XML_NODE_TYPE_ELEMENT: { XMLElement *pElement = ( XMLElement * ) pCur; - rtl::OString sName(rtl::OUStringToOString(pElement->GetName(), RTL_TEXTENCODING_ASCII_US).toAsciiLowerCase()); rtl::OString language,tmpStrVal,oldref; if ( pElement->GetAttributeList()) { for ( size_t j = 0 , cnt = pElement->GetAttributeList()->size(); j < cnt && bInsert; ++j ) { - const rtl::OString tmpStr = rtl::OUStringToOString((*pElement->GetAttributeList())[j]->GetName(), RTL_TEXTENCODING_UTF8); - if (tmpStr == THEID) { // Get the "id" Attribute - tmpStrVal=rtl::OUStringToOString( (*pElement->GetAttributeList())[ j ]->GetValue(),RTL_TEXTENCODING_UTF8 ); + if ((*pElement->GetAttributeList())[j]->GetName() == THEID) + { + // Get the "id" Attribute + tmpStrVal= (*pElement->GetAttributeList())[ j ]->GetValue(); } - if (tmpStr == LOCALIZE) { // Get the "localize" Attribute + else if ((*pElement->GetAttributeList())[j]->GetName() == LOCALIZE) + { + // Get the "localize" Attribute bInsert=false; } - if (tmpStr == XML_LANG) { // Get the "xml-lang" Attribute - language=rtl::OUStringToOString( (*pElement->GetAttributeList())[ j ]->GetValue(),RTL_TEXTENCODING_UTF8 ); + else if ((*pElement->GetAttributeList())[j]->GetName() == XML_LANG) + { + // Get the "xml-lang" Attribute + language= (*pElement->GetAttributeList())[ j ]->GetValue(); } - if (tmpStr == OLDREF) { // Get the "oldref" Attribute - oldref=rtl::OUStringToOString( (*pElement->GetAttributeList())[ j ]->GetValue(),RTL_TEXTENCODING_UTF8 ); + else if ((*pElement->GetAttributeList())[j]->GetName() == OLDREF) + { + // Get the "oldref" 
Attribute + oldref=(*pElement->GetAttributeList())[ j ]->GetValue(); } } pElement->SetLanguageId ( language ); @@ -483,7 +490,7 @@ void XMLFile::SearchL10NElements( XMLParentNode *pCur , int pos) pElement->SetPos( pos ); } - if ( bInsert && ( nodes_localize.find( sName ) != nodes_localize.end() ) ) + if ( bInsert && ( nodes_localize.find( pElement->GetName() ) != nodes_localize.end() ) ) InsertL10NElement(pElement); else if ( bInsert && pElement->GetChildList() ){ for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ) @@ -510,10 +517,10 @@ bool XMLFile::CheckExportStatus( XMLParentNode *pCur ) { static bool bStatusExport = true; const rtl::OString LOCALIZE("localize"); - const rtl::OString STATUS(RTL_CONSTASCII_STRINGPARAM("status")); - const rtl::OString PUBLISH(RTL_CONSTASCII_STRINGPARAM("PUBLISH")); - const rtl::OString DEPRECATED(RTL_CONSTASCII_STRINGPARAM("DEPRECATED")); - const rtl::OString TOPIC(RTL_CONSTASCII_STRINGPARAM("topic")); + const rtl::OString STATUS("status"); + const rtl::OString PUBLISH("PUBLISH"); + const rtl::OString DEPRECATED("DEPRECATED"); + const rtl::OString TOPIC("topic"); bool bInsert = true; if ( !pCur ) @@ -532,19 +539,16 @@ bool XMLFile::CheckExportStatus( XMLParentNode *pCur ) break; case XML_NODE_TYPE_ELEMENT: { XMLElement *pElement = ( XMLElement * ) pCur; - rtl::OString sName(rtl::OUStringToOString(pElement->GetName(), RTL_TEXTENCODING_ASCII_US)); + const rtl::OString sName = pElement->GetName(); if (sName.equalsIgnoreAsciiCase(TOPIC)) { if ( pElement->GetAttributeList()) { for (size_t j = 0 , cnt = pElement->GetAttributeList()->size(); j < cnt && bInsert; ++j) { - const rtl::OString tmpStr(rtl::OUStringToOString((*pElement->GetAttributeList())[j]->GetName(), - RTL_TEXTENCODING_UTF8)); - if (tmpStr.equalsIgnoreAsciiCase(STATUS)) + if ((*pElement->GetAttributeList())[j]->GetName().equalsIgnoreAsciiCase(STATUS)) { - rtl::OString tmpStrVal(rtl::OUStringToOString( (*pElement->GetAttributeList())[j]->GetValue(), - 
RTL_TEXTENCODING_UTF8)); + const rtl::OString tmpStrVal((*pElement->GetAttributeList())[j]->GetValue()); if (!tmpStrVal.equalsIgnoreAsciiCase(PUBLISH) && !tmpStrVal.equalsIgnoreAsciiCase(DEPRECATED)) { @@ -626,7 +630,7 @@ XMLElement& XMLElement::operator=(const XMLElement& obj){ } /*****************************************************************************/ -void XMLElement::AddAttribute( const rtl::OUString &rAttribute, const rtl::OUString &rValue ) +void XMLElement::AddAttribute( const rtl::OString &rAttribute, const rtl::OString &rValue ) /*****************************************************************************/ { if ( !pAttributes ) @@ -635,9 +639,9 @@ void XMLElement::AddAttribute( const rtl::OUString &rAttribute, const rtl::OUStr } /*****************************************************************************/ -void XMLElement::ChangeLanguageTag( const rtl::OUString &rValue ) +void XMLElement::ChangeLanguageTag( const rtl::OString &rValue ) { - SetLanguageId(rtl::OUStringToOString(rValue, RTL_TEXTENCODING_UTF8)); + SetLanguageId(rValue); if ( pAttributes ) { for (size_t i = 0; i < pAttributes->size(); ++i) @@ -659,7 +663,7 @@ void XMLElement::ChangeLanguageTag( const rtl::OUString &rValue ) { pElem = static_cast< XMLElement* >(pNode); pElem->ChangeLanguageTag( rValue ); - pElem->SetLanguageId(rtl::OUStringToOString(rValue, RTL_TEXTENCODING_UTF8)); + pElem->SetLanguageId(rValue); pElem = NULL; pNode = NULL; } @@ -682,94 +686,110 @@ XMLElement::~XMLElement() } /*****************************************************************************/ -OUString XMLElement::ToOUString(){ +OString XMLElement::ToOString(){ /*****************************************************************************/ - OUStringBuffer* buffer = new OUStringBuffer(); + OStringBuffer* buffer = new OStringBuffer(); Print(this,*buffer,true); - OUString result=buffer->makeStringAndClear(); - rtl::OUString xy(result.getStr()); - result=OUString(xy); + OString 
result=buffer->makeStringAndClear(); delete buffer; return result; } /*****************************************************************************/ -void XMLElement::Print(XMLNode *pCur, OUStringBuffer& buffer , bool rootelement ){ +void XMLElement::Print(XMLNode *pCur, OStringBuffer& buffer , bool rootelement ){ /*****************************************************************************/ - static const OUString XML_LANG ( "xml-lang" ); + static const OString XML_LANG ( "xml-lang" ); - if(pCur!=NULL){ - if(rootelement){ + if(pCur!=NULL) + { + if(rootelement) + { XMLElement *pElement = ( XMLElement * ) pCur; - if ( pElement->GetAttributeList()){ - if ( pElement->GetChildList()){ + if ( pElement->GetAttributeList()) + { + if ( pElement->GetChildList()) + { XMLChildNode* tmp=NULL; - for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ){ + for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ) + { tmp = (*pElement->GetChildList())[ k ]; Print( tmp, buffer , false); } } } } - else{ - - switch( pCur->GetNodeType()) { - case XML_NODE_TYPE_ELEMENT: { + else + { + switch( pCur->GetNodeType()) + { + case XML_NODE_TYPE_ELEMENT: + { XMLElement *pElement = ( XMLElement * ) pCur; - if( !pElement->GetName().equalsIgnoreAsciiCaseAsciiL(RTL_CONSTASCII_STRINGPARAM("comment")) ){ - buffer.append( OUString("\\<") ); + if( !pElement->GetName().equalsIgnoreAsciiCase("comment") ) + { + buffer.append( "\\<"); buffer.append( pElement->GetName() ); - if ( pElement->GetAttributeList()){ - for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ){ + if ( pElement->GetAttributeList()) + { + for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ) + { - OUString aAttrName( (*pElement->GetAttributeList())[ j ]->GetName() ); - if( !aAttrName.equalsIgnoreAsciiCase( XML_LANG ) ) { - buffer.append( OUString(" ") ); + const OString aAttrName = (*pElement->GetAttributeList())[ j ]->GetName(); + if( !aAttrName.equalsIgnoreAsciiCase( XML_LANG ) ) + { + 
buffer.append( " "); buffer.append( aAttrName ); - buffer.append( OUString("=") ); - buffer.append( OUString("\\\"") ); + buffer.append( "=\\\""); buffer.append( (*pElement->GetAttributeList())[ j ]->GetValue() ); - buffer.append( OUString("\\\"") ); + buffer.append( "\\\""); } } } if ( !pElement->GetChildList()) - buffer.append( OUString("/\\>") ); - else { - buffer.append( OUString("\\>") ); + { + buffer.append( "/\\>"); + } + else + { + buffer.append( "\\>" ); XMLChildNode* tmp=NULL; - for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ){ + for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ) + { tmp = (*pElement->GetChildList())[ k ]; Print( tmp, buffer , false); } - buffer.append( OUString("\\</") ); + buffer.append( "\\</" ); buffer.append( pElement->GetName() ); - buffer.append( OUString("\\>") ); + buffer.append( "\\>" ); } } } break; - case XML_NODE_TYPE_DATA: { + case XML_NODE_TYPE_DATA: + { XMLData *pData = ( XMLData * ) pCur; - rtl::OUString sData = pData->GetData(); - buffer.append( sData ); + buffer.append( pData->GetData() ); } break; - case XML_NODE_TYPE_COMMENT: { + case XML_NODE_TYPE_COMMENT: + { XMLComment *pComment = ( XMLComment * ) pCur; - buffer.append( OUString("<!--") ); + buffer.append( "<!--" ); buffer.append( pComment->GetComment() ); - buffer.append( OUString("-->") ); + buffer.append( "-->" ); } break; - case XML_NODE_TYPE_DEFAULT: { + case XML_NODE_TYPE_DEFAULT: + { XMLDefault *pDefault = ( XMLDefault * ) pCur; buffer.append( pDefault->GetDefault() ); } break; + } } - } - }else { + } + else + { fprintf(stdout,"\n#+------Error: NULL Pointer in XMLELement::Print------+#\n"); return; } @@ -797,7 +817,7 @@ XMLData& XMLData::operator=(const XMLData& obj){ return *this; } /*****************************************************************************/ -void XMLData::AddData( const rtl::OUString &rData) { +void XMLData::AddData( const rtl::OString &rData) { 
/*****************************************************************************/ sData += rData; } @@ -866,8 +886,8 @@ XMLDefault& XMLDefault::operator=(const XMLDefault& obj){ // class SimpleXMLParser // -#define XML_CHAR_TO_OUSTRING(x) OStringToOUString(OString(x), RTL_TEXTENCODING_UTF8) -#define XML_CHAR_N_TO_OUSTRING(x,n) OStringToOUString(OString(x,n), RTL_TEXTENCODING_UTF8 ) +//#define XML_CHAR_TO_OUSTRING(x) OStringToOUString(OString(x), RTL_TEXTENCODING_UTF8) +//#define XML_CHAR_N_TO_OUSTRING(x,n) OStringToOUString(OString(x,n), RTL_TEXTENCODING_UTF8 ) /*****************************************************************************/ @@ -936,7 +956,7 @@ void SimpleXMLParser::StartElement( const XML_Char *name, const XML_Char **atts ) /*****************************************************************************/ { - rtl::OUString sElementName = rtl::OUString( XML_CHAR_TO_OUSTRING( name )); + rtl::OString sElementName( name ); XMLElement *pElement = new XMLElement( sElementName, ( XMLParentNode * ) pCurNode ); pCurNode = pElement; pCurData = NULL; @@ -944,8 +964,8 @@ void SimpleXMLParser::StartElement( int i = 0; while( atts[i] ) { pElement->AddAttribute( - rtl::OUString( XML_CHAR_TO_OUSTRING( atts[ i ] )), - rtl::OUString( XML_CHAR_TO_OUSTRING( atts[ i + 1 ] ))); + rtl::OString( atts[ i ] ), + rtl::OString( atts[ i + 1 ] )); i += 2; } } @@ -969,11 +989,11 @@ void SimpleXMLParser::CharacterData( /*****************************************************************************/ { if ( !pCurData ){ - rtl::OUString x = XML_CHAR_N_TO_OUSTRING( s, len ); + rtl::OString x( s, len ); XMLUtil::UnQuotHTML(x); pCurData = new XMLData( x , pCurNode ); }else{ - rtl::OUString x = XML_CHAR_N_TO_OUSTRING( s, len ); + rtl::OString x( s, len ); XMLUtil::UnQuotHTML(x); pCurData->AddData( x ); @@ -986,7 +1006,7 @@ void SimpleXMLParser::Comment( /*****************************************************************************/ { pCurData = NULL; - new XMLComment( rtl::OUString( 
XML_CHAR_TO_OUSTRING( data )), pCurNode ); + new XMLComment( OString( data ), pCurNode ); } /*****************************************************************************/ @@ -995,18 +1015,17 @@ void SimpleXMLParser::Default( /*****************************************************************************/ { pCurData = NULL; - new XMLDefault( - rtl::OUString( XML_CHAR_N_TO_OUSTRING( s, len )), pCurNode ); + new XMLDefault( OString( s, len), pCurNode ); } /*****************************************************************************/ -XMLFile *SimpleXMLParser::Execute( const rtl::OUString &rFileName, XMLFile* pXMLFileIn ) +XMLFile *SimpleXMLParser::Execute( const rtl::OString &rFileName, XMLFile* pXMLFileIn ) /*****************************************************************************/ { aErrorInformation.eCode = XML_ERROR_NONE; aErrorInformation.nLine = 0; aErrorInformation.nColumn = 0; - aErrorInformation.sMessage = rtl::OUString( "ERROR: Unable to open file "); + aErrorInformation.sMessage = "ERROR: Unable to open file "; aErrorInformation.sMessage += rFileName; rtl::OUString aFileURL(common::pathnameToAbsoluteUrl(rFileName)); @@ -1039,12 +1058,10 @@ XMLFile *SimpleXMLParser::Execute( const rtl::OUString &rFileName, XMLFile* pXML aErrorInformation.nLine = 0; aErrorInformation.nColumn = 0; if ( !pXMLFile->GetName().isEmpty()) { - aErrorInformation.sMessage = rtl::OUString( "File "); - aErrorInformation.sMessage += pXMLFile->GetName(); - aErrorInformation.sMessage += rtl::OUString( " parsed successfully"); + aErrorInformation.sMessage = "File " + pXMLFile->GetName() + " parsed successfully"; } else - aErrorInformation.sMessage = rtl::OUString( "XML-File parsed successfully"); + aErrorInformation.sMessage = "XML-File parsed successfully"; if (!XML_Parse(aParser, reinterpret_cast< char * >(p), s, true)) { @@ -1052,84 +1069,82 @@ XMLFile *SimpleXMLParser::Execute( const rtl::OUString &rFileName, XMLFile* pXML aErrorInformation.nLine = XML_GetErrorLineNumber( aParser 
); aErrorInformation.nColumn = XML_GetErrorColumnNumber( aParser ); - aErrorInformation.sMessage = rtl::OUString( "ERROR: "); + aErrorInformation.sMessage = rtl::OString( "ERROR: "); if ( !pXMLFile->GetName().isEmpty()) aErrorInformation.sMessage += pXMLFile->GetName(); else - aErrorInformation.sMessage += rtl::OUString( "XML-File ("); - aErrorInformation.sMessage += rtl::OUString::valueOf( - sal::static_int_cast< sal_Int64 >(aErrorInformation.nLine)); - aErrorInformation.sMessage += rtl::OUString( ","); - aErrorInformation.sMessage += rtl::OUString::valueOf( - sal::static_int_cast< sal_Int64 >(aErrorInformation.nColumn)); - aErrorInformation.sMessage += rtl::OUString( "): "); - - switch (aErrorInformation.eCode) { + aErrorInformation.sMessage += "XML-File ("; + + aErrorInformation.sMessage += rtl::OString::valueOf( sal::static_int_cast< sal_Int64 >(aErrorInformation.nLine)) + + "," + rtl::OString::valueOf( sal::static_int_cast< sal_Int64 >(aErrorInformation.nColumn)) + "): "; + + switch (aErrorInformation.eCode) + { case XML_ERROR_NO_MEMORY: - aErrorInformation.sMessage += rtl::OUString( "No memory"); + aErrorInformation.sMessage += "No memory"; break; case XML_ERROR_SYNTAX: - aErrorInformation.sMessage += rtl::OUString( "Syntax"); + aErrorInformation.sMessage += "Syntax"; break; case XML_ERROR_NO_ELEMENTS: - aErrorInformation.sMessage += rtl::OUString( "No elements"); + aErrorInformation.sMessage += "No elements"; break; case XML_ERROR_INVALID_TOKEN: - aErrorInformation.sMessage += rtl::OUString( "Invalid token"); + aErrorInformation.sMessage += "Invalid token"; break; case XML_ERROR_UNCLOSED_TOKEN: - aErrorInformation.sMessage += rtl::OUString( "Unclosed token"); + aErrorInformation.sMessage += "Unclosed token"; break; case XML_ERROR_PARTIAL_CHAR: - aErrorInformation.sMessage += rtl::OUString( "Partial char"); + aErrorInformation.sMessage += "Partial char"; break; case XML_ERROR_TAG_MISMATCH: - aErrorInformation.sMessage += rtl::OUString( "Tag mismatch"); + 
aErrorInformation.sMessage += "Tag mismatch"; break; case XML_ERROR_DUPLICATE_ATTRIBUTE: - aErrorInformation.sMessage += rtl::OUString( "Dublicat attribute"); + aErrorInformation.sMessage += "Dublicat attribute"; break; case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: - aErrorInformation.sMessage += rtl::OUString( "Junk after doc element"); + aErrorInformation.sMessage += "Junk after doc element"; break; case XML_ERROR_PARAM_ENTITY_REF: - aErrorInformation.sMessage += rtl::OUString( "Param entity ref"); + aErrorInformation.sMessage += "Param entity ref"; break; case XML_ERROR_UNDEFINED_ENTITY: - aErrorInformation.sMessage += rtl::OUString( "Undefined entity"); + aErrorInformation.sMessage += "Undefined entity"; break; case XML_ERROR_RECURSIVE_ENTITY_REF: - aErrorInformation.sMessage += rtl::OUString( "Recursive entity ref"); + aErrorInformation.sMessage += "Recursive entity ref"; break; case XML_ERROR_ASYNC_ENTITY: - aErrorInformation.sMessage += rtl::OUString( "Async_entity"); + aErrorInformation.sMessage += "Async_entity"; break; case XML_ERROR_BAD_CHAR_REF: - aErrorInformation.sMessage += rtl::OUString( "Bad char ref"); + aErrorInformation.sMessage += "Bad char ref"; break; case XML_ERROR_BINARY_ENTITY_REF: - aErrorInformation.sMessage += rtl::OUString( "Binary entity"); + aErrorInformation.sMessage += "Binary entity"; break; case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: - aErrorInformation.sMessage += rtl::OUString( "Attribute external entity ref"); + aErrorInformation.sMessage += "Attribute external entity ref"; break; case XML_ERROR_MISPLACED_XML_PI: - aErrorInformation.sMessage += rtl::OUString( "Misplaced xml pi"); + aErrorInformation.sMessage += "Misplaced xml pi"; break; case XML_ERROR_UNKNOWN_ENCODING: - aErrorInformation.sMessage += rtl::OUString( "Unknown encoding"); + aErrorInformation.sMessage += "Unknown encoding"; break; case XML_ERROR_INCORRECT_ENCODING: - aErrorInformation.sMessage += rtl::OUString( "Incorrect encoding"); + aErrorInformation.sMessage += 
"Incorrect encoding"; break; case XML_ERROR_UNCLOSED_CDATA_SECTION: - aErrorInformation.sMessage += rtl::OUString( "Unclosed cdata section"); + aErrorInformation.sMessage += "Unclosed cdata section"; break; case XML_ERROR_EXTERNAL_ENTITY_HANDLING: - aErrorInformation.sMessage += rtl::OUString( "External entity handling"); + aErrorInformation.sMessage += "External entity handling"; break; case XML_ERROR_NOT_STANDALONE: - aErrorInformation.sMessage += rtl::OUString( "Not standalone"); + aErrorInformation.sMessage += "Not standalone"; break; case XML_ERROR_NONE: break; @@ -1147,10 +1162,10 @@ XMLFile *SimpleXMLParser::Execute( const rtl::OUString &rFileName, XMLFile* pXML } /*****************************************************************************/ -void XMLUtil::QuotHTML( rtl::OUString &rString ) +void XMLUtil::QuotHTML( rtl::OString &rString ) /*****************************************************************************/ { - OUStringBuffer sReturn; + OStringBuffer sReturn; for (sal_Int32 i = 0; i < rString.getLength(); ++i) { switch (rString[i]) { case '\\': @@ -1170,56 +1185,251 @@ void XMLUtil::QuotHTML( rtl::OUString &rString ) break; case '<': - sReturn.appendAscii(RTL_CONSTASCII_STRINGPARAM("<")); + sReturn.append("<"); break; case '>': - sReturn.appendAscii(RTL_CONSTASCII_STRINGPARAM(">")); + sReturn.append(">"); break; case '"': - sReturn.appendAscii(RTL_CONSTASCII_STRINGPARAM(""")); + sReturn.append("""); break; case '&': - if (rString.matchAsciiL(RTL_CONSTASCII_STRINGPARAM("&"), i)) - sReturn.append('&'); + if (rString.match("&", i)) + sReturn.append("&"); else - sReturn.appendAscii(RTL_CONSTASCII_STRINGPARAM("&")); + sReturn.append("&"); break; } } rString = sReturn.makeStringAndClear(); } -void XMLUtil::UnQuotHTML( rtl::OUString &rString ){ - rtl::OStringBuffer sReturn; - rtl::OString sString(rtl::OUStringToOString(rString, RTL_TEXTENCODING_UTF8)); - for (sal_Int32 i = 0; i != sString.getLength();) { - if (sString[i] == '\\') { - 
sReturn.append(RTL_CONSTASCII_STRINGPARAM("\\\\")); - ++i; - } else if (sString.match("&", i)) { - sReturn.append('&'); - i += RTL_CONSTASCII_LENGTH("&"); - } else if (sString.match("<", i)) { - sReturn.append('<'); - i += RTL_CONSTASCII_LENGTH("<"); - } else if (sString.match(">", i)) { - sReturn.append('>'); - i += RTL_CONSTASCII_LENGTH(">"); - } else if (sString.match(""", i)) { - sReturn.append('"'); - i += RTL_CONSTASCII_LENGTH("""); - } else if (sString.match("'", i)) { - sReturn.append('\''); - i += RTL_CONSTASCII_LENGTH("'"); - } else { - sReturn.append(sString[i]); - ++i; +void XMLUtil::UnQuotHTML( rtl::OString &sString ) +{ + sal_Int32 i; + sal_Int32 len = sString.getLength(); + const sal_Char* cursor = sString.getStr(); + + for(i = 0; i < len; ++i, ++cursor) + { + if(*cursor == '\\') + { + break; + } + else if(*cursor == '&') + { + if(i < len - 5) + { + if(cursor[3] == ';') + { + if(cursor[2] == 't') + { + if(cursor[1] == 'l' || cursor[1] == 'g') + { + break; + } + } + } + else if(cursor[4] == ';') + { + if(!memcmp(cursor + 1, "amp;" , 4)) + { + break; + } + } + else if(cursor[5] == ';') + { + if(cursor[3] == 'o') + { + if(!memcmp(cursor + 1, "quot" , 4)) + { + break; + } + if(!memcmp(cursor + 1, "apos" , 4)) + { + break; + } + } + } + } + else if (i < len - 4) + { + if(cursor[3] == ';') + { + if(cursor[2] == 't') + { + if(cursor[1] == 'l' || cursor[1] == 'g') + { + break; + } + } + } + else if(cursor[4] == ';') + { + if(!memcmp(cursor + 1, "amp;" , 4)) + { + break; + } + } + } + else if (i < len - 3) + { + if(cursor[3] == ';') + { + if(cursor[2] == 't') + { + if(cursor[1] == 'l' || cursor[1] == 'g') + { + break; + } + } + } + } } } - rString = rtl::OStringToOUString(sReturn.makeStringAndClear(), RTL_TEXTENCODING_UTF8); + + if(i == len) + { + return; + } + /* here we have some unquoting to do */ + /* at worse we have only '\' and we need to double the size */ + OStringBuffer sBuff(sString.getLength() * 2); + sBuff.append(sString.getStr(), i); + const 
sal_Char* in = sString.getStr() + i; + sal_Char* out = &sBuff[i]; + + while(*in) + { + if(*in == '\\') + { + *out++ = '\\'; + *out++ = '\\'; + in += 1; + i += 1; + continue; + } + else if(*in == '&') + { + if(i < len - 5) + { + if(in[3] == ';') + { + if(in[2] == 't') + { + if(in[1] == 'l') + { + *out++ = '<'; + in += 4; + i += 4; + continue; + } + else if (in[1] == 'g') + { + *out++ = '>'; + in += 4; + i += 4; + continue; + } + } + } + else if(in[4] == ';') + { + if(!memcmp(in + 1, "amp;" , 4)) + { + *out++ = '&'; + in += 5; + i += 5; + continue; + } + } + else if(in[5] == ';') + { + if(in[3] == 'o') + { + if(!memcmp(in + 1, "quot" , 4)) + { + *out++ ='"'; + in += 6; + i += 6; + break; + } + if(!memcmp(in + 1, "apos" , 4)) + { + *out++ ='\''; + in += 6; + i += 6; + break; + } + } + } + } + else if (i < len - 4) + { + if(in[3] == ';') + { + if(in[2] == 't') + { + if(in[1] == 'l') + { + *out++ = '<'; + in += 4; + i += 4; + continue; + } + else if (in[1] == 'g') + { + *out++ = '>'; + in += 4; + i += 4; + continue; + } + } + } + else if(in[4] == ';') + { + if(!memcmp(in + 1, "amp;" , 4)) + { + *out++ = '&'; + in += 5; + i += 5; + continue; + } + } + } + else if (i < len - 3) + { + if(in[3] == ';') + { + if(in[2] == 't') + { + if(in[1] == 'l') + { + *out++ = '<'; + in += 4; + i += 4; + continue; + } + else if (in[1] == 'g') + { + *out++ = '>'; + in += 4; + i += 4; + continue; + } + } + } + } + } + *out++ = *in++; + i += 1; + } + *out = 0; + sString = OString(sBuff.getStr()); + } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |