summary | refs | log | tree | commit | diff
path: root/linguistic/source/dicimp.cxx
diff options
context:
space:
mode:
author Rüdiger Timm <rt@openoffice.org> 2006-05-05 07:10:07 +0000
committer Rüdiger Timm <rt@openoffice.org> 2006-05-05 07:10:07 +0000
commit cd0ec826a262441f0dd248a9cca8b746e5c7da89 (patch)
tree 9338b1216e5de821b3e5dce59e18bbd7acc74bce /linguistic/source/dicimp.cxx
parent 74a4ac33e3dacaabe9238c7ce14e4adf032f987c (diff)
INTEGRATION: CWS tl18 (1.15.4); FILE MERGED
2006/03/08 10:13:39 tl 1.15.4.1: #i60698# introducing new, optional tagged file format for user-dictionaries
Diffstat (limited to 'linguistic/source/dicimp.cxx')
-rw-r--r-- linguistic/source/dicimp.cxx 368
1 files changed, 257 insertions, 111 deletions
diff --git a/linguistic/source/dicimp.cxx b/linguistic/source/dicimp.cxx
index 632ff575b7da..631867a0abb5 100644
--- a/linguistic/source/dicimp.cxx
+++ b/linguistic/source/dicimp.cxx
@@ -4,9 +4,9 @@
*
* $RCSfile: dicimp.cxx,v $
*
- * $Revision: 1.16 $
+ * $Revision: 1.17 $
*
- * last change: $Author: vg $ $Date: 2006-04-07 13:47:28 $
+ * last change: $Author: rt $ $Date: 2006-05-05 08:10:07 $
*
* The Contents of this file are made available subject to
* the terms of GNU Lesser General Public License Version 2.1.
@@ -86,28 +86,132 @@ using namespace linguistic;
///////////////////////////////////////////////////////////////////////////
-#define BUFSIZE 256
+#define BUFSIZE 4096
#define VERS2_NOLANGUAGE 1024
+#define MAX_HEADER_LENGTH 16
+
static const sal_Char* pDicExt = "dic";
static const sal_Char* pVerStr2 = "WBSWG2";
static const sal_Char* pVerStr5 = "WBSWG5";
static const sal_Char* pVerStr6 = "WBSWG6";
+static const sal_Char* pVerOOo7 = "OOoUserDict1";
-int GetDicVersion( const sal_Char *pVerStr )
+static sal_Bool getTag(const ByteString &rLine,
+ const sal_Char *pTagName, ByteString &rTagValue)
{
- if (pVerStr)
+ xub_StrLen nPos = rLine.Search( pTagName );
+ if (nPos == STRING_NOTFOUND)
+ return FALSE;
+
+ rTagValue = rLine.Copy( nPos + strlen( pTagName ) ).EraseLeadingAndTrailingChars();
+ return TRUE;
+}
+
+
+int ReadDicVersion( SvStream *pStream, USHORT &nLng, BOOL &bNeg )
+{
+ // Sniff the header
+ int nDicVersion;
+ sal_Char pMagicHeader[MAX_HEADER_LENGTH];
+
+ nLng = LANGUAGE_NONE;
+ bNeg = FALSE;
+
+ if (!pStream || pStream->GetError())
+ return -1;
+
+ sal_Size nSniffPos = pStream->Tell();
+ static int nVerOOo7Len = strlen( pVerOOo7 );
+ pMagicHeader[ nVerOOo7Len ] = '\0';
+ if ((pStream->Read((void *) pMagicHeader, nVerOOo7Len) == nVerOOo7Len) &&
+ !strcmp(pMagicHeader, pVerOOo7))
{
- if (0 == strcmp( pVerStr, pVerStr6 ))
- return 6;
- if (0 == strcmp( pVerStr, pVerStr5 ))
- return 5;
- if (0 == strcmp( pVerStr, pVerStr2 ))
- return 2;
+ sal_Bool bSuccess;
+ ByteString aLine;
+
+ nDicVersion = 7;
+
+ // 1st skip magic / header line
+ pStream->ReadLine(aLine);
+
+ // 2nd line: language all | en-US | pt-BR ...
+ while ((bSuccess = pStream->ReadLine(aLine)))
+ {
+ ByteString aTagValue;
+
+ if (aLine.GetChar(0) == '#') // skip comments
+ continue;
+
+ // lang: field
+ if (getTag(aLine, "lang: ", aTagValue))
+ {
+ if (aTagValue == "<none>")
+ nLng = LANGUAGE_NONE;
+ else
+ nLng = ConvertIsoByteStringToLanguage(aTagValue);
+ }
+
+ // type: negative / positive
+ if (getTag(aLine, "type: ", aTagValue))
+ {
+ if (aTagValue == "negative")
+ bNeg = TRUE;
+ else
+ bNeg = FALSE;
+ }
+
+ if (aLine.Search ("---") != STRING_NOTFOUND) // end of header
+ break;
+ }
+ if (!bSuccess)
+ return pStream->GetError();
+ }
+ else
+ {
+ USHORT nLen;
+
+ pStream->Seek (nSniffPos );
+
+ *pStream >> nLen;
+ if (nLen >= MAX_HEADER_LENGTH)
+ return -1;
+
+ pStream->Read(pMagicHeader, nLen);
+ pMagicHeader[nLen] = '\0';
+
+ // Check version magic
+ if (0 == strcmp( pMagicHeader, pVerStr6 ))
+ nDicVersion = 6;
+ else if (0 == strcmp( pMagicHeader, pVerStr5 ))
+ nDicVersion = 5;
+ else if (0 == strcmp( pMagicHeader, pVerStr2 ))
+ nDicVersion = 2;
+ else
+ nDicVersion = -1;
+
+ if (2 == nDicVersion ||
+ 5 == nDicVersion ||
+ 6 == nDicVersion)
+ {
+ // The language of the dictionary
+ *pStream >> nLng;
+
+ if (VERS2_NOLANGUAGE == nLng)
+ nLng = LANGUAGE_NONE;
+
+ // Negative Flag
+ sal_Char nTmp;
+ *pStream >> nTmp;
+ bNeg = (BOOL)nTmp;
+ }
}
- return -1;
+
+ return nDicVersion;
}
+
+
const String GetDicExtension()
{
return String::CreateFromAscii( pDicExt );
@@ -193,49 +297,26 @@ ULONG DictionaryNeo::loadEntries(const OUString &rMainURL)
return nErr;
// Header einlesen
- BOOL bSkip = FALSE;
- USHORT nLen;
-
- *pStream >> nLen;
- if ((nErr = pStream->GetError()))
- return nErr;
-
- sal_Char aWordBuf[ BUFSIZE ];
BOOL bNegativ;
-
- if (nLen >= BUFSIZE)
- return nErr;
- pStream->Read(aWordBuf, nLen);
+ USHORT nLang;
+ nDicVersion = ReadDicVersion(pStream, nLang, bNegativ);
if ((nErr = pStream->GetError()))
return nErr;
- *(aWordBuf + nLen) = 0;
+ nLanguage = nLang;
- nDicVersion = GetDicVersion( aWordBuf );
+ eDicType = bNegativ ? DictionaryType_NEGATIVE : DictionaryType_POSITIVE;
rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
- if (6 == nDicVersion)
+ if (nDicVersion >= 6)
eEnc = RTL_TEXTENCODING_UTF8;
+ nCount = 0;
if (6 == nDicVersion ||
5 == nDicVersion ||
2 == nDicVersion)
{
- bSkip = TRUE;
- // Sprache des Dictionaries
- *pStream >> nLanguage;
- if ((nErr = pStream->GetError()))
- return nErr;
-
- if ( VERS2_NOLANGUAGE == nLanguage )
- nLanguage = LANGUAGE_NONE;
-
- // Negativ-Flag
- sal_Char nTmp;
- *pStream >> nTmp;
- if ((nErr = pStream->GetError()))
- return nErr;
- bNegativ = (BOOL) nTmp;
- eDicType = bNegativ ? DictionaryType_NEGATIVE : DictionaryType_POSITIVE;
+ USHORT nLen;
+ sal_Char aWordBuf[ BUFSIZE ];
// Das erste Wort einlesen
if (!pStream->IsEof())
@@ -251,43 +332,58 @@ ULONG DictionaryNeo::loadEntries(const OUString &rMainURL)
*(aWordBuf + nLen) = 0;
}
}
- }
- nCount = 0;
-
- while(!pStream->IsEof())
- {
- // Aus dem File einlesen
- // Einfuegen ins Woerterbuch ohne Konvertierung
- if(*aWordBuf)
+ while(!pStream->IsEof())
{
- ByteString aDummy( aWordBuf );
- String aText( aDummy, eEnc );
- Reference< XDictionaryEntry > xEntry =
- new DicEntry( aText, bNegativ );
- addEntry_Impl( xEntry , TRUE ); //! don't launch events here
- }
+ // Aus dem File einlesen
+ // Einfuegen ins Woerterbuch ohne Konvertierung
+ if(*aWordBuf)
+ {
+ ByteString aDummy( aWordBuf );
+ String aText( aDummy, eEnc );
+ Reference< XDictionaryEntry > xEntry =
+ new DicEntry( aText, bNegativ );
+ addEntry_Impl( xEntry , TRUE ); //! don't launch events here
+ }
- *pStream >> nLen;
- if (pStream->IsEof()) // #75082# GPF in online-spelling
- break;
- if ((nErr = pStream->GetError()))
- return nErr;
+
+
+ *pStream >> nLen;
+ if (pStream->IsEof()) // #75082# GPF in online-spelling
+ break;
+ if ((nErr = pStream->GetError()))
+ return nErr;
#ifdef LINGU_EXCEPTIONS
- if ( nLen >= BUFSIZE )
- throw io::IOException() ;
-// break; // Woerterbuch defekt?
+ if (nLen >= BUFSIZE)
+ throw io::IOException() ;
#endif
- if( nLen < BUFSIZE )
+ if (nLen < BUFSIZE)
+ {
+ pStream->Read(aWordBuf, nLen);
+ if ((nErr = pStream->GetError()))
+ return nErr;
+ }
+ else
+ return SVSTREAM_READ_ERROR;
+ *(aWordBuf + nLen) = 0;
+ }
+ }
+ else if (7 == nDicVersion)
+ {
+ sal_Bool bSuccess;
+ ByteString aLine;
+
+ // remaining lines - stock strings (a [==] b)
+ while ((bSuccess = pStream->ReadLine(aLine)))
{
- pStream->Read(aWordBuf, nLen);
- if ((nErr = pStream->GetError()))
- return nErr;
+ if (aLine.GetChar(0) == '#') // skip comments
+ continue;
+ rtl::OUString aText = rtl::OStringToOUString (aLine, RTL_TEXTENCODING_UTF8);
+ Reference< XDictionaryEntry > xEntry =
+ new DicEntry( aText, eDicType == DictionaryType_NEGATIVE );
+ addEntry_Impl( xEntry , TRUE ); //! don't launch events here
}
- else
- return SVSTREAM_READ_ERROR;
- *(aWordBuf + nLen) = 0;
}
DBG_ASSERT(isSorted(), "lng : dictionary is not sorted");
@@ -300,6 +396,21 @@ ULONG DictionaryNeo::loadEntries(const OUString &rMainURL)
return pStream->GetError();
}
+
+static ByteString formatForSave(
+ const Reference< XDictionaryEntry > &xEntry, rtl_TextEncoding eEnc )
+{
+ ByteString aStr(xEntry->getDictionaryWord().getStr(), eEnc);
+
+ if (xEntry->isNegative())
+ {
+ aStr += "==";
+ aStr += ByteString(xEntry->getReplacementText().getStr(), eEnc);
+ }
+ return aStr;
+}
+
+
ULONG DictionaryNeo::saveEntries(const OUString &rURL)
{
MutexGuard aGuard( GetLinguMutex() );
@@ -317,54 +428,89 @@ ULONG DictionaryNeo::saveEntries(const OUString &rURL)
if (!pStream)
return nErr;
- sal_Char aWordBuf[BUFSIZE];
+ rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
+ if (nDicVersion >= 6)
+ eEnc = RTL_TEXTENCODING_UTF8;
- // write version
- const sal_Char *pVerStr = NULL;
- if (6 == nDicVersion)
- pVerStr = pVerStr6;
- else
- pVerStr = eDicType == DictionaryType_POSITIVE ? pVerStr2 : pVerStr5;
- strcpy( aWordBuf, pVerStr ); // #100211# - checked
- USHORT nLen = strlen( aWordBuf );
- *pStream << nLen;
- if ((nErr = pStream->GetError()))
- return nErr;
- pStream->Write(aWordBuf, nLen);
- if ((nErr = pStream->GetError()))
- return nErr;
+ if (nDicVersion == 7)
+ {
+ pStream->WriteLine(ByteString (pVerOOo7));
+ if ((nErr = pStream->GetError()))
+ return nErr;
- *pStream << nLanguage;
- if ((nErr = pStream->GetError()))
- return nErr;
- *pStream << (sal_Char) (eDicType == DictionaryType_NEGATIVE ? TRUE : FALSE);
- if ((nErr = pStream->GetError()))
- return nErr;
+ if (nLanguage == LANGUAGE_NONE)
+ pStream->WriteLine(ByteString("lang: <none>"));
+ else
+ {
+ ByteString aLine("lang: ");
+ aLine += ConvertLanguageToIsoByteString( nLanguage );
+ pStream->WriteLine( aLine );
+ }
+ if ((nErr = pStream->GetError()))
+ return nErr;
- rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
- if (6 == nDicVersion)
- eEnc = RTL_TEXTENCODING_UTF8;
+ if (eDicType == DictionaryType_POSITIVE)
+ pStream->WriteLine(ByteString("type: positive"));
+ else
+ pStream->WriteLine(ByteString("type: negative"));
+ if ((nErr = pStream->GetError()))
+ return nErr;
- const Reference< XDictionaryEntry > *pEntry = aEntries.getConstArray();
- for (INT32 i = 0; i < nCount; i++)
+ pStream->WriteLine(ByteString("---"));
+ if ((nErr = pStream->GetError()))
+ return nErr;
+
+ const Reference< XDictionaryEntry > *pEntry = aEntries.getConstArray();
+ for (INT32 i = 0; i < nCount; i++)
+ {
+ ByteString aOutStr = formatForSave(pEntry[i], eEnc);
+ pStream->WriteLine (aOutStr);
+ if ((nErr = pStream->GetError()))
+ return nErr;
+ }
+ }
+ else
{
- BOOL bIsNegativEntry = pEntry[i]->isNegative();
-
- ByteString aTmp1 ( pEntry[i]->getDictionaryWord().getStr(), eEnc ),
- aTmp2 ( pEntry[i]->getReplacementText().getStr(), eEnc );
- if (bIsNegativEntry)
- aTmp1 += "==";
- xub_StrLen nLen1 = aTmp1.Len(),
- nLen2 = aTmp2.Len();
- if ((nLen = nLen1) < BUFSIZE)
+ sal_Char aWordBuf[BUFSIZE];
+
+ // write version
+ const sal_Char *pVerStr = NULL;
+ if (6 == nDicVersion)
+ pVerStr = pVerStr6;
+ else
+ pVerStr = eDicType == DictionaryType_POSITIVE ? pVerStr2 : pVerStr5;
+ strcpy( aWordBuf, pVerStr ); // #100211# - checked
+ USHORT nLen = strlen( aWordBuf );
+ *pStream << nLen;
+ if ((nErr = pStream->GetError()))
+ return nErr;
+ pStream->Write(aWordBuf, nLen);
+ if ((nErr = pStream->GetError()))
+ return nErr;
+
+ *pStream << nLanguage;
+ if ((nErr = pStream->GetError()))
+ return nErr;
+ *pStream << (sal_Char) (eDicType == DictionaryType_NEGATIVE ? TRUE : FALSE);
+ if ((nErr = pStream->GetError()))
+ return nErr;
+
+ const Reference< XDictionaryEntry > *pEntry = aEntries.getConstArray();
+ for (INT32 i = 0; i < nCount; i++)
{
- strncpy( aWordBuf, aTmp1.GetBuffer(), nLen1 );
- if (bIsNegativEntry && (nLen = nLen1 + nLen2) < BUFSIZE)
- strncpy( aWordBuf + nLen1, aTmp2.GetBuffer(), nLen2);
+ ByteString aOutStr = formatForSave(pEntry[i], eEnc);
+
+ // the old format would fail (mis-calculation of nLen) and write
+ // uninitialized junk for combined len >= BUFSIZE - we truncate
+ // silently here, but BUFSIZE is large anyway.
+ nLen = aOutStr.Len();
+ if (nLen >= BUFSIZE)
+ nLen = BUFSIZE - 1;
+
*pStream << nLen;
if ((nErr = pStream->GetError()))
return nErr;
- pStream->Write(aWordBuf, nLen);
+ pStream->Write(aOutStr.GetBuffer(), nLen);
if ((nErr = pStream->GetError()))
return nErr;
}