diff options
author | Thomas Lange [tl] <tl@openoffice.org> | 2010-07-21 16:29:56 +0200 |
---|---|---|
committer | Thomas Lange [tl] <tl@openoffice.org> | 2010-07-21 16:29:56 +0200 |
commit | c7896e24ec34b87aba39e8b2ff19a21fe0c518a6 (patch) | |
tree | 92be29fc60478d6b727ce86211983f3baa773b8a /lingucomponent/source/hyphenator | |
parent | 16ae4c1524769458f6d205430e7dcb8fc4ae333b (diff) |
cws tl82: #i113293# unify encoding detection for linguistic components
Diffstat (limited to 'lingucomponent/source/hyphenator')
-rwxr-xr-x | lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx | 839 | ||||
-rwxr-xr-x[-rw-r--r--] | lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.hxx | 94 |
2 files changed, 453 insertions, 480 deletions
diff --git a/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx b/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx index 3b0871cc36b6..43c22d0b84a4 100755 --- a/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx +++ b/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx @@ -32,7 +32,6 @@ #include <com/sun/star/uno/Reference.h> #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp> -//#include <com/sun/star/linguistic2/SpellFailure.hpp> #include <cppuhelper/factory.hxx> // helper for factories #include <com/sun/star/registry/XRegistryKey.hpp> #include <i18npool/mslangid.hxx> @@ -43,10 +42,7 @@ #include <osl/mutex.hxx> #include <hyphen.h> - -#ifndef _HYPHENIMP_HXX #include <hyphenimp.hxx> -#endif #include <linguistic/hyphdta.hxx> #include <rtl/ustring.hxx> @@ -97,9 +93,8 @@ Hyphenator::Hyphenator() : { bDisposing = FALSE; pPropHelper = NULL; - aDicts = NULL; - numdict = 0; - + aDicts = NULL; + numdict = 0; } @@ -108,21 +103,22 @@ Hyphenator::~Hyphenator() if (pPropHelper) pPropHelper->RemoveAsPropListener(); - if ((numdict) && (aDicts)) { - for (int i=0; i < numdict; i++) { + if ((numdict) && (aDicts)) + { + for (int i=0; i < numdict; i++) + { if (aDicts[i].apCC) delete aDicts[i].apCC; aDicts[i].apCC = NULL; - } + } } - if (aDicts) delete[] aDicts; + if (aDicts) delete[] aDicts; aDicts = NULL; - numdict = 0; + numdict = 0; } PropertyHelper_Hyphen & Hyphenator::GetPropHelper_Impl() { - if (!pPropHelper) { Reference< XPropertySet > xPropSet( GetLinguProperties(), UNO_QUERY ); @@ -212,12 +208,7 @@ Sequence< Locale > SAL_CALL Hyphenator::getLocales() // add dictionary information aDicts = new HDInfo[numdict]; -/* - aTEncs = new rtl_TextEncoding [numdict]; - aTLocs = new Locale [numdict]; - aTNames = new OUString [numdict]; - aCharSetInfo = new CharClass* [numdict]; -*/ + k = 0; for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt) { @@ -233,7 +224,7 @@ Sequence< Locale > SAL_CALL Hyphenator::getLocales() for (sal_Int32 i = 0; i < nLocales; ++i) { aDicts[k].aPtr = NULL; - aDicts[k].aEnc = 0; + aDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW; aDicts[k].aLoc = MsLangId::convertLanguageToLocale( MsLangId::convertIsoStringToLanguage( aDictIt->aLocaleNames[i] )); aDicts[k].apCC = new CharClass( aDicts[k].aLoc ); @@ -288,240 +279,268 @@ sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale) } -Reference< XHyphenatedWord > SAL_CALL -Hyphenator::hyphenate( const ::rtl::OUString& aWord, - const ::com::sun::star::lang::Locale& aLocale, - sal_Int16 nMaxLeading, - const ::com::sun::star::beans::PropertyValues& aProperties ) - throw (com::sun::star::uno::RuntimeException, - com::sun::star::lang::IllegalArgumentException) +Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const ::rtl::OUString& aWord, + const ::com::sun::star::lang::Locale& aLocale, + sal_Int16 nMaxLeading, + const ::com::sun::star::beans::PropertyValues& aProperties ) + throw (com::sun::star::uno::RuntimeException, com::sun::star::lang::IllegalArgumentException) { int nHyphenationPos = -1; - int nHyphenationPosAlt = -1; - int nHyphenationPosAltHyph = -1; + int nHyphenationPosAlt = -1; + int nHyphenationPosAltHyph = -1; int wordlen; char *hyphens; - char *lcword; - int k = 0; + char *lcword; + int k = 0; - PropertyHelper_Hyphen & rHelper = GetPropHelper(); - rHelper.SetTmpPropVals(aProperties); + PropertyHelper_Hyphen & rHelper = GetPropHelper(); + rHelper.SetTmpPropVals(aProperties); sal_Int16 minTrail = rHelper.GetMinTrailing(); sal_Int16 minLead = rHelper.GetMinLeading(); sal_Int16 minLen = rHelper.GetMinWordLength(); HyphenDict *dict = NULL; - rtl_TextEncoding aEnc = 0; - CharClass * pCC = NULL; + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + CharClass * pCC = NULL; Reference< XHyphenatedWord > xRes; - k = -1; - for (int j = 0; j < numdict; j++) - if (aLocale == aDicts[j].aLoc) k = j; - - - // if we have a hyphenation dictionary matching this locale - if (k != -1) { + k = -1; + for (int j = 0; j < numdict; j++) + { + if (aLocale == aDicts[j].aLoc) + k = j; + } + // if we have a hyphenation dictionary matching this locale + if (k != -1) + { // if this dictinary has not been loaded yet do that - if (!aDicts[k].aPtr) { - - OUString DictFN = aDicts[k].aName + A2OU(".dic"); - OUString dictpath; + if (!aDicts[k].aPtr) + { + OUString DictFN = aDicts[k].aName + A2OU(".dic"); + OUString dictpath; - osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath ); - OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) ); + osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath ); + OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) ); #if defined(WNT) - // workaround for Windows specifc problem that the - // path length in calls to 'fopen' is limted to somewhat - // about 120+ characters which will usually be exceed when - // using dictionaries as extensions. - sTmp = Win_GetShortPathName( dictpath ); + // workaround for Windows specifc problem that the + // path length in calls to 'fopen' is limted to somewhat + // about 120+ characters which will usually be exceed when + // using dictionaries as extensions. + sTmp = Win_GetShortPathName( dictpath ); #endif - if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL ) - { - fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) ); - return NULL; - } - aDicts[k].aPtr = dict; - aDicts[k].aEnc = rtl_getTextEncodingFromUnixCharset(dict->cset); - if (aDicts[k].aEnc == RTL_TEXTENCODING_DONTKNOW) { - if (strcmp("ISCII-DEVANAGARI", dict->cset) == 0) { - aDicts[k].aEnc = RTL_TEXTENCODING_ISCII_DEVANAGARI; - } else if (strcmp("UTF-8", dict->cset) == 0) { - aDicts[k].aEnc = RTL_TEXTENCODING_UTF8; - } - } + if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL ) + { + fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) ); + return NULL; + } + aDicts[k].aPtr = dict; + aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset); } - // other wise hyphenate the word with that dictionary - dict = aDicts[k].aPtr; - aEnc = aDicts[k].aEnc; - pCC = aDicts[k].apCC; + // other wise hyphenate the word with that dictionary + dict = aDicts[k].aPtr; + eEnc = aDicts[k].eEnc; + pCC = aDicts[k].apCC; + + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. + // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. ;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return NULL; - sal_uInt16 ct = CAPTYPE_UNKNOWN; - ct = capitalType(aWord, pCC); + sal_uInt16 ct = CAPTYPE_UNKNOWN; + ct = capitalType(aWord, pCC); - // first convert any smart quotes or apostrophes to normal ones + // first convert any smart quotes or apostrophes to normal ones OUStringBuffer rBuf(aWord); - sal_Int32 nc = rBuf.getLength(); - sal_Unicode ch; - for (sal_Int32 ix=0; ix < nc; ix++) { + sal_Int32 nc = rBuf.getLength(); + sal_Unicode ch; + for (sal_Int32 ix=0; ix < nc; ix++) + { ch = rBuf.charAt(ix); - if ((ch == 0x201C) || (ch == 0x201D)) rBuf.setCharAt(ix,(sal_Unicode)0x0022); - if ((ch == 0x2018) || (ch == 0x2019)) rBuf.setCharAt(ix,(sal_Unicode)0x0027); - } - OUString nWord(rBuf.makeStringAndClear()); + if ((ch == 0x201C) || (ch == 0x201D)) + rBuf.setCharAt(ix,(sal_Unicode)0x0022); + if ((ch == 0x2018) || (ch == 0x2019)) + rBuf.setCharAt(ix,(sal_Unicode)0x0027); + } + OUString nWord(rBuf.makeStringAndClear()); - // now convert word to all lowercase for pattern recognition - OUString nTerm(makeLowerCase(nWord, pCC)); + // now convert word to all lowercase for pattern recognition + OUString nTerm(makeLowerCase(nWord, pCC)); - // now convert word to needed encoding - OString encWord(OU2ENC(nTerm,aEnc)); + // now convert word to needed encoding + OString encWord(OU2ENC(nTerm,eEnc)); wordlen = encWord.getLength(); - lcword = new char[wordlen + 1]; + lcword = new char[wordlen + 1]; hyphens = new char[wordlen + 5]; - char ** rep = NULL; // replacements of discretionary hyphenation - int * pos = NULL; // array of [hyphenation point] minus [deletion position] - int * cut = NULL; // length of deletions in original word - - // copy converted word into simple char buffer - strcpy(lcword,encWord.getStr()); - - // now strip off any ending periods - int n = wordlen-1; - while((n >=0) && (lcword[n] == '.')) n--; - n++; - if (n > 0) { - if (hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL, &rep, &pos, &cut, - minLead, minTrail, Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))), - Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))))) - { - //whoops something did not work - delete[] hyphens; - delete[] lcword; - if (rep) { - for(int j = 0; j < n; j++) { + char ** rep = NULL; // replacements of discretionary hyphenation + int * pos = NULL; // array of [hyphenation point] minus [deletion position] + int * cut = NULL; // length of deletions in original word + + // copy converted word into simple char buffer + strcpy(lcword,encWord.getStr()); + + // now strip off any ending periods + int n = wordlen-1; + while((n >=0) && (lcword[n] == '.')) + n--; + n++; + if (n > 0) + { + const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword, n, hyphens, NULL, + &rep, &pos, &cut, minLead, minTrail, + Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))), + Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) ); + if (bFailed) + { + //whoops something did not work + delete[] hyphens; + delete[] lcword; + if (rep) + { + for(int j = 0; j < n; j++) + { if (rep[j]) free(rep[j]); } free(rep); - } - if (pos) free(pos); - if (cut) free(cut); - return NULL; - } + } + if (pos) free(pos); + if (cut) free(cut); + return NULL; } + } - // now backfill hyphens[] for any removed trailing periods - for (int c = n; c < wordlen; c++) hyphens[c] = '0'; - hyphens[wordlen] = '\0'; + // now backfill hyphens[] for any removed trailing periods + for (int c = n; c < wordlen; c++) hyphens[c] = '0'; + hyphens[wordlen] = '\0'; INT32 Leading = GetPosInWordToCheck( aWord, nMaxLeading ); for (INT32 i = 0; i < n; i++) { - int leftrep = 0; - BOOL hit = (n >= minLen); - if (!rep || !rep[i] || (i >= n)) { - hit = hit && (hyphens[i]&1) && (i < Leading); - hit = hit && (i >= (minLead-1) ); - hit = hit && ((n - i - 1) >= minTrail); - } else { - // calculate change character length before hyphenation point signed with '=' - for (char * c = rep[i]; *c && (*c != '='); c++) { - if (aEnc == RTL_TEXTENCODING_UTF8) { - if (((unsigned char) *c) >> 6 != 2) leftrep++; - } else leftrep++; + int leftrep = 0; + BOOL hit = (n >= minLen); + if (!rep || !rep[i] || (i >= n)) + { + hit = hit && (hyphens[i]&1) && (i < Leading); + hit = hit && (i >= (minLead-1) ); + hit = hit && ((n - i - 1) >= minTrail); + } + else + { + // calculate change character length before hyphenation point signed with '=' + for (char * c = rep[i]; *c && (*c != '='); c++) + { + if (eEnc == RTL_TEXTENCODING_UTF8) + { + if (((unsigned char) *c) >> 6 != 2) + leftrep++; } - hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading); - hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) ); - hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail); + else + leftrep++; } - if (hit) { - nHyphenationPos = i; - if (rep && (i < n) && rep[i]) { - nHyphenationPosAlt = i - pos[i]; - nHyphenationPosAltHyph = i + leftrep - pos[i]; - } + hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading); + hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) ); + hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail); + } + if (hit) + { + nHyphenationPos = i; + if (rep && (i < n) && rep[i]) + { + nHyphenationPosAlt = i - pos[i]; + nHyphenationPosAltHyph = i + leftrep - pos[i]; } - } - - if (nHyphenationPos == -1) { - xRes = NULL; - } else { - if (rep && rep[nHyphenationPos]) { - // remove equal sign - char * s = rep[nHyphenationPos]; - int eq = 0; - for (; *s; s++) { - if (*s == '=') eq = 1; - if (eq) *s = *(s + 1); - } - OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), aEnc); - OUString repHyph; - switch (ct) { - case CAPTYPE_ALLCAP: - { - repHyph = makeUpperCase(repHyphlow, pCC); - break; - } - case CAPTYPE_INITCAP: - { - if (nHyphenationPosAlt == 0) { - repHyph = makeInitCap(repHyphlow, pCC); - } else { - repHyph = repHyphlow; - } - break; - } - default: - { - repHyph = repHyphlow; - break; - } } + } + + if (nHyphenationPos == -1) + { + xRes = NULL; + } + else + { + if (rep && rep[nHyphenationPos]) + { + // remove equal sign + char * s = rep[nHyphenationPos]; + int eq = 0; + for (; *s; s++) + { + if (*s == '=') eq = 1; + if (eq) *s = *(s + 1); + } + OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc); + OUString repHyph; + switch (ct) + { + case CAPTYPE_ALLCAP: + { + repHyph = makeUpperCase(repHyphlow, pCC); + break; + } + case CAPTYPE_INITCAP: + { + if (nHyphenationPosAlt == 0) + repHyph = makeInitCap(repHyphlow, pCC); + else + repHyph = repHyphlow; + break; + } + default: + { + repHyph = repHyphlow; + break; + } + } - // handle shortening - INT16 nPos = (INT16) ((nHyphenationPosAltHyph < nHyphenationPos) ? - nHyphenationPosAltHyph : nHyphenationPos); - // dicretionary hyphenation + // handle shortening + INT16 nPos = (INT16) ((nHyphenationPosAltHyph < nHyphenationPos) ? + nHyphenationPosAltHyph : nHyphenationPos); + // dicretionary hyphenation xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale ), nPos, - aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph), - (INT16) nHyphenationPosAltHyph); - } else { + aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph), + (INT16) nHyphenationPosAltHyph); + } + else + { xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale ), - (INT16)nHyphenationPos, aWord, (INT16) nHyphenationPos); - } + (INT16)nHyphenationPos, aWord, (INT16) nHyphenationPos); + } } - delete[] lcword; + delete[] lcword; delete[] hyphens; - if (rep) { - for(int j = 0; j < n; j++) { - if (rep[j]) free(rep[j]); - } - free(rep); + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); } - if (pos) free(pos); - if (cut) free(cut); + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); return xRes; } - return NULL; + return NULL; } -Reference < XHyphenatedWord > SAL_CALL - Hyphenator::queryAlternativeSpelling( const ::rtl::OUString& /*aWord*/, - const ::com::sun::star::lang::Locale& /*aLocale*/, - sal_Int16 /*nIndex*/, - const ::com::sun::star::beans::PropertyValues& /*aProperties*/ ) - throw(::com::sun::star::lang::IllegalArgumentException, - ::com::sun::star::uno::RuntimeException) +Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling( + const ::rtl::OUString& /*aWord*/, + const ::com::sun::star::lang::Locale& /*aLocale*/, + sal_Int16 /*nIndex*/, + const ::com::sun::star::beans::PropertyValues& /*aProperties*/ ) + throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException) { /* alternative spelling isn't supported by tex dictionaries */ /* XXX: OOo's extended libhjn algorithm can support alternative spellings with extended TeX dic. */ @@ -529,251 +548,262 @@ Reference < XHyphenatedWord > SAL_CALL return NULL; } -Reference< XPossibleHyphens > SAL_CALL - Hyphenator::createPossibleHyphens( const ::rtl::OUString& aWord, - const ::com::sun::star::lang::Locale& aLocale, - const ::com::sun::star::beans::PropertyValues& aProperties ) - throw(::com::sun::star::lang::IllegalArgumentException, - ::com::sun::star::uno::RuntimeException) - +Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const ::rtl::OUString& aWord, + const ::com::sun::star::lang::Locale& aLocale, + const ::com::sun::star::beans::PropertyValues& aProperties ) + throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException) { - int wordlen; - char *hyphens; - char *lcword; - int k; + int wordlen; + char *hyphens; + char *lcword; + int k; - PropertyHelper_Hyphen & rHelper = GetPropHelper(); - rHelper.SetTmpPropVals(aProperties); - sal_Int16 minTrail = rHelper.GetMinTrailing(); - sal_Int16 minLead = rHelper.GetMinLeading(); + PropertyHelper_Hyphen & rHelper = GetPropHelper(); + rHelper.SetTmpPropVals(aProperties); + sal_Int16 minTrail = rHelper.GetMinTrailing(); + sal_Int16 minLead = rHelper.GetMinLeading(); - HyphenDict *dict = NULL; - rtl_TextEncoding aEnc = 0; - CharClass* pCC = NULL; + HyphenDict *dict = NULL; + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + CharClass* pCC = NULL; - Reference< XPossibleHyphens > xRes; + Reference< XPossibleHyphens > xRes; - k = -1; - for (int j = 0; j < numdict; j++) - if (aLocale == aDicts[j].aLoc) k = j; + k = -1; + for (int j = 0; j < numdict; j++) + { + if (aLocale == aDicts[j].aLoc) k = j; + } + // if we have a hyphenation dictionary matching this locale + if (k != -1) + { + // if this dictioanry has not been loaded yet do that + if (!aDicts[k].aPtr) + { + OUString DictFN = aDicts[k].aName + A2OU(".dic"); + OUString dictpath; - // if we have a hyphenation dictionary matching this locale - if (k != -1) { + osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath ); + OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) ); - // if this dictioanry has not been loaded yet do that - if (!aDicts[k].aPtr) { +#if defined(WNT) + // workaround for Windows specifc problem that the + // path length in calls to 'fopen' is limted to somewhat + // about 120+ characters which will usually be exceed when + // using dictionaries as extensions. + sTmp = Win_GetShortPathName( dictpath ); +#endif - OUString DictFN = aDicts[k].aName + A2OU(".dic"); - OUString dictpath; + if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL ) + { + fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) ); + return NULL; + } + aDicts[k].aPtr = dict; + aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset); + } - osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath ); - OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) ); + // other wise hyphenate the word with that dictionary + dict = aDicts[k].aPtr; + eEnc = aDicts[k].eEnc; + pCC = aDicts[k].apCC; -#if defined(WNT) - // workaround for Windows specifc problem that the - // path length in calls to 'fopen' is limted to somewhat - // about 120+ characters which will usually be exceed when - // using dictionaries as extensions. - sTmp = Win_GetShortPathName( dictpath ); -#endif + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. + // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. ;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return NULL; - if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL ) + // first handle smart quotes both single and double + OUStringBuffer rBuf(aWord); + sal_Int32 nc = rBuf.getLength(); + sal_Unicode ch; + for (sal_Int32 ix=0; ix < nc; ix++) { - fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) ); - return NULL; + ch = rBuf.charAt(ix); + if ((ch == 0x201C) || (ch == 0x201D)) + rBuf.setCharAt(ix,(sal_Unicode)0x0022); + if ((ch == 0x2018) || (ch == 0x2019)) + rBuf.setCharAt(ix,(sal_Unicode)0x0027); } - aDicts[k].aPtr = dict; - aDicts[k].aEnc = rtl_getTextEncodingFromUnixCharset(dict->cset); - if (aDicts[k].aEnc == RTL_TEXTENCODING_DONTKNOW) { - if (strcmp("ISCII-DEVANAGARI", dict->cset) == 0) { - aDicts[k].aEnc = RTL_TEXTENCODING_ISCII_DEVANAGARI; - } else if (strcmp("UTF-8", dict->cset) == 0) { - aDicts[k].aEnc = RTL_TEXTENCODING_UTF8; + OUString nWord(rBuf.makeStringAndClear()); + + // now convert word to all lowercase for pattern recognition + OUString nTerm(makeLowerCase(nWord, pCC)); + + // now convert word to needed encoding + OString encWord(OU2ENC(nTerm,eEnc)); + + wordlen = encWord.getLength(); + lcword = new char[wordlen+1]; + hyphens = new char[wordlen+5]; + char ** rep = NULL; // replacements of discretionary hyphenation + int * pos = NULL; // array of [hyphenation point] minus [deletion position] + int * cut = NULL; // length of deletions in original word + + // copy converted word into simple char buffer + strcpy(lcword,encWord.getStr()); + + // first remove any trailing periods + int n = wordlen-1; + while((n >=0) && (lcword[n] == '.')) + n--; + n++; + // fprintf(stderr,"hyphenate... %s\n",lcword); fflush(stderr); + if (n > 0) + { + const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL, + &rep, &pos, &cut, minLead, minTrail, + Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))), + Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) ); + if (bFailed) + { + delete[] hyphens; + delete[] lcword; + + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + + return NULL; } - } - } - - // other wise hyphenate the word with that dictionary - dict = aDicts[k].aPtr; - aEnc = aDicts[k].aEnc; - pCC = aDicts[k].apCC; - - // first handle smart quotes both single and double - OUStringBuffer rBuf(aWord); - sal_Int32 nc = rBuf.getLength(); - sal_Unicode ch; - for (sal_Int32 ix=0; ix < nc; ix++) { - ch = rBuf.charAt(ix); - if ((ch == 0x201C) || (ch == 0x201D)) rBuf.setCharAt(ix,(sal_Unicode)0x0022); - if ((ch == 0x2018) || (ch == 0x2019)) rBuf.setCharAt(ix,(sal_Unicode)0x0027); - } - OUString nWord(rBuf.makeStringAndClear()); - - // now convert word to all lowercase for pattern recognition - OUString nTerm(makeLowerCase(nWord, pCC)); - - // now convert word to needed encoding - OString encWord(OU2ENC(nTerm,aEnc)); - - wordlen = encWord.getLength(); - lcword = new char[wordlen+1]; - hyphens = new char[wordlen+5]; - char ** rep = NULL; // replacements of discretionary hyphenation - int * pos = NULL; // array of [hyphenation point] minus [deletion position] - int * cut = NULL; // length of deletions in original word - - // copy converted word into simple char buffer - strcpy(lcword,encWord.getStr()); - - // first remove any trailing periods - int n = wordlen-1; - while((n >=0) && (lcword[n] == '.')) n--; - n++; - // fprintf(stderr,"hyphenate... %s\n",lcword); fflush(stderr); - if (n > 0) { - if (hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL, &rep, &pos, &cut, - minLead, minTrail, Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))), - Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))))) - { - delete[] hyphens; - delete[] lcword; - - if (rep) { - for(int j = 0; j < n; j++) { - if (rep[j]) free(rep[j]); - } - free(rep); - } - if (pos) free(pos); - if (cut) free(cut); - - return NULL; - } - } - // now backfill hyphens[] for any removed periods - for (int c = n; c < wordlen; c++) hyphens[c] = '0'; - hyphens[wordlen] = '\0'; - // fprintf(stderr,"... %s\n",hyphens); fflush(stderr); - - INT16 nHyphCount = 0; - INT16 i; - - for ( i = 0; i < encWord.getLength(); i++) - if (hyphens[i]&1 && (!rep || !rep[i])) - nHyphCount++; - - Sequence< INT16 > aHyphPos(nHyphCount); - INT16 *pPos = aHyphPos.getArray(); - OUStringBuffer hyphenatedWordBuffer; - OUString hyphenatedWord; - nHyphCount = 0; - - for (i = 0; i < nWord.getLength(); i++) { - hyphenatedWordBuffer.append(aWord[i]); - // hyphenation position (not alternative) - if (hyphens[i]&1 && (!rep || !rep[i])) { - pPos[nHyphCount] = i; - hyphenatedWordBuffer.append(sal_Unicode('=')); - nHyphCount++; - } - } - - hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear(); - //fprintf(stderr,"result is %s\n",OU2A(hyphenatedWord)); - //fflush(stderr); - - xRes = new PossibleHyphens( aWord, LocaleToLanguage( aLocale ), - hyphenatedWord, aHyphPos ); - - delete[] hyphens; - delete[] lcword; - - if (rep) { - for(int j = 0; j < n; j++) { - if (rep[j]) free(rep[j]); - } - free(rep); - } - if (pos) free(pos); - if (cut) free(cut); - - return xRes; - } + } + // now backfill hyphens[] for any removed periods + for (int c = n; c < wordlen; c++) + hyphens[c] = '0'; + hyphens[wordlen] = '\0'; + // fprintf(stderr,"... %s\n",hyphens); fflush(stderr); - return NULL; + INT16 nHyphCount = 0; + INT16 i; + for ( i = 0; i < encWord.getLength(); i++) + { + if (hyphens[i]&1 && (!rep || !rep[i])) + nHyphCount++; + } + + Sequence< INT16 > aHyphPos(nHyphCount); + INT16 *pPos = aHyphPos.getArray(); + OUStringBuffer hyphenatedWordBuffer; + OUString hyphenatedWord; + nHyphCount = 0; + + for (i = 0; i < nWord.getLength(); i++) + { + hyphenatedWordBuffer.append(aWord[i]); + // hyphenation position (not alternative) + if (hyphens[i]&1 && (!rep || !rep[i])) + { + pPos[nHyphCount] = i; + hyphenatedWordBuffer.append(sal_Unicode('=')); + nHyphCount++; + } + } + + hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear(); + //fprintf(stderr,"result is %s\n",OU2A(hyphenatedWord)); + //fflush(stderr); + + xRes = new PossibleHyphens( aWord, LocaleToLanguage( aLocale ), + hyphenatedWord, aHyphPos ); + + delete[] hyphens; + delete[] lcword; + + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + + return xRes; + } + + return NULL; } sal_uInt16 SAL_CALL Hyphenator::capitalType(const OUString& aTerm, CharClass * pCC) { - sal_Int32 tlen = aTerm.getLength(); - if ((pCC) && (tlen)) { - String aStr(aTerm); - sal_Int32 nc = 0; - for (xub_StrLen tindex = 0; tindex < tlen; tindex++) { - if (pCC->getCharacterType(aStr,tindex) & - ::com::sun::star::i18n::KCharacterType::UPPER) nc++; - } - - if (nc == 0) return (sal_uInt16) CAPTYPE_NOCAP; - - if (nc == tlen) return (sal_uInt16) CAPTYPE_ALLCAP; + sal_Int32 tlen = aTerm.getLength(); + if ((pCC) && (tlen)) + { + String aStr(aTerm); + sal_Int32 nc = 0; + for (xub_StrLen tindex = 0; tindex < tlen; tindex++) + { + if (pCC->getCharacterType(aStr,tindex) & ::com::sun::star::i18n::KCharacterType::UPPER) + nc++; + } - if ((nc == 1) && (pCC->getCharacterType(aStr,0) & - ::com::sun::star::i18n::KCharacterType::UPPER)) - return (sal_uInt16) CAPTYPE_INITCAP; + if (nc == 0) + return (sal_uInt16) CAPTYPE_NOCAP; + if (nc == tlen) + return (sal_uInt16) CAPTYPE_ALLCAP; + if ((nc == 1) && (pCC->getCharacterType(aStr,0) & ::com::sun::star::i18n::KCharacterType::UPPER)) + return (sal_uInt16) CAPTYPE_INITCAP; - return (sal_uInt16) CAPTYPE_MIXED; + return (sal_uInt16) CAPTYPE_MIXED; } - return (sal_uInt16) CAPTYPE_UNKNOWN; + return (sal_uInt16) CAPTYPE_UNKNOWN; } OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * pCC) { - if (pCC) - return pCC->toLower_rtl(aTerm, 0, aTerm.getLength()); - return aTerm; + if (pCC) + return pCC->toLower_rtl(aTerm, 0, aTerm.getLength()); + return aTerm; } OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * pCC) { - if (pCC) - return pCC->toUpper_rtl(aTerm, 0, aTerm.getLength()); - return aTerm; + if (pCC) + return pCC->toUpper_rtl(aTerm, 0, aTerm.getLength()); + return aTerm; } OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * pCC) { - sal_Int32 tlen = aTerm.getLength(); - if ((pCC) && (tlen)) { - OUString bTemp = aTerm.copy(0,1); - if (tlen > 1) - return ( pCC->toUpper_rtl(bTemp, 0, 1) - + pCC->toLower_rtl(aTerm,1,(tlen-1)) ); - - return pCC->toUpper_rtl(bTemp, 0, 1); + sal_Int32 tlen = aTerm.getLength(); + if ((pCC) && (tlen)) + { + OUString bTemp = aTerm.copy(0,1); + if (tlen > 1) + return ( pCC->toUpper_rtl(bTemp, 0, 1) + pCC->toLower_rtl(aTerm,1,(tlen-1)) ); + + return pCC->toUpper_rtl(bTemp, 0, 1); } - return aTerm; + return aTerm; } - - Reference< XInterface > SAL_CALL Hyphenator_CreateInstance( - const Reference< XMultiServiceFactory > & /*rSMgr*/ ) + const Reference< XMultiServiceFactory > & /*rSMgr*/ ) throw(Exception) { - Reference< XInterface > xService = (cppu::OWeakObject*) new Hyphenator; return xService; } -sal_Bool SAL_CALL - Hyphenator::addLinguServiceEventListener( - const Reference< XLinguServiceEventListener >& rxLstnr ) +sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); @@ -787,9 +817,8 @@ sal_Bool SAL_CALL } -sal_Bool SAL_CALL - Hyphenator::removeLinguServiceEventListener( - const Reference< XLinguServiceEventListener >& rxLstnr ) +sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); @@ -804,8 +833,7 @@ sal_Bool SAL_CALL } -OUString SAL_CALL - Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ ) +OUString SAL_CALL Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ ) throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); @@ -813,8 +841,7 @@ OUString SAL_CALL } -void SAL_CALL - Hyphenator::initialize( const Sequence< Any >& rArguments ) +void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments ) throw(Exception, RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); @@ -836,16 +863,15 @@ void SAL_CALL xPropHelper = pPropHelper; pPropHelper->AddAsPropListener(); //! after a reference is established } - else { + else + { DBG_ERROR( "wrong number of arguments in sequence" ); } - } } -void SAL_CALL - Hyphenator::dispose() +void SAL_CALL Hyphenator::dispose() throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); @@ -859,8 +885,7 @@ void SAL_CALL } -void SAL_CALL - Hyphenator::addEventListener( const Reference< XEventListener >& rxListener ) +void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener ) throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); @@ -870,8 +895,7 @@ void SAL_CALL } -void SAL_CALL - Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener ) +void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener ) throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); @@ -931,7 +955,6 @@ Sequence< OUString > Hyphenator::getSupportedServiceNames_Static() sal_Bool SAL_CALL Hyphenator_writeInfo( void * /*pServiceManager*/, registry::XRegistryKey * pRegistryKey ) { - try { String aImpl( '/' ); diff --git a/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.hxx b/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.hxx index 031217914754..e20b4f58f589 100644..100755 --- a/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.hxx +++ b/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.hxx @@ -65,7 +65,7 @@ struct HDInfo { HyphenDict * aPtr; OUString aName; Locale aLoc; - rtl_TextEncoding aEnc; + rtl_TextEncoding eEnc; CharClass * apCC; }; @@ -83,12 +83,12 @@ class Hyphenator : > { Sequence< Locale > aSuppLocales; - HDInfo * aDicts; - sal_Int32 numdict; + HDInfo * aDicts; + sal_Int32 numdict; ::cppu::OInterfaceContainerHelper aEvtListeners; Reference< XPropertyChangeListener > xPropHelper; - Reference< XMultiServiceFactory > rSMgr; + Reference< XMultiServiceFactory > rSMgr; linguistic::PropertyHelper_Hyphen * pPropHelper; BOOL bDisposing; @@ -108,88 +108,38 @@ public: virtual ~Hyphenator(); // XSupportedLocales (for XHyphenator) - virtual Sequence< Locale > SAL_CALL getLocales() - throw(RuntimeException); - virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) - throw(RuntimeException); + virtual Sequence< Locale > SAL_CALL getLocales() throw(RuntimeException); + virtual sal_Bool SAL_CALL hasLocale( const Locale& rLocale ) throw(RuntimeException); // XHyphenator - virtual ::com::sun::star::uno::Reference< ::com::sun::star::linguistic2::XHyphenatedWord > SAL_CALL - hyphenate( const ::rtl::OUString& aWord, - const ::com::sun::star::lang::Locale& aLocale, - sal_Int16 nMaxLeading, - const ::com::sun::star::beans::PropertyValues& aProperties ) - throw(::com::sun::star::lang::IllegalArgumentException, - ::com::sun::star::uno::RuntimeException); - - virtual ::com::sun::star::uno::Reference< ::com::sun::star::linguistic2::XHyphenatedWord > SAL_CALL - queryAlternativeSpelling( const ::rtl::OUString& aWord, - const ::com::sun::star::lang::Locale& aLocale, - sal_Int16 nIndex, - const ::com::sun::star::beans::PropertyValues& aProperties ) - throw(::com::sun::star::lang::IllegalArgumentException, - ::com::sun::star::uno::RuntimeException); - - virtual ::com::sun::star::uno::Reference< ::com::sun::star::linguistic2::XPossibleHyphens > SAL_CALL - createPossibleHyphens( const ::rtl::OUString& aWord, - const ::com::sun::star::lang::Locale& aLocale, - const ::com::sun::star::beans::PropertyValues& aProperties ) - throw(::com::sun::star::lang::IllegalArgumentException, - ::com::sun::star::uno::RuntimeException); + virtual ::com::sun::star::uno::Reference< ::com::sun::star::linguistic2::XHyphenatedWord > SAL_CALL hyphenate( const ::rtl::OUString& aWord, const ::com::sun::star::lang::Locale& aLocale, sal_Int16 nMaxLeading, const ::com::sun::star::beans::PropertyValues& aProperties ) throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException); + virtual ::com::sun::star::uno::Reference< ::com::sun::star::linguistic2::XHyphenatedWord > SAL_CALL queryAlternativeSpelling( const ::rtl::OUString& aWord, const ::com::sun::star::lang::Locale& aLocale, sal_Int16 nIndex, const ::com::sun::star::beans::PropertyValues& aProperties ) throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException); + virtual ::com::sun::star::uno::Reference< ::com::sun::star::linguistic2::XPossibleHyphens > SAL_CALL createPossibleHyphens( const ::rtl::OUString& aWord, const ::com::sun::star::lang::Locale& aLocale, const ::com::sun::star::beans::PropertyValues& aProperties ) throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException); // XLinguServiceEventBroadcaster - virtual sal_Bool SAL_CALL - addLinguServiceEventListener( - const Reference< XLinguServiceEventListener >& rxLstnr ) - throw(RuntimeException); - virtual sal_Bool SAL_CALL - removeLinguServiceEventListener( - const Reference< XLinguServiceEventListener >& rxLstnr ) - throw(RuntimeException); + virtual sal_Bool SAL_CALL addLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) throw(RuntimeException); + virtual sal_Bool SAL_CALL removeLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) throw(RuntimeException); // XServiceDisplayName - virtual OUString SAL_CALL - getServiceDisplayName( const Locale& rLocale ) - throw(RuntimeException); + virtual OUString SAL_CALL getServiceDisplayName( const Locale& rLocale ) throw(RuntimeException); // XInitialization - virtual void SAL_CALL - initialize( const Sequence< Any >& rArguments ) - throw(Exception, RuntimeException); + virtual void SAL_CALL initialize( const Sequence< Any >& rArguments ) throw(Exception, RuntimeException); // XComponent - virtual void SAL_CALL - dispose() - throw(RuntimeException); - virtual void SAL_CALL - addEventListener( const Reference< XEventListener >& rxListener ) - throw(RuntimeException); - virtual void SAL_CALL - removeEventListener( const Reference< XEventListener >& rxListener ) - throw(RuntimeException); - - //////////////////////////////////////////////////////////// - // Service specific part - // + virtual void SAL_CALL dispose() throw(RuntimeException); + virtual void SAL_CALL addEventListener( const Reference< XEventListener >& rxListener ) throw(RuntimeException); + virtual void SAL_CALL removeEventListener( const Reference< XEventListener >& rxListener ) throw(RuntimeException); // XServiceInfo - virtual OUString SAL_CALL - getImplementationName() - throw(RuntimeException); - virtual sal_Bool SAL_CALL - supportsService( const OUString& rServiceName ) - throw(RuntimeException); - virtual Sequence< OUString > SAL_CALL - getSupportedServiceNames() - throw(RuntimeException); - - - static inline OUString - getImplementationName_Static() throw(); - static Sequence< OUString > - getSupportedServiceNames_Static() throw(); + virtual OUString SAL_CALL getImplementationName() throw(RuntimeException); + virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) throw(RuntimeException); + virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() throw(RuntimeException); + static inline OUString getImplementationName_Static() throw(); + static Sequence< OUString > getSupportedServiceNames_Static() throw(); + private: sal_uInt16 SAL_CALL capitalType(const OUString&, CharClass *); |