diff options
Diffstat (limited to 'lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx')
-rw-r--r-- | lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx | 873 |
1 files changed, 439 insertions, 434 deletions
diff --git a/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx b/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx index e9325e3ba5bb..8324c4fcba0b 100644 --- a/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx +++ b/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx @@ -41,13 +41,12 @@ #include <osl/mutex.hxx> #include <hyphen.h> - #include <hyphenimp.hxx> #include <linguistic/hyphdta.hxx> #include <rtl/ustring.hxx> - #include <rtl/ustrbuf.hxx> +#include <rtl/textenc.h> #include <linguistic/lngprops.hxx> #include <unotools/pathoptions.hxx> @@ -90,11 +89,10 @@ using ::rtl::OUString; Hyphenator::Hyphenator() : aEvtListeners ( GetLinguMutex() ) { - bDisposing = FALSE; + bDisposing = sal_False; pPropHelper = NULL; - aDicts = NULL; - numdict = 0; - + aDicts = NULL; + numdict = 0; } @@ -103,21 +101,22 @@ Hyphenator::~Hyphenator() if (pPropHelper) pPropHelper->RemoveAsPropListener(); - if ((numdict) && (aDicts)) { - for (int i=0; i < numdict; i++) { + if ((numdict) && (aDicts)) + { + for (int i=0; i < numdict; i++) + { if (aDicts[i].apCC) delete aDicts[i].apCC; aDicts[i].apCC = NULL; - } + } } - if (aDicts) delete[] aDicts; + if (aDicts) delete[] aDicts; aDicts = NULL; - numdict = 0; + numdict = 0; } PropertyHelper_Hyphen & Hyphenator::GetPropHelper_Impl() { - if (!pPropHelper) { Reference< XPropertySet > xPropSet( GetLinguProperties(), UNO_QUERY ); @@ -223,7 +222,7 @@ Sequence< Locale > SAL_CALL Hyphenator::getLocales() for (sal_Int32 i = 0; i < nLocales; ++i) { aDicts[k].aPtr = NULL; - aDicts[k].aEnc = 0; + aDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW; aDicts[k].aLoc = MsLangId::convertLanguageToLocale( MsLangId::convertIsoStringToLanguage( aDictIt->aLocaleNames[i] )); aDicts[k].apCC = new CharClass( aDicts[k].aLoc ); @@ -260,17 +259,17 @@ sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale) { MutexGuard aGuard( GetLinguMutex() ); - BOOL bRes = FALSE; + sal_Bool bRes = sal_False; if (!aSuppLocales.getLength()) getLocales(); const Locale *pLocale = aSuppLocales.getConstArray(); - INT32 nLen = aSuppLocales.getLength(); - for (INT32 i = 0; i < nLen; ++i) + sal_Int32 nLen = aSuppLocales.getLength(); + for (sal_Int32 i = 0; i < nLen; ++i) { if (rLocale == pLocale[i]) { - bRes = TRUE; + bRes = sal_True; break; } } @@ -278,240 +277,268 @@ sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale) } -Reference< XHyphenatedWord > SAL_CALL -Hyphenator::hyphenate( const ::rtl::OUString& aWord, - const ::com::sun::star::lang::Locale& aLocale, - sal_Int16 nMaxLeading, - const ::com::sun::star::beans::PropertyValues& aProperties ) - throw (com::sun::star::uno::RuntimeException, - com::sun::star::lang::IllegalArgumentException) +Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const ::rtl::OUString& aWord, + const ::com::sun::star::lang::Locale& aLocale, + sal_Int16 nMaxLeading, + const ::com::sun::star::beans::PropertyValues& aProperties ) + throw (com::sun::star::uno::RuntimeException, com::sun::star::lang::IllegalArgumentException) { int nHyphenationPos = -1; - int nHyphenationPosAlt = -1; - int nHyphenationPosAltHyph = -1; + int nHyphenationPosAlt = -1; + int nHyphenationPosAltHyph = -1; int wordlen; char *hyphens; - char *lcword; - int k = 0; + char *lcword; + int k = 0; - PropertyHelper_Hyphen & rHelper = GetPropHelper(); - rHelper.SetTmpPropVals(aProperties); + PropertyHelper_Hyphen & rHelper = GetPropHelper(); + rHelper.SetTmpPropVals(aProperties); sal_Int16 minTrail = rHelper.GetMinTrailing(); sal_Int16 minLead = rHelper.GetMinLeading(); sal_Int16 minLen = rHelper.GetMinWordLength(); HyphenDict *dict = NULL; - rtl_TextEncoding aEnc = 0; - CharClass * pCC = NULL; + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + CharClass * pCC = NULL; Reference< XHyphenatedWord > xRes; - k = -1; - for (int j = 0; j < numdict; j++) - if (aLocale == aDicts[j].aLoc) k = j; - - - // if we have a hyphenation dictionary matching this locale - if (k != -1) { + k = -1; + for (int j = 0; j < numdict; j++) + { + if (aLocale == aDicts[j].aLoc) + k = j; + } + // if we have a hyphenation dictionary matching this locale + if (k != -1) + { // if this dictinary has not been loaded yet do that - if (!aDicts[k].aPtr) { - - OUString DictFN = aDicts[k].aName + A2OU(".dic"); - OUString dictpath; + if (!aDicts[k].aPtr) + { + OUString DictFN = aDicts[k].aName + A2OU(".dic"); + OUString dictpath; - osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath ); - OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) ); + osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath ); + OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) ); #if defined(WNT) - // workaround for Windows specifc problem that the - // path length in calls to 'fopen' is limted to somewhat - // about 120+ characters which will usually be exceed when - // using dictionaries as extensions. - sTmp = Win_GetShortPathName( dictpath ); + // workaround for Windows specifc problem that the + // path length in calls to 'fopen' is limted to somewhat + // about 120+ characters which will usually be exceed when + // using dictionaries as extensions. + sTmp = Win_GetShortPathName( dictpath ); #endif - if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL ) - { - fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) ); - return NULL; - } - aDicts[k].aPtr = dict; - aDicts[k].aEnc = rtl_getTextEncodingFromUnixCharset(dict->cset); - if (aDicts[k].aEnc == RTL_TEXTENCODING_DONTKNOW) { - if (strcmp("ISCII-DEVANAGARI", dict->cset) == 0) { - aDicts[k].aEnc = RTL_TEXTENCODING_ISCII_DEVANAGARI; - } else if (strcmp("UTF-8", dict->cset) == 0) { - aDicts[k].aEnc = RTL_TEXTENCODING_UTF8; - } - } + if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL ) + { + fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) ); + return NULL; + } + aDicts[k].aPtr = dict; + aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset); } - // other wise hyphenate the word with that dictionary - dict = aDicts[k].aPtr; - aEnc = aDicts[k].aEnc; - pCC = aDicts[k].apCC; + // other wise hyphenate the word with that dictionary + dict = aDicts[k].aPtr; + eEnc = aDicts[k].eEnc; + pCC = aDicts[k].apCC; + + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. + // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. ;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return NULL; - sal_uInt16 ct = CAPTYPE_UNKNOWN; - ct = capitalType(aWord, pCC); + sal_uInt16 ct = CAPTYPE_UNKNOWN; + ct = capitalType(aWord, pCC); - // first convert any smart quotes or apostrophes to normal ones + // first convert any smart quotes or apostrophes to normal ones OUStringBuffer rBuf(aWord); - sal_Int32 nc = rBuf.getLength(); - sal_Unicode ch; - for (sal_Int32 ix=0; ix < nc; ix++) { + sal_Int32 nc = rBuf.getLength(); + sal_Unicode ch; + for (sal_Int32 ix=0; ix < nc; ix++) + { ch = rBuf.charAt(ix); - if ((ch == 0x201C) || (ch == 0x201D)) rBuf.setCharAt(ix,(sal_Unicode)0x0022); - if ((ch == 0x2018) || (ch == 0x2019)) rBuf.setCharAt(ix,(sal_Unicode)0x0027); - } - OUString nWord(rBuf.makeStringAndClear()); + if ((ch == 0x201C) || (ch == 0x201D)) + rBuf.setCharAt(ix,(sal_Unicode)0x0022); + if ((ch == 0x2018) || (ch == 0x2019)) + rBuf.setCharAt(ix,(sal_Unicode)0x0027); + } + OUString nWord(rBuf.makeStringAndClear()); - // now convert word to all lowercase for pattern recognition - OUString nTerm(makeLowerCase(nWord, pCC)); + // now convert word to all lowercase for pattern recognition + OUString nTerm(makeLowerCase(nWord, pCC)); - // now convert word to needed encoding - OString encWord(OU2ENC(nTerm,aEnc)); + // now convert word to needed encoding + OString encWord(OU2ENC(nTerm,eEnc)); wordlen = encWord.getLength(); - lcword = new char[wordlen + 1]; + lcword = new char[wordlen + 1]; hyphens = new char[wordlen + 5]; - char ** rep = NULL; // replacements of discretionary hyphenation - int * pos = NULL; // array of [hyphenation point] minus [deletion position] - int * cut = NULL; // length of deletions in original word - - // copy converted word into simple char buffer - strcpy(lcword,encWord.getStr()); - - // now strip off any ending periods - int n = wordlen-1; - while((n >=0) && (lcword[n] == '.')) n--; - n++; - if (n > 0) { - if (hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL, &rep, &pos, &cut, - minLead, minTrail, Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))), - Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))))) - { - //whoops something did not work - delete[] hyphens; - delete[] lcword; - if (rep) { - for(int j = 0; j < n; j++) { + char ** rep = NULL; // replacements of discretionary hyphenation + int * pos = NULL; // array of [hyphenation point] minus [deletion position] + int * cut = NULL; // length of deletions in original word + + // copy converted word into simple char buffer + strcpy(lcword,encWord.getStr()); + + // now strip off any ending periods + int n = wordlen-1; + while((n >=0) && (lcword[n] == '.')) + n--; + n++; + if (n > 0) + { + const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword, n, hyphens, NULL, + &rep, &pos, &cut, minLead, minTrail, + Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))), + Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) ); + if (bFailed) + { + //whoops something did not work + delete[] hyphens; + delete[] lcword; + if (rep) + { + for(int j = 0; j < n; j++) + { if (rep[j]) free(rep[j]); } free(rep); - } - if (pos) free(pos); - if (cut) free(cut); - return NULL; - } + } + if (pos) free(pos); + if (cut) free(cut); + return NULL; } + } - // now backfill hyphens[] for any removed trailing periods - for (int c = n; c < wordlen; c++) hyphens[c] = '0'; - hyphens[wordlen] = '\0'; + // now backfill hyphens[] for any removed trailing periods + for (int c = n; c < wordlen; c++) hyphens[c] = '0'; + hyphens[wordlen] = '\0'; - INT32 Leading = GetPosInWordToCheck( aWord, nMaxLeading ); + sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading ); - for (INT32 i = 0; i < n; i++) + for (sal_Int32 i = 0; i < n; i++) { - int leftrep = 0; - BOOL hit = (n >= minLen); - if (!rep || !rep[i] || (i >= n)) { - hit = hit && (hyphens[i]&1) && (i < Leading); - hit = hit && (i >= (minLead-1) ); - hit = hit && ((n - i - 1) >= minTrail); - } else { - // calculate change character length before hyphenation point signed with '=' - for (char * c = rep[i]; *c && (*c != '='); c++) { - if (aEnc == RTL_TEXTENCODING_UTF8) { - if (((unsigned char) *c) >> 6 != 2) leftrep++; - } else leftrep++; + int leftrep = 0; + sal_Bool hit = (n >= minLen); + if (!rep || !rep[i] || (i >= n)) + { + hit = hit && (hyphens[i]&1) && (i < Leading); + hit = hit && (i >= (minLead-1) ); + hit = hit && ((n - i - 1) >= minTrail); + } + else + { + // calculate change character length before hyphenation point signed with '=' + for (char * c = rep[i]; *c && (*c != '='); c++) + { + if (eEnc == RTL_TEXTENCODING_UTF8) + { + if (((unsigned char) *c) >> 6 != 2) + leftrep++; } - hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading); - hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) ); - hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail); + else + leftrep++; } - if (hit) { - nHyphenationPos = i; - if (rep && (i < n) && rep[i]) { - nHyphenationPosAlt = i - pos[i]; - nHyphenationPosAltHyph = i + leftrep - pos[i]; - } + hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading); + hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) ); + hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail); + } + if (hit) + { + nHyphenationPos = i; + if (rep && (i < n) && rep[i]) + { + nHyphenationPosAlt = i - pos[i]; + nHyphenationPosAltHyph = i + leftrep - pos[i]; } - } - - if (nHyphenationPos == -1) { - xRes = NULL; - } else { - if (rep && rep[nHyphenationPos]) { - // remove equal sign - char * s = rep[nHyphenationPos]; - int eq = 0; - for (; *s; s++) { - if (*s == '=') eq = 1; - if (eq) *s = *(s + 1); - } - OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), aEnc); - OUString repHyph; - switch (ct) { - case CAPTYPE_ALLCAP: - { - repHyph = makeUpperCase(repHyphlow, pCC); - break; - } - case CAPTYPE_INITCAP: - { - if (nHyphenationPosAlt == 0) { - repHyph = makeInitCap(repHyphlow, pCC); - } else { - repHyph = repHyphlow; - } - break; - } - default: - { - repHyph = repHyphlow; - break; - } } + } - // handle shortening - INT16 nPos = (INT16) ((nHyphenationPosAltHyph < nHyphenationPos) ? - nHyphenationPosAltHyph : nHyphenationPos); - // dicretionary hyphenation + if (nHyphenationPos == -1) + { + xRes = NULL; + } + else + { + if (rep && rep[nHyphenationPos]) + { + // remove equal sign + char * s = rep[nHyphenationPos]; + int eq = 0; + for (; *s; s++) + { + if (*s == '=') eq = 1; + if (eq) *s = *(s + 1); + } + OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc); + OUString repHyph; + switch (ct) + { + case CAPTYPE_ALLCAP: + { + repHyph = makeUpperCase(repHyphlow, pCC); + break; + } + case CAPTYPE_INITCAP: + { + if (nHyphenationPosAlt == 0) + repHyph = makeInitCap(repHyphlow, pCC); + else + repHyph = repHyphlow; + break; + } + default: + { + repHyph = repHyphlow; + break; + } + } + + // handle shortening + sal_Int16 nPos = (sal_Int16) ((nHyphenationPosAltHyph < nHyphenationPos) ? + nHyphenationPosAltHyph : nHyphenationPos); + // dicretionary hyphenation xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale ), nPos, - aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph), - (INT16) nHyphenationPosAltHyph); - } else { + aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph), + (sal_Int16) nHyphenationPosAltHyph); + } + else + { xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale ), - (INT16)nHyphenationPos, aWord, (INT16) nHyphenationPos); - } + (sal_Int16)nHyphenationPos, aWord, (sal_Int16) nHyphenationPos); + } } - delete[] lcword; + delete[] lcword; delete[] hyphens; - if (rep) { - for(int j = 0; j < n; j++) { - if (rep[j]) free(rep[j]); - } - free(rep); + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); } - if (pos) free(pos); - if (cut) free(cut); + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); return xRes; } - return NULL; + return NULL; } -Reference < XHyphenatedWord > SAL_CALL - Hyphenator::queryAlternativeSpelling( const ::rtl::OUString& /*aWord*/, - const ::com::sun::star::lang::Locale& /*aLocale*/, - sal_Int16 /*nIndex*/, - const ::com::sun::star::beans::PropertyValues& /*aProperties*/ ) - throw(::com::sun::star::lang::IllegalArgumentException, - ::com::sun::star::uno::RuntimeException) +Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling( + const ::rtl::OUString& /*aWord*/, + const ::com::sun::star::lang::Locale& /*aLocale*/, + sal_Int16 /*nIndex*/, + const ::com::sun::star::beans::PropertyValues& /*aProperties*/ ) + throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException) { /* alternative spelling isn't supported by tex dictionaries */ /* XXX: OOo's extended libhjn algorithm can support alternative spellings with extended TeX dic. */ @@ -519,252 +546,263 @@ Reference < XHyphenatedWord > SAL_CALL return NULL; } -Reference< XPossibleHyphens > SAL_CALL - Hyphenator::createPossibleHyphens( const ::rtl::OUString& aWord, - const ::com::sun::star::lang::Locale& aLocale, - const ::com::sun::star::beans::PropertyValues& aProperties ) - throw(::com::sun::star::lang::IllegalArgumentException, - ::com::sun::star::uno::RuntimeException) - +Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const ::rtl::OUString& aWord, + const ::com::sun::star::lang::Locale& aLocale, + const ::com::sun::star::beans::PropertyValues& aProperties ) + throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException) { - int wordlen; - char *hyphens; - char *lcword; - int k; + int wordlen; + char *hyphens; + char *lcword; + int k; - PropertyHelper_Hyphen & rHelper = GetPropHelper(); - rHelper.SetTmpPropVals(aProperties); - sal_Int16 minTrail = rHelper.GetMinTrailing(); - sal_Int16 minLead = rHelper.GetMinLeading(); + PropertyHelper_Hyphen & rHelper = GetPropHelper(); + rHelper.SetTmpPropVals(aProperties); + sal_Int16 minTrail = rHelper.GetMinTrailing(); + sal_Int16 minLead = rHelper.GetMinLeading(); - HyphenDict *dict = NULL; - rtl_TextEncoding aEnc = 0; - CharClass* pCC = NULL; + HyphenDict *dict = NULL; + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + CharClass* pCC = NULL; - Reference< XPossibleHyphens > xRes; + Reference< XPossibleHyphens > xRes; - k = -1; - for (int j = 0; j < numdict; j++) - if (aLocale == aDicts[j].aLoc) k = j; + k = -1; + for (int j = 0; j < numdict; j++) + { + if (aLocale == aDicts[j].aLoc) k = j; + } + // if we have a hyphenation dictionary matching this locale + if (k != -1) + { + // if this dictioanry has not been loaded yet do that + if (!aDicts[k].aPtr) + { + OUString DictFN = aDicts[k].aName + A2OU(".dic"); + OUString dictpath; - // if we have a hyphenation dictionary matching this locale - if (k != -1) { + osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath ); + OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) ); - // if this dictioanry has not been loaded yet do that - if (!aDicts[k].aPtr) { +#if defined(WNT) + // workaround for Windows specifc problem that the + // path length in calls to 'fopen' is limted to somewhat + // about 120+ characters which will usually be exceed when + // using dictionaries as extensions. + sTmp = Win_GetShortPathName( dictpath ); +#endif - OUString DictFN = aDicts[k].aName + A2OU(".dic"); - OUString dictpath; + if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL ) + { + fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) ); + return NULL; + } + aDicts[k].aPtr = dict; + aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset); + } - osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath ); - OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) ); + // other wise hyphenate the word with that dictionary + dict = aDicts[k].aPtr; + eEnc = aDicts[k].eEnc; + pCC = aDicts[k].apCC; -#if defined(WNT) - // workaround for Windows specifc problem that the - // path length in calls to 'fopen' is limted to somewhat - // about 120+ characters which will usually be exceed when - // using dictionaries as extensions. - sTmp = Win_GetShortPathName( dictpath ); -#endif + // we don't want to work with a default text encoding since following incorrect + // results may occur only for specific text and thus may be hard to notice. + // Thus better always make a clean exit here if the text encoding is in question. + // Hopefully something not working at all will raise proper attention quickly. ;-) + DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); + if (eEnc == RTL_TEXTENCODING_DONTKNOW) + return NULL; - if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL ) + // first handle smart quotes both single and double + OUStringBuffer rBuf(aWord); + sal_Int32 nc = rBuf.getLength(); + sal_Unicode ch; + for (sal_Int32 ix=0; ix < nc; ix++) { - fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) ); - return NULL; + ch = rBuf.charAt(ix); + if ((ch == 0x201C) || (ch == 0x201D)) + rBuf.setCharAt(ix,(sal_Unicode)0x0022); + if ((ch == 0x2018) || (ch == 0x2019)) + rBuf.setCharAt(ix,(sal_Unicode)0x0027); } - aDicts[k].aPtr = dict; - aDicts[k].aEnc = rtl_getTextEncodingFromUnixCharset(dict->cset); - if (aDicts[k].aEnc == RTL_TEXTENCODING_DONTKNOW) { - if (strcmp("ISCII-DEVANAGARI", dict->cset) == 0) { - aDicts[k].aEnc = RTL_TEXTENCODING_ISCII_DEVANAGARI; - } else if (strcmp("UTF-8", dict->cset) == 0) { - aDicts[k].aEnc = RTL_TEXTENCODING_UTF8; + OUString nWord(rBuf.makeStringAndClear()); + + // now convert word to all lowercase for pattern recognition + OUString nTerm(makeLowerCase(nWord, pCC)); + + // now convert word to needed encoding + OString encWord(OU2ENC(nTerm,eEnc)); + + wordlen = encWord.getLength(); + lcword = new char[wordlen+1]; + hyphens = new char[wordlen+5]; + char ** rep = NULL; // replacements of discretionary hyphenation + int * pos = NULL; // array of [hyphenation point] minus [deletion position] + int * cut = NULL; // length of deletions in original word + + // copy converted word into simple char buffer + strcpy(lcword,encWord.getStr()); + + // first remove any trailing periods + int n = wordlen-1; + while((n >=0) && (lcword[n] == '.')) + n--; + n++; + if (n > 0) + { + const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL, + &rep, &pos, &cut, minLead, minTrail, + Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))), + Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) ); + if (bFailed) + { + delete[] hyphens; + delete[] lcword; + + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + + return NULL; } - } - } - - // other wise hyphenate the word with that dictionary - dict = aDicts[k].aPtr; - aEnc = aDicts[k].aEnc; - pCC = aDicts[k].apCC; - - // first handle smart quotes both single and double - OUStringBuffer rBuf(aWord); - sal_Int32 nc = rBuf.getLength(); - sal_Unicode ch; - for (sal_Int32 ix=0; ix < nc; ix++) { - ch = rBuf.charAt(ix); - if ((ch == 0x201C) || (ch == 0x201D)) rBuf.setCharAt(ix,(sal_Unicode)0x0022); - if ((ch == 0x2018) || (ch == 0x2019)) rBuf.setCharAt(ix,(sal_Unicode)0x0027); - } - OUString nWord(rBuf.makeStringAndClear()); - - // now convert word to all lowercase for pattern recognition - OUString nTerm(makeLowerCase(nWord, pCC)); - - // now convert word to needed encoding - OString encWord(OU2ENC(nTerm,aEnc)); - - wordlen = encWord.getLength(); - lcword = new char[wordlen+1]; - hyphens = new char[wordlen+5]; - char ** rep = NULL; // replacements of discretionary hyphenation - int * pos = NULL; // array of [hyphenation point] minus [deletion position] - int * cut = NULL; // length of deletions in original word - - // copy converted word into simple char buffer - strcpy(lcword,encWord.getStr()); - - // first remove any trailing periods - int n = wordlen-1; - while((n >=0) && (lcword[n] == '.')) n--; - n++; - if (n > 0) { - if (hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL, &rep, &pos, &cut, - minLead, minTrail, Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))), - Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))))) - { - delete[] hyphens; - delete[] lcword; - - if (rep) { - for(int j = 0; j < n; j++) { - if (rep[j]) free(rep[j]); - } - free(rep); - } - if (pos) free(pos); - if (cut) free(cut); - - return NULL; - } - } - // now backfill hyphens[] for any removed periods - for (int c = n; c < wordlen; c++) hyphens[c] = '0'; - hyphens[wordlen] = '\0'; - - INT16 nHyphCount = 0; - INT16 i; - - for ( i = 0; i < encWord.getLength(); i++) - if (hyphens[i]&1 && (!rep || !rep[i])) - nHyphCount++; - - Sequence< INT16 > aHyphPos(nHyphCount); - INT16 *pPos = aHyphPos.getArray(); - OUStringBuffer hyphenatedWordBuffer; - OUString hyphenatedWord; - nHyphCount = 0; - - for (i = 0; i < nWord.getLength(); i++) { - hyphenatedWordBuffer.append(aWord[i]); - // hyphenation position (not alternative) - if (hyphens[i]&1 && (!rep || !rep[i])) { - pPos[nHyphCount] = i; - hyphenatedWordBuffer.append(sal_Unicode('=')); - nHyphCount++; - } - } - - hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear(); - - xRes = new PossibleHyphens( aWord, LocaleToLanguage( aLocale ), - hyphenatedWord, aHyphPos ); - - delete[] hyphens; - delete[] lcword; - - if (rep) { - for(int j = 0; j < n; j++) { - if (rep[j]) free(rep[j]); - } - free(rep); - } - if (pos) free(pos); - if (cut) free(cut); - - return xRes; - } + } + // now backfill hyphens[] for any removed periods + for (int c = n; c < wordlen; c++) + hyphens[c] = '0'; + hyphens[wordlen] = '\0'; - return NULL; + sal_Int16 nHyphCount = 0; + sal_Int16 i; + + for ( i = 0; i < encWord.getLength(); i++) + { + if (hyphens[i]&1 && (!rep || !rep[i])) + nHyphCount++; + } + + Sequence< sal_Int16 > aHyphPos(nHyphCount); + sal_Int16 *pPos = aHyphPos.getArray(); + OUStringBuffer hyphenatedWordBuffer; + OUString hyphenatedWord; + nHyphCount = 0; + + for (i = 0; i < nWord.getLength(); i++) + { + hyphenatedWordBuffer.append(aWord[i]); + // hyphenation position (not alternative) + if (hyphens[i]&1 && (!rep || !rep[i])) + { + pPos[nHyphCount] = i; + hyphenatedWordBuffer.append(sal_Unicode('=')); + nHyphCount++; + } + } + + hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear(); + + xRes = new PossibleHyphens( aWord, LocaleToLanguage( aLocale ), + hyphenatedWord, aHyphPos ); + + delete[] hyphens; + delete[] lcword; + + if (rep) + { + for(int j = 0; j < n; j++) + { + if (rep[j]) free(rep[j]); + } + free(rep); + } + if (pos) free(pos); + if (cut) free(cut); + + return xRes; + } + return NULL; } sal_uInt16 SAL_CALL Hyphenator::capitalType(const OUString& aTerm, CharClass * pCC) { - sal_Int32 tlen = aTerm.getLength(); - if ((pCC) && (tlen)) { - String aStr(aTerm); - sal_Int32 nc = 0; - for (xub_StrLen tindex = 0; tindex < tlen; tindex++) { - if (pCC->getCharacterType(aStr,tindex) & - ::com::sun::star::i18n::KCharacterType::UPPER) nc++; - } - - if (nc == 0) return (sal_uInt16) CAPTYPE_NOCAP; - - if (nc == tlen) return (sal_uInt16) CAPTYPE_ALLCAP; + sal_Int32 tlen = aTerm.getLength(); + if ((pCC) && (tlen)) + { + String aStr(aTerm); + sal_Int32 nc = 0; + for (xub_StrLen tindex = 0; tindex < tlen; tindex++) + { + if (pCC->getCharacterType(aStr,tindex) & ::com::sun::star::i18n::KCharacterType::UPPER) + nc++; + } - if ((nc == 1) && (pCC->getCharacterType(aStr,0) & - ::com::sun::star::i18n::KCharacterType::UPPER)) - return (sal_uInt16) CAPTYPE_INITCAP; + if (nc == 0) + return (sal_uInt16) CAPTYPE_NOCAP; + if (nc == tlen) + return (sal_uInt16) CAPTYPE_ALLCAP; + if ((nc == 1) && (pCC->getCharacterType(aStr,0) & ::com::sun::star::i18n::KCharacterType::UPPER)) + return (sal_uInt16) CAPTYPE_INITCAP; - return (sal_uInt16) CAPTYPE_MIXED; + return (sal_uInt16) CAPTYPE_MIXED; } - return (sal_uInt16) CAPTYPE_UNKNOWN; + return (sal_uInt16) CAPTYPE_UNKNOWN; } OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * pCC) { - if (pCC) - return pCC->toLower_rtl(aTerm, 0, aTerm.getLength()); - return aTerm; + if (pCC) + return pCC->toLower_rtl(aTerm, 0, aTerm.getLength()); + return aTerm; } OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * pCC) { - if (pCC) - return pCC->toUpper_rtl(aTerm, 0, aTerm.getLength()); - return aTerm; + if (pCC) + return pCC->toUpper_rtl(aTerm, 0, aTerm.getLength()); + return aTerm; } OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * pCC) { - sal_Int32 tlen = aTerm.getLength(); - if ((pCC) && (tlen)) { - OUString bTemp = aTerm.copy(0,1); - if (tlen > 1) - return ( pCC->toUpper_rtl(bTemp, 0, 1) - + pCC->toLower_rtl(aTerm,1,(tlen-1)) ); - - return pCC->toUpper_rtl(bTemp, 0, 1); + sal_Int32 tlen = aTerm.getLength(); + if ((pCC) && (tlen)) + { + OUString bTemp = aTerm.copy(0,1); + if (tlen > 1) + return ( pCC->toUpper_rtl(bTemp, 0, 1) + pCC->toLower_rtl(aTerm,1,(tlen-1)) ); + + return pCC->toUpper_rtl(bTemp, 0, 1); } - return aTerm; + return aTerm; } - - Reference< XInterface > SAL_CALL Hyphenator_CreateInstance( - const Reference< XMultiServiceFactory > & /*rSMgr*/ ) + const Reference< XMultiServiceFactory > & /*rSMgr*/ ) throw(Exception) { - Reference< XInterface > xService = (cppu::OWeakObject*) new Hyphenator; return xService; } -sal_Bool SAL_CALL - Hyphenator::addLinguServiceEventListener( - const Reference< XLinguServiceEventListener >& rxLstnr ) +sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); - BOOL bRes = FALSE; + sal_Bool bRes = sal_False; if (!bDisposing && rxLstnr.is()) { bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr ); @@ -773,14 +811,13 @@ sal_Bool SAL_CALL } -sal_Bool SAL_CALL - Hyphenator::removeLinguServiceEventListener( - const Reference< XLinguServiceEventListener >& rxLstnr ) +sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener( + const Reference< XLinguServiceEventListener >& rxLstnr ) throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); - BOOL bRes = FALSE; + sal_Bool bRes = sal_False; if (!bDisposing && rxLstnr.is()) { DBG_ASSERT( xPropHelper.is(), "xPropHelper non existent" ); @@ -790,8 +827,7 @@ sal_Bool SAL_CALL } -OUString SAL_CALL - Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ ) +OUString SAL_CALL Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ ) throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); @@ -799,15 +835,14 @@ OUString SAL_CALL } -void SAL_CALL - Hyphenator::initialize( const Sequence< Any >& rArguments ) +void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments ) throw(Exception, RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); if (!pPropHelper) { - INT32 nLen = rArguments.getLength(); + sal_Int32 nLen = rArguments.getLength(); if (2 == nLen) { Reference< XPropertySet > xPropSet; @@ -825,28 +860,25 @@ void SAL_CALL else { OSL_FAIL( "wrong number of arguments in sequence" ); } - } } -void SAL_CALL - Hyphenator::dispose() +void SAL_CALL Hyphenator::dispose() throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); if (!bDisposing) { - bDisposing = TRUE; + bDisposing = sal_True; EventObject aEvtObj( (XHyphenator *) this ); aEvtListeners.disposeAndClear( aEvtObj ); } } -void SAL_CALL - Hyphenator::addEventListener( const Reference< XEventListener >& rxListener ) +void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener ) throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); @@ -856,8 +888,7 @@ void SAL_CALL } -void SAL_CALL - Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener ) +void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener ) throw(RuntimeException) { MutexGuard aGuard( GetLinguMutex() ); @@ -887,10 +918,10 @@ sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName ) Sequence< OUString > aSNL = getSupportedServiceNames(); const OUString * pArray = aSNL.getConstArray(); - for( INT32 i = 0; i < aSNL.getLength(); i++ ) + for( sal_Int32 i = 0; i < aSNL.getLength(); i++ ) if( pArray[i] == ServiceName ) - return TRUE; - return FALSE; + return sal_True; + return sal_False; } @@ -913,32 +944,6 @@ Sequence< OUString > Hyphenator::getSupportedServiceNames_Static() return aSNS; } - -sal_Bool SAL_CALL Hyphenator_writeInfo( - void * /*pServiceManager*/, registry::XRegistryKey * pRegistryKey ) -{ - - try - { - String aImpl( '/' ); - aImpl += Hyphenator::getImplementationName_Static().getStr(); - aImpl.AppendAscii( "/UNO/SERVICES" ); - Reference< registry::XRegistryKey > xNewKey = - pRegistryKey->createKey( aImpl ); - Sequence< OUString > aServices = - Hyphenator::getSupportedServiceNames_Static(); - for( INT32 i = 0; i < aServices.getLength(); i++ ) - xNewKey->createKey( aServices.getConstArray()[i] ); - - return sal_True; - } - catch(Exception &) - { - return sal_False; - } -} - - void * SAL_CALL Hyphenator_getFactory( const sal_Char * pImplName, XMultiServiceFactory * pServiceManager, void * ) { |