From f023ac778c97881a53035f951fca95cee759888e Mon Sep 17 00:00:00 2001 From: Aron Budea Date: Thu, 17 Nov 2016 04:20:51 +0100 Subject: tdf#103922: add Tibetan syllable spellchecker Change-Id: Idd42597c95fac98023e3a6b671ef73f8ead7fabc From: https://github.com/eroux/hunspell-bo Reviewed-on: https://gerrit.libreoffice.org/30920 Reviewed-by: Andras Timar Tested-by: Andras Timar --- Dictionary_bo.mk | 19 +++ Module_dictionaries.mk | 1 + bo/CHANGELOG.md | 28 ++++ bo/META-INF/manifest.xml | 6 + bo/README.md | 41 ++++++ bo/bo.aff | 73 ++++++++++ bo/bo.dic | 373 +++++++++++++++++++++++++++++++++++++++++++++++ bo/description.xml | 16 ++ bo/dictionaries.xcu | 18 +++ 9 files changed, 575 insertions(+) create mode 100644 Dictionary_bo.mk create mode 100644 bo/CHANGELOG.md create mode 100644 bo/META-INF/manifest.xml create mode 100644 bo/README.md create mode 100644 bo/bo.aff create mode 100644 bo/bo.dic create mode 100644 bo/description.xml create mode 100644 bo/dictionaries.xcu diff --git a/Dictionary_bo.mk b/Dictionary_bo.mk new file mode 100644 index 0000000..f30b087 --- /dev/null +++ b/Dictionary_bo.mk @@ -0,0 +1,19 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_Dictionary_Dictionary,dict-bo,dictionaries/bo)) + +$(eval $(call gb_Dictionary_add_root_files,dict-bo,\ + dictionaries/bo/bo.aff \ + dictionaries/bo/bo.dic \ + dictionaries/bo/CHANGELOG.md \ + dictionaries/bo/README.md \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/Module_dictionaries.mk b/Module_dictionaries.mk index 338e3f2..c70f64f 100644 --- a/Module_dictionaries.mk +++ b/Module_dictionaries.mk @@ -23,6 +23,7 @@ $(eval $(call gb_Module_add_l10n_targets,dictionaries,\ Dictionary_be \ Dictionary_bg \ Dictionary_bn \ + Dictionary_bo \ Dictionary_br \ Dictionary_bs \ Dictionary_ca \ diff --git a/bo/CHANGELOG.md b/bo/CHANGELOG.md new file mode 100644 index 0000000..ef2e6f7 --- /dev/null +++ b/bo/CHANGELOG.md @@ -0,0 +1,28 @@ +# Change Log +All notable changes to this project will be documented in this file. +As of v0.3.0 this project adheres to [Semantic Versioning](http://semver.org/). It follows [some conventions](http://keepachangelog.com/). + +## [0.3.0] - 2016-08-31 +### Fixed +- ཧྤ -> ཧྥ + +### Added +- more wasurs and འུ suffix possibilities +- rare affixed particle combination འིའོ + +### Changed +- treat ཏྲ, མྲ, སྣྲ and སྨྲ as exceptions and list their possibilities + +### Removed +- remove erroneous དཀླ, བཏྲ and གཏྲ + +## [0.2.0] - 2015-08-15 +### Changed +- contains all possible "legal" Classical Tibetan syllables (coming from research in grammar books), not limited to a dictionary, see [tibetan-spellchecker](https://github.com/eroux/tibetan-spellchecker) + +### Added +- replacement proposals for archaic forms +- main proper name syllables (not including Sanskrit names) + +## [0.1.0] - 2013-06-15 +- initial release, contains syllables from the བོད་རྒྱ་ཚིག་མཛོད་ཆེན་མོ།. 
diff --git a/bo/META-INF/manifest.xml b/bo/META-INF/manifest.xml new file mode 100644 index 0000000..0383ca4 --- /dev/null +++ b/bo/META-INF/manifest.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/bo/README.md b/bo/README.md new file mode 100644 index 0000000..5dc8ccb --- /dev/null +++ b/bo/README.md @@ -0,0 +1,41 @@ +# Classical Tibetan syllable spellchecker for Hunspell + +You can find here the necessary files to use spell checking for Classical Tibetan at syllable level (not composed words) in [Hunspell](http://hunspell.sourceforge.net/) (used in [many applications](https://en.wikipedia.org/wiki/Hunspell#Uses)). + +Note that checking compound words for Tibetan is not possible with hunspell alone due to the absence of separation between words. + +## Using + +#### Global installation + +Under Linux or OSX, you can install the spellchecker globally and benefit from it in most applications. + +- under Linux, copy `bo.dic` and `bo.aff` to `/usr/share/hunspell`, or use the `hunspell-bo` package if available in your distribution (under [Debian](https://packages.debian.org/search?keywords=hunspell-bo) for example). +- Under OSX, copy `bo.dic` and `bo.aff` to `/Library/Spelling` and restart your machine. + +#### Application-specific installation + +- for Firefox, [an extension](https://addons.mozilla.org/fr/firefox/addon/tibetan-spellchecker/) is released +- for LibreOffice/OpenOffice see [this extension](http://extensions.openoffice.org/en/project/tibetan-syllable-spell-checker) +- for Adobe products (>= CS5.5), see the instructions on [this page](http://blog.napsys.com/2012/11/adding-hyphenation-and-spelling.html) +- for Chrome see [this feature request](https://bugs.chromium.org/p/chromium/issues/detail?id=662850) + +The sources for these extensions are in the [firefox](firefox/) and [lo](lo/) directories. To build them, run `make`. 
+ +## Building / Testing + +To rebuild `bo.dic` from the data in [tibetan-spellchecker](https://github.com/eroux/tibetan-spellchecker), run + + make update + +For a small test, run + + make test + +## Changes + +See [CHANGELOG.md](CHANGELOG.md). + +## License + +This work and the derived files are under the [Creative Commons CC0 license](LICENSE). diff --git a/bo/bo.aff b/bo/bo.aff new file mode 100644 index 0000000..f583723 --- /dev/null +++ b/bo/bo.aff @@ -0,0 +1,73 @@ +SET UTF-8 +LANGCODE bo + +# ignoring Ux0F35 and Ux0F37, as they are extremely rare but valid. +# This means that a syllable containing these signs at invalid places will not +# be detected by the spell checker, but this makes the code more simple at +# almost no cost +IGNORE ༵༷ + +NOSPLITSUGS + +# The "N" makes the suffixes mandatory. It is used only with B, example: དཀ/NB, +# in this case, དཀ is invalid, but is not if there is a suffix. +NEEDAFFIX N + +# grammatical suffixes only +SFX C Y 7 +SFX C 0 འི . +'brel_sgra +SFX C 0 འོ . +slar_bsdu +SFX C 0 འིའོ . +'brel_sgra_slar_bsdu +SFX C 0 ར . +la_don +SFX C 0 ས . +byed_sgra +SFX C 0 འང . +rgyan_sdud +SFX C 0 འམ . +'byed_sdud + +# Words with mandatory vowel or suffix +# If no ashung and no vowel, a suffix is mandatory +SFX B Y 6 +SFX B 0 ི/S . +i +SFX B 0 ུ/S . +u +SFX B 0 ེ/S . +e +SFX B 0 ོ/S . +o +SFX B 0 འ . +ashung +SFX B 0 /NS . + +# Non-mandatory vowel or suffix. In these cases, no ashung is possible, see +# the documentation. +SFX A Y 5 +SFX A 0 ི/S . +i +SFX A 0 ུ/S . +u +SFX A 0 ེ/S . +e +SFX A 0 ོ/S . +o +SFX A 0 /S . + +# List of suffixes used by both A and B +SFX S Y 18 +SFX S 0 ག . +g +SFX S 0 གས . +gs +SFX S 0 ང . +ng +SFX S 0 ངས . +ngs +SFX S 0 ད . +d +SFX S 0 ན . +n +SFX S 0 བ . +b +SFX S 0 བས . +bs +SFX S 0 མ . +m +SFX S 0 མས . +ms +SFX S 0 འི . +'brel_sgra +SFX S 0 འོ . +slar_bsdu +SFX S 0 འིའོ . +'brel_sgra_slar_bsdu +SFX S 0 འང . +rgyan_sdud +SFX S 0 འམ . +'byed_sdud +SFX S 0 ར . +r +SFX S 0 ལ . +l +SFX S 0 ས . 
+s + +# Replace archaic forms by modern ones +REP 6 +REP འིས ས +REP འའིས ས +REP འར ར +REP ནད ན +REP རད ར +REP ལད ལ diff --git a/bo/bo.dic b/bo/bo.dic new file mode 100644 index 0000000..f54115d --- /dev/null +++ b/bo/bo.dic @@ -0,0 +1,373 @@ +372 +བགླ/C +དམེའ +མདྲོན +བརྡའ +བརྟའ +དབའས +ཏྲ/C +ཏྲེས +ཐྲིག +མྲ/C +སྨྲ/C +སྨྲང +སྣྲོན +སྣྲུབས +སྣྲེལ +རྒྭ/C +ཧྥ/A +ཀརྨ/C +པདྨ/C +ཨཱ/C +རྒྭ/C +བསྭེ/C +རྭང +རྭི/C +དྭང +དྭ/C +ཏྭོན +ཀྭན +ཀྭས +ཧྭང +ཀྲའུ/C +ཀྲུའུ/C +ཁྲུའུ/C +སྒྱིའུ/C +ཅོའུ/C +གཅོའུ/C +ཐུའུ/C +དུའུ/C +དྲིའུ/C +ནོའུར +ཕེའུ/C +མུའུ/C +མོའུ/C +ཚུའུ/C +ལོའུ/C +ཧུའུ/C +ཧེའུ/C +ཧྲུའུ/C +བྲའོ/C +སླེའོ/C +ཀའུ/C +ཀིའུ/C +ཀེའུ/C +ཁིའུ/C +ཁེའུ/C +ཁྱིའུ/C +ཁྱེའུ/C +ཁྲིའུ/C +ཁྲེའུ/C +གའུ/C +གྲིའུ/C +གྲེའུ/C +གླེའུ/C +འགིའུ/C +རྒེའུ/C +སྒའུ/C +སྒེའུ/C +སྒྱེའུ/C +སྒྲེའུ/C +རྔེའུ/C +སྔེའུ/C +ཅེའུ/C +གཅིའུ/C +གཅེའུ/C +ལྕེའུ/C +རྗེའུ/C +ཉེའུ/C +སྙེའུ/C +ཏེའུ/C +གཏེའུ/C +རྟའུ/C +རྟེའུ/C +སྟེའུ/C +ཐའུ/C +ཐིའུ/C +ཐེའུ/C +ཐོའུ/C +མཐེའུ/C +དེའུ/C +དྲེའུ/C +མདེའུ/C +རྡེའུ/C +ལྡེའུ/C +སྡེའུ/C +ནའུ/C +ནེའུ/C +སྣེའུ/C +དཔེའུ/C +སྤའུ/C +སྤེའུ/C +སྤྱིའུ/C +སྤྲེའུ/C +ཕྲའུ/C +ཕྲེའུ/C +འཕེའུ/C +བེའུ/C +བྱའུ/C +བྱིའུ/C +བྱེའུ/C +བྲའུ/C +བྲེའུ/C +བྲོའུ/C +འབེའུ/C +སྦྲེའུ/C +མིའུ/C +མྱིའུ/C +རྨེའུ/C +སྨེའུ/C +ཙིའུ/C +ཙེའུ/C +གཙེའུ/C +རྩིའུ/C +རྩེའུ/C +ཚའུ/C +ཚེའུ/C +མཚེའུ/C +མཚེའུ/C +རྫིའུ/C +རྫེའུ/C +གཞུའུ/C +ཟེའུ/C +ཡེའུ/C +གཡིའུ/C +རེའུ/C +ལའུ/C +ལིའུ/C +ལེའུ/C +ཤའུ/C +ཤེའུ/C +སིའུ/C +སེའུ/C +སྲིའུ/C +སླེའུ/C +བསེའུ/C +ཨའུ/C +ཀ/A +ཀྱ/A +ཀྲ/A +ཀླ/A +དཀ/NB +དཀྱ/A +དཀྲ/A +བཀ/NB +བཀྱ/A +བཀྲ/A +བཀླ/A +རྐ/A +རྐྱ/A +ལྐ/A +སྐ/A +སྐྱ/A +སྐྲ/A +བརྐ/A +བརྐྱ/A +བསྐ/A +བསྐྱ/A +བསྐྲ/A +ཁ/A +ཁྱ/A +ཁྲ/A +མཁ/NB +མཁྱ/A +མཁྲ/A +འཁ/NB +འཁྱ/A +འཁྲ/A +ག/A +གྱ/A +གྲ/A +གླ/A +དག/NB +དགྱ/A +དགྲ/A +བག/NB +བགྱ/A +བགྲ/A +མག/NB +མགྱ/A +མགྲ/A +འག/NB +འགྱ/A +འགྲ/A +རྒ/A +རྒྱ/A +ལྒ/A +སྒ/A +སྒྱ/A +སྒྲ/A +བརྒ/A +བརྒྱ/A +བསྒ/A +བསྒྱ/A +བསྒྲ/A +ང/A +དང/NB +མང/NB +རྔ/A +ལྔ/A +སྔ/A +བརྔ/A +བསྔ/A +ཅ/A +གཅ/NB +བཅ/NB +ལྕ/A +ཆ/A +མཆ/NB +འཆ/NB +ཇ/A +མཇ/NB +འཇ/NB +རྗ/A +ལྗ/A +བརྗ/A +ཉ/A +གཉ/NB +མཉ/NB +རྙ/A +སྙ/A +བརྙ/A +བསྙ/A +ཏ/A +གཏ/NB 
+བཏ/NB +རྟ/A +ལྟ/A +སྟ/A +བརྟ/A +བལྟ/A +བསྟ/A +ཐ/A +མཐ/NB +འཐ/NB +ད/A +དྲ/A +གད/NB +བད/NB +མད/NB +འད/NB +འདྲ/A +རྡ/A +ལྡ/A +སྡ/A +བརྡ/A +བལྡ/A +བསྡ/A +ན/A +གན/NB +མན/NB +རྣ/A +སྣ/A +བརྣ/A +བསྣ/A +པ/A +པྱ/A +པྲ/A +དཔ/NB +དཔྱ/A +དཔྲ/A +ལྤ/A +སྤ/A +སྤྱ/A +སྤྲ/A +ཕ/A +ཕྱ/A +ཕྲ/A +འཕ/NB +འཕྱ/A +འཕྲ/A +བ/A +བྱ/A +བྲ/A +བླ/A +དབ/NB +དབྱ/A +དབྲ/A +འབ/NB +འབྱ/A +འབྲ/A +རྦ/A +ལྦ/A +སྦ/A +སྦྱ/A +སྦྲ/A +མ/A +མྱ/A +དམ/NB +དམྱ/A +རྨ/A +རྨྱ/A +སྨ/A +སྨྱ/A +ཙ/A +གཙ/NB +བཙ/NB +རྩ/A +སྩ/A +བརྩ/A +བསྩ/A +ཚ/A +མཚ/NB +འཚ/NB +ཛ/A +མཛ/NB +འཛ/NB +རྫ/A +བརྫ/A +ཝ/A +ཞ/A +གཞ/NB +བཞ/NB +ཟ/A +ཟླ/A +གཟ/NB +བཟ/NB +བཟླ/A +འ/A +ཡ/A +གཡ/NB +ར/A +རླ/A +བརླ/A +ལ/A +ཤ/A +གཤ/NB +བཤ/NB +ས/A +སྲ/A +སླ/A +གས/NB +བས/NB +བསྲ/A +བསླ/A +ཧ/A +ཧྲ/A +ལྷ/A +ཨ/A +ཀྭ/C +ཀྭའི/C +ཁྭ/C +གྭ/C +གྲྭ/C +ཉྭ/C +དྭོ/C +དྭངས +དྭགས +དྲྭ/C +ཕྱྭ/C +རྩྭ/C +ཚྭ/C +ཚྭབ +ཞྭ/C +ཟྭ/C +རྭ/C +ལྭ/C +ཤྭ/C +སྭོ/C +བསྭ/C +བསྭོ/C +ཧྭ/C +ཧྭག +ཧྭགས diff --git a/bo/description.xml b/bo/description.xml new file mode 100644 index 0000000..f02b666 --- /dev/null +++ b/bo/description.xml @@ -0,0 +1,16 @@ + + + + + + + Classical Tibetan syllable spellchecker for Hunspell + + + + + Elie Roux + + diff --git a/bo/dictionaries.xcu b/bo/dictionaries.xcu new file mode 100644 index 0000000..0d5fca8 --- /dev/null +++ b/bo/dictionaries.xcu @@ -0,0 +1,18 @@ + + + + + + + %origin%/bo.aff %origin%/bo.dic + + + DICT_SPELL + + + bo-CN bo-IN bo + + + + + -- cgit