summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAron Budea <baron@caesar.elte.hu>2016-11-17 04:20:51 +0100
committerAndras Timar <andras.timar@collabora.com>2016-11-22 22:23:34 +0000
commitf023ac778c97881a53035f951fca95cee759888e (patch)
tree442f3b9442a1613f097064d8fe1b9ef6c22908c0
parent3470ef8c07e59551eb3bbe3c5aa4e7a8f98b124d (diff)
tdf#103922: add Tibetan syllable spellcheckerlibreoffice-5-3-branch-point
Change-Id: Idd42597c95fac98023e3a6b671ef73f8ead7fabc From: https://github.com/eroux/hunspell-bo Reviewed-on: https://gerrit.libreoffice.org/30920 Reviewed-by: Andras Timar <andras.timar@collabora.com> Tested-by: Andras Timar <andras.timar@collabora.com>
-rw-r--r--Dictionary_bo.mk19
-rw-r--r--Module_dictionaries.mk1
-rw-r--r--bo/CHANGELOG.md28
-rw-r--r--bo/META-INF/manifest.xml6
-rw-r--r--bo/README.md41
-rw-r--r--bo/bo.aff73
-rw-r--r--bo/bo.dic373
-rw-r--r--bo/description.xml16
-rw-r--r--bo/dictionaries.xcu18
9 files changed, 575 insertions, 0 deletions
diff --git a/Dictionary_bo.mk b/Dictionary_bo.mk
new file mode 100644
index 0000000..f30b087
--- /dev/null
+++ b/Dictionary_bo.mk
@@ -0,0 +1,19 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_Dictionary_Dictionary,dict-bo,dictionaries/bo))
+
+$(eval $(call gb_Dictionary_add_root_files,dict-bo,\
+ dictionaries/bo/bo.aff \
+ dictionaries/bo/bo.dic \
+ dictionaries/bo/CHANGELOG.md \
+ dictionaries/bo/README.md \
+))
+
+# vim: set noet sw=4 ts=4:
diff --git a/Module_dictionaries.mk b/Module_dictionaries.mk
index 338e3f2..c70f64f 100644
--- a/Module_dictionaries.mk
+++ b/Module_dictionaries.mk
@@ -23,6 +23,7 @@ $(eval $(call gb_Module_add_l10n_targets,dictionaries,\
Dictionary_be \
Dictionary_bg \
Dictionary_bn \
+ Dictionary_bo \
Dictionary_br \
Dictionary_bs \
Dictionary_ca \
diff --git a/bo/CHANGELOG.md b/bo/CHANGELOG.md
new file mode 100644
index 0000000..ef2e6f7
--- /dev/null
+++ b/bo/CHANGELOG.md
@@ -0,0 +1,28 @@
+# Change Log
+All notable changes to this project will be documented in this file.
+As of v0.3.0 this project adheres to [Semantic Versioning](http://semver.org/). It follows [some conventions](http://keepachangelog.com/).
+
+## [0.3.0] - 2016-08-31
+### Fixed
+- ཧྤ -> ཧྥ
+
+### Added
+- more wasurs and འུ suffix possibilities
+- rare affixed particle combination འིའོ
+
+### Changed
+- treat ཏྲ, མྲ, སྣྲ and སྨྲ as exceptions and list their possibilities
+
+### Removed
+- remove erroneous དཀླ, བཏྲ and གཏྲ
+
+## [0.2.0] - 2015-08-15
+### Changed
+- contains all possible "legal" Classical Tibetan syllables (coming from research in grammar books), not limited to a dictionary; see [tibetan-spellchecker](https://github.com/eroux/tibetan-spellchecker)
+
+### Added
+- replacement proposals for archaic forms
+- main proper name syllables (not including Sanskrit names)
+
+## [0.1.0] - 2013-06-15
+- initial release, contains syllables from the བོད་རྒྱ་ཚིག་མཛོད་ཆེན་མོ།.
diff --git a/bo/META-INF/manifest.xml b/bo/META-INF/manifest.xml
new file mode 100644
index 0000000..0383ca4
--- /dev/null
+++ b/bo/META-INF/manifest.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE manifest:manifest PUBLIC "-//OpenOffice.org//DTD Manifest 1.0//EN" "Manifest.dtd">
+<manifest:manifest xmlns:manifest="http://openoffice.org/2001/manifest">
+ <manifest:file-entry manifest:media-type="application/vnd.sun.star.configuration-data" manifest:full-path="dictionaries.xcu"/>
+ <manifest:file-entry manifest:media-type="application/vnd.sun.star.package-bundle-description" manifest:full-path="package-description.txt"/>
+</manifest:manifest>
diff --git a/bo/README.md b/bo/README.md
new file mode 100644
index 0000000..5dc8ccb
--- /dev/null
+++ b/bo/README.md
@@ -0,0 +1,41 @@
+# Classical Tibetan syllable spellchecker for Hunspell
+
+You can find here the necessary files to use spell checking for Classical Tibetan at syllable level (not composed words) in [Hunspell](http://hunspell.sourceforge.net/) (used in [many applications](https://en.wikipedia.org/wiki/Hunspell#Uses)).
+
+Note that checking compound words for Tibetan is not possible with hunspell alone due to the absence of separation between words.
+
+## Using
+
+#### Global installation
+
+Under Linux or OSX, you can install the spellchecker globally and benefit from it in most applications.
+
+- under Linux, copy `bo.dic` and `bo.aff` to `/usr/share/hunspell`, or use the `hunspell-bo` package if available in your distribution (under [Debian](https://packages.debian.org/search?keywords=hunspell-bo) for example).
+- under OSX, copy `bo.dic` and `bo.aff` to `/Library/Spelling` and restart your machine.
+
+#### Application-specific installation
+
+- for Firefox, [an extension](https://addons.mozilla.org/fr/firefox/addon/tibetan-spellchecker/) is released
+- for LibreOffice/OpenOffice see [this extension](http://extensions.openoffice.org/en/project/tibetan-syllable-spell-checker)
+- for Adobe products (>= CS5.5), see the instructions on [this page](http://blog.napsys.com/2012/11/adding-hyphenation-and-spelling.html)
+- for Chrome see [this feature request](https://bugs.chromium.org/p/chromium/issues/detail?id=662850)
+
+The sources for these extensions are in the [firefox](firefox/) and [lo](lo/) directories. To build them, run `make`.
+
+## Building / Testing
+
+To rebuild `bo.dic` from the data in [tibetan-spellchecker](https://github.com/eroux/tibetan-spellchecker), run
+
+ make update
+
+For a small test, run
+
+ make test
+
+## Changes
+
+See [CHANGELOG.md](CHANGELOG.md).
+
+## License
+
+This work and the derived files are under the [Creative Commons CC0 license](LICENSE).
diff --git a/bo/bo.aff b/bo/bo.aff
new file mode 100644
index 0000000..f583723
--- /dev/null
+++ b/bo/bo.aff
@@ -0,0 +1,73 @@
+SET UTF-8
+LANGCODE bo
+
+# Ignore U+0F35 and U+0F37, as they are extremely rare but valid.
+# This means that a syllable containing these signs at invalid places will not
+# be detected by the spell checker, but this makes the code simpler at
+# almost no cost.
+IGNORE ༵༷
+
+NOSPLITSUGS
+
+# The "N" flag makes a suffix mandatory. It is used only with B, for example
+# དཀ/NB: here དཀ alone is invalid, but it becomes valid once a suffix is added.
+NEEDAFFIX N
+
+# grammatical suffixes only
+SFX C Y 7
+SFX C 0 འི . +'brel_sgra
+SFX C 0 འོ . +slar_bsdu
+SFX C 0 འིའོ . +'brel_sgra_slar_bsdu
+SFX C 0 ར . +la_don
+SFX C 0 ས . +byed_sgra
+SFX C 0 འང . +rgyan_sdud
+SFX C 0 འམ . +'byed_sdud
+
+# Words with mandatory vowel or suffix
+# If no ashung and no vowel, a suffix is mandatory
+SFX B Y 6
+SFX B 0 ི/S . +i
+SFX B 0 ུ/S . +u
+SFX B 0 ེ/S . +e
+SFX B 0 ོ/S . +o
+SFX B 0 འ . +ashung
+SFX B 0 /NS .
+
+# Non-mandatory vowel or suffix. In these cases, no ashung is possible, see
+# the documentation.
+SFX A Y 5
+SFX A 0 ི/S . +i
+SFX A 0 ུ/S . +u
+SFX A 0 ེ/S . +e
+SFX A 0 ོ/S . +o
+SFX A 0 /S .
+
+# List of suffixes used by both A and B
+SFX S Y 18
+SFX S 0 ག . +g
+SFX S 0 གས . +gs
+SFX S 0 ང . +ng
+SFX S 0 ངས . +ngs
+SFX S 0 ད . +d
+SFX S 0 ན . +n
+SFX S 0 བ . +b
+SFX S 0 བས . +bs
+SFX S 0 མ . +m
+SFX S 0 མས . +ms
+SFX S 0 འི . +'brel_sgra
+SFX S 0 འོ . +slar_bsdu
+SFX S 0 འིའོ . +'brel_sgra_slar_bsdu
+SFX S 0 འང . +rgyan_sdud
+SFX S 0 འམ . +'byed_sdud
+SFX S 0 ར . +r
+SFX S 0 ལ . +l
+SFX S 0 ས . +s
+
+# Replace archaic forms by modern ones
+REP 6
+REP འིས ས
+REP འའིས ས
+REP འར ར
+REP ནད ན
+REP རད ར
+REP ལད ལ
diff --git a/bo/bo.dic b/bo/bo.dic
new file mode 100644
index 0000000..f54115d
--- /dev/null
+++ b/bo/bo.dic
@@ -0,0 +1,373 @@
+372
+བགླ/C
+དམེའ
+མདྲོན
+བརྡའ
+བརྟའ
+དབའས
+ཏྲ/C
+ཏྲེས
+ཐྲིག
+མྲ/C
+སྨྲ/C
+སྨྲང
+སྣྲོན
+སྣྲུབས
+སྣྲེལ
+རྒྭ/C
+ཧྥ/A
+ཀརྨ/C
+པདྨ/C
+ཨཱ/C
+རྒྭ/C
+བསྭེ/C
+རྭང
+རྭི/C
+དྭང
+དྭ/C
+ཏྭོན
+ཀྭན
+ཀྭས
+ཧྭང
+ཀྲའུ/C
+ཀྲུའུ/C
+ཁྲུའུ/C
+སྒྱིའུ/C
+ཅོའུ/C
+གཅོའུ/C
+ཐུའུ/C
+དུའུ/C
+དྲིའུ/C
+ནོའུར
+ཕེའུ/C
+མུའུ/C
+མོའུ/C
+ཚུའུ/C
+ལོའུ/C
+ཧུའུ/C
+ཧེའུ/C
+ཧྲུའུ/C
+བྲའོ/C
+སླེའོ/C
+ཀའུ/C
+ཀིའུ/C
+ཀེའུ/C
+ཁིའུ/C
+ཁེའུ/C
+ཁྱིའུ/C
+ཁྱེའུ/C
+ཁྲིའུ/C
+ཁྲེའུ/C
+གའུ/C
+གྲིའུ/C
+གྲེའུ/C
+གླེའུ/C
+འགིའུ/C
+རྒེའུ/C
+སྒའུ/C
+སྒེའུ/C
+སྒྱེའུ/C
+སྒྲེའུ/C
+རྔེའུ/C
+སྔེའུ/C
+ཅེའུ/C
+གཅིའུ/C
+གཅེའུ/C
+ལྕེའུ/C
+རྗེའུ/C
+ཉེའུ/C
+སྙེའུ/C
+ཏེའུ/C
+གཏེའུ/C
+རྟའུ/C
+རྟེའུ/C
+སྟེའུ/C
+ཐའུ/C
+ཐིའུ/C
+ཐེའུ/C
+ཐོའུ/C
+མཐེའུ/C
+དེའུ/C
+དྲེའུ/C
+མདེའུ/C
+རྡེའུ/C
+ལྡེའུ/C
+སྡེའུ/C
+ནའུ/C
+ནེའུ/C
+སྣེའུ/C
+དཔེའུ/C
+སྤའུ/C
+སྤེའུ/C
+སྤྱིའུ/C
+སྤྲེའུ/C
+ཕྲའུ/C
+ཕྲེའུ/C
+འཕེའུ/C
+བེའུ/C
+བྱའུ/C
+བྱིའུ/C
+བྱེའུ/C
+བྲའུ/C
+བྲེའུ/C
+བྲོའུ/C
+འབེའུ/C
+སྦྲེའུ/C
+མིའུ/C
+མྱིའུ/C
+རྨེའུ/C
+སྨེའུ/C
+ཙིའུ/C
+ཙེའུ/C
+གཙེའུ/C
+རྩིའུ/C
+རྩེའུ/C
+ཚའུ/C
+ཚེའུ/C
+མཚེའུ/C
+མཚེའུ/C
+རྫིའུ/C
+རྫེའུ/C
+གཞུའུ/C
+ཟེའུ/C
+ཡེའུ/C
+གཡིའུ/C
+རེའུ/C
+ལའུ/C
+ལིའུ/C
+ལེའུ/C
+ཤའུ/C
+ཤེའུ/C
+སིའུ/C
+སེའུ/C
+སྲིའུ/C
+སླེའུ/C
+བསེའུ/C
+ཨའུ/C
+ཀ/A
+ཀྱ/A
+ཀྲ/A
+ཀླ/A
+དཀ/NB
+དཀྱ/A
+དཀྲ/A
+བཀ/NB
+བཀྱ/A
+བཀྲ/A
+བཀླ/A
+རྐ/A
+རྐྱ/A
+ལྐ/A
+སྐ/A
+སྐྱ/A
+སྐྲ/A
+བརྐ/A
+བརྐྱ/A
+བསྐ/A
+བསྐྱ/A
+བསྐྲ/A
+ཁ/A
+ཁྱ/A
+ཁྲ/A
+མཁ/NB
+མཁྱ/A
+མཁྲ/A
+འཁ/NB
+འཁྱ/A
+འཁྲ/A
+ག/A
+གྱ/A
+གྲ/A
+གླ/A
+དག/NB
+དགྱ/A
+དགྲ/A
+བག/NB
+བགྱ/A
+བགྲ/A
+མག/NB
+མགྱ/A
+མགྲ/A
+འག/NB
+འགྱ/A
+འགྲ/A
+རྒ/A
+རྒྱ/A
+ལྒ/A
+སྒ/A
+སྒྱ/A
+སྒྲ/A
+བརྒ/A
+བརྒྱ/A
+བསྒ/A
+བསྒྱ/A
+བསྒྲ/A
+ང/A
+དང/NB
+མང/NB
+རྔ/A
+ལྔ/A
+སྔ/A
+བརྔ/A
+བསྔ/A
+ཅ/A
+གཅ/NB
+བཅ/NB
+ལྕ/A
+ཆ/A
+མཆ/NB
+འཆ/NB
+ཇ/A
+མཇ/NB
+འཇ/NB
+རྗ/A
+ལྗ/A
+བརྗ/A
+ཉ/A
+གཉ/NB
+མཉ/NB
+རྙ/A
+སྙ/A
+བརྙ/A
+བསྙ/A
+ཏ/A
+གཏ/NB
+བཏ/NB
+རྟ/A
+ལྟ/A
+སྟ/A
+བརྟ/A
+བལྟ/A
+བསྟ/A
+ཐ/A
+མཐ/NB
+འཐ/NB
+ད/A
+དྲ/A
+གད/NB
+བད/NB
+མད/NB
+འད/NB
+འདྲ/A
+རྡ/A
+ལྡ/A
+སྡ/A
+བརྡ/A
+བལྡ/A
+བསྡ/A
+ན/A
+གན/NB
+མན/NB
+རྣ/A
+སྣ/A
+བརྣ/A
+བསྣ/A
+པ/A
+པྱ/A
+པྲ/A
+དཔ/NB
+དཔྱ/A
+དཔྲ/A
+ལྤ/A
+སྤ/A
+སྤྱ/A
+སྤྲ/A
+ཕ/A
+ཕྱ/A
+ཕྲ/A
+འཕ/NB
+འཕྱ/A
+འཕྲ/A
+བ/A
+བྱ/A
+བྲ/A
+བླ/A
+དབ/NB
+དབྱ/A
+དབྲ/A
+འབ/NB
+འབྱ/A
+འབྲ/A
+རྦ/A
+ལྦ/A
+སྦ/A
+སྦྱ/A
+སྦྲ/A
+མ/A
+མྱ/A
+དམ/NB
+དམྱ/A
+རྨ/A
+རྨྱ/A
+སྨ/A
+སྨྱ/A
+ཙ/A
+གཙ/NB
+བཙ/NB
+རྩ/A
+སྩ/A
+བརྩ/A
+བསྩ/A
+ཚ/A
+མཚ/NB
+འཚ/NB
+ཛ/A
+མཛ/NB
+འཛ/NB
+རྫ/A
+བརྫ/A
+ཝ/A
+ཞ/A
+གཞ/NB
+བཞ/NB
+ཟ/A
+ཟླ/A
+གཟ/NB
+བཟ/NB
+བཟླ/A
+འ/A
+ཡ/A
+གཡ/NB
+ར/A
+རླ/A
+བརླ/A
+ལ/A
+ཤ/A
+གཤ/NB
+བཤ/NB
+ས/A
+སྲ/A
+སླ/A
+གས/NB
+བས/NB
+བསྲ/A
+བསླ/A
+ཧ/A
+ཧྲ/A
+ལྷ/A
+ཨ/A
+ཀྭ/C
+ཀྭའི/C
+ཁྭ/C
+གྭ/C
+གྲྭ/C
+ཉྭ/C
+དྭོ/C
+དྭངས
+དྭགས
+དྲྭ/C
+ཕྱྭ/C
+རྩྭ/C
+ཚྭ/C
+ཚྭབ
+ཞྭ/C
+ཟྭ/C
+རྭ/C
+ལྭ/C
+ཤྭ/C
+སྭོ/C
+བསྭ/C
+བསྭོ/C
+ཧྭ/C
+ཧྭག
+ཧྭགས
diff --git a/bo/description.xml b/bo/description.xml
new file mode 100644
index 0000000..f02b666
--- /dev/null
+++ b/bo/description.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<description xmlns="http://openoffice.org/extensions/description/2006" xmlns:d="http://openoffice.org/extensions/description/2006" xmlns:xlink="http://www.w3.org/1999/xlink">
+ <!-- http://wiki.services.openoffice.org/wiki/Extension_Dictionaries -->
+ <version value="0.3" />
+ <identifier value="eu.telecom-bretagne.roux.elie.dict.bo" />
+ <display-name>
+ <name lang="en">Classical Tibetan syllable spellchecker for Hunspell</name>
+ </display-name>
+ <platform value="all" />
+ <!--<dependencies>
+ <OpenOffice.org-minimal-version value="4.1" d:name="LibreOffice 4.1" />
+ </dependencies>-->
+ <publisher>
+ <name xlink:href="https://github.com/eroux/hunspell-bo" lang="en">Elie Roux</name>
+ </publisher>
+</description>
diff --git a/bo/dictionaries.xcu b/bo/dictionaries.xcu
new file mode 100644
index 0000000..0d5fca8
--- /dev/null
+++ b/bo/dictionaries.xcu
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<oor:component-data xmlns:oor="http://openoffice.org/2001/registry" xmlns:xs="http://www.w3.org/2001/XMLSchema" oor:name="Linguistic" oor:package="org.openoffice.Office">
+ <node oor:name="ServiceManager">
+ <node oor:name="Dictionaries">
+ <node oor:name="HunSpellDic_bo" oor:op="fuse">
+ <prop oor:name="Locations" oor:type="oor:string-list">
+ <value>%origin%/bo.aff %origin%/bo.dic</value>
+ </prop>
+ <prop oor:name="Format" oor:type="xs:string">
+ <value>DICT_SPELL</value>
+ </prop>
+ <prop oor:name="Locales" oor:type="oor:string-list">
+ <value>bo-CN bo-IN bo</value>
+ </prop>
+ </node>
+ </node>
+ </node>
+</oor:component-data>