From 76812dd089689fc486535d7748a21a1e1f55e503 Mon Sep 17 00:00:00 2001 From: Thomas Lange Date: Thu, 24 Sep 2009 08:26:57 +0000 Subject: #i105261# zulu fingerprint for language guessing added --- libtextcat/data/new_fingerprints/fpdb.conf | 1 + libtextcat/data/new_fingerprints/zulu.lm | 400 +++++++++++++++++++++++++++++ 2 files changed, 401 insertions(+) create mode 100644 libtextcat/data/new_fingerprints/zulu.lm (limited to 'libtextcat') diff --git a/libtextcat/data/new_fingerprints/fpdb.conf b/libtextcat/data/new_fingerprints/fpdb.conf index 5b54fef1d7ad..df56f9e270ef 100644 --- a/libtextcat/data/new_fingerprints/fpdb.conf +++ b/libtextcat/data/new_fingerprints/fpdb.conf @@ -82,3 +82,4 @@ ukrainian.lm uk--utf8 vietnamese.lm vi--utf8 welsh.lm cy--utf8 yiddish_utf.lm yi--utf8 +zulu.lm zu--utf8 diff --git a/libtextcat/data/new_fingerprints/zulu.lm b/libtextcat/data/new_fingerprints/zulu.lm new file mode 100644 index 000000000000..f30c09ced93f --- /dev/null +++ b/libtextcat/data/new_fingerprints/zulu.lm @@ -0,0 +1,400 @@ +_ +a +e +i +n +u +o +l +k +h +s +a_ +b +t +m +g +w +z +e_ +i_ +ng +ku +d +y +la +an +_n +th +le +_u +o_ +el +ba +_k +en +in +wa +p +_e +zi +. +hi +si +al +ha +uk +ab +_i +r +is +ka +_a +kh +we +li +ni +ma +_ng +he +ul +._ +ga +thi +la_ +be +ak +c +on +nd +na +ok +am +lo +ho +, +se +ph +hi_ +ut +es +nga +_ku +,_ +ez +thi_ +un +uth +le_ +uku +hu +f +u_ +um +ek +ne +go +q +_uk +at +aba +_l +sh +lu +M +_uku +ol +_b +hl +ni_ +ngo +kw +- +N +ik +oku +em +nt +as +ge +az +ya +iz +sa +_o +S +uthi +A +za +_w +wa_ +_s +mb +kut +kuth +ela +ye +_y +uthi_ +il +ay +ele +ba_ +I +dl +nge +ath +ub +ke +U +zo +na_ +yi +us +kuthi +esi +ob +v +om +ama +it +lo_ +bu +L +ezi +j +ny +im +ing +li_ +_ab +eni +no +de +ela_ +ze +ang +ko +ala +lw +yo +zin +_U +lel +eng +mi +_ngo +eb +uz +me +gi +ti +ukut +so +ukuth +bo +da +_ba +nz +_aba +the +eli +akh +eni_ +E +ban +s_ +aka +_kw +ma_ +ap +_ukut +he_ +ini +di +K +ka_ +ib +kwa +ulu +ele_ +kho +nj +bi +_z +khu +we_ +lal +enz +ho_ +et +C +gu +zi_ +and +hla +ngi +pha +_um +_ka +isi +_nge +isa +aph +ung +izi +dla +ala_ +zw +nde +to +n_ +ne_ +nk +ke_ +_I +athi +_no +lan +_wa +kul +B +ind +fu +wen +ikh +azi +ule +kub +e. +_S +x +o. +ona +kha +_iz +je +bh +_M +er +kwe +oba +ane +O +_N +sa_ +a. +lwa +_ez +kus +ki +mu +od +" +ebe +P +_nga +hul +_m +ase +ben +_be +T +ic +nda +_si +_na +/ +ant +ngu +ad +anga +nje +ith +a._ +ye_ +athi_ +R +os +alo +tha +za_ +eth +_es +uma +ana +ile +te +ale +aban +: +_A +oba_ +hat +kun +ha_ +phe +be_ +ali +_am +si_ +wo +uy +sik +ise +kan +hath +dlal +_ne +zwe +aw +han +tu +nye +qe +_ko +ah +hel +thu +isa_ +gob +_K +_lo +ta +_ama +ot +ula +_em +ze_ +i. +ngob +_izi +hol +ar +ani +ole +uba +_in +up +eka +ini_ +goba +tho +hon +_ezi +ona_ +ezin +ngoba +lu_ +goba_ +ip +a, +eli_ +t_ +nya +ndl +sha +_is +the_ +i._ +amb -- cgit From 637351b5bf6cf81b06beb4374a7eb6656e7f7394 Mon Sep 17 00:00:00 2001 From: Thomas Lange Date: Thu, 24 Sep 2009 08:39:39 +0000 Subject: #i105261# zulu fingerprint for language guessing added --- libtextcat/data/new_fingerprints/lm/zulu.lm | 400 ++++++++++++++++++++++++++++ libtextcat/data/new_fingerprints/zulu.lm | 400 ---------------------------- 2 files changed, 400 insertions(+), 400 deletions(-) create mode 100644 libtextcat/data/new_fingerprints/lm/zulu.lm delete mode 100644 libtextcat/data/new_fingerprints/zulu.lm (limited to 'libtextcat') diff --git a/libtextcat/data/new_fingerprints/lm/zulu.lm b/libtextcat/data/new_fingerprints/lm/zulu.lm new file mode 100644 index 000000000000..f30c09ced93f --- /dev/null +++ b/libtextcat/data/new_fingerprints/lm/zulu.lm @@ -0,0 +1,400 @@ +_ +a +e +i +n +u +o +l +k +h +s +a_ +b +t +m +g +w +z +e_ +i_ +ng +ku +d +y +la +an +_n +th +le +_u +o_ +el +ba +_k +en +in +wa +p +_e +zi +. +hi +si +al +ha +uk +ab +_i +r +is +ka +_a +kh +we +li +ni +ma +_ng +he +ul +._ +ga +thi +la_ +be +ak +c +on +nd +na +ok +am +lo +ho +, +se +ph +hi_ +ut +es +nga +_ku +,_ +ez +thi_ +un +uth +le_ +uku +hu +f +u_ +um +ek +ne +go +q +_uk +at +aba +_l +sh +lu +M +_uku +ol +_b +hl +ni_ +ngo +kw +- +N +ik +oku +em +nt +as +ge +az +ya +iz +sa +_o +S +uthi +A +za +_w +wa_ +_s +mb +kut +kuth +ela +ye +_y +uthi_ +il +ay +ele +ba_ +I +dl +nge +ath +ub +ke +U +zo +na_ +yi +us +kuthi +esi +ob +v +om +ama +it +lo_ +bu +L +ezi +j +ny +im +ing +li_ +_ab +eni +no +de +ela_ +ze +ang +ko +ala +lw +yo +zin +_U +lel +eng +mi +_ngo +eb +uz +me +gi +ti +ukut +so +ukuth +bo +da +_ba +nz +_aba +the +eli +akh +eni_ +E +ban +s_ +aka +_kw +ma_ +ap +_ukut +he_ +ini +di +K +ka_ +ib +kwa +ulu +ele_ +kho +nj +bi +_z +khu +we_ +lal +enz +ho_ +et +C +gu +zi_ +and +hla +ngi +pha +_um +_ka +isi +_nge +isa +aph +ung +izi +dla +ala_ +zw +nde +to +n_ +ne_ +nk +ke_ +_I +athi +_no +lan +_wa +kul +B +ind +fu +wen +ikh +azi +ule +kub +e. +_S +x +o. +ona +kha +_iz +je +bh +_M +er +kwe +oba +ane +O +_N +sa_ +a. +lwa +_ez +kus +ki +mu +od +" +ebe +P +_nga +hul +_m +ase +ben +_be +T +ic +nda +_si +_na +/ +ant +ngu +ad +anga +nje +ith +a._ +ye_ +athi_ +R +os +alo +tha +za_ +eth +_es +uma +ana +ile +te +ale +aban +: +_A +oba_ +hat +kun +ha_ +phe +be_ +ali +_am +si_ +wo +uy +sik +ise +kan +hath +dlal +_ne +zwe +aw +han +tu +nye +qe +_ko +ah +hel +thu +isa_ +gob +_K +_lo +ta +_ama +ot +ula +_em +ze_ +i. +ngob +_izi +hol +ar +ani +ole +uba +_in +up +eka +ini_ +goba +tho +hon +_ezi +ona_ +ezin +ngoba +lu_ +goba_ +ip +a, +eli_ +t_ +nya +ndl +sha +_is +the_ +i._ +amb diff --git a/libtextcat/data/new_fingerprints/zulu.lm b/libtextcat/data/new_fingerprints/zulu.lm deleted file mode 100644 index f30c09ced93f..000000000000 --- a/libtextcat/data/new_fingerprints/zulu.lm +++ /dev/null @@ -1,400 +0,0 @@ -_ -a -e -i -n -u -o -l -k -h -s -a_ -b -t -m -g -w -z -e_ -i_ -ng -ku -d -y -la -an -_n -th -le -_u -o_ -el -ba -_k -en -in -wa -p -_e -zi -. -hi -si -al -ha -uk -ab -_i -r -is -ka -_a -kh -we -li -ni -ma -_ng -he -ul -._ -ga -thi -la_ -be -ak -c -on -nd -na -ok -am -lo -ho -, -se -ph -hi_ -ut -es -nga -_ku -,_ -ez -thi_ -un -uth -le_ -uku -hu -f -u_ -um -ek -ne -go -q -_uk -at -aba -_l -sh -lu -M -_uku -ol -_b -hl -ni_ -ngo -kw -- -N -ik -oku -em -nt -as -ge -az -ya -iz -sa -_o -S -uthi -A -za -_w -wa_ -_s -mb -kut -kuth -ela -ye -_y -uthi_ -il -ay -ele -ba_ -I -dl -nge -ath -ub -ke -U -zo -na_ -yi -us -kuthi -esi -ob -v -om -ama -it -lo_ -bu -L -ezi -j -ny -im -ing -li_ -_ab -eni -no -de -ela_ -ze -ang -ko -ala -lw -yo -zin -_U -lel -eng -mi -_ngo -eb -uz -me -gi -ti -ukut -so -ukuth -bo -da -_ba -nz -_aba -the -eli -akh -eni_ -E -ban -s_ -aka -_kw -ma_ -ap -_ukut -he_ -ini -di -K -ka_ -ib -kwa -ulu -ele_ -kho -nj -bi -_z -khu -we_ -lal -enz -ho_ -et -C -gu -zi_ -and -hla -ngi -pha -_um -_ka -isi -_nge -isa -aph -ung -izi -dla -ala_ -zw -nde -to -n_ -ne_ -nk -ke_ -_I -athi -_no -lan -_wa -kul -B -ind -fu -wen -ikh -azi -ule -kub -e. -_S -x -o. -ona -kha -_iz -je -bh -_M -er -kwe -oba -ane -O -_N -sa_ -a. -lwa -_ez -kus -ki -mu -od -" -ebe -P -_nga -hul -_m -ase -ben -_be -T -ic -nda -_si -_na -/ -ant -ngu -ad -anga -nje -ith -a._ -ye_ -athi_ -R -os -alo -tha -za_ -eth -_es -uma -ana -ile -te -ale -aban -: -_A -oba_ -hat -kun -ha_ -phe -be_ -ali -_am -si_ -wo -uy -sik -ise -kan -hath -dlal -_ne -zwe -aw -han -tu -nye -qe -_ko -ah -hel -thu -isa_ -gob -_K -_lo -ta -_ama -ot -ula -_em -ze_ -i. -ngob -_izi -hol -ar -ani -ole -uba -_in -up -eka -ini_ -goba -tho -hon -_ezi -ona_ -ezin -ngoba -lu_ -goba_ -ip -a, -eli_ -t_ -nya -ndl -sha -_is -the_ -i._ -amb -- cgit