diff options
Diffstat (limited to 'libtextcat/data/new_fingerprints/lm/malay.lm')
-rw-r--r-- | libtextcat/data/new_fingerprints/lm/malay.lm | 400 |
1 files changed, 400 insertions, 0 deletions
diff --git a/libtextcat/data/new_fingerprints/lm/malay.lm b/libtextcat/data/new_fingerprints/lm/malay.lm new file mode 100644 index 000000000000..911d0cfef150 --- /dev/null +++ b/libtextcat/data/new_fingerprints/lm/malay.lm @@ -0,0 +1,400 @@ +_ 87128 +a 50232 +n 25424 +e 18746 +i 18605 +an 14419 +u 12470 +k 11955 +t 11875 +r 11007 +d 9856 +g 9545 +m 9390 +s 8926 +l 8631 +n_ 8004 +an_ 7095 +p 6890 +a_ 6739 +b 6645 +ng 6630 +h 5964 +da 5153 +_d 5107 +er 4625 +ka 4448 +la 4339 +y 4323 +i_ 4295 +en 4159 +ya 3800 +ang 3778 +_m 3750 +o 3593 +. 3539 +._ 3310 +at 3288 +ah 3216 +_b 3199 +_s 3177 +ta 3076 +ra 3036 +_k 2949 +g_ 2939 +ng_ 2933 +ar 2915 +_p 2906 +me 2884 +ga 2781 +di 2778 +ak 2727 +al 2705 +_me 2671 +ang_ 2524 +h_ 2509 +ba 2508 +pa 2454 +kan 2449 +in 2437 +tu 2411 +_t 2376 +sa 2338 +_da 2276 +j 2276 +pe 2225 +, 2150 +k_ 2147 +ma 2143 +se 2138 +am 2131 +kan_ 2117 +_di 2082 +,_ 2079 +ke 2048 +un 2004 +be 1947 +_a 1925 +na 1871 +ti 1868 +ri 1861 +u_ 1857 +as 1854 +ny 1827 +ha 1789 +te 1788 +_pe 1768 +em 1750 +it 1737 +_i 1732 +_ke 1711 +yan 1706 +ad 1698 +ia 1673 +yang 1673 +_y 1668 +_ya 1655 +yang_ 1653 +_se 1648 +ah_ 1646 +_yan 1639 +_yang 1639 +ala 1612 +nya 1587 +el 1576 +ik 1571 +t_ 1568 +ai 1549 +men 1531 +eng 1522 +_men 1464 +nga 1441 +dan 1366 +_be 1365 +si 1343 +uk 1328 +ada 1299 +nt 1291 +__ 1287 +ap 1276 +ua 1265 +___ 1238 +- 1213 +ja 1211 +ber 1204 +gan 1203 +_ba 1193 +____ 1189 +ni 1181 +_te 1169 +c 1143 +ran 1141 +_____ 1140 +m_ 1127 +ara 1118 +per 1099 +le 1084 +_dan 1083 +dan_ 1079 +ngan 1060 +_dan_ 1050 +ya_ 1046 +at_ 1044 +da_ 1021 +li 1016 +aka 1013 +A 999 +r_ 999 +w 997 +eb 995 +lah 980 +ata 980 +ak_ 978 +nd 974 +_ber 955 +gi 936 +is 933 +il 931 +tu_ 923 +s_ 920 +gan_ 915 +mb 913 +wa 904 +ag 903 +ngan_ 898 +ter 887 +nya_ 877 +S 873 +ek 853 +ru 852 +_l 838 +ela 828 +itu 824 +ol 822 +aha 822 +ada_ 820 +pu 812 +di_ 807 +bu 807 +am_ 804 +ur 801 +tan 790 +mp 790 +_per 786 +_sa 784 +M 782 +ut 781 +us 779 +era 779 +lam 778 +lah_ 775 +asa 767 +ki 761 +ir 759 +de 756 +enga 750 +su 748 +du 741 +id 739 +" 733 +akan 732 +apa 728 +_S 724 +ul 721 +lu 717 +ari 717 +dal 704 +et 698 +es 698 +pad 688 +_ma 688 +_M 685 +ana 684 +bi 679 +pada 673 +dala 673 +l_ 671 +ep 664 +f 662 +_di_ 658 +B 655 +ing 655 +_j 654 +ika 653 +ku 650 +_. 644 +akan_ 642 +ama 637 +pen 636 +alam 634 +eh 634 +pada_ 633 +ai_ 632 +_ter 632 +K 631 +mu 628 +ju 628 +P 626 +mem 625 +au 622 +_mem 614 +lan 612 +_._ 611 +ntu 608 +lam_ 605 +um 601 +on 600 +gk 597 +_in 597 +ngk 597 +a. 584 +meng 582 +_meng 578 +alam_ 577 +_A 576 +aa 575 +uk_ 572 +_pen 569 +ban 569 +or 569 +st 566 +ay 566 +dar 565 +_pa 564 +a._ 564 +_h 562 +bah 562 +_P 560 +D 559 +ri_ 558 +ini 552 +_de 551 +rt 550 +aan 545 +_it 542 +_itu 542 +nda 540 +eri 540 +dalam 537 +_B 533 +_dal 532 +ip 532 +_dala 532 +ta_ 528 +_u 527 +ung 525 +ih 524 +aw 520 +_n 519 +atu 517 +ila 513 +mi 513 +leh 513 +ian 512 +tuk 509 +awa 508 +gu 506 +ert 506 +engan 505 +ole 504 +_K 501 +seb 497 +ca 496 +gg 493 +_ta 489 +ra_ 488 +ngg 488 +itu_ 487 +emb 482 +ni_ 482 +ida 482 +nj 482 +_ti 479 +man 478 +den 477 +_D 474 +_ka 473 +aj 470 +oleh 468 +n. 468 +n._ 464 +ngka 464 +gka 464 +dak 464 +anga 461 +ena 459 +san 458 +pat 458 +rk 458 +( 455 +ent 454 +agi 453 +) 451 +ia_ 450 +ge 450 +ab 449 +im 447 +_ini 446 +ntuk 445 +I 445 +ar_ 440 +N 439 +aan_ 436 +_la 433 +pi 432 +baha 431 +deng 430 +han 430 +bag 429 +eh_ 429 +hu 429 +denga 428 +_o 427 +na_ 427 +T 425 +leh_ 422 +_den 422 +ka_ 419 +any 415 +ud 415 +rang 414 +anya 413 +gi_ 412 +angan 412 +a, 411 +_deng 410 +ita 409 +kat 408 +re 408 +_( 406 +tuk_ 403 +aga 401 +ne 400 +and 399 +aya 398 +_dar 397 +a,_ 396 +ro 396 +ntuk_ 393 +eba 392 +aran 390 +_" 387 +ed 385 +end 384 +ko 383 +sa_ 381 +p_ 381 +ara_ 380 +_seb 379 +alah 379 +oleh_ 379 +an. 378 +dak_ 378 +eg 378 +hi 376 +dari 375 +an._ 375 +au_ 373 +bo 373 +ti_ 371 +ula 371 |