summary refs log tree commit diff
path: root/pt_BR/th_gen_idx.pl
diff options
context:
space:
mode:
Diffstat (limited to 'pt_BR/th_gen_idx.pl')
-rwxr-xr-x pt_BR/th_gen_idx.pl 59
1 files changed, 0 insertions, 59 deletions
diff --git a/pt_BR/th_gen_idx.pl b/pt_BR/th_gen_idx.pl
deleted file mode 100755
index 9bdab33..0000000
--- a/pt_BR/th_gen_idx.pl
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/perl
-
-# perl program to take a thesaurus structured text data file
-# and create the proper sorted index file (.idx)
-#
-# typically invoked as follows:
-# cat th_en_US_new.dat | ./th_gen_idx.pl > th_en_US_new.idx
-#
-
-sub by_entry { # sort comparator: orders "entry|offset" strings by the text before the first '|'
- my ($aent, $aoff) = split('\|',$a); # $a and $b are supplied by sort; $aoff is parsed but never used
- my ($bent, $boff) = split('\|',$b); # likewise $boff is parsed but never used
- $aent cmp $bent; # string comparison; as the last expression it is the sub's return value
-}
-
-# main routine: reads the thesaurus from STDIN and prints its sorted index to STDOUT
-my $ne = 0; # number of entries in index
-my @tindex=(); # the index itself: one "entry|offset" string per entry
-my $foffset = 0; # file position offset into thesaurus
-my $rec=""; # current entry line as read from STDIN
-my $rl=0; # total length of the current block (entry line plus its meaning lines)
-my $entry=""; # current word being processed
-my $nm=0; # number of meanings for the current word
-my $meaning=""; # current meaning and synonyms
-my $p; # count of meaning lines consumed so far for the current entry
-my $encoding; # encoding used by text file
-
-# top line of thesaurus provides encoding
-$encoding=<STDIN>;
-$foffset = $foffset + length($encoding); # measured before chomp so the line ending is counted
-chomp($encoding);
-
-# read thesaurus line by line
-# first line of every block is an entry and meaning count
-while ($rec=<STDIN>){
- $rl = length($rec); # taken before chomp so the line ending is included
- chomp($rec);
- ($entry, $nm) = split('\|',$rec); # entry line is split into the word and its meaning count
- $p = 0;
- while ($p < $nm) { # consume the $nm meaning lines that follow the entry line
- $meaning=<STDIN>; # NOTE(review): no end-of-input check here; a truncated file yields undef
- $rl = $rl + length($meaning); # add this meaning line (with its line ending) to the block length
- chomp($meaning);
- $p++;
- }
- push(@tindex,"$entry|$foffset"); # $foffset still points at the start of this entry's block
- $ne++;
- $foffset = $foffset + $rl; # advance past the whole block to the next entry line
-}
-
-# now we have all of the information
-# so sort it and then output the encoding, count and index data
-@tindex = sort by_entry @tindex; # by_entry compares only the entry text, not the offset
-print STDOUT "$encoding\n";
-print STDOUT "$ne\n";
-foreach $one (@tindex) { # NOTE(review): $one is never declared with my
- print STDOUT "$one\n";
-}
-