summaryrefslogtreecommitdiff
path: root/vcl/source/gdi/genVerticalOrientationData.pl
diff options
context:
space:
mode:
authorKhaled Hosny <khaledhosny@eglug.org>2016-11-09 23:39:44 +0200
committerKhaled Hosny <khaledhosny@eglug.org>2016-11-10 01:01:55 +0200
commitf95018880ff71e00db4d4bb4b5f02b5818a2d1c5 (patch)
tree933b880309198bd37827adad56ff1d3b9bb26b1f /vcl/source/gdi/genVerticalOrientationData.pl
parent58d149f3b4d1e8ca47189b5f22421fc3f3bee732 (diff)
Add code to get Vertical_Orientation property
See http://unicode.org/reports/tr50/#vo ICU does not support getting this property yet, so I stole some (heavily redacted) Perl script from Mozilla that reads the data file and generates property tables. The original Mozilla script: https://dxr.mozilla.org/mozilla-central/source/intl/unicharutil/tools/genUnicodePropertyData.pl Change-Id: I2800711c3db3564515139227bdbd3b4d732917eb
Diffstat (limited to 'vcl/source/gdi/genVerticalOrientationData.pl')
-rwxr-xr-xvcl/source/gdi/genVerticalOrientationData.pl206
1 files changed, 206 insertions, 0 deletions
diff --git a/vcl/source/gdi/genVerticalOrientationData.pl b/vcl/source/gdi/genVerticalOrientationData.pl
new file mode 100755
index 000000000000..328727b269b3
--- /dev/null
+++ b/vcl/source/gdi/genVerticalOrientationData.pl
@@ -0,0 +1,206 @@
+#!/usr/bin/env perl
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This tool is used to prepare lookup tables of Unicode character properties.
+# The properties are read from the Unicode Character Database and compiled into
+# multi-level arrays for efficient lookup.
+#
+# To regenerate the tables in VerticalOrientationData.cxx:
+#
+# (1) Download the current Unicode data files from
+#
+# We require the latest data file for UTR50, currently revision-17:
+# http://www.unicode.org/Public/vertical/revision-17/VerticalOrientation-17.txt
+#
+#
+# (2) Run this tool using a command line of the form
+#
+# perl genVerticalOrientationData.pl \
+# /path/to/VerticalOrientation-17.txt
+#
+# This will generate (or overwrite!) the files
+#
+# VerticalOrientationData.cxx
+#
+# in the current directory.
+
+use strict;
+use List::Util qw(first);
+
+my $DATA_FILE = $ARGV[0];
+
+my %verticalOrientationCode = (
+ 'U' => 0, # U - Upright, the same orientation as in the code charts
+ 'R' => 1, # R - Rotated 90 degrees clockwise compared to the code charts
+ 'Tu' => 2, # Tu - Transformed typographically, with fallback to Upright
+ 'Tr' => 3 # Tr - Transformed typographically, with fallback to Rotated
+);
+
+my @verticalOrientation;
+for (my $i = 0; $i < 0x110000; ++$i) {
+ $verticalOrientation[$i] = 1; # default for unlisted codepoints is 'R'
+}
+
+# read VerticalOrientation-17.txt
+my @versionInfo;
+open FH, "< $DATA_FILE" or die "can't open UTR50 data file VerticalOrientation-17.txt\n";
+push @versionInfo, "";
+while (<FH>) {
+ chomp;
+ push @versionInfo, $_;
+ last if /Date:/;
+}
+while (<FH>) {
+ chomp;
+ s/#.*//;
+ if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s*;\s*([^ ]+)/) {
+ my $vo = $3;
+ warn "unknown Vertical_Orientation code $vo"
+ unless exists $verticalOrientationCode{$vo};
+ $vo = $verticalOrientationCode{$vo};
+ my $start = hex "0x$1";
+ my $end = (defined $2) ? hex "0x$2" : $start;
+ for (my $i = $start; $i <= $end; ++$i) {
+ $verticalOrientation[$i] = $vo;
+ }
+ }
+}
+close FH;
+
+my $timestamp = gmtime();
+
+open DATA_TABLES, "> VerticalOrientationData.cxx" or die "unable to open VerticalOrientationData.cxx for output";
+
+my $licenseBlock = q[
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+/*
+ * Derived from the Unicode Character Database by genVerticalOrientationData.pl
+ *
+ * For Unicode terms of use, see http://www.unicode.org/terms_of_use.html
+ */
+];
+
+my $versionInfo = join("\n", @versionInfo);
+
+print DATA_TABLES <<__END;
+$licenseBlock
+/*
+ * Created on $timestamp from UCD data files with version info:
+ *
+
+$versionInfo
+
+ *
+ * * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
+ */
+
+__END
+
+our $totalData = 0;
+
+sub sprintVerticalOrientation
+{
+ my $usv = shift;
+ return sprintf("%d,",
+ $verticalOrientation[$usv]);
+}
+
+&genTables("VerticalOrientation", "uint8_t", 9, 7, \&sprintVerticalOrientation, 16, 1, 1);
+
+sub genTables
+{
+ my ($prefix, $type, $indexBits, $charBits, $func, $maxPlane, $bytesPerEntry, $charsPerEntry) = @_;
+
+ print DATA_TABLES "#define k${prefix}MaxPlane $maxPlane\n";
+ print DATA_TABLES "#define k${prefix}IndexBits $indexBits\n";
+ print DATA_TABLES "#define k${prefix}CharBits $charBits\n";
+
+ my $indexLen = 1 << $indexBits;
+ my $charsPerPage = 1 << $charBits;
+ my %charIndex = ();
+ my %pageMapIndex = ();
+ my @pageMap = ();
+ my @char = ();
+
+ my $planeMap = "\x00" x $maxPlane;
+ foreach my $plane (0 .. $maxPlane) {
+ my $pageMap = "\x00" x $indexLen * 2;
+ foreach my $page (0 .. $indexLen - 1) {
+ my $charValues = "";
+ for (my $ch = 0; $ch < $charsPerPage; $ch += $charsPerEntry) {
+ my $usv = $plane * 0x10000 + $page * $charsPerPage + $ch;
+ $charValues .= &$func($usv);
+ }
+ chop $charValues;
+
+ unless (exists $charIndex{$charValues}) {
+ $charIndex{$charValues} = scalar keys %charIndex;
+ $char[$charIndex{$charValues}] = $charValues;
+ }
+ substr($pageMap, $page * 2, 2) = pack('S', $charIndex{$charValues});
+ }
+
+ unless (exists $pageMapIndex{$pageMap}) {
+ $pageMapIndex{$pageMap} = scalar keys %pageMapIndex;
+ $pageMap[$pageMapIndex{$pageMap}] = $pageMap;
+ }
+ if ($plane > 0) {
+ substr($planeMap, $plane - 1, 1) = pack('C', $pageMapIndex{$pageMap});
+ }
+ }
+
+ if ($maxPlane) {
+ print DATA_TABLES "static const uint8_t s${prefix}Planes[$maxPlane] = {";
+ print DATA_TABLES join(',', map { sprintf("%d", $_) } unpack('C*', $planeMap));
+ print DATA_TABLES "};\n\n";
+ }
+
+ my $chCount = scalar @char;
+ my $pmBits = $chCount > 255 ? 16 : 8;
+ my $pmCount = scalar @pageMap;
+ if ($maxPlane == 0) {
+ die "there should only be one pageMap entry!" if $pmCount > 1;
+ print DATA_TABLES "static const uint${pmBits}_t s${prefix}Pages[$indexLen] = {\n";
+ } else {
+ print DATA_TABLES "static const uint${pmBits}_t s${prefix}Pages[$pmCount][$indexLen] = {\n";
+ }
+ for (my $i = 0; $i < scalar @pageMap; ++$i) {
+ print DATA_TABLES $maxPlane > 0 ? " {" : " ";
+ print DATA_TABLES join(',', map { sprintf("%d", $_) } unpack('S*', $pageMap[$i]));
+ print DATA_TABLES $maxPlane > 0 ? ($i < $#pageMap ? "},\n" : "}\n") : "\n";
+ }
+ print DATA_TABLES "};\n\n";
+
+ my $pageLen = $charsPerPage / $charsPerEntry;
+ print DATA_TABLES "static const $type s${prefix}Values[$chCount][$pageLen] = {\n";
+ for (my $i = 0; $i < scalar @char; ++$i) {
+ print DATA_TABLES " {";
+ print DATA_TABLES $char[$i];
+ print DATA_TABLES $i < $#char ? "},\n" : "}\n";
+ }
+ print DATA_TABLES "};\n";
+
+ my $dataSize = $pmCount * $indexLen * $pmBits/8 +
+ $chCount * $pageLen * $bytesPerEntry +
+ $maxPlane;
+ $totalData += $dataSize;
+
+ print STDERR "Data for $prefix = $dataSize\n";
+}
+print DATA_TABLES <<__END;
+/*
+ * * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
+ */
+__END
+
+close DATA_TABLES;