| 1 | #!/usr/bin/perl -w
|
|---|
| 2 | require 5.008;
|
|---|
| 3 | use bytes;
|
|---|
| 4 | use strict;
|
|---|
| 5 | use Locale::PO;
|
|---|
| 6 |
|
|---|
| 7 | use integer;
|
|---|
| 8 |
|
|---|
| 9 | # Magic identifier (12 bytes)
|
|---|
| 10 | my $magic = "Svx\nMsg\r\n\xfe\xff\0";
|
|---|
| 11 | # Designed to be corrupted by ASCII ftp, top bit stripping (or
|
|---|
| 12 | # being used for parity). Contains a zero byte so more likely
|
|---|
| 13 | # to be flagged as data (e.g. by perl's "-B" test).
|
|---|
| 14 |
|
|---|
| 15 | my $srcdir = $0;
|
|---|
| 16 | $srcdir =~ s!/[^/]+$!!;
|
|---|
| 17 |
|
|---|
| 18 | my $major = 0;
|
|---|
| 19 | my $minor = 8;
|
|---|
| 20 |
|
|---|
| 21 | # File format (multi-byte numbers in network order (bigendian)):
|
|---|
| 22 | # 12 bytes: Magic identifier
|
|---|
| 23 | # 1 byte: File format major version number (0)
|
|---|
| 24 | # 1 byte: File format minor version number (8)
|
|---|
| 25 | # 2 bytes: Number of messages (N)
|
|---|
| 26 | # 4 bytes: Offset from XXXX to end of file
|
|---|
| 27 | # XXXX:
|
|---|
| 28 | # N*:
|
|---|
| 29 | # <message> NUL
|
|---|
| 30 |
|
|---|
| 31 | my %msgs = ();
|
|---|
| 32 | ${$msgs{'en'}}[0] = '©';
|
|---|
| 33 |
|
|---|
| 34 | # my %uses = ();
|
|---|
| 35 |
|
|---|
| 36 | my $file;
|
|---|
| 37 |
|
|---|
| 38 | my %n = ();
|
|---|
| 39 | my %loc = ();
|
|---|
| 40 | $file = "$srcdir/survex.pot";
|
|---|
| 41 | my $num_list = Locale::PO->load_file_asarray($file);
|
|---|
| 42 | foreach my $po_entry (@{$num_list}) {
|
|---|
| 43 | my $ref = $po_entry->reference;
|
|---|
| 44 | (defined $ref && $ref =~ /^n:(\d+)$/m) or next;
|
|---|
| 45 | my $msgno = $1;
|
|---|
| 46 | my $key = $po_entry->msgid;
|
|---|
| 47 | my $msg = c_unescape($po_entry->dequote($key));
|
|---|
| 48 | my $where = $file . ":" . $po_entry->loaded_line_number;
|
|---|
| 49 | ${$loc{'en'}}[$msgno] = $where;
|
|---|
| 50 | if (${$msgs{'en'}}[$msgno]) {
|
|---|
| 51 | print STDERR "$where: warning: already had message $msgno for language 'en'\n";
|
|---|
| 52 | }
|
|---|
| 53 | ${$msgs{'en'}}[$msgno] = $msg;
|
|---|
| 54 | ++$n{$msgno};
|
|---|
| 55 | }
|
|---|
| 56 | my $last = 0;
|
|---|
| 57 | for (sort { $a <=> $b } keys %n) {
|
|---|
| 58 | if ($_ > $last + 1) {
|
|---|
| 59 | print STDERR "$file: Unused msg numbers: ", join(" ", $last + 1 .. $_ - 1), "\n";
|
|---|
| 60 | }
|
|---|
| 61 | $last = $_;
|
|---|
| 62 | }
|
|---|
| 63 | print STDERR "$file: Last used msg number: $last\n";
|
|---|
| 64 | %n = ();
|
|---|
| 65 |
|
|---|
| 66 | my %fuzzy;
|
|---|
| 67 | my %c_format;
|
|---|
| 68 | for my $po_file (@ARGV) {
|
|---|
| 69 | my $language = $po_file;
|
|---|
| 70 | $language =~ s/\.po$//;
|
|---|
| 71 |
|
|---|
| 72 | $file = "$srcdir/$po_file";
|
|---|
| 73 | my $po_hash = Locale::PO->load_file_ashash($file);
|
|---|
| 74 |
|
|---|
| 75 | if (exists $$po_hash{'""'}) {
|
|---|
| 76 | if ($$po_hash{'""'}->msgstr =~ /^(?:.*\\n)?Language:\s*([^\s\\]+)/im) {
|
|---|
| 77 | if ($language ne $1) {
|
|---|
| 78 | my $line = 3 + scalar(@{[$& =~ /(\\n)/g]});
|
|---|
| 79 | print STDERR "$file:$line: Language code '$1' doesn't match '$language' from filename\n";
|
|---|
| 80 | }
|
|---|
| 81 | } else {
|
|---|
| 82 | my $line = 2 + scalar(@{[$$po_hash{'""'}->msgstr =~ /(\\n)/g]});
|
|---|
| 83 | print STDERR "$file:$line: No suitable 'Language:' field in header\n";
|
|---|
| 84 | }
|
|---|
| 85 | } else {
|
|---|
| 86 | print STDERR "$file:1: Expected 'msgid \"\"' with header\n";
|
|---|
| 87 | }
|
|---|
| 88 |
|
|---|
| 89 | my $fuzzy = 0;
|
|---|
| 90 | foreach my $po_entry (@{$num_list}) {
|
|---|
| 91 | my $ref = $po_entry->reference;
|
|---|
| 92 | (defined $ref && $ref =~ /^n:(\d+)$/m) or next;
|
|---|
| 93 | my $msgno = $1;
|
|---|
| 94 | my $key = $po_entry->msgid;
|
|---|
| 95 | my $ent = $$po_hash{$key};
|
|---|
| 96 | my $where = $file . ":" . $po_entry->loaded_line_number;
|
|---|
| 97 | ${$loc{$language}}[$msgno] = $where;
|
|---|
| 98 | if (defined $ent) {
|
|---|
| 99 | my $msg = c_unescape($po_entry->dequote($ent->msgstr));
|
|---|
| 100 | if ($msg eq '') {
|
|---|
| 101 | print STDERR "$where: warning: Empty translation marked as fuzzy\n" if $ent->fuzzy();
|
|---|
| 102 | next;
|
|---|
| 103 | }
|
|---|
| 104 | if (${$msgs{$language}}[$msgno]) {
|
|---|
| 105 | print STDERR "$where: warning: already had message $msgno for language $language\n";
|
|---|
| 106 | }
|
|---|
| 107 | ${$msgs{$language}}[$msgno] = $msg;
|
|---|
| 108 | $ent->fuzzy() and ++$fuzzy;
|
|---|
| 109 | }
|
|---|
| 110 | $po_entry->c_format and $c_format{$language}[$msgno]++;
|
|---|
| 111 | }
|
|---|
| 112 | $fuzzy{$language} = $fuzzy;
|
|---|
| 113 | }
|
|---|
| 114 |
|
|---|
| 115 | my $lang;
|
|---|
| 116 | my @langs = sort grep ! /_\*$/, keys %msgs;
|
|---|
| 117 |
|
|---|
| 118 | my $num_msgs = -1;
|
|---|
| 119 | foreach $lang (@langs) {
|
|---|
| 120 | my $aref = $msgs{$lang};
|
|---|
| 121 | $num_msgs = scalar @$aref if scalar @$aref > $num_msgs;
|
|---|
| 122 | }
|
|---|
| 123 |
|
|---|
| 124 | foreach $lang (@langs) {
|
|---|
| 125 | my $fnm = $lang;
|
|---|
| 126 | $file = "$srcdir/$lang.po";
|
|---|
| 127 | $fnm =~ s/(_.*)$/\U$1/;
|
|---|
| 128 | open OUT, ">$fnm.msg" or die $!;
|
|---|
| 129 |
|
|---|
| 130 | my $aref = $msgs{$lang};
|
|---|
| 131 |
|
|---|
| 132 | my $parentaref;
|
|---|
| 133 | my $mainlang = $lang;
|
|---|
| 134 | $parentaref = $msgs{$mainlang} if $mainlang =~ s/_.*$//;
|
|---|
| 135 |
|
|---|
| 136 | print OUT $magic or die $!;
|
|---|
| 137 | print OUT pack("CCn", $major, $minor, $num_msgs) or die $!;
|
|---|
| 138 |
|
|---|
| 139 | my $buff = '';
|
|---|
| 140 | my $missing = 0;
|
|---|
| 141 |
|
|---|
| 142 | for my $n (0 .. $num_msgs - 1) {
|
|---|
| 143 | my $warned = 0;
|
|---|
| 144 | my $msg = $$aref[$n];
|
|---|
| 145 | if (!defined $msg) {
|
|---|
| 146 | $msg = $$parentaref[$n] if defined $parentaref;
|
|---|
| 147 | if (!defined $msg) {
|
|---|
| 148 | $msg = ${$msgs{'en'}}[$n];
|
|---|
| 149 | # don't report if we have a parent (as the omission will be
|
|---|
| 150 | # reported there)
|
|---|
| 151 | if (defined $msg && $msg ne '' && $msg ne '©' && !defined $parentaref) {
|
|---|
| 152 | ++$missing;
|
|---|
| 153 | $warned = 1;
|
|---|
| 154 | } else {
|
|---|
| 155 | $msg = '' if !defined $msg;
|
|---|
| 156 | }
|
|---|
| 157 | }
|
|---|
| 158 | } else {
|
|---|
| 159 | if ($lang ne 'en') {
|
|---|
| 160 | my $c_format = $c_format{$lang}[$n] // 0;
|
|---|
| 161 | sanity_check("Message $n in language $lang", $msg, ${$msgs{'en'}}[$n], ${$loc{$lang}}[$n], $c_format);
|
|---|
| 162 | }
|
|---|
| 163 | }
|
|---|
| 164 | $buff .= $msg . "\0";
|
|---|
| 165 | }
|
|---|
| 166 |
|
|---|
| 167 | print OUT pack('N',length($buff)), $buff or die $!;
|
|---|
| 168 | close OUT or die $!;
|
|---|
| 169 |
|
|---|
| 170 | my $fuzzy = $fuzzy{$lang};
|
|---|
| 171 | if ($missing || $fuzzy) {
|
|---|
| 172 | print STDERR "Warning: $file: ";
|
|---|
| 173 | if ($missing) {
|
|---|
| 174 | print STDERR "$missing missing message(s)";
|
|---|
| 175 | if ($fuzzy) {
|
|---|
| 176 | print STDERR " and $fuzzy fuzzy message(s)";
|
|---|
| 177 | }
|
|---|
| 178 | } else {
|
|---|
| 179 | print STDERR "$fuzzy fuzzy message(s)";
|
|---|
| 180 | }
|
|---|
| 181 | print STDERR " for $lang\n";
|
|---|
| 182 | }
|
|---|
| 183 | }
|
|---|
| 184 |
|
|---|
| 185 | sub sanity_check {
|
|---|
| 186 | my ($what, $msg, $orig, $where, $c_format) = @_;
|
|---|
| 187 | if ($c_format) {
|
|---|
| 188 | # check printf-like specifiers match
|
|---|
| 189 | # allow valid printf specifiers, or %<any letter> to support strftime
|
|---|
| 190 | # and other printf-like formats.
|
|---|
| 191 | my @pcent_m = grep /\%/, split /(%(?:[-#0 +'I]*(?:[0-9]*|\*|\*m\$)(?:\.[0-9]*)?(?:hh|ll|[hlLqjzt])?[diouxXeEfFgGaAcsCSpn]|[a-zA-Z]))/, $msg;
|
|---|
| 192 | my @pcent_o = grep /\%/, split /(%(?:[-#0 +'I]*(?:[0-9]*|\*|\*m\$)(?:\.[0-9]*)?(?:hh|ll|[hlLqjzt])?[diouxXeEfFgGaAcsCSpn]|[a-zA-Z]))/, $orig;
|
|---|
| 193 | while (scalar @pcent_m || scalar @pcent_o) {
|
|---|
| 194 | if (!scalar @pcent_m) {
|
|---|
| 195 | print STDERR "$where: warning: $what misses out \%spec $pcent_o[0]\n";
|
|---|
| 196 | } elsif (!scalar @pcent_o) {
|
|---|
| 197 | print STDERR "$where: warning: $what has extra \%spec $pcent_m[0]\n";
|
|---|
| 198 | } elsif ($pcent_m[0] ne $pcent_o[0]) {
|
|---|
| 199 | print STDERR "$where: warning: $what has \%spec $pcent_m[0] instead of $pcent_o[0]\n";
|
|---|
| 200 | }
|
|---|
| 201 | pop @pcent_m;
|
|---|
| 202 | pop @pcent_o;
|
|---|
| 203 | }
|
|---|
| 204 | }
|
|---|
| 205 |
|
|---|
| 206 | # Check for missing (or added) ellipses (...)
|
|---|
| 207 | if ($msg =~ /\.\.\./ && $orig !~ /\.\.\./) {
|
|---|
| 208 | print STDERR "$where: warning: $what has ellipses but original doesn't\n";
|
|---|
| 209 | } elsif ($msg !~ /\.\.\./ && $orig =~ /\.\.\./) {
|
|---|
| 210 | print STDERR "$where: warning: $what is missing ellipses\n";
|
|---|
| 211 | }
|
|---|
| 212 |
|
|---|
| 213 | # Check for missing (or added) menu shortcut (&)
|
|---|
| 214 | if ($msg =~ /\&[A-Za-z\xc2-\xf4]/ && $orig !~ /\&[A-Za-z]/) {
|
|---|
| 215 | print STDERR "$where: warning: $what has menu shortcut but original doesn't\n";
|
|---|
| 216 | } elsif ($msg !~ /\&[A-Za-z\xc2-\xf4]/ && $orig =~ /\&[A-Za-z]/) {
|
|---|
| 217 | print STDERR "$where: warning: $what is missing menu shortcut\n";
|
|---|
| 218 | }
|
|---|
| 219 |
|
|---|
| 220 | # Check for missing (or added) double quotes (“ and ”)
|
|---|
| 221 | if (scalar($msg =~ s/(?:“|»)/$&/g) != scalar($orig =~ s/“/$&/g)) {
|
|---|
| 222 | print STDERR "$where: warning: $what has different numbers of “\n";
|
|---|
| 223 | print STDERR "$orig\n$msg\n\n";
|
|---|
| 224 | }
|
|---|
| 225 | if (scalar($msg =~ s/(?:”|«)/$&/g) != scalar($orig =~ s/”/$&/g)) {
|
|---|
| 226 | print STDERR "$where: warning: $what has different numbers of ”\n";
|
|---|
| 227 | print STDERR "$orig\n$msg\n\n";
|
|---|
| 228 | }
|
|---|
| 229 |
|
|---|
| 230 | # Check for missing (or added) menu accelerator "##"
|
|---|
| 231 | if ($msg =~ /\#\#/ && $orig !~ /\#\#/) {
|
|---|
| 232 | print STDERR "$where: warning: $what has menu accelerator but original doesn't\n";
|
|---|
| 233 | } elsif ($msg !~ /\#\#/ && $orig =~ /\#\#/) {
|
|---|
| 234 | print STDERR "$where: warning: $what is missing menu accelerator\n";
|
|---|
| 235 | } elsif ($orig =~ /\#\#(.*)/) {
|
|---|
| 236 | my $acc_o = $1;
|
|---|
| 237 | my ($acc_m) = $msg =~ /\#\#(.*)/;
|
|---|
| 238 | if ($acc_o ne $acc_m) {
|
|---|
| 239 | print STDERR "$where: warning: $what has menu accelerator $acc_m instead of $acc_o\n";
|
|---|
| 240 | }
|
|---|
| 241 | }
|
|---|
| 242 |
|
|---|
| 243 | # Check for missing (or added) menu accelerator "\t"
|
|---|
| 244 | if ($msg =~ /\t/ && $orig !~ /\t/) {
|
|---|
| 245 | print STDERR "$where: warning: $what has menu accelerator but original doesn't\n";
|
|---|
| 246 | } elsif ($msg !~ /\t/ && $orig =~ /\t/) {
|
|---|
| 247 | print STDERR "$where: warning: $what is missing menu accelerator\n";
|
|---|
| 248 | } elsif ($orig =~ /\t(.*)/) {
|
|---|
| 249 | my $acc_o = $1;
|
|---|
| 250 | my ($acc_m) = $msg =~ /\t(.*)/;
|
|---|
| 251 | if ($acc_o ne $acc_m) {
|
|---|
| 252 | print STDERR "$where: warning: $what has menu accelerator $acc_m instead of $acc_o\n";
|
|---|
| 253 | }
|
|---|
| 254 | }
|
|---|
| 255 | }
|
|---|
| 256 |
|
|---|
| 257 | sub c_unescape {
|
|---|
| 258 | my $str = shift @_;
|
|---|
| 259 | $str =~ s/\\(x..|0|[0-7][0-7][0-7]|.)/&c_unescape_char($1)/ge;
|
|---|
| 260 | return $str;
|
|---|
| 261 | }
|
|---|
| 262 |
|
|---|
| 263 | sub c_unescape_char {
|
|---|
| 264 | my $ch = shift @_;
|
|---|
| 265 | if ($ch eq '0' || $ch eq 'x00' || $ch eq '000') {
|
|---|
| 266 | print STDERR "Nul byte in translation! (\\$ch)\n";
|
|---|
| 267 | exit(1);
|
|---|
| 268 | }
|
|---|
| 269 | return $ch if $ch eq '"' || $ch eq '\\';
|
|---|
| 270 | return "\n" if $ch eq "n";
|
|---|
| 271 | return "\t" if $ch eq "t";
|
|---|
| 272 | return "\r" if $ch eq "r";
|
|---|
| 273 | return "\f" if $ch eq "f";
|
|---|
| 274 | return chr(hex(substr($ch,1))) if $ch =~ /^x../;
|
|---|
| 275 | return chr(oct($ch)) if $ch =~ /^[0-7][0-7][0-7]/;
|
|---|
| 276 | print STDERR "Unknown C-escape in translation! (\\$ch)\n";
|
|---|
| 277 | exit(1);
|
|---|
| 278 | }
|
|---|