Context Navigation

makemsgs.pl @ acf84e45

Visit:

RELEASE/1.0RELEASE/1.2debug-cidebug-ci-sanitisersfaster-cavernloglog-selectstereostereo-2025walls-datawalls-data-hanging-as-warningwarn-only-for-hanging-survey

Last change on this file since acf84e45 was acf84e45, checked in by Olly Betts <olly@…>, 22 years ago

Translation updates.

git-svn-id: file:///home/survex-svn/survex/trunk@2674 4b37db11-9a0c-4f06-9ece-9ab7cdaee568

Property mode set to 100755

File size: 6.4 KB

Line
1	#!/usr/bin/perl -w
2	require 5.004;
3	use strict;
4
5	use integer;
6
7	# messages >= this value are written to a header file
8	my $dontextract_threshold = 1000;
9
10	# Magic identifier (12 bytes)
11	my $magic = "Svx\nMsg\r\n\xfe\xff\0";
12	# Designed to be corrupted by ASCII ftp, top bit stripping (or
13	# being used for parity). Contains a zero byte so more likely
14	# to be flagged as data (e.g. by perl's "-B" test).
15
16	my $major = 0;
17	my $minor = 8;
18
19	# File format (multi-byte numbers in network order (bigendian)):
20	# 12 bytes: Magic identifier
21	# 1 byte: File format major version number (0)
22	# 1 byte: File format minor version number (8)
23	# 2 bytes: Number of messages (N)
24	# 4 bytes: Offset from XXXX to end of file
25	# XXXX:
26	# N*:
27	# <message> NUL
28
29	my %ent = ();
30
31	open ENT, ($ENV{srcdir}?"$ENV{srcdir}/":"")."named-entities.txt" or die $!;
32	while (<ENT>) {
33	my ($e, $v) = /^(\w+),(\d+)/;
34	$ent{$e} = $v;
35	}
36	close ENT;
37
38	my %msgs = ();
39	my %dontextract = ();
40
41	my %raw = ();
42	my $raw = '';
43	my $raw_comments = '';
44	my $curmsg = -1;
45
46	while (<>) {
47	if (/^\s*#/) {
48	$raw_comments .= $_;
49	next; # skip comments
50	}
51
52	# en: 0 0.81 the message
53	# en-us: 0 0.81 " the message "
54	my ($langs, $msgno, $dummy, $msg) = /^([-\w,]+):\s(\d+)\s+("?)(.)\3/;
55
56	if ($msgno != $curmsg) {
57	if ($msgno < $curmsg) {
58	print STDERR "Warning: message number jumps back from $curmsg to $msgno\n";
59	}
60	$raw{$curmsg} = $raw;
61	$raw = '';
62	$curmsg = $msgno;
63	}
64
65	$raw .= $raw_comments . $_;
66	$raw_comments = '';
67
68	unless (defined $langs) {
69	chomp;
70	print STDERR "Warning: Bad line: \"$_\"\n";
71	next;
72	}
73
74	$langs =~ tr/-/_/;
75
76	if ($msg =~ /[\0-\x1f\x7f-\xff]/) {
77	print STDERR "Warning: literal character in message $msgno\n";
78	}
79
80	my $utf8 = string_to_utf8($msg);
81	for (split /,/, $langs) {
82	if ($msgno >= $dontextract_threshold) {
83	if (${$dontextract{$_}}[$msgno - $dontextract_threshold]) {
84	print STDERR "Warning: already had message $msgno for language $_\n";
85	}
86	${$dontextract{$_}}[$msgno - $dontextract_threshold] = $utf8;
87	} else {
88	if (${$msgs{$_}}[$msgno]) {
89	print STDERR "Warning: already had message $msgno for language $_\n";
90	}
91	${$msgs{$_}}[$msgno] = $utf8;
92	}
93	}
94	}
95	$raw{$curmsg} = $raw;
96
97	my $lang;
98	my @langs = sort grep ! /_\*$/, keys %msgs;
99
100	my $num_msgs = -1;
101	foreach $lang (@langs) {
102	my $aref = $msgs{$lang};
103	$num_msgs = scalar @$aref if scalar @$aref > $num_msgs;
104	}
105
106	foreach $lang (@langs) {
107	my $fnm = $lang;
108	$fnm =~ s/(_.*)$/\U$1/;
109	open OUT, ">$fnm.msg" or die $!;
110
111	my $aref = $msgs{$lang};
112
113	my $parentaref;
114	my $mainlang = $lang;
115	$parentaref = $msgs{$mainlang} if $mainlang =~ s/_.*$//;
116
117	print OUT $magic or die $!;
118	print OUT pack("CCn", $major, $minor, $num_msgs) or die $!;
119
120	my $buff = '';
121
122	my $n;
123	for $n (0 .. $num_msgs - 1) {
124	my $warned = 0;
125	my $msg = $$aref[$n];
126	if (!defined $msg) {
127	$msg = $$parentaref[$n] if defined $parentaref;
128	if (!defined $msg) {
129	$msg = ${$msgs{'en'}}[$n];
130	# don't report if we have a parent (as the omission will be
131	# reported there)
132	if (defined $msg && $msg ne '' && !defined $parentaref) {
133	print STDERR "Warning: message $n not in language $lang\n";
134	open TODO, ">>$lang.todo" or die $!;
135	print TODO $raw{$n}, "\n";
136	close TODO;
137	$warned = 1;
138	} else {
139	$msg = '' if !defined $msg;
140	}
141	}
142	}
143	if (defined($raw{$n})
144	&& $raw{$n} =~ /^\s#\sTRANSLATE\b[- \ta-z]*\b$lang\b/m) {
145	if ($warned) {
146	print STDERR "Warning: message $n missing and also marked for retranslation for language $lang\n";
147	} else {
148	print STDERR "Warning: message $n needs retranslating for language $lang\n";
149	open TODO, ">>$lang.todo" or die $!;
150	print TODO $raw{$n}, "\n";
151	close TODO;
152	}
153	}
154	$buff .= $msg . "\0";
155	}
156
157	print OUT pack('N',length($buff)), $buff or die $!;
158	close OUT or die $!;
159	}
160
161	my $num_dontextract = -1;
162	foreach $lang (@langs) {
163	my $aref = $dontextract{$lang};
164	if (defined(@$aref)) {
165	$num_dontextract = scalar @$aref if scalar @$aref > $num_dontextract;
166	}
167	}
168
169	foreach $lang (@langs) {
170	my $fnm = $lang;
171	$fnm =~ s/(_.*)$/\U$1/;
172	open OUT, ">$fnm.h" or die $!;
173	print OUT "#define N_DONTEXTRACTMSGS ", $num_dontextract, "\n";
174	print OUT "static unsigned char dontextractmsgs[] =";
175
176	for my $n (0 .. $num_dontextract - 1) {
177	print OUT "\n";
178
179	my $aref = $dontextract{$lang};
180
181	my $parentaref;
182	my $mainlang = $lang;
183	$parentaref = $dontextract{$mainlang} if $mainlang =~ s/_.*$//;
184
185	my $msg = $$aref[$n];
186	if (!defined $msg) {
187	$msg = $$parentaref[$n] if defined $parentaref;
188	if (!defined $msg) {
189	$msg = ${$dontextract{'en'}}[$n];
190	# don't report if we have a parent (as the omission will be reported there)
191	if (defined $msg && $msg ne '' && !defined $parentaref) {
192	print STDERR "Warning: message ", $dontextract_threshold + $n, " not in language $lang\n";
193	open TODO, ">>$lang.todo" or die $!;
194	print TODO $raw{$dontextract_threshold + $n}, "\n";
195	close TODO;
196	} else {
197	$msg = '';
198	}
199	}
200	}
201	$msg =~ s/\\/\\\\/g;
202	$msg =~ s/"/\\"/g;
203	$msg =~ s/\t/\\t/g;
204	$msg =~ s/\n/\\n/g;
205	$msg =~ s/\r/\\r/g;
206	if ($msg =~ /^ / \|\| $msg =~ / $/) {
207	$msg =~ s/\\"/\\\\\\"/g;
208	$msg = '\\"'.$msg.'\\"';
209	}
210	print OUT " /", $dontextract_threshold + $n, "/ \"$msg\\0\"";
211	}
212	print OUT ";\n";
213	close OUT or die $!;
214	}
215
216	#for (sort {$a<=>$b} keys %raw) {print "$_ = [$raw{$_}]\n";}
217
218	sub string_to_utf8 {
219	my $s = shift;
220	$s =~ s/([\x80-\xff])/char_to_utf8(ord($1))/eg;
221	$s =~ s/\&(#\d+\|#x[a-f0-9]+\|[a-z0-9]+);?/decode_entity($1)/eig;
222	return $s;
223	}
224
225	sub decode_entity {
226	my $ent = shift;
227	return char_to_utf8($1) if $ent =~ /^#(\d+)$/;
228	return char_to_utf8(hex($1)) if $ent =~ /^#x([a-f0-9]+)$/;
229	return char_to_utf8($ent{$ent}) if exists $ent{$ent};
230	$ent = "\&$ent;";
231	print STDERR "Warning: entity \"$ent\" unknown\n";
232	return $ent;
233	}
234
235	sub char_to_utf8 {
236	my $unicode = shift;
237	# ASCII is easy, and the most common case
238	return chr($unicode) if $unicode < 0x80;
239
240	my $result = '';
241	my $n = 0x20;
242	while (1) {
243	$result = chr(0x80 \| ($unicode & 0x3f)) . $result;
244	$unicode >>= 6;
245	last if $unicode < $n;
246	$n >>= 1;
247	}
248	$result = chr((0x100 - $n*2) \| $unicode) . $result;
249	return $result;
250	}
251

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: git/lib/makemsgs.pl @ acf84e45

Download in other formats: