Context Navigation

makemsgs.pl @ dc639a8

Visit:

RELEASE/1.0, RELEASE/1.1, RELEASE/1.2, debug-ci, debug-ci-sanitisers, faster-cavernlog, log-select, main, stereo, stereo-2025, walls-data, walls-data-hanging-as-warning, warn-only-for-hanging-survey

Last change on this file since dc639a8 was dc639a8, checked in by Olly Betts <olly@…>, 25 years ago

Fixes to get a clean compile.

git-svn-id: file:///home/survex-svn/survex/trunk@997 4b37db11-9a0c-4f06-9ece-9ab7cdaee568

Property mode set to 100755

File size: 5.0 KB

Line
1	#!/usr/bin/perl -w
2	require 5.004;
3	use strict;
4
5	use integer;
6
# Message numbers at or above this threshold are written to a header file.
my $dontextract_threshold = 1000;

# 12 byte magic identifier.  It is designed to be corrupted by ASCII ftp
# and by top bit stripping (or the top bit being used for parity).  It
# contains a zero byte so it is more likely to be flagged as data (e.g. by
# perl's "-B" test).
my $magic = "Svx\nMsg\r\n\xfe\xff\0";

# File format version (major, minor).
my ($major, $minor) = (0, 8);

# File format (multi-byte numbers in network order (bigendian)):
#  12 bytes: Magic identifier
#   1 byte:  File format major version number (0)
#   1 byte:  File format minor version number (8)
#   2 bytes: Number of messages (N)
#   4 bytes: Offset from XXXX to end of file
#  XXXX:
#  N*:
#   <message> NUL
28
# Table mapping entity names to decimal character codes, read from
# "named-entities.txt" in the current directory.  Each useful line starts
# with "name,number"; anything after the number is ignored.
my %ent = ();

open ENT, "named-entities.txt" or die "named-entities.txt: $!";
while (<ENT>) {
    my ($name, $code) = /^(\w+),(\d+)/;
    # Skip lines which aren't "name,number" (e.g. blank lines) rather than
    # storing an undefined key and triggering warnings under -w.
    next unless defined $name;
    $ent{$name} = $code;
}
close ENT;
37
# Per-language message tables, keyed by language code (with '-' mapped to
# '_').  Each value is an array ref indexed by message number; messages at
# or above $dontextract_threshold go in %dontextract, indexed relative to
# the threshold.
my %msgs = ();
my %dontextract = ();

while (<>) {
    next if /^\s*#/; # skip comments

    # Expected form: a comma-separated list of language codes, a colon, the
    # message number, then the message text, which may be wrapped in double
    # quotes so that leading/trailing spaces survive:
    #
    # en:  0 0.81 the message
    # en-us: 0 0.81 " the message "
    #
    # NOTE(review): the examples above show a "0.81" field which this
    # pattern does not treat specially (it becomes part of the message) -
    # confirm whether the examples are out of date.
    my ($langs, $msgno, $quote, $msg) = /^([-\w,]+):\s*(\d+)\s+("?)(.*)\3/;

    unless (defined $langs) {
        chomp;
        print STDERR "Warning: Bad line: \"$_\"\n";
        next;
    }

    $langs =~ tr/-/_/;

    # Control characters and top-bit-set characters are flagged, but the
    # message is still processed.
    if ($msg =~ /[\0-\x1f\x7f-\xff]/) {
        print STDERR "Warning: literal character in message $msgno\n";
    }

    my $utf8 = string_to_utf8($msg);
    for my $code (split /,/, $langs) {
        if ($msgno >= $dontextract_threshold) {
            ${$dontextract{$code}}[$msgno - $dontextract_threshold] = $utf8;
        } else {
            ${$msgs{$code}}[$msgno] = $utf8;
        }
    }
}
69
# Languages to generate output for, in sorted order.  Keys ending in the
# literal text "_*" are left out.
my $lang;
my @langs = sort grep { !/_\*$/ } keys %msgs;

# Length of the longest per-language message array (-1 if there are no
# languages at all).
my $num_msgs = -1;
for my $code (@langs) {
    my $count = scalar @{$msgs{$code}};
    $num_msgs = $count if $count > $num_msgs;
}
78
# Write one binary "<lang>.msg" file per language: the magic identifier, the
# format version and message count, then the length of the message data
# followed by the NUL-terminated messages themselves.
foreach $lang (@langs) {
    # Any "_region" suffix is upper-cased in the file name (en_us -> en_US).
    my $fnm = $lang;
    $fnm =~ s/(_.*)$/\U$1/;
    open OUT, ">$fnm.msg" or die "$fnm.msg: $!";
    # The file is binary data (the magic identifier deliberately contains
    # "\n" and "\r\n"), so newline translation must be disabled.
    binmode OUT;

    my $aref = $msgs{$lang};

    # For a variant such as "en_us", messages missing from the variant are
    # taken from the main language ("en") if we have it.
    my $parentaref;
    my $mainlang = $lang;
    $parentaref = $msgs{$mainlang} if $mainlang =~ s/_.*$//;

    print OUT $magic or die "$fnm.msg: $!";
    print OUT pack("CCn", $major, $minor, $num_msgs) or die "$fnm.msg: $!";

    my $buff = '';

    for my $n (0 .. $num_msgs - 1) {
        my $msg = $$aref[$n];
        $msg = $$parentaref[$n] if !defined $msg && defined $parentaref;
        if (!defined $msg) {
            # Last resort: the English text, or an empty message if English
            # doesn't have one either.
            $msg = ${$msgs{'en'}}[$n];
            if (defined $msg && $msg ne '') {
                # don't report if we have a parent (as the omission will be reported there)
                print STDERR "Warning: message $n not in language $lang\n" unless defined $parentaref;
            } else {
                $msg = '';
            }
        }
        $buff .= $msg . "\0";
    }

    print OUT pack('N',length($buff)), $buff or die "$fnm.msg: $!";
    close OUT or die "$fnm.msg: $!";
}
116
# Length of the longest per-language array of header-file messages (-1 if no
# language has any).
my $num_dontextract = -1;
foreach $lang (@langs) {
    my $aref = $dontextract{$lang};
    # A language may have no header-file messages at all, in which case
    # there is no array to measure.  (Testing the reference rather than
    # using defined(@array), which newer versions of perl reject.)
    if (defined $aref && scalar @$aref > $num_dontextract) {
        $num_dontextract = scalar @$aref;
    }
}
124
# Write one C header "<lang>.h" per language, holding the messages numbered
# from $dontextract_threshold upwards as a single array initialised from
# adjacent NUL-terminated string literals.
#
# NOTE(review): if no language has any such messages, $num_dontextract is -1
# and the array is written with no initialiser - confirm whether that case
# can arise and what the header should contain then.
foreach $lang (@langs) {
    # Any "_region" suffix is upper-cased in the file name (en_us -> en_US).
    my $fnm = $lang;
    $fnm =~ s/(_.*)$/\U$1/;
    open OUT, ">$fnm.h" or die "$fnm.h: $!";
    print OUT "#define N_DONTEXTRACTMSGS ", $num_dontextract, "\n";
    print OUT "static unsigned char dontextractmsgs[] =";

    # These don't depend on the message number, so look them up once.
    my $aref = $dontextract{$lang};

    # For a variant such as "en_us", messages missing from the variant are
    # taken from the main language ("en") if we have it.
    my $parentaref;
    my $mainlang = $lang;
    $parentaref = $dontextract{$mainlang} if $mainlang =~ s/_.*$//;

    for my $n (0 .. $num_dontextract - 1) {
        print OUT "\n";

        my $msg = $$aref[$n];
        $msg = $$parentaref[$n] if !defined $msg && defined $parentaref;
        if (!defined $msg) {
            # Last resort: the English text, or an empty message if English
            # doesn't have one either.
            $msg = ${$dontextract{'en'}}[$n];
            if (defined $msg && $msg ne '') {
                # don't report if we have a parent (as the omission will be reported there)
                print STDERR "Warning: message ", $dontextract_threshold + $n, " not in language $lang\n" unless defined $parentaref;
            } else {
                $msg = '';
            }
        }

        # Escape for a C string literal.  Backslashes must be done first so
        # the escapes added afterwards aren't themselves escaped.
        $msg =~ s/\\/\\\\/g;
        $msg =~ s/"/\\"/g;
        $msg =~ s/\t/\\t/g;
        $msg =~ s/\n/\\n/g;
        $msg =~ s/\r/\\r/g;

        # A message with a leading or trailing space is wrapped in (escaped)
        # double quotes, with the quotes already inside it escaped once more.
        if ($msg =~ /^ / || $msg =~ / $/) {
            $msg =~ s/\\"/\\\\\\"/g;
            $msg = '\\"'.$msg.'\\"';
        }

        # Each literal is preceded by its message number in a C comment.
        print OUT " /*", $dontextract_threshold + $n, "*/ \"$msg\\0\"";
    }
    print OUT ";\n";
    close OUT or die "$fnm.h: $!";
}
168
# Convert a message string to UTF-8.
#
# Takes the message text as its only argument.  Each character in the range
# 0x80-0xff is replaced by the result of char_to_utf8() on its character
# code, then each character reference - "&#" plus decimal digits, "&#x" plus
# hex digits, or "&" plus an alphanumeric name, with the trailing ";"
# optional - is replaced by whatever decode_entity() returns for it.
# Returns the converted string.
sub string_to_utf8 {
    my $s = shift;
    $s =~ s/([\x80-\xff])/char_to_utf8(ord($1))/eg;
    $s =~ s/\&(#\d+|#x[a-f0-9]+|[a-z0-9]+);?/decode_entity($1)/eig;
    return $s;
}
175
# Decode one character reference to UTF-8.
#
# Takes the reference without its leading "&" and trailing ";": "#" plus
# decimal digits, "#x" plus hex digits, or a name to look up in %ent.
# Returns the result of char_to_utf8() for the character.  If the name is
# not in %ent, a warning is printed to STDERR and the reference is returned
# as "&name;".
sub decode_entity {
    my $name = shift;
    return char_to_utf8($1) if $name =~ /^#(\d+)$/;
    # Case-insensitive, as string_to_utf8() matches references that way, so
    # upper case hex digits can get here.
    return char_to_utf8(hex($1)) if $name =~ /^#x([a-f0-9]+)$/i;
    return char_to_utf8($ent{$name}) if exists $ent{$name};
    my $reference = "\&$name;";
    print STDERR "Warning: entity \"$reference\" unknown\n";
    return $reference;
}
185
# Encode a single character code as a UTF-8 byte sequence.
#
# Takes the character code as a number and returns a string with one
# character per byte of the encoding.
sub char_to_utf8 {
    my $unicode = shift;
    # ASCII is easy, and the most common case
    return chr($unicode) if $unicode < 0x80;

    # Peel off continuation bytes (10xxxxxx) six bits at a time, starting
    # from the low end, until what's left fits in the lead byte.  Each extra
    # continuation byte leaves one bit less room in the lead byte, so the
    # limit halves on every pass.
    my $tail = '';
    my $limit = 0x20;
    while (1) {
        $tail = chr(0x80 | ($unicode & 0x3f)) . $tail;
        $unicode >>= 6;
        last if $unicode < $limit;
        $limit >>= 1;
    }

    # The lead byte's marker bits (110, 1110, 11110, ...) are all the bits
    # from $limit * 2 upwards; the remaining value fills the low bits.
    return chr((0x100 - $limit * 2) | $unicode) . $tail;
}
202

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: git/lib/makemsgs.pl @ dc639a8

Download in other formats: