Context Navigation

gettexttomsg.pl @ ea19167

Visit:

RELEASE/1.1RELEASE/1.2debug-cidebug-ci-sanitisersfaster-cavernloglog-selectstereowalls-datawalls-data-hanging-as-warningwarn-only-for-hanging-survey

Last change on this file since ea19167 was 6432368, checked in by Olly Betts <olly@…>, 19 years ago

More merging from 1.0

git-svn-id: file:///home/survex-svn/survex/branches/survex-1_1@2871 4b37db11-9a0c-4f06-9ece-9ab7cdaee568

Property mode set to 100755

File size: 3.0 KB

Line
1	#!/usr/bin/perl -w
2	require 5.008;
3	use bytes;
4	use strict;
5
6	use integer;
7
8	# Magic identifier (12 bytes)
9	my $magic = "Svx\nMsg\r\n\xfe\xff\0";
10	# Designed to be corrupted by ASCII ftp, top bit stripping (or
11	# being used for parity). Contains a zero byte so more likely
12	# to be flagged as data (e.g. by perl's "-B" test).
13
14	my $major = 0;
15	my $minor = 8;
16
17	# File format (multi-byte numbers in network order (bigendian)):
18	# 12 bytes: Magic identifier
19	# 1 byte: File format major version number (0)
20	# 1 byte: File format minor version number (8)
21	# 2 bytes: Number of messages (N)
22	# 4 bytes: Offset from XXXX to end of file
23	# XXXX:
24	# N*:
25	# <message> NUL
26
27	my %ent = ();
28
29	open ENT, "../lib/named-entities.txt" or die $!;
30	while (<ENT>) {
31	my ($e, $v) = /^(\w+),(\d+)/;
32	$ent{$e} = $v;
33	}
34	close ENT;
35
36	my %revmsgs = ();
37
38	open MSG, "../lib/messages.txt" or die $!;
39	while (<MSG>) {
40	next if /^\s*#/; # skip comments
41
42	# en: 0 0.81 the message
43	# en-us: 0 0.81 " the message "
44	my ($langs, $msgno, $dummy, $msg) = /^([-\w,]+):\s(\d+)\s+("?)(.)\3/;
45
46	unless (defined $langs) {
47	chomp;
48	print STDERR "Warning: Bad line: \"$_\"\n";
49	next;
50	}
51
52	$langs =~ tr/-/_/;
53
54	if ($msg =~ /[\0-\x1f\x7f-\xff]/) {
55	print STDERR "Warning: literal character in message $msgno\n";
56	}
57
58	my $utf8 = string_to_utf8($msg);
59	for (split /,/, $langs) {
60	if (lc $_ eq 'en') {
61	$revmsgs{$utf8} = $msgno;
62	}
63	}
64	}
65
66	my $die = 0;
67
68	while (<>) {
69	if (!/^\s*#/) {
70	while (/\\\n$/) {
71	$_ .= <>;
72	}
73	# very crude - doesn't know about comments, etc
74	s!\b_$"(.*?)"$!replacement($1)!gse;
75	} elsif (/\s#\sdefine\s+_\(/) {
76	$_ = "#include \"message.h\"\n";
77	}
78	print;
79	}
80
81	if ($die) {
82	die "Not all messages found!\n";
83	}
84
85	sub replacement {
86	my $msg = shift;
87	$msg =~ s/\\\n//g;
88	$msg =~ s/\\n/\n/g;
89	$msg =~ s/\\t/\t/g;
90	my $msgno = "";
91	if (exists $revmsgs{$msg}) {
92	$msgno = $revmsgs{$msg};
93	} else {
94	if (!$die) {
95	print STDERR "Message(s) not found in message file:\n";
96	$die = 1;
97	}
98	print STDERR "'$msg'\n";
99	}
100	$msg =~ s/\n/ /g;
101	$msg =~ s/\t/ /g;
102	return "msg(/$msg/$msgno)";
103	}
104
105	sub string_to_utf8 {
106	my $s = shift;
107	$s =~ s/([\x80-\xff])/char_to_utf8(ord($1))/eg;
108	$s =~ s/\&(#\d+\|#x[a-f0-9]+\|[a-z0-9]+);?/decode_entity($1)/eig;
109	return $s;
110	}
111
112	sub decode_entity {
113	my $ent = shift;
114	return char_to_utf8($1) if $ent =~ /^#(\d+)$/;
115	return char_to_utf8(hex($1)) if $ent =~ /^#x([a-f0-9]+)$/;
116	return char_to_utf8($ent{$ent}) if exists $ent{$ent};
117	$ent = "\&$ent;";
118	print STDERR "Warning: entity \"$ent\" unknown\n";
119	return $ent;
120	}
121
122	sub char_to_utf8 {
123	my $unicode = shift;
124	# ASCII is easy, and the most common case
125	return chr($unicode) if $unicode < 0x80;
126
127	my $result = '';
128	my $n = 0x20;
129	while (1) {
130	$result = chr(0x80 \| ($unicode & 0x3f)) . $result;
131	$unicode >>= 6;
132	last if $unicode < $n;
133	$n >>= 1;
134	}
135	$result = chr((0x100 - $n*2) \| $unicode) . $result;
136	return $result;
137	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: git/src/gettexttomsg.pl @ ea19167

Download in other formats: