Context Navigation

gettexttomsg.pl @ 68d7dfc

Visit:

RELEASE/1.1RELEASE/1.2debug-cidebug-ci-sanitisersfaster-cavernloglog-selectstereostereo-2025walls-datawalls-data-hanging-as-warningwarn-only-for-hanging-survey

Last change on this file since 68d7dfc was 68d7dfc, checked in by Olly Betts <olly@…>, 15 years ago

src/gettexttomsg.pl,src/wx.h: Add GPL boilerplate to files which were
missing it.

git-svn-id: file:///home/survex-svn/survex/branches/survex-1_1@3418 4b37db11-9a0c-4f06-9ece-9ab7cdaee568

Property mode set to 100755

File size: 3.7 KB

Line
1	#!/usr/bin/perl -w
2
3	# gettexttomsg.pl
4	#
5	# Copyright (C) 2001,2002,2005 Olly Betts
6	#
7	# This program is free software; you can redistribute it and/or modify
8	# it under the terms of the GNU General Public License as published by
9	# the Free Software Foundation; either version 2 of the License, or
10	# (at your option) any later version.
11	#
12	# This program is distributed in the hope that it will be useful,
13	# but WITHOUT ANY WARRANTY; without even the implied warranty of
14	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	# GNU General Public License for more details.
16	#
17	# You should have received a copy of the GNU General Public License
18	# along with this program; if not, write to the Free Software
19	# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
21	require 5.008;
22	use bytes;
23	use strict;
24
25	use integer;
26
27	# Magic identifier (12 bytes)
28	my $magic = "Svx\nMsg\r\n\xfe\xff\0";
29	# Designed to be corrupted by ASCII ftp, top bit stripping (or
30	# being used for parity). Contains a zero byte so more likely
31	# to be flagged as data (e.g. by perl's "-B" test).
32
33	my $major = 0;
34	my $minor = 8;
35
36	# File format (multi-byte numbers in network order (bigendian)):
37	# 12 bytes: Magic identifier
38	# 1 byte: File format major version number (0)
39	# 1 byte: File format minor version number (8)
40	# 2 bytes: Number of messages (N)
41	# 4 bytes: Offset from XXXX to end of file
42	# XXXX:
43	# N*:
44	# <message> NUL
45
46	my %ent = ();
47
48	open ENT, "../lib/named-entities.txt" or die $!;
49	while (<ENT>) {
50	my ($e, $v) = /^(\w+),(\d+)/;
51	$ent{$e} = $v;
52	}
53	close ENT;
54
55	my %revmsgs = ();
56
57	open MSG, "../lib/messages.txt" or die $!;
58	while (<MSG>) {
59	next if /^\s*#/; # skip comments
60
61	# en: 0 0.81 the message
62	# en-us: 0 0.81 " the message "
63	my ($langs, $msgno, $dummy, $msg) = /^([-\w,]+):\s(\d+)\s+("?)(.)\3/;
64
65	unless (defined $langs) {
66	chomp;
67	print STDERR "Warning: Bad line: \"$_\"\n";
68	next;
69	}
70
71	$langs =~ tr/-/_/;
72
73	if ($msg =~ /[\0-\x1f\x7f-\xff]/) {
74	print STDERR "Warning: literal character in message $msgno\n";
75	}
76
77	my $utf8 = string_to_utf8($msg);
78	for (split /,/, $langs) {
79	if (lc $_ eq 'en') {
80	$revmsgs{$utf8} = $msgno;
81	}
82	}
83	}
84
85	my $die = 0;
86
87	while (<>) {
88	if (!/^\s*#/) {
89	while (/\\\n$/) {
90	$_ .= <>;
91	}
92	# very crude - doesn't know about comments, etc
93	s!\b_$"(.*?)"$!replacement($1)!gse;
94	} elsif (/\s#\sdefine\s+_\(/) {
95	$_ = "#include \"message.h\"\n";
96	}
97	print;
98	}
99
100	if ($die) {
101	die "Not all messages found!\n";
102	}
103
104	sub replacement {
105	my $msg = shift;
106	$msg =~ s/\\\n//g;
107	$msg =~ s/\\n/\n/g;
108	$msg =~ s/\\t/\t/g;
109	my $msgno = "";
110	if (exists $revmsgs{$msg}) {
111	$msgno = $revmsgs{$msg};
112	} else {
113	if (!$die) {
114	print STDERR "Message(s) not found in message file:\n";
115	$die = 1;
116	}
117	print STDERR "'$msg'\n";
118	}
119	$msg =~ s/\n/ /g;
120	$msg =~ s/\t/ /g;
121	return "msg(/$msg/$msgno)";
122	}
123
124	sub string_to_utf8 {
125	my $s = shift;
126	$s =~ s/([\x80-\xff])/char_to_utf8(ord($1))/eg;
127	$s =~ s/\&(#\d+\|#x[a-f0-9]+\|[a-z0-9]+);?/decode_entity($1)/eig;
128	return $s;
129	}
130
131	sub decode_entity {
132	my $ent = shift;
133	return char_to_utf8($1) if $ent =~ /^#(\d+)$/;
134	return char_to_utf8(hex($1)) if $ent =~ /^#x([a-f0-9]+)$/;
135	return char_to_utf8($ent{$ent}) if exists $ent{$ent};
136	$ent = "\&$ent;";
137	print STDERR "Warning: entity \"$ent\" unknown\n";
138	return $ent;
139	}
140
141	sub char_to_utf8 {
142	my $unicode = shift;
143	# ASCII is easy, and the most common case
144	return chr($unicode) if $unicode < 0x80;
145
146	my $result = '';
147	my $n = 0x20;
148	while (1) {
149	$result = chr(0x80 \| ($unicode & 0x3f)) . $result;
150	$unicode >>= 6;
151	last if $unicode < $n;
152	$n >>= 1;
153	}
154	$result = chr((0x100 - $n*2) \| $unicode) . $result;
155	return $result;
156	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: git/src/gettexttomsg.pl @ 68d7dfc

Download in other formats: