source: git/src/message.c @ efb30c4

RELEASE/1.1RELEASE/1.2debug-cidebug-ci-sanitisersfaster-cavernloglog-selectstereostereo-2025walls-datawalls-data-hanging-as-warningwarn-only-for-hanging-survey
Last change on this file since efb30c4 was 1f81f3d, checked in by Olly Betts <olly@…>, 19 years ago

Set the encoding for aven to UTF-8.

git-svn-id: file:///home/survex-svn/survex/branches/survex-1_1@3230 4b37db11-9a0c-4f06-9ece-9ab7cdaee568

  • Property mode set to 100644
File size: 33.8 KB
Line 
1/* message.c
2 * Fairly general purpose message and error routines
3 * Copyright (C) 1993-2003,2004,2005 Olly Betts
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18 */
19
20/*#define DEBUG 1*/
21
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#include <ctype.h>
30#include <limits.h>
31#include <errno.h>
32#include <locale.h>
33
34#include "cmdline.h"
35#include "whichos.h"
36#include "filename.h"
37#include "message.h"
38#include "osdepend.h"
39#include "filelist.h"
40#include "debug.h"
41
42#ifdef AVEN
43# include "aven.h"
44#endif
45
46#ifdef HAVE_SIGNAL
47# ifdef HAVE_SETJMP_H
48#  include <setjmp.h>
49static jmp_buf jmpbufSignal;
50#  include <signal.h>
51# else
52#  undef HAVE_SIGNAL
53# endif
54#endif
55
56#if OS_WIN32
57# define WIN32_LEAN_AND_MEAN
58# include <windows.h>
59#elif OS_UNIX
60# include <sys/types.h>
61# include <sys/stat.h>
62#endif
63
64/* For funcs which want to be immune from messing around with different
65 * calling conventions */
66#ifndef CDECL
67# define CDECL
68#endif
69
/* Running totals of diagnostics issued.  These have external linkage. */
int msg_warnings = 0; /* keep track of how many warnings we've given */
int msg_errors = 0;   /* and how many (non-fatal) errors */

/* in case osmalloc() fails before appname_copy is set up */
/* (msg_init() replaces this placeholder; msg_appname() returns it) */
static const char *appname_copy = "anonymous program";

/* Path to use to look for executables (used by aven to find cavern). */
/* (set from argv[0] in msg_init(); msg_exepth() returns it) */
static const char *exe_pth = "";
78
79/* error code for failed osmalloc and osrealloc calls */
80static void
81outofmem(OSSIZE_T size)
82{
83   fatalerror(/*Out of memory (couldn't find %lu bytes).*/1,
84              (unsigned long)size);
85}
86
#ifdef TOMBSTONES
/* Debugging aid: when TOMBSTONES is defined, the allocation wrappers below
 * put TOMBSTONE_SIZE guard bytes before and after each block and check them
 * when the block is reallocated or freed. */
#define TOMBSTONE_SIZE 16
/* The guard pattern.  The initialiser is exactly TOMBSTONE_SIZE characters
 * long, so no terminating NUL is stored in the array. */
static const char tombstone[TOMBSTONE_SIZE] = "012345\xfftombstone";
#endif
91
92/* malloc with error catching if it fails. Also allows us to write special
93 * versions easily eg for MS Windows.
94 */
95void Far *
96osmalloc(OSSIZE_T size)
97{
98   void Far *p;
99#ifdef TOMBSTONES
100   size += TOMBSTONE_SIZE * 2;
101   p = malloc(size);
102#else
103   p = xosmalloc(size);
104#endif
105   if (p == NULL) outofmem(size);
106#ifdef TOMBSTONES
107   printf("osmalloc truep=%p truesize=%d\n", p, size);
108   memcpy(p, tombstone, TOMBSTONE_SIZE);
109   memcpy(p + size - TOMBSTONE_SIZE, tombstone, TOMBSTONE_SIZE);
110   *(size_t *)p = size;
111   p += TOMBSTONE_SIZE;
112#endif
113   return p;
114}
115
116/* realloc with error catching if it fails. */
117void Far *
118osrealloc(void *p, OSSIZE_T size)
119{
120   /* some pre-ANSI realloc implementations don't cope with a NULL pointer */
121   if (p == NULL) {
122      p = xosmalloc(size);
123   } else {
124#ifdef TOMBSTONES
125      int true_size;
126      size += TOMBSTONE_SIZE * 2;
127      p -= TOMBSTONE_SIZE;
128      true_size = *(size_t *)p;
129      printf("osrealloc (in truep=%p truesize=%d)\n", p, true_size);
130      if (memcmp(p + sizeof(size_t), tombstone + sizeof(size_t),
131                 TOMBSTONE_SIZE - sizeof(size_t)) != 0) {
132         printf("start tombstone for block %p, size %d corrupted!",
133                p + TOMBSTONE_SIZE, true_size - TOMBSTONE_SIZE * 2);
134      }
135      if (memcmp(p + true_size - TOMBSTONE_SIZE, tombstone,
136                 TOMBSTONE_SIZE) != 0) {
137         printf("end tombstone for block %p, size %d corrupted!",
138                p + TOMBSTONE_SIZE, true_size - TOMBSTONE_SIZE * 2);
139      }
140      p = realloc(p, size);
141      if (p == NULL) outofmem(size);
142      printf("osrealloc truep=%p truesize=%d\n", p, size);
143      memcpy(p, tombstone, TOMBSTONE_SIZE);
144      memcpy(p + size - TOMBSTONE_SIZE, tombstone, TOMBSTONE_SIZE);
145      *(size_t *)p = size;
146      p += TOMBSTONE_SIZE;
147#else
148      p = xosrealloc(p, size);
149#endif
150   }
151   if (p == NULL) outofmem(size);
152   return p;
153}
154
155char Far *
156osstrdup(const char *str)
157{
158   char *p;
159   OSSIZE_T len;
160   len = strlen(str) + 1;
161   p = osmalloc(len);
162   memcpy(p, str, len);
163   return p;
164}
165
/* osfree is usually just a macro in osalloc.h */
#ifdef TOMBSTONES
/* Checking version of osfree: verify both guard patterns of the block
 * (printing a diagnostic if either has been damaged) and then release it.
 * p may be NULL, in which case nothing happens.
 */
void
osfree(void *p)
{
   unsigned char *raw;
   size_t true_size;
   if (!p) return;
   /* Step back from the caller's pointer to the true start of the block,
    * where the true size was stored. */
   raw = (unsigned char *)p - TOMBSTONE_SIZE;
   true_size = *(size_t *)raw;
   printf("osfree truep=%p truesize=%lu\n",
          (void *)raw, (unsigned long)true_size);
   /* The first sizeof(size_t) bytes of the leading guard hold the size, so
    * only the remainder can be compared with the pattern. */
   if (memcmp(raw + sizeof(size_t), tombstone + sizeof(size_t),
              TOMBSTONE_SIZE - sizeof(size_t)) != 0) {
      printf("start tombstone for block %p, size %lu corrupted!",
             p, (unsigned long)(true_size - TOMBSTONE_SIZE * 2));
   }
   if (memcmp(raw + true_size - TOMBSTONE_SIZE, tombstone,
              TOMBSTONE_SIZE) != 0) {
      printf("end tombstone for block %p, size %lu corrupted!",
             p, (unsigned long)(true_size - TOMBSTONE_SIZE * 2));
   }
   free(raw);
}
#endif
189
190#ifdef HAVE_SIGNAL
191
/* Number of the signal most recently caught by report_sig(); read by
 * init_signals() once control has been transferred back there. */
static int sigReceived;

/* for systems not using autoconf, assume the signal handler returns void
 * unless specified elsewhere */
#ifndef RETSIGTYPE
# define RETSIGTYPE void
#endif

/* Signal handler installed by init_signals().  It records which signal
 * arrived and then jumps to the setjmp() point in init_signals(), where the
 * actual reporting is done.  The assignment has to come first, as control
 * never comes back here after the longjmp().
 */
static CDECL RETSIGTYPE Far
report_sig(int sig)
{
   sigReceived = sig;
   longjmp(jmpbufSignal, 1);
}
206
/* Install report_sig() as the handler for the signals which indicate a bug,
 * and also serve as the place where such a signal gets reported.
 *
 * The first time through, setjmp() returns 0, so the handlers are installed
 * and we return to the caller.  If report_sig() later runs, its longjmp()
 * makes setjmp() return non-zero and execution continues after the if
 * block: the signal is reported and the program exits.
 *
 * NOTE(review): by the time report_sig() calls longjmp() this function has
 * already returned, and ISO C leaves longjmp() into a function which has
 * returned undefined.  That the code below only touches file-scope state
 * and never returns probably explains why it works in practice - confirm
 * before relying on it on a new platform.
 */
static void
init_signals(void)
{
   int en;
   if (!setjmp(jmpbufSignal)) {
      signal(SIGABRT, report_sig); /* abnormal termination eg abort() */
      signal(SIGFPE,  report_sig); /* arithmetic error eg /0 or overflow */
      signal(SIGILL,  report_sig); /* illegal function image eg illegal instruction */
      signal(SIGSEGV, report_sig); /* illegal storage access eg access outside memory limits */
# ifdef SIGSTAK /* only on RISC OS AFAIK */
      signal(SIGSTAK, report_sig); /* stack overflow */
# endif
      return;
   }

   /* Remove that signal handler to avoid the possibility of an infinite loop.
    */
   signal(sigReceived, SIG_DFL);

   /* Pick the message number describing the signal.  The comment before
    * each number is the text of that message. */
   switch (sigReceived) {
      case SIGABRT: en = /*Abnormal termination*/90; break;
      case SIGFPE:  en = /*Arithmetic error*/91; break;
      case SIGILL:  en = /*Illegal instruction*/92; break;
      case SIGSEGV: en = /*Bad memory access*/94; break;
# ifdef SIGSTAK
      case SIGSTAK: en = /*Stack overflow*/96; break;
# endif
      default:      en = /*Unknown signal received*/97; break;
   }
   fputsnl(msg(en), STDERR);

   /* Any of the signals we catch indicates a bug */
   fatalerror(/*Bug in program detected! Please report this to the authors*/11);

   /* Presumably fatalerror() doesn't return, making this just a backstop. */
   exit(EXIT_FAILURE);
}
243#endif
244
245static int
246default_charset(void)
247{
248#if OS_WIN32
249# ifdef AVEN
250#  define CODEPAGE GetACP()
251# else
252#  define CODEPAGE (getenv("SURVEX_UTF8") ? 0 : GetConsoleOutputCP())
253# endif
254   switch (CODEPAGE) {
255    case 0: return CHARSET_UTF8;
256    case 1252: return CHARSET_WINCP1252;
257    case 1250: return CHARSET_WINCP1250;
258    case 850: return CHARSET_DOSCP850;
259   }
260   return CHARSET_USASCII;
261#elif OS_UNIX
262#ifdef AVEN
263   return CHARSET_UTF8;
264#else
265   const char *p = getenv("LC_ALL");
266   if (p == NULL || p[0] == '\0') {
267      p = getenv("LC_CTYPE");
268      if (p == NULL || p[0] == '\0') {
269         p = msg_lang;
270      }
271   }
272
273   if (p) {
274      char *q = strchr(p, '.');
275      if (q) p = q + 1;
276   }
277
278   if (p) {
279      const char *chset = p;
280      size_t name_len;
281
282      while (*p != '\0' && *p != '@') p++;
283
284      name_len = p - chset;
285
286      if (name_len) {
287         int only_digit = 1;
288         size_t cnt;
289
290         for (cnt = 0; cnt < name_len; ++cnt)
291            if (isalpha((unsigned char)chset[cnt])) {
292               only_digit = 0;
293               break;
294            }
295
296         if (only_digit) goto iso;
297
298         switch (tolower(chset[0])) {
299          case 'i':
300            if (tolower(chset[1]) == 's' && tolower(chset[2]) == 'o') {
301               chset += 3;
302               iso:
303               if (strncmp(chset, "8859", 4) == 0) {
304                  chset += 4;
305                  while (chset < p && *chset && !isdigit((unsigned char)*chset))
306                     chset++;
307                  switch (atoi(chset)) {
308                   case 1: return CHARSET_ISO_8859_1;
309                   case 2: return CHARSET_ISO_8859_2;
310                   case 15: return CHARSET_ISO_8859_15;
311                   default: return CHARSET_USASCII;
312                  }
313               }
314            }
315            break;
316          case 'u':
317            if (tolower(chset[1]) == 't' && tolower(chset[2]) == 'f') {
318               chset += 3;
319               while (chset < p && *chset && !isdigit((unsigned char)*chset))
320                  chset++;
321               switch (atoi(chset)) {
322                case 8: return CHARSET_UTF8;
323                default: return CHARSET_USASCII;
324               }
325            }
326         }
327      }
328   }
329   return CHARSET_USASCII;
330#endif
331#else
332# error Do not know operating system!
333#endif
334}
335
336/* It seems that Swedish and maybe some other Scandinavian languages don't
337 * transliterate &auml; to ae - but it seems there may be conflicting views
338 * on this...
339 */
340#define umlaut_to_e() 1
341
342/* values <= 127 already dealt with */
343static int
344add_unicode(int charset, unsigned char *p, int value)
345{
346#ifdef DEBUG
347   fprintf(stderr, "add_unicode(%d, %p, %d)\n", charset, p, value);
348#endif
349   if (value == 0) return 0;
350   switch (charset) {
351   case CHARSET_USASCII:
352      if (value < 0x80) {
353         *p = value;
354         return 1;
355      }
356      break;
357   case CHARSET_ISO_8859_1:
358      if (value < 0x100) {
359         *p = value;
360         return 1;
361      }
362      break;
363   case CHARSET_ISO_8859_2:
364      if (value >= 0xa0) {
365         int v = 0;
366         switch (value) {
367            case 0xa0: case 0xa4: case 0xa7: case 0xa8: case 0xad: case 0xb0:
368            case 0xb4: case 0xb8: case 0xc1: case 0xc2: case 0xc4: case 0xc7:
369            case 0xc9: case 0xcb: case 0xcd: case 0xce: case 0xd3: case 0xd4:
370            case 0xd6: case 0xd7: case 0xda: case 0xdc: case 0xdd: case 0xdf:
371            case 0xe1: case 0xe2: case 0xe4: case 0xe7: case 0xe9: case 0xeb:
372            case 0xed: case 0xee: case 0xf3: case 0xf4: case 0xf6: case 0xf7:
373            case 0xfa: case 0xfc: case 0xfd:
374               v = value; break;
375            case 0x104: v = '\xa1'; break;
376            case 0x2d8: v = '\xa2'; break;
377            case 0x141: v = '\xa3'; break;
378            case 0x13d: v = '\xa5'; break;
379            case 0x15a: v = '\xa6'; break;
380            case 0x160: v = '\xa9'; break;
381            case 0x15e: v = '\xaa'; break; /* Scedil */
382            case 0x164: v = '\xab'; break;
383            case 0x179: v = '\xac'; break;
384            case 0x17d: v = '\xae'; break;
385            case 0x17b: v = '\xaf'; break;
386            case 0x105: v = '\xb1'; break;
387            case 0x2db: v = '\xb2'; break;
388            case 0x142: v = '\xb3'; break;
389            case 0x13e: v = '\xb5'; break;
390            case 0x15b: v = '\xb6'; break;
391            case 0x2c7: v = '\xb7'; break;
392            case 0x161: v = '\xb9'; break;
393            case 0x15f: v = '\xba'; break; /* scedil */
394            case 0x165: v = '\xbb'; break;
395            case 0x17a: v = '\xbc'; break;
396            case 0x2dd: v = '\xbd'; break;
397            case 0x17e: v = '\xbe'; break;
398            case 0x17c: v = '\xbf'; break;
399            case 0x154: v = '\xc0'; break;
400            case 0x102: v = '\xc3'; break;
401            case 0x139: v = '\xc5'; break;
402            case 0x106: v = '\xc6'; break;
403            case 0x10c: v = '\xc8'; break;
404            case 0x118: v = '\xca'; break;
405            case 0x11a: v = '\xcc'; break;
406            case 0x10e: v = '\xcf'; break;
407            case 0x110: v = '\xd0'; break;
408            case 0x143: v = '\xd1'; break;
409            case 0x147: v = '\xd2'; break;
410            case 0x150: v = '\xd5'; break;
411            case 0x158: v = '\xd8'; break;
412            case 0x16e: v = '\xd9'; break;
413            case 0x170: v = '\xdb'; break;
414            case 0x162: v = '\xde'; break; /* &Tcedil; */
415            case 0x155: v = '\xe0'; break;
416            case 0x103: v = '\xe3'; break;
417            case 0x13a: v = '\xe5'; break;
418            case 0x107: v = '\xe6'; break;
419            case 0x10d: v = '\xe8'; break;
420            case 0x119: v = '\xea'; break;
421            case 0x11b: v = '\xec'; break;
422            case 0x10f: v = '\xef'; break;
423            case 0x111: v = '\xf0'; break;
424            case 0x144: v = '\xf1'; break;
425            case 0x148: v = '\xf2'; break;
426            case 0x151: v = '\xf5'; break;
427            case 0x159: v = '\xf8'; break;
428            case 0x16f: v = '\xf9'; break;
429            case 0x171: v = '\xfb'; break;
430            case 0x163: v = '\xfe'; break; /* tcedil */
431            case 0x2d9: v = '\xff'; break;
432         }
433         if (v == 0) break;
434         value = v;
435      }
436      *p = value;
437      return 1;
438   case CHARSET_ISO_8859_15:
439      switch (value) {
440       case 0xa4: case 0xa6: case 0xb0: case 0xc4:
441       case 0xd0: case 0xd4: case 0xd5: case 0xd6:
442         goto donthave;
443       case 0x152: value = 0xd4; break; /* &OElig; */
444       case 0x153: value = 0xd5; break; /* &oelig; */
445#if 0
446       case 0x0: value = 0xa4; break; /* euro */
447#endif
448       case 0x160: value = 0xa6; break; /* Scaron */
449       case 0x161: value = 0xb0; break; /* scaron */
450       case 0x17d: value = 0xc4; break; /* Zcaron */
451       case 0x17e: value = 0xd0; break; /* zcaron */
452#if 0
453       case 0x0: value = 0xd6; break; /* Ydiersis */
454#endif
455      }
456      if (value < 0x100) {
457         *p = value;
458         return 1;
459      }
460      donthave:
461      break;
462#if OS_WIN32
463   case CHARSET_WINCP1250:
464      /* MS Windows rough equivalent to ISO-8859-2 */
465      if (value >= 0x80) {
466         int v = 0;
467         switch (value) {
468            case 0xa0: case 0xa4: case 0xa6: case 0xa7: case 0xa8: case 0xa9:
469            case 0xab: case 0xac: case 0xad: case 0xae: case 0xb0: case 0xb1:
470            case 0xb4: case 0xb5: case 0xb6: case 0xb7: case 0xb8: case 0xbb:
471            case 0xc1: case 0xc2: case 0xc4: case 0xc7: case 0xc9: case 0xcb:
472            case 0xcd: case 0xce: case 0xd3: case 0xd4: case 0xd6: case 0xd7:
473            case 0xda: case 0xdc: case 0xdd: case 0xdf: case 0xe1: case 0xe2:
474            case 0xe4: case 0xe7: case 0xe9: case 0xeb: case 0xed: case 0xee:
475            case 0xf3: case 0xf4: case 0xf6: case 0xf7: case 0xfa: case 0xfc:
476            case 0xfd:
477               v = value; break;
478            case 0x20ac: v = '\x80'; break;
479            case 0x201a: v = '\x82'; break;
480            case 0x201e: v = '\x84'; break;
481            case 0x2026: v = '\x85'; break;
482            case 0x2020: v = '\x86'; break;
483            case 0x2021: v = '\x87'; break;
484            case 0x2030: v = '\x89'; break;
485            case 0x0160: v = '\x8a'; break;
486            case 0x2039: v = '\x8b'; break;
487            case 0x015a: v = '\x8c'; break;
488            case 0x0164: v = '\x8d'; break;
489            case 0x017d: v = '\x8e'; break;
490            case 0x0179: v = '\x8f'; break;
491            case 0x2018: v = '\x91'; break;
492            case 0x2019: v = '\x92'; break;
493            case 0x201c: v = '\x93'; break;
494            case 0x201d: v = '\x94'; break;
495            case 0x2022: v = '\x95'; break;
496            case 0x2013: v = '\x96'; break;
497            case 0x2014: v = '\x97'; break;
498            case 0x2122: v = '\x99'; break;
499            case 0x0161: v = '\x9a'; break;
500            case 0x203a: v = '\x9b'; break;
501            case 0x015b: v = '\x9c'; break;
502            case 0x0165: v = '\x9d'; break;
503            case 0x017e: v = '\x9e'; break;
504            case 0x017a: v = '\x9f'; break;
505            case 0x02c7: v = '\xa1'; break;
506            case 0x02d8: v = '\xa2'; break;
507            case 0x0141: v = '\xa3'; break;
508            case 0x0104: v = '\xa5'; break;
509            case 0x015e: v = '\xaa'; break; /* Scedil */
510            case 0x017b: v = '\xaf'; break;
511            case 0x02db: v = '\xb2'; break;
512            case 0x0142: v = '\xb3'; break;
513            case 0x0105: v = '\xb9'; break;
514            case 0x015f: v = '\xba'; break; /* scedil */
515            case 0x013d: v = '\xbc'; break;
516            case 0x02dd: v = '\xbd'; break;
517            case 0x013e: v = '\xbe'; break;
518            case 0x017c: v = '\xbf'; break;
519            case 0x0154: v = '\xc0'; break;
520            case 0x0102: v = '\xc3'; break;
521            case 0x0139: v = '\xc5'; break;
522            case 0x0106: v = '\xc6'; break;
523            case 0x010c: v = '\xc8'; break;
524            case 0x0118: v = '\xca'; break;
525            case 0x011a: v = '\xcc'; break;
526            case 0x010e: v = '\xcf'; break;
527            case 0x0110: v = '\xd0'; break;
528            case 0x0143: v = '\xd1'; break;
529            case 0x0147: v = '\xd2'; break;
530            case 0x0150: v = '\xd5'; break;
531            case 0x0158: v = '\xd8'; break;
532            case 0x016e: v = '\xd9'; break;
533            case 0x0170: v = '\xdb'; break;
534            case 0x0162: v = '\xde'; break; /* &Tcedil; */
535            case 0x0155: v = '\xe0'; break;
536            case 0x0103: v = '\xe3'; break;
537            case 0x013a: v = '\xe5'; break;
538            case 0x0107: v = '\xe6'; break;
539            case 0x010d: v = '\xe8'; break;
540            case 0x0119: v = '\xea'; break;
541            case 0x011b: v = '\xec'; break;
542            case 0x010f: v = '\xef'; break;
543            case 0x0111: v = '\xf0'; break;
544            case 0x0144: v = '\xf1'; break;
545            case 0x0148: v = '\xf2'; break;
546            case 0x0151: v = '\xf5'; break;
547            case 0x0159: v = '\xf8'; break;
548            case 0x016f: v = '\xf9'; break;
549            case 0x0171: v = '\xfb'; break;
550            case 0x0163: v = '\xfe'; break; /* tcedil */
551            case 0x02d9: v = '\xff'; break;
552         }
553         if (v == 0) break;
554         value = v;
555      }
556      *p = value;
557      return 1;
558   case CHARSET_WINCP1252:
559      /* MS Windows extensions to ISO-8859-1 */
560      switch (value) {
561       case 0x152: value = 0x8c; break; /* &OElig; */
562       case 0x153: value = 0x9c; break; /* &oelig; */
563#if 0
564      /* there are a few other obscure ones we don't currently need */
565#endif
566      }
567      if (value < 0x100) {
568         *p = value;
569         return 1;
570      }
571      break;
572#endif
573#if OS_WIN32
574   case CHARSET_DOSCP850: {
575      unsigned char uni2dostab[] = {
576         255, 173, 189, 156, 207, 190, 221, 245,
577         249, 184, 166, 174, 170, 240, 169, 238,
578         248, 241, 253, 252, 239, 230, 244, 250,
579         247, 251, 167, 175, 172, 171, 243, 168,
580         183, 181, 182, 199, 142, 143, 146, 128,
581         212, 144, 210, 211, 222, 214, 215, 216,
582         209, 165, 227, 224, 226, 229, 153, 158,
583         157, 235, 233, 234, 154, 237, 232, 225,
584         133, 160, 131, 198, 132, 134, 145, 135,
585         138, 130, 136, 137, 141, 161, 140, 139,
586         208, 164, 149, 162, 147, 228, 148, 246,
587         155, 151, 163, 150, 129, 236, 231, 152
588      };
589      if (value >= 160 && value < 256) {
590         *p = (int)uni2dostab[value - 160];
591         return 1;
592      }
593#if 0
594      if (value == 305) { /* LATIN SMALL LETTER DOTLESS I */
595         *p = 213;
596         return 1;
597      }
598      if (value == 402) { /* LATIN SMALL LETTER F WITH HOOK */
599         *p = 159;
600         return 1;
601      }
602#endif
603      break;
604   }
605#endif
606   }
607   /* Transliterate characters we can't represent */
608#ifdef DEBUG
609   fprintf(stderr, "transliterate `%c' 0x%x\n", value, value);
610#endif
611   switch (value) {
612    case 160:
613      *p = ' '; return 1;
614    case 161 /* ¡ */:
615      *p = '!'; return 1;
616    case 171 /* « */:
617      p[1] = *p = '<'; return 2;
618    case 187 /* » */:
619      p[1] = *p = '>'; return 2;
620    case 191 /* ¿ */:
621      *p = '?'; return 1;
622    case 192 /* À */: case 193 /* Á */: case 194 /* Â */: case 195 /* Ã */:
623      *p = 'A'; return 1;
624    case 197 /* Å */:
625      p[1] = *p = 'A'; return 2;
626    case 196 /* Ä */: /* &Auml; */
627      *p = 'A';
628      if (!umlaut_to_e()) return 1;
629      p[1] = 'E'; return 2;
630    case 198 /* Æ */:
631      *p = 'A'; p[1] = 'E'; return 2;
632    case 199 /* Ç */: case 268: /* &Ccaron; */
633      *p = 'C'; return 1;
634    case 270: /* &Dcaron; */
635      *p = 'D'; return 1;
636    case 200 /* È */: case 201 /* É */: case 202 /* Ê */: case 203 /* Ë */:
637      *p = 'E'; return 1;
638    case 204 /* Ì */: case 205 /* Í */: case 206 /* Î */: case 207 /* Ï */:
639      *p = 'I'; return 1;
640    case 208 /* Ð */: case 222 /* Þ */:
641      *p = 'T'; p[1] = 'H'; return 2;
642    case 315: /* &Lacute; */
643    case 317: /* &Lcaron; */
644      *p = 'L'; return 1;
645    case 209 /* Ñ */:
646      *p = 'N'; return 1;
647    case 210 /* Ò */: case 211 /* Ó */: case 212 /* Ô */: case 213 /* Õ */:
648      *p = 'O'; return 1;
649    case 214 /* Ö */: /* &Ouml; */ case 0x152: /* &OElig; */
650      *p = 'O'; p[1] = 'E'; return 2;
651    case 352: /* &Scaron; */
652    case 0x15e: /* &Scedil; */
653      *p = 'S'; return 1;
654    case 0x162: /* &Tcedil; */
655    case 0x164: /* &Tcaron; */
656      *p = 'T'; return 1;
657    case 217 /* Ù */: case 218 /* Ú */: case 219 /* Û */:
658      *p = 'U'; return 1;
659    case 220 /* Ü */: /* &Uuml; */
660      *p = 'U'; p[1] = 'E'; return 2;
661    case 221 /* Ý */:
662      *p = 'Y'; return 1;
663    case 381: /* &Zcaron; */
664      *p = 'Z'; return 1;
665    case 223 /* ß */:
666      p[1] = *p = 's'; return 2;
667    case 224 /* à */: case 225 /* á */: case 226 /* â */: case 227 /* ã */:
668    case 259: /* &abreve; */
669      *p = 'a'; return 1;
670    case 228 /* ä */: /* &auml; */ case 230 /* æ */:
671      *p = 'a'; p[1] = 'e'; return 2;
672    case 229 /* å */:
673      p[1] = *p = 'a'; return 2;
674    case 231 /* ç */: case 269 /* &ccaron; */:
675      *p = 'c'; return 1;
676    case 271: /* &dcaron; */
677      *p = 'd'; return 1;
678    case 232 /* è */: case 233 /* é */: case 234 /* ê */: case 235 /* ë */:
679    case 283 /* &ecaron; */:
680      *p = 'e'; return 1;
681    case 236 /* ì */: case 237 /* í */: case 238 /* î */: case 239 /* ï */:
682      *p = 'i'; return 1;
683    case 316 /* &lacute; */:
684    case 318 /* &lcaron; */:
685      *p = 'l'; return 1;
686    case 241 /* ñ */: case 328 /* &ncaron; */:
687      *p = 'n'; return 1;
688    case 345: /* &rcaron; */
689      *p = 'r'; return 1;
690    case 353: /* &scaron; */
691    case 0x15f: /* &scedil; */
692      *p = 's'; return 1;
693    case 357: /* &tcaron; */
694    case 0x163: /* &tcedil; */
695      *p = 't'; return 1;
696    case 240 /* ð */: case 254 /* þ */:
697      *p = 't'; p[1] = 'h'; return 2;
698    case 242 /* ò */: case 243 /* ó */: case 244 /* ô */: case 245 /* õ */:
699      *p = 'o'; return 1;
700    case 246 /* ö */: /* &ouml; */ case 0x153: /* &oelig; */
701      *p = 'o'; p[1] = 'e'; return 2;
702    case 249 /* ù */: case 250 /* ú */: case 251 /* û */:
703    case 367 /* &uring; */:
704      *p = 'u'; return 1;
705    case 252 /* ü */: /* &uuml; */
706      *p = 'u'; p[1] = 'e'; return 2;
707    case 253 /* ý */: case 255 /* ÿ */:
708      *p = 'y'; return 1;
709    case 382: /* &zcaron; */
710      *p = 'z'; return 1;
711   }
712#ifdef DEBUG
713   fprintf(stderr, "failed to transliterate\n");
714#endif
715   return 0;
716}
717
/* Directory in which parse_msg_file() looks for the message file; also what
 * msg_cfgpth() returns.  This is only the initial value - msg_init() may
 * point it somewhere else. */
#if OS_UNIX && defined DATADIR && defined PACKAGE
/* Under Unix, we compile in the configured path */
static const char *pth_cfg_files = DATADIR "/" PACKAGE;
#else
/* On other platforms, we fall back on looking in the current directory */
static const char *pth_cfg_files = "";
#endif
725
/* The messages loaded by parse_msg_file(): msg_array holds num_msgs
 * pointers, one to the start of each message. */
static int num_msgs = 0;
static char **msg_array = NULL;

/* Language to load messages for.  parse_msg_file() uses it (minus any
 * ".charset" suffix) as the name of the message file to open.  It has to be
 * set to something non-NULL elsewhere before then. */
const char *msg_lang = NULL;
/* NOTE(review): not referenced in the part of the file reviewed here -
 * presumably a secondary language code; check where it gets set. */
const char *msg_lang2 = NULL;
731
732static char **
733parse_msgs(int n, unsigned char *p, int charset_code) {
734   int i;
735
736   char **msgs = osmalloc(n * sizeof(char *));
737
738   for (i = 0; i < n; i++) {
739      unsigned char *to = p;
740      int ch;
741      msgs[i] = (char *)p;
742
743      /* If we want UTF8 anyway, we just need to find the start of each
744       * message */
745      if (charset_code == CHARSET_UTF8) {
746         p += strlen((char *)p) + 1;
747         continue;
748      }
749
750      while ((ch = *p++) != 0) {
751         /* A byte in the range 0x80-0xbf or 0xf0-0xff isn't valid in
752          * this state, (0xf0-0xfd mean values > 0xffff) so treat as
753          * literal and try to resync so we cope better when fed
754          * non-utf-8 data.  Similarly we abandon a multibyte sequence
755          * if we hit an invalid character. */
756         if (ch >= 0xc0 && ch < 0xf0) {
757            int ch1 = *p;
758            if ((ch1 & 0xc0) != 0x80) goto resync;
759
760            if (ch < 0xe0) {
761               /* 2 byte sequence */
762               ch = ((ch & 0x1f) << 6) | (ch1 & 0x3f);
763               p++;
764            } else {
765               /* 3 byte sequence */
766               int ch2 = p[1];
767               if ((ch2 & 0xc0) != 0x80) goto resync;
768               ch = ((ch & 0x1f) << 12) | ((ch1 & 0x3f) << 6) | (ch2 & 0x3f);
769               p += 2;
770            }
771         }
772
773         resync:
774
775         if (ch < 127) {
776            *to++ = (char)ch;
777         } else {
778            /* We assume an N byte UTF-8 code never transliterates to more
779             * than N characters (so we can't transliterate © to (C) or
780             * ® to (R) for example) */
781            to += add_unicode(charset_code, to, ch);
782         }
783      }
784      *to++ = '\0';
785   }
786   return msgs;
787}
788
/* This is the name of the default language, which can be set like so:
 * ./configure --enable-defaultlang=fr
 */
#ifdef DEFAULTLANG
/* No point extracting these errors as they won't get used if file opens */
# include "../lib/defaultlang.h"
#else
/* Built-in copies of the messages which may be needed before, or instead
 * of, a successfully loaded message file.  They are stored back-to-back,
 * each with its own NUL terminator, which is the layout parse_msgs()
 * expects; N_DONTEXTRACTMSGS is how many there are.  The number in the
 * comment after each is the message number it is reported under. */
#define N_DONTEXTRACTMSGS 5
static unsigned char dontextractmsgs[] =
   "Can't open message file `%s' using path `%s'\0"/*1000*/
   "Problem with message file `%s'\0"/*1001*/
   "I don't understand this message file version\0"/*1002*/
   "Message file truncated?\0"/*1003*/
   "Out of memory (couldn't find %lu bytes).\0"/*1004*/;
#endif

/* The built-in messages after conversion by parse_msgs(); filled in by
 * parse_msg_file(). */
static char **dontextract = NULL;
806
807static void
808parse_msg_file(int charset_code)
809{
810   FILE *fh;
811   unsigned char header[20];
812   int i;
813   unsigned len;
814   unsigned char *p;
815   char *fnm, *s;
816   int n;
817
818#ifdef DEBUG
819   fprintf(stderr, "parse_msg_file(%d)\n", charset_code);
820#endif
821
822   /* sort out messages we need to print if we can't open the message file */
823   dontextract = parse_msgs(N_DONTEXTRACTMSGS, dontextractmsgs, charset_code);
824
825   fnm = osstrdup(msg_lang);
826   /* trim off charset from stuff like "de_DE.iso8859_1" */
827   s = strchr(fnm, '.');
828   if (s) *s = '\0';
829
830   fh = fopenWithPthAndExt(pth_cfg_files, fnm, EXT_SVX_MSG, "rb", NULL);
831
832   if (!fh) {
833      /* e.g. if 'en_GB' is unknown, see if we know 'en' */
834      if (strlen(fnm) > 3 && fnm[2] == '_') {
835         fnm[2] = '\0';
836         fh = fopenWithPthAndExt(pth_cfg_files, fnm, EXT_SVX_MSG, "rb", NULL);
837         if (!fh) fnm[2] = '_'; /* for error reporting */
838      }
839   }
840
841   if (!fh) {
842      fatalerror(/*Can't open message file `%s' using path `%s'*/1000,
843                 fnm, pth_cfg_files);
844   }
845
846   if (fread(header, 1, 20, fh) < 20 ||
847       memcmp(header, "Svx\nMsg\r\n\xfe\xff", 12) != 0) {
848      fatalerror(/*Problem with message file `%s'*/1001, fnm);
849   }
850
851   if (header[12] != 0)
852      fatalerror(/*I don't understand this message file version*/1002);
853
854   n = (header[14] << 8) | header[15];
855
856   len = 0;
857   for (i = 16; i < 20; i++) len = (len << 8) | header[i];
858
859   p = osmalloc(len);
860   if (fread(p, 1, len, fh) < len)
861      fatalerror(/*Message file truncated?*/1003);
862
863   fclose(fh);
864
865#ifdef DEBUG
866   fprintf(stderr, "fnm = `%s', n = %d, len = %d\n", fnm, n, len);
867#endif
868   osfree(fnm);
869
870   msg_array = parse_msgs(n, p, charset_code);
871   num_msgs = n;
872}
873
/* Return the directory in which the message file is looked for (the current
 * value of pth_cfg_files).  The string isn't copied. */
const char *
msg_cfgpth(void)
{
   return pth_cfg_files;
}
879
/* Return the path to use to look for executables (the current value of
 * exe_pth, which is "" until msg_init() sets it).  The string isn't copied.
 */
const char *
msg_exepth(void)
{
   return exe_pth;
}
885
/* Return the program's name (the current value of appname_copy, which is
 * "anonymous program" until msg_init() sets it).  The string isn't copied.
 */
const char *
msg_appname(void)
{
   return appname_copy;
}
891
892void
893msg_init(char * const *argv)
894{
895   char *p;
896   SVX_ASSERT(argv);
897
898#ifdef HAVE_SIGNAL
899   init_signals();
900#endif
901   /* Point to argv[0] itself so we report a more helpful error if the
902    * code to work out the clean appname generates a signal */
903   appname_copy = argv[0];
904#if OS_UNIX
905   /* use name as-is on Unix - programs run from path get name as supplied */
906   appname_copy = osstrdup(argv[0]);
907#else
908   /* use the lower-cased leafname on other platforms */
909   p = leaf_from_fnm(argv[0]);
910   appname_copy = p;
911   while (*p) {
912      *p = tolower(*p);
913      ++p;
914   }
915#endif
916
917   /* shortcut --version so you can check the version number even when the
918    * correct message file can't be found... */
919   if (argv[1] && strcmp(argv[1], "--version") == 0) {
920      cmdline_version();
921      exit(0);
922   }
923   if (argv[0]) {
924      exe_pth = path_from_fnm(argv[0]);
925#ifdef MACOSX_BUNDLE
926      /* If we're being built into a bundle, always look relative to
927       * the path to the binary. */
928      pth_cfg_files = use_path(exe_pth, "share/survex");
929#elif OS_UNIX && defined DATADIR && defined PACKAGE
930      bool free_pth = fFalse;
931      char *pth = getenv("srcdir");
932      if (!pth || !pth[0]) {
933         pth = path_from_fnm(argv[0]);
934         free_pth = fTrue;
935      }
936      if (pth[0]) {
937         struct stat buf;
938#if OS_UNIX_MACOSX
939         /* On MacOS X the programs may be installed anywhere, with the
940          * share directory and the binaries in the same directory. */
941         p = use_path(pth, "share/survex/en.msg");
942         if (lstat(p, &buf) == 0 && S_ISREG(buf.st_mode)) {
943            pth_cfg_files = use_path(pth, "share/survex");
944            goto macosx_got_msg;
945         }
946         osfree(p);
947#endif
948         /* If we're run with an explicit path, check if "../lib/en.msg"
949          * from the program's path exists, and if so look there for
950          * support files - this allows us to test binaries in the build
951          * tree easily. */
952         p = use_path(pth, "../lib/en.msg");
953         if (lstat(p, &buf) == 0) {
954#ifdef S_ISDIR
955            /* POSIX way */
956            if (S_ISREG(buf.st_mode)) {
957               pth_cfg_files = use_path(pth, "../lib");
958            }
959#else
960            /* BSD way */
961            if ((buf.st_mode & S_IFMT) == S_IFREG) {
962               pth_cfg_files = use_path(pth, "../lib");
963            }
964#endif
965         }
966#if defined(__GNUC__) && defined(__APPLE_CC__)
967macosx_got_msg:
968#endif
969         osfree(p);
970      }
971
972      if (free_pth) osfree(pth);
973#elif OS_WIN32
974      DWORD len = 256;
975      char *buf = NULL, *modname;
976      while (1) {
977          DWORD got;
978          buf = osrealloc(buf, len);
979          got = GetModuleFileName(NULL, buf, len);
980          if (got < len) break;
981          len += len;
982      }
983      modname = buf;
984      /* Strange Win32 nastiness - strip prefix "\\?\" if present */
985      if (strncmp(modname, "\\\\?\\", 4) == 0) modname += 4;
986      pth_cfg_files = path_from_fnm(modname);
987      osfree(buf);
988#else
989      /* Get the path to the support files from argv[0] */
990      pth_cfg_files = path_from_fnm(argv[0]);
991#endif
992   }
993
994   msg_lang = getenv("SURVEXLANG");
995#ifdef DEBUG
996   fprintf(stderr, "msg_lang = %p (= \"%s\")\n", msg_lang, msg_lang?msg_lang:"(null)");
997#endif
998
999   if (!msg_lang || !*msg_lang) {
1000      msg_lang = getenv("LC_MESSAGES");
1001      if (!msg_lang || !*msg_lang) msg_lang = getenv("LANG");
1002      if (!msg_lang || !*msg_lang) {
1003#if OS_WIN32
1004         LCID locid;
1005#endif
1006#ifdef DEFAULTLANG
1007         msg_lang = STRING(DEFAULTLANG);
1008#else
1009         msg_lang = "en";
1010#endif
1011#if OS_WIN32
1012         locid = GetUserDefaultLCID();
1013         if (locid) {
1014            WORD langid = LANGIDFROMLCID(locid);
1015            switch (PRIMARYLANGID(langid)) {
1016/* older mingw compilers don't seem to supply this value */
1017#ifndef LANG_CATALAN
1018# define LANG_CATALAN 0x03
1019#endif
1020             case LANG_CATALAN:
1021               msg_lang = "ca";
1022               break;
1023             case LANG_CHINESE:
1024               msg_lang = "zh";
1025               break;
1026             case LANG_ENGLISH:
1027               if (SUBLANGID(langid) == SUBLANG_ENGLISH_US)
1028                  msg_lang = "en_US";
1029               else
1030                  msg_lang = "en";
1031               break;
1032             case LANG_FRENCH:
1033               msg_lang = "fr";
1034               break;
1035             case LANG_GERMAN:
1036               switch (SUBLANGID(langid)) {
1037                case SUBLANG_GERMAN_SWISS:
1038                  msg_lang = "de_CH";
1039                  break;
1040                case SUBLANG_GERMAN:
1041                  msg_lang = "de_DE";
1042                  break;
1043                default:
1044                  msg_lang = "de";
1045               }
1046               break;
1047             case LANG_ITALIAN:
1048               msg_lang = "it";
1049               break;
1050             case LANG_PORTUGUESE:
1051               if (SUBLANGID(langid) == SUBLANG_PORTUGUESE_BRAZILIAN)
1052                  msg_lang = "pt_BR";
1053               else
1054                  msg_lang = "pt";
1055               break;
1056             case LANG_ROMANIAN:
1057               msg_lang = "ro";
1058               break;
1059             case LANG_SLOVAK:
1060               msg_lang = "sk";
1061               break;
1062             case LANG_SPANISH:
1063               msg_lang = "es";
1064               break;
1065            }
1066         }
1067#endif
1068      }
1069   }
1070#ifdef DEBUG
1071   fprintf(stderr, "msg_lang = %p (= \"%s\")\n", msg_lang, msg_lang?msg_lang:"(null)");
1072#endif
1073
1074   /* On Mandrake LANG defaults to C */
1075   if (strcmp(msg_lang, "C") == 0) msg_lang = "en";
1076
1077   msg_lang = osstrdup(msg_lang);
1078
1079   /* Convert en-us to en_US, etc */
1080   p = strchr(msg_lang, '-');
1081   if (p) {
1082      *p++ = '_';
1083      while (*p) {
1084         *p = toupper(*p);
1085         p++;
1086      }
1087   }
1088
1089   p = strchr(msg_lang, '_');
1090   if (p) {
1091      *p = '\0';
1092      msg_lang2 = osstrdup(msg_lang);
1093      *p = '_';
1094   }
1095
1096#ifdef LC_MESSAGES
1097   /* try to setlocale() appropriately too */
1098   if (!setlocale(LC_MESSAGES, msg_lang)) {
1099      if (msg_lang2) setlocale(LC_MESSAGES, msg_lang2);
1100   }
1101#endif
1102
1103   select_charset(default_charset());
1104}
1105
1106/* Message may be overwritten by next call
1107 * (but not in current implementation) */
1108const char *
1109msg(int en)
1110{
1111   /* NB can't use SVX_ASSERT here! */
1112   static char badbuf[256];
1113   if (en >= 1000 && en < 1000 + N_DONTEXTRACTMSGS)
1114      return dontextract[en - 1000];
1115   if (!msg_array) {
1116      if (en != 1)  {
1117         sprintf(badbuf, "Message %d requested before msg_array initialised\n",
1118                 en);
1119         return badbuf;
1120      }
1121      /* this should be the only other message which can be requested before
1122       * the message file is opened and read... */
1123      if (!dontextract) return "Out of memory (couldn't find %lu bytes).";
1124      return dontextract[(/*Out of memory (couldn't find %lu bytes).*/1004)
1125                         - 1000];
1126   }
1127
1128   if (en < 0 || en >= num_msgs) {
1129      sprintf(badbuf, "Message %d out of range\n", en);
1130      return badbuf;
1131   }
1132
1133   if (en == 0) {
1134      const char *p = msg_array[0];
1135      if (!*p) p = "(C)";
1136      return p;
1137   }
1138
1139   return msg_array[en];
1140}
1141
/* Return a persistent copy of message en.  This simply forwards to msg(). */
const char *
msgPerm(int en)
{
   const char *text = msg(en);
   return text;
}
1148
1149void
1150v_report(int severity, const char *fnm, int line, int en, va_list ap)
1151{
1152#ifdef AVEN
1153   aven_v_report(severity, fnm, line, en, ap);
1154#else
1155   if (fnm) {
1156      fputs(fnm, STDERR);
1157      if (line) fprintf(STDERR, ":%d", line);
1158   } else {
1159      fputs(appname_copy, STDERR);
1160   }
1161   fputs(": ", STDERR);
1162
1163   if (severity == 0) {
1164      fputs(msg(/*warning*/4), STDERR);
1165      fputs(": ", STDERR);
1166   }
1167
1168   vfprintf(STDERR, msg(en), ap);
1169   fputnl(STDERR);
1170#endif
1171
1172   switch (severity) {
1173    case 0:
1174      msg_warnings++;
1175      break;
1176    case 1:
1177      msg_errors++;
1178      if (msg_errors == 50)
1179         fatalerror_in_file(fnm, 0, /*Too many errors - giving up*/19);
1180      break;
1181    case 2:
1182      exit(EXIT_FAILURE);
1183   }
1184}
1185
/* Report message en as a warning (severity 0) with no file location.
 * The variable arguments are the values for the message's format. */
void
warning(int en, ...)
{
   va_list args;

   va_start(args, en);
   v_report(0, NULL, 0, en, args);
   va_end(args);
}
1194
/* Report message en as an error (severity 1) with no file location.
 * The variable arguments are the values for the message's format. */
void
error(int en, ...)
{
   va_list args;

   va_start(args, en);
   v_report(1, NULL, 0, en, args);
   va_end(args);
}
1203
/* Report message en as a fatal error (severity 2) with no file location.
 * The variable arguments are the values for the message's format.
 * v_report() exits the program for this severity. */
void
fatalerror(int en, ...)
{
   va_list args;

   va_start(args, en);
   v_report(2, NULL, 0, en, args);
   va_end(args);
}
1212
/* Report message en as a warning (severity 0) located at line `line' of
 * file `fnm'.  The variable arguments are the values for the message's
 * format. */
void
warning_in_file(const char *fnm, int line, int en, ...)
{
   va_list args;

   va_start(args, en);
   v_report(0, fnm, line, en, args);
   va_end(args);
}
1221
/* Report message en as an error (severity 1) located at line `line' of
 * file `fnm'.  The variable arguments are the values for the message's
 * format. */
void
error_in_file(const char *fnm, int line, int en, ...)
{
   va_list args;

   va_start(args, en);
   v_report(1, fnm, line, en, args);
   va_end(args);
}
1230
/* Report message en as a fatal error (severity 2) located at line `line'
 * of file `fnm'.  The variable arguments are the values for the message's
 * format.  v_report() exits the program for this severity. */
void
fatalerror_in_file(const char *fnm, int line, int en, ...)
{
   va_list args;

   va_start(args, en);
   v_report(2, fnm, line, en, args);
   va_end(args);
}
1239
1240/* Code to support switching character set at runtime (e.g. for a printer
1241 * driver to support different character sets on screen and on the printer)
1242 */
1243typedef struct charset_li {
1244   struct charset_li *next;
1245   int code;
1246   char **msg_array;
1247} charset_li;
1248
1249static charset_li *charset_head = NULL;
1250
1251static int charset = CHARSET_BAD;
1252
1253int
1254select_charset(int charset_code)
1255{
1256   int old_charset = charset;
1257   charset_li *p;
1258
1259#ifdef DEBUG
1260   fprintf(stderr, "select_charset(%d), old charset = %d\n", charset_code,
1261           charset);
1262#endif
1263
1264   charset = charset_code;
1265
1266   /* check if we've already parsed messages for new charset */
1267   for (p = charset_head; p; p = p->next) {
1268#ifdef DEBUG
1269      printf("%p: code %d msg_array %p\n", p, p->code, p->msg_array);
1270#endif
1271      if (p->code == charset) {
1272         msg_array = p->msg_array;
1273         return old_charset;
1274      }
1275   }
1276
1277   /* nope, got to reparse message file */
1278   parse_msg_file(charset_code);
1279
1280   /* add to list */
1281   p = osnew(charset_li);
1282   p->code = charset;
1283   p->msg_array = msg_array;
1284   p->next = charset_head;
1285   charset_head = p;
1286
1287   return old_charset;
1288}
Note: See TracBrowser for help on using the repository browser.