source: git/trunk/src/message.c @ 7bb8184

Last change on this file since 7bb8184 was 7bb8184, checked in by Olly Betts <olly@…>, 13 years ago

Retagging 1.2.0

git-svn-id: file:///home/survex-svn/survex/tags/1.2.0@3664 4b37db11-9a0c-4f06-9ece-9ab7cdaee568

  • Property mode set to 100644
File size: 34.4 KB
Line 
1/* message.c
2 * Fairly general purpose message and error routines
3 * Copyright (C) 1993-2003,2004,2005,2006,2007,2010,2011 Olly Betts
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
18 */
19
20/*#define DEBUG 1*/
21
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#include <ctype.h>
30#include <limits.h>
31#include <errno.h>
32#include <locale.h>
33
34#include "cmdline.h"
35#include "whichos.h"
36#include "filename.h"
37#include "message.h"
38#include "osdepend.h"
39#include "filelist.h"
40#include "debug.h"
41
42#ifdef AVEN
43# include "aven.h"
44#endif
45
46#ifdef HAVE_SIGNAL
47# ifdef HAVE_SETJMP_H
48#  include <setjmp.h>
49static jmp_buf jmpbufSignal;
50#  include <signal.h>
51# else
52#  undef HAVE_SIGNAL
53# endif
54#endif
55
56#if OS_WIN32
57# define WIN32_LEAN_AND_MEAN
58# include <windows.h>
59#elif OS_UNIX
60# include <sys/types.h>
61# include <sys/stat.h>
62#endif
63
64/* For funcs which want to be immune from messing around with different
65 * calling conventions */
66#ifndef CDECL
67# define CDECL
68#endif
69
70int msg_warnings = 0; /* keep track of how many warnings we've given */
71int msg_errors = 0;   /* and how many (non-fatal) errors */
72
73/* in case osmalloc() fails before appname_copy is set up */
74static const char *appname_copy = "anonymous program";
75
76/* Path to use to look for executables (used by aven to find cavern). */
77static const char *exe_pth = "";
78
79/* error code for failed osmalloc and osrealloc calls */
80static void
81outofmem(OSSIZE_T size)
82{
83   fatalerror(/*Out of memory (couldn't find %lu bytes).*/1,
84              (unsigned long)size);
85}
86
87#ifdef TOMBSTONES
88#define TOMBSTONE_SIZE 16
89static const char tombstone[TOMBSTONE_SIZE] = "012345\xfftombstone";
90#endif
91
92/* malloc with error catching if it fails. Also allows us to write special
93 * versions easily eg for MS Windows.
94 */
95void *
96osmalloc(OSSIZE_T size)
97{
98   void *p;
99#ifdef TOMBSTONES
100   size += TOMBSTONE_SIZE * 2;
101   p = malloc(size);
102#else
103   p = xosmalloc(size);
104#endif
105   if (p == NULL) outofmem(size);
106#ifdef TOMBSTONES
107   printf("osmalloc truep=%p truesize=%d\n", p, size);
108   memcpy(p, tombstone, TOMBSTONE_SIZE);
109   memcpy(p + size - TOMBSTONE_SIZE, tombstone, TOMBSTONE_SIZE);
110   *(size_t *)p = size;
111   p += TOMBSTONE_SIZE;
112#endif
113   return p;
114}
115
116/* realloc with error catching if it fails. */
117void *
118osrealloc(void *p, OSSIZE_T size)
119{
120   /* some pre-ANSI realloc implementations don't cope with a NULL pointer */
121   if (p == NULL) {
122      p = xosmalloc(size);
123   } else {
124#ifdef TOMBSTONES
125      int true_size;
126      size += TOMBSTONE_SIZE * 2;
127      p -= TOMBSTONE_SIZE;
128      true_size = *(size_t *)p;
129      printf("osrealloc (in truep=%p truesize=%d)\n", p, true_size);
130      if (memcmp(p + sizeof(size_t), tombstone + sizeof(size_t),
131                 TOMBSTONE_SIZE - sizeof(size_t)) != 0) {
132         printf("start tombstone for block %p, size %d corrupted!",
133                p + TOMBSTONE_SIZE, true_size - TOMBSTONE_SIZE * 2);
134      }
135      if (memcmp(p + true_size - TOMBSTONE_SIZE, tombstone,
136                 TOMBSTONE_SIZE) != 0) {
137         printf("end tombstone for block %p, size %d corrupted!",
138                p + TOMBSTONE_SIZE, true_size - TOMBSTONE_SIZE * 2);
139      }
140      p = realloc(p, size);
141      if (p == NULL) outofmem(size);
142      printf("osrealloc truep=%p truesize=%d\n", p, size);
143      memcpy(p, tombstone, TOMBSTONE_SIZE);
144      memcpy(p + size - TOMBSTONE_SIZE, tombstone, TOMBSTONE_SIZE);
145      *(size_t *)p = size;
146      p += TOMBSTONE_SIZE;
147#else
148      p = xosrealloc(p, size);
149#endif
150   }
151   if (p == NULL) outofmem(size);
152   return p;
153}
154
155char *
156osstrdup(const char *str)
157{
158   char *p;
159   OSSIZE_T len;
160   len = strlen(str) + 1;
161   p = osmalloc(len);
162   memcpy(p, str, len);
163   return p;
164}
165
/* osfree is usually just a macro in osalloc.h */
#ifdef TOMBSTONES
/* Free a block obtained from osmalloc()/osrealloc() in a TOMBSTONES build.
 *
 * Steps back to the true start of the allocation, reads the stored total
 * size, reports on stdout if either guard pattern has been overwritten, and
 * then frees the underlying allocation.  A NULL pointer is ignored.
 */
void
osfree(void *p)
{
   /* Use a byte pointer: arithmetic on void * is not standard C. */
   unsigned char *base;
   size_t true_size;
   if (!p) return;
   base = (unsigned char *)p - TOMBSTONE_SIZE;
   true_size = *(size_t *)base;
   printf("osfree truep=%p truesize=%lu\n", (void *)base,
          (unsigned long)true_size);
   /* The first sizeof(size_t) bytes of the leading tombstone hold the stored
    * size, so only compare the remainder. */
   if (memcmp(base + sizeof(size_t), tombstone + sizeof(size_t),
              TOMBSTONE_SIZE - sizeof(size_t)) != 0) {
      printf("start tombstone for block %p, size %lu corrupted!",
             (void *)(base + TOMBSTONE_SIZE),
             (unsigned long)(true_size - TOMBSTONE_SIZE * 2));
   }
   if (memcmp(base + true_size - TOMBSTONE_SIZE, tombstone,
              TOMBSTONE_SIZE) != 0) {
      printf("end tombstone for block %p, size %lu corrupted!",
             (void *)(base + TOMBSTONE_SIZE),
             (unsigned long)(true_size - TOMBSTONE_SIZE * 2));
   }
   free(base);
}
#endif
189
190#ifdef HAVE_SIGNAL
191
192static int sigReceived;
193
194/* for systems not using autoconf, assume the signal handler returns void
195 * unless specified elsewhere */
196#ifndef RETSIGTYPE
197# define RETSIGTYPE void
198#endif
199
200static CDECL RETSIGTYPE
201report_sig(int sig)
202{
203   sigReceived = sig;
204   longjmp(jmpbufSignal, 1);
205}
206
/* Install report_sig() as the handler for SIGABRT, SIGFPE, SIGILL and
 * SIGSEGV.
 *
 * The first pass through (setjmp() returning 0) just installs the handlers
 * and returns.  When report_sig() later longjmp()s back here, the code after
 * the `if` runs: it prints a message naming the signal, then reports a
 * program bug via fatalerror().
 *
 * NOTE(review): by the time a signal arrives this function has already
 * returned, and the C standard leaves longjmp() to a setjmp() whose
 * enclosing function has returned undefined - confirm this is acceptable on
 * the supported platforms.
 */
static void
init_signals(void)
{
   int en; /* message number describing the signal received */
   if (!setjmp(jmpbufSignal)) {
      signal(SIGABRT, report_sig); /* abnormal termination eg abort() */
      signal(SIGFPE,  report_sig); /* arithmetic error eg /0 or overflow */
      signal(SIGILL,  report_sig); /* illegal function image eg illegal instruction */
      signal(SIGSEGV, report_sig); /* illegal storage access eg access outside memory limits */
      return;
   }

   /* Remove that signal handler to avoid the possibility of an infinite loop.
    */
   signal(sigReceived, SIG_DFL);

   switch (sigReceived) {
      case SIGABRT: en = /*Abnormal termination*/90; break;
      case SIGFPE:  en = /*Arithmetic error*/91; break;
      case SIGILL:  en = /*Illegal instruction*/92; break;
      case SIGSEGV: en = /*Bad memory access*/94; break;
      default:      en = /*Unknown signal received*/97; break;
   }
   fputsnl(msg(en), STDERR);

   /* Any of the signals we catch indicates a bug */
   fatalerror(/*Bug in program detected! Please report this to the authors*/11);

   /* NOTE(review): presumably only reached if fatalerror() returns. */
   exit(EXIT_FAILURE);
}
237#endif
238
239static int
240default_charset(void)
241{
242   if (getenv("SURVEX_UTF8")) return CHARSET_UTF8;
243#if OS_WIN32
244# ifdef AVEN
245#  define CODEPAGE GetACP()
246# else
247#  define CODEPAGE GetConsoleOutputCP()
248# endif
249   switch (CODEPAGE) {
250    case 0: return CHARSET_UTF8;
251    case 1252: return CHARSET_WINCP1252;
252    case 1250: return CHARSET_WINCP1250;
253    case 850: return CHARSET_DOSCP850;
254   }
255   return CHARSET_USASCII;
256#elif OS_UNIX
257#ifdef AVEN
258   return CHARSET_UTF8;
259#else
260   const char *p = getenv("LC_ALL");
261   if (p == NULL || p[0] == '\0') {
262      p = getenv("LC_CTYPE");
263      if (p == NULL || p[0] == '\0') {
264         p = msg_lang;
265      }
266   }
267
268   if (p) {
269      char *q = strchr(p, '.');
270      if (q) p = q + 1;
271   }
272
273   if (p) {
274      const char *chset = p;
275      size_t name_len;
276
277      while (*p != '\0' && *p != '@') p++;
278
279      name_len = p - chset;
280
281      if (name_len) {
282         int only_digit = 1;
283         size_t cnt;
284
285         for (cnt = 0; cnt < name_len; ++cnt)
286            if (isalpha((unsigned char)chset[cnt])) {
287               only_digit = 0;
288               break;
289            }
290
291         if (only_digit) goto iso;
292
293         switch (tolower(chset[0])) {
294          case 'i':
295            if (tolower(chset[1]) == 's' && tolower(chset[2]) == 'o') {
296               chset += 3;
297               iso:
298               if (strncmp(chset, "8859", 4) == 0) {
299                  chset += 4;
300                  while (chset < p && *chset && !isdigit((unsigned char)*chset))
301                     chset++;
302                  switch (atoi(chset)) {
303                   case 1: return CHARSET_ISO_8859_1;
304                   case 2: return CHARSET_ISO_8859_2;
305                   case 15: return CHARSET_ISO_8859_15;
306                   default: return CHARSET_USASCII;
307                  }
308               }
309            }
310            break;
311          case 'u':
312            if (tolower(chset[1]) == 't' && tolower(chset[2]) == 'f') {
313               chset += 3;
314               while (chset < p && *chset && !isdigit((unsigned char)*chset))
315                  chset++;
316               switch (atoi(chset)) {
317                case 8: return CHARSET_UTF8;
318                default: return CHARSET_USASCII;
319               }
320            }
321         }
322      }
323   }
324   return CHARSET_USASCII;
325#endif
326#else
327# error Do not know operating system!
328#endif
329}
330
/* It seems that Swedish and maybe some other Scandinavian languages don't
 * transliterate &auml; to ae - but it seems there may be conflicting views
 * on this...
 */
335#define umlaut_to_e() 1
336
337/* values <= 127 already dealt with */
338static int
339add_unicode(int charset, unsigned char *p, int value)
340{
341#ifdef DEBUG
342   fprintf(stderr, "add_unicode(%d, %p, %d)\n", charset, p, value);
343#endif
344   if (value == 0) return 0;
345   switch (charset) {
346   case CHARSET_USASCII:
347      if (value < 0x80) {
348         *p = value;
349         return 1;
350      }
351      break;
352   case CHARSET_ISO_8859_1:
353      if (value < 0x100) {
354         *p = value;
355         return 1;
356      }
357      break;
358   case CHARSET_ISO_8859_2:
359      if (value >= 0xa0) {
360         int v = 0;
361         switch (value) {
362            case 0xa0: case 0xa4: case 0xa7: case 0xa8: case 0xad: case 0xb0:
363            case 0xb4: case 0xb8: case 0xc1: case 0xc2: case 0xc4: case 0xc7:
364            case 0xc9: case 0xcb: case 0xcd: case 0xce: case 0xd3: case 0xd4:
365            case 0xd6: case 0xd7: case 0xda: case 0xdc: case 0xdd: case 0xdf:
366            case 0xe1: case 0xe2: case 0xe4: case 0xe7: case 0xe9: case 0xeb:
367            case 0xed: case 0xee: case 0xf3: case 0xf4: case 0xf6: case 0xf7:
368            case 0xfa: case 0xfc: case 0xfd:
369               v = value; break;
370            case 0x104: v = '\xa1'; break;
371            case 0x2d8: v = '\xa2'; break;
372            case 0x141: v = '\xa3'; break;
373            case 0x13d: v = '\xa5'; break;
374            case 0x15a: v = '\xa6'; break;
375            case 0x160: v = '\xa9'; break;
376            case 0x15e: v = '\xaa'; break; /* Scedil */
377            case 0x164: v = '\xab'; break;
378            case 0x179: v = '\xac'; break;
379            case 0x17d: v = '\xae'; break;
380            case 0x17b: v = '\xaf'; break;
381            case 0x105: v = '\xb1'; break;
382            case 0x2db: v = '\xb2'; break;
383            case 0x142: v = '\xb3'; break;
384            case 0x13e: v = '\xb5'; break;
385            case 0x15b: v = '\xb6'; break;
386            case 0x2c7: v = '\xb7'; break;
387            case 0x161: v = '\xb9'; break;
388            case 0x15f: v = '\xba'; break; /* scedil */
389            case 0x165: v = '\xbb'; break;
390            case 0x17a: v = '\xbc'; break;
391            case 0x2dd: v = '\xbd'; break;
392            case 0x17e: v = '\xbe'; break;
393            case 0x17c: v = '\xbf'; break;
394            case 0x154: v = '\xc0'; break;
395            case 0x102: v = '\xc3'; break;
396            case 0x139: v = '\xc5'; break;
397            case 0x106: v = '\xc6'; break;
398            case 0x10c: v = '\xc8'; break;
399            case 0x118: v = '\xca'; break;
400            case 0x11a: v = '\xcc'; break;
401            case 0x10e: v = '\xcf'; break;
402            case 0x110: v = '\xd0'; break;
403            case 0x143: v = '\xd1'; break;
404            case 0x147: v = '\xd2'; break;
405            case 0x150: v = '\xd5'; break;
406            case 0x158: v = '\xd8'; break;
407            case 0x16e: v = '\xd9'; break;
408            case 0x170: v = '\xdb'; break;
409            case 0x162: v = '\xde'; break; /* &Tcedil; */
410            case 0x155: v = '\xe0'; break;
411            case 0x103: v = '\xe3'; break;
412            case 0x13a: v = '\xe5'; break;
413            case 0x107: v = '\xe6'; break;
414            case 0x10d: v = '\xe8'; break;
415            case 0x119: v = '\xea'; break;
416            case 0x11b: v = '\xec'; break;
417            case 0x10f: v = '\xef'; break;
418            case 0x111: v = '\xf0'; break;
419            case 0x144: v = '\xf1'; break;
420            case 0x148: v = '\xf2'; break;
421            case 0x151: v = '\xf5'; break;
422            case 0x159: v = '\xf8'; break;
423            case 0x16f: v = '\xf9'; break;
424            case 0x171: v = '\xfb'; break;
425            case 0x163: v = '\xfe'; break; /* tcedil */
426            case 0x2d9: v = '\xff'; break;
427         }
428         if (v == 0) break;
429         value = v;
430      }
431      *p = value;
432      return 1;
433   case CHARSET_ISO_8859_15:
434      switch (value) {
435       case 0xa4: case 0xa6: case 0xb0: case 0xc4:
436       case 0xd0: case 0xd4: case 0xd5: case 0xd6:
437         goto donthave;
438       case 0x152: value = 0xd4; break; /* &OElig; */
439       case 0x153: value = 0xd5; break; /* &oelig; */
440#if 0
441       case 0x0: value = 0xa4; break; /* euro */
442#endif
443       case 0x160: value = 0xa6; break; /* Scaron */
444       case 0x161: value = 0xb0; break; /* scaron */
445       case 0x17d: value = 0xc4; break; /* Zcaron */
446       case 0x17e: value = 0xd0; break; /* zcaron */
447#if 0
448       case 0x0: value = 0xd6; break; /* Ydiersis */
449#endif
450      }
451      if (value < 0x100) {
452         *p = value;
453         return 1;
454      }
455      donthave:
456      break;
457#if OS_WIN32
458   case CHARSET_WINCP1250:
459      /* MS Windows rough equivalent to ISO-8859-2 */
460      if (value >= 0x80) {
461         int v = 0;
462         switch (value) {
463            case 0xa0: case 0xa4: case 0xa6: case 0xa7: case 0xa8: case 0xa9:
464            case 0xab: case 0xac: case 0xad: case 0xae: case 0xb0: case 0xb1:
465            case 0xb4: case 0xb5: case 0xb6: case 0xb7: case 0xb8: case 0xbb:
466            case 0xc1: case 0xc2: case 0xc4: case 0xc7: case 0xc9: case 0xcb:
467            case 0xcd: case 0xce: case 0xd3: case 0xd4: case 0xd6: case 0xd7:
468            case 0xda: case 0xdc: case 0xdd: case 0xdf: case 0xe1: case 0xe2:
469            case 0xe4: case 0xe7: case 0xe9: case 0xeb: case 0xed: case 0xee:
470            case 0xf3: case 0xf4: case 0xf6: case 0xf7: case 0xfa: case 0xfc:
471            case 0xfd:
472               v = value; break;
473            case 0x20ac: v = '\x80'; break;
474            case 0x201a: v = '\x82'; break;
475            case 0x201e: v = '\x84'; break;
476            case 0x2026: v = '\x85'; break;
477            case 0x2020: v = '\x86'; break;
478            case 0x2021: v = '\x87'; break;
479            case 0x2030: v = '\x89'; break;
480            case 0x0160: v = '\x8a'; break;
481            case 0x2039: v = '\x8b'; break;
482            case 0x015a: v = '\x8c'; break;
483            case 0x0164: v = '\x8d'; break;
484            case 0x017d: v = '\x8e'; break;
485            case 0x0179: v = '\x8f'; break;
486            case 0x2018: v = '\x91'; break;
487            case 0x2019: v = '\x92'; break;
488            case 0x201c: v = '\x93'; break;
489            case 0x201d: v = '\x94'; break;
490            case 0x2022: v = '\x95'; break;
491            case 0x2013: v = '\x96'; break;
492            case 0x2014: v = '\x97'; break;
493            case 0x2122: v = '\x99'; break;
494            case 0x0161: v = '\x9a'; break;
495            case 0x203a: v = '\x9b'; break;
496            case 0x015b: v = '\x9c'; break;
497            case 0x0165: v = '\x9d'; break;
498            case 0x017e: v = '\x9e'; break;
499            case 0x017a: v = '\x9f'; break;
500            case 0x02c7: v = '\xa1'; break;
501            case 0x02d8: v = '\xa2'; break;
502            case 0x0141: v = '\xa3'; break;
503            case 0x0104: v = '\xa5'; break;
504            case 0x015e: v = '\xaa'; break; /* Scedil */
505            case 0x017b: v = '\xaf'; break;
506            case 0x02db: v = '\xb2'; break;
507            case 0x0142: v = '\xb3'; break;
508            case 0x0105: v = '\xb9'; break;
509            case 0x015f: v = '\xba'; break; /* scedil */
510            case 0x013d: v = '\xbc'; break;
511            case 0x02dd: v = '\xbd'; break;
512            case 0x013e: v = '\xbe'; break;
513            case 0x017c: v = '\xbf'; break;
514            case 0x0154: v = '\xc0'; break;
515            case 0x0102: v = '\xc3'; break;
516            case 0x0139: v = '\xc5'; break;
517            case 0x0106: v = '\xc6'; break;
518            case 0x010c: v = '\xc8'; break;
519            case 0x0118: v = '\xca'; break;
520            case 0x011a: v = '\xcc'; break;
521            case 0x010e: v = '\xcf'; break;
522            case 0x0110: v = '\xd0'; break;
523            case 0x0143: v = '\xd1'; break;
524            case 0x0147: v = '\xd2'; break;
525            case 0x0150: v = '\xd5'; break;
526            case 0x0158: v = '\xd8'; break;
527            case 0x016e: v = '\xd9'; break;
528            case 0x0170: v = '\xdb'; break;
529            case 0x0162: v = '\xde'; break; /* &Tcedil; */
530            case 0x0155: v = '\xe0'; break;
531            case 0x0103: v = '\xe3'; break;
532            case 0x013a: v = '\xe5'; break;
533            case 0x0107: v = '\xe6'; break;
534            case 0x010d: v = '\xe8'; break;
535            case 0x0119: v = '\xea'; break;
536            case 0x011b: v = '\xec'; break;
537            case 0x010f: v = '\xef'; break;
538            case 0x0111: v = '\xf0'; break;
539            case 0x0144: v = '\xf1'; break;
540            case 0x0148: v = '\xf2'; break;
541            case 0x0151: v = '\xf5'; break;
542            case 0x0159: v = '\xf8'; break;
543            case 0x016f: v = '\xf9'; break;
544            case 0x0171: v = '\xfb'; break;
545            case 0x0163: v = '\xfe'; break; /* tcedil */
546            case 0x02d9: v = '\xff'; break;
547         }
548         if (v == 0) break;
549         value = v;
550      }
551      *p = value;
552      return 1;
553   case CHARSET_WINCP1252:
554      /* MS Windows extensions to ISO-8859-1 */
555      switch (value) {
556       case 0x2026: value = 0x85; break; /* hellip */
557       case 0x160: value = 0x8a; break; /* Scaron */
558       case 0x152: value = 0x8c; break; /* OElig */
559       case 0x17d: value = 0x8e; break; /* Zcaron */
560       case 0x161: value = 0x9a; break; /* scaron */
561       case 0x153: value = 0x9c; break; /* oelig */
562       case 0x17e: value = 0x9e; break; /* zcaron */
563#if 0
564      /* there are a few other obscure ones we don't currently need */
565#endif
566      }
567      if (value < 0x100) {
568         *p = value;
569         return 1;
570      }
571      break;
572#endif
573#if OS_WIN32
574   case CHARSET_DOSCP850: {
575      unsigned char uni2dostab[] = {
576         255, 173, 189, 156, 207, 190, 221, 245,
577         249, 184, 166, 174, 170, 240, 169, 238,
578         248, 241, 253, 252, 239, 230, 244, 250,
579         247, 251, 167, 175, 172, 171, 243, 168,
580         183, 181, 182, 199, 142, 143, 146, 128,
581         212, 144, 210, 211, 222, 214, 215, 216,
582         209, 165, 227, 224, 226, 229, 153, 158,
583         157, 235, 233, 234, 154, 237, 232, 225,
584         133, 160, 131, 198, 132, 134, 145, 135,
585         138, 130, 136, 137, 141, 161, 140, 139,
586         208, 164, 149, 162, 147, 228, 148, 246,
587         155, 151, 163, 150, 129, 236, 231, 152
588      };
589      if (value >= 160 && value < 256) {
590         *p = (int)uni2dostab[value - 160];
591         return 1;
592      }
593#if 0
594      if (value == 305) { /* LATIN SMALL LETTER DOTLESS I */
595         *p = 213;
596         return 1;
597      }
598      if (value == 402) { /* LATIN SMALL LETTER F WITH HOOK */
599         *p = 159;
600         return 1;
601      }
602#endif
603      break;
604   }
605#endif
606   }
607   /* Transliterate characters we can't represent */
608#ifdef DEBUG
609   fprintf(stderr, "transliterate `%c' 0x%x\n", value, value);
610#endif
611   switch (value) {
612    case 160:
613      *p = ' '; return 1;
614    case 161 /* ¡ */:
615      *p = '!'; return 1;
616    case 171 /* « */:
617      p[1] = *p = '<'; return 2;
618    case 187 /* » */:
619      p[1] = *p = '>'; return 2;
620    case 191 /* ¿ */:
621      *p = '?'; return 1;
622    case 192 /* À */: case 193 /* Á */: case 194 /* Â */: case 195 /* Ã */:
623      *p = 'A'; return 1;
624    case 197 /* Å */:
625      p[1] = *p = 'A'; return 2;
626    case 196 /* Ä */: /* &Auml; */
627      *p = 'A';
628      if (!umlaut_to_e()) return 1;
629      p[1] = 'E'; return 2;
630    case 198 /* Æ */:
631      *p = 'A'; p[1] = 'E'; return 2;
632    case 199 /* Ç */: case 268: /* &Ccaron; */
633      *p = 'C'; return 1;
634    case 270: /* &Dcaron; */
635      *p = 'D'; return 1;
636    case 200 /* È */: case 201 /* É */: case 202 /* Ê */: case 203 /* Ë */:
637      *p = 'E'; return 1;
638    case 204 /* Ì */: case 205 /* Í */: case 206 /* Î */: case 207 /* Ï */:
639      *p = 'I'; return 1;
640    case 208 /* Ð */: case 222 /* Þ */:
641      *p = 'T'; p[1] = 'H'; return 2;
642    case 315: /* &Lacute; */
643    case 317: /* &Lcaron; */
644      *p = 'L'; return 1;
645    case 209 /* Ñ */:
646      *p = 'N'; return 1;
647    case 210 /* Ò */: case 211 /* Ó */: case 212 /* Ô */: case 213 /* Õ */:
648      *p = 'O'; return 1;
649    case 214 /* Ö */: /* &Ouml; */ case 0x152: /* &OElig; */
650      *p = 'O'; p[1] = 'E'; return 2;
651    case 352: /* &Scaron; */
652    case 0x15e: /* &Scedil; */
653      *p = 'S'; return 1;
654    case 0x162: /* &Tcedil; */
655    case 0x164: /* &Tcaron; */
656      *p = 'T'; return 1;
657    case 217 /* Ù */: case 218 /* Ú */: case 219 /* Û */:
658      *p = 'U'; return 1;
659    case 220 /* Ü */: /* &Uuml; */
660      *p = 'U'; p[1] = 'E'; return 2;
661    case 221 /* Ý */:
662      *p = 'Y'; return 1;
663    case 381: /* &Zcaron; */
664      *p = 'Z'; return 1;
665    case 223 /* ß */:
666      p[1] = *p = 's'; return 2;
667    case 224 /* à */: case 225 /* á */: case 226 /* â */: case 227 /* ã */:
668    case 259: /* &abreve; */
669      *p = 'a'; return 1;
670    case 228 /* ä */: /* &auml; */ case 230 /* æ */:
671      *p = 'a'; p[1] = 'e'; return 2;
672    case 229 /* å */:
673      p[1] = *p = 'a'; return 2;
674    case 231 /* ç */: case 269 /* &ccaron; */:
675      *p = 'c'; return 1;
676    case 271: /* &dcaron; */
677      *p = 'd'; return 1;
678    case 232 /* è */: case 233 /* é */: case 234 /* ê */: case 235 /* ë */:
679    case 283 /* &ecaron; */:
680      *p = 'e'; return 1;
681    case 236 /* ì */: case 237 /* í */: case 238 /* î */: case 239 /* ï */:
682      *p = 'i'; return 1;
683    case 316 /* &lacute; */:
684    case 318 /* &lcaron; */:
685      *p = 'l'; return 1;
686    case 241 /* ñ */: case 328 /* &ncaron; */:
687      *p = 'n'; return 1;
688    case 345: /* &rcaron; */
689      *p = 'r'; return 1;
690    case 353: /* &scaron; */
691    case 0x15f: /* &scedil; */
692      *p = 's'; return 1;
693    case 357: /* &tcaron; */
694    case 0x163: /* &tcedil; */
695      *p = 't'; return 1;
696    case 240 /* ð */: case 254 /* þ */:
697      *p = 't'; p[1] = 'h'; return 2;
698    case 242 /* ò */: case 243 /* ó */: case 244 /* ô */: case 245 /* õ */:
699      *p = 'o'; return 1;
700    case 246 /* ö */: /* &ouml; */ case 0x153: /* &oelig; */
701      *p = 'o'; p[1] = 'e'; return 2;
702    case 249 /* ù */: case 250 /* ú */: case 251 /* û */:
703    case 367 /* &uring; */:
704      *p = 'u'; return 1;
705    case 252 /* ü */: /* &uuml; */
706      *p = 'u'; p[1] = 'e'; return 2;
707    case 253 /* ý */: case 255 /* ÿ */:
708      *p = 'y'; return 1;
709    case 382: /* &zcaron; */
710      *p = 'z'; return 1;
711    case 0x2026: /* &hellip; */
712      *p = '.'; p[1] = '.'; p[2] = '.'; return 3;
713    case 0x2192: /* &rarr; */
714      *p = '-'; p[1] = '>'; return 2;
715   }
716#ifdef DEBUG
717   fprintf(stderr, "failed to transliterate\n");
718#endif
719   return 0;
720}
721
722#if OS_UNIX && defined DATADIR && defined PACKAGE
723/* Under Unix, we compile in the configured path */
724static const char *pth_cfg_files = DATADIR "/" PACKAGE;
725#else
726/* On other platforms, we fall back on looking in the current directory */
727static const char *pth_cfg_files = "";
728#endif
729
/* Number of entries in msg_array; set by parse_msg_file(). */
static int num_msgs = 0;
/* Table of message strings built by parse_msgs() from the message file. */
static char **msg_array = NULL;

/* Language name used by parse_msg_file() to pick the message file (any
 * ".charset" suffix is trimmed there), and by default_charset() on Unix as
 * a fallback when LC_ALL and LC_CTYPE are unset or empty. */
const char *msg_lang = NULL;
/* NOTE(review): not referenced in the part of the file visible here -
 * presumably a second language code derived from msg_lang; confirm. */
const char *msg_lang2 = NULL;
735
736static char **
737parse_msgs(int n, unsigned char *p, int charset_code) {
738   int i;
739
740   char **msgs = osmalloc(n * sizeof(char *));
741
742   for (i = 0; i < n; i++) {
743      unsigned char *to = p;
744      int ch;
745      msgs[i] = (char *)p;
746
747      /* If we want UTF8 anyway, we just need to find the start of each
748       * message */
749      if (charset_code == CHARSET_UTF8) {
750         p += strlen((char *)p) + 1;
751         continue;
752      }
753
754      while ((ch = *p++) != 0) {
755         /* A byte in the range 0x80-0xbf or 0xf0-0xff isn't valid in
756          * this state, (0xf0-0xfd mean values > 0xffff) so treat as
757          * literal and try to resync so we cope better when fed
758          * non-utf-8 data.  Similarly we abandon a multibyte sequence
759          * if we hit an invalid character. */
760         if (ch >= 0xc0 && ch < 0xf0) {
761            int ch1 = *p;
762            if ((ch1 & 0xc0) != 0x80) goto resync;
763
764            if (ch < 0xe0) {
765               /* 2 byte sequence */
766               ch = ((ch & 0x1f) << 6) | (ch1 & 0x3f);
767               p++;
768            } else {
769               /* 3 byte sequence */
770               int ch2 = p[1];
771               if ((ch2 & 0xc0) != 0x80) goto resync;
772               ch = ((ch & 0x1f) << 12) | ((ch1 & 0x3f) << 6) | (ch2 & 0x3f);
773               p += 2;
774            }
775         }
776
777         resync:
778
779         if (ch < 127) {
780            *to++ = (char)ch;
781         } else {
782            /* We assume an N byte UTF-8 code never transliterates to more
783             * than N characters (so we can't transliterate © to (C) or
784             * ® to (R) for example) */
785            to += add_unicode(charset_code, to, ch);
786         }
787      }
788      *to++ = '\0';
789   }
790   return msgs;
791}
792
793/* This is the name of the default language, which can be set like so:
794 * ./configure --enable-defaultlang=fr
795 */
796#ifdef DEFAULTLANG
797/* No point extracting these errors as they won't get used if file opens */
798# include "../lib/defaultlang.h"
799#else
800#define N_DONTEXTRACTMSGS 5
801static unsigned char dontextractmsgs[] =
802   "Can't open message file `%s' using path `%s'\0"/*1000*/
803   "Problem with message file `%s'\0"/*1001*/
804   "I don't understand this message file version\0"/*1002*/
805   "Message file truncated?\0"/*1003*/
806   "Out of memory (couldn't find %lu bytes).\0"/*1004*/;
807#endif
808
809static char **dontextract = NULL;
810
811static void
812parse_msg_file(int charset_code)
813{
814   FILE *fh;
815   unsigned char header[20];
816   int i;
817   unsigned len;
818   unsigned char *p;
819   char *fnm, *s;
820   int n;
821
822#ifdef DEBUG
823   fprintf(stderr, "parse_msg_file(%d)\n", charset_code);
824#endif
825
826   /* sort out messages we need to print if we can't open the message file */
827   dontextract = parse_msgs(N_DONTEXTRACTMSGS, dontextractmsgs, charset_code);
828
829   fnm = osstrdup(msg_lang);
830   /* trim off charset from stuff like "de_DE.iso8859_1" */
831   s = strchr(fnm, '.');
832   if (s) *s = '\0';
833
834   fh = fopenWithPthAndExt(pth_cfg_files, fnm, EXT_SVX_MSG, "rb", NULL);
835
836   if (!fh) {
837      /* e.g. if 'en_GB' is unknown, see if we know 'en' */
838      if (strlen(fnm) > 3 && fnm[2] == '_') {
839         fnm[2] = '\0';
840         fh = fopenWithPthAndExt(pth_cfg_files, fnm, EXT_SVX_MSG, "rb", NULL);
841         if (!fh) fnm[2] = '_'; /* for error reporting */
842      }
843   }
844
845   if (!fh) {
846      fatalerror(/*Can't open message file `%s' using path `%s'*/1000,
847                 fnm, pth_cfg_files);
848   }
849
850   if (fread(header, 1, 20, fh) < 20 ||
851       memcmp(header, "Svx\nMsg\r\n\xfe\xff", 12) != 0) {
852      fatalerror(/*Problem with message file `%s'*/1001, fnm);
853   }
854
855   if (header[12] != 0)
856      fatalerror(/*I don't understand this message file version*/1002);
857
858   n = (header[14] << 8) | header[15];
859
860   len = 0;
861   for (i = 16; i < 20; i++) len = (len << 8) | header[i];
862
863   p = osmalloc(len);
864   if (fread(p, 1, len, fh) < len)
865      fatalerror(/*Message file truncated?*/1003);
866
867   fclose(fh);
868
869#ifdef DEBUG
870   fprintf(stderr, "fnm = `%s', n = %d, len = %d\n", fnm, n, len);
871#endif
872   osfree(fnm);
873
874   msg_array = parse_msgs(n, p, charset_code);
875   num_msgs = n;
876}
877
/* Return pth_cfg_files, the path in which the message file is looked for. */
const char *
msg_cfgpth(void)
{
   return pth_cfg_files;
}
883
/* Return exe_pth, the path to use to look for executables ("" until
 * msg_init() has set it from argv[0]). */
const char *
msg_exepth(void)
{
   return exe_pth;
}
889
/* Return the program name recorded by msg_init() ("anonymous program" if
 * it hasn't been set up yet). */
const char *
msg_appname(void)
{
   return appname_copy;
}
895
896void
897msg_init(char * const *argv)
898{
899   char *p;
900   SVX_ASSERT(argv);
901
902#ifdef HAVE_SIGNAL
903   init_signals();
904#endif
905   /* Point to argv[0] itself so we report a more helpful error if the
906    * code to work out the clean appname generates a signal */
907   appname_copy = argv[0];
908#if OS_UNIX
909   /* use name as-is on Unix - programs run from path get name as supplied */
910   appname_copy = osstrdup(argv[0]);
911#else
912   /* use the lower-cased leafname on other platforms */
913   p = leaf_from_fnm(argv[0]);
914   appname_copy = p;
915   while (*p) {
916      *p = tolower(*p);
917      ++p;
918   }
919#endif
920
921   /* shortcut --version so you can check the version number even when the
922    * correct message file can't be found... */
923   if (argv[1] && strcmp(argv[1], "--version") == 0) {
924      cmdline_version();
925      exit(0);
926   }
927   if (argv[0]) {
928      exe_pth = path_from_fnm(argv[0]);
929#ifdef MACOSX_BUNDLE
930      /* If we're being built into a bundle, always look relative to
931       * the path to the binary. */
932#ifdef AVEN
933      /* Aven is packaged as an application, so we must look inside there. */
934      pth_cfg_files = use_path(exe_pth, "../Resources");
935#else
936      pth_cfg_files = use_path(exe_pth, "share/survex");
937#endif
938#elif OS_UNIX && defined DATADIR && defined PACKAGE
939      bool free_pth = fFalse;
940      char *pth = getenv("srcdir");
941      if (!pth || !pth[0]) {
942         pth = path_from_fnm(argv[0]);
943         free_pth = fTrue;
944      }
945      if (pth[0]) {
946         struct stat buf;
947#if OS_UNIX_MACOSX
948         /* On MacOS X the programs may be installed anywhere, with the
949          * share directory and the binaries in the same directory. */
950         p = use_path(pth, "share/survex/en.msg");
951         if (lstat(p, &buf) == 0 && S_ISREG(buf.st_mode)) {
952            pth_cfg_files = use_path(pth, "share/survex");
953            goto macosx_got_msg;
954         }
955         osfree(p);
956#endif
957         /* If we're run with an explicit path, check if "../lib/en.msg"
958          * from the program's path exists, and if so look there for
959          * support files - this allows us to test binaries in the build
960          * tree easily. */
961         p = use_path(pth, "../lib/en.msg");
962         if (lstat(p, &buf) == 0) {
963#ifdef S_ISREG
964            /* POSIX way */
965            if (S_ISREG(buf.st_mode)) {
966               pth_cfg_files = use_path(pth, "../lib");
967            }
968#else
969            /* BSD way */
970            if ((buf.st_mode & S_IFMT) == S_IFREG) {
971               pth_cfg_files = use_path(pth, "../lib");
972            }
973#endif
974         }
975#if defined(__GNUC__) && defined(__APPLE_CC__)
976macosx_got_msg:
977#endif
978         osfree(p);
979      }
980
981      if (free_pth) osfree(pth);
982#elif OS_WIN32
983      DWORD len = 256;
984      char *buf = NULL, *modname;
985      while (1) {
986          DWORD got;
987          buf = osrealloc(buf, len);
988          got = GetModuleFileName(NULL, buf, len);
989          if (got < len) break;
990          len += len;
991      }
992      modname = buf;
993      /* Strange Win32 nastiness - strip prefix "\\?\" if present */
994      if (strncmp(modname, "\\\\?\\", 4) == 0) modname += 4;
995      pth_cfg_files = path_from_fnm(modname);
996      osfree(buf);
997#else
998      /* Get the path to the support files from argv[0] */
999      pth_cfg_files = path_from_fnm(argv[0]);
1000#endif
1001   }
1002
1003   msg_lang = getenv("SURVEXLANG");
1004#ifdef DEBUG
1005   fprintf(stderr, "msg_lang = %p (= \"%s\")\n", msg_lang, msg_lang?msg_lang:"(null)");
1006#endif
1007
1008   if (!msg_lang || !*msg_lang) {
1009      msg_lang = getenv("LC_MESSAGES");
1010      if (!msg_lang || !*msg_lang) {
1011         msg_lang = getenv("LANG");
1012         /* Something (AutoCAD?) on Microsoft Windows sets LANG to a number. */
1013         if (msg_lang && !isalpha(msg_lang[0])) msg_lang = NULL;
1014      }
1015      if (!msg_lang || !*msg_lang) {
1016#if OS_WIN32
1017         LCID locid;
1018#endif
1019#ifdef DEFAULTLANG
1020         msg_lang = STRING(DEFAULTLANG);
1021#else
1022         msg_lang = "en";
1023#endif
1024#if OS_WIN32
1025         locid = GetUserDefaultLCID();
1026         if (locid) {
1027            WORD langid = LANGIDFROMLCID(locid);
1028            switch (PRIMARYLANGID(langid)) {
1029/* older mingw compilers don't seem to supply this value */
1030#ifndef LANG_CATALAN
1031# define LANG_CATALAN 0x03
1032#endif
1033             case LANG_CATALAN:
1034               msg_lang = "ca";
1035               break;
1036             case LANG_CHINESE:
1037               msg_lang = "zh";
1038               break;
1039             case LANG_ENGLISH:
1040               if (SUBLANGID(langid) == SUBLANG_ENGLISH_US)
1041                  msg_lang = "en_US";
1042               else
1043                  msg_lang = "en";
1044               break;
1045             case LANG_FRENCH:
1046               msg_lang = "fr";
1047               break;
1048             case LANG_GERMAN:
1049               switch (SUBLANGID(langid)) {
1050                case SUBLANG_GERMAN_SWISS:
1051                  msg_lang = "de_CH";
1052                  break;
1053                case SUBLANG_GERMAN:
1054                  msg_lang = "de_DE";
1055                  break;
1056                default:
1057                  msg_lang = "de";
1058               }
1059               break;
1060             case LANG_ITALIAN:
1061               msg_lang = "it";
1062               break;
1063             case LANG_PORTUGUESE:
1064               if (SUBLANGID(langid) == SUBLANG_PORTUGUESE_BRAZILIAN)
1065                  msg_lang = "pt_BR";
1066               else
1067                  msg_lang = "pt";
1068               break;
1069             case LANG_ROMANIAN:
1070               msg_lang = "ro";
1071               break;
1072             case LANG_SLOVAK:
1073               msg_lang = "sk";
1074               break;
1075             case LANG_SPANISH:
1076               msg_lang = "es";
1077               break;
1078            }
1079         }
1080#endif
1081      }
1082   }
1083#ifdef DEBUG
1084   fprintf(stderr, "msg_lang = %p (= \"%s\")\n", msg_lang, msg_lang?msg_lang:"(null)");
1085#endif
1086
1087   /* On Mandrake LANG defaults to C */
1088   if (strcmp(msg_lang, "C") == 0) msg_lang = "en";
1089
1090   msg_lang = osstrdup(msg_lang);
1091
1092   /* Convert en-us to en_US, etc */
1093   p = strchr(msg_lang, '-');
1094   if (p) {
1095      *p++ = '_';
1096      while (*p) {
1097         *p = toupper(*p);
1098         p++;
1099      }
1100   }
1101
1102   p = strchr(msg_lang, '_');
1103   if (p) {
1104      *p = '\0';
1105      msg_lang2 = osstrdup(msg_lang);
1106      *p = '_';
1107   }
1108
1109   int def_charset = default_charset();
1110
1111#ifdef LC_MESSAGES
1112   /* try to setlocale() appropriately too */
1113   if (!setlocale(LC_MESSAGES, msg_lang)) {
1114      if (!msg_lang2 || !setlocale(LC_MESSAGES, msg_lang2)) {
1115         def_charset = CHARSET_ISO_8859_1;
1116      }
1117   }
1118#endif
1119
1120   select_charset(def_charset);
1121}
1122
1123/* Message may be overwritten by next call
1124 * (but not in current implementation) */
1125const char *
1126msg(int en)
1127{
1128   /* NB can't use SVX_ASSERT here! */
1129   static char badbuf[256];
1130   if (en >= 1000 && en < 1000 + N_DONTEXTRACTMSGS)
1131      return dontextract[en - 1000];
1132   if (!msg_array) {
1133      if (en != 1)  {
1134         sprintf(badbuf, "Message %d requested before msg_array initialised\n",
1135                 en);
1136         return badbuf;
1137      }
1138      /* this should be the only other message which can be requested before
1139       * the message file is opened and read... */
1140      if (!dontextract) return "Out of memory (couldn't find %lu bytes).";
1141      return dontextract[(/*Out of memory (couldn't find %lu bytes).*/1004)
1142                         - 1000];
1143   }
1144
1145   if (en < 0 || en >= num_msgs) {
1146      sprintf(badbuf, "Message %d out of range\n", en);
1147      return badbuf;
1148   }
1149
1150   if (en == 0) {
1151      const char *p = msg_array[0];
1152      if (!*p) p = "(C)";
1153      return p;
1154   }
1155
1156   return msg_array[en];
1157}
1158
/* Returns a persistent copy of message number en.
 *
 * In the current implementation this simply forwards to msg() and makes no
 * copy of its own, so the result has whatever lifetime msg() gives it. */
const char *
msgPerm(int en)
{
   return msg(en);
}
1165
1166void
1167v_report(int severity, const char *fnm, int line, int en, va_list ap)
1168{
1169#ifdef AVEN
1170   aven_v_report(severity, fnm, line, en, ap);
1171#else
1172   if (fnm) {
1173      fputs(fnm, STDERR);
1174      if (line) fprintf(STDERR, ":%d", line);
1175   } else {
1176      fputs(appname_copy, STDERR);
1177   }
1178   fputs(": ", STDERR);
1179
1180   if (severity == 0) {
1181      fputs(msg(/*warning*/4), STDERR);
1182      fputs(": ", STDERR);
1183   }
1184
1185   vfprintf(STDERR, msg(en), ap);
1186   fputnl(STDERR);
1187#endif
1188
1189   switch (severity) {
1190    case 0:
1191      msg_warnings++;
1192      break;
1193    case 1:
1194      msg_errors++;
1195      if (msg_errors == 50)
1196         fatalerror_in_file(fnm, 0, /*Too many errors - giving up*/19);
1197      break;
1198    case 2:
1199      exit(EXIT_FAILURE);
1200   }
1201}
1202
/* Report message number en as a warning with no associated file or line.
 * The variadic arguments supply the values for the message's printf-style
 * format.  Passes severity 0 to v_report(), which counts it in
 * msg_warnings. */
void
warning(int en, ...)
{
   va_list ap;
   va_start(ap, en);
   v_report(0, NULL, 0, en, ap);
   va_end(ap);
}
1211
/* Report message number en as an error with no associated file or line.
 * The variadic arguments supply the values for the message's printf-style
 * format.  Passes severity 1 to v_report(), which counts it in msg_errors
 * and gives up once 50 errors have been reported. */
void
error(int en, ...)
{
   va_list ap;
   va_start(ap, en);
   v_report(1, NULL, 0, en, ap);
   va_end(ap);
}
1220
/* Report message number en as a fatal error with no associated file or
 * line.  The variadic arguments supply the values for the message's
 * printf-style format.  Passes severity 2 to v_report(), which calls
 * exit(EXIT_FAILURE) after reporting, so the va_end() below is only
 * reached if v_report() returns. */
void
fatalerror(int en, ...)
{
   va_list ap;
   va_start(ap, en);
   v_report(2, NULL, 0, en, ap);
   va_end(ap);
}
1229
/* Report message number en as a warning relating to line `line' of file
 * fnm (a line of 0 means no line number is shown).  The variadic arguments
 * supply the values for the message's printf-style format.  Passes
 * severity 0 to v_report(). */
void
warning_in_file(const char *fnm, int line, int en, ...)
{
   va_list ap;
   va_start(ap, en);
   v_report(0, fnm, line, en, ap);
   va_end(ap);
}
1238
/* Report message number en as an error relating to line `line' of file
 * fnm (a line of 0 means no line number is shown).  The variadic arguments
 * supply the values for the message's printf-style format.  Passes
 * severity 1 to v_report(), which counts it in msg_errors and gives up
 * once 50 errors have been reported. */
void
error_in_file(const char *fnm, int line, int en, ...)
{
   va_list ap;
   va_start(ap, en);
   v_report(1, fnm, line, en, ap);
   va_end(ap);
}
1247
/* Report message number en as a fatal error relating to line `line' of
 * file fnm (a line of 0 means no line number is shown).  The variadic
 * arguments supply the values for the message's printf-style format.
 * Passes severity 2 to v_report(), which calls exit(EXIT_FAILURE) after
 * reporting, so the va_end() below is only reached if v_report()
 * returns. */
void
fatalerror_in_file(const char *fnm, int line, int en, ...)
{
   va_list ap;
   va_start(ap, en);
   v_report(2, fnm, line, en, ap);
   va_end(ap);
}
1256
/* Code to support switching character set at runtime (e.g. for a printer
 * driver to support different character sets on screen and on the printer)
 */

/* One entry in the list of message arrays which have already been parsed,
 * keyed by character set code. */
typedef struct charset_li {
   struct charset_li *next; /* next entry in the list, or NULL at the end */
   int code;                /* character set code this entry is for */
   char **msg_array;        /* messages as parsed for that character set */
} charset_li;

/* Head of the list of parsed message arrays; select_charset() adds new
 * entries at the front. */
static charset_li *charset_head = NULL;

/* Code of the currently selected character set; CHARSET_BAD until
 * select_charset() assigns it. */
static int charset = CHARSET_BAD;
1269
1270int
1271select_charset(int charset_code)
1272{
1273   int old_charset = charset;
1274   charset_li *p;
1275
1276#ifdef DEBUG
1277   fprintf(stderr, "select_charset(%d), old charset = %d\n", charset_code,
1278           charset);
1279#endif
1280
1281   charset = charset_code;
1282
1283   /* check if we've already parsed messages for new charset */
1284   for (p = charset_head; p; p = p->next) {
1285#ifdef DEBUG
1286      printf("%p: code %d msg_array %p\n", p, p->code, p->msg_array);
1287#endif
1288      if (p->code == charset) {
1289         msg_array = p->msg_array;
1290         return old_charset;
1291      }
1292   }
1293
1294   /* nope, got to reparse message file */
1295   parse_msg_file(charset_code);
1296
1297   /* add to list */
1298   p = osnew(charset_li);
1299   p->code = charset;
1300   p->msg_array = msg_array;
1301   p->next = charset_head;
1302   charset_head = p;
1303
1304   return old_charset;
1305}
Note: See TracBrowser for help on using the repository browser.