/* message.c
* Fairly general purpose message and error routines
* Copyright (C) 1993-2025 Olly Betts
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see
* .
*/
/*#define DEBUG 1*/
#include
#include
#include
#include
#include
#include
#include
#include
#include "cmdline.h"
#include "filename.h"
#include "message.h"
#include "osalloc.h"
#include "filelist.h"
#include "debug.h"
#include "str.h"
#ifdef AVEN
# include "aven.h"
#endif
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# include
#else
# include
#endif
#include
int msg_warnings = 0; /* keep track of how many warnings we've given */
int msg_errors = 0; /* and how many (non-fatal) errors */
/* in case osmalloc() fails before appname_copy is set up */
static const char *appname_copy = "anonymous program";
/* Path to use to look for executables (used by aven to find cavern). */
static const char *exe_pth = "";
/* error code for failed osmalloc and osrealloc calls */
static void
outofmem(size_t size)
{
/* TRANSLATORS: "%lu" is a placeholder for the number of bytes which Survex
* was trying to allocate space for. */
fatalerror(/*Out of memory (couldn’t find %lu bytes).*/24,
(unsigned long)size);
}
/* malloc with error catching if it fails. Also allows us to write special
* versions easily eg for MS Windows.
*/
void *
osmalloc(size_t size)
{
void *p = malloc(size);
if (p == NULL) outofmem(size);
return p;
}
/* realloc with error catching if it fails. */
void *
osrealloc(void *p, size_t size)
{
/* some pre-ANSI realloc implementations don't cope with a NULL pointer */
if (p == NULL) {
p = malloc(size);
} else {
p = realloc(p, size);
}
if (p == NULL) outofmem(size);
return p;
}
char *
osstrdup(const char *str)
{
char *p;
size_t len;
len = strlen(str) + 1;
p = osmalloc(len);
memcpy(p, str, len);
return p;
}
static int
default_charset(void)
{
if (getenv("SURVEX_UTF8")) return CHARSET_UTF8;
#ifdef _WIN32
switch (GetConsoleOutputCP()) {
case 0: return CHARSET_UTF8;
case 1252: return CHARSET_WINCP1252;
case 1250: return CHARSET_WINCP1250;
case 850: return CHARSET_DOSCP850;
}
return CHARSET_USASCII;
#else
const char *p = getenv("LC_ALL");
if (p == NULL || p[0] == '\0') {
p = getenv("LC_CTYPE");
if (p == NULL || p[0] == '\0') {
p = getenv("LANG");
}
}
if (p) {
const char *q = strchr(p, '.');
if (q) p = q + 1;
}
if (p) {
const char *chset = p;
size_t name_len;
while (*p != '\0' && *p != '@') p++;
name_len = p - chset;
if (name_len) {
bool only_digit = true;
size_t cnt;
for (cnt = 0; cnt < name_len; ++cnt)
if (isalpha((unsigned char)chset[cnt])) {
only_digit = false;
break;
}
if (only_digit) goto iso;
switch (tolower((unsigned char)chset[0])) {
case 'i':
if (tolower((unsigned char)chset[1]) == 's' &&
tolower((unsigned char)chset[2]) == 'o') {
chset += 3;
iso:
if (strncmp(chset, "8859", 4) == 0) {
chset += 4;
while (chset < p && *chset && !isalnum((unsigned char)*chset))
chset++;
switch (atoi(chset)) {
case 1: return CHARSET_ISO_8859_1;
case 2: return CHARSET_ISO_8859_2;
case 15: return CHARSET_ISO_8859_15;
default: return CHARSET_USASCII;
}
}
}
break;
case 'u':
if (tolower((unsigned char)chset[1]) == 't' &&
tolower((unsigned char)chset[2]) == 'f') {
chset += 3;
while (chset < p && *chset && !isalnum((unsigned char)*chset))
chset++;
switch (atoi(chset)) {
case 8: return CHARSET_UTF8;
default: return CHARSET_USASCII;
}
}
}
}
}
return CHARSET_USASCII;
#endif
}
/* It seems that Swedish and maybe some other scandanavian languages don't
* transliterate ä to ae - but it seems there may be conflicting views
* on this...
*/
#define umlaut_to_e() 1
/* values <= 127 already dealt with */
static int
add_unicode(int charset, unsigned char *p, int value)
{
#ifdef DEBUG
fprintf(stderr, "add_unicode(%d, %p, %d)\n", charset, p, value);
#endif
if (value == 0) return 0;
switch (charset) {
case CHARSET_USASCII:
if (value < 0x80) {
*p = value;
return 1;
}
break;
case CHARSET_ISO_8859_1:
if (value < 0x100) {
*p = value;
return 1;
}
switch (value) {
case 0x2032:
/* prime -> spacing acute accent */
*p = '\xb4';
return 1;
case 0x2033:
/* double prime -> 2x spacing acute accent */
p[1] = *p = '\xb4';
return 2;
}
break;
case CHARSET_ISO_8859_2:
if (value >= 0xa0) {
int v = 0;
switch (value) {
case 0xa0: case 0xa4: case 0xa7: case 0xa8: case 0xad: case 0xb0:
case 0xb4: case 0xb8: case 0xc1: case 0xc2: case 0xc4: case 0xc7:
case 0xc9: case 0xcb: case 0xcd: case 0xce: case 0xd3: case 0xd4:
case 0xd6: case 0xd7: case 0xda: case 0xdc: case 0xdd: case 0xdf:
case 0xe1: case 0xe2: case 0xe4: case 0xe7: case 0xe9: case 0xeb:
case 0xed: case 0xee: case 0xf3: case 0xf4: case 0xf6: case 0xf7:
case 0xfa: case 0xfc: case 0xfd:
v = value; break;
case 0x104: v = '\xa1'; break;
case 0x2d8: v = '\xa2'; break;
case 0x141: v = '\xa3'; break;
case 0x13d: v = '\xa5'; break;
case 0x15a: v = '\xa6'; break;
case 0x160: v = '\xa9'; break;
case 0x15e: v = '\xaa'; break; /* Scedil */
case 0x164: v = '\xab'; break;
case 0x179: v = '\xac'; break;
case 0x17d: v = '\xae'; break;
case 0x17b: v = '\xaf'; break;
case 0x105: v = '\xb1'; break;
case 0x2db: v = '\xb2'; break;
case 0x142: v = '\xb3'; break;
case 0x2032: v = '\xb4'; break; /* prime -> spacing acute accent */
case 0x13e: v = '\xb5'; break;
case 0x15b: v = '\xb6'; break;
case 0x2c7: v = '\xb7'; break;
case 0x161: v = '\xb9'; break;
case 0x15f: v = '\xba'; break; /* scedil */
case 0x165: v = '\xbb'; break;
case 0x17a: v = '\xbc'; break;
case 0x2dd: v = '\xbd'; break;
case 0x2033: v = '\xbd'; break; /* double prime -> spacing double acute accent */
case 0x17e: v = '\xbe'; break;
case 0x17c: v = '\xbf'; break;
case 0x154: v = '\xc0'; break;
case 0x102: v = '\xc3'; break;
case 0x139: v = '\xc5'; break;
case 0x106: v = '\xc6'; break;
case 0x10c: v = '\xc8'; break;
case 0x118: v = '\xca'; break;
case 0x11a: v = '\xcc'; break;
case 0x10e: v = '\xcf'; break;
case 0x110: v = '\xd0'; break;
case 0x143: v = '\xd1'; break;
case 0x147: v = '\xd2'; break;
case 0x150: v = '\xd5'; break;
case 0x158: v = '\xd8'; break;
case 0x16e: v = '\xd9'; break;
case 0x170: v = '\xdb'; break;
case 0x162: v = '\xde'; break; /* Ţ */
case 0x155: v = '\xe0'; break;
case 0x103: v = '\xe3'; break;
case 0x13a: v = '\xe5'; break;
case 0x107: v = '\xe6'; break;
case 0x10d: v = '\xe8'; break;
case 0x119: v = '\xea'; break;
case 0x11b: v = '\xec'; break;
case 0x10f: v = '\xef'; break;
case 0x111: v = '\xf0'; break;
case 0x144: v = '\xf1'; break;
case 0x148: v = '\xf2'; break;
case 0x151: v = '\xf5'; break;
case 0x159: v = '\xf8'; break;
case 0x16f: v = '\xf9'; break;
case 0x171: v = '\xfb'; break;
case 0x163: v = '\xfe'; break; /* tcedil */
case 0x2d9: v = '\xff'; break;
}
if (v == 0) break;
value = v;
}
*p = value;
return 1;
case CHARSET_ISO_8859_15:
switch (value) {
case 0xa4: case 0xa6: case 0xb0: case 0xc4:
case 0xd0: case 0xd4: case 0xd5: case 0xd6:
goto donthave;
case 0x152: value = 0xd4; break; /* Œ */
case 0x153: value = 0xd5; break; /* œ */
#if 0
case 0x0: value = 0xa4; break; /* euro */
#endif
case 0x160: value = 0xa6; break; /* Scaron */
case 0x161: value = 0xb0; break; /* scaron */
case 0x17d: value = 0xc4; break; /* Zcaron */
case 0x17e: value = 0xd0; break; /* zcaron */
#if 0
case 0x0: value = 0xd6; break; /* Ydiersis */
#endif
}
if (value < 0x100) {
*p = value;
return 1;
}
donthave:
break;
#ifdef _WIN32
case CHARSET_WINCP1250:
/* MS Windows rough equivalent to ISO-8859-2 */
if (value >= 0x80) {
int v = 0;
/* This mapping is complete - there are 5 unused positions:
* 0x81 0x83 0x88 0x90 0x98 */
switch (value) {
case 0xa0: case 0xa4: case 0xa6: case 0xa7: case 0xa8: case 0xa9:
case 0xab: case 0xac: case 0xad: case 0xae: case 0xb0: case 0xb1:
case 0xb4: case 0xb5: case 0xb6: case 0xb7: case 0xb8: case 0xbb:
case 0xc1: case 0xc2: case 0xc4: case 0xc7: case 0xc9: case 0xcb:
case 0xcd: case 0xce: case 0xd3: case 0xd4: case 0xd6: case 0xd7:
case 0xda: case 0xdc: case 0xdd: case 0xdf: case 0xe1: case 0xe2:
case 0xe4: case 0xe7: case 0xe9: case 0xeb: case 0xed: case 0xee:
case 0xf3: case 0xf4: case 0xf6: case 0xf7: case 0xfa: case 0xfc:
case 0xfd:
v = value; break;
case 0x20ac: v = '\x80'; break;
case 0x201a: v = '\x82'; break;
case 0x201e: v = '\x84'; break;
case 0x2026: v = '\x85'; break;
case 0x2020: v = '\x86'; break;
case 0x2021: v = '\x87'; break;
case 0x2030: v = '\x89'; break;
case 0x0160: v = '\x8a'; break;
case 0x2039: v = '\x8b'; break;
case 0x015a: v = '\x8c'; break;
case 0x0164: v = '\x8d'; break;
case 0x017d: v = '\x8e'; break;
case 0x0179: v = '\x8f'; break;
case 0x2018: v = '\x91'; break;
case 0x2019: v = '\x92'; break;
case 0x201c: v = '\x93'; break;
case 0x201d: v = '\x94'; break;
case 0x2022: v = '\x95'; break;
case 0x2013: v = '\x96'; break;
case 0x2014: v = '\x97'; break;
case 0x2122: v = '\x99'; break;
case 0x0161: v = '\x9a'; break;
case 0x203a: v = '\x9b'; break;
case 0x015b: v = '\x9c'; break;
case 0x0165: v = '\x9d'; break;
case 0x017e: v = '\x9e'; break;
case 0x017a: v = '\x9f'; break;
case 0x02c7: v = '\xa1'; break;
case 0x02d8: v = '\xa2'; break;
case 0x0141: v = '\xa3'; break;
case 0x0104: v = '\xa5'; break;
case 0x015e: v = '\xaa'; break; /* Scedil */
case 0x017b: v = '\xaf'; break;
case 0x02db: v = '\xb2'; break;
case 0x0142: v = '\xb3'; break;
case 0x2032: v = '\xb4'; break; /* prime -> spacing acute accent */
case 0x0105: v = '\xb9'; break;
case 0x015f: v = '\xba'; break; /* scedil */
case 0x013d: v = '\xbc'; break;
case 0x02dd: v = '\xbd'; break;
case 0x2033: v = '\xbd'; break; /* double prime -> spacing double acute accent */
case 0x013e: v = '\xbe'; break;
case 0x017c: v = '\xbf'; break;
case 0x0154: v = '\xc0'; break;
case 0x0102: v = '\xc3'; break;
case 0x0139: v = '\xc5'; break;
case 0x0106: v = '\xc6'; break;
case 0x010c: v = '\xc8'; break;
case 0x0118: v = '\xca'; break;
case 0x011a: v = '\xcc'; break;
case 0x010e: v = '\xcf'; break;
case 0x0110: v = '\xd0'; break;
case 0x0143: v = '\xd1'; break;
case 0x0147: v = '\xd2'; break;
case 0x0150: v = '\xd5'; break;
case 0x0158: v = '\xd8'; break;
case 0x016e: v = '\xd9'; break;
case 0x0170: v = '\xdb'; break;
case 0x0162: v = '\xde'; break; /* Ţ */
case 0x0155: v = '\xe0'; break;
case 0x0103: v = '\xe3'; break;
case 0x013a: v = '\xe5'; break;
case 0x0107: v = '\xe6'; break;
case 0x010d: v = '\xe8'; break;
case 0x0119: v = '\xea'; break;
case 0x011b: v = '\xec'; break;
case 0x010f: v = '\xef'; break;
case 0x0111: v = '\xf0'; break;
case 0x0144: v = '\xf1'; break;
case 0x0148: v = '\xf2'; break;
case 0x0151: v = '\xf5'; break;
case 0x0159: v = '\xf8'; break;
case 0x016f: v = '\xf9'; break;
case 0x0171: v = '\xfb'; break;
case 0x0163: v = '\xfe'; break; /* tcedil */
case 0x02d9: v = '\xff'; break;
}
if (v == 0) break;
value = v;
}
*p = value;
return 1;
case CHARSET_WINCP1252:
/* MS Windows extensions to ISO-8859-1 */
/* This mapping is complete - there are 5 unused positions:
* 0x81 0x8d 0x8f 0x90 0x9d */
switch (value) {
case 0x2026: value = 0x85; break; /* hellip */
case 0x0160: value = 0x8a; break; /* Scaron */
case 0x0152: value = 0x8c; break; /* OElig */
case 0x017d: value = 0x8e; break; /* Zcaron */
case 0x2019: value = 0x92; break; /* rsquo */
case 0x201c: value = 0x93; break; /* ldquo */
case 0x201d: value = 0x94; break; /* rdquo */
case 0x0161: value = 0x9a; break; /* scaron */
case 0x0153: value = 0x9c; break; /* oelig */
case 0x017e: value = 0x9e; break; /* zcaron */
#if 0
/* there are a few other obscure ones we don't currently need */
case 0x20ac: value = 0x80; break; /* euro */
case 0x201a: value = 0x82; break; /* sbquo */
case 0x0192: value = 0x83; break; /* fnof */
case 0x201e: value = 0x84; break; /* bdquo */
case 0x2020: value = 0x86; break; /* dagger */
case 0x2021: value = 0x87; break; /* Dagger */
case 0x02c6: value = 0x88; break; /* circ */
case 0x2030: value = 0x89; break; /* permil */
case 0x2039: value = 0x8b; break; /* lsaquo */
case 0x2018: value = 0x91; break; /* lsquo */
case 0x2022: value = 0x95; break; /* bull */
case 0x2013: value = 0x96; break; /* ndash */
case 0x2014: value = 0x97; break; /* mdash */
case 0x02dc: value = 0x98; break; /* tilde */
case 0x2122: value = 0x99; break; /* trade */
case 0x203a: value = 0x9b; break; /* rsaquo */
case 0x0178: value = 0x9f; break; /* Yuml */
#endif
case 0x2032: value = 0xb4; break; /* prime -> spacing acute accent */
case 0x2033:
/* double prime -> 2x spacing acute accent */
p[1] = *p = '\xb4';
return 2;
}
if (value < 0x100) {
*p = value;
return 1;
}
break;
#endif
#ifdef _WIN32
case CHARSET_DOSCP850: {
static const unsigned char uni2dostab[] = {
255, 173, 189, 156, 207, 190, 221, 245,
249, 184, 166, 174, 170, 240, 169, 238,
248, 241, 253, 252, 239, 230, 244, 250,
247, 251, 167, 175, 172, 171, 243, 168,
183, 181, 182, 199, 142, 143, 146, 128,
212, 144, 210, 211, 222, 214, 215, 216,
209, 165, 227, 224, 226, 229, 153, 158,
157, 235, 233, 234, 154, 237, 232, 225,
133, 160, 131, 198, 132, 134, 145, 135,
138, 130, 136, 137, 141, 161, 140, 139,
208, 164, 149, 162, 147, 228, 148, 246,
155, 151, 163, 150, 129, 236, 231, 152
};
if (value >= 160 && value < 256) {
*p = (int)uni2dostab[value - 160];
return 1;
}
switch (value) {
case 0x2032:
/* prime -> spacing acute accent */
*p = '\xef';
return 1;
case 0x2033:
/* double prime -> 2x spacing acute accent */
p[1] = *p = '\xef';
return 2;
}
#if 0
if (value == 305) { /* LATIN SMALL LETTER DOTLESS I */
*p = 213; /* "Modified CP850" has the Euro sign here. */
return 1;
}
if (value == 402) { /* LATIN SMALL LETTER F WITH HOOK */
*p = 159;
return 1;
}
#endif
break;
}
#endif
}
/* Transliterate characters we can't represent */
#ifdef DEBUG
fprintf(stderr, "transliterate “%c” 0x%x\n", value, value);
#endif
switch (value) {
case 160:
*p = ' '; return 1;
case 161 /* ¡ */:
*p = '!'; return 1;
case 176 /* ° */:
*p = 'd'; p[1] = 'g'; return 2;
case 191 /* ¿ */:
*p = '?'; return 1;
case 192 /* À */: case 193 /* Á */: case 194 /* Â */: case 195 /* Ã */:
*p = 'A'; return 1;
case 197 /* Å */:
p[1] = *p = 'A'; return 2;
case 196 /* Ä */: /* Ä */
*p = 'A';
if (!umlaut_to_e()) return 1;
p[1] = 'E'; return 2;
case 198 /* Æ */:
*p = 'A'; p[1] = 'E'; return 2;
case 199 /* Ç */: case 268: /* Č */
*p = 'C'; return 1;
case 270: /* Ď */
*p = 'D'; return 1;
case 200 /* È */: case 201 /* É */: case 202 /* Ê */: case 203 /* Ë */:
*p = 'E'; return 1;
case 204 /* Ì */: case 205 /* Í */: case 206 /* Î */: case 207 /* Ï */:
*p = 'I'; return 1;
case 208 /* Ð */: case 222 /* Þ */:
*p = 'T'; p[1] = 'H'; return 2;
case 315: /* Ĺ */
case 317: /* Ľ */
*p = 'L'; return 1;
case 209 /* Ñ */:
*p = 'N'; return 1;
case 210 /* Ò */: case 211 /* Ó */: case 212 /* Ô */: case 213 /* Õ */:
*p = 'O'; return 1;
case 214 /* Ö */: /* Ö */ case 0x152: /* Œ */
*p = 'O'; p[1] = 'E'; return 2;
case 352: /* Š */
case 0x15e: /* Ş */
*p = 'S'; return 1;
case 0x162: /* Ţ */
case 0x164: /* Ť */
*p = 'T'; return 1;
case 217 /* Ù */: case 218 /* Ú */: case 219 /* Û */:
*p = 'U'; return 1;
case 220 /* Ü */: /* Ü */
*p = 'U'; p[1] = 'E'; return 2;
case 221 /* Ý */:
*p = 'Y'; return 1;
case 381: /* Ž */
*p = 'Z'; return 1;
case 223 /* ß */:
p[1] = *p = 's'; return 2;
case 224 /* à */: case 225 /* á */: case 226 /* â */: case 227 /* ã */:
case 259: /* ă */
*p = 'a'; return 1;
case 228 /* ä */: /* ä */ case 230 /* æ */:
*p = 'a'; p[1] = 'e'; return 2;
case 229 /* å */:
p[1] = *p = 'a'; return 2;
case 231 /* ç */: case 269 /* č */:
*p = 'c'; return 1;
case 271: /* ď */
*p = 'd'; return 1;
case 232 /* è */: case 233 /* é */: case 234 /* ê */: case 235 /* ë */:
case 283 /* ě */:
*p = 'e'; return 1;
case 236 /* ì */: case 237 /* í */: case 238 /* î */: case 239 /* ï */:
*p = 'i'; return 1;
case 316 /* ĺ */:
case 318 /* ľ */:
*p = 'l'; return 1;
case 241 /* ñ */: case 328 /* ň */:
*p = 'n'; return 1;
case 345: /* ř */
*p = 'r'; return 1;
case 353: /* š */
case 0x15f: /* ş */
*p = 's'; return 1;
case 357: /* ť */
case 0x163: /* ţ */
*p = 't'; return 1;
case 240 /* ð */: case 254 /* þ */:
*p = 't'; p[1] = 'h'; return 2;
case 242 /* ò */: case 243 /* ó */: case 244 /* ô */: case 245 /* õ */:
*p = 'o'; return 1;
case 246 /* ö */: /* ö */ case 0x153: /* œ */
*p = 'o'; p[1] = 'e'; return 2;
case 249 /* ù */: case 250 /* ú */: case 251 /* û */:
case 367 /* ů */:
*p = 'u'; return 1;
case 252 /* ü */: /* ü */
*p = 'u'; p[1] = 'e'; return 2;
case 253 /* ý */: case 255 /* ÿ */:
*p = 'y'; return 1;
case 382: /* ž */
*p = 'z'; return 1;
case 0x2019: /* ‘ */
*p = '\''; return 1;
case 171: /* « */ case 187: /* » */
case 0x201c: /* “ */ case 0x201d: /* ” */
*p = '"'; return 1;
case 0x2026: /* … */
*p = '.'; p[1] = '.'; p[2] = '.'; return 3;
case 0x2032: /* prime -> apostrophe */
*p = '\''; return 1;
case 0x2033: /* double prime -> double quote */
*p = '"'; return 1;
case 0x2192: /* → */
*p = '-'; p[1] = '>'; return 2;
case 0x1d4d: /* gradient symbol */
*p = 'g'; p[1] = 'r'; p[2] = 'd'; return 3;
case 0x221e: /* infinity symbol */
*p = 'i'; p[1] = 'n'; p[2] = 'f'; return 3;
}
#ifdef DEBUG
/* 169 is reported (copyright symbol), but there isn't a good <= 2 ASCII
* character transliteration for that, so we handle that elsewhere. */
fprintf(stderr, "failed to transliterate codepoint %d\n", value);
#endif
return 0;
}
#if !defined _WIN32 && defined DATADIR && defined PACKAGE
/* Under Unix, we compile in the configured path */
static const char *pth_cfg_files = DATADIR "/" PACKAGE;
#else
/* On other platforms, we fall back on looking in the current directory */
static const char *pth_cfg_files = "";
#endif
static int num_msgs = 0;
static char **msg_array = NULL;
static bool msg_lang_explicit = false;
const char *msg_lang = NULL;
const char *msg_lang2 = NULL;
static char **
parse_msgs(int n, unsigned char *p, int charset_code) {
int i;
char **msgs = osmalloc(n * sizeof(char *));
for (i = 0; i < n; i++) {
unsigned char *to = p;
int ch;
msgs[i] = (char *)p;
/* If we want UTF8 anyway, we just need to find the start of each
* message */
if (charset_code == CHARSET_UTF8) {
p += strlen((char *)p) + 1;
continue;
}
while ((ch = *p++) != 0) {
/* A byte in the range 0x80-0xbf or 0xf0-0xff isn't valid in
* this state, (0xf0-0xfd mean values > 0xffff) so treat as
* literal and try to resync so we cope better when fed
* non-utf-8 data. Similarly we abandon a multibyte sequence
* if we hit an invalid character. */
if (ch >= 0xc0 && ch < 0xf0) {
int ch1 = *p;
if ((ch1 & 0xc0) != 0x80) goto resync;
if (ch < 0xe0) {
/* 2 byte sequence */
ch = ((ch & 0x1f) << 6) | (ch1 & 0x3f);
p++;
} else {
/* 3 byte sequence */
int ch2 = p[1];
if ((ch2 & 0xc0) != 0x80) goto resync;
ch = ((ch & 0x1f) << 12) | ((ch1 & 0x3f) << 6) | (ch2 & 0x3f);
p += 2;
}
}
resync:
if (ch < 127) {
*to++ = (char)ch;
} else {
/* We assume an N byte UTF-8 code never transliterates to more
* than N characters (so we can't transliterate © to (C) or
* ® to (R) for example) */
to += add_unicode(charset_code, to, ch);
}
}
*to++ = '\0';
}
return msgs;
}
/* No point making these errors translatable as they only get used if we fail
* to open the messages file.
*
* NB These messages should be in plain ASCII.
*/
static const char dontextractmsgs[][45] = {
"Out of memory (couldn't find %lu bytes)."/*1000*/,
"Problem with message file \"%s\""/*1001*/,
"I don't understand this message file version"/*1002*/,
"Message file truncated?"/*1003*/,
"Can't open message file \"%s\" using path \"%s\""/*1004*/
};
#define N_DONTEXTRACTMSGS \
((int)(sizeof(dontextractmsgs) / sizeof(dontextractmsgs[0])))
#define DONTEXTRACTMSGS_BASE 1000
static void
parse_msg_file(int charset_code)
{
FILE *fh;
unsigned char header[20];
int i;
unsigned len;
unsigned char *p;
char *fnm, *s;
int n;
#ifdef DEBUG
fprintf(stderr, "parse_msg_file(%d)\n", charset_code);
#endif
fnm = osstrdup(msg_lang);
/* trim off charset from stuff like "de_DE.iso8859_1" */
s = strchr(fnm, '.');
if (s) *s = '\0';
/* trim off any "@" modifier. */
s = strchr(fnm, '@');
if (s) *s = '\0';
fh = fopenWithPthAndExt(pth_cfg_files, fnm, EXT_SVX_MSG, "rb", NULL);
if (!fh && strlen(fnm) > 3 && fnm[2] == '_') {
/* e.g. if 'en_GB' is unknown, see if we know 'en' */
fnm[2] = '\0';
fh = fopenWithPthAndExt(pth_cfg_files, fnm, EXT_SVX_MSG, "rb", NULL);
if (!fh) fnm[2] = '_'; /* for error reporting */
}
if (!fh && !msg_lang_explicit) {
/* If msg_lang wasn't specified using environment variable SURVEXLANG,
* then default to 'en' if we don't find messages for language msg_lang.
*/
if (fnm[0] && fnm[1]) {
strcpy(fnm, "en");
} else {
free(fnm);
fnm = osstrdup("en");
}
fh = fopenWithPthAndExt(pth_cfg_files, fnm, EXT_SVX_MSG, "rb", NULL);
}
if (!fh) {
fatalerror(/*Can't open message file “%s” using path “%s”*/1004,
fnm, pth_cfg_files);
}
if (FREAD(header, 1, 20, fh) < 20 ||
memcmp(header, "Svx\nMsg\r\n\xfe\xff", 12) != 0) {
fatalerror(/*Problem with message file “%s”*/1001, fnm);
}
if (header[12] != 0)
fatalerror(/*I don't understand this message file version*/1002);
n = (header[14] << 8) | header[15];
len = 0;
for (i = 16; i < 20; i++) len = (len << 8) | header[i];
p = osmalloc(len);
if (FREAD(p, 1, len, fh) < len)
fatalerror(/*Message file truncated?*/1003);
fclose(fh);
#ifdef DEBUG
fprintf(stderr, "fnm = “%s”, n = %d, len = %d\n", fnm, n, len);
#endif
free(fnm);
msg_array = parse_msgs(n, p, charset_code);
num_msgs = n;
}
const char *
msg_cfgpth(void)
{
return pth_cfg_files;
}
const char *
msg_exepth(void)
{
return exe_pth;
}
const char *
msg_appname(void)
{
return appname_copy;
}
void
(msg_init)(char * const *argv)
{
char *p;
SVX_ASSERT(argv);
/* Point to argv[0] itself so we report a more helpful error if the
* code to work out the clean appname generates a signal */
appname_copy = argv[0];
#ifndef _WIN32
/* use name as-is on Unix - programs run from path get name as supplied */
appname_copy = osstrdup(argv[0]);
#else
// Use the lower-cased leafname on Microsoft Windows.
p = leaf_from_fnm(argv[0]);
appname_copy = p;
while (*p) {
*p = tolower((unsigned char)*p);
++p;
}
/* Remove .exe extension if present. */
if (p - appname_copy > 4 && memcmp(p - 4, ".exe", 4) == 0) {
p[-4] = '\0';
}
#endif
/* shortcut --version so you can check the version number even when the
* correct message file can't be found... */
if (argv[1] && strcmp(argv[1], "--version") == 0) {
cmdline_version();
exit(0);
}
char *pth = getenv("SURVEXLIB");
if (pth && pth[0]) {
pth_cfg_files = osstrdup(pth);
} else if (argv[0]) {
exe_pth = path_from_fnm(argv[0]);
#if !defined _WIN32 && defined DATADIR && defined PACKAGE
if (exe_pth[0]) {
struct stat buf;
#ifdef __APPLE__
# ifndef AVEN
/* On macOS the programs may be installed anywhere, with the
* share directory and the binaries in the same directory. */
p = use_path(exe_pth, "share/survex/en.msg");
if (stat(p, &buf) == 0 && S_ISREG(buf.st_mode)) {
pth_cfg_files = use_path(exe_pth, "share/survex");
goto macos_got_msg;
}
free(p);
# endif
/* In the diskimage package, this case is used for aven, and for
* the hardlinked copies of cavern and extend alongside the aven
* binary, which are the ones which aven runs.
*/
p = use_path(exe_pth, "../Resources/en.msg");
if (stat(p, &buf) == 0 && S_ISREG(buf.st_mode)) {
pth_cfg_files = use_path(exe_pth, "../Resources");
goto macos_got_msg;
}
free(p);
#endif
/* If we're run with an explicit path, check if "../lib/en.msg"
* from the program's path exists, and if so look there for
* support files - this allows us to test binaries in the build
* tree easily. */
p = use_path(exe_pth, "../lib/en.msg");
if (stat(p, &buf) == 0) {
#ifdef S_ISREG
/* POSIX way */
if (S_ISREG(buf.st_mode)) {
pth_cfg_files = use_path(exe_pth, "../lib");
}
#else
/* BSD way */
if ((buf.st_mode & S_IFMT) == S_IFREG) {
pth_cfg_files = use_path(exe_pth, "../lib");
}
#endif
}
#ifdef __APPLE__
macos_got_msg:
#endif
free(p);
}
#elif defined _WIN32
DWORD len = 256;
char *buf = NULL, *modname;
while (1) {
DWORD got;
buf = osrealloc(buf, len);
got = GetModuleFileName(NULL, buf, len);
if (got < len) break;
len += len;
}
modname = buf;
/* Strange Win32 nastiness - strip prefix "\\?\" if present */
if (strncmp(modname, "\\\\?\\", 4) == 0) modname += 4;
pth_cfg_files = path_from_fnm(modname);
free(buf);
#else
/* Get the path to the support files from argv[0] */
pth_cfg_files = exe_path;
#endif
}
msg_lang = getenv("SURVEXLANG");
#ifdef DEBUG
fprintf(stderr, "msg_lang = %p (= \"%s\")\n", msg_lang, msg_lang?msg_lang:"(null)");
#endif
msg_lang_explicit = true;
if (!msg_lang || !*msg_lang) {
msg_lang_explicit = false;
msg_lang = getenv("LC_ALL");
}
if (!msg_lang || !*msg_lang) {
msg_lang = getenv("LC_MESSAGES");
if (!msg_lang || !*msg_lang) {
msg_lang = getenv("LANG");
/* Something (AutoCAD?) on Microsoft Windows sets LANG to a number. */
if (msg_lang && !isalpha(msg_lang[0])) msg_lang = NULL;
}
if (!msg_lang || !*msg_lang) {
#ifdef _WIN32
LCID locid;
#endif
#ifdef DEFAULTLANG
msg_lang = STRING(DEFAULTLANG);
#else
msg_lang = "en";
#endif
#ifdef _WIN32
/* GetUserDefaultUILanguage() requires Microsoft Windows 2000 or
* newer, but we don't support anything earlier than Vista.
*/
locid = GetUserDefaultUILanguage();
if (locid) {
WORD langid = LANGIDFROMLCID(locid);
switch (PRIMARYLANGID(langid)) {
case LANG_BULGARIAN:
msg_lang = "bg";
break;
/* older mingw compilers don't seem to supply this value */
#ifndef LANG_CATALAN
# define LANG_CATALAN 0x03
#endif
case LANG_CATALAN:
msg_lang = "ca";
break;
case LANG_CHINESE:
msg_lang = "zh_CN";
break;
case LANG_CZECH:
msg_lang = "cs";
break;
case LANG_ENGLISH:
if (SUBLANGID(langid) == SUBLANG_ENGLISH_US)
msg_lang = "en_US";
else
msg_lang = "en";
break;
case LANG_FRENCH:
msg_lang = "fr";
break;
case LANG_GERMAN:
switch (SUBLANGID(langid)) {
case SUBLANG_GERMAN_SWISS:
msg_lang = "de_CH";
break;
default:
msg_lang = "de";
}
break;
case LANG_GREEK:
msg_lang = "el";
break;
case LANG_HUNGARIAN:
msg_lang = "hu";
break;
case LANG_INDONESIAN:
msg_lang = "id";
break;
case LANG_ITALIAN:
msg_lang = "it";
break;
case LANG_POLISH:
msg_lang = "pl";
break;
case LANG_PORTUGUESE:
if (SUBLANGID(langid) == SUBLANG_PORTUGUESE_BRAZILIAN)
msg_lang = "pt_BR";
else
msg_lang = "pt";
break;
case LANG_ROMANIAN:
msg_lang = "ro";
break;
case LANG_RUSSIAN:
msg_lang = "ru";
break;
case LANG_SLOVAK:
msg_lang = "sk";
break;
case LANG_SPANISH:
msg_lang = "es";
break;
}
}
#endif
}
}
#ifdef DEBUG
fprintf(stderr, "msg_lang = %p (= \"%s\")\n", msg_lang, msg_lang?msg_lang:"(null)");
#endif
/* On Mandrake LANG defaults to C */
if (strcmp(msg_lang, "C") == 0) msg_lang = "en";
{ /* If msg_lang has a country code, snip it out to give msg_lang2. */
size_t b = 0;
while (isalpha((unsigned char)msg_lang[b])) {
++b;
}
if (msg_lang[b] == '_') {
char * tmp;
size_t e = b + 1;
while (isalpha((unsigned char)msg_lang[e])) {
++e;
}
tmp = osstrdup(msg_lang);
memmove(tmp + b, tmp + e, strlen(tmp + e) + 1);
msg_lang2 = tmp;
}
}
#ifdef LC_MESSAGES
/* try to setlocale() appropriately too */
if (!setlocale(LC_MESSAGES, msg_lang)) {
if (msg_lang2) {
(void)setlocale(LC_MESSAGES, msg_lang2);
}
}
#endif
select_charset(default_charset());
}
#ifndef AVEN
/* Return message if messages available, else a fallback value. */
static const char *
msg_opt(int en, const char * fallback)
{
/* NB can't use SVX_ASSERT here! */
if (!msg_array || en <= 0 || en >= num_msgs) {
return fallback;
}
return msg_array[en];
}
#endif
const char *
msg(int en)
{
/* NB can't use SVX_ASSERT here! */
if (en >= DONTEXTRACTMSGS_BASE &&
en < DONTEXTRACTMSGS_BASE + N_DONTEXTRACTMSGS) {
return dontextractmsgs[en - DONTEXTRACTMSGS_BASE];
}
if (!msg_array) {
/* This should be the only other message which can be requested before
* the message file is opened and read.
*/
if (en != /*Out of memory (couldn’t find %lu bytes).*/24) {
fprintf(STDERR, "Message %d requested before fully initialised\n", en);
return "Message requested before fully initialised\n";
}
en = /*Out of memory (couldn't find %lu bytes).*/1000;
return dontextractmsgs[en - DONTEXTRACTMSGS_BASE];
}
if (en < 0 || en >= num_msgs) {
fprintf(STDERR, "Message %d out of range\n", en);
return "Message out of range\n";
}
if (en == 0) {
const char *p = msg_array[0];
if (!*p) p = "(C)";
return p;
}
return msg_array[en];
}
void
v_report(int severity, const char *fnm, int line, int col, int en, va_list ap)
{
#ifdef AVEN
(void)col;
aven_v_report(severity, fnm, line, en, ap);
#else
const char * level;
if (fnm) {
fputs(fnm, STDERR);
if (line) fprintf(STDERR, ":%d", line);
if (col > 0) fprintf(STDERR, ":%d", col);
} else {
fputs(appname_copy, STDERR);
}
fputs(": ", STDERR);
switch (severity) {
case DIAG_INFO:
/* TRANSLATORS: Indicates a informational message e.g.:
* "spoon.svx:12: info: Declination: [...]" */
level = msg_opt(/*info*/485, "info");
break;
case DIAG_WARN:
/* TRANSLATORS: Indicates a warning message e.g.:
* "spoon.svx:12: warning: *prefix is deprecated" */
level = msg_opt(/*warning*/106, "warning");
break;
default:
/* TRANSLATORS: Indicates an error message e.g.:
* "spoon.svx:13:4: error: Field may not be omitted" */
level = msg_opt(/*error*/93, "error");
break;
}
fputs(level, STDERR);
fputs(": ", STDERR);
#ifdef HAVE__VSPRINTF_P
// Microsoft's vfprintf() doesn't support positional argument specifiers,
// so we need to use the Microsoft-specific _vfprintf_p().
_vfprintf_p(STDERR, msg(en), ap);
#else
vfprintf(STDERR, msg(en), ap);
#endif
fputnl(STDERR);
#endif
switch (severity) {
case DIAG_WARN:
msg_warnings++;
break;
case DIAG_ERR:
msg_errors++;
if (msg_errors == 50)
fatalerror_in_file(fnm, 0, /*Too many errors - giving up*/19);
break;
case DIAG_FATAL:
exit(EXIT_FAILURE);
}
}
void
diag(int severity, int en, ...)
{
va_list ap;
va_start(ap, en);
v_report(severity, NULL, 0, 0, en, ap);
va_end(ap);
}
void
diag_in_file(int severity, const char *fnm, int line, int en, ...)
{
va_list ap;
va_start(ap, en);
v_report(severity, fnm, line, 0, en, ap);
va_end(ap);
}
/* Code to support switching character set at runtime (e.g. for a printer
* driver to support different character sets on screen and on the printer)
*/
typedef struct charset_li {
struct charset_li *next;
int code;
char **msg_array;
} charset_li;
static charset_li *charset_head = NULL;
static int charset = CHARSET_BAD;
int
select_charset(int charset_code)
{
int old_charset = charset;
charset_li *p;
#ifdef DEBUG
fprintf(stderr, "select_charset(%d), old charset = %d\n", charset_code,
charset);
#endif
if (charset == charset_code) return charset;
charset = charset_code;
/* check if we've already parsed messages for new charset */
for (p = charset_head; p; p = p->next) {
#ifdef DEBUG
printf("%p: code %d msg_array %p\n", p, p->code, p->msg_array);
#endif
if (p->code == charset) {
msg_array = p->msg_array;
return old_charset;
}
}
/* nope, got to reparse message file */
parse_msg_file(charset_code);
/* add to list */
p = osnew(charset_li);
p->code = charset;
p->msg_array = msg_array;
p->next = charset_head;
charset_head = p;
return old_charset;
}