source: git/src/matrix.c @ 86907b0

stereo-2025
Last change on this file since 86907b0 was 86907b0, checked in by Olly Betts <olly@…>, 12 months ago

Stop trying to reallocate stn_tab smaller

This was implemented back in 2001 with the intention of freeing up as
much memory as possible before allocating memory for the matrix.

However modern realloc() implementations are unlikely to actually
free up any memory - e.g. glibc's never shrinks, tcmalloc's
only shrinks if the size is reduced by >= 50%, which will never
happen for real-world survey data. Also, unless the implementation
can shrink a block in place we're unlikely to free up memory that
can be use for a large matrix allocation anyway.

Meanwhile even the oldest machines still in use have enough memory
that we probably don't need to worry about memory here anyway.

  • Property mode set to 100644
File size: 12.7 KB
RevLine 
[421b7d2]1/* matrix.c
[d1b1380]2 * Matrix building and solving routines
[a4adf09]3 * Copyright (C) 1993-2003,2010,2013 Olly Betts
[846746e]4 *
[89231c4]5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
[846746e]9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
[89231c4]12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
[846746e]14 *
[89231c4]15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
[ecbc6c18]17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
[d1b1380]18 */
19
[2164fa4]20/*#define SOR 1*/
[702f518]21
[032ed06]22#if 0
23# define DEBUG_INVALID 1
24#endif
25
[4c83f84]26#include <config.h>
[d1b1380]27
28#include "debug.h"
[a420b49]29#include "cavern.h"
[c082b69]30#include "filename.h"
31#include "message.h"
[d1b1380]32#include "netbits.h"
33#include "matrix.h"
34#include "out.h"
35
36#undef PRINT_MATRICES
37#define PRINT_MATRICES 0
38
39#undef DEBUG_MATRIX_BUILD
40#define DEBUG_MATRIX_BUILD 0
41
42#undef DEBUG_MATRIX
43#define DEBUG_MATRIX 0
44
45#if PRINT_MATRICES
[9965b2b]46static void print_matrix(real *M, real *B, long n);
[d1b1380]47#endif
48
[9965b2b]49static void choleski(real *M, real *B, long n);
[3fde384f]50
[d1b1380]51#ifdef SOR
[9965b2b]52static void sor(real *M, real *B, long n);
[d1b1380]53#endif
54
[a420b49]55/* for M(row, col) col must be <= row, so Y <= X */
[9965b2b]56# define M(X, Y) ((real *)M)[((((OSSIZE_T)(X)) * ((X) + 1)) >> 1) + (Y)]
[421b7d2]57              /* +(Y>X?0*printf("row<col (line %d)\n",__LINE__):0) */
[9965b2b]58/*#define M_(X, Y) ((real *)M)[((((OSSIZE_T)(Y)) * ((Y) + 1)) >> 1) + (X)]*/
[d1b1380]59
[a420b49]60static int find_stn_in_tab(node *stn);
61static int add_stn_to_tab(node *stn);
[eb18f4d]62static void build_matrix(node *list);
[d1b1380]63
64static long n_stn_tab;
65
[c19f129]66static pos **stn_tab;
[d1b1380]67
[032ed06]68extern void
[d9b5db53]69solve_matrix(node *list)
[032ed06]70{
71   node *stn;
[702f518]72   long n = 0;
[d9b5db53]73   FOR_EACH_STN(stn, list) {
[032ed06]74      if (!fixed(stn)) n++;
75   }
76   if (n == 0) return;
77
[86907b0]78   /* We need to allocate stn_tab with one entry per unfixed cluster of equated
79    * stations, but it's much simpler to count the number of unfixed nodes.
80    * This will over-count stations with more than 3 legs and equated stations
81    * but in a typical survey that's a small minority of stations.
[032ed06]82    */
[66de220]83   stn_tab = osmalloc((OSSIZE_T)(n * ossizeof(pos*)));
[4f613e0]84   n_stn_tab = 0;
[cb3d1e2]85
[d9b5db53]86   FOR_EACH_STN(stn, list) {
[032ed06]87      if (!fixed(stn)) add_stn_to_tab(stn);
88   }
89
[eb18f4d]90   build_matrix(list);
[2c9c3ff]91#if DEBUG_MATRIX
[2164fa4]92   FOR_EACH_STN(stn, list) {
[2aa930f]93      printf("(%8.2f, %8.2f, %8.2f ) ", POS(stn, 0), POS(stn, 1), POS(stn, 2));
[2164fa4]94      print_prefix(stn->name);
[2aa930f]95      putnl();
[2164fa4]96   }
[2c9c3ff]97#endif
[4f613e0]98
99   osfree(stn_tab);
[032ed06]100}
[d1b1380]101
[3fde384f]102#ifdef NO_COVARIANCES
[702f518]103# define FACTOR 1
[3fde384f]104#else
[702f518]105# define FACTOR 3
[3fde384f]106#endif
107
[a420b49]108static void
[eb18f4d]109build_matrix(node *list)
[a420b49]110{
[eb18f4d]111   if (n_stn_tab == 0) {
[5b68ae1]112      if (!fQuiet)
113         puts(msg(/*Network solved by reduction - no simultaneous equations to solve.*/74));
[dbd68203]114      return;
115   }
[eb18f4d]116   /* (OSSIZE_T) cast may be needed if n_stn_tab>=181 */
[5bb3dc4]117   real *M = osmalloc((OSSIZE_T)((((OSSIZE_T)n_stn_tab * FACTOR * (n_stn_tab * FACTOR + 1)) >> 1)) * ossizeof(real));
118   real *B = osmalloc((OSSIZE_T)(n_stn_tab * FACTOR * ossizeof(real)));
[dbd68203]119
[647407d]120   if (!fQuiet) {
[a4adf09]121      if (n_stn_tab == 1)
122         out_current_action(msg(/*Solving one equation*/78));
123      else
124         out_current_action1(msg(/*Solving %d simultaneous equations*/75), n_stn_tab);
[dbd68203]125   }
126
[3fde384f]127#ifdef NO_COVARIANCES
[5bb3dc4]128   int dim = 2;
[3fde384f]129#else
[5bb3dc4]130   int dim = 0; /* fudge next loop for now */
[3fde384f]131#endif
[a420b49]132   for ( ; dim >= 0; dim--) {
[2164fa4]133      node *stn;
134      int row;
135
[907fe10]136      /* Initialise M and B to zero - zeroing "linearly" will minimise
[421b7d2]137       * paging when the matrix is large */
[66de220]138      {
139         int end = n_stn_tab * FACTOR;
140         for (row = 0; row < end; row++) B[row] = (real)0.0;
141         end = ((OSSIZE_T)n_stn_tab * FACTOR * (n_stn_tab * FACTOR + 1)) >> 1;
142         for (row = 0; row < end; row++) M[row] = (real)0.0;
143      }
[dbd68203]144
[3c7ab9a]145      /* Construct matrix by going through the stn list.
[421b7d2]146       *
[907fe10]147       * All legs between two fixed stations can be ignored here.
[421b7d2]148       *
[3c7ab9a]149       * Other legs we want to add exactly once to M.  To achieve this we
150       * wan to:
151       *
152       * - add forward legs between two unfixed stations,
153       *
154       * - add legs from unfixed stations to fixed stations (we do them from
155       *   the unfixed end so we don't need to detect when we're at a fixed
156       *   point cut line and determine which side we're currently dealing
157       *   with).
158       *
159       * To implement this, we only look at legs from unfixed stations and add
160       * a leg if to a fixed station, or to an unfixed station and it's a
161       * forward leg.
162       */
[d9b5db53]163      FOR_EACH_STN(stn, list) {
[2164fa4]164#ifdef NO_COVARIANCES
165         real e;
166#else
[dac18d8]167         svar e;
[eb18f4d]168         delta a;
[2164fa4]169#endif
[b5d3988]170#if DEBUG_MATRIX_BUILD
[dbd68203]171         print_prefix(stn->name);
[b5d3988]172         printf(" used: %d colour %ld\n",
[a420b49]173                (!!stn->leg[2]) << 2 | (!!stn -> leg[1]) << 1 | (!!stn->leg[0]),
[b5d3988]174                stn->colour);
[3fde384f]175
[5bb3dc4]176         for (int dirn = 0; dirn <= 2 && stn->leg[dirn]; dirn++) {
[b5d3988]177#ifdef NO_COVARIANCES
[907fe10]178            printf("Leg %d, vx=%f, reverse=%d, to ", dirn,
179                   stn->leg[dirn]->v[0], stn->leg[dirn]->l.reverse);
[b5d3988]180#else
[907fe10]181            printf("Leg %d, vx=%f, reverse=%d, to ", dirn,
182                   stn->leg[dirn]->v[0][0], stn->leg[dirn]->l.reverse);
[b5d3988]183#endif
[907fe10]184            print_prefix(stn->leg[dirn]->l.to->name);
185            putnl();
186         }
[dbd68203]187         putnl();
[d1b1380]188#endif /* DEBUG_MATRIX_BUILD */
[b5d3988]189
[907fe10]190         if (!fixed(stn)) {
[5bb3dc4]191            int f = find_stn_in_tab(stn);
192            for (int dirn = 0; dirn <= 2 && stn->leg[dirn]; dirn++) {
[907fe10]193               linkfor *leg = stn->leg[dirn];
194               node *to = leg->l.to;
195               if (fixed(to)) {
196                  bool fRev = !data_here(leg);
197                  if (fRev) leg = reverse_leg(leg);
198                  /* Ignore equated nodes */
[3fde384f]199#ifdef NO_COVARIANCES
[907fe10]200                  e = leg->v[dim];
201                  if (e != (real)0.0) {
202                     e = ((real)1.0) / e;
203                     M(f,f) += e;
[f52dcc7]204                     B[f] += e * POS(to, dim);
[907fe10]205                     if (fRev) {
[f52dcc7]206                        B[f] += leg->d[dim];
[907fe10]207                     } else {
[f52dcc7]208                        B[f] -= leg->d[dim];
[564f471]209                     }
[907fe10]210                  }
[3fde384f]211#else
[907fe10]212                  if (invert_svar(&e, &leg->v)) {
213                     if (fRev) {
214                        adddd(&a, &POSD(to), &leg->d);
215                     } else {
216                        subdd(&a, &POSD(to), &leg->d);
217                     }
[5bb3dc4]218                     delta b;
[907fe10]219                     mulsd(&b, &e, &a);
[5bb3dc4]220                     for (int i = 0; i < 3; i++) {
[907fe10]221                        M(f * FACTOR + i, f * FACTOR + i) += e[i];
222                        B[f * FACTOR + i] += b[i];
[564f471]223                     }
[907fe10]224                     M(f * FACTOR + 1, f * FACTOR) += e[3];
225                     M(f * FACTOR + 2, f * FACTOR) += e[4];
226                     M(f * FACTOR + 2, f * FACTOR + 1) += e[5];
227                  }
[3fde384f]228#endif
[907fe10]229               } else if (data_here(leg)) {
230                  /* forward leg, unfixed -> unfixed */
[5bb3dc4]231                  int t = find_stn_in_tab(to);
[d1b1380]232#if DEBUG_MATRIX
[907fe10]233                  printf("Leg %d to %d, var %f, delta %f\n", f, t, e,
234                         leg->d[dim]);
[d1b1380]235#endif
[907fe10]236                  /* Ignore equated nodes & lollipops */
[3fde384f]237#ifdef NO_COVARIANCES
[907fe10]238                  e = leg->v[dim];
239                  if (t != f && e != (real)0.0) {
240                     e = ((real)1.0) / e;
241                     M(f,f) += e;
242                     M(t,t) += e;
243                     if (f < t) M(t,f) -= e; else M(f,t) -= e;
[5bb3dc4]244                     real a = e * leg->d[dim];
[907fe10]245                     B[f] -= a;
246                     B[t] += a;
247                  }
[3fde384f]248#else
[907fe10]249                  if (t != f && invert_svar(&e, &leg->v)) {
250                     mulsd(&a, &e, &leg->d);
[5bb3dc4]251                     for (int i = 0; i < 3; i++) {
[907fe10]252                        M(f * FACTOR + i, f * FACTOR + i) += e[i];
253                        M(t * FACTOR + i, t * FACTOR + i) += e[i];
254                        if (f < t)
255                           M(t * FACTOR + i, f * FACTOR + i) -= e[i];
256                        else
257                           M(f * FACTOR + i, t * FACTOR + i) -= e[i];
258                        B[f * FACTOR + i] -= a[i];
259                        B[t * FACTOR + i] += a[i];
260                     }
261                     M(f * FACTOR + 1, f * FACTOR) += e[3];
262                     M(t * FACTOR + 1, t * FACTOR) += e[3];
263                     M(f * FACTOR + 2, f * FACTOR) += e[4];
264                     M(t * FACTOR + 2, t * FACTOR) += e[4];
265                     M(f * FACTOR + 2, f * FACTOR + 1) += e[5];
266                     M(t * FACTOR + 2, t * FACTOR + 1) += e[5];
267                     if (f < t) {
268                        M(t * FACTOR + 1, f * FACTOR) -= e[3];
269                        M(t * FACTOR, f * FACTOR + 1) -= e[3];
270                        M(t * FACTOR + 2, f * FACTOR) -= e[4];
271                        M(t * FACTOR, f * FACTOR + 2) -= e[4];
272                        M(t * FACTOR + 2, f * FACTOR + 1) -= e[5];
273                        M(t * FACTOR + 1, f * FACTOR + 2) -= e[5];
274                     } else {
275                        M(f * FACTOR + 1, t * FACTOR) -= e[3];
276                        M(f * FACTOR, t * FACTOR + 1) -= e[3];
277                        M(f * FACTOR + 2, t * FACTOR) -= e[4];
278                        M(f * FACTOR, t * FACTOR + 2) -= e[4];
279                        M(f * FACTOR + 2, t * FACTOR + 1) -= e[5];
280                        M(f * FACTOR + 1, t * FACTOR + 2) -= e[5];
[dbd68203]281                     }
282                  }
[907fe10]283#endif
[564f471]284               }
[907fe10]285            }
[dbd68203]286         }
[d1b1380]287      }
288
289#if PRINT_MATRICES
[eb18f4d]290      print_matrix(M, B, n_stn_tab * FACTOR); /* 'ave a look! */
[d1b1380]291#endif
292
293#ifdef SOR
[032ed06]294      /* defined in network.c, may be altered by -z<letters> on command line */
[a420b49]295      if (optimize & BITA('i'))
[eb18f4d]296         sor(M, B, n_stn_tab * FACTOR);
[dbd68203]297      else
[d1b1380]298#endif
[eb18f4d]299         choleski(M, B, n_stn_tab * FACTOR);
[d1b1380]300
[dbd68203]301      {
[5bb3dc4]302         for (int m = (int)(n_stn_tab - 1); m >= 0; m--) {
[3fde384f]303#ifdef NO_COVARIANCES
[c19f129]304            stn_tab[m]->p[dim] = B[m];
[032ed06]305            if (dim == 0) {
[4c07c51]306               SVX_ASSERT2(pos_fixed(stn_tab[m]),
[032ed06]307                       "setting station coordinates didn't mark pos as fixed");
308            }
[3fde384f]309#else
[5bb3dc4]310            for (int i = 0; i < 3; i++) {
[c19f129]311               stn_tab[m]->p[i] = B[m * FACTOR + i];
[702f518]312            }
[4c07c51]313            SVX_ASSERT2(pos_fixed(stn_tab[m]),
[032ed06]314                    "setting station coordinates didn't mark pos as fixed");
[3fde384f]315#endif
[a420b49]316         }
[d1b1380]317#if EXPLICIT_FIXED_FLAG
[5bb3dc4]318         for (int m = n_stn_tab - 1; m >= 0; m--) fixpos(stn_tab[m]);
[d1b1380]319#endif
[dbd68203]320      }
321   }
322   osfree(B);
323   osfree(M);
[d1b1380]324}
325
[a420b49]326static int
327find_stn_in_tab(node *stn)
328{
[dbd68203]329   int i = 0;
[eb18f4d]330   pos *p = stn->name->pos;
331   while (stn_tab[i] != p)
[dbd68203]332      if (++i == n_stn_tab) {
[d1b1380]333#if DEBUG_INVALID
[a420b49]334         fputs("Station ", stderr);
[eb18f4d]335         fprint_prefix(stderr, stn->name);
336         fputs(" not in table\n\n", stderr);
[d1b1380]337#endif
338#if 0
[dbd68203]339         print_prefix(stn->name);
[b5d3988]340         printf(" used: %d colour %d\n",
[dbd68203]341                (!!stn->leg[2])<<2 | (!!stn->leg[1])<<1 | (!!stn->leg[0]),
[eb18f4d]342                stn->colour);
[d1b1380]343#endif
[a420b49]344         fatalerror(/*Bug in program detected! Please report this to the authors*/11);
[dbd68203]345      }
346   return i;
[d1b1380]347}
348
[a420b49]349static int
350add_stn_to_tab(node *stn)
351{
[dbd68203]352   int i;
[eb18f4d]353   pos *p = stn->name->pos;
[a420b49]354   for (i = 0; i < n_stn_tab; i++) {
[eb18f4d]355      if (stn_tab[i] == p) return i;
[dbd68203]356   }
[eb18f4d]357   stn_tab[n_stn_tab++] = p;
[dbd68203]358   return i;
[d1b1380]359}
360
[702f518]361/* Solve MX=B for X by Choleski factorisation - modified Choleski actually
362 * since we factor into LDL' while Choleski is just LL'
363 */
[d1b1380]364/* Note M must be symmetric positive definite */
365/* routine is entitled to scribble on M and B if it wishes */
[a420b49]366static void
[9965b2b]367choleski(real *M, real *B, long n)
[a420b49]368{
[5bb3dc4]369   for (int j = 1; j < n; j++) {
[3fde384f]370      real V;
[5bb3dc4]371      for (int i = 0; i < j; i++) {
[421b7d2]372         V = (real)0.0;
[5bb3dc4]373         for (int k = 0; k < i; k++) V += M(i,k) * M(j,k) * M(k,k);
[a420b49]374         M(j,i) = (M(j,i) - V) / M(i,i);
[dbd68203]375      }
376      V = (real)0.0;
[5bb3dc4]377      for (int k = 0; k < j; k++) V += M(j,k) * M(j,k) * M(k,k);
[3fde384f]378      M(j,j) -= V; /* may be best to add M() last for numerical reasons too */
[dbd68203]379   }
[d1b1380]380
[dbd68203]381   /* Multiply x by L inverse */
[5bb3dc4]382   for (int i = 0; i < n - 1; i++) {
383      for (int j = i + 1; j < n; j++) {
[dbd68203]384         B[j] -= M(j,i) * B[i];
[3fde384f]385      }
[dbd68203]386   }
[d1b1380]387
[dbd68203]388   /* Multiply x by D inverse */
[5bb3dc4]389   for (int i = 0; i < n; i++) {
[dbd68203]390      B[i] /= M(i,i);
[3fde384f]391   }
392
393   /* Multiply x by (L transpose) inverse */
[5bb3dc4]394   for (int i = (int)(n - 1); i > 0; i--) {
395      for (int j = i - 1; j >= 0; j--) {
[421b7d2]396         B[j] -= M(i,j) * B[i];
[3fde384f]397      }
[dbd68203]398   }
[d1b1380]399
[dbd68203]400   /* printf("\n%ld/%ld\n\n",flops,flopsTot); */
[d1b1380]401}
402
403#ifdef SOR
404/* factor to use for SOR (must have 1 <= SOR_factor < 2) */
[702f518]405#define SOR_factor 1.93 /* 1.95 */
[d1b1380]406
407/* Solve MX=B for X by SOR of Gauss-Siedel */
408/* routine is entitled to scribble on M and B if it wishes */
[a420b49]409static void
[9965b2b]410sor(real *M, real *B, long n)
[a420b49]411{
[dbd68203]412   long it = 0;
[d1b1380]413
[5bb3dc4]414   real *X = osmalloc(n * ossizeof(real));
[d1b1380]415
[5bb3dc4]416   const real threshold = 0.00001;
[d1b1380]417
[647407d]418   printf("reciprocating diagonal\n"); /* TRANSLATE */
[d1b1380]419
[3fde384f]420   /* munge diagonal so we can multiply rather than divide */
[5bb3dc4]421   for (int row = n - 1; row >= 0; row--) {
[dbd68203]422      M(row,row) = 1 / M(row,row);
[702f518]423      X[row] = 0;
[dbd68203]424   }
[d1b1380]425
[647407d]426   printf("starting iteration\n"); /* TRANSLATE */
[d1b1380]427
[5bb3dc4]428   real t;
[dbd68203]429   do {
430      /*printf("*");*/
431      it++;
432      t = 0.0;
[5bb3dc4]433      for (int row = 0; row < n; row++) {
434         real x = B[row];
435         int col;
[a420b49]436         for (col = 0; col < row; col++) x -= M(row,col) * X[col];
437         for (col++; col < n; col++) x -= M(col,row) * X[col];
[dbd68203]438         x *= M(row,row);
[5bb3dc4]439         real delta = (x - X[row]) * SOR_factor;
[dbd68203]440         X[row] += delta;
[5bb3dc4]441         real t2 = fabs(delta);
[dbd68203]442         if (t2 > t) t = t2;
443      }
[702f518]444      printf("% 6d: %8.6f\n", it, t);
[dbd68203]445   } while (t >= threshold && it < 100000);
[d1b1380]446
[dbd68203]447   if (t >= threshold) {
448      fprintf(stderr, "*not* converged after %ld iterations\n", it);
449      BUG("iteration stinks");
450   }
[d1b1380]451
[647407d]452   printf("%ld iterations\n", it); /* TRANSLATE */
[d1b1380]453
454#if 0
[dbd68203]455   putnl();
[5bb3dc4]456   for (int row = n - 1; row >= 0; row--) {
[dbd68203]457      t = 0.0;
[5bb3dc4]458      for (int col = 0; col < row; col++) t += M(row, col) * X[col];
[a420b49]459      t += X[row] / M(row, row);
460      for (col = row + 1; col < n; col++)
461         t += M(col, row) * X[col];
[b5d3988]462      printf("[ %f %f ]\n", t, B[row]);
[dbd68203]463   }
[d1b1380]464#endif
465
[5bb3dc4]466   for (int row = n - 1; row >= 0; row--) B[row] = X[row];
[d1b1380]467
[dbd68203]468   osfree(X);
[647407d]469   printf("\ndone\n"); /* TRANSLATE */
[dbd68203]470}
[d1b1380]471#endif
472
473#if PRINT_MATRICES
[a420b49]474static void
[9965b2b]475print_matrix(real *M, real *B, long n)
[a420b49]476{
[dbd68203]477   printf("Matrix, M and vector, B:\n");
[5bb3dc4]478   for (long row = 0; row < n; row++) {
479      long col;
[a420b49]480      for (col = 0; col <= row; col++) printf("%6.2f\t", M(row, col));
481      for (; col <= n; col++) printf(" \t");
[dbd68203]482      printf("\t%6.2f\n", B[row]);
483   }
484   putnl();
485   return;
[d1b1380]486}
487#endif
Note: See TracBrowser for help on using the repository browser.