/*************************************************************
*  This file is part of the Surface Evolver source code.     *
*  Programmer:  Ken Brakke, brakke@susqu.edu                 *
*************************************************************/
#include "include.h"

/*************************************************************
*
*     file:        mindeg.c
*
*     Purpose:    My own minimum degree sparse matrix factoring algorithm
*/

/* Algorithm: at each stage, eliminated vertices fall into disjoint
clusters, representing factored hessian of region with fixed boundary.
Vertex of minimal degree is eliminated, sometimes resulting in 
merging of clusters. */
 
/* clusters or regions also called cliques in the literature */

/* Nodes are gathered into supernodes, defined as having same set of
    cliques.  Supernodes are ordered by degree, and independent 
    supernodes are eliminated in degree order until reach lower bound
    estimate for next round.  Independence means not sharing region.
*/
/* This version has supernodes as persistent structures */

#define MEMTEST
#define MEMTESTX { char *ptr,*q; q=malloc(20); ptr=malloc(5*4); myfree(ptr); myfree(q);}

/* data structures */

static struct md_vertex { 
                     int supernode; /* which it belongs to */
                     int timestamp;  
                     int newspot; /* for merging region matrices */
                  } *vlist;

struct region { int vercount;  /* number of boundary vertices */
                int verlist;  /* vertex list spot, dynamically allocated */
                int verpnum;  /* vertex positions in parent */
                int supercount; /* number of boundary supernodes */
                int superlist; /* boundary supernode list spot */
                int timestamp; /* marking for independence */
                int merged;     /* region merged with, or self */
                int son; /* for start of merge list */ 
                int size;  /* number of vertices, incl eliminated */
#define BROTHER superlist
                /* for merging to parent */
                int next;  /* doubly linked list of actives */
                int prev;  /* doubly linked list of actives */
             } ;
static int regionstart; /* start of active list */
static  int last_region;  /* end of region chain */

/* supernode is collection of equivalent vertices */
/* defined as having same set of regions */

static struct supernode { int rcount;  /* number of regions */
                         int rlist;    /* list of regions spot */
                         int degree;
                         float height; /* for ordering of elimination */
                         int verlist;  /* list of vertices spot */
                         int vercount; /* number of vertices */
                         int rtimestamp; /* for marking on changed region */
                         int stimestamp; /* for marking touched */
                         int next;  /* doubly linked list of actives */
                         int prev;  /* doubly linked list of actives */
                        } *slist ;
static int superstart; /* start of active list */

static int margin;  /* how high to go above minimum degree, command line -m */
static int total_fill;  /* total number of fill in factored matrix */
static int total_flops; /* total operation count, counting mul+add as 1 */

void dsolve();
static int debug_level; /* command line arg for printing diagnostics.
                          0  none
                          1  print lowest degree
                          2  print supernode sizes
                          3  print vertex elim
                          4  print region absorbing
                          5  print region merging
                      */
static int minabsorb = 2; /* max size region to try to absorb */
static int vtimestamp;  /* for marking vertices */
static int stimestamp;  /* for marking supernodes */
static int rtimestamp; /* for marking regions for independence */ 
static int old_rtimestamp; /* for saving previous value */ 

static int L_total;  /* allocated spaces for final trianuglar matrix */
static int elim_count; /* number of variables eliminated so far */

/* trial stuff */
int K;  /* current row number */
int *xIL;  /* next uneliminated vertex */
int *xJL;  /* lists of rows to be added to uneliminated rows: 
                  i > K => xJL(i) is first row to be added to row i,
                  i < K => xJL(i) is row following row i in some list of rows.
                  xJL(i) = -1 indicates end of list */ 

int md_alloc ARGS((struct linsys *, int));
int region_alloc ARGS((struct linsys *));
int supernode_compare ARGS((struct supernode **,struct supernode **));
int degree_compare ARGS((struct supernode *,struct supernode *));
void do_region_absorb ARGS((struct linsys *,int,int));
int exact_degree ARGS((struct linsys *,struct supernode *));
int exact_sdegree ARGS((struct linsys *,struct supernode *));
void  traverse_recur ARGS((struct linsys *,struct region *,int*,int*));
void  permute_recur ARGS((struct linsys *,struct region *));
void  factor_recur ARGS((struct linsys *,struct region *));
void  sparse_permute ARGS((struct linsys *));
void md_vertex_setup ARGS((struct linsys *));
void md_supernode_setup ARGS((struct linsys *));
void md_region_string ARGS((struct linsys *));
void md_supernode_regions ARGS((struct linsys *));
void degree_sort ARGS((struct linsys *));
void multiple_eliminate ARGS((struct linsys *));
void region_absorb ARGS((struct linsys *));
void clean_supernodes ARGS((struct linsys *));
void merge_supernodes ARGS((struct linsys *));
void traverse_region_tree ARGS((struct linsys *));
void mass_eliminate ARGS((struct linsys *,struct supernode*));

/*****************************************************************************
*
* function: md_alloc()
*
* purpose: allocate workspace in linsys ISP. ISP[0] is amount allocated in ints.
*             No deallocation. No initialization. NSP is arena size in ints.
*             User must allocate ISP and set NSP before first call.
* return:  offset from start
*/
int md_alloc(S,bytes)
struct linsys *S;
int bytes; /* bytes wanted */
{ int spot; 
  int size; /* in ints */

  size = (bytes+3)/sizeof(int);
  if ( S->ISP[0] + size + 1 > S->NSP )
  { int newsize = (int)(1.5*S->NSP)+bytes;
     S->ISP = (int*)kb_realloc((char*)S->ISP,newsize*sizeof(int),
                                        S->NSP*sizeof(int));
     S->NSP = newsize;
  }
  spot = S->ISP[0] + 1;
  S->ISP[0] += size;
  return spot;
}
/* macros for conveniently accessing working storage */
#define REG(spot) ((struct region *)(S->ISP+(spot)))
#define SNODE(spot) ((struct supernode *)(S->ISP+(spot)))
#define INT(spot)  (S->ISP+(spot))
#define CHAR(spot)  ((char*)(S->ISP+(spot)))
/***************************************************************************
*
* function: region_alloc()
*
* purpose: allocate new region structure in workspace
*
*/
int region_alloc(S)
struct linsys *S;
{ int spot;
  struct region *r;

  spot = md_alloc(S,sizeof(struct region));
  if ( last_region >= 0 ) REG(last_region)->next = spot;
  else regionstart = spot;
  r = REG(spot);
  r->prev = last_region;
  r->next = -1;
  r->merged = spot;  /* self */
  r->son = -1;
  last_region = spot;
  return spot;
}

/*****************************************************************************
*
* function: md_vertex_setup()
*
* purpose: initialize matrix vertex list, one vertex per matrix row
*             
*/

void md_vertex_setup(S)
struct linsys *S;
{ int n,k;
  vlist = (struct md_vertex *)temp_calloc(S->N,sizeof(struct md_vertex));
  for ( n = 0 ; n <= web.skel[VERTEX].max_ord ; n++ )
     for ( k = 0 ; k < vhead[n].freedom ; k++ )
        vlist[vhead[n].rownum+k].newspot = ordinal(vhead[n].v_id);
}

/*****************************************************************************
*
* function: md_region_string()
*
* purpose: initialize region list with one region per edge, 
*/

void md_region_string(S)
struct linsys *S;
{ int j,k,n1,n2,p,q;
  int rcount;
  struct region *r;
  vertex_id v_id;
  char *entrymark;  /* for tagging done entries of A */
  int size;
  int newspot,spot;  /* index into workspace */

  entrymark = temp_calloc(S->IA[S->N],sizeof(char));

  /* estimate regions needed */
  regionstart = md_alloc(S,0);          /* since may need realloc for vertices */
  last_region = -1;

  FOR_ALL_VERTICES(v_id)  /* mark as undone */
        set_attr(v_id,NEWVERTEX);

  /* fill in one region per edge detected in array */
  rcount = 0;
  for ( p = 0 ; p < S->N ; p++ )
  { for ( q =  S->IA[p]-A_OFF ; q < S->IA[p+1]-A_OFF ; q++ )
     { 
        if ( entrymark[q] ) continue;
        n1 = vlist[p].newspot;
        n2 = vlist[S->JA[q]-A_OFF].newspot;
        if ( n1 == n2 ) continue;  /* don't do just one vertex here */

        if ( n1 > n2 ) { int tmp = n1; n1 = n2; n2 = tmp; }

        /* need new region */
        spot = region_alloc(S);
        r = REG(spot);

        r->supercount = 2;
        newspot = md_alloc(S,r->supercount*sizeof(int));
        r = REG(spot);  /* in case of reallocation of workspace */
        r->superlist = newspot;
        INT(r->superlist)[0] = vlist[vhead[n1].rownum].supernode;
        INT(r->superlist)[1] = vlist[vhead[n2].rownum].supernode;
        size = vhead[n1].freedom + vhead[n2].freedom;
        r->size = r->vercount = size;
        newspot = md_alloc(S,r->vercount*sizeof(int));
        r = REG(spot);  /* in case of reallocation of workspace */
        r->verlist = newspot;
        for ( k = 0 ; k < vhead[n1].freedom ; k++ )
          INT(r->verlist)[k] = vhead[n1].rownum + k;
        for ( k = 0 ; k < vhead[n2].freedom ; k++ )
          INT(r->verlist)[vhead[n1].freedom+k] = vhead[n2].rownum + k;

        /* the edge */
        for ( k = 0 ; k < vhead[n1].freedom ; k++ )
        { int row;
          row = vhead[n1].rownum + k;
          for ( spot = S->IA[row]-A_OFF ; spot < S->IA[row+1]-A_OFF ; spot++ )
             if ( S->JA[spot]-A_OFF >= vhead[n2].rownum ) break;
          /* entry may not exist if value 0 */
          for ( j = 0 ; (spot<S->IA[row+1]-A_OFF) && (j<vhead[n2].freedom) ; j++ )
          {
             if ( S->JA[spot]-A_OFF > vhead[n2].rownum+j ) continue;
             entrymark[spot] = 'x';
             spot++;
          }
        }
        /* the vertices */
        if ( get_attr(vhead[n1].v_id) & NEWVERTEX )
        { for ( k = 0 ; k < vhead[n1].freedom ; k++ )
          { int row;
             row = vhead[n1].rownum + k;
             for ( spot = S->IA[row]-A_OFF ; spot < S->IA[row+1]-A_OFF ; spot++ )
                if ( S->JA[spot]-A_OFF >= vhead[n1].rownum ) break;
             for ( j = 0 ; (spot<S->IA[row+1]-A_OFF) && (j<vhead[n1].freedom) ; j++ )
             { if ( S->JA[spot]-A_OFF > vhead[n1].rownum+j ) continue;
                entrymark[spot] = 'x';
                spot++;
             }
          }
          unset_attr(vhead[n1].v_id,NEWVERTEX);
        }
        if ( get_attr(vhead[n2].v_id) & NEWVERTEX )
        { for ( k = 0 ; k < vhead[n2].freedom ; k++ )
          { int row;
             row = vhead[n2].rownum + k;
             for ( spot = S->IA[row]-A_OFF ; spot < S->IA[row+1]-A_OFF ; spot++ )
                if ( S->JA[spot]-A_OFF >= vhead[n2].rownum ) break;
             for ( j = 0 ; (spot<S->IA[row+1]-A_OFF)&&(j<vhead[n2].freedom) ; j++ )
             { if ( S->JA[spot]-A_OFF > vhead[n2].rownum+j ) continue;
                entrymark[spot] = 'x';
                spot++;
             }
          }
          unset_attr(vhead[n2].v_id,NEWVERTEX);
        }


        rcount++;
     }
  }

  /* now see if any vertices have been missed */
  FOR_ALL_VERTICES(v_id)
     { if ( !(get_attr(v_id) & NEWVERTEX) ) continue;
        n1 = ordinal(v_id);
        if ( vhead[n1].freedom == 0 ) continue;

        /* need new region */
        spot = region_alloc(S);
        r = REG(spot);

        r->supercount = 1;
        newspot = md_alloc(S,r->supercount*sizeof(int));
        r = REG(spot);  /* in case of reallocation of workspace */
        r->superlist = newspot;
        INT(r->superlist)[0] = vlist[vhead[n1].rownum].supernode;
        size = vhead[n1].freedom;
        r->size = r->vercount = size;
        newspot = md_alloc(S,r->vercount*sizeof(int));
        r = REG(spot);  /* in case of reallocation of workspace */
        r->verlist = newspot;
        for ( k = 0 ; k < vhead[n1].freedom ; k++ )
          INT(r->verlist)[k] = vhead[n1].rownum + k;

        /* fill matrix */
        for ( k = 0 ; k < vhead[n1].freedom ; k++ )
        { int row;
          row = vhead[n1].rownum + k;
          for ( spot = S->IA[row]-A_OFF ; spot < S->IA[row+1]-A_OFF ; spot++ )
             if ( S->JA[spot]-A_OFF >= vhead[n1].rownum ) break;
          for ( j=0 ; (spot<S->IA[row+1]-A_OFF) && (j<vhead[n1].freedom) ; j++ )
          { if ( S->JA[spot]-A_OFF > vhead[n1].rownum+j ) continue;
             entrymark[spot] = 'x';
             spot++;
          }
        }
        unset_attr(vhead[n1].v_id,NEWVERTEX);
        rcount++;
     }

  temp_free(entrymark);
}

/*****************************************************************************
*
* function: md_region_soapfilm()
*
* purpose: initialize region list with one region per facet or edge
*             Film quadratic mode will need per facet.
*/

#ifdef FACETREGIONS
void md_region_soapfilm(S)
struct linsys *S;
{ int i,j,k,n;
  int ecount;
  struct md_vertex *v;
  struct region *r;
}
#endif

/*****************************************************************************
*
* function: md_supernode_regions()
*
* purpose: set up lists of regions per supernode
*/
void md_supernode_regions(S)
struct linsys *S;
{ int k,rnum,snum;
  struct region *r;
  struct supernode *s;

  /* count number of correspondences needed */
  for ( rnum = regionstart ; rnum >= 0 ; rnum = r->next )
  { r = REG(rnum);
     for ( k = 0 ; k < r->supercount ; k++ )
        slist[INT(r->superlist)[k]].rcount++;
  }

  for ( snum = superstart ; snum >= 0 ; snum = s->next )
  { s = slist + snum;
     s->rlist = md_alloc(S,s->rcount*sizeof(int));
     s->rcount = 0;  /* reset so can use as index in loading */
  }
  for ( rnum = regionstart; rnum >= 0 ; rnum = r->next )
  { r = REG(rnum);
     for ( k = 0 ; k < r->supercount ; k++ )
     { s = slist + INT(r->superlist)[k];
        INT(s->rlist)[s->rcount++] = rnum;
     } 
  }
}

/***************************************************************************
*
* function: md_supernode_setup()
*
* purpose: Create one supernode per vertex. Does not set regions.
*/
int supercount;

void md_supernode_setup(S)
struct linsys *S;
{ int n,i;
  struct supernode *s;
  int vvcount; /* total number of vertices */
  
  /* allocate supernode list */
  vvcount = web.skel[VERTEX].max_ord+1;
  for ( n = 0, supercount = 0; n < vvcount ; n++ )
     if ( vhead[n].freedom > 0 ) supercount++;
  slist = (struct supernode *)temp_calloc(supercount,sizeof(struct supernode));
  for ( n = 0, s = slist, supercount = 0; n < vvcount ; n++ )
     if ( vhead[n].freedom > 0 )
     { s->vercount = vhead[n].freedom;
        s->verlist = md_alloc(S,s->vercount*sizeof(int));
        for ( i = 0 ; i < vhead[n].freedom ; i++ )
        { INT(s->verlist)[i] = vhead[n].rownum + i;
          vlist[INT(s->verlist)[i]].supernode = supercount;
        }
        /* s->height = get_coord(vhead[n].v_id)[0];*/ /* not so good */
        s->height = (float)n; /* pretty good */
        s->next = supercount+1;
        s->prev = supercount-1;
        supercount++;
        s++; 
     }
  slist[0].prev = -1;
  slist[supercount-1].next = -1;
  superstart = 0;
}

/****************************************************************************
*
* function: clean_supernodes()
*
* purpose: eliminate merged regions from supernode region lists
*/
void clean_supernodes(S)
struct linsys *S;
{ struct supernode *s;
  int i,j,k,m;
  int n;
  for ( n = superstart ; n >= 0 ; n = s->next )
  { s = slist + n;
     for ( i=0,j=0 ; i < s->rcount ; i++ )
     { int rnum = INT(s->rlist)[i];
        while ( REG(rnum)->merged != rnum ) rnum = REG(rnum)->merged;
        if ( REG(rnum)->timestamp > old_rtimestamp )
          s->rtimestamp = REG(rnum)->timestamp;  /* mark as affected */
        for ( k = 0 ; k < j ; k++ )
        { if ( INT(s->rlist)[k] == rnum ) break; /* duplicate */
          if ( INT(s->rlist)[k] > rnum ) /* insert */
             { for ( m = j ; m > k ; m-- ) INT(s->rlist)[m] = INT(s->rlist)[m-1];
                INT(s->rlist)[k] = rnum; 
                j++;
                break;
             }
        }
        if ( k == j ) { INT(s->rlist)[j++] = rnum; }
     }
     s->rcount = j;
  }
}
/****************************************************************************
*
* function: supernode_compare()
*
* purpose: lexical comparison of supernode region lists
* return  -1 for a < b, 0 for a == b, 1 for a > b
*/
struct linsys *SSS;  /* for communication with compare routine */
int supernode_compare(aa,bb)
struct supernode **aa,**bb;
{ int k;
  struct linsys *S;
  struct supernode *a = *aa, *b = *bb;
  if ( a->rcount < b->rcount ) return -1;
  if ( a->rcount > b->rcount ) return 1;
  S = SSS;
  for ( k = 0 ; k < a->rcount ; k++ )
  { if ( INT(a->rlist)[k] < INT(b->rlist)[k] ) return -1;
     if ( INT(a->rlist)[k] > INT(b->rlist)[k] ) return  1;
  }
  return 0;
}
/****************************************************************************
*
* function: merge_supernodes()
*
* purpose: merge supernodes with same regions
*/
void merge_supernodes(S)
struct linsys *S;
{
  struct supernode **sslist,*s,*ss;
  int n,k,j,i;
  int count;
  int newverlist;

  if ( superstart < 0 ) { supercount = 0; return; } /* all done */

  /* make list to sort */
  sslist = (struct supernode **)temp_calloc(supercount,sizeof(struct supernode *));
  /* fill list */
  for ( n = superstart, count = 0 ; n >= 0 ; n = s->next )
  { s = slist + n;  /* only do touched supernodes */
     if ( s->rtimestamp > old_rtimestamp ) sslist[count++] = s;
  }
  /* sort */
  SSS = S; /* for communication with compare routine */
  qsort((char*)sslist,count,sizeof(struct supernode *),FCAST supernode_compare);
  /* merge */
  for ( n = 1, k = 0 ; n < count ; n++ )
  { s = sslist[k];
     if ( supernode_compare(sslist+n ,&s) == 0 )
     { /* merge */
        ss = sslist[n];
        if ( debug_level > 7 )
          printf("Merging supernode %d into %d.\n",ss-slist,s-slist);
        newverlist = md_alloc(S,(s->vercount+ss->vercount)*sizeof(int));
        memcpy(CHAR(newverlist),CHAR(s->verlist),s->vercount*sizeof(int));
        memcpy(CHAR(newverlist+s->vercount),CHAR(ss->verlist),
                  ss->vercount*sizeof(int));
        s->verlist = newverlist;
        s->vercount += ss->vercount;
        if ( s->height < ss->height ) s->height = ss->height;
        ss->verlist = -1;
        ss->vercount = 0;
        if ( ss->next >= 0 ) slist[ss->next].prev = ss->prev;
        if ( ss->prev >= 0 ) slist[ss->prev].next = ss->next;
        else superstart = ss->next;
        supercount--;

        /* remove from region supernode list */
        for ( j = 0 ; j < ss->rcount ; j++ )
        { struct region *r = REG(INT(ss->rlist)[j]);
          for ( i = 0 ; i < r->supercount ; i++ )
             if ( INT(r->superlist)[i] == (sslist[n]-slist) ) break;
          r->supercount--;
          for ( ; i < r->supercount ; i++ )
             INT(r->superlist)[i] = INT(r->superlist)[i+1];
        }
     }
     else /* keep supernode */
     { k = n;
     }
  }
  temp_free((char*)sslist);
}

/****************************************************************************
*
* function do_region_absorb(keeper,goner)
*
* purpose: absorb one region into another
*
*/
void do_region_absorb(S,keeper,goner)
struct linsys *S;
int keeper,goner; /* the regions */
{ int n;
  struct region *rk = REG(keeper),*rg = REG(goner);

  /* find spots in keeper matrix */
  for ( n = 0 ; n < rk->vercount ; n++ )
     vlist[INT(rk->verlist)[n]].newspot = n;
  rg->merged = keeper;
  if ( rg->next >= 0 ) REG(rg->next)->prev = rg->prev; 
  else last_region = rg->prev;
  if ( rg->prev >= 0 ) REG(rg->prev)->next = rg->next;
  else regionstart = rg->next;
  rg->BROTHER = rk->son;
  rk->son = goner;
/*  rg->size = rg->vercount; */
}

/****************************************************************************
*
* function region_absorb()
*
* purpose: see if any regions entirely contained in others
*
*/

void region_absorb(S)
struct linsys *S;
{ int i,j,k;
  struct region *r;
  struct supernode *s;
  int absorbcount = 0;
  int maxab = 0; 

  for ( i = regionstart ; i >= 0 ; i = r->next )
  { r = REG(i);
     if ( r->supercount > minabsorb ) continue;
     /* timestamp supernodes of r */
     stimestamp++;
     for ( j = 0 ; j < r->supercount ; j++ )
        slist[INT(r->superlist)[j]].stimestamp = stimestamp;
     /* pick a supernode */
     s = slist + INT(r->superlist)[0];
     /* test regions of s */
     for ( k = 0 ; k < s->rcount ; k++ )
     { int rnum = INT(s->rlist)[k];
        struct region *rr;
        int count;
        
        while ( REG(rnum)->merged != rnum ) rnum = REG(rnum)->merged;
        rr = REG(rnum);
        if ( rr->supercount <= r->supercount ) continue;
        for ( j = 0, count = 0 ; j < rr->supercount ; j++ )
          count += (slist[INT(rr->superlist)[j]].stimestamp == stimestamp);
        if ( count == r->supercount ) /* have it */
        {
          if ( r->supercount > maxab ) maxab = r->supercount;
          do_region_absorb(S,rnum,i);
          if ( debug_level >= 5 )
             printf("Absorbing region %d with %d.\n",i,rnum);
          absorbcount++;
          break;
        }
     }
  }
  if ( debug_level > 1 ) 
      printf("Absorbed %d; max size %d\n",absorbcount,maxab);
}

/****************************************************************************
*
* function: degree_compare()
*
* purpose: compare supernode degrees, breaking ties by verlist spot.
*/

int degree_compare(s1,s2)
struct supernode *s1,*s2;
{ int diff = s1->degree - s2->degree;
  if ( diff ) return diff;
  return (s1->height < s2->height) ? -1 : 1;  
  /* high order, since high first */
}

/**************************************************************************
*
* function: exact_degree()
*
* purpose: calculate exact external degree of supernode as union of
*             component regions.  Union is calculated by timestamping
*             vertices.
* return:  number of vertices in union.
*/

int exact_degree(S,s)
struct linsys *S;
struct supernode *s;
{ int i,j,degree;
  struct region *r;
  struct supernode *ss;

  stimestamp += 2; /* for current supernode */
  SSS = S;
  for ( i = 0, degree = 0 ; i < s->rcount ; i++ )
  { r = REG(INT(s->rlist)[i]);
     for ( j = 0 ; j < r->supercount ; j++ )
     { ss = slist + INT(r->superlist)[j];
        if ( ss->stimestamp < stimestamp )
        { degree += ss->vercount ; ss->stimestamp = stimestamp; }
     }
  }
  return degree - s->vercount;
}


/**************************************************************************
*
* function: exact_sdegree()
*
* purpose: calculate exact supernode degree of supernode as union of
*             component regions.  Union is calculated by timestamping
*             supernode.
* return:  number of supernodes in union, including self.
*/

int exact_sdegree(S,s)
struct linsys *S;
struct supernode *s;
{ int i,j,degree;
  struct region *r;
  struct supernode *ss;

  stimestamp++; /* for current supernode */
  for ( i = 0, degree = 0 ; i < s->rcount ; i++ )
  { r = REG(INT(s->rlist)[i]);
     for ( j = 0 ; j < r->supercount ; j++ )
     { ss = slist + INT(r->superlist)[j];
        if ( ss->stimestamp < stimestamp )
        { degree++ ; ss->stimestamp = stimestamp; }
     }
  }
  return degree;
}

/****************************************************************************
*
* function: degree_sort()
*
* purpose: order supernodes by degree. Degree is external degree, the number
* of nodes bordering the region left after removing the supernode.
*/

struct supernode **superheap;  /* for heap ordering */
int heapcount;  /* how many in heap */
void degree_sort(S)
struct linsys *S;
{ int n;
  struct supernode *s;
  int spot;

  if ( superheap ) temp_free((char*)superheap);
  superheap = (struct supernode**)temp_calloc(supercount+1,sizeof(struct supernode*));
  heapcount = 0;
  for ( n = superstart ; n >= 0 ; n = s->next )
  { s = slist + n;
     if ( s->rcount == 0 ) continue; /* empty */
     /* calculate degree */
     if ( s->rtimestamp > old_rtimestamp )  s->degree = exact_degree(S,s);
     /* insert in heap */
     spot = ++heapcount;    /* using heap[1] as root for convenience */
     while ( spot>1 ) /* filter down */
     { 
        if ( degree_compare(s,superheap[spot>>1]) < 0 )
          superheap[spot] = superheap[spot>>1];
        else break;
        spot >>= 1;
     }
     superheap[spot] = s;
  }
}

/******************************************************************************
*
* function: mass_eliminate()
*
* purpose: eliminate one supernode, if independent
*/
void mass_eliminate(S,s)
struct linsys *S;
struct supernode *s;
{ int i,k;
  struct region *r,*rk;
  struct md_vertex *v;
  int newvlist;
  int newslist;
  int vercount;
  int size,ssize;
  int scount;
  int rkeep;

  /* check independence */
  for ( i = 0 ; i < s->rcount ; i++ )
  { r = REG(INT(s->rlist)[i]);
     if ( r->timestamp == rtimestamp ) return; /* not independent */
  }
  if ( debug_level >= 2 )
     printf("Eliminating supernode %d, size %d.\n",s-slist,s->vercount);
  /* mark timestamps */
  for ( i = 0 ; i < s->rcount ; i++ )
  { r = REG(INT(s->rlist)[i]);
     r->timestamp = rtimestamp;
  }

  /* create merged supernode and vertex lists */
  /* with elim supernode at end */
  size = s->degree+s->vercount;
  newvlist = md_alloc(S,size*sizeof(int));
  ssize = exact_sdegree(S,s);
  newslist = md_alloc(S,ssize*sizeof(int));

  /* first, elim supernode at end */
  vtimestamp++; /* new round */
  stimestamp++; /* new round */
  INT(newslist)[ssize-1] = s-slist;
  s->stimestamp = stimestamp;
  vercount = s->degree;
  for ( k = 0 ; k < s->vercount ; k++ )
  { INT(newvlist)[vercount++] = INT(s->verlist)[k];
     vlist[INT(s->verlist)[k]].timestamp = vtimestamp;
     S->P[elim_count+k] = INT(s->verlist)[k];
     S->IP[INT(s->verlist)[k]] = elim_count+k;
  }
  vercount = 0;
  scount = 0;
  for ( i = 0 ; i < s->rcount ; i++ )
  { struct supernode *ss;
     r = REG(INT(s->rlist)[i]);
     for ( k = 0 ; k < r->supercount ; k++ )
     { ss = slist + INT(r->superlist)[k];
        if ( ss->stimestamp == stimestamp ) continue;
        INT(newslist)[scount++] = INT(r->superlist)[k];
        ss->stimestamp = stimestamp;
        ss->rtimestamp = rtimestamp;
     }
     for ( k = 0 ; k < r->vercount ; k++ )
     { v = vlist + INT(r->verlist)[k];
        if ( v->timestamp == vtimestamp ) continue;
        INT(newvlist)[vercount++] = INT(r->verlist)[k];
        v->timestamp = vtimestamp;
     }
  }

  /* record fills per vertex for later factoring */
  for ( k = vercount ; k < size ; k++ )
     S->LIA[INT(newvlist)[k]] += size - (k-vercount);

  /* set up new region */
  rkeep = region_alloc(S);
  rk = REG(rkeep);
  rk->verlist = newvlist;
  rk->vercount = vercount;
  rk->superlist = newslist;
  rk->supercount = scount;
  if ( vercount != s->degree ) 
     printf("vercount %d != degree %d, supernode %d\n",vercount,s->degree,s-slist);
     
  /* mark all old regions as merged and remove from active list */
  for ( i = 0 ; i < s->rcount ; i++ )
  { 
     r = REG(INT(s->rlist)[i]);
     r->merged = rkeep;
     r->BROTHER = rk->son; rk->son = INT(s->rlist)[i];
     if ( r->next >= 0 ) REG(r->next)->prev = r->prev; 
     else last_region = r->prev;
     if ( r->prev >= 0 ) REG(r->prev)->next = r->next;
     else regionstart = r->next;
     if ( debug_level >= 5 )
        printf("Merging region %d with %d.\n",INT(s->rlist)[i],rkeep);
  }

  /* eliminate nodes */
  rk = REG(rkeep);
  rk->size = size;

  elim_count += s->vercount;

  /* delete this supernode */
  if ( s->next >= 0 ) slist[s->next].prev = s->prev;
  if ( s->prev >= 0 ) slist[s->prev].next = s->next;
  else superstart = s->next;
  s->verlist = -1;
  supercount--;

  /* gather stats */
  { int vn = vercount,vd=vercount+s->vercount;
     total_fill += (vd*(vd+1) - vn*(vn+1))/2;
     total_flops += ((vd+1)*vd*(vd+2) - (vn+1)*vn*(vn+2))/6;
  }
  s->rcount = 0; /* inactivate */
}

/******************************************************************************
*
* function: multiple_eliminate()
*
* purpose: eliminate independent supernodes of low degree
*/

void multiple_eliminate(S)
struct linsys *S;
{ 
  int lowdegree;
  int bounddegree; /* cutoff for multiple elimination */
  int spot;
  struct supernode sentinel;

  old_rtimestamp = rtimestamp; /* save, so know who changed */
  rtimestamp++;    /* new round of independence */
  lowdegree = superheap[1]->degree;  /* so we know where we started */
  if ( debug_level >= 1 ) printf("Low degree %d.\n",lowdegree);
  bounddegree = lowdegree+margin;        /* conservative to start with */
  sentinel.degree = 2+bounddegree;    /* big degree */
  sentinel.verlist = 1 << (sizeof(int)-2); /* big for degree_compare */
  while ( superheap[1]->degree <= bounddegree )
  { 
     mass_eliminate(S,superheap[1]);
     /* now adjust heap */
     spot = 1;  /* empty spot */
     for ( ;; )
     { if ( spot*2 > heapcount ) { superheap[spot] = &sentinel;  break; }
        if ( spot*2 == heapcount )
        { superheap[spot] = superheap[spot*2];
          superheap[spot*2] = &sentinel;
          break;
        }
        if ( degree_compare(superheap[spot*2],superheap[spot*2+1]) < 0 )
        { superheap[spot] = superheap[spot*2];
          spot *= 2;
        }
        else
        { superheap[spot] = superheap[spot*2+1];
          spot = spot*2 + 1;
        }
     }
  }
}

/*************************************************************************
*
* function: traverse_region_tree()
*
* purpose: Traverse region tree after it's constructed, doing factoring.
*             Uses remaining LA space as scratch space to assemble
*             current supernode rows, prior to compaction.
*             Everything stored in permuted order.
*/

static int treedepth;
static int IJA_base; /* number of entries in LIJA */

void traverse_recur(S,r,depth,fill)
struct linsys *S;
struct region *r;
int *depth;  /* matrix depth of r and below */
int *fill;  /* total fill of subtree */
{ int son;
  struct region *rr;
  int fillsum = 0;
  int sondepth;
  int maxdepth = 0;
  int sonfill;
  int thisfill;

treedepth++;
  for ( son = r->son ; son >= 0 ; son = rr->BROTHER )
  { rr = REG(son);
     traverse_recur(S,rr,&sondepth,&sonfill);
     if ( sondepth > maxdepth ) maxdepth = sondepth;
     fillsum += sonfill;
  }
  thisfill = (r->size*(r->size+1) 
                  - r->vercount*(r->vercount+1))/2;
  *fill = fillsum + thisfill;
  *depth = maxdepth + (r->size*(r->size+1))/2;
{ int n; for (n=0;n<treedepth;n++)printf("  ");printf("%d\n",(r->size*(r->size+1))/2); }
treedepth--;
}

/*********************************************************************************
*
* function: permute_recur()
*
* purpose: traverse region tree to find elimnation order
*
*/
void permute_recur(S,r)
struct linsys *S;
struct region *r;
{ int k;
  struct region *rr;
  int son;

  if ( debug_level > 5 ) printf("permute_recur region %d, vercount %d rsize %d\n",
  (int*)r-S->ISP,r->vercount,r->size);
  for ( son = r->son ; son >= 0 ; son = rr->BROTHER )
  { rr = REG(son);
     permute_recur(S,rr);
  }
  for ( k = r->vercount ; k < r->size ; k++ )
     { S->P[K] = INT(r->verlist)[k];
        S->IP[S->P[K]] = K;
        K++;
     }
  IJA_base += r->size;
}


/*********************************************************************************
*
* function: sparse_permute()
*
* purpose: Permute sparse matrix structure according to permutation S->IP.
*             Does upper triangular form, with rows in order.
*             Leaves permuted sparse matrix in  S->pIA,pJA,pA.
*             Uses radix sort.
*/
void sparse_permute(S)
struct linsys *S;
{ int i,j,end,total;
  int *cIA;
  int *cJA;
  REAL *cA;

  /* allocate */
  if ( S->pIA ) myfree((char*)S->pIA);
  if ( S->pJA ) myfree((char*)S->pJA);
  if ( S->pA ) myfree((char*)S->pA);
  S->pIA = (int*)mycalloc(S->N+1,sizeof(int));
  S->pJA = (int*)mycalloc(S->IA[S->N]-A_OFF,sizeof(int));
  S->pA = (REAL*)mycalloc(S->IA[S->N]-A_OFF,sizeof(REAL));
  cIA = (int*)temp_calloc(S->N+1,sizeof(int));
  cJA = (int*)temp_calloc(S->IA[S->N]-A_OFF,sizeof(int));
  cA = (REAL*)temp_calloc(S->IA[S->N]-A_OFF,sizeof(REAL));

  /* sort on permuted column */
  for ( i = 0 ; i < S->N ; i++ )
  { int ii = S->IP[i];
     end = S->IA[i+1] - A_OFF;
     for ( j = S->IA[i] - A_OFF ; j < end ; j++ )
     { int m = S->IP[S->JA[j]-A_OFF];
        if ( ii < m )
          cIA[m]++;
        else cIA[ii]++;
     }
  }
  for ( i = 0, total = 0 ; i < S->N ; i++ )
  { int tmp = cIA[i]; cIA[i] = total; total += tmp; 
  }
  cIA[S->N] = total;
  for ( i = 0 ; i < S->N ; i++ )
  { int ii = S->IP[i];
     end = S->IA[i+1] - A_OFF;
     for ( j = S->IA[i] - A_OFF ; j < end ; j++ )
     { int m = S->IP[S->JA[j]-A_OFF];
        if ( ii < m )
        { cA[cIA[m]] = S->A[j];  cJA[cIA[m]++] = ii; }
        else { cA[cIA[ii]] = S->A[j]; cJA[cIA[ii]++] = m; }
     }
  }
  for ( i = S->N-1 ; i > 0 ; i-- ) cIA[i] = cIA[i-1];
  cIA[0] = 0;
  /* now sort on permuted row */
  for ( i = 0 ; i < S->N ; i++ )
  { end = cIA[i+1];
     for ( j = cIA[i] ; j < end ; j++ )
         S->pIA[cJA[j]]++;
  }
  for ( i = 0, total = 0 ; i < S->N ; i++ )
  { int tmp = S->pIA[i]; S->pIA[i] = total; total += tmp; 
  }
  S->pIA[S->N] = total;
  for ( i = 0 ; i < S->N ; i++ )
  { end = cIA[i+1];
     for ( j = cIA[i] ; j < end ; j++ )
        { S->pA[S->pIA[cJA[j]]] = cA[j]; S->pJA[S->pIA[cJA[j]]++] = i; }
  }
  for ( i = S->N-1 ; i > 0 ; i-- ) S->pIA[i] = S->pIA[i-1];
  S->pIA[0] = 0;
  temp_free((char*)cIA);
  temp_free((char*)cJA);
  temp_free((char*)cA); 
}

/********************************************************************************
*
* function: factor_recur()
*
* purpose: Traverse region tree, factoring supernode at each.
*/

int vcompare(a,b)
int *a,*b;
{ return *a-*b;
}

void test_print(S,m)
struct linsys *S;
int m;
{ int n,i,j,jj;

  printf("\n");
  for ( n = 0 ; n < K+m ; n++ )
  { for ( i = 0 ; i < n ; i++ ) printf("            "); 
     for ( j = S->LIA[n],jj = S->LIJA[n] ; j < S->LIA[n+1]  ; j++,jj++ )
     { while ( i < S->LJA[jj] ) { printf("            "); i++; }
        printf("%8.6f ",(DOUBLE)S->LA[j]);
        i++;
     }
     printf("\n");
  }
  for ( i = 0 ; i < K+m ; i++ ) printf("            "); 
  for ( i = S->LIA[K+m] ; i < S->LIA[K+m] + S->N - n ; i++ )
        printf("%8.6f ",(DOUBLE)S->LA[i]);
  printf("\n");
}
void factor_recur(S,r)
struct linsys *S;
struct region *r;
{ int to_elim = r->size - r->vercount;
  REAL *base;  /* scratch row start in LA */
  int i,j,m,ii,ii_next;
  int end;
  REAL pivot;
  int *jspot;
  struct region *reg;
  int son;

  /* first, do sons */
  for ( son = r->son ; son >= 0 ; son = reg->BROTHER )
  { reg = REG(son);
     factor_recur(S,reg);
  }

  if ( to_elim == 0 ) return;

  /* get verlist in proper order with permuted numbers */
  for ( j = 0 ; j < r->size ; j++ )
        INT(r->verlist)[j] = S->IP[INT(r->verlist)[j]];
  qsort((char*)(INT(r->verlist)),r->size,sizeof(int), FCAST vcompare);

  /* fill in full rows in scratch space */
  for ( i = 0, base = S->LA + S->LIA[K] ; i < to_elim ; i++ )
  { REAL pa,pb,p11,p12,p21,p22;
     int jj;

     /* first, fill from original matrix */
     end = S->pIA[K+i+1];
     for ( j = S->pIA[K+i] ; j < end ; j++ )
        base[S->pJA[j]-(K+i)] = S->pA[j];
     if ( hessian_linear_metric_flag )
     { if ( S->lambda != 0.0 )
        { end = Met.pIA[K+i+1];
          for ( j = Met.pIA[K+i] ; j < end ; j++ )
             base[Met.pJA[j]-(K+i)] -= S->lambda*Met.pA[j];
        }
     }
     else if ( web.area_norm_flag )
         base[0] -= S->lambda*Met.A[S->P[K+i]]; /* special metric matrix */
     else base[0] -= S->lambda;

     /* next, add in previous rows that have entry in col K */
     for ( ii = xJL[K+i] ; ii >= 0 ; ii = ii_next )
     { REAL *subbase = base - (K+i);
        REAL *la;
        int *ja;
        int  IJdiff = S->LIA[ii] - S->LIJA[ii];
        switch ( S->psize[ii] )
        { case ONEBYONE:
            pivot = S->LA[S->LIA[ii]]*S->LA[xIL[ii]];
            end = S->LIA[ii+1];
            for ( j = xIL[ii],ja = S->LJA+j-IJdiff,la=S->LA+j ;
                             j < end ; j++,la++,ja++ )
                     subbase[*ja] -= pivot*(*la);  /* big improvement */
            ii_next = xJL[ii];
            if ( ++(xIL[ii]) < end )
            { m = S->LJA[xIL[ii]-IJdiff];
              xJL[ii] = xJL[m]; xJL[m] = ii;
            }
          break;
          case ZEROPIVOT:  /* nothing to add */
            ii_next = xJL[ii];
            if ( ++(xIL[ii]) < end )
            { m = S->LJA[xIL[ii]-IJdiff];
              xJL[ii] = xJL[m]; xJL[m] = ii;
            }
            break;
          case SECONDOFPAIR:  /* will handle under FIRSTOFPAIR */
            ii_next = xJL[ii];
            break;
          case FIRSTOFPAIR:
            p11 = S->LA[S->LIA[ii]]; p21 = p12 = S->LA[S->LIA[ii]+1];
            p22 = S->LA[S->LIA[ii+1]];
            pa = p11*S->LA[xIL[ii]] + p12*S->LA[xIL[ii+1]];
            pb = p21*S->LA[xIL[ii]] + p22*S->LA[xIL[ii+1]];
            end = S->LIA[ii+1];
            for ( j = xIL[ii], jj = xIL[ii+1] ; j < end ; j++,jj++ )
                base[S->LJA[j-IJdiff]-(K+i)] -= pa*S->LA[j] + pb*S->LA[jj];
            ii_next = xJL[ii];
            if ( ++(xIL[ii]) < end )
            { m = S->LJA[xIL[ii]-IJdiff];
              xJL[ii] = xJL[m]; xJL[m] = ii;
            }
            if ( ii_next == ii+1 ) ii_next = xJL[ii+1];
            if ( ++(xIL[ii+1]) < end )
            { m = S->LJA[xIL[ii+1]-(S->LIA[ii+1]-S->LIJA[ii+1])];
              xJL[ii+1] = xJL[m]; xJL[m] = ii+1;
            }

             break;
          default: kb_error(1835,"Internal error: Illegal case of psize\n",RECOVERABLE);

        }
     }

     /* compress to dense form */ 
     for ( j = to_elim ; j < r->size ; j++ )
     { REAL *to =  base + j - i;
        REAL *from = base + INT(r->verlist)[j] - (K+i);
        if ( from != to )
        { *to = *from;
          *from = 0.0;  /* clear for next round */
        }
     }
     S->LIA[K+i+1] = S->LIA[K+i] + r->size - i;

     base += r->size - i;
  }

  /* fill in LJA */
  for ( i = 0 ; i < to_elim ; i++ )
        S->LIJA[K+i] =  IJA_base + i;
  jspot = S->LJA + S->LIJA[K];
  for ( m = 0 ; m < r->size ; m++,jspot++ ) 
          *jspot = INT(r->verlist)[m];
  IJA_base += r->size;

  /* factor */
  for ( i = 0 ; i < to_elim ; i++ )  /* pivot row */
  { REAL *pivrow = S->LA + S->LIA[K+i];
     int p;
     REAL big,sigma;
     int br;

     /* first, decide on 1x1 or 2x2 pivot */
     p = ONEBYONE; /* default pivot size 1 */
     /* find max element in pivot row (same as pivot col) */
     /* but only considering current supernode */
     for ( j=i+1,big=-1.0,br = 0  ; j < to_elim ; j++ )
        if ( fabs(pivrow[j-i]) > big )
        { big = fabs(pivrow[j-i]); br = j; }
     if ( ((big < hessian_epsilon)||(!BK_flag))
                    && (fabs(pivrow[0]) < hessian_epsilon) )
        p = ZEROPIVOT;
     else if ( (big*BKalpha > fabs(pivrow[0])) && BK_flag )
     { /* find max in row/col r */
        REAL *rr;
        for ( j = i, sigma = -1.0; j<br ; j++,rr++ )
        { rr = S->LA + S->LIA[K+j] + br - j;
          if ( fabs(*rr) > sigma ) sigma = fabs(*rr);
        }
        for ( rr = S->LA + S->LIA[K+br], j = br ; j < to_elim; rr++, j++ )
         if ( fabs(*rr) > sigma ) sigma = fabs(*rr);
        if ( BKalpha*big*big > sigma*fabs(pivrow[0]) )
        { p = FIRSTOFPAIR; }
     }

     if ( p == ZEROPIVOT )
     { S->psize[K+i] = ZEROPIVOT; S->zero++;
        memset((char*)pivrow,0,(r->size-i)*sizeof(REAL));
        xIL[K+i] = -1;
     }
     else if ( p == ONEBYONE )
     { S->psize[K+i] = ONEBYONE;
        pivot = 1/pivrow[0];
        if ( pivot > 0.0 ) S->pos++; else S->neg++;
        for ( j = i+1; j < to_elim ; j++ ) /* row down */
        { REAL *spot = S->LA + S->LIA[K+j];
          REAL pp = pivrow[j-i]*pivot;
          REAL *pr = pivrow + j - i;
          for ( m = j ; m < r->size ; m++ , spot++, pr++)
/* semiheavy */  /*  *spot -= pivrow[m-i]*pp; */
                    *spot -= (*pr)*pp;    /* no improvement */
        }
        for ( m = i+1, pivrow++ ; m < r->size ; m++,pivrow++ ) 
            *pivrow *= pivot;  /* pivot row */
        xIL[K+i] = S->LIA[K+i]+(to_elim-i);
        if ( xIL[K+i] < S->LIA[K+i+1] )
        { m = S->LJA[xIL[K+i]-(S->LIA[K+i]-S->LIJA[K+i])];
          xJL[K+i] = xJL[m]; xJL[m] = K+i;
        }
        S->psize[K+i] = ONEBYONE;
     }
     else /* 2x2 pivot */
     { REAL *pivrow1 = pivrow;
        REAL *pivrow2 = S->LA + S->LIA[K+i+1];
        REAL p11,p12,p22,detinv,*yy1,*yy2,pa1,pa2;
        int k;
        REAL *x;

        S->psize[K+i] = FIRSTOFPAIR;
        S->psize[K+i+1] = SECONDOFPAIR;
        if ( br != i+1 ) /* swap rows to get adjacent */
        { REAL dtmp,*rii,*rr;
          int c;

          /* swap in matrix, both A and JA */
#define DSWAP(a,b)  {dtmp=(a);(a)=(b);(b)=dtmp;}
          for ( j = 0 ; j <= i ; j++ ) /* swap columns */
          { rr = S->LA + S->LIA[K+j] - j;
             DSWAP(rr[i+1],rr[br]);
          }
          for ( j = i+2 ; j < br ; j++ )  /* across to down */
          { DSWAP(pivrow2[j-(i+1)],S->LA[S->LIA[K+j]+br-j]);
          }
          for ( j = br+1, rr=pivrow2+(br-i), rii=S->LA+S->LIA[K+br]+1 ;
                      j < r->size ; j++, rr++, rii++ )
          { DSWAP(*rr,*rii); }
          DSWAP(pivrow2[0],S->LA[S->LIA[K+br]]); /* diag elements */

          /* fix up LJA in swapped columns */
          c = S->LJA[S->LIJA[K+i+1]];
          S->LJA[S->LIJA[K+i+1]] = S->LJA[S->LIJA[K+br]];
          S->LJA[S->LIJA[K+br]] = c;
        }

        /* now, actual 2x2 pivot */
        p11 = pivrow1[0]; p12 = pivrow1[1]; p22 = pivrow2[0];
        detinv = 1/(p11*p22 - p12*p12);
        if ( detinv > 0.0 )
          { if ( p11+p22 > 0.0 ) S->pos += 2; else S->neg += 2; }
        else { S->pos++; S->neg++; }
        /* sweep through matrix */
        for ( k = i+2, x = S->LA+S->LIA[K+i+2] ; k < to_elim ; k++ ) /* row */
          { pa1 = (pivrow1[k-i]*p22 - pivrow2[k-i-1]*p12)*detinv;
             pa2 = (pivrow2[k-i-1]*p11 - pivrow1[k-i]*p12)*detinv;
             for ( j=k,yy1=pivrow1+k-i,yy2=pivrow2+k-i-1 ; j<r->size; j++ ,x++,yy1++,yy2++) /*col*/
             {  *x -=  pa1*(*yy1) + pa2*(*yy2); 
             }
             pivrow1[k-i] = pa1;
             pivrow2[k-i-1] = pa2;
          }
        for ( k = to_elim ; k < r->size ; k++ ) /* finish pivot rows */
          { pa1 = (pivrow1[k-i]*p22 - pivrow2[k-i-1]*p12)*detinv;
             pa2 = (pivrow2[k-i-1]*p11 - pivrow1[k-i]*p12)*detinv;
             pivrow1[k-i] = pa1;
             pivrow2[k-i-1] = pa2;
          }
        xIL[K+i] = S->LIA[K+i]+(to_elim-i);
        if ( xIL[K+i] < S->LIA[K+i+1] )
        { m = S->LJA[xIL[K+i]-(S->LIA[K+i]-S->LIJA[K+i])];
          xJL[K+i] = xJL[m]; xJL[m] = K+i;
        }
        i++;  /* since just did 2 rows */
        xIL[K+i] = S->LIA[K+i]+(to_elim-i);
        if ( xIL[K+i] < S->LIA[K+i+1] )
        { m = S->LJA[xIL[K+i]-(S->LIA[K+i]-S->LIJA[K+i])];
          xJL[K+i] = xJL[m]; xJL[m] = K+i;
        }
     }
  }

  K += to_elim;
}

void traverse_region_tree(S)
struct linsys *S;
{ struct region *r;
  int rnum,n;
  int fill;

  /* allocate space */
  for ( fill = 0, n = 0 ; n < S->N ; n++ )
  { int tmp = S->LIA[n]; S->LIA[n] = fill; fill += tmp; }
  S->LIA[S->N] = fill;
  if ( fill != total_fill )
  { sprintf(errmsg,"Internal error: fill %d  !=  total_fill %d\n",fill,total_fill);
     kb_error(1836,errmsg,RECOVERABLE);
  }
  xIL = (int*)temp_calloc(S->N,sizeof(int));
  xJL = (int*)temp_calloc(S->N,sizeof(int));
  for ( n = 0 ; n < S->N ; n++ ) xJL[n] = -1;  /* list terminators */

  /* get tree traverse permutation order */
  K = 0;
  IJA_base = 0; /* for total LIJA entries */
  for ( rnum = regionstart ; rnum >= 0 ; rnum = r->next )
  { r = REG(rnum);
     permute_recur(S,r);
  }
  sparse_permute(S);
  if ( hessian_linear_metric_flag && (S->lambda != 0.0) )
  { memcpy((char*)Met.P,(char*)S->P,S->N*sizeof(int));
     memcpy((char*)Met.IP,(char*)S->IP,S->N*sizeof(int));
     sparse_permute(&Met);
  }

  /* allocate L space */
  S->Lsize = total_fill; 
  if ( S->LJA ) myfree((char*)S->LJA);
  if ( S->LIJA ) myfree((char*)S->LIJA);
  if ( S->LA ) myfree((char*)S->LA);
  S->LJA = (int*)mycalloc(IJA_base,sizeof(int));
  S->LIJA = (int*)mycalloc(S->N+1,sizeof(int));
  S->LA = (REAL*)mycalloc(S->Lsize,sizeof(REAL));

  K = 0;  /* current row */
  IJA_base = 0 ;
  for ( rnum = regionstart ; rnum >= 0 ; rnum = r->next )
  { r = REG(rnum);
     factor_recur(S,r);
  }
  temp_free((char*)xIL); xIL = NULL;
  temp_free((char*)xJL); xJL = NULL;

  /* test_print(S,0); */
/*debug */    /*  dsolve(S); */  
}


/*************************************************************************
*
* function: xmd_solve()
* 
* purpose: solve factored system for given right hand side
*             Factor stored as U, permuted order
*             LJA indices in LIJA, LA indices in LIA
*
*/

void xmd_solve(S,B,x)
struct linsys *S; /* factored system */
REAL *B;    /* incoming right hand side */
REAL *x;    /* solution, may be rhs */
{
  int n; /* row index */
  int i;
  int *jp;  /* pointer into LIJA */
  REAL *BB,*Y,*e;


  if ( S->psize == NULL )
     kb_error(1837,"Internal error: Must call xmd_factor before xmd_solve.\n",RECOVERABLE);


  BB = (REAL*)temp_calloc(S->N,sizeof(REAL));  /* intermediate solutions */
  Y = (REAL*)temp_calloc(S->N,sizeof(REAL));  /* intermediate solutions */

  /* solve U^T Y = B */
  for ( n = 0 ; n < S->N ; n++ ) BB[n] = B[S->P[n]]; /* permute */
  for ( n = 0 ; n < S->N ; n++ )
     { int start,end;
        Y[n] = BB[S->LJA[S->LIJA[n]]];  /* for BK inner permutation */
        if ( S->psize[n] == FIRSTOFPAIR ) start = 2;
        else start = 1; 
        end = S->LIA[n+1];
        for ( i=S->LIA[n]+start, e=S->LA+i , jp=S->LJA+S->LIJA[n]+start ; 
                     i < end ; i++,e++,jp++ )
            BB[*jp] -= (*e)*Y[n];
     }

  /* solve D V = Y (will use Y to store V) */
  for ( n = 0 ; n < S->N ; n++ )
  { if ( S->psize[n] == ONEBYONE )
        Y[n] /= S->LA[S->LIA[n]];
     else if ( S->psize[n] == ZEROPIVOT ) Y[n] = 0.0;  /* generalized inverse */
     else
     { REAL piv[2][2];
        REAL pinv[2][2];
        REAL det,yy;
        piv[0][0] = S->LA[S->LIA[n]];
        piv[0][1] = piv[1][0] = S->LA[S->LIA[n]+1];
        piv[1][1] = S->LA[S->LIA[n+1]];
        det = piv[0][0]*piv[1][1] - piv[0][1]*piv[1][0];
        pinv[0][0] = piv[1][1]/det;
        pinv[1][0] = pinv[0][1] = -piv[0][1]/det;
        pinv[1][1] = piv[0][0]/det;
        yy = Y[n]*pinv[0][0] + Y[n+1]*pinv[1][0];
        Y[n+1] = Y[n]*pinv[0][1] + Y[n+1]*pinv[1][1];
        Y[n] = yy;
        n++; 
     }
  }

  /* solve U X = V */
  for ( n = S->N-1 ; n >= 0 ; n-- )
     { int start,end;
        if ( S->psize[n] == FIRSTOFPAIR ) start = 2;
        else start = 1; 
        end = S->LIA[n+1];
        for ( i=S->LIA[n]+start, e=S->LA+i, jp=S->LJA+S->LIJA[n]+start  ; 
            i < end ; i++,e++,jp++ )
             Y[n] -= (*e)*BB[*jp];
        BB[S->LJA[S->LIJA[n]]] = Y[n];
      }

  /* unpermute */
  for ( n = 0 ; n < S->N ; n++ )
     x[S->P[n]] = BB[n];

  temp_free((char*)Y);
  temp_free((char*)BB);
}


/*************************************************************************
*
* function: xmd_solve_multi()
* 
* purpose: solve factored system for multiple right hand sides
*             Factor stored as U, permuted order
*             LJA indices in LIJA, LA indices in LIA
*
*/

void xmd_solve_multi(S,B,x,rk)
struct linsys *S; /* factored system */
REAL **B;    /* incoming right hand side */
REAL **x;    /* solution, may be rhs */
int rk;         /* number of right sides */
{
  int n; /* row index */
  int k; /* rhs column index */
  int i;
  int *jp;  /* pointer into LIJA */
  REAL **BB,**Y,*e;


  if ( S->psize == NULL )
     kb_error(1838,"Internal error: Must call BK_factor before BK_solve.\n",RECOVERABLE);


  BB = dmatrix(0,S->N-1,0,rk-1);  /* intermediate solutions */
  Y =  dmatrix(0,S->N-1,0,rk-1);  /* intermediate solutions */

  /* solve U^T Y = B */
  for ( n = 0 ; n < S->N ; n++ ) 
    for ( k = 0 ; k < rk ; k++ )
      BB[n][k] = B[k][S->P[n]]; /* permute */

  for ( n = 0 ; n < S->N ; n++ )
     { int start,end;
        REAL *yy,*bb,ee;
        for ( yy = Y[n], k = 0 , bb = BB[S->LJA[S->LIJA[n]]]; k < rk ; k++ )
            *(yy++) = *(bb++);
        if ( S->psize[n] == FIRSTOFPAIR ) start = 2;
        else start = 1; 
        end = S->LIA[n+1];
        for ( i=S->LIA[n]+start, e=S->LA+i , jp=S->LJA+S->LIJA[n]+start ; 
                     i < end ; i++,e++,jp++ )
        { ee = *e;
          for ( bb = BB[*jp], yy = Y[n], k = 0 ; k < rk ; k++ )
             *(bb++) -= ee*(*(yy++));
        }
     }

  /* solve D V = Y (will use Y to store V) */
  for ( n = 0 ; n < S->N ; n++ )
  { if ( S->psize[n] == ONEBYONE )
     { REAL *y,pinv;
        pinv = 1.0/S->LA[S->LIA[n]];
        for ( y = Y[n], k = 0 ; k < rk ; k++ )
          *(y++) *= pinv;
     }
     else if ( S->psize[n] == ZEROPIVOT )
        for ( k = 0 ; k < rk ; k++ ) Y[n][k] = 0.0;  /* generalized inverse */
     else
     { REAL piv[2][2];
        REAL pinv[2][2];
        REAL det,yy;
        piv[0][0] = S->LA[S->LIA[n]];
        piv[0][1] = piv[1][0] = S->LA[S->LIA[n]+1];
        piv[1][1] = S->LA[S->LIA[n+1]];
        det = piv[0][0]*piv[1][1] - piv[0][1]*piv[1][0];
        pinv[0][0] = piv[1][1]/det;
        pinv[1][0] = pinv[0][1] = -piv[0][1]/det;
        pinv[1][1] = piv[0][0]/det;
        for ( k = 0 ; k < rk ; k++ )
        { yy = Y[n][k]*pinv[0][0] + Y[n+1][k]*pinv[1][0];
          Y[n+1][k] = Y[n][k]*pinv[0][1] + Y[n+1][k]*pinv[1][1];
          Y[n][k] = yy;
        }
        n++; 
     }
  }

  /* solve U X = V */
  for ( n = S->N-1 ; n >= 0 ; n-- )
     { int start,end;
        REAL *yy,*bb,ee;
        if ( S->psize[n] == FIRSTOFPAIR ) start = 2;
        else start = 1; 
        end = S->LIA[n+1];
        for ( i=S->LIA[n]+start, e=S->LA+i, jp=S->LJA+S->LIJA[n]+start  ; 
            i < end ; i++,e++,jp++ )
        { ee = *e;
          for ( yy = Y[n], bb = BB[*jp], k = 0 ; k < rk ; k++ )
             *(yy++) -= ee*(*(bb++));
        }
        for ( yy = Y[n], bb = BB[S->LJA[S->LIJA[n]]], k = 0 ; k < rk ; k++ )
          *(bb++) = *(yy++);
      }

  /* unpermute */
  for ( n = 0 ; n < S->N ; n++ )
    for ( k = 0 ; k < rk ; k++ )
     x[k][S->P[n]] = BB[n][k];

  free_matrix(Y);
  free_matrix(BB);
}

/*************************************************************************/

void dsolve(S)  /* solve as dense matrix */
struct linsys *S;
{ REAL **a,**u;
  int i,j,k,m,jj;

  int *xIP = (int*)temp_calloc(S->N,sizeof(int));
  for ( k = 0 ; k < S->N ; k++ )
     xIP[S->LJA[S->LIJA[k]]] = k;

  a = dmatrix(0,S->N-1,0,S->N-1);
  for ( i = 0 ; i < S->N ; i++ )
  { for ( j = S->IA[i]-A_OFF  ; j < S->IA[i+1] - A_OFF ; j++ )
     { k = xIP[S->IP[i]];
        m = xIP[S->IP[S->JA[j]-A_OFF]];
        a[k][m] = a[m][k] = S->A[j];
     }
  }
  for ( i = 0 ; i < S->N ; i++ )
  { 
     if ( S->psize[i] == ONEBYONE )
     { for ( j = i+1 ; j < S->N ; j++ )
          for ( k = i+1 ; k < S->N ; k++ )
             a[j][k] -= a[i][j]*a[i][k]/a[i][i];
        for ( k = i+1 ; k < S->N ; k++ )
          a[i][k] /= a[i][i];
     }
     else if ( S->psize[i] == ZEROPIVOT )
     { a[i][i] = 0.0;
     }
     else
     { REAL p11,p12,p22,detinv,pa1,pa2;
       p11 = a[i][i]; p12 = a[i][i+1]; p22 = a[i+1][i+1];
       detinv = 1/(p11*p22 - p12*p12);
       /* sweep through matrix */
       for ( k = i+2 ; k < S->N ; k++ ) /* row */
          { pa1 = (a[i][k]*p22 - a[i+1][k]*p12)*detinv;
             pa2 = (a[i+1][k]*p11 - a[i][k]*p12)*detinv;
             for ( j=k ; j < S->N ; j++ ) /*col*/
             {  a[k][j] -=  pa1*a[i][j] + pa2*a[i+1][j]; 
             }
             a[i][k] = pa1;
             a[i+1][k] = pa2;
          }
       i++;  /* since did 2 rows */
     }

  }
  u = dmatrix(0,S->N-1,0,S->N-1);
  for ( i = 0 ; i < S->N ; i++ )
     for ( j = S->LIA[i], jj = S->LIJA[i]; j < S->LIA[i+1] ; j++ )
        u[i][S->LJA[jj]] = S->LA[j];
  for ( i = 0 ; i < S->N ; i++ )
     for ( j = i ; j < S->N ; j++ )
        if ( fabs(a[i][j]-u[i][j]) > 1e-10 )
        { printf("u[%d][%d] = %20.15f    a[%d][%d] = %20.15f\n",
              i,j,(DOUBLE)u[i][j],i,j,(DOUBLE)a[i][j]);
          break;
        }

  free_matrix(a);
  free_matrix(u);
  temp_free((char*)xIP);
}
     
void test_mult(S)
struct linsys *S;
{ int i,j,k;
  REAL **lower,**diag,**upper,**prod,**prodd;
  lower = dmatrix(0,S->N-1,0,S->N-1);
  diag = dmatrix(0,S->N-1,0,S->N-1);
  upper = dmatrix(0,S->N-1,0,S->N-1);
  prod = dmatrix(0,S->N-1,0,S->N-1);
  prodd = dmatrix(0,S->N-1,0,S->N-1);
  for ( i = 0 ; i < S->N ; i++ )
  { if ( S->psize[i] == FIRSTOFPAIR )
     { for ( j = S->LIA[i] ; j < S->LIA[i+1] - 2 ; j++ )
        { k = S->IP[S->LJA[j]];
          lower[k][i] = upper[i][k] = S->LA[j];
        }
        diag[i+1][i] = diag[i][i+1] = S->LA[j];
        j++;
     }
     else 
     for ( j = S->LIA[i] ; j < S->LIA[i+1] - 1 ; j++ )
     { k = S->IP[S->LJA[j]];
        lower[k][i] = upper[i][k] = S->LA[j];
     }
     diag[i][i] = S->LA[j];
     lower[i][i] = upper[i][i] = 1.0;
  }
  mat_mult(lower,diag,prod,S->N,S->N,S->N);
  mat_mult(prod,upper,prodd,S->N,S->N,S->N);
  for ( i = 0 ; i < S->N ; i++ )
  { for ( j = 0 ; j < S->N ; j++ ) 
        printf("%8.6f ",(DOUBLE)prodd[S->IP[i]][S->IP[j]]);
     printf("\n");
  }
  free_matrix(lower);
  free_matrix(diag);
  free_matrix(upper);
  free_matrix(prod);
  free_matrix(prodd);
}


void test_vecmult(S,X)
struct linsys *S;
REAL *X;
{ int i,j,k;
  REAL **lower,**diag,**upper,**prod,**prodd,*AX,*XP;
  lower = dmatrix(0,S->N-1,0,S->N-1);
  diag = dmatrix(0,S->N-1,0,S->N-1);
  upper = dmatrix(0,S->N-1,0,S->N-1);
  prod = dmatrix(0,S->N-1,0,S->N-1);
  prodd = dmatrix(0,S->N-1,0,S->N-1);
  AX = (REAL*)mycalloc(S->N,sizeof(REAL));
  XP = (REAL*)mycalloc(S->N,sizeof(REAL));
  for ( i = 0 ; i < S->N ; i++ )
  { if ( S->psize[i] == FIRSTOFPAIR )
     { for ( j = S->LIA[i] ; j < S->LIA[i+1] - 2 ; j++ )
        { k = S->IP[S->LJA[j]];
          lower[k][i] = upper[i][k] = S->LA[j];
        }
        diag[i+1][i] = diag[i][i+1] = S->LA[j];
        j++;
     }
     else 
     for ( j = S->LIA[i] ; j < S->LIA[i+1] - 1 ; j++ )
     { k = S->IP[S->LJA[j]];
        lower[k][i] = upper[i][k] = S->LA[j];
     }
     diag[i][i] = S->LA[j];
     lower[i][i] = upper[i][i] = 1.0;
  }
  mat_mult(lower,diag,prod,S->N,S->N,S->N);
  mat_mult(prod,upper,prodd,S->N,S->N,S->N);
  for ( i = 0 ; i < S->N ; i++ ) XP[i] = X[S->P[i]];
  matvec_mul(prodd,XP,AX,S->N,S->N);
  puts("AX:");
  for ( i = 0 ; i < S->N ; i++ )
  { printf("%8.6f ",(DOUBLE)AX[S->IP[i]]);
     printf("\n");
  }
  free_matrix(lower);
  free_matrix(diag);
  free_matrix(upper);
  free_matrix(prod);
  free_matrix(prodd);
  myfree((char*)AX);
  myfree((char*)XP);
}

void md_dump(S)
struct linsys *S;
{ int k,i;
  struct region *r;
  struct supernode *s;
  int rnum;

  for ( k = 0 , rnum = regionstart ; rnum >= 0;  rnum = r->next )
  { r = REG(rnum);
     if ( r->supercount == 0 ) continue; 
     printf("Region %3d, %3d supernodes:",k,r->supercount);
     for ( i = 0 ; i < r->supercount ; i++ ) printf(" %3d",INT(r->superlist)[i]);
     printf("\n");
  }
  for ( k = superstart ; k >= 0  ; k = s->next )
  { s = slist + k;
     if ( s->rcount <= 0 ) continue;
     printf("Supernode %d, degree %d, regions ",k,s->degree);
     for ( i = 0 ; i < s->rcount ; i++ ) printf("%d ",INT(s->rlist)[i]);
     printf("  vertices ");
     for ( i=0 ; i<s->vercount ; i++ )
        printf("%d ",INT(s->verlist)[i]);
     printf("\n");
  }
}


/***************************************************************************
*
* function: xmd_factor()
*
* purpose: experimental minimal degree factoring of system
*
*/
void xmd_factor(S)
struct linsys *S;
{ 

  if ( S->N <= 0 )
     { kb_error(1839,"Internal error: Empty linear system.\n",WARNING); 
        return;
     }
  total_fill = total_flops = 0;
  rtimestamp = 0;
  stimestamp = 0;
  old_rtimestamp = -1; /* less than rtimestamp first time around */
  vtimestamp = 0;
  S->neg = S->zero = S->pos = 0;

  /* working storage */
  S->NSP = 4*S->IA[S->N];
  if ( S->ISP ) myfree((char*)S->ISP);
  S->ISP = (int*)mycalloc(S->NSP,sizeof(int));

  md_vertex_setup(S);
  md_supernode_setup(S);
  md_region_string(S);
  md_supernode_regions(S);

  /* final lower triangular matrix storage */
  if ( S->psize ) myfree((char*)S->psize);
  if ( S->LIA ) myfree((char*)S->LIA);
  S->psize = (int*)mycalloc(S->N,sizeof(int));
  S->LIA = (int*)mycalloc(S->N+1,sizeof(int));
  L_total = 0;
  elim_count = 0;

  for(;;)
  { 
     if ( supercount == 0 ) break;
     degree_sort(S);
     multiple_eliminate(S);
     region_absorb(S);
     clean_supernodes(S);  /* supernodes stamped */
     merge_supernodes(S);
  }
  S->LIA[elim_count] = L_total;  /* final sentinel */

  /* free temp storage */
  temp_free((char*)slist);
  temp_free((char*)vlist); vlist = NULL;
  temp_free((char*)superheap); superheap = NULL;

  traverse_region_tree(S); 

  eigen_pos = S->pos;
  eigen_neg = S->neg;
  eigen_zero = S->zero;

  if ( !hessian_quiet_flag )
  {
  printf("Variables: %d  Original fill: %d\n",S->N,S->IA[S->N]);
  printf("Workspace fill: %d bytes\n",S->ISP[0]*sizeof(int));

  printf("IJA_base = %d\n",IJA_base);
  printf("Total_fill:  %d\n",total_fill);
  printf("Total_flops: %d\n",total_flops);
#ifdef IRIS
  { struct mallinfo m;
     m = mallinfo();
     printf("Arena %d     Ordblocks: %d    Orduse: %d     Ordfree: %d\n",
            m.arena,m.ordblks,m.uordblks,m.fordblks);
     printf("Small blocks: %d Small use: %d Small free: %d\n",
            m.smblks,m.usmblks,m.fsmblks);
  }
#endif
  }
  if ( !hessian_quiet_flag )
  { printf("Alt mindeg fill:  %d\n",total_fill);
     printf("Flops (flop=add+mul): %d\n",total_flops);
  }
  myfree((char*)(S->ISP)); S->ISP = NULL; S->NSP = 0;
}


