#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: gmat2d.c,v 1.24 2000/01/31 17:34:32 knepley Exp $";
#endif

/* Implements FE matrices derived from 2d triangular grids */
#include "petscsles.h"                 /* For ALE Operators */
#include "src/gvec/gvecimpl.h"         /*I "gvec.h" I*/
#include "src/mesh/impls/triangular/triimpl.h"
#include "src/grid/impls/triangular/2d/elemvec2d.h"
#include "gmat2d.h"

extern int GridResetConstrainedMultiply_Private(Grid, GMat);

#undef  __FUNCT__
#define __FUNCT__ "PlaceVariables_Private"
/*
  PlaceVariables_Private - Adds the column variables of one support node to the nonzero counts of
  every row variable on a node.

  Input Parameters:
+ startVar    - index into diagRows/offdiagRows of the first row variable on the node
. nodeVars    - number of row variables on the node
. locColStart - first column of the diagonal block
. locColEnd   - one past the last column of the diagonal block
. sStartVar   - first column variable on the support node
. sNodeVars   - number of column variables on the support node
- rectangular - if true, a column range straddling one end of the diagonal block is split between the counts

  Output Parameters:
+ diagRows    - counts of columns inside [locColStart, locColEnd), incremented in place
- offdiagRows - counts of columns outside [locColStart, locColEnd), incremented in place

  Returns 0 on success. Nothing is done when either node carries no variables. If the column range
  straddles an end of the diagonal block and the matrix is not rectangular, PETSC_ERR_ARG_WRONG is raised.
*/
static int PlaceVariables_Private(int startVar, int nodeVars, int locColStart, int locColEnd, int sStartVar, int sNodeVars,
                                  PetscTruth rectangular, int *diagRows, int *offdiagRows)
{
  int sEndVar;        /* One past the last column variable on the support node */
  int numDiag    = 0; /* Columns credited to the diagonal block for each row */
  int numOffdiag = 0; /* Columns credited to the off-diagonal block for each row */
  int row;

  PetscFunctionBegin;
  if ((nodeVars == 0) || (sNodeVars == 0)) PetscFunctionReturn(0);

  /* Classify the column range [sStartVar, sEndVar) against the diagonal block */
  sEndVar = sStartVar + sNodeVars;
  if ((sEndVar <= locColStart) || (sStartVar >= locColEnd)) {
    /* Entirely outside the diagonal block */
    numOffdiag = sNodeVars;
  } else if ((sStartVar >= locColStart) && (sEndVar <= locColEnd)) {
    /* Entirely inside the diagonal block */
    numDiag    = sNodeVars;
  } else if (rectangular) {
    /* Rectangular matrices may cut the variables of a single node */
    if (sStartVar < locColStart) {
      /* The range begins below the block */
      numDiag    = sEndVar - locColStart;
      numOffdiag = locColStart - sStartVar;
    } else {
      /* The range ends above the block */
      numDiag    = locColEnd - sStartVar;
      numOffdiag = sEndVar - locColEnd;
    }
  } else {
    /* Row blocking cuts variables on a single node. This is bad partitioning. */
    SETERRQ(PETSC_ERR_ARG_WRONG, "Row blocking cut variables on a single node");
  }

  /* Every row variable on the node sees the same set of columns */
  for(row = 0; row < nodeVars; row++) {
    diagRows[startVar+row]    += numDiag;
    offdiagRows[startVar+row] += numOffdiag;
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GridCreateGMat_Triangular_2D"
/*
  GridCreateGMat_Triangular_2D - Creates an MPIAIJ matrix whose nonzero preallocation is computed from
  the node connectivity of the mesh attached to the grid.

  Input Parameters:
+ grid   - the grid; its mesh, partition and constraint context are used
. sOrder - the variable ordering which supplies the columns
. tOrder - the variable ordering which supplies the rows
- bdCols - if PETSC_TRUE, support nodes whose boundary marker is 0 contribute no columns

  Output Parameter:
. gmat   - the new matrix, with the grid composed on it under the name "Grid" and
           MAT_NEW_NONZERO_ALLOCATION_ERR set

  Notes:
  The nodes of the overlap elements are traversed twice. The first pass (constrained grids on more than
  one process only) counts the constraint variables whose row lies outside [locRowStart, locRowEnd).
  The second pass accumulates, for each row variable, the number of columns inside and outside the
  local column range, placing counts for off-process constraint rows after the rowLocVars local rows.
  Those ghost counts are then added into the owning process with PetscGhostExchange().

  The ghost arrays are always allocated (with one spare entry) so that valid pointers are handed to
  PetscGhostExchange() even on a process without ghosts. The second pass raises an error rather than
  writing past the sizes found by the first pass.

  As elsewhere in this file, arrays are not released when an error is returned part way through.
*/
int GridCreateGMat_Triangular_2D(Grid grid, VarOrdering sOrder, VarOrdering tOrder, PetscTruth bdCols, GMat *gmat)
{
  MPI_Comm              comm;
  Mesh                  mesh;
  Partition             part;
  int                   locRowStart;   /* The row that this partition starts on */
  int                   locRowEnd;     /* The row that the next partition starts on */
  int                   locColStart;   /* The column that this partition starts on */
  int                   locColEnd;     /* The column that the next partition starts on */
  int                   newLocColStart;/* The column that the new variable domain starts on */
  int                   newLocColEnd;  /* The column after the new variable domain ends */
  int                  *diagRows;      /* Number of nonzeros in each diagonal portion */
  int                  *offdiagRows;   /* Number of nonzeros in each off-diagonal portion */
  int                   nodeVars;      /* Number of variables on node */
  int                   newNodeVars;   /* Number of new variables on node */
  int                   sNodeVars;     /* Number of variables on a node in the support of a given node */
  int                   sNewNodeVars;  /* Number of new variables on a node in the support of a given node */
  int                   startVar;      /* First variable on a node */
  int                   newStartVar   = 0; /* First new variable on a node (only meaningful when newNodeVars > 0) */
  int                   sStartVar;     /* First variable on a support node (global numbering) */
  int                   sNewStartVar  = 0; /* First new variable on a support node (only meaningful when sNewNodeVars > 0) */
  int                  *nodeDone;      /* A 1 indicates that the node has already been processed */
  int                  *nodeNeighbors; /* A list of the nodes in the support of a given node */
  int                   degree;        /* The degree of a vertex */
  int                  *support;       /* A list of elements in the support of a basis function */
  PetscTruth            rectangular;   /* Flag for a rectangular matrix */
  int                   numGhostNodes; /* The number of nodes constrained by variables in another domain */
  int                   numGhostVars;  /* The number of new variables which lie in another domain */
  int                   maxGhostNodes; /* The number of ghost nodes found by the counting pass */
  int                   maxGhostVars;  /* The number of ghost variables found by the counting pass */
  int                  *ghostProcs;    /* The processor for each ghost node */
  int                  *ghostNodes;    /* The global index for each ghost node */
  int                  *ghostVarProcs; /* The processor for each ghost variable */
  int                  *ghostVars;     /* The global index for each ghost variables */
  int                   newComp       = 0; /* The number of components in the new field (0 without a constraint context) */
  int                   numOverlapElements;
  PetscConstraintObject constCtx      = grid->constraintCtx;
  FieldClassMap         rowMap, colMap;
  int                   numCorners;
  int                   numNodes;
  int                   marker;
  int                   maxDegree;
  int                  *rowClasses,    *colClasses;
  int                  *rowClassSizes, *colClassSizes;
  int                  *rowIsConst,    *colIsConst;
  int                   rowLocVars    = tOrder->numLocVars;
  int                   rowVars       = tOrder->numVars;
  int                  *rowFirstVar   = tOrder->firstVar;
  int                  *rowOffsets    = tOrder->offsets;
  int                   colLocVars    = sOrder->numLocVars;
  int                   colVars       = sOrder->numVars;
  int                  *colFirstVar   = sOrder->firstVar;
  int                  *colOffsets    = sOrder->offsets;
  int                   rank, numProcs;
  int                   proc, elem, sElem, corner, sCorner, neighbor, node, sNode, nclass, sNclass, var, count;
  PetscTruth            opt;
  int                   ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject) grid, &comm);                                                   CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &numProcs);                                                                  CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);                                                                      CHKERRQ(ierr);
  ierr = GridGetMesh(grid, &mesh);                                                                        CHKERRQ(ierr);
  ierr = MeshGetPartition(mesh, &part);                                                                   CHKERRQ(ierr);
  ierr = VarOrderingGetClassMap(tOrder, &rowMap);                                                         CHKERRQ(ierr);
  ierr = VarOrderingGetClassMap(sOrder, &colMap);                                                         CHKERRQ(ierr);
  ierr = MeshGetNumCorners(mesh, &numCorners);                                                            CHKERRQ(ierr);
  ierr = MeshGetMaxDegree(mesh, &maxDegree);                                                              CHKERRQ(ierr);
  ierr = PartitionGetNumOverlapElements(part, &numOverlapElements);                                       CHKERRQ(ierr);
  numNodes       = rowMap->numNodes;
  rowClasses     = rowMap->classes;
  rowClassSizes  = rowMap->classSizes;
  rowIsConst     = rowMap->isClassConstrained;
  colClasses     = colMap->classes;
  colClassSizes  = colMap->classSizes;
  colIsConst     = colMap->isClassConstrained;
  newLocColStart = -1;
  newLocColEnd   = -1;
  /* Get partition information */
  locRowStart   = rowFirstVar[rank];
  locRowEnd     = rowFirstVar[rank+1];
  locColStart   = colFirstVar[rank];
  locColEnd     = colFirstVar[rank+1];
  rectangular   = (sOrder->numVars != tOrder->numVars) ? PETSC_TRUE : PETSC_FALSE;
  /* Get new field information */
  if (constCtx != PETSC_NULL) {
    ierr = (*constCtx->ops->getsize)(constCtx, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, &newComp);
    CHKERRQ(ierr);
  }

  /* Preallocate possible nonzeros - Note that we are being pessimistic since we set
     the whole dense element matrix, which we know contains some zeros for certain
     operators */
  ierr = PetscMalloc(numNodes                   * sizeof(int), &nodeDone);                                CHKERRQ(ierr);
  ierr = PetscMalloc(maxDegree*numCorners * sizeof(int), &nodeNeighbors);                                 CHKERRQ(ierr);

  /* Get the number of ghost variables due to constraints */
  numGhostNodes = 0;
  numGhostVars  = 0;
  if ((grid->isConstrained == PETSC_TRUE) && (numProcs > 1)) {
    ierr = PetscMemzero(nodeDone, numNodes * sizeof(int));                                                CHKERRQ(ierr);
    for(elem = 0; elem < numOverlapElements; elem++) {
      for(corner = 0; corner < numCorners; corner++) {
        ierr = MeshGetNodeFromElement(mesh, elem, corner, &node);                                         CHKERRQ(ierr);
        if (node >= numNodes) continue;
        if (nodeDone[node])   continue;
        nodeDone[node] = 1;

        nclass = rowClasses[node];
        if (rowIsConst[nclass]) {
          ierr = (*constCtx->ops->getindices)(constCtx, mesh, tOrder, node, CONSTRAINT_ROW_INDEX, &startVar); CHKERRQ(ierr);
          /* Include only new variables since only they can be ghosts */
          nodeVars = newComp;
          if ((startVar < locRowStart) || (startVar >= locRowEnd)) {
            /* This is a constraint which generates an off-processor variable */
            numGhostNodes++;
            numGhostVars += nodeVars;
          }
        }
      }
    }
  }

  /* Calculate matrix allocation */
  /* The ghost arrays always exist, with one spare entry, so that the pointers given to
     PetscGhostExchange() are valid even when this process has no ghosts */
  maxGhostNodes = numGhostNodes;
  maxGhostVars  = numGhostVars;
  ierr = PetscMalloc((maxGhostNodes+1) * sizeof(int), &ghostNodes);                                       CHKERRQ(ierr);
  ierr = PetscMalloc((maxGhostNodes+1) * sizeof(int), &ghostProcs);                                       CHKERRQ(ierr);
  ierr = PetscMalloc((maxGhostVars+1)  * sizeof(int), &ghostVars);                                        CHKERRQ(ierr);
  ierr = PetscMalloc((maxGhostVars+1)  * sizeof(int), &ghostVarProcs);                                    CHKERRQ(ierr);
  /* Counts for ghost rows are stored after the rowLocVars local rows */
  ierr = PetscMalloc((rowLocVars+numGhostVars) * sizeof(int), &diagRows);                                 CHKERRQ(ierr);
  ierr = PetscMalloc((rowLocVars+numGhostVars) * sizeof(int), &offdiagRows);                              CHKERRQ(ierr);
  ierr = PetscMemzero(diagRows,    (rowLocVars+numGhostVars) * sizeof(int));                              CHKERRQ(ierr);
  ierr = PetscMemzero(offdiagRows, (rowLocVars+numGhostVars) * sizeof(int));                              CHKERRQ(ierr);
  ierr = PetscMemzero(nodeDone,     numNodes                 * sizeof(int));                              CHKERRQ(ierr);
  for(elem = 0, numGhostNodes = 0, numGhostVars = 0; elem < numOverlapElements; elem++) {
    for(corner = 0; corner < numCorners; corner++) {
      ierr = MeshGetNodeFromElement(mesh, elem, corner, &node);                                           CHKERRQ(ierr);
      if (node >= numNodes)
        continue;
      if (nodeDone[node])
        continue;
      nodeDone[node] = 1;

      nclass      = rowClasses[node];
      startVar    = rowOffsets[node] - locRowStart;
      nodeVars    = rowClassSizes[nclass];
      newNodeVars = 0;
      if (rowIsConst[nclass]) {
        ierr = (*constCtx->ops->getindices)(constCtx, mesh, tOrder, node, CONSTRAINT_ROW_INDEX, &newStartVar); CHKERRQ(ierr);
        /* Include only new variables */
        newNodeVars = newComp;
        if ((newStartVar < locRowStart) || (newStartVar >= locRowEnd)) {
          /* This is a constraint which generates an off-processor variable */
          if ((numGhostNodes >= maxGhostNodes) || (numGhostVars + newComp > maxGhostVars)) {
            /* The counting pass did not see this ghost, so there is no room for it */
            SETERRQ1(PETSC_ERR_PLIB, "Unexpected off-processor constraint variable %d", newStartVar);
          }
          ghostNodes[numGhostNodes]     = newStartVar;
          /* Find the owner, without reading past the numProcs+1 entries of rowFirstVar */
          for(proc = 0; (proc < numProcs) && (newStartVar >= rowFirstVar[proc+1]); proc++) ;
          if (proc >= numProcs) {
            SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Constraint variable %d is not owned by any processor", newStartVar);
          }
          ghostProcs[numGhostNodes]     = proc;
          for(var = 0; var < newComp; var++, numGhostVars++) {
            ghostVars[numGhostVars]     = newStartVar + var;
            ghostVarProcs[numGhostVars] = proc;
          }
          numGhostNodes++;
          /* Set partition for the appropriate processor */
          newLocColStart = colFirstVar[proc];
          newLocColEnd   = colFirstVar[proc+1];
          /* Reset newStartVar to the correct position in diagRows */
          newStartVar    = rowLocVars + (numGhostVars - newComp);
        } else {
          newLocColStart = locColStart;
          newLocColEnd   = locColEnd;
          /* Reset newStartVar to the correct position in diagRows */
          newStartVar   -= locRowStart;
        }
      }
      if (nodeVars+newNodeVars == 0) continue;

      /* Loop over nodes on each element in the support of the node */
      ierr = MeshGetNodeSupport(mesh, node, elem, &degree, &support);                                     CHKERRQ(ierr);
      for(sElem = 0, count = 0; sElem < degree; sElem++) {
        for(sCorner = 0; sCorner < numCorners; sCorner++) {
          /* Disregard normal columns if we are forming a boundary matrix */
          ierr = MeshGetNodeFromElement(mesh, support[sElem], sCorner, &sNode);                           CHKERRQ(ierr);
          ierr = MeshGetNodeBoundary(mesh, sNode, &marker);                                               CHKERRQ(ierr);
          if ((bdCols == PETSC_TRUE) && (marker == 0)) continue;
          sNclass      = colClasses[sNode];
          sStartVar    = colOffsets[sNode];
          sNodeVars    = colClassSizes[sNclass];
          sNewNodeVars = 0;

          if (colIsConst[sNclass]) {
            ierr = (*constCtx->ops->getindices)(constCtx, mesh, sOrder, sNode, CONSTRAINT_COL_INDEX, &sNewStartVar);
            CHKERRQ(ierr);
            sNewNodeVars = newComp;
          }

          /* Check for duplicate node */
          for(neighbor = 0;  neighbor < count; neighbor++) {
            if (nodeNeighbors[neighbor] == sNode) break;
          }
          if (neighbor < count) {
            continue;
          } else {
#ifdef PETSC_USE_BOPT_g
            if (count >= maxDegree*numCorners) {
              SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Too many neighboring nodes: %d", count);
            }
#endif
            nodeNeighbors[count++] = sNode;
          }

          /* Count each pairing of {old, new} row variables with {old, new} column variables.
             A call with no row or no column variables does nothing. */
          ierr = PlaceVariables_Private(startVar,    nodeVars,    locColStart,    locColEnd,    sStartVar,    sNodeVars,
                                        rectangular, diagRows, offdiagRows);
          CHKERRQ(ierr);
          ierr = PlaceVariables_Private(newStartVar, newNodeVars, newLocColStart, newLocColEnd, sStartVar,    sNodeVars,
                                        rectangular, diagRows, offdiagRows);
          CHKERRQ(ierr);
          ierr = PlaceVariables_Private(startVar,    nodeVars,    locColStart,    locColEnd,    sNewStartVar, sNewNodeVars,
                                        rectangular, diagRows, offdiagRows);
          CHKERRQ(ierr);
          ierr = PlaceVariables_Private(newStartVar, newNodeVars, newLocColStart, newLocColEnd, sNewStartVar, sNewNodeVars,
                                        rectangular, diagRows, offdiagRows);
          CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
          /* On a single process no column can be off-diagonal */
          if ((numProcs == 1) && (offdiagRows[startVar] > 0)) {
            for(proc = 0; proc <= numProcs; proc++)
              PetscPrintf(PETSC_COMM_SELF, "colFirstVar[%d]: %d\n", proc, colFirstVar[proc]);
            /* Reusing node is harmless here since the error below leaves the function */
            for(node = 0; node < colMap->numNodes; node++)
              PetscPrintf(PETSC_COMM_SELF, "colOffsets[%d]: %d\n", node, colOffsets[node]);
            PetscPrintf(PETSC_COMM_SELF, "sNode %d sStartVar %d in [%d,%d)\n", sNode, sStartVar, locColStart, locColEnd);
            SETERRQ2(PETSC_ERR_PLIB, "Invalid var alloc in elem %d var %d", elem, startVar);
          }
          if ((numProcs == 1) && (rowIsConst[nclass]) && (offdiagRows[newStartVar] > 0)) {
            SETERRQ2(PETSC_ERR_PLIB, "Invalid var alloc in elem %d var %d", elem, newStartVar);
          }
#endif
        }
      }
      ierr = MeshRestoreNodeSupport(mesh, node, elem, &degree, &support);                                 CHKERRQ(ierr);
    }
  }

#ifdef PETSC_USE_BOPT_g
  /* Check that we looked at every node */
  for(node = 0; node < numNodes; node++){
    if (!nodeDone[node]) SETERRQ1(PETSC_ERR_PLIB, "Node %d was not encountered", node);
  }
#endif
  ierr = PetscOptionsHasName(PETSC_NULL, "-trace_alloc", &opt);                                           CHKERRQ(ierr);
  if (opt == PETSC_TRUE) {
    for(var = 0; var < rowLocVars; var++) {
      PetscSynchronizedPrintf(comm, "diagRows[%d]: %d offdiagRows[%d]: %d\n",
                              var + rowFirstVar[rank], diagRows[var], var + rowFirstVar[rank], offdiagRows[var]);
    }
    PetscSynchronizedFlush(comm);
  }
#ifdef PETSC_USE_BOPT_g
  ierr = PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);                                            CHKERRQ(ierr);
#endif

  /* Communicate */
  if ((grid->isConstrained == PETSC_TRUE) && (numProcs > 1)) {
    /* Add the counts gathered for ghost rows into the rows of the owning process */
    ierr = PetscGhostExchange(comm, numGhostVars, ghostVarProcs, ghostVars, PETSC_INT, rowFirstVar,
                              ADD_VALUES, SCATTER_REVERSE, diagRows,    &diagRows[rowLocVars]);
    CHKERRQ(ierr);
    ierr = PetscGhostExchange(comm, numGhostVars, ghostVarProcs, ghostVars, PETSC_INT, rowFirstVar,
                              ADD_VALUES, SCATTER_REVERSE, offdiagRows, &offdiagRows[rowLocVars]);
    CHKERRQ(ierr);
  }

  /* Stopgap solution for constrained variables */
  /* NOTE(review): offdiagRows is also clamped to colLocVars, not to the number of off-process
     columns (colVars - colLocVars) - confirm that this is intended */
  if (grid->isConstrained == PETSC_TRUE) {
    for(var = 0; var < rowLocVars; var++) {
      if (diagRows[var]    > colLocVars) diagRows[var] = colLocVars;
      if (offdiagRows[var] > colLocVars) offdiagRows[var] = colLocVars;
    }
  }

  /* Create the matrix */
  ierr = MatCreateMPIAIJ(comm, rowLocVars, colLocVars, rowVars, colVars, 0, diagRows, 0, offdiagRows, gmat); CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) *gmat, "Grid", (PetscObject) grid);                             CHKERRQ(ierr);
  ierr = MatSetOption(*gmat, MAT_NEW_NONZERO_ALLOCATION_ERR);                                             CHKERRQ(ierr);

  /* Cleanup */
  ierr   = PetscFree(diagRows);                                                                           CHKERRQ(ierr);
  ierr   = PetscFree(offdiagRows);                                                                        CHKERRQ(ierr);
  ierr   = PetscFree(nodeDone);                                                                           CHKERRQ(ierr);
  ierr   = PetscFree(nodeNeighbors);                                                                      CHKERRQ(ierr);
  /* The ghost arrays were allocated unconditionally, so they are released unconditionally */
  ierr   = PetscFree(ghostNodes);                                                                         CHKERRQ(ierr);
  ierr   = PetscFree(ghostProcs);                                                                         CHKERRQ(ierr);
  ierr   = PetscFree(ghostVars);                                                                          CHKERRQ(ierr);
  ierr   = PetscFree(ghostVarProcs);                                                                      CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GMatView_Draw_Triangular_2D"
/*
  GMatView_Draw_Triangular_2D - Views the matrix with the given viewer by handing it to MatView().

  Input Parameters:
+ gmat - the matrix
- v    - the viewer

  Returns the code produced by MatView(). It is passed back directly rather than through CHKERRQ().
*/
int GMatView_Draw_Triangular_2D(GMat gmat, PetscViewer v)
{
  int status; /* Result of MatView(), returned unchanged */

  PetscFunctionBegin;
  status = MatView(gmat, v);
  PetscFunctionReturn(status);
}

#undef  __FUNCT__
#define __FUNCT__ "GMatView_Triangular_2D"
/*
  GMatView_Triangular_2D - Views a grid matrix according to the type of the viewer.

  Input Parameters:
+ gmat   - the matrix
- viewer - the viewer

  Notes:
  For an ASCII viewer the grid obtained from the matrix is viewed first, the viewer is flushed, and
  then the matrix itself is viewed. For a draw viewer the work is passed to GMatView_Draw_Triangular_2D().
  Any other viewer type is ignored and 0 is returned.
*/
int GMatView_Triangular_2D(GMat gmat, PetscViewer viewer)
{
  PetscTruth textViewer, drawViewer;
  Grid       owner;
  int        ierr;

  PetscFunctionBegin;
  /* Both type queries are made before anything is viewed */
  ierr = PetscTypeCompare((PetscObject) viewer, PETSC_VIEWER_ASCII, &textViewer);                         CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject) viewer, PETSC_VIEWER_DRAW,  &drawViewer);                         CHKERRQ(ierr);

  if (textViewer == PETSC_TRUE) {
    /* Grid first, then the matrix */
    ierr = GMatGetGrid(gmat, &owner);                                                                     CHKERRQ(ierr);
    ierr = GridView(owner, viewer);                                                                       CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);                                                                      CHKERRQ(ierr);
    ierr = MatView(gmat, viewer);                                                                         CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  if (drawViewer == PETSC_TRUE) {
    ierr = GMatView_Draw_Triangular_2D(gmat, viewer);                                                     CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GMatEvaluateALEOperatorGalerkin_Triangular_2D"
/*
  GMatEvaluateALEOperatorGalerkin_Triangular_2D - Assembles an ALE operator into a matrix, one element
  matrix at a time.

  Input Parameters:
+ grid      - the grid
. numFields - the number of entries in sFields and tFields
. sFields   - the fields supplying the columns (shape functions)
. sOrder    - the global variable ordering used for the column indices
. sLocOrder - the local ordering giving the element column size and the per-field offsets
. tFields   - the fields supplying the rows (test functions)
. tOrder    - the global variable ordering used for the row indices
. tLocOrder - the local ordering giving the element row size and the per-field offsets
. op        - the index of the operator in each discretization
. alpha     - the scalar passed on to the discretization
. type      - the assembly type given to MatAssemblyBegin()/MatAssemblyEnd()
- ctx       - the user context passed on to the discretization

  Output Parameter:
. M         - the matrix, to which every element matrix is added with ADD_VALUES

  Notes:
  The test space of operator op on each field sFields[f] is set to the discretization of tFields[f]
  before assembly, and GridResetConstrainedMultiply_Private() is called on M afterwards.
  When grid->ALEActive is not PETSC_TRUE the ALE vectors handed on are PETSC_NULL.
*/
int GMatEvaluateALEOperatorGalerkin_Triangular_2D(Grid grid, GMat M, int numFields, int *sFields, VarOrdering sOrder,
                                                  LocalVarOrdering sLocOrder, int *tFields, VarOrdering tOrder,
                                                  LocalVarOrdering tLocOrder, int op, PetscScalar alpha, MatAssemblyType type,
                                                  void *ctx)
{
  Mesh         mesh        = grid->mesh;
  int          numCols     = sLocOrder->elemSize;   /* Columns in an element matrix */
  int          numRows     = tLocOrder->elemSize;   /* Rows in an element matrix */
  int         *colStart    = sLocOrder->elemStart;  /* Offset of each field among the element columns */
  int         *rowStart    = tLocOrder->elemStart;  /* Offset of each field among the element rows */
  ElementVec   localVec    = grid->ghostElementVec; /* Local solution vector */
  PetscScalar *localValues = localVec->array;       /* The values in the local solution vector */
  ElementVec   meshVelVec  = PETSC_NULL;            /* The ALE velocity vector with mesh discretization */
  ElementVec   velVec      = PETSC_NULL;            /* The ALE velocity vector */
  PetscScalar *velValues   = PETSC_NULL;            /* The values in the ALE element vector */
  Partition    part;
  MeshMover    mover;
  Grid         velGrid;                             /* The grid describing the mesh velocity */
  ElementMat   elemMat;                             /* The element matrix */
  PetscScalar *elemValues;                          /* The values in the element matrix */
  int          numElements;
  int          fld, e;
#ifdef PETSC_USE_BOPT_g
  int          row, col;
  PetscTruth   trace;
#endif
  int          ierr;

  PetscFunctionBegin;
  ierr = MeshGetPartition(mesh, &part);                                                                   CHKERRQ(ierr);
  ierr = MeshGetMover(mesh, &mover);                                                                      CHKERRQ(ierr);
  ierr = PartitionGetNumElements(part, &numElements);                                                     CHKERRQ(ierr);
  ierr = MeshMoverGetVelocityGrid(mover, &velGrid);                                                       CHKERRQ(ierr);

  /* Create the element matrix */
  ierr = ElementMatCreate(grid->comm, numRows, numCols, &elemMat);                                        CHKERRQ(ierr);
  elemValues = elemMat->array;

  /* The ALE vectors stay PETSC_NULL unless ALE is active */
  if (grid->ALEActive == PETSC_TRUE) {
    /* Notice that the velocity values come from this grid, not the mesh velocity grid */
    meshVelVec = velGrid->vec;
    velVec     = grid->vec;
    velValues  = velVec->array;
  }

  /* Tell each operator which discretization supplies the test functions */
  for(fld = 0; fld < numFields; fld++) {
    grid->fields[sFields[fld]].disc->operators[op]->test = grid->fields[tFields[fld]].disc;
  }

  for(e = 0; e < numElements; e++) {
    /* Start from a zero element matrix */
    ierr = ElementMatZero(elemMat);                                                                       CHKERRQ(ierr);

    /* Gather the local solution on this element */
    ierr = GridCalcLocalElementVecIndices(grid, e, localVec);                                             CHKERRQ(ierr);
    ierr = GridLocalToElement(grid, localVec);                                                            CHKERRQ(ierr);

    /* Gather the mesh velocity on this element */
    if (grid->ALEActive == PETSC_TRUE) {
      ierr = GridCalcLocalElementVecIndices(velGrid, e, meshVelVec);                                      CHKERRQ(ierr);
      ierr = GridLocalToElement(velGrid, meshVelVec);                                                     CHKERRQ(ierr);
    }

    for(fld = 0; fld < numFields; fld++) {
      const int sField = sFields[fld];
      const int tField = tFields[fld];

      /* Add the contribution of this field pair to the element matrix */
      ierr = GridInterpolateElementVec(velGrid, 0, meshVelVec, grid, sField, velVec);                     CHKERRQ(ierr);
      ierr = DiscretizationEvaluateALEOperatorGalerkin(grid->fields[sField].disc, mesh, numCols, rowStart[tField], colStart[sField],
                                                       op, alpha, e, &localValues[colStart[sField]], velValues, elemValues, ctx);
      CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
      ierr = PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);                                        CHKERRQ(ierr);
#endif
    }

    /* Compute the global row and column indices of the element matrix */
    ierr = GridCalcGeneralElementMatIndices(grid, e, sOrder, tOrder, PETSC_FALSE, elemMat);               CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
    ierr = PetscOptionsHasName(PETSC_NULL, "-trace_mat_assembly", &trace);                                CHKERRQ(ierr);
    if (trace == PETSC_TRUE) {
      /* Header line with the column indices */
      PetscPrintf(grid->comm, "      %3d", elemMat->colIndices[0]);
      for(col = 1; col < elemMat->reduceColSize; col++) {
        PetscPrintf(grid->comm, "   %3d", elemMat->colIndices[col]);
      }
      PetscPrintf(grid->comm, "\n");
      /* One line per row: the row index followed by the real parts of the entries */
      for(row = 0; row < elemMat->reduceRowSize; row++) {
        PetscPrintf(grid->comm, "%3d ", elemMat->rowIndices[row]);
        for(col = 0; col < elemMat->reduceColSize; col++) {
          PetscPrintf(grid->comm, "%5.2g ", PetscRealPart(elemMat->array[row*elemMat->reduceColSize+col]));
        }
        PetscPrintf(grid->comm, "\n");
      }
    }
#endif
    /* Add the element matrix into the global matrix */
    ierr = ElementMatSetValues(elemMat, M, ADD_VALUES);                                                   CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(M, type);                                                                       CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M, type);                                                                         CHKERRQ(ierr);

  /* Cleanup */
  ierr = ElementMatDestroy(elemMat);                                                                      CHKERRQ(ierr);

  /* Reset size functions */
  ierr = GridResetConstrainedMultiply_Private(grid, M);                                                   CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GMatEvaluateOperatorGalerkin_Triangular_2D"
/*
  GMatEvaluateOperatorGalerkin_Triangular_2D - Assembles an operator into a matrix, one element matrix
  at a time.

  Input Parameters:
+ grid      - the grid
. x         - the vector scattered to local form and given to the discretization, or PETSC_NULL
. sOrder    - the global variable ordering used for the column indices; its class map supplies the column fields
. sLocOrder - the local ordering giving the element column size and the per-field offsets
. tOrder    - the global variable ordering used for the row indices; its class map supplies the row fields
. tLocOrder - the local ordering giving the element row size and the per-field offsets
. op        - the index of the operator in each discretization
. alpha     - the scalar passed on to the discretization
. type      - the assembly type given to MatAssemblyBegin()/MatAssemblyEnd()
- ctx       - the user context passed on to the discretization

  Output Parameter:
. M         - the matrix, to which every element matrix is added with ADD_VALUES

  Notes:
  The element vector and matrix are created large enough for constrained fields: for each constrained
  field the size grows by disc->funcs*constraintCompDiff. The reduced sizes are reset to the local
  ordering sizes at the start of every element.

  The loops over the column fields index tFields with the same counter, so the two class maps are
  presumably expected to list matching fields - NOTE(review): confirm against the callers.
*/
int GMatEvaluateOperatorGalerkin_Triangular_2D(Grid grid, GMat M, GVec x, VarOrdering sOrder, LocalVarOrdering sLocOrder,
                                               VarOrdering tOrder, LocalVarOrdering tLocOrder, int op, PetscScalar alpha,
                                               MatAssemblyType type, void *ctx)
{
  Mesh             mesh          = grid->mesh;
  PetscTruth       reduceSystem  = grid->reduceSystem;
  PetscTruth       reduceElement = grid->reduceElement;
  int              numCols       = sLocOrder->elemSize;  /* Columns in an element matrix, before constraints */
  int              numRows       = tLocOrder->elemSize;  /* Rows in an element matrix, before constraints */
  int             *colStart      = sLocOrder->elemStart; /* Offset of each field among the element columns */
  int             *rowStart      = tLocOrder->elemStart; /* Offset of each field among the element rows */
  FieldClassMap    colMap,       rowMap;
  int              numColFields, numRowFields;
  int             *colFields,   *rowFields;
  PetscTruth       colsConstrained, rowsConstrained;
  Vec              localVec;     /* The local ghost vector for x (usually the solution) */
  VecScatter       localScatter; /* The scatter from x to localVec */
  ElementMat       elemMat;
  ElementVec       elemVec;
  PetscScalar     *elemVecValues, *elemMatValues;
  int              numElements;
  int              fld, e;
#ifdef PETSC_USE_BOPT_g
  PetscTruth       trace;
#endif
  int              ierr;

  PetscFunctionBegin;
  ierr = MeshGetInfo(mesh, PETSC_NULL, PETSC_NULL, PETSC_NULL, &numElements);                             CHKERRQ(ierr);
  ierr = VarOrderingGetClassMap(sOrder, &colMap);                                                         CHKERRQ(ierr);
  ierr = VarOrderingGetClassMap(tOrder, &rowMap);                                                         CHKERRQ(ierr);
  numColFields    = colMap->numFields;
  colFields       = colMap->fields;
  colsConstrained = colMap->isConstrained;
  numRowFields    = rowMap->numFields;
  rowFields       = rowMap->fields;
  rowsConstrained = rowMap->isConstrained;

  /* Create the ghost vector and scatter for the row ordering */
  ierr = (*grid->ops->gridsetupghostscatter)(grid, tOrder, &localVec, &localScatter);                     CHKERRQ(ierr);

  /* Enlarge the element sizes to make room for constrained fields */
  if (rowsConstrained == PETSC_TRUE) {
    for(fld = 0; fld < numRowFields; fld++) {
      if (grid->fields[rowFields[fld]].isConstrained != PETSC_TRUE) continue;
      numRows += grid->fields[rowFields[fld]].disc->funcs*grid->fields[rowFields[fld]].constraintCompDiff;
    }
  }
  if (colsConstrained == PETSC_TRUE) {
    for(fld = 0; fld < numColFields; fld++) {
      if (grid->fields[colFields[fld]].isConstrained != PETSC_TRUE) continue;
      numCols += grid->fields[colFields[fld]].disc->funcs*grid->fields[colFields[fld]].constraintCompDiff;
    }
  }

  /* Create the element vector and matrix; the element vector starts out zero */
  ierr = ElementVecCreate(grid->comm, numRows, &elemVec);                                                 CHKERRQ(ierr);
  elemVecValues = elemVec->array;
  ierr = ElementMatCreate(grid->comm, numRows, numCols, &elemMat);                                        CHKERRQ(ierr);
  elemMatValues = elemMat->array;
  ierr = ElementVecZero(elemVec);                                                                         CHKERRQ(ierr);

  /* Scatter x into the local ghost vector */
  if (x != PETSC_NULL) {
    ierr = GridGlobalToLocalGeneral(grid, x, localVec, INSERT_VALUES, localScatter);                      CHKERRQ(ierr);
  }

  /* Tell each operator which discretization supplies the test functions */
  for(fld = 0; fld < numColFields; fld++) {
    grid->fields[colFields[fld]].disc->operators[op]->test = grid->fields[rowFields[fld]].disc;
  }

  for(e = 0; e < numElements; e++) {
    /* Start from a zero element matrix with the unconstrained sizes */
    ierr = ElementMatZero(elemMat);                                                                       CHKERRQ(ierr);
    elemMat->reduceRowSize = tLocOrder->elemSize;
    elemMat->reduceColSize = sLocOrder->elemSize;
    elemVec->reduceSize    = tLocOrder->elemSize;

    if (x != PETSC_NULL) {
      /* Compute the local row indices for the ghost vector */
      ierr = GridCalcGeneralElementVecIndices(grid, e, tOrder, PETSC_NULL, PETSC_TRUE, elemVec);          CHKERRQ(ierr);
      /* Gather the local solution on this element */
      ierr = GridLocalToElementGeneral(grid, localVec, grid->bdReduceVecCur, reduceSystem, reduceElement, elemVec);CHKERRQ(ierr);
      /* Must transform to unconstrained variables for element integrals */
      ierr = GridProjectElementVec(grid, mesh, e, tOrder, PETSC_FALSE, elemVec);                          CHKERRQ(ierr);
    }

    for(fld = 0; fld < numColFields; fld++) {
      const int sField = colFields[fld];
      const int tField = rowFields[fld];

      /* Add the contribution of this field pair to the element matrix */
      ierr = DiscretizationEvaluateOperatorGalerkin(grid->fields[sField].disc, mesh, numCols, rowStart[tField], colStart[sField],
                                                    op, alpha, e, &elemVecValues[colStart[sField]], elemMatValues, ctx);
      CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
      ierr = PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);                                        CHKERRQ(ierr);
#endif
    }

    /* Compute the global row and column indices of the element matrix */
    ierr = GridCalcGeneralElementMatIndices(grid, e, sOrder, tOrder, PETSC_FALSE, elemMat);               CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
    ierr = PetscOptionsHasName(PETSC_NULL, "-trace_mat_assembly", &trace);                                CHKERRQ(ierr);
    if (trace == PETSC_TRUE) {
      ierr = ElementMatView(elemMat, PETSC_VIEWER_STDOUT_(elemMat->comm));                                CHKERRQ(ierr);
    }
#endif
    /* Add the element matrix into the global matrix */
    ierr = ElementMatSetValues(elemMat, M, ADD_VALUES);                                                   CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M, type);                                                                       CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M, type);                                                                         CHKERRQ(ierr);

  /* Cleanup */
  ierr = VecDestroy(localVec);                                                                            CHKERRQ(ierr);
  ierr = VecScatterDestroy(localScatter);                                                                 CHKERRQ(ierr);
  ierr = ElementVecDestroy(elemVec);                                                                      CHKERRQ(ierr);
  ierr = ElementMatDestroy(elemMat);                                                                      CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GMatEvaluateALEConstrainedOperatorGalerkin_Triangular_2D"
/*
  GMatEvaluateALEConstrainedOperatorGalerkin_Triangular_2D - Adds the ALE form of operator 'op' to the matrix M,
  element by element, using an element matrix that is enlarged for every constrained field.

  Input Parameters:
+ grid      - The grid; supplies the mesh, the fields, the ghost element vector and the ALE state
. M         - The matrix receiving the element contributions (they are added with ADD_VALUES)
. numFields - The number of (sFields[f], tFields[f]) pairs to process
. sFields   - The fields which supply the columns of the element matrix
. sOrder    - The variable ordering used for the global column indices
. sLocOrder - The element-local ordering for the column fields (elemSize, elemStart[])
. tFields   - The test function fields, which supply the rows of the element matrix
. tOrder    - The variable ordering used for the global row indices
. tLocOrder - The element-local ordering for the row fields (elemSize, elemStart[])
. op        - The index into operators[] of each column field's discretization
. alpha     - The scalar handed to DiscretizationEvaluateALEOperatorGalerkin()
. type      - The assembly type handed to MatAssemblyBegin()/MatAssemblyEnd()
- ctx       - The user context handed to DiscretizationEvaluateALEOperatorGalerkin()

  Returns 0 on success, otherwise the first nonzero error code produced by a callee.

  Notes:
  M is assembled before returning, and GridResetConstrainedMultiply_Private() is called on it last.
  In PETSC_USE_BOPT_g builds, the option -trace_mat_assembly prints every element matrix.
*/
int GMatEvaluateALEConstrainedOperatorGalerkin_Triangular_2D(Grid grid, GMat M, int numFields, int *sFields, VarOrdering sOrder,
                                                             LocalVarOrdering sLocOrder, int *tFields, VarOrdering tOrder,
                                                             LocalVarOrdering tLocOrder, int op, PetscScalar alpha, MatAssemblyType type,
                                                             void *ctx)
{
  Mesh         mesh        = grid->mesh;
  Partition    part;
  int          numElements;
  int          sElemSize   = sLocOrder->elemSize;
  int          tElemSize   = tLocOrder->elemSize;
  int         *sElemStart  = sLocOrder->elemStart;
  int         *tElemStart  = tLocOrder->elemStart;
  ElementVec   ghostVec    = grid->ghostElementVec; /* Local solution vector */
  PetscScalar *ghostArray  = ghostVec->array;       /* The values in the ghost element vector */
  MeshMover    mover;
  Grid         ALEGrid;      /* The grid describing the mesh velocity */
  ElementMat   mat;          /* The element matrix */
  PetscScalar *array;        /* The values in the element matrix */
  ElementVec   MeshALEVec;   /* The ALE velocity vector with mesh discretization */
  ElementVec   ALEVec;       /* The ALE velocity vector */
  PetscScalar *ALEArray;     /* The values in the ALE element vector */
  int          sField, tField;
  int          f, elem;
#ifdef PETSC_USE_BOPT_g
  PetscTruth   opt;
  int          i, j;
#endif
  int          ierr;

  PetscFunctionBegin;
  ierr = MeshGetPartition(mesh, &part);                                                                   CHKERRQ(ierr);
  ierr = MeshGetMover(mesh, &mover);                                                                      CHKERRQ(ierr);
  ierr = PartitionGetNumElements(part, &numElements);                                                     CHKERRQ(ierr);
  ierr = MeshMoverGetVelocityGrid(mover, &ALEGrid);                                                       CHKERRQ(ierr);
  /* Setup element matrix: every constrained field enlarges its dimension by funcs*constraintCompDiff */
  for(f = 0; f < numFields; f++) {
    if (grid->fields[sFields[f]].isConstrained == PETSC_TRUE) {
      sElemSize += grid->fields[sFields[f]].disc->funcs*grid->fields[sFields[f]].constraintCompDiff;
    }
    if (grid->fields[tFields[f]].isConstrained == PETSC_TRUE) {
      tElemSize += grid->fields[tFields[f]].disc->funcs*grid->fields[tFields[f]].constraintCompDiff;
    }
  }
  ierr  = ElementMatCreate(grid->comm, tElemSize, sElemSize, &mat);                                       CHKERRQ(ierr);
  array = mat->array;

  /* Setup ALE variables -- No new variables should be ALE so ALEVec is not recalculated */
  if (grid->ALEActive == PETSC_TRUE) {
    /* Notice that the ALEArray is from this grid, not the mesh velocity grid */
    MeshALEVec = ALEGrid->vec;
    ALEVec     = grid->vec;
    ALEArray   = ALEVec->array;
  } else {
    MeshALEVec = PETSC_NULL;
    ALEVec     = PETSC_NULL;
    ALEArray   = PETSC_NULL;
  }

  /* Setup the operator with information about the test function space */
  for(f = 0; f < numFields; f++) {
    grid->fields[sFields[f]].disc->operators[op]->test = grid->fields[tFields[f]].disc;
  }

  for(elem = 0; elem < numElements; elem++) {
    /* Initialize element matrix; the reduced sizes are the unenlarged local ordering sizes */
    ierr = ElementMatZero(mat);                                                                           CHKERRQ(ierr);
    mat->reduceRowSize = tLocOrder->elemSize;
    mat->reduceColSize = sLocOrder->elemSize;

    /* Setup the indices of the ghost element vector for this element */
    ierr = GridCalcLocalElementVecIndices(grid, elem, ghostVec);                                          CHKERRQ(ierr);

    /* Setup local solution vector */
    ierr = GridLocalToElement(grid, ghostVec);                                                            CHKERRQ(ierr);

    /* Setup ALE variables */
    if (grid->ALEActive == PETSC_TRUE) {
      ierr = GridCalcLocalElementVecIndices(ALEGrid, elem, MeshALEVec);                                   CHKERRQ(ierr);
      ierr = GridLocalToElement(ALEGrid, MeshALEVec);                                                     CHKERRQ(ierr);
    }

    for(f = 0; f < numFields; f++) {
      sField = sFields[f];
      tField = tFields[f];
      /* Calculate the contribution to the element matrix from the field */
      /* NOTE(review): the interpolation is not guarded by ALEActive, so it receives PETSC_NULL for both
         element vectors when ALE is inactive -- confirm that GridInterpolateElementVec() tolerates this */
      ierr = GridInterpolateElementVec(ALEGrid, 0, MeshALEVec, grid, sField, ALEVec);                     CHKERRQ(ierr);
      ierr = DiscretizationEvaluateALEOperatorGalerkin(grid->fields[sField].disc, mesh, sElemSize, tElemStart[tField], sElemStart[sField],
                                                       op, alpha, elem, &ghostArray[sElemStart[sField]], ALEArray, array, ctx);
      CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
      ierr = PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);                                        CHKERRQ(ierr);
#endif
    }

    /* Setup global row and column indices */
    ierr = GridCalcGeneralElementMatIndices(grid, elem, sOrder, tOrder, PETSC_FALSE, mat);                CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
    ierr = PetscOptionsHasName(PETSC_NULL, "-trace_mat_assembly", &opt);                                  CHKERRQ(ierr);
    if (opt == PETSC_TRUE) {
      /* A header line of column indices, then one line per row: the row index and the real part of each entry */
      ierr = PetscPrintf(grid->comm, "      %3d", mat->colIndices[0]);                                    CHKERRQ(ierr);
      for(i = 1; i < mat->reduceColSize; i++) {
        ierr = PetscPrintf(grid->comm, "   %3d", mat->colIndices[i]);                                     CHKERRQ(ierr);
      }
      ierr = PetscPrintf(grid->comm, "\n");                                                               CHKERRQ(ierr);
      for(i = 0; i < mat->reduceRowSize; i++) {
        ierr = PetscPrintf(grid->comm, "%3d ", mat->rowIndices[i]);                                       CHKERRQ(ierr);
        for(j = 0; j < mat->reduceColSize; j++) {
          ierr = PetscPrintf(grid->comm, "%5.2g ", PetscRealPart(mat->array[i*mat->reduceColSize+j]));    CHKERRQ(ierr);
        }
        ierr = PetscPrintf(grid->comm, "\n");                                                             CHKERRQ(ierr);
      }
    }
#endif
    /* Put values in global matrix */
    ierr = ElementMatSetValues(mat, M, ADD_VALUES);                                                       CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M, type);                                                                       CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M, type);                                                                         CHKERRQ(ierr);

  /* Cleanup */
  ierr = ElementMatDestroy(mat);                                                                          CHKERRQ(ierr);

  /* Reset size functions */
  ierr = GridResetConstrainedMultiply_Private(grid, M);                                                   CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GMatEvaluateNewFields_Triangular_2D"
/*
  GMatEvaluateNewFields_Triangular_2D - Adds to the matrix M one element matrix for each new field
  introduced by the grid's constraints, as computed by the constraint object's newelemmat operation.

  Input Parameters:
+ grid      - The grid; supplies the fields, numNewFields, constraintOrder and constraintCtx
. M         - The matrix receiving the contributions (they are added with ADD_VALUES)
. numFields - The number of (sFields[f], tFields[f]) pairs used to size the element matrix
. sFields   - The fields which determine the number of columns of the element matrix
. sOrder    - Not referenced by this routine
. sLocOrder - Not referenced by this routine
. tFields   - The fields which determine the number of rows of the element matrix
. tOrder    - Not referenced by this routine
. tLocOrder - Not referenced by this routine
. alpha     - Not referenced by this routine
. type      - The assembly type handed to MatAssemblyBegin()/MatAssemblyEnd()
- ctx       - Not referenced by this routine

  Returns 0 on success, otherwise the first nonzero error code produced by a callee.

  Notes:
  Only constrained fields contribute to the element matrix size, each adding comp + constraintCompDiff.
  M is assembled before returning, and GridResetConstrainedMultiply_Private() is called on it last.
  In PETSC_USE_BOPT_g builds, the option -trace_mat_assembly prints every element matrix.
*/
int GMatEvaluateNewFields_Triangular_2D(Grid grid, GMat M, int numFields, int *sFields, VarOrdering sOrder,
                                        LocalVarOrdering sLocOrder, int *tFields, VarOrdering tOrder,
                                        LocalVarOrdering tLocOrder, PetscScalar alpha, MatAssemblyType type, void *ctx)
{
  VarOrdering           constOrder = grid->constraintOrder; /* The constrained variable ordering */
  PetscConstraintObject constCtx   = grid->constraintCtx;   /* The constraint object */
  int                   sElemSize  = 0;
  int                   tElemSize  = 0;
  ElementMat            mat;          /* The element matrix */
  int                   f, newField;
#ifdef PETSC_USE_BOPT_g
  int                   i, j;
  PetscTruth            opt;
#endif
  int                   ierr;

  PetscFunctionBegin;
  /* Setup element matrix: unconstrained fields contribute nothing to either dimension */
  for(f = 0; f < numFields; f++) {
    if (grid->fields[sFields[f]].isConstrained == PETSC_TRUE) {
      sElemSize += grid->fields[sFields[f]].disc->comp + grid->fields[sFields[f]].constraintCompDiff;
    }
    if (grid->fields[tFields[f]].isConstrained == PETSC_TRUE) {
      tElemSize += grid->fields[tFields[f]].disc->comp + grid->fields[tFields[f]].constraintCompDiff;
    }
  }
  ierr = ElementMatCreate(grid->comm, tElemSize, sElemSize, &mat);                                        CHKERRQ(ierr);

  for(newField = 0; newField < grid->numNewFields; newField++) {
    /* Initialize element matrix */
    ierr = ElementMatZero(mat);                                                                           CHKERRQ(ierr);

    /* Calculate the indices and contribution to the element matrix from the new field */
    /* NOTE(review): reduceRowSize/reduceColSize are never set here, yet the trace below reads them;
       presumably newelemmat fills them in -- confirm against the constraint implementation */
    ierr = (*constCtx->ops->newelemmat)(constCtx, constOrder, newField, mat);                             CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
    ierr = PetscOptionsHasName(PETSC_NULL, "-trace_mat_assembly", &opt);                                  CHKERRQ(ierr);
    if (opt == PETSC_TRUE) {
      /* A header line of column indices, then one line per row: the row index and the real part of each entry */
      ierr = PetscPrintf(grid->comm, "      %3d", mat->colIndices[0]);                                    CHKERRQ(ierr);
      for(i = 1; i < mat->reduceColSize; i++) {
        ierr = PetscPrintf(grid->comm, "   %3d", mat->colIndices[i]);                                     CHKERRQ(ierr);
      }
      ierr = PetscPrintf(grid->comm, "\n");                                                               CHKERRQ(ierr);
      for(i = 0; i < mat->reduceRowSize; i++) {
        ierr = PetscPrintf(grid->comm, "%3d ", mat->rowIndices[i]);                                       CHKERRQ(ierr);
        for(j = 0; j < mat->reduceColSize; j++) {
          ierr = PetscPrintf(grid->comm, "%5.2g ", PetscRealPart(mat->array[i*mat->reduceColSize+j]));    CHKERRQ(ierr);
        }
        ierr = PetscPrintf(grid->comm, "\n");                                                             CHKERRQ(ierr);
      }
    }
#endif
    /* Put values in global matrix */
    ierr = ElementMatSetValues(mat, M, ADD_VALUES);                                                       CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
    ierr = PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);                                         CHKERRQ(ierr);
#endif
  }

  ierr = MatAssemblyBegin(M, type);                                                                       CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M, type);                                                                         CHKERRQ(ierr);

  /* Cleanup */
  ierr = ElementMatDestroy(mat);                                                                          CHKERRQ(ierr);

  /* Reset size functions */
  ierr = GridResetConstrainedMultiply_Private(grid, M);                                                   CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GMatEvaluateBoundaryOperatorGalerkin_Triangular_2D"
/*
  GMatEvaluateBoundaryOperatorGalerkin_Triangular_2D - Adds the boundary operator 'op' to the matrix M by
  looping over the boundary edges of the mesh and evaluating each field's boundary discretization on them.

  Input Parameters:
+ grid      - The grid; supplies the mesh, the fields, numBd and the reduction flags
. M         - The matrix receiving the edge contributions (they are added with ADD_VALUES)
. x         - An optional vector (usually the solution), or PETSC_NULL; see the notes
. sOrder    - The variable ordering for the columns; its class map supplies the column fields
. sLocOrder - The element-local ordering for the column fields (elemSize, elemStart[])
. tOrder    - The variable ordering for the rows; its class map supplies the test fields
. tLocOrder - The element-local ordering for the row fields (elemSize, elemStart[])
. op        - The index into operators[] of each column field's boundary discretization
. alpha     - The scalar handed to DiscretizationEvaluateOperatorGalerkin()
. type      - The assembly type handed to MatAssemblyBegin()/MatAssemblyEnd()
- ctx       - The user context; it is wrapped in an EdgeContext together with the edge midnode

  Returns 0 on success, otherwise the first nonzero error code produced by a callee.

  Notes:
  Support for a non-null x is incomplete: as soon as a locally owned boundary edge is processed with
  x != PETSC_NULL, the routine fails with PETSC_ERR_SUP ("Being reworked").

  Column field f is paired with test field f for f < numSFields.
  NOTE(review): this presumably requires the two class maps to hold the same number of fields -- confirm.
*/
int GMatEvaluateBoundaryOperatorGalerkin_Triangular_2D(Grid grid, GMat M, GVec x, VarOrdering sOrder, LocalVarOrdering sLocOrder,
                                                       VarOrdering tOrder, LocalVarOrdering tLocOrder, int op, PetscScalar alpha,
                                                       MatAssemblyType type, void *ctx)
{
  MPI_Comm                 comm;
  Mesh                     mesh          = grid->mesh;
  Partition                part;
  Mesh_Triangular         *tri           = (Mesh_Triangular *) mesh->data;
  PetscTruth               reduceSystem  = grid->reduceSystem;
  PetscTruth               reduceElement = grid->reduceElement;
  int                      sElemSize     = sLocOrder->elemSize;
  int                      tElemSize     = tLocOrder->elemSize;
  int                     *sElemStart    = sLocOrder->elemStart;
  int                     *tElemStart    = tLocOrder->elemStart;
  int                      numEdges;
  int                     *bdEdges       = tri->bdEdges;
  FieldClassMap            sMap,         tMap;
  int                      firstEdge;
  int                      numSFields,   numTFields;
  int                     *sFields,     *tFields;
  PetscTruth               sConstrained, tConstrained;
  Vec                      ghostVec;     /* The local ghost vector for x (usually the solution) */
  VecScatter               ghostScatter; /* The scatter from x to ghostVec */
  ElementMat               mat;          /* The element matrix */
  ElementVec               elemGhostVec; /* The element vector of ghostVec values */
  PetscScalar             *array;        /* The values in the element matrix */
  EdgeContext              bdCtx;        /* A context wrapper to communicate the midnode of an edge */
  int                      sField, tField;
  int                      f, bd, edge, bdEdge, midNode;
#ifdef PETSC_USE_BOPT_g
  PetscTruth               opt;
#endif
  int                      ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject) grid, &comm);                                                   CHKERRQ(ierr);
  ierr = VarOrderingGetClassMap(sOrder, &sMap);                                                           CHKERRQ(ierr);
  ierr = VarOrderingGetClassMap(tOrder, &tMap);                                                           CHKERRQ(ierr);
  ierr = MeshGetPartition(mesh, &part);                                                                   CHKERRQ(ierr);
  ierr = PartitionGetNumEdges(part, &numEdges);                                                           CHKERRQ(ierr);
  ierr = PartitionGetStartEdge(part, &firstEdge);                                                         CHKERRQ(ierr);
  numSFields   = sMap->numFields;
  sFields      = sMap->fields;
  sConstrained = sMap->isConstrained;
  numTFields   = tMap->numFields;
  tFields      = tMap->fields;
  tConstrained = tMap->isConstrained;
  /* Setup reduction */
  ierr = (*grid->ops->gridsetupghostscatter)(grid, tOrder, &ghostVec, &ghostScatter);                     CHKERRQ(ierr);
  /* Setup element vector and matrix: every constrained field enlarges its dimension by funcs*constraintCompDiff */
  if (tConstrained == PETSC_TRUE) {
    for(f = 0; f < numTFields; f++) {
      if (grid->fields[tFields[f]].isConstrained == PETSC_TRUE) {
        tElemSize += grid->fields[tFields[f]].disc->funcs*grid->fields[tFields[f]].constraintCompDiff;
      }
    }
  }
  if (sConstrained == PETSC_TRUE) {
    for(f = 0; f < numSFields; f++) {
      if (grid->fields[sFields[f]].isConstrained == PETSC_TRUE) {
        sElemSize += grid->fields[sFields[f]].disc->funcs*grid->fields[sFields[f]].constraintCompDiff;
      }
    }
  }
  ierr = ElementVecCreate(comm, tElemSize, &elemGhostVec);                                                CHKERRQ(ierr);
  ierr = ElementMatCreate(comm, tElemSize, sElemSize, &mat);                                              CHKERRQ(ierr);
  ierr = ElementVecZero(elemGhostVec);                                                                    CHKERRQ(ierr);
  array = mat->array;

  /* Setup user context */
  bdCtx.ctx = ctx;

  /* Fill the local solution vectors */
  if (x != PETSC_NULL) {
    ierr = GridGlobalToLocalGeneral(grid, x, ghostVec, INSERT_VALUES, ghostScatter);                      CHKERRQ(ierr);
  }

  /* Setup the operator with information about the test function space */
  for(f = 0; f < numSFields; f++) {
    grid->fields[sFields[f]].disc->bdDisc->operators[op]->test = grid->fields[tFields[f]].disc;
  }

  /* Our problem here is that "edges" are not data structures like "elements". The element
     holds the midnodes which appear on it, but edges do not. Thus we must pass the midnode
     number to the discretization, which we do using a context wrapper. Unfortunately, the
     row indices were derived from element, so we must introduce another numbering function
     which operates on nodes alone. The midnode number is found by a search of the elements
     which could certainly be improved with geometric hints. We might also assume that it
     is the node lying between the two endpoints in the bdNodes[] array. In addition, the
     boundary variable ordering is in relation to boundary node numbers, so that the node
     number must be converted before calling the numbering function. This could be sped up
     by placing boundary node numbers in the bdEdges[] array instead. */

  /* Loop over boundary edges */
  for(bd = 0, bdEdge = 0; bd < grid->numBd; bd++) {
    for(bdEdge = tri->bdEdgeBegin[bd]; bdEdge < tri->bdEdgeBegin[bd+1]; bdEdge++) {
      /* Check that edge is on this processor. Valid local edge numbers are 0..numEdges-1, so an edge
         equal to numEdges must be skipped as well (assumes the partition owns the contiguous global
         range starting at firstEdge) */
      edge = bdEdges[bdEdge] - firstEdge;
      if ((edge < 0) || (edge >= numEdges)) continue;

      ierr = MeshGetMidnodeFromEdge(mesh, edge, &midNode);                                                CHKERRQ(ierr);
      bdCtx.midnode = midNode;

      /* Initialize element matrix; the reduced sizes are the unenlarged local ordering sizes */
      ierr = ElementMatZero(mat);                                                                         CHKERRQ(ierr);
      mat->reduceRowSize       = tLocOrder->elemSize;
      mat->reduceColSize       = sLocOrder->elemSize;
      elemGhostVec->reduceSize = tLocOrder->elemSize;

      if (x != PETSC_NULL) {
        /* Setup local row indices for the ghost vector */
        ierr = GridCalcBoundaryElementVecIndices(grid, bd, edge, midNode, tOrder, PETSC_TRUE, elemGhostVec);CHKERRQ(ierr);
        /* Setup local solution vector */
        ierr = GridLocalToElementGeneral(grid, ghostVec, grid->bdReduceVecCur, reduceSystem, reduceElement, elemGhostVec);CHKERRQ(ierr);
        /* Must transform to unconstrained variables for element integrals */
        ierr = GridProjectElementVec(grid, mesh, edge, tOrder, PETSC_FALSE, elemGhostVec);                CHKERRQ(ierr);
        /* The solution-dependent path is unfinished, so it is rejected here rather than producing values */
        SETERRQ(PETSC_ERR_SUP, "Being reworked");
      }
      for(f = 0; f < numSFields; f++) {
        sField = sFields[f];
        tField = tFields[f];
        /* Calculate the contribution to the element matrix from the field. No field values are passed
           (PETSC_NULL), and the midnode travels to the discretization inside bdCtx */
        ierr = DiscretizationEvaluateOperatorGalerkin(grid->fields[sField].disc->bdDisc, mesh, sElemSize, tElemStart[tField],
                                                      sElemStart[sField], op, alpha, edge, PETSC_NULL, array, &bdCtx);
        CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
        ierr = PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);                                      CHKERRQ(ierr);
#endif
      }

      /* Setup global row and column indices */
      ierr = GridCalcBoundaryElementMatIndices(grid, bd, edge, midNode, sOrder, tOrder, PETSC_FALSE, mat); CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
      ierr = PetscOptionsHasName(PETSC_NULL, "-trace_mat_bd_assembly", &opt);                             CHKERRQ(ierr);
      if (opt == PETSC_TRUE) {
        ierr = ElementMatView(mat, PETSC_VIEWER_STDOUT_(mat->comm));                                      CHKERRQ(ierr);
      }
#endif
      /* Put values in the global matrix */
      ierr = ElementMatSetValues(mat, M, ADD_VALUES);                                                     CHKERRQ(ierr);
    }
  }
#ifdef PETSC_USE_BOPT_g
  /* After the last boundary, the running index must equal the total number of boundary edges */
  if (bdEdge != mesh->numBdEdges) SETERRQ(PETSC_ERR_PLIB, "Invalid boundary edge numbering");
#endif

  ierr = MatAssemblyBegin(M, type);                                                                       CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M, type);                                                                         CHKERRQ(ierr);

  /* Cleanup */
  ierr = VecDestroy(ghostVec);                                                                            CHKERRQ(ierr);
  ierr = VecScatterDestroy(ghostScatter);                                                                 CHKERRQ(ierr);
  ierr = ElementVecDestroy(elemGhostVec);                                                                 CHKERRQ(ierr);
  ierr = ElementMatDestroy(mat);                                                                          CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
