//************************************************************************************//
// Module       : glop.cpp
// Date         : 2/14/02 (DLR)
// Copyright    : 2002-2006 Copyright University Corporation for Atmospheric
//                Research
// Description  : Encapsulates the methods and data associated with
//                a geometry--free global gather/scatter operator
// Derived From : none.
// Modifications:
//************************************************************************************//

#include <math.h>
#include "glop.hpp"
#include "timer.h"
#include "gcomm.hpp"

//************************************************************************************
//************************************************************************************
// Constructor Method (1)
// Builds an operator with no active handles: asks GComm for this process's
// rank and for the process count, allocates the handle table and the
// used-handle list, and clears every per-handle slot.
GlOp::GlOp()
: NProcs      (1),
  nMaxNodes   (0),
  iLocalStack (-1),
  rank        (0),
  nUsedHandles(0),
  opHandle    (NULL),
  iUsedHandles(NULL),
  g2lbuff     (NULL)
{
  GINT  slot;

  // Replace the placeholder values above with what the communicator reports:
  rank   = GComm::WorldRank();
  NProcs = GComm::WorldSize();

  // Handle table: entry k stores the handle value k.
  opHandle     = new GBuffer<GCHandle>  (MAX_OPHANDLES);
  iUsedHandles = new GSBuffer (MAX_OPHANDLES);
  slot = 0;
  while ( slot < opHandle->dim() )
  {
    (*opHandle)(slot) = (GCHandle) slot;
    ++slot;
  }

  // No handle is in use yet:
  iUsedHandles->Set(-1);

  // Per-handle storage starts out unallocated, with zeroed counters:
  for ( slot=0; slot<MAX_OPHANDLES; ++slot )
  {
    n_recv           [slot] = 0;
    n_recv_data      [slot] = 0;
    maxsend          [slot] = 0;

    LocToGlob        [slot] = NULL;
    ig2l             [slot] = NULL;
    glob_dups        [slot] = NULL;
    local_op_indices [slot] = NULL;
    srProc_list      [slot] = NULL;
    srGlNode_buffs   [slot] = NULL;
    gmultiplicity    [slot] = NULL;
    vflat            [slot] = NULL;
    u_send           [slot] = NULL;
    u_recv           [slot] = NULL;
    rhandle          [slot] = NULL;
    glNodeList       [slot] = NULL;
    index            [slot] = NULL;
    distinct_nodes_op[slot] = NULL;
    node_indices_op  [slot] = NULL;
    lresult          [slot] = NULL;
    gresult          [slot] = NULL;
  }

} // end of constructor (1) method


//************************************************************************************
//************************************************************************************
// Destructor
// Releases everything the object owns by delegating to DeleteDynamic().
GlOp::~GlOp()
{
  this->DeleteDynamic();
} // end of destructor


//************************************************************************************
//************************************************************************************
// Copy constructor method
// DESCRIPTION: Builds a valid, *empty* operator: the same state that the
//              default constructor produces. The handles and per-handle
//              communication buffers of the source object are NOT duplicated;
//              Init must be called on the new object before it is used.
//
//              The body used to be empty, which left every member
//              indeterminate; the destructor then ran DeleteDynamic() on
//              garbage pointers and a garbage handle count.
// ARGUMENTS  : a: source object (not read)
GlOp::GlOp(const GlOp &a)
:
NProcs                (1),
nMaxNodes             (0),
iLocalStack           (-1),
rank                  (0),
nUsedHandles          (0),
opHandle              (NULL),
iUsedHandles          (NULL),
g2lbuff               (NULL)
{
  GINT  i;

  (void) a;  // nothing is copied from the source; see description above

  rank   = GComm::WorldRank();
  NProcs = GComm::WorldSize();

  // Same handle-table setup as the default constructor:
  opHandle     = new GBuffer<GCHandle>  (MAX_OPHANDLES);
  iUsedHandles = new GSBuffer (MAX_OPHANDLES);
  for ( i=0; i<opHandle->dim(); i++ )
    (*opHandle)(i) = (GCHandle) i;
  iUsedHandles->Set(-1);

  // Mark all per-handle storage as unallocated:
  for ( i=0; i<MAX_OPHANDLES; i++ )
  {
    LocToGlob       [i]    = NULL;
    ig2l            [i]    = NULL;
    glob_dups       [i]    = NULL;
    local_op_indices[i]    = NULL;
    srProc_list     [i]    = NULL;
    srGlNode_buffs  [i]    = NULL;
    gmultiplicity   [i]    = NULL;
    vflat           [i]    = NULL;
    u_send          [i]    = NULL;
    u_recv          [i]    = NULL;
    rhandle         [i]    = NULL;
    glNodeList      [i]    = NULL;
    index           [i]    = NULL;
    distinct_nodes_op[i]   = NULL;
    node_indices_op [i]    = NULL;
    lresult         [i]    = NULL;
    gresult         [i]    = NULL;
    n_recv          [i]    = 0;
    n_recv_data     [i]    = 0;
    maxsend         [i]    = 0;
  }

} // end of copy constructor method


//************************************************************************************
//************************************************************************************
// Assignment operator method (1)
// No state is transferred from the right-hand side: the target object is
// left exactly as it was and a reference to it is returned.
GlOp  &GlOp::operator=(const GlOp &a)
{
  GlOp &self = *this;

  return self;

} // end of = operator


//************************************************************************************
//************************************************************************************
// METHOD     : DeleteDynamic
// DESCRIPTION: Deletes dynamically allocated quantities
// ARGUMENTS  : none
// RETURNS    : none
//************************************************************************************
void GlOp::DeleteDynamic()
{
  GINT  i;

  if ( g2lbuff != NULL ) delete g2lbuff;
  for ( i=0; i<nUsedHandles; i++ )
  {
    FreeHandle((*opHandle)((*iUsedHandles)(i)));
  }
  delete iUsedHandles;
  delete opHandle;

} //end of method DeleteDynamic


//************************************************************************************
//************************************************************************************
// METHOD     : Init (1)
// DESCRIPTION: Performs initialization of global gather-scatter operation, for
//              nodes sorted only by processor id. This Init is meant to be
//              used with the DSOp methods that take GVector arguments, on which
//              to be operated by the direct-stiffness operation.
//
//              Steps: InitSort (handle creation + inter-proc sort), Repack and
//              index-sort of the received bins, construction of the
//              global-to-local map, local multiplicity, set-up of the
//              send/recv work buffers used by DoOp, and finally the global
//              multiplicity.
// ARGUMENTS  : glob_index: global index list
//              nMax      : maximum value of glob_index
// RETURNS    : GCHandle giving unique handle to the operation, and its
//              relevant indices. NULL_HANDLE is returned (after freeing the
//              handle) if the local or the global multiplicity computation
//              fails. Every other failure prints a message and terminates
//              the process with exit(1).
//************************************************************************************
GCHandle GlOp::Init(GNIDBuffer *glob_index, GNODEID nMax)
{
  GINT       i, j, k, n, iHandle, irank, maxNumRecvBuffs=0, maxSharedNodes=0, n_last;
  GINT       prrec_len=0;
  GBOOL      bRet;
  GNIDBuffer **irPrBin=NULL;
  GCHandle   hRet;


  // Do initial sorting of all sortable data.
#if defined(GLOP_DEBUG_OUTPUT)
    cout << "GlOp::Init: MaxIndexDynamicRange=" << nMax << endl;
#endif
#if defined(GLOP_TRACE_OUTPUT)
    cout << "GlOp::Init: InitSort..." << endl;
#endif
  if ( (hRet=InitSort(glob_index, nMax, irPrBin,
                      maxNumRecvBuffs, maxSharedNodes,
                      prrec_len)) == NULL_HANDLE )
  {
    cout << "Init: InitSort failed" << endl;
    exit(1);
  }
  iHandle = HandleIndex(hRet);
#if defined(GLOP_TRACE_OUTPUT)
    cout << "GlOp::Init: InitSort done." << endl;
#endif

#if defined(GLOP_DEBUG_OUTPUT)
    cout << "GlOp::Init: maxNumRecvBuffs=" << maxNumRecvBuffs << endl;
    for ( i=0; irPrBin != NULL && i<maxNumRecvBuffs; i++ ) {
      cout << "GlOp::Init: irPrBin[" << i << "]=" << *irPrBin[i] << endl;
    }
#endif

  if ( irPrBin != NULL )
  {
#if defined(GLOP_TRACE_OUTPUT)
    cout << "GlOp::Init: Repack..." << endl;
#endif
    // Current structure of data in the sorted-data-recv buffers, while convenient
    // for data transmission, isn't really convenient for local computations, so
    // repack the data from the recv buffers:
    if ( !Repack(irPrBin, maxNumRecvBuffs+1, NProcs, maxSharedNodes,
          prrec_len, srProc_list[iHandle], srGlNode_buffs[iHandle], numGlNode_buffs[iHandle]) )
    {
      cout << "GlOp::Init: Repack failed" << endl;
      exit(1);
    }
#if defined(GLOP_TRACE_OUTPUT)
    cout << "GlOp::Init: Repack done." << endl;
#endif
#if defined(GLOP_DEBUG_OUTPUT)
    cout << "GlOp::Init: srProc_list=" << *(srProc_list[iHandle]) << " numGlNode_buffs=" << numGlNode_buffs[iHandle] << endl;
    for ( i=0; i<numGlNode_buffs[iHandle]; i++ ) {
      cout << "GlOp::Init: i=" << i << endl;
      cout << "GlOp::Init: srGlNode_buffs[" << i << "]=" << (*srGlNode_buffs[iHandle][i])<< endl;
    }
#endif

#if defined(GLOP_TRACE_OUTPUT)
    cout << "GlOp::Init: performing IndexSort..." << endl;
#endif
    bRet = TRUE;
    for ( i=0; i<numGlNode_buffs[iHandle] && bRet; i++ )
    {
       bRet = IndexSort( (*(srGlNode_buffs[iHandle][i])) );
    }

    if ( !bRet )
    {
      cout << "GlOp::Init: Index sorting of repack failed" << endl;
      exit(1);
    }

    // Make a global-to-local map:
    ig2l[iHandle] = new GINT  * [numGlNode_buffs[iHandle]];
    for ( i=0; i<numGlNode_buffs[iHandle]; i++ )
    {
      ig2l[iHandle][i] = new GINT  [srGlNode_buffs[iHandle][i]->dim()];
      for ( j=0; j<srGlNode_buffs[iHandle][i]->dim(); j++ )
        ig2l[iHandle][i][j]= GlobalToLocal(LocToGlob[iHandle], 0, (*srGlNode_buffs[iHandle][i])(j));
    }

    // Clean up temp data (InitSort allocated maxNumRecvBuffs+1 bins and
    // leaves their deletion to the caller):
    for ( i=0; i<maxNumRecvBuffs+1; i++ )
       if ( irPrBin[i] != NULL ) delete irPrBin[i];
    delete [] irPrBin;
    irPrBin = NULL;
  }
  else
  {
    // InitSort produced no receive bins, so Repack never set the buffer
    // count for this handle, and the constructor visible in this file does
    // not initialize it either. Define it as zero so that the tests on
    // numGlNode_buffs below (and in DoOp) read a defined value.
    numGlNode_buffs[iHandle] = 0;
  }

  // Find the number of this proc's duplicate global index entries, and the local
  // indices to which they correspond:
#if defined(GLOP_TRACE_OUTPUT)
  cout << "GlOp::Init: Getting local multiplicity..." << endl;
#endif
  gmultiplicity[iHandle] = new GVector (glob_index->dim());
  if  ( !GetMultiplicity(LocToGlob[iHandle], glob_dups[iHandle], local_op_indices[iHandle]) )
  {
    FreeHandle(hRet);
    cout << "Init: get of local multiplicity failed" << endl;
    return NULL_HANDLE;
  }

  // Set some quantities for DoOp operation:
#if defined(GLOP_TRACE_OUTPUT)
  cout << "GlOp::Init: computing DoOp quantities..." << endl;
#endif
  lresult   [iHandle] = new GDOUBLE [glob_dups[iHandle]->dim()];
  if ( NProcs > 1 && numGlNode_buffs[iHandle] > 0 )
  {
#if defined(GLOP_TRACE_OUTPUT)
  cout << "GlOp::Init: n_recv..." << endl;
#endif
    // ...find proc ids != rank of this proc; index[] holds their positions
    // in srProc_list, and n_recv counts them:
    index     [iHandle] = new GIBuffer(numGlNode_buffs[iHandle]);
    n_recv[iHandle]= 0;
    for ( i=0; i<numGlNode_buffs[iHandle]; i++ )
      if ( (*srProc_list[iHandle])(i) != rank )  (*index[iHandle])(n_recv[iHandle]++) = i;

    // ...find size of send data buffers:
#if defined(GLOP_TRACE_OUTPUT)
  cout << "GlOp::Init: n_recv_data, maxsend..." << endl;
#endif
    n_recv_data[iHandle] = 0;
    maxsend[iHandle]     = 0;
    for ( i=0; i<numGlNode_buffs[iHandle]; i++ )
    {
      n_recv_data[iHandle] += srGlNode_buffs[iHandle][i]->dim();
      maxsend    [iHandle] = MAX(srGlNode_buffs[iHandle][i]->dim(), maxsend[iHandle]);
    }
    if ( n_recv_data[iHandle] <= 0 )
    {
      cout << "GlOp::Init: invalid message size" << endl;
      exit(1);
    }

    rhandle   [iHandle] = new CHandle[n_recv[iHandle]];
    u_send    [iHandle] = new GDBuffer(maxsend[iHandle]);
    u_recv    [iHandle] = new GDBuffer(n_recv_data[iHandle]);
    glNodeList[iHandle] = new GNIDBuffer(n_recv_data[iHandle]);

    // Set the first part of the working global node list from the node
    // buffers that belong to the *other* procs (those selected in index[]),
    // packed back to back:
    for ( k=0,j=0; k<n_recv[iHandle]; k++ )
    {
      i = (*index[iHandle])(k);
      for ( n=j; n<j+srGlNode_buffs[iHandle][i]->dim(); n++ )
        (*glNodeList[iHandle])(n) = (*srGlNode_buffs[iHandle][i])(n-j);
      j += srGlNode_buffs[iHandle][i]->dim();
    }
    n_last = j;

    // Locate this proc's own entry in the proc list. The bounds test must
    // come first so that the list is never read past its end:
    irank = 0;
    while ( irank < srProc_list[iHandle]->dim() && (*srProc_list[iHandle])(irank) != rank ) irank++;
    if ( irank >= srProc_list[iHandle]->dim() )
    {
      cout << "GlOp::Init: rank not found in send/recv proc list" << endl;
      exit(1);
    }

    // Append the node buffer that belongs to this proc, filling
    // after n_last:
    for ( j=n_last; j<n_last+srGlNode_buffs[iHandle][irank]->dim(); j++ )
    {
      (*glNodeList[iHandle])(j) = (*srGlNode_buffs[iHandle][irank])(j-n_last);
    }
#if defined(GLOP_TRACE_OUTPUT)
    cout << "GlOp::Init: computing glNodeList multiplicity..." << endl;
#endif
#if defined(GLOP_DEBUG_OUTPUT)
    cout << "GlOp::Init: glNodeList=" << *glNodeList[iHandle] << endl;
#endif
    if ( !GetMultiplicity(glNodeList[iHandle], distinct_nodes_op[iHandle], node_indices_op[iHandle]) )
    {
      cout << "GlOp::Init: GetMultiplicity failed" << endl;
      exit(1);
    }
#if defined(GLOP_DEBUG_OUTPUT)
    cout << "GlOp::Init: distinct_nodes_op=" << *distinct_nodes_op[iHandle] << endl;
#endif
    gresult   [iHandle] = new GDOUBLE [distinct_nodes_op[iHandle]->dim()];
  }

#if defined(GLOP_TRACE_OUTPUT)
  cout << "GlOp::Init: calling ComputeGlobalMultiplicity..." << endl;
#endif
  GComm::Synch();
  if ( !ComputeGlobalMultiplicity(hRet, gmultiplicity[iHandle]) )
  {
    FreeHandle(hRet);
    cout << "Init: get of global multiplicity failed" << endl;
    return NULL_HANDLE;
  }

#if defined(GLOP_TRACE_OUTPUT)
  cout << "GlOp::Init: done." << endl;
#endif

  return hRet;
} // end of method Init (1)


//************************************************************************************
//************************************************************************************
// METHOD     : InitSort
// DESCRIPTION: Performs sorting for initialization of global gather-scatter operation.
//              Acquires a new handle, stores a copy of glob_index as the
//              handle's local-to-global map and, when NProcs > 1 and BinSort
//              succeeds, exchanges bin-sorted node lists with the other procs
//              and collects the proc-sorted records in irBin.
// ARGUMENTS  : glob_index: global index list
//              nMax      : max value of glob_index
//              irBin     : array of buffers allocated here that contain the
//                          bin-sorted (by proc id) nodal data. Caller responsible for
//                          deletion. Must be NULL on entry; it is left NULL
//                          when the multi-proc sort is not carried out.
//              maxNumRecvBuffs: max over all procs of the number of recv
//                          buffers; irBin holds maxNumRecvBuffs+1 buffers.
//              maxSharedNodes : set here to the bound on shared nodes used to
//                          size the record buffers.
//              prrec_len : set here to the record length (NProcs + 1).
//              The three output scalars are written only on the multi-proc path.
// RETURNS    : GCHandle giving unique handle to the operation, and its
//              relevant indices; else NULL_HANDLE on failure. Failures inside
//              the multi-proc exchange terminate the process with exit(1).
//************************************************************************************
GCHandle GlOp::InitSort(GNIDBuffer  *glob_index, GNODEID nMax,
                        GNIDBuffer  **&irBin,
                        GINT  &maxNumRecvBuffs, GINT  &maxSharedNodes,
                        GINT  &prrec_len)
{

  const char  *serr = "GlOp::InitSort: ";  // string literals are const
  GINT        i, j, ne, iHandle, NumWorkRecv, nNodes;
  GINT        PrRecvSize, maxMembers=0;
  GINT        NumRecvBuffs, *iSend=NULL;
  GBOOL       bRet=TRUE;
  GIBuffer    *bInRange=NULL, gbInRange;
  GIBuffer    *nInRange=NULL, *maxNodes=NULL;
  GNIDBuffer  **isBin=NULL;
  GNIDBuffer  *iWorkNodes=NULL, **irWork=NULL, **isWork=NULL;
  GSBuffer    *iRecvProcID=NULL, *iRecvWorkProcID=NULL;
  GCHandle     hRet;

  if ( nUsedHandles >= MAX_OPHANDLES ) {
    cout << serr << "nUsedHandles >= MAX_OPHANDLES; returning NULL_HANDLE" << endl;
    return NULL_HANDLE;
  }
  if ( glob_index == NULL ) {
    cout << serr << "glob_index NULL; returning NULL_HANDLE" << endl;
    return NULL_HANDLE;
  }

#if defined(GLOP_DEBUG_OUTPUT)
  cout << serr << "glob_index=" << *glob_index << endl;
#endif

  nNodes      = glob_index->dim();
  nMaxNodes = nMax;
  gbInRange.Resize(NProcs);
#if defined(GLOP_DEBUG_OUTPUT)
    cout << serr << "nNodes    =" << nNodes << endl;
    cout << serr << "nMaxNodes =" << nMaxNodes << endl;
#endif

  // Get new valid handle. It must be validated *before* its index is used
  // to address the per-handle arrays:
  hRet    = GetNewHandle();
  if ( hRet == NULL_HANDLE ) {
    cout << serr << "GetNewHandle failed; returning NULL_HANDLE" << endl;
    return NULL_HANDLE;
  }
  iHandle = HandleIndex(hRet);
#if defined(GLOP_DEBUG_OUTPUT)
    cout << serr << "iHandle =" << iHandle << endl;
#endif
  FreeHandleMem(hRet);
#if defined(GLOP_DEBUG_OUTPUT)
  cout << serr << "0" << endl;
  cout << serr << "LocToGlob[" << iHandle << "]=" << LocToGlob[iHandle] << endl;
#endif
  GComm::Synch();

  // Keep a private copy of the global indices as this handle's
  // local-to-global map:
  LocToGlob[iHandle] = new GNIDBuffer  (nNodes);
#if defined(GLOP_DEBUG_OUTPUT)
  cout << serr << "1" << endl;
#endif
  for ( i=0; i<nNodes; i++ )
  {
    (*LocToGlob[iHandle])(i) = *(glob_index->Data()+i);
  }
#if defined(GLOP_DEBUG_OUTPUT)
  cout << "3" << endl;
#endif

  GComm::Synch();
#if defined(GLOP_TRACE_OUTPUT)
  cout << serr << "Entering Sort loop..." << endl;
#endif
  if ( NProcs > 1 && (bRet=BinSort(glob_index,bInRange,nInRange,iWorkNodes)) )
  {
    maxNodes = new GIBuffer  (NProcs);

#if defined(GLOP_TRACE_OUTPUT)
  cout << serr << "Doing reductions..." << endl;
#endif
    // Do reductions on _InRange vectors:
    GComm::Allreduce(bInRange->Data(), gbInRange.Data(), NProcs, GC_GINT , G_OP_SUM);
    GComm::Allreduce(nInRange->Data(), maxNodes ->Data(), NProcs, GC_GINT , G_OP_MAX);
#if defined(GLOP_TRACE_OUTPUT)
  cout << serr << "Reductions done." << endl;
#endif

#if defined(GLOP_DEBUG_OUTPUT)
    cout << serr << "glob_index=" << *glob_index << endl;
    cout << serr << "bInRange  =" << *bInRange   << endl;
    cout << serr << "nInRange  =" << *nInRange   << endl;
#endif

    // Determine max number of members in each bin, and the
    // total required number of recv buffers:
    NumWorkRecv    = 0;
    maxSharedNodes = 0;
    for ( i=0; i<NProcs; i++ )
    {
      maxMembers     = MAX((*maxNodes)(i),maxMembers);
      maxSharedNodes = MAX(maxSharedNodes,gbInRange(i));
    }
    NumWorkRecv += gbInRange(rank) - (*bInRange)(rank);
    maxSharedNodes *= maxMembers;

    // Create receive buffers:
    irWork = new GNIDBuffer  * [NumWorkRecv+1]; // the extra space is for proc id rank's node list
    for ( i=0; i<NumWorkRecv+1; i++ )
    {
      irWork[i] = new GNIDBuffer (maxMembers);
      irWork[i]->Set(-1);
    }

    // Create send buffers:
    isWork = new GNIDBuffer  * [NProcs];
    for ( i=0; i<NProcs; i++ )
    {
      isWork[i] = new GNIDBuffer (maxMembers);
      isWork[i]->Set(-1);
    }

    // Allocate buffers that depend on NumWorkRecv,
    // the number of procs to recv work data from:
    iRecvWorkProcID = new GSBuffer   (NumWorkRecv+1);
    iRecvWorkProcID->Set(-1);

    // Find number of Recv buffers for sorted data;
    // allocate buffers that depend on this:
    NumRecvBuffs    = 0;
    for ( i=0; i<NProcs; i++ )
      if ( i != rank ) NumRecvBuffs +=  (*bInRange)(i);

    //...sorted data recv ids:
    iRecvProcID = new GSBuffer   (NumRecvBuffs+1) ; //(NumWorkRecv+1);
    iSend       = new GINT  [NumRecvBuffs];
    iRecvProcID->Set(-1);

    // Find max number of recv buffers:
    GComm::Allreduce(&(NumRecvBuffs), &(maxNumRecvBuffs), 1, GC_GINT , G_OP_MAX);

    // Fill bins with global nodes:
#if defined(GLOP_TRACE_OUTPUT)
  cout << serr << "Doing BinFill..." << endl;
#endif
    if ( !BinFill(glob_index,isWork) )
    {
      cout << serr << "BinFill failed" << endl;
      exit(1);
    }
#if defined(GLOP_TRACE_OUTPUT)
  cout << serr << "BinFill done." << endl;
#endif

    // Find procs from which to recv data, and find the
    // indices in isWork indicating the send buffers which must be sent:
    ne = 0;
    for ( i=0; i<NProcs; i++ )
    {
      if ( (*bInRange)(i) > 0 && i != rank )
      {
         (*iRecvProcID)(ne) = i;
         iSend[ne] = i;
         ne++;
      }
    }

#if defined(GLOP_TRACE_OUTPUT)
  cout << serr << "Doing ASendRecvNB..." << endl;
#endif
    // Send work data to respective procs; get this proc's work data:
    if ( !GComm::ASendRecvNB(irWork,NumWorkRecv, NULL , maxMembers, iRecvWorkProcID->Data(), FALSE,
                             isWork,         ne, iSend, maxMembers, iRecvProcID->Data()) )
    {
      cout << serr << "GComm::ASendRecvNB failed on work data" << endl;
      exit(1);
    }
#if defined(GLOP_TRACE_OUTPUT)
  cout << serr << "ASendRecvNB done." << endl;
#endif

    // Final slot in irWork is the list from this proc's sorting of its global
    // nodes:
    for ( i=0; i<iWorkNodes->dim() && (*iWorkNodes)(i)>-1; i++ )
       (*irWork[NumWorkRecv])(i) =  (*iWorkNodes)(i);
    (*iRecvWorkProcID)(NumWorkRecv) = rank;

    prrec_len   = NProcs + 1;

    // Set up sorted data recv and send buffers:
    PrRecvSize = maxSharedNodes*(prrec_len+1);
    if ( irBin != NULL ) {
      cout << serr << "non-NULL irBin" << endl;
      exit(1);
    }
    irBin      = new GNIDBuffer  * [maxNumRecvBuffs+1];
    for ( j=0; j<maxNumRecvBuffs+1; j++ )
    {
      irBin  [j] = new GNIDBuffer (PrRecvSize);
      irBin  [j]->Set(-1);
    }

    isBin   = new GNIDBuffer  * [NumWorkRecv+1];
    for ( i=0; i<NumWorkRecv+1; i++ )
    {
      isBin  [i] = new GNIDBuffer  (PrRecvSize);
    }

    // Process work data:
#if defined(GLOP_TRACE_OUTPUT)
  cout << serr << "Doing DoCommonNodeSort..." << endl;
#endif
    if ( !DoCommonNodeSort(maxSharedNodes, prrec_len,
                           irWork,iRecvWorkProcID, isBin, NumWorkRecv+1) )
    {
      cout << serr << "DoCommNodeSort failed " << endl;
      exit(1);
    }
#if defined(GLOP_TRACE_OUTPUT)
  cout << serr << "DoCommonNodeSort done." << endl;
#endif

    // Copy this proc's common node sorted data into the receive buffer:
    for ( i=0; i<isBin[NumWorkRecv]->dim(); i++ )
    {
      (*irBin[NumRecvBuffs])(i) =  (*isBin[NumWorkRecv])(i);
    }
#if defined(GLOP_DEBUG_OUTPUT)
    cout << serr << "irBin[NumRecvBuffs=" << NumRecvBuffs << "]=" << *irBin[NumRecvBuffs] << endl;
#endif

    // Send processor-sorted work data processed by this proc to respective procs;
    // get this proc's nodes that have been processed by other procs:
#if defined(GLOP_TRACE_OUTPUT)
  cout << serr << "Doing ASendRecvNB (2)..." << endl;
#endif
    GComm::Synch();
    if ( !GComm::ASendRecvNB(irBin,NumRecvBuffs,NULL, PrRecvSize, iRecvProcID    ->Data(), TRUE,
                             isBin,NumWorkRecv ,NULL, PrRecvSize, iRecvWorkProcID->Data()) )
    {
      cout << serr << "GComm::ASendRecvNB failed on proc-sorted data" << endl;
      exit(1);
    }

#if defined(GLOP_TRACE_OUTPUT)
  cout << serr << "ASendRecvNB (2) done." << endl;
#endif

    // Release all temporaries; irBin is handed to the caller:
    delete maxNodes;
    if ( iSend != NULL ) delete [] iSend;
    delete iRecvWorkProcID;
    for ( i=0; i<NProcs; i++ )
      if ( isWork[i] != NULL ) delete isWork[i];
    delete [] isWork;
    for ( i=0; i<NumWorkRecv+1; i++ )
      if ( irWork[i] != NULL ) delete irWork[i];
    delete [] irWork;
    delete iWorkNodes;
    delete iRecvProcID;
    delete bInRange;
    delete nInRange;
    for ( i=0; i<NumWorkRecv+1; i++ )
      if ( isBin[i] != NULL ) delete isBin[i];
    delete [] isBin;
  }
#if defined(GLOP_DEBUG_OUTPUT)
  cout << serr << "BinSort done." << endl;
#endif

  return hRet;

} // end of method InitSort 


//************************************************************************************
//************************************************************************************
// METHOD     : DoCommonNodeSort
// DESCRIPTION: Sorts the nodes, in buffers, Nodes, 
//               s.t the sorted data to be placed in the PrOutput structure,
//               is organized in lists of records, one list for each
//               of the proc ids, procIDs.
//
//               Records are structured s.t. for each proc or list:
//               buffer index:   0      1     2     3     ...
//               data        :   Node1  Proc0 Proc1 Proc2 ... with fill of -1 ... Node2 Proc0 Proc1 Proc2 ...
//
//               Each record occupies Prrec_len entries. A record is written
//               to PrOutput[j] only for nodes that appear in Nodes[j] and
//               are shared by more than one proc.
// ARGUMENTS  : maxSharedNodes: for allocating temporary space, this is the max. number
//                              of shared global nodes. The number of distinct
//                              nodes found must stay strictly below this value.
//              Prrec_len    : max length of each PrOutput record (should be >= max(Procs) + 1)
//              Nodes[]      : array of GBuffer's that must be sorted. These must have the structure:
//                           Node[0] = (glob_index0, glob_index2, ...) for processor, procIDs(0), etc.
//                           Node[1] = (glob_index0, glob_index2, ...) for processor, procIDs(1), etc.
//                           A negative entry ends the active part of a list.
//              procIDs      : list of the proc ids providing the node data in the Nodes[] buffers
//              PrOutput[]   : array of GBuffers containing the data sorted by proc as described above,
//                             one GBuffer for each procID. NOTE: This is allocated in the calling
//                             method, but shouldn't be. 
//              NumberOfLists: the number of GBuffer's in the Nodes[] array (i.e., the length of
//                             the Nodes[] array); also the length of the Output arrays.
//                             Is actually the number of processors from which data recvd + 1.
//              
// RETURNS    : TRUE on success; else FALSE. A failure while collecting the
//              shared nodes, or reaching maxSharedNodes distinct nodes,
//              terminates the process with exit(1).
//************************************************************************************
GBOOL GlOp::DoCommonNodeSort(GINT  maxSharedNodes, GINT  Prrec_len, 
                             GNIDBuffer  *Nodes [], GSBuffer   *procIDs,  
                             GNIDBuffer   *PrOutput[], 
                             const GINT  NumberOfLists)
{
  GINT       i, ii, index, iwhere, j, k, kstart, nn, maxProcs; 
  GBOOL      bRet=TRUE;
  GIBuffer   n_procs(maxSharedNodes), 
             *iascend, *itmp;
  GNIDBuffer *curr_list, *comp_list;
  GNIDBuffer distinct_nodes(maxSharedNodes);
  GSBuffer   **distinct_procs, *ptmp;

  if ( Nodes == NULL || PrOutput == NULL || procIDs == NULL ) return FALSE;

  // For each node, find the list of procs which share it:

  distinct_nodes.Set(-1);     // used for determining end of active list

  distinct_procs = new GSBuffer   * [maxSharedNodes];
  for ( j=0; j<maxSharedNodes; j++ )
  {
    distinct_procs[j] = new GSBuffer   (NumberOfLists);
    distinct_procs[j]->Set(-1);  // used for determining end of active list
  }

  // Compare each node list with every other node list
  // from the other procs, and find shared nodes:
  nn = 0;
  n_procs.Set(0);
  for ( j=0; j<NumberOfLists && bRet; j++ )
  {
    PrOutput[j]->Set(-1);
    comp_list = Nodes[j];
    for ( i=j; i<NumberOfLists &&  bRet; i++ )
    {
      curr_list = Nodes[i];
      for ( k=0; k<curr_list->dim() && (*curr_list)(k)>=0; k++ )  // look node-by-node for common nodes
      {
        if ( nn >= maxSharedNodes )
        {
          cout << "Proc: " << rank << " GlOp::DoCommonNodeSort failure: num distinct nodes = " << nn
               << " max = " << maxSharedNodes << endl; 
          bRet = FALSE;
          break;
        }
#if defined(GLOP_DEBUG_OUTPUT)
        cout << " Proc: " << rank << " checking if node " << *(curr_list->Data()+k) << " is in list..." << endl;
#endif
        if ( comp_list->contains(*(curr_list->Data()+k),index,-1) )   // common id found
        {
          if ( !distinct_nodes.contains(*(curr_list->Data()+k),iwhere,-1) )  // not already in list
          {  
#if defined(GLOP_DEBUG_OUTPUT)
            cout << " Proc: " << rank << " 1a: nn=" << nn << " n_procs(nn)=" << n_procs(nn) << endl;
#endif
            // New distinct node: record it with the (one or two) procs
            // whose lists are being compared:
            distinct_nodes(nn) = (*curr_list)(k);
            (*distinct_procs[nn])(n_procs(nn)++) = (*procIDs)(i);
            if ( (*procIDs)(i) != (*procIDs)(j) ) (*distinct_procs[nn])(n_procs(nn)++) = (*procIDs)(j);
#if defined(GLOP_DEBUG_OUTPUT)
            cout << " Proc: " << rank << " 1b: nn=" << nn << " n_procs(nn)=" << n_procs(nn) << endl;
#endif
            nn++;
          }
          else  // node in distinct_nodes list; update the proc id:
          {
            if ( !distinct_procs[iwhere]->contains((*procIDs)(i),index,-1) ) 
            {
              if ( iwhere >= n_procs.dim() || n_procs(iwhere) >= NumberOfLists)
              {
                cout << "Proc: " << rank << " GlOp::DoCommonNodeSort failure: n_procs = " << 
                        n_procs(iwhere) << " max= " << NumberOfLists << endl;
                bRet = FALSE;
                break;
              }
              (*distinct_procs[iwhere])(n_procs(iwhere)) = (*procIDs)(i);
              n_procs(iwhere)++;
            }
            if ( !distinct_procs[iwhere]->contains((*procIDs)(j),index,-1) ) 
            {
              if (  iwhere >= n_procs.dim() || n_procs(iwhere) >= NumberOfLists)
              {
                cout << "Proc: " << rank << " GlOp::DoCommonNodeSort failure: n_procs = " << 
                        n_procs(iwhere) << " max= " << NumberOfLists << endl;
                bRet = FALSE;
                break;
              }
              (*distinct_procs[iwhere])(n_procs(iwhere)) = (*procIDs)(j);
              n_procs(iwhere)++;
            }
          }
        }
      }
    }
  }
  if ( !bRet ) 
  {
    cout << "GlOp::DoCommonNodeSort: initial sort failed" << endl;
    exit(1);
  }

#if defined(GLOP_DEBUG_OUTPUT)
  cout << "GlOp::DoCommonNodeSort: nn=" << nn << endl;
#endif

  if ( nn >= maxSharedNodes )
  {
    cout << " Proc: " << rank << " GlOp::DoCommonNodeSort failure: num distinct nodes = " 
         << nn << " max=" << maxSharedNodes << endl;
    exit(1);
  }

  //  Note: nn is the number of distinct nodes; for each distinct node,
  //  i, n_procs(i) is the number of distinct procs sharing that node. 
  for ( i=0, maxProcs=0; i<nn; i++ )
  {
    maxProcs = MAX(maxProcs,n_procs(i));
  }

  if ( Prrec_len < maxProcs+1 ) 
  {
    cout  << " Proc: " << rank << " GlOp::DoCommonNodeSort failure: Prrec_len =" 
          << Prrec_len << " maxProcs =" << maxProcs+1 << endl;
    bRet = FALSE;
  }

  // Re-order the nodes in increasing order; iascend collects the sorted
  // positions of the active (non-negative) entries only:
  itmp = new GIBuffer  (maxSharedNodes);
  iascend     = new GIBuffer  (maxSharedNodes);
  if ( !IndexSort(distinct_nodes, *itmp) ) bRet = FALSE;
  iascend->Set(-1);
  for ( j=0, k=0; j<maxSharedNodes && bRet; j++ )
  {
    if ( distinct_nodes((*itmp)(j)) >= 0 )
    {
      (*iascend)(k) = (*itmp)(j);
      k++;
    }
  }
  if ( k != nn )
  {
    cout  << " Proc: " << rank << " GlOp::DoCommonNodeSort failure: num distinct nodes in sort not equal to pre-sort" << endl;
    bRet = FALSE;
  }

  delete itmp;

  // Re-order proc lists in ascending order, packing the valid ids to the
  // front and leaving the -1 fill behind them:
  ptmp = new GSBuffer   (NumberOfLists);
  for ( i=0; i<nn && bRet; i++ )
  {
    *ptmp = *(distinct_procs[i]);
    bRet = IndexSort(*ptmp);
    distinct_procs[i]->Set(-1);
    k = 0;
    for ( j=0; j<distinct_procs[i]->dim() && bRet ; j++ )
      if ( (*ptmp)(j) >= 0 )
      {
        (*distinct_procs[i])(k) = (*ptmp)(j); 
        k++;
      }
  }
  delete ptmp;

  // Package the proc-sorted data to be sent in the PrOutput structure:
  // Records are structured s.t. for each proc or list:
  // buffer index:   0      1     2     3     ...
  // data        :   Node1  Proc0 Proc1 Proc2 ... with fill of -1 ... Node2 Proc0 Proc1 Proc2 ...
  for ( j=0; j<NumberOfLists && bRet; j++ )
  {
    // Re-order the node ids and the corresponding proc ids:
    kstart = 0;
    for ( ii=0; ii<nn; ii++ )
    {
      // Select the node first; the checks below must refer to *this* node,
      // not to whatever index was left over from an earlier loop:
      i = (*iascend)(ii);
   
      // Don't build record if the proc doesn't share the node, 
      // or if there is only 1 proc with the node in its list:
      if ( Nodes[j]->contains(distinct_nodes(i),index,-1) && n_procs(i) > 1 )
      {
        // The record may write up to index kstart+dim(); make sure it fits:
        if ( (kstart+distinct_procs[i]->dim()) >= PrOutput[j]->dim() ) 
        {
          cout << " Proc: " << rank << " GlOp::DoCommonNodeSort failure: PrOutput[" << j
               << "] too small for record at " << kstart << endl;
          bRet = FALSE;
          break;
        }

        (*PrOutput[j])(kstart) = distinct_nodes(i);

        // For this list, build record starting from last record:
        for ( k=kstart; k<kstart+distinct_procs[i]->dim(); k++ )
        {
          if ( (*distinct_procs[i])(k-kstart) < 0 ) break;
          (*PrOutput[j])(k+1) = (*distinct_procs[i])(k-kstart); 
        }
        kstart += Prrec_len;  //(distinct_procs[i]->dim() + 1);
      }
    } 
#if defined(GLOP_DEBUG_OUTPUT)
    cout << "GlOp::DoCommonNodeSort: PrOutput[j=" << j << "]=" << *PrOutput[j] << endl;
#endif
  }
 
  // Clean up:
  for ( j=0; j<maxSharedNodes; j++ )
  {
    delete distinct_procs[j];
  }
  delete [] distinct_procs;
  delete iascend;
    

  return bRet;

} // end of method DoCommonNodeSort


//************************************************************************************
//************************************************************************************
// METHOD     : DoOp (1)
// DESCRIPTION: Actually carries out the operation, op, on the elements
//              of field, u, using the initialization from the call to Init
//              that corresponds to the handle, hWork.
//
//              First the entries of u that share a global index on this proc
//              are combined and the result written back to u. If there is
//              more than one proc and the handle has send/recv buffers, field
//              data is then exchanged, combined per distinct global node, and
//              written back through SetGlobalNodeVal.
// ARGUMENTS  : u    : input field whose elements are to be combined; it is
//                     overwritten with the combined values.
//              ulen : length given for u; passed on to CombineLocal and
//                     SetGlobalNodeVal.
//              op   : operation to be performed in the combination
//              hWork: handle returned from call to Init with the global indices
//                     that correspond to the elements of u
// RETURNS    : TRUE on success; else FALSE, if invalid operation requested, or if 
//              there is an error in the gather/scatter combination. A failed
//              data exchange, or a handle whose proc list does not contain
//              this proc's rank, terminates the process with exit(1).
//************************************************************************************
GBOOL GlOp::DoOp(GDOUBLE *u, GINT  ulen,  G_OP op, GCHandle hWork)
{
  GSHORT      iHandle=HandleIndex(hWork);
  GINT        i, irank, j, n, n_last, n_urecv;
  GBOOL       bRet=TRUE;

  if ( iHandle < 0 ) {
    cout << "GlOp::DoOp invalid handle" << endl;
    return FALSE;
  }

  // Combine the duplicated entries that live on this proc, then scatter
  // each combined value back to all of its local positions:
  n = glob_dups[iHandle]->dim();
  if ( !CombineLocal(u,ulen, local_op_indices[iHandle], n, op, lresult[iHandle]) ) {
     return FALSE;
  }
  for ( i=0; i<glob_dups[iHandle]->dim(); i++ )
  {
    for ( j=0; j<local_op_indices[iHandle][i]->dim(); j++ )
    {
      u[(*local_op_indices[iHandle][i])(j)] = lresult[iHandle][i];
    }
  }

  // Nothing to exchange:
  if ( NProcs == 1 || numGlNode_buffs[iHandle] == 0  ) return TRUE;

#if defined(DO_GLOP_TIMING)
  GDOUBLE tstart = STK::Timer();
#endif

  // perform a exchange of field data based on the global node 
  // pointers already retrieved in call to Init:
  if ( !Data_Exchange     (hWork, u, u_recv[iHandle]->Data(), n_last) )
  {
    cout << " GlOp::DoOp data exchange failed " << endl;
    exit(1);
  }

  // find index of this proc's rank in the handle data. The bounds test
  // comes first so that the list is never read past its end:
  irank = 0;
  while ( irank < srProc_list[iHandle]->dim() && (*srProc_list[iHandle])(irank) != rank ) irank++; 

  if ( irank >= srProc_list[iHandle]->dim() ) 
  {
    cout << " GlOp::DoOp corrupt handle data; invalid handle" << endl; 
    exit(1);
  }

  // fill recv buff with the local data, starting at last point 
  // in the recv buffer (provided by call to _Exchange method above):
  for ( j=n_last; j<n_last+srGlNode_buffs[iHandle][irank]->dim(); j++ )
  {
    (*glNodeList[iHandle])(j) = (*srGlNode_buffs[iHandle][irank])(j-n_last);
    (*u_recv[iHandle])    [j] = u[ig2l[iHandle][irank][j-n_last]];
  }
  n_urecv = j;

  // Combine the received + local values per distinct global node and
  // write each result back into u:
  if ( (bRet=CombineLocal(u_recv[iHandle]->Data(), n_urecv, node_indices_op[iHandle], distinct_nodes_op[iHandle]->dim(), op, gresult[iHandle])) )
  { 
    for ( i=0; i<distinct_nodes_op[iHandle]->dim(); i++ )
      SetGlobalNodeVal(hWork, u, ulen, (*distinct_nodes_op[iHandle])(i), gresult[iHandle][i]);
  }

#if defined(DO_GLOP_TIMING)
  doop_time = STK::Timer() - tstart;
#endif


  return bRet;

} // end of method DoOp(1)


//************************************************************************************
//************************************************************************************
// METHOD     : DoOp (2)
// DESCRIPTION: Actually carries out the operation, op, on the elements
//              of field, u, using the initialization from the call to Init, 
//              which is contained in the handle, hWork.
// ARGUMENTS  : u    : input field vector whose elements are to be combined.
//              op   : operation to be performed in the combination
//              hWork: handle returned from call to Init with the global indices
//                     that correspond to the elements of u
// RETURNS    : GVector solution; else exits, if invalid operation requested, or if 
//              there is an error in the gather/scatter combination.
//************************************************************************************
GVector GlOp::DoOp(GVector &u, G_OP op, GCHandle hWork)
{
  // Operate on an automatic copy of u so that the input is left untouched.
  // (A heap-allocated working vector was previously returned by value and
  // never deleted, leaking one vector per call.)
  GVector  vret(u.dim());
  vret = u;

  // Failure of the underlying operation terminates the program:
  if ( !DoOp(vret.Data(), vret.dim(), op, hWork) )
  {
    cout << "GlOp::DoOp(2): return failure" << endl;
    exit(1);
  }
  return vret;

} // end of method DoOp(2)


//************************************************************************************
//************************************************************************************
// METHOD     : ComputeGlobalMultiplicity
// DESCRIPTION: Uses the GlOp object (essentially DoOp) to compute a handle's
//              global multiplicity 
//             
// ARGUMENTS  : h     : handle for which to compute multiplicity. Init must
//                      already have been called
//              mult  : buffer in which to store the results. The dimension
//                      must be the same as the dimension of the input field
//                      in call to Init which produced the corresp. handle
// RETURNS    : TRUE on success; else FALSE
//************************************************************************************
GBOOL GlOp::ComputeGlobalMultiplicity(GCHandle h, GVector *&mult)
{
  const GSHORT ih = HandleIndex(h);

  // Reject an unknown handle, a missing output vector, or an output
  // vector whose length differs from the handle's local-to-global map:
  if ( ih < 0 )       return FALSE;
  if ( mult == NULL ) return FALSE;
  if ( mult->dim() != LocToGlob[ih]->dim() ) return FALSE; 

  // Seed every entry with 1 and sum with DoOp:
  *mult = 1.0;

#if defined (GENERALIZED_EXCHANGE)
  if ( DoOp(mult->Data(), mult->dim(), GC_GDOUBLE, G_OP_SUM, h) ) return TRUE;
#else
  if ( DoOp(mult->Data(), mult->dim(),  G_OP_SUM, h) ) return TRUE;
#endif

  return FALSE;
 
} // end of method ComputeGlobalMultiplicity


//************************************************************************************
//************************************************************************************
// METHOD     : iMultiplicity
// DESCRIPTION: Retrieves pointer to global inverse multiplicity buffer for 
//              handle provided
//              
// ARGUMENTS  : 
//              ie    : element index. Not used in GlOp, but needed for 
//                      GS:: interface consistency.
//              h     : handle for which to compute multiplicity. Init must
//                      already have been called
// RETURNS    : GDBuffer  * corresponding to the handle's gmultiplicity buffer
//              on success; else NULL
//************************************************************************************
GVector *GlOp::iMultiplicity(GINT  ie, GCHandle h)
{
  // The element index, ie, is not referenced here.
  const GSHORT ih    = HandleIndex(h);
  GVector     *pmult = NULL;

  // Hand back the stored buffer only for a recognized handle:
  if ( ih >= 0 ) pmult = gmultiplicity[ih];

  return pmult;

} // end of method iMultiplicity



//************************************************************************************
//*************************************************************************************
// METHOD     : CombineLocal (1)
// DESCRIPTION: performs G_OP (op) on the array of field values, qu, of
//              length ne, returning the result in the argument, qresult.
// ARGUMENTS  : u     : input field whose elements are to be combined.
//              ne    : number of elements in u
//              ilocal: local index buffers: ilocal[n_comb], one for
//                      each distinct combination. dimension of the
//                      buffer determines the number of elements to combine.
//              n_comb: number of combinations to perform
//              op    : operation to be performed in the combination
//             qresult: result of the operation of dimension [n_comb], one
//                      for each combination. This is allocated here in Init.
// RETURNS    : TRUE on success; else FALSE, if invalid operation requested, or
//              invalid local index specified.
//************************************************************************************
GBOOL GlOp::CombineLocal(GDOUBLE *qu    , GINT  ne, GIBuffer  **ilocal, 
                         GINT  n_comb,  G_OP op, GDOUBLE *qresult   )
{
  GINT   ic, k, nterms;

#if defined(GLOP_DEBUG_OUTPUT)
  cout << "GlOp::CombineLocal: ne=" << ne << " n_comb=" << n_comb << endl;
#endif

  // All three arrays are required; zero combinations is a successful no-op:
  if ( qu == NULL || ilocal == NULL || qresult == NULL ) return FALSE;
  if ( n_comb <= 0 ) return TRUE;

  // Only these four reductions are supported; anything else is an error
  // and leaves qresult untouched:
  if ( op != G_OP_SUM && op != G_OP_PROD && op != G_OP_MAX && op != G_OP_MIN ) return FALSE;

  // One reduction per index buffer:
  for ( ic=0; ic<n_comb; ic++ )
  {
    GIBuffer &idx = *ilocal[ic];
    GDOUBLE  &acc = qresult[ic];

    // The number of terms taken from a buffer is capped at ne:
    nterms = idx.dim();
    if ( ne < nterms ) nterms = ne;

    switch(op) 
    {
      case G_OP_SUM:
        acc = 0.0;
        for ( k=0; k<nterms; k++ )
        {
          acc += qu[idx(k)];
#if defined(GLOP_DEBUG_OUTPUT)
          cout << "Combine: " << ic << " u[" << idx(k) << "]=" << qu[idx(k)] << " : sum=" << acc << endl;
#endif
        }
        break;
      case G_OP_PROD:
        acc = 1.0;
        for ( k=0; k<nterms; k++ ) acc *= qu[idx(k)];
        break;
      case G_OP_MAX:
        acc = -HUGE;
        for ( k=0; k<nterms; k++ ) acc = MAX(acc,qu[idx(k)]);
        break;
      case G_OP_MIN:
        acc = HUGE;
        for ( k=0; k<nterms; k++ ) acc = MIN(acc,qu[idx(k)]);
        break;
      default:   // excluded by the check above
        break;
    }
  }

  return TRUE;

} // end of method CombineLocal (1)


//************************************************************************************
//************************************************************************************
// METHOD     : SetGlobalNodeVal (1)
// DESCRIPTION: Sets values of u at all common global ids, iGlobal, to the 
//              value, val. The global ids, of which iGlobal is one, must
//              have been stored in a prior call to Init. 
// ARGUMENTS  : h      : handle from call to Init
//              u      : field array whose global indices are to be set
//              n      : number of field elements
//              iGlobal: global index to set (method sets the corresponding 
//                       u's for all occurrences of iGlobal)
//              val    : value to set
// RETURNS    : TRUE on success; else FALSE
//************************************************************************************
GBOOL GlOp::SetGlobalNodeVal(GCHandle h, GDOUBLE *u, GINT  n, const GNODEID iGlobal, const GDOUBLE val)
{
  
  GINT    iLocal;
  GSHORT  iHandle = HandleIndex(h);

  if ( u == NULL ) return FALSE;
  if ( iHandle < 0 ) return FALSE;
 
  // First lookup: starts a search of the handle's local-to-global list.
  // A negative result means iGlobal does not occur; an index beyond the
  // field length cannot be written:
  iLocal = GlobalToLocal(LocToGlob[iHandle], 0, iGlobal);
  if ( iLocal < 0 || iLocal >= n ) return FALSE;
  u[iLocal] = val;

  // Subsequent lookups pass a NULL list.
  // NOTE(review): GlobalToLocal is defined outside this block; passing NULL
  // presumably continues the previous search and yields a non-positive
  // value once no further occurrence exists -- confirm.
  while ( (iLocal=GlobalToLocal(NULL, 0, iGlobal)) > 0 && iLocal < n )
    u[iLocal] = val;

  // The loop ends either because the search was exhausted (the normal,
  // successful case) or because an out-of-range index was returned. Only
  // the latter is an error; treating the end-of-search value as a failure
  // would make every successful call report FALSE.
  if ( iLocal >= n ) return FALSE;
  return TRUE;
  
} // end of method SetGlobalNodeVal (1)


//************************************************************************************
//************************************************************************************
// METHOD     : Data_Exchange (1)
// DESCRIPTION: Perform data exchange between procs by using asynchronous
//              recvs...
// ARGUMENTS  :
//              hWork     : handle from Init
//              u_local   : local field data from DoOp
//              urecv     : field data received. Allocated in Init.
//              numrecv   : number of elements of urecv filled in this exchange
// RETURNS    : TRUE on success; else FALSE
//************************************************************************************
GBOOL GlOp::Data_Exchange(GCHandle hWork, GDOUBLE *u_local, GDOUBLE *urecv,
                          GINT  &numrecv)
{
  GINT        i, j, k, nsendrecv;
  GINT        iHandle= HandleIndex(hWork);
  GSHORT      srcdest;
  GBOOL       bRet=TRUE;
  GDOUBLE     *p_recv=NULL, *data;

#if defined(DO_GLOP_TIMING)
  GDOUBLE tstart;
#endif

  // An unrecognized handle must not be used to index the per-handle
  // arrays below:
  if ( iHandle < 0 )
  {
    numrecv = 0;
    return FALSE;
  }

#if defined(DO_GLOP_TIMING)
  tstart =  STK::Timer();
#endif

  // Build list of all global nodes to be received or existing on this proc,
  // and post receives. Note that the recv buffer 'stack' used in
  // call to ARecv is simply a pointer to the appropriate position in
  // the  'flat' urecv array, which is returned 
#if defined(GLOP_DEBUG_OUTPUT)
  cout << "GlOp::Data_Exchange: number procs receiving from: " << n_recv[iHandle] << endl;
#endif
  j  = 0;
  for ( k=0; k<n_recv[iHandle] && bRet; k++ )
  {
    i = (*index[iHandle])(k);
    p_recv     = urecv+j;
    nsendrecv  = srGlNode_buffs[iHandle][i]->dim();
    srcdest    = (*srProc_list[iHandle])(i);
#if defined(GLOP_DEBUG_OUTPUT)
    cout << "GlOp::Data_Exchange: num data receiving from proc " << srcdest 
         << ": " << nsendrecv << endl;
#endif
    GComm::ARecv((void**)&p_recv, 1, &nsendrecv, NULL, GC_GDOUBLE, &srcdest, rhandle[iHandle][k]);
    // a receive that registered no posts is treated as a failure:
    bRet       = rhandle[iHandle][k].nposts <= 0 ? FALSE : TRUE;

    // update position in the u-recv buffer:
    j += srGlNode_buffs[iHandle][i]->dim();
  }
  // number of urecv slots reserved for remote data; the caller appends
  // its own local data starting at this offset:
  numrecv = j;

#if 0
  if ( !bRet ) 
    cout << "GlOp::Data_Exchange:: ARecv failed; NULL CHandle" << endl;
#endif

  // Send required data to all processors that share this
  // proc's nodes. The single send buffer is re-packed for each
  // destination:
  for ( k=0; k<n_recv[iHandle] && bRet; k++ )
  {
    i = (*index[iHandle])(k);
    // create send message for this proc:
    for ( j=0; j<srGlNode_buffs[iHandle][i]->dim(); j++ )
      (*u_send[iHandle])(j) = *(u_local + ig2l[iHandle][i][j]);
    nsendrecv = srGlNode_buffs[iHandle][i]->dim();
    srcdest   = (*srProc_list[iHandle])(i);
//#if defined(GLOP_DEBUG_OUTPUT)
#if 0
    cout << "GlOp::Data_Exchange: sending to proc " << srcdest << " data: "; 
    for ( j=0; j<srGlNode_buffs[iHandle][i]->dim(); j++ ) cout << (*u_send[iHandle])(j)  << " ";
    cout << endl;
#endif
    data  = u_send[iHandle]->Data();
    bRet  = GComm::BSend((void**)&data, 1, &nsendrecv, NULL, GC_GDOUBLE, &srcdest);
#if 0
    if ( !bRet ) 
      cout << "GlOp::Data_Exchange:: BSend failed " << endl;
#endif
  }
  
  // Wait for the posted receives to complete; skipped after any failure:
  for ( k=0; k<n_recv[iHandle] && bRet; k++ )
  {
    bRet &= GComm::AWaitOnRecv(rhandle[iHandle][k]);
  }

#if 0
  if ( !bRet ) 
    cout << "GlOp::Data_Exchange:: AWaitOnRecv failed" << endl;
#endif

#if defined(DO_GLOP_TIMING)
  exch_time = STK::Timer() - tstart;
#endif

  return bRet;
} // end of Data_Exchange (1)


#if defined (GENERALIZED_EXCHANGE)
//************************************************************************************
//************************************************************************************
// METHOD     : DoOp (3)
// DESCRIPTION: Actually carries out the operation, op, on the elements
//              of field, u, using the initialization from the call to Init, 
//              which is contained in the handle, hWork.
// ARGUMENTS  : u    : input field whose elements are to be combined.
//              ulen : length of vector, u
//              dtype: GC_DATATYPE specification of field, u
//              op   : operation to be performed in the combination
//              hWork: handle returned from call to Init with the global indices
//                     that correspond to the elements of u
// RETURNS    : TRUE on success; else FALSE, if invalid operation requested, or if 
//              there is an error in the gather/scatter combination.
//************************************************************************************
GBOOL GlOp::DoOp(void *u, GINT  ulen,  GC_DATATYPE dtype, G_OP op, GCHandle hWork)
{
  GSHORT      iHandle=HandleIndex(hWork);
  GINT        i, irank, j, k, n, n_last=0, n_urecv, tsize;
  GBOOL       bRet=TRUE;
  void       *urecv=NULL;           // flat recv buffer; allocated by Data_Exchange (2)
  void       *lres=NULL, *sop=NULL; // results allocated by CombineLocal (2)
  GNIDBuffer *distinct_nodes=NULL;
  GIBuffer  **node_indices=NULL;
  GNIDBuffer *nodeList=NULL;
#if defined(DO_GLOP_TIMING)
  // Declared up front: a 'goto cleanup' may not jump past an initialization.
  GDOUBLE     tstart=0.0;
#endif

  if ( iHandle < 0 ) return FALSE;

  // The numeric value of dtype is used as the element size in bytes
  // throughout (see the memcpy strides below):
  tsize = dtype;

  // Local stage: reduce each local index group, then write each result
  // back to all members of its group in u:
  n = glob_dups[iHandle]->dim();
  if ( !CombineLocal(u,ulen, dtype, local_op_indices[iHandle], n, op, lres) ) {
    bRet = FALSE;
    goto cleanup;
  }
  for ( i=0; i<n; i++ )
  {
    for ( j=0; j<local_op_indices[iHandle][i]->dim(); j++ )
    {
      k = (*local_op_indices[iHandle][i])(j);
      memcpy((GBYTE*)u+k*tsize,(GBYTE*)lres+i*tsize, tsize);
    }
  }

  // Nothing to exchange on a single proc, or if this handle holds no
  // send/recv node buffers (same early exit as the GDOUBLE version of DoOp):
  if ( NProcs == 1 || numGlNode_buffs[iHandle] == 0 ) {
    bRet = TRUE;
    goto cleanup;
  }

#if defined(DO_GLOP_TIMING)
  tstart = STK::Timer();
#endif

  // perform an exchange of field data based on the global node 
  // pointers already retrieved in call to Init. The receive buffer and
  // node list are allocated by the callee and released in cleanup below.
  // NOTE(review): Data_Exchange (2) declares its node-list argument as
  // GIBuffer *&, while GetMultiplicity takes GNIDBuffer *; this only
  // compiles if the two buffer types coincide -- confirm.
  if ( !Data_Exchange     (hWork, u, urecv, n_last, dtype, nodeList) )
  {
    cout << " GlOp::DoOp data exchange failed " << endl;
    exit(1);
  }

  // find index of this proc's rank in the handle data. The bound is
  // tested BEFORE the element is read, so that a missing rank reaches
  // the diagnostic below instead of indexing past the end of the list:
  irank = 0;
  while ( irank < srProc_list[iHandle]->dim() && (*srProc_list[iHandle])(irank) != rank ) irank++; 

  if ( irank >= srProc_list[iHandle]->dim() ) 
  {
    cout << " GlOp::DoOp corrupt handle data; invalid handle" << endl; 
    exit(1);
  }

  // fill recv buff with the local data, starting at last point 
  // in the recv buffer (provided by call to _Exchange method above):
  for ( j=n_last; j<n_last+srGlNode_buffs[iHandle][irank]->dim(); j++ )
  {
    (*nodeList)(j) = (*srGlNode_buffs[iHandle][irank])(j-n_last);
     memcpy((GBYTE*)urecv+j*tsize,(GBYTE*)u+ig2l[iHandle][irank][j-n_last]*tsize, tsize);
  }
  n_urecv = j;

  // Global stage: group the gathered nodes, reduce each group, and
  // scatter the result back into u. A FALSE return from GetMultiplicity
  // leaves bRet unchanged:
  if ( GetMultiplicity(nodeList, distinct_nodes, node_indices) ) 
  {  
      if ( (bRet=CombineLocal(urecv, n_urecv, dtype, node_indices, distinct_nodes->dim(), op, sop)) )
      { 
        for ( i=0; i<distinct_nodes->dim(); i++ )
        {
          SetGlobalNodeVal(hWork, u, ulen, dtype, (*distinct_nodes)(i), (GBYTE*)sop+i*tsize);
        }
      }
  } 

#if defined(DO_GLOP_TIMING)
  doop_time = STK::Timer() - tstart;
#endif

cleanup:
  // Each buffer is released through the type it was allocated with:
  // Data_Exchange (2) uses 'new GDOUBLE []', CombineLocal (2) uses
  // 'new GBYTE []'. Deleting through a void * is undefined.
  delete [] (GDOUBLE*)urecv;
  delete [] (GBYTE*)sop;
  delete [] (GBYTE*)lres;
  if ( node_indices != NULL )
  {
    // one index buffer exists per distinct node:
    if ( distinct_nodes != NULL )
    {
      for ( i=0; i<distinct_nodes->dim(); i++ )
        if ( node_indices[i] != NULL ) delete node_indices[i];
    }
    delete [] node_indices;
  }
  if ( distinct_nodes != NULL ) delete distinct_nodes;
  if ( nodeList       != NULL ) delete nodeList;


  return bRet;

} // end of method DoOp(3)


//************************************************************************************
//*************************************************************************************
// METHOD     : CombineLocal (2)
// DESCRIPTION: performs G_OP (op) on the array of field values, u, of
//              size GINT , returning the result in the argument, result.
// ARGUMENTS  : u     : input field whose elements are to be combined.
//              ne    : number of elements in u
//              dtype : GC_DATATYPE of field, u
//              ilocal: local index buffers: ilocal[n_comb], one for
//                      each distinct combination. dimension of the
//                      buffer determines the number of elements to combine.
//              n_comb: number of combinations to perform
//              op    : operation to be performed in the combination
//              result: result of the operation of dimension [n_comb], one
//                      for each combination. This is allocated here, and
//                      must be deleted by caller.
// RETURNS    : TRUE on success; else FALSE, if invalid operation requested, or
//              invalid local index specified.
//************************************************************************************
GBOOL GlOp::CombineLocal(void *u     , GINT  ne, GC_DATATYPE dtype, GIBuffer  **ilocal, 
                         GINT  n_comb,  G_OP op, void *&result   )
{

  GINT    ic, k, nterms;
  GINT    tsize=dtype;     // numeric value of dtype is used as the element byte size
  GQUAD  *work, *acc;
  UTYPE   ut;              // carried over from the original; not referenced
  GBOOL   bValidOp;

  // Input field and index buffers are required; zero combinations is a
  // successful no-op that leaves 'result' untouched:
  if ( u == NULL || ilocal == NULL ) return FALSE;
  if ( n_comb <= 0 ) return TRUE;

  // Scratch space: 'work' holds the input elements, 'acc' the per-combination
  // accumulators:
  work = new GQUAD [ne];
  if ( work == NULL ) return FALSE;
  acc = new GQUAD [n_comb]; 
  if ( acc == NULL ) 
  {
    delete [] work;
    return FALSE;
  }

  for ( ic=0; ic<n_comb; ic++ ) acc[ic] = 0.0;

  // Each input element's tsize bytes are copied into a zeroed scratch slot:
  for ( k=0; k<ne; k++ )
  {
    work[k] = 0.0;
    memcpy(work+k,(GBYTE*)u+k*tsize,tsize);
  }

  // Only these four reductions are supported:
  bValidOp = ( op == G_OP_SUM || op == G_OP_PROD || op == G_OP_MAX || op == G_OP_MIN ) ? TRUE : FALSE;

  if ( bValidOp )
  {
    for ( ic=0; ic<n_comb; ic++ )
    {
      GIBuffer &idx = *ilocal[ic];
      GQUAD    &r   = acc[ic];

      // The number of terms taken from a buffer is capped at ne:
      nterms = idx.dim();
      if ( ne < nterms ) nterms = ne;

      switch(op) 
      {
        case G_OP_SUM:
          r = 0.0;
          for ( k=0; k<nterms; k++ ) r += work[idx(k)];
          break;
        case G_OP_PROD:
          r = 1.0;
          for ( k=0; k<nterms; k++ ) r *= work[idx(k)];
          break;
        case G_OP_MAX:
          r = -HUGE;
          for ( k=0; k<nterms; k++ ) r = MAX(r,work[idx(k)]);
          break;
        case G_OP_MIN:
          r = HUGE;
          for ( k=0; k<nterms; k++ ) r = MIN(r,work[idx(k)]);
          break;
        default:   // excluded by bValidOp
          break;
      }
    }

    // Create result quantity of the required element size, and copy the
    // leading tsize bytes of each accumulator into it. Caller owns 'result':
    result = new GBYTE [n_comb*tsize];
    for ( ic=0; ic<n_comb; ic++ ) 
      memcpy((GBYTE*)result+ic*tsize, (GBYTE*)(acc+ic), tsize);
  }

  delete [] work;
  delete [] acc;
  return bValidOp;

} // end of method CombineLocal (2)


//************************************************************************************
//************************************************************************************
// METHOD     : SetGlobalNodeVal (2)
// DESCRIPTION: Sets values of u at all common global ids, iGlobal, to the 
//              value, val. The global ids, of which iGlobal is one, must
//              have been stored in a prior call to Init. 
// ARGUMENTS  : h      : handle from call to Init
//              u      : field array whose global indices are to be set
//              n      : number of field elements
//              dtype  : GC_DATATYPE specification of field, u
//              iGlobal: global index to set (method sets the corresponding 
//                       u's for all occurrences of iGlobal)
//              val    : value to set
// RETURNS    : TRUE on success; else FALSE
//************************************************************************************
GBOOL GlOp::SetGlobalNodeVal(GCHandle h, void *u, GINT  n, GC_DATATYPE dtype, GNODEID iGlobal, void *val)
{
  
  GINT    iLocal, tsize=dtype;   // numeric value of dtype is used as the element byte size
  GSHORT  iHandle = HandleIndex(h);

  // Both the field and the source value are read/written via memcpy:
  if ( u == NULL || val == NULL ) return FALSE;
  if ( iHandle < 0 ) return FALSE;
 
  // First lookup: starts a search of the handle's local-to-global list.
  // A negative result means iGlobal does not occur; an index beyond the
  // field length cannot be written:
  iLocal = GlobalToLocal(LocToGlob[iHandle], 0, iGlobal);
  if ( iLocal < 0 || iLocal >= n ) return FALSE;
  memcpy((GBYTE*)u+iLocal*tsize, (GBYTE*)val, tsize);

  // Subsequent lookups pass a NULL list.
  // NOTE(review): GlobalToLocal is defined outside this block; passing NULL
  // presumably continues the previous search and yields a non-positive
  // value once no further occurrence exists -- confirm.
  while ( (iLocal=GlobalToLocal(NULL, 0, iGlobal)) > 0 && iLocal < n )
  {
    memcpy((GBYTE*)u+iLocal*tsize, (GBYTE*)val, tsize);
  }

  // The loop ends either because the search was exhausted (the normal,
  // successful case) or because an out-of-range index was returned. Only
  // the latter is an error; treating the end-of-search value as a failure
  // would make every successful call report FALSE.
  if ( iLocal >= n ) return FALSE;
  return TRUE;
  
} // end of method SetGlobalNodeVal (2)


//************************************************************************************
//************************************************************************************
// METHOD     : Data_Exchange (2)
// DESCRIPTION: Perform pair-wise data exchange between procs by using asynchronous
//              recvs...
// ARGUMENTS  :
//              hWork     : handle from Init
//              u_local   : local field data from DoOp, from which to
//                          get send message data
//              u_recv    : field data received. Allocated here, but caller
//                          is responsible for deleting this.
//              n_recv    : number of elements of u_recv filled in this exchange
//              dtype     : GC_DATATYPE of recv/send data
//              glNodeList: list of global nodes corresponding to the field elements,
//                          u_recv, received from other procs
// RETURNS    : TRUE on success; else FALSE
//************************************************************************************
 
GBOOL GlOp::Data_Exchange(GCHandle hWork, void *u_local, void *&u_recv,
                          GINT  &numrecv, GC_DATATYPE dtype, GIBuffer  *&glNodeList)
{
  GINT        i, j, k, n, maxsend, nsendrecv, n_recv, nper, tsize;
  GINT        iHandle= HandleIndex(hWork);
  GSHORT      srcdest;
  GBOOL       bRet=TRUE;
  GDOUBLE     *sendbuf=NULL;         // typed owner of the send buffer
  void        *u_send=NULL, *p_recv=NULL;
  CHandle     *chandle=NULL; 

#if defined(DO_GLOP_TIMING)
  GDOUBLE tstart;
#endif

  // An unrecognized handle must not be used to index the per-handle
  // arrays below:
  if ( iHandle < 0 )
  {
    numrecv = 0;
    return FALSE;
  }

  GIBuffer    index(numGlNode_buffs[iHandle]);

  // The numeric value of dtype is used as the element size in bytes
  // throughout (see the memcpy strides below):
  tsize = dtype;

  // find proc ids != rank of this proc:
  n_recv= 0;
  for ( i=0; i<numGlNode_buffs[iHandle]; i++ )
    if ( (*srProc_list[iHandle])(i) != rank )  index(n_recv++) = i;

  // n: total number of node entries over all buffers (including this
  // proc's own); maxsend: largest single buffer:
  n       = 0;
  maxsend = 0;
  for ( i=0; i<numGlNode_buffs[iHandle]; i++ )
  {
    n += srGlNode_buffs[iHandle][i]->dim();
    maxsend = MAX(srGlNode_buffs[iHandle][i]->dim(), maxsend);
  }

  if ( n <= 0 ) 
  {
    cout << "GlOp::Data_Exchange: invalid message size" << endl;
    exit(1);
  }

  // Replace any buffers passed in. u_recv is released through GDOUBLE *,
  // the type it is allocated with below (deleting a void * is undefined).
  // NOTE(review): this assumes a non-NULL incoming u_recv was also
  // allocated as GDOUBLE [] -- confirm against callers.
  if ( u_recv     != NULL ) delete [] (GDOUBLE*)u_recv;
  if ( glNodeList != NULL ) delete glNodeList;

  // Buffers stay GDOUBLE arrays, but are widened when one element needs
  // more than sizeof(GDOUBLE) bytes, so the tsize-strided copies fit:
  nper = (tsize + (GINT)sizeof(GDOUBLE) - 1) / (GINT)sizeof(GDOUBLE);
  if ( nper < 1 ) nper = 1;
  u_recv     = new GDOUBLE [n*nper];
  sendbuf    = new GDOUBLE [maxsend*nper];
  u_send     = sendbuf;
  glNodeList = new GIBuffer (n);
  chandle    = new CHandle [n_recv];

#if defined(DO_GLOP_TIMING)
  tstart =  STK::Timer();
#endif

  // Build list of all global nodes to be received or existing on this proc,
  // and post receives. Note that the recv buffer 'stack' used in
  // call to ARecv is simply a pointer to the appropriate position in
  // the  'flat' u_recv array, which is returned 
  j  = 0;
  for ( k=0; k<n_recv && bRet; k++ )
  {
    i = index(k);
    p_recv     = (GBYTE*)u_recv+j*tsize;
    nsendrecv  = srGlNode_buffs[iHandle][i]->dim();
    srcdest    = (*srProc_list[iHandle])(i);
    GComm::ARecv(&p_recv, 1, &nsendrecv, NULL, dtype, &srcdest,chandle[k]);
    // a receive that registered no posts is treated as a failure:
    bRet       = chandle[k].nposts<=0 ? FALSE : TRUE;
    for ( n=j; n<j+srGlNode_buffs[iHandle][i]->dim(); n++ )
      (*glNodeList)(n) = (*srGlNode_buffs[iHandle][i])(n-j);

    // update position in the u-recv buffer:
    j += srGlNode_buffs[iHandle][i]->dim();
  }
  // number of u_recv slots reserved for remote data; the caller appends
  // its own local data starting at this offset:
  numrecv = j;

#if 0
  if ( !bRet ) 
    cout << "GlOp::Data_Exchange:: ARecv failed; NULL CHandle" << endl;
#endif

  // send required data to all processors that share this
  // procs nodes:
  for ( k=0; k<n_recv && bRet; k++ )
  {
    i = index(k);
    // create send message for this proc:
    for ( j=0; j<srGlNode_buffs[iHandle][i]->dim(); j++ )
      memcpy((GBYTE*)u_send+j*tsize,(GBYTE*)u_local+ig2l[iHandle][i][j]*tsize, tsize);
    nsendrecv = srGlNode_buffs[iHandle][i]->dim();
    srcdest   = (*srProc_list[iHandle])(i);
    // The message was packed with dtype-sized elements and the matching
    // receive was posted with dtype, so the send must use dtype too
    // (it was previously hard-coded to GC_GDOUBLE):
    bRet  = GComm::BSend(&u_send, 1, &nsendrecv, NULL, dtype, &srcdest);
#if 0
    if ( !bRet ) 
      cout << "GlOp::Data_Exchange:: BSend failed " << endl;
#endif
  }
  
  // Wait for the posted receives to complete; skipped after any failure:
  for ( k=0; k<n_recv && bRet; k++ )
  {
    bRet &= GComm::AWaitOnRecv(chandle[k]);
  }

#if 0
  if ( !bRet ) 
    cout << "GlOp::Data_Exchange:: AWaitOnRecv failed" << endl;
#endif

#if defined(DO_GLOP_TIMING)
  exch_time = STK::Timer() - tstart;
#endif


  // u_recv and glNodeList are handed to the caller; only the scratch
  // buffers are released here:
  delete [] sendbuf;
  delete [] chandle;

  return bRet;
} // end of Data_Exchange (2)
#endif


//************************************************************************************
//************************************************************************************
// METHOD     : GetMultiplicity
// DESCRIPTION: takes input list of nodes, list, and determines the 
//              multiplicity of each node. The distinct nodes are
//              output in distinct_nodes, while the indices corresponding
//              to each of the distinct_nodes is provided in index_buffs,
//              one buffer for each of the distinct nodes.
// ARGUMENTS  : list          : list of node indices whose multiplicity is to be determined
//                              must be delimited by -1
//              distinct_nodes: on output, contains list of the distinct nodes found,
//                              which have multiplicity > 1
//              index_bufs    : for each of the distinct_nodes, provides a buffer
//                              of the indices of their occurrence in list
// RETURNS    : TRUE on success; else FALSE
//************************************************************************************
GBOOL GlOp::GetMultiplicity(GNIDBuffer  *list, GNIDBuffer  *&distinct_nodes,
                            GIBuffer   **&index_buffs)
{
  if ( list == NULL ) {
    cout << "GlOp::GetMultiplicity: NULL input list" << endl;
    return FALSE;
  }

  GINT       i, index, j, n, ndn , ng1, *ig1 ;
  GIBuffer   *ni;         // occurrence count for each distinct node
  GNIDBuffer *tmp_nodes;  // distinct nodes, in order of first occurrence

  ni         = new GIBuffer  (list->dim());
  tmp_nodes  = new GNIDBuffer(list->dim());



#if defined(GLOP_DEBUG_OUTPUT)
  cout << "GlOp::GetMultiplicity: list=" << *list << endl;
#endif

  // Pass 1: collect the distinct nodes and count how often each occurs.
  // Scanning stops at the end of the list or at the first negative entry:
  ni->Set(0);
  tmp_nodes->Set(-1);
  ndn = 0;
  for ( i=0; i<list->dim() && (*list)(i)>=0; i++ )
  {
    if ( !tmp_nodes->contains((*list)(i), index,-1) )
    {
      (*tmp_nodes)(ndn) = (*list)(i);
      (*ni)(ndn) ++;
      for ( j=i+1; j<list->dim() && (*list)(j)>=0; j++ )
      {
        if ( (*list)(j) == (*list)(i) )
        {
          (*ni)(ndn) ++;
        }
      }
      ndn++;
    }
  }

  // Release any previous output, and reset the caller's pointers so they
  // never refer to freed storage, even on the failure return below.
  // NOTE(review): only the array of index-buffer pointers is freed here;
  // buffers a previous call stored in it are not -- confirm who owns them.
  if ( distinct_nodes != NULL ) delete distinct_nodes;
  if ( index_buffs    != NULL ) delete [] index_buffs;
  distinct_nodes = NULL;
  index_buffs    = NULL;

  if ( ndn <= 0 ) {
    cout << "GlOp::GetMultiplicity: ndn < 0" << endl;
    // free the scratch buffers on this path too:
    delete ni;
    delete tmp_nodes;
    return FALSE;
  }

  ng1 = 0;
  ig1 = new GINT  [ndn];
  for ( i=0; i<ndn; i++ )
  {
    if ( (*ni)(i) > 1) 
    {
       ig1[ng1] = i;
       ng1++;
    }
  }

  // Note: ng1 is the number of dist. nodes whose multiplicity > 1;
  //       ig1 is the array of indices of len ng1 whose values
  //       hold the indices whose mult. > 1. The dimension of
  //       the index_buffs[i] array is the multiplicity for 
  //       index, i.

  distinct_nodes = new GNIDBuffer (ng1);
  index_buffs    = new GIBuffer * [ng1];

  for ( i=0; i<ng1; i++ )
  {
    j = ig1[i];
    (*distinct_nodes)(i) = (*tmp_nodes)(j);
    index_buffs[i] = new GIBuffer  ((*ni)(j));
  }

  // Pass 2: for each retained node, record the positions in list at
  // which it occurs:
  for ( j=0; j<ng1; j++ )
  {
    n = 0;
    for ( i=0; i<list->dim() && (*list)(i)>=0 && n<(*ni)(ig1[j]); i++ )
    {
      if ( (*list)(i) == (*distinct_nodes)(j) )
      {
        (*index_buffs[j])(n) = i;
         n++;
      }
    }
  }

  delete ni;
  delete [] ig1;
  delete tmp_nodes;

  return TRUE;

} // end of method GetMultiplicity


//************************************************************************************
//************************************************************************************
// METHOD     : Repack
// DESCRIPTION: Method Repack repacks the sorted records, record_buffs, which 
//              have the structure
//       buff0:  node0 proc0 proc1...proc_max_nodes or -1 node1 proc0 proc1 ...
//              which is a series of records each of a fixed length, record_length
//              and creates a list of proc ids, proc_list, each of which has a buffer 
//              of global nodes, glnodebuffs
// ARGUMENTS  : record_buffs: the records input for Repacking
//              numBuffs    : length of array record_buffs
//              max_procs   : maximum no of distinct procs, for allocating temp. space
//              max_nodes   : maximum no of distinct nodes for each proc, for allocating temp. space
//              record_length: fixed record length of each record in each buffered list of records, record_buffs
//              proc_list   : list of  proc ids corresponding to each distinct global node. 
//                            Allocated upon return to the exact number of procs found; caller
//                            responsible for deletion.
//              glnodebuffs : contains, for each proc in proc_list, the global nodes shared
//                            by more than one proc. Each of the proc_list->dim() buffers
//                            of this array is allocated exactly upon return; caller
//                            is responsible for deletion.
//              np          : number of global node buffers
// RETURNS    : TRUE on success; else FALSE
//************************************************************************************
GBOOL GlOp::Repack(GNIDBuffer  **record_buffs, GINT  numBuffs, GINT  max_procs, GINT  max_nodes,
                   GINT  record_length, GSBuffer  *&proc_list,
                   GNIDBuffer  **&glnodebuffs, GINT  &np)
{
  // The record lists are required, and the output array of node buffers must
  // not be allocated on entry: it is always created here from scratch.
  if ( record_buffs == NULL ) return FALSE;
  if ( glnodebuffs != NULL ) return FALSE;

  GINT       i, index, j, k;
  GSHORT     *splist=NULL, nprocs;
  GBOOL      bRet =TRUE;
  GNODEID    iGlobal;
  GSBuffer   *tmp_proc_list=NULL;
  GNIDBuffer **tmp_node_buffs=NULL;
  GIBuffer   *iascend=NULL, *nodebuffsize=NULL;

  // Scratch space, sized by the caller-supplied upper bounds:
  //   tmp_proc_list  : distinct proc ids in order of first appearance
  //   tmp_node_buffs : for each such proc, the global nodes found for it
  //   nodebuffsize   : number of entries used in each tmp_node_buffs[]
  tmp_proc_list    = new GSBuffer  (max_procs);
  tmp_node_buffs   = new GNIDBuffer * [max_procs];
  for ( i=0; i<max_procs; i++ ) tmp_node_buffs[i] = NULL;
  nodebuffsize     = new GIBuffer  (max_procs);

  tmp_proc_list->Set(-1);
  nodebuffsize->Set(0);

  // Pass 1: walk every record of every list, and file the record's global
  // node under each proc id that the record names.
  np = 0;
  for ( i=0; i<numBuffs && bRet; i++ )
  {
    j = 0;
    // The scan of a list stops at the first record that cannot be read, or
    // that names fewer than two procs. GetRecord (re)allocates splist.
    while ( bRet && GetRecord(j, record_buffs[i], record_length, &iGlobal, splist, nprocs) && nprocs>1 )
    {
      for ( k=0; k<nprocs; k++)
      {
#if defined(GLOP_DEBUG_OUTPUT)
       cout << "GlOp::Repack: tmp_proc_list=" << *tmp_proc_list << " splist[k=" << k << "]=" << splist[k] << endl;
#endif
        if ( !tmp_proc_list->contains(splist[k],index,-1) ) // if proc id not in list, add it, and update node buff and size
        {
          if ( np >= max_procs )
          {
            cout << "GlOp::Repack:  GlOp::Repack: not enough temp space max_procs=" << max_procs << " np=" << np << endl;
            cout << "GlOp::Repack: tmp_proc_list=" << *tmp_proc_list << " splist[k=" << k << "]=" << splist[k] << endl;
            bRet = FALSE;
            break;
          }
#if defined(GLOP_DEBUG_OUTPUT)
          cout << "Proc: " << rank << " record [" << j << "]: " << " Adding proc " << splist[k] << " to list..." <<   endl;
#endif
          tmp_node_buffs[np] = new GNIDBuffer  (max_nodes);
          tmp_node_buffs[np]->Set(-1);
          (*tmp_node_buffs[np])((*nodebuffsize)(np)) = iGlobal;  // if buff length exceeded, GBuffer terminates program
          (*nodebuffsize)(np)++;
          (*tmp_proc_list)(np) = splist[k];
          np++;
        }
        else if ( index >= 0 )                                  // proc id already found; update its corresp. node buff and size
        {
          if ( (*nodebuffsize)(index) >= max_nodes )
          {
            cout << "Proc: " << rank << "GlOp::Repack:  not enough temp space max_nodes=" << max_nodes << " nd=" 
                 << (*nodebuffsize)(index) << " index=" << index << endl;
            bRet = FALSE;
            break;
          }
          (*tmp_node_buffs[index])((*nodebuffsize)(index)) = iGlobal;
          (*nodebuffsize)(index)++;
#if defined(GLOP_DEBUG_OUTPUT)
          cout << "Proc: " << rank << " nodebuffsize[index=" << index << "]=" << (*nodebuffsize)(index) <<
               " tmp_node_buffs[index=" << index << "]=" << *tmp_node_buffs[index] << endl;
#endif
        }
      }
      j++;
    }
  }

  // Running out of temporary space is treated as fatal: the program exits
  // here rather than returning FALSE.
  if ( !bRet )
  {
    cout << "GlOp::Repack: initial sort failed" << endl;
    exit(1);
  }

  // np is now the number of distinct proc ids found. With none, the outputs
  // are left as they were on entry, and the method still reports success.
  if ( np == 0 )
  {
    cout << "GlOp::Repack: No processors sharing nodes!" << endl; 
    goto cleanup;
  }

  if ( proc_list != NULL ) delete proc_list;
  proc_list = new GSBuffer  (np);
  iascend   = new GIBuffer   (np);

  // Pass 2: copy from the temporary buffers to the permanent ones.

  // ... first, the proc-list. iascend receives the permutation that puts the
  // proc ids in ascending order; the list itself is then sorted in place:
  for ( i=0; i<np; i++ )
    (*proc_list)(i) = (*tmp_proc_list)(i);

  bRet = IndexSort(*proc_list, *iascend); 
  bRet &= IndexSort(*proc_list); 

  // Without a valid permutation the node buffers cannot be sized or ordered,
  // so stop here instead of indexing with the contents of iascend:
  if ( !bRet ) goto cleanup;

  // ... then the node buffers, each allocated to the exact number of nodes
  // collected for its proc, in order of ascending proc id:
  glnodebuffs = new GNIDBuffer  * [np];
  for ( i=0; i<np; i++ )
    glnodebuffs[i] = new GNIDBuffer  ((*nodebuffsize)((*iascend)(i)));

  // copy from temporary node buffers, and sort each buffer's nodes ascending:
  for ( i=0; i<np && bRet; i++ )
  {
    for ( j=0; j<glnodebuffs[i]->dim(); j++ )
      (*glnodebuffs[i])(j) = (*tmp_node_buffs[(*iascend)(i)])(j);
#if defined(GLOP_DEBUG_OUTPUT)
    cout << "GlOp::Repack: before IndexSort: glnodebuffs[i=" << i << "]=" << *glnodebuffs[i] << endl;
#endif
    bRet &= IndexSort(*(glnodebuffs[i]));
#if defined(GLOP_DEBUG_OUTPUT)
    cout << "GlOp::Repack: after  IndexSort: glnodebuffs[i=" << i << "]=" << *glnodebuffs[i] << endl;
#endif
  }

cleanup:

  // Release all scratch space, including the proc list last handed back by
  // GetRecord:
  if ( tmp_node_buffs != NULL )
  {
    for ( i=0; i<max_procs; i++ )
      if ( tmp_node_buffs[i] != NULL ) delete tmp_node_buffs[i];
    delete [] tmp_node_buffs;
  }

  if ( nodebuffsize  != NULL ) delete nodebuffsize;
  if ( iascend       != NULL ) delete iascend;
  if ( tmp_proc_list != NULL ) delete tmp_proc_list;
  if ( splist        != NULL ) delete [] splist;

  return bRet;

} // end of method Repack


//************************************************************************************
//************************************************************************************
// METHOD     : GetRecord
// DESCRIPTION: Retrieves the record with index, index, from a list of fixed-length
//              records, rec_buff, each of which is of the form:
//   global_node0 procid0 procid1 procid2, ... procid_maxid ... -1 -1 -1 ...
// ARGUMENTS  : index      : index of the record to retrieve
//              rec_buff   : the records list
//              maxlength  : fixed length of each record in rec_buff
//              iGlobal    : global node index contained in the record
//              proc_buff  : list of proc ids that share the iGlobal node
//              num_procs  : number of procs in the list, proc_buff
//************************************************************************************
GBOOL GlOp::GetRecord(GINT  index, GNIDBuffer  *rec_buff, GINT  maxlength, GNODEID *iGlobal, GSHORT  *&proc_buff, GSHORT  &num_procs)
{
  if ( rec_buff == NULL ) return FALSE;

  // Slot holding the record's global node id; the proc ids follow it:
  const GINT  first = index*maxlength;
  if ( first >= rec_buff->dim() ) return FALSE;

  // One past the last slot that may belong to this record, clipped to the
  // end of the buffer:
  GINT  limit = first + maxlength;
  if ( limit >= rec_buff->dim() ) limit = rec_buff->dim();

  // Count the proc ids: they run until the first negative entry or the
  // end of the record, whichever comes first.
  GINT  pos;
  num_procs = 0;
  for ( pos=first+1; pos<limit && (*rec_buff)(pos) >= 0; pos++ ) num_procs++;

  // Whatever list the caller passed in is released; a record with no
  // procs leaves proc_buff NULL and iGlobal untouched.
  if ( proc_buff != NULL ) delete [] proc_buff;
  proc_buff = NULL;
  if ( num_procs == 0 ) return TRUE;

  // Hand back the node id and a freshly allocated copy of its proc ids:
  proc_buff = new GSHORT  [num_procs];
  *iGlobal  = (*rec_buff)(first);
  for ( pos=first+1; pos<limit && (*rec_buff)(pos) >= 0; pos++ )
    proc_buff[pos-first-1] = (*rec_buff)(pos);

  return TRUE;

} // end of method GetRecord
  

//************************************************************************************
//************************************************************************************
// METHOD     : GetNewHandle
// DESCRIPTION: Retrieves new handle from handle pool
// ARGUMENTS  : none
// RETURNS    : new handle, GCHandle; else NULL_HANDLE
//************************************************************************************
GCHandle  GlOp::GetNewHandle()
{
  // Every handle in the pool is already handed out:
  if ( nUsedHandles >= MAX_OPHANDLES ) return NULL_HANDLE;

  GINT  slot, k;

  // Try the pool indices in ascending order, and take the first one that
  // does not appear among the indices currently in use:
  for ( slot=0; slot<MAX_OPHANDLES; slot++ )
  {
    for ( k=0; k<nUsedHandles && (*iUsedHandles)(k) != slot; k++ ) ;
    if ( k < nUsedHandles ) continue;   // slot is taken; try the next one

    // Append the index to the in-use list, and return the pool handle
    // stored at that index:
    (*iUsedHandles)(nUsedHandles) = slot;
    nUsedHandles++;
    return (*opHandle)((*iUsedHandles)(nUsedHandles-1));
  }

  return NULL_HANDLE;

} // end of method GetNewHandle


//************************************************************************************
//************************************************************************************
// METHOD     : HandleIndex
// DESCRIPTION: Returns index in handle pool, corresponding to handle, h
// ARGUMENTS  : GCHandle, h
// RETURNS    : corresponding pool index on success; else -1
//************************************************************************************
GSHORT  GlOp::HandleIndex(GCHandle h)
{
  if ( h == NULL_HANDLE ) return -1;

  // Look through the in-use pool indices for the one whose pool handle
  // is h, and return that pool index:
  for ( GSHORT  k=0; k<nUsedHandles; k++ )
  {
    if ( (*opHandle)((*iUsedHandles)(k)) == h ) return (*iUsedHandles)(k);
  }

  return -1;  // Handle not found

} // end of method HandleIndex


//************************************************************************************
//************************************************************************************
// METHOD     : FreeHandleMem
// DESCRIPTION: Frees all memory associated with handle, handle
// ARGUMENTS  : GCHandle, handle
// RETURNS    : none.
//************************************************************************************
void GlOp::FreeHandleMem(GCHandle handle)
{
  GINT  i, iHandle=HandleIndex(handle);

  // HandleIndex returns an index into the handle pool (0..MAX_OPHANDLES-1),
  // or -1 if the handle is not in use. It is not a position in the in-use
  // list, so it must not be compared with nUsedHandles: after a lower
  // handle is freed, a live handle's pool index can be >= nUsedHandles.
  if ( iHandle < 0  || iHandle >= MAX_OPHANDLES ) return ; 

  // Global-to local maps:
  if ( ig2l[iHandle] != NULL )
  {
    for ( i=0; i<numGlNode_buffs[iHandle]; i++ )
      if ( ig2l[iHandle][i] != NULL ) delete [] ig2l[iHandle][i];
    delete [] ig2l[iHandle];
    ig2l[iHandle] = NULL;
  }
  
  // Receive buffers:
  if ( srGlNode_buffs[iHandle] != NULL )
  {
    for ( i=0; i<numGlNode_buffs[iHandle]; i++ )
      delete srGlNode_buffs[iHandle][i];
    delete [] srGlNode_buffs[iHandle];
    srGlNode_buffs[iHandle] = NULL;
  }

  if ( srProc_list[iHandle] != NULL )
  {
    delete  srProc_list[iHandle];
    srProc_list[iHandle] = NULL;
  }


  // Local to global arrays:
  if ( LocToGlob[iHandle] != NULL ) 
  {
     delete LocToGlob[iHandle];
     LocToGlob[iHandle] = NULL;
  }

  // The number of local_op_indices entries is taken from glob_dups, so this
  // must run before glob_dups is deleted below. If glob_dups is missing, the
  // entry count is unknown and only the pointer array itself is released.
  if ( local_op_indices[iHandle] != NULL )
  {
    for ( i=0; glob_dups[iHandle] && i<glob_dups[iHandle]->dim(); i++ )
      delete local_op_indices[iHandle][i];
    delete [] local_op_indices[iHandle];
    local_op_indices[iHandle] = NULL;
  }
  
  // multiplicity objects:
  if ( gmultiplicity[iHandle] != NULL )
  {
    delete gmultiplicity[iHandle];
    gmultiplicity[iHandle] = NULL;
  }

  // flat vector objects:
  if ( vflat [iHandle] != NULL )
  {
    delete vflat [iHandle];
    vflat [iHandle] = NULL;
  }

  if ( glob_dups[iHandle] != NULL )
  {
    delete glob_dups[iHandle];
    glob_dups[iHandle] = NULL;
  }

  // Re-initialize indices:
  numGlNode_buffs[iHandle] = 0;

  // Misc DoOp quantities:
  if ( u_send[iHandle] != NULL )
  {
    delete u_send[iHandle];
    u_send[iHandle] = NULL;
  }

  if ( u_recv[iHandle] != NULL )
  {
    delete u_recv[iHandle];
    u_recv[iHandle] = NULL;
  }

  if ( rhandle[iHandle] != NULL )
  {
    delete [] rhandle[iHandle];
    rhandle[iHandle] = NULL;
  }

  if ( glNodeList[iHandle] != NULL )
  {
    delete glNodeList[iHandle];
    glNodeList[iHandle] = NULL;
  }

  if ( index[iHandle] != NULL )
  {
    delete index[iHandle];
    index[iHandle] = NULL;
  }

  // The number of node_indices_op entries is taken from distinct_nodes_op,
  // so this must run before distinct_nodes_op is deleted below:
  if ( node_indices_op[iHandle] != NULL )
  {
    for ( i=0; distinct_nodes_op[iHandle] && i<distinct_nodes_op[iHandle]->dim(); i++ )
      if ( node_indices_op[iHandle][i] != NULL ) delete node_indices_op[iHandle][i];
    delete [] node_indices_op[iHandle];
    node_indices_op[iHandle] = NULL;
  }
  
  if ( distinct_nodes_op[iHandle] != NULL ) 
  {
    delete  distinct_nodes_op[iHandle];
    distinct_nodes_op[iHandle] = NULL;
  }

  if ( lresult[iHandle] != NULL ) 
  {
    delete  [] lresult[iHandle];
    lresult[iHandle] = NULL;
  }

  if ( gresult[iHandle] != NULL ) 
  {
    delete  [] gresult[iHandle];
    gresult[iHandle] = NULL;
  }

  // Reset the per-handle counters:
  n_recv         [iHandle] = 0;
  n_recv_data    [iHandle] = 0;
  maxsend        [iHandle] = 0;

} // end of method FreeHandleMem


//************************************************************************************
//************************************************************************************
// METHOD     : FreeHandle
// DESCRIPTION: Frees up handle, handle, by first deleting all memory
//              associated with it, and then by returning the handle
//              to the handle pool.
// ARGUMENTS  : GCHandle, h
// RETURNS    : TRUE on success; else FALSE
//************************************************************************************
GBOOL GlOp::FreeHandle(GCHandle handle)
{
  GINT  i, n, iRemove=-1;
  GSBuffer    iCopy(MAX_OPHANDLES);

  // Close all handle memory:
  FreeHandleMem(handle);

  // find handle in handle pool. HandleIndex returns a pool index
  // (0..MAX_OPHANDLES-1) or -1; it is not a position in the in-use list, so
  // nUsedHandles is not a valid upper bound for it:
  iRemove = HandleIndex(handle) ;

  if ( iRemove < 0  || iRemove >= MAX_OPHANDLES ) return FALSE;  // Handle is already free, or doesn't exist

  // update current handle list: rebuild it from a copy, keeping every pool
  // index except the one being removed, packed at the front; the remaining
  // entries stay at -1:
  iCopy = *iUsedHandles;
  iUsedHandles->Set(-1);

  n=-1;
  for ( i=0; i<nUsedHandles; i++ )
  {
    if ( iCopy(i) != iRemove )
    {
      (*iUsedHandles)(++n) = iCopy(i);
    }
  }
  nUsedHandles--;

  return TRUE;
  
} // end of method FreeHandle


//************************************************************************************
//************************************************************************************
// METHOD     : GlobalToLocal
// DESCRIPTION: finds local index associated with global_index.
//              If first call to method returns a valid local index
//              then subsequent calls with LocToGlob_buff==NULL, will
//              return next local index of next occurrence. If no
//              additional global_index's are found in LocToGlob_buff, method
//              returns -1.
// ARGUMENTS  : LocToGlob_buff: buffer containing the global indices
//              istart        : starting index (0-LocToGlob_buff->dim())
//              global_index  : global index whose local index is sought.
// RETURNS    : local index or -1.
//************************************************************************************
GINT  GlOp::GlobalToLocal(GNIDBuffer  *LocToGlob_buff, GINT  istart, GNODEID global_index)
{ 
  GINT  pos;

  if ( LocToGlob_buff == NULL )
  {
    // Continuation call: resume in the buffer copy kept from an earlier
    // call, just past the last match. A negative resume position means
    // there is nothing to continue from.
    pos = iLocalStack;
    if ( pos < 0 ) return -1;
    for ( ; pos<g2lbuff->dim(); pos++ )
      if ( (*g2lbuff)(pos) == global_index ) break;
    if ( pos >= g2lbuff->dim() ) return -1;  // global index not found
  }
  else
  {
    // Fresh search: keep a private copy of the buffer for later
    // continuation calls, then scan the caller's buffer from istart.
    if ( g2lbuff != NULL ) delete g2lbuff;
    g2lbuff  = new GNIDBuffer  (LocToGlob_buff->dim());
    *g2lbuff = *LocToGlob_buff;

    for ( pos=istart; pos<LocToGlob_buff->dim(); pos++ )
      if ( (*LocToGlob_buff)(pos) == global_index ) break;
    if ( pos >= LocToGlob_buff->dim() ) return -1;  // global index not found
  }

  // Remember where the next continuation call should resume:
  iLocalStack = pos + 1;
  return pos;

} // end of method GlobalToLocal 



//************************************************************************************
//************************************************************************************
// METHOD     : BinSort
// DESCRIPTION: Sorts nodelist into bins based on dynamic range, nMaxNodes.
// ARGUMENTS  : nodelist    : input node list to sort
//              bInRange_   : for each proc, sets to 1 (or 0) if nodelist 
//                            contains a member in that bin (or doesn't).  
//              nInRange_   : for each proc, sets the number of distinct members
//                            in that proc's bin
//              iWorkNodes_ : contains the nodes in nodelist that this proc must work on.
// RETURNS    : 1 on success; else 0
//************************************************************************************
GINT  GlOp::BinSort(GNIDBuffer  *nodelist  ,GIBuffer   *&bInRange_,
                    GIBuffer   *&nInRange_,GNIDBuffer  *&iWorkNodes_)
{
  GINT     index;
  GNODEID  i, j, n=0, nDel, nRem, numInRange, nRange0, 
           nRange1, bNodeInRange;
  GNODEID  inode;

  // Without an input list there is nothing to sort; fail before touching
  // the output buffers (as BinFill does for its inputs):
  if ( nodelist == NULL ) return 0;

  // Any buffers passed in are discarded, and replaced by new ones: one
  // entry per proc for the two range buffers, and room for the whole node
  // list in the work-node buffer.
  if ( bInRange_ != NULL )
  {
    delete bInRange_;
    bInRange_ = NULL;
  }
  if ( nInRange_ != NULL )
  {
    delete nInRange_;
    nInRange_ = NULL;
  }
  if ( iWorkNodes_ != NULL )
  {
    delete iWorkNodes_;
    iWorkNodes_ = NULL;
  }
  bInRange_   = new GIBuffer  (NProcs);
  nInRange_   = new GIBuffer  (NProcs);
  iWorkNodes_ = new GNIDBuffer (nodelist->dim());


  if ( bInRange_ == NULL ) return 0;
  bInRange_->Set(0);
  nInRange_->Set(0);
  iWorkNodes_->Set(-1);  // not all will be this proc's work--nodes


  // Node list can be in any order. The id range [0, nMaxNodes) is cut into
  // NProcs consecutive bins of nDel ids each, with the first nRem bins
  // taking one extra id:
  nDel    = nMaxNodes / NProcs;
  nRem    = nMaxNodes % NProcs;
  nRange0 = 0;
  for ( i=0; i<NProcs; i++ )
  {
    numInRange = 0;
    nRange1 = nRange0 + nDel + (i<nRem?1:0) - 1;   // last id of bin i (inclusive)
    for ( j=0; j<nodelist->dim(); j++ )
    {
      inode = *((nodelist->Data()+j));
      bNodeInRange = (inode >= nRange0 && inode <= nRange1) ? 1 : 0;
      numInRange += bNodeInRange;
      // For this proc's own bin, also collect the nodes themselves, each
      // one only once:
      if ( i == rank && bNodeInRange==1 && !iWorkNodes_->contains((*nodelist)(j),index,-1))
      {
        (*iWorkNodes_)(n) = (*nodelist)(j); 
        n++;
      }
    } 
    (*bInRange_)(i) = numInRange > 0 ? 1 : 0;
    (*nInRange_)(i) = numInRange;   // counts every list entry in the bin, repeats included
    nRange0 = nRange1 + 1;
  }

  return 1;
} // end of method BinSort


//************************************************************************************
//************************************************************************************
// METHOD     : BinFill
// DESCRIPTION: fills bins, bins, with the Bin sorting of the input list, nodelist
// ARGUMENTS  : nodelist: input node list to sort
//              bins[]  : contain for each bin (proc), the members of nodelist
//                        corresponding to this bin. Must be allocated prior to entry:
//                        one buffer for each bin, with the buffers allocated.
// RETURNS    : TRUE on success; else FALSE
//************************************************************************************
GBOOL GlOp::BinFill(GNIDBuffer  *nodelist, GNIDBuffer  *bins[])
{
  if ( nodelist == NULL || bins == NULL ) return FALSE;

  GNODEID  ip, jn, nfill, lo, hi, node;

  // Node list can be in any order:
  // ***NOTE: bins array must be of length NProcs!!!
  // Each bin covers 'width' consecutive ids; the first 'extra' bins cover
  // one more.
  const GNODEID  width = nMaxNodes / NProcs;
  const GNODEID  extra = nMaxNodes % NProcs;

  lo = 0;
  for ( ip=0; ip<NProcs; ip++ )
  {
    if ( bins[ip] == NULL ) return FALSE;
    bins[ip]->Set(-1); 

    hi    = lo + width + (ip<extra?1:0) - 1;   // last id of this bin (inclusive)
    nfill = 0;
    for ( jn=0; jn<nodelist->dim(); jn++ )
    {
      node = *(nodelist->Data()+jn);
      if ( node < lo || node > hi ) continue;  // belongs to some other bin

      // Append to this bin, in list order:
      (*bins[ip])(nfill) = (*nodelist)(jn);
      nfill++;
    }
    lo = hi + 1;
  }

  return TRUE;
 
} // end of method BinFill


//************************************************************************************
//************************************************************************************
// METHOD     : IndexSort
// DESCRIPTION: From Numerical Recipes (largely), sorts array, arrin, in
//              ascending order. Buffer, indx is the buffer of indices, s.t.
//              arrin(indx(j)) is in ascending order for j = 0, 1, 2, ...
// ARGUMENTS  : arrin: input list to sort
//              indx : sorted indices
// RETURNS    : TRUE on success; else FALSE
//************************************************************************************
GBOOL GlOp::IndexSort(GNIDBuffer  &arrin, GIBuffer  &indx)
{

  // The index buffer must have exactly one slot per input entry:
  if ( arrin.dim() != indx.dim() ) return FALSE;

  GINT    l, j, ir, indxt, i, n=arrin.dim();
  GNODEID q;

  // Start from the identity permutation:
  for (j=0;j<n;j++) indx(j)=j;

  // With fewer than two entries the identity is already the answer. The
  // heap loop below must not run in that case: its exit test (--ir == 0)
  // is never met when ir starts below 1, and it would index out of range.
  if ( n < 2 ) return TRUE;

  // Heap sort on the indices; arrin itself is never modified. While l > 0
  // the heap is being built; after that, each pass moves the current
  // largest entry's index to slot ir and shrinks the heap by one.
  l=(n >> 1) + 1;
  ir=n-1;
  for (;;) 
  {
    if (l > 0)
      q=arrin((indxt=indx(--l)));
    else {
      q=arrin((indxt=indx(ir)));
      indx(ir)=indx(0);
      if (--ir == 0) 
      {
        indx(0)=indxt;
        return TRUE;
      }
    }
    // Sift the held index (indxt, with key q) down from position l:
    i=l;
    j=l << 1;
    while (j <= ir) 
    {
      if (j < ir && arrin(indx(j)) < arrin(indx(j+1))) j++;
      if (q < arrin(indx(j))) 
      {
        indx(i)=indx(j);
        i = j;
        j += MAX(j,1);   // MAX keeps j advancing when j is 0
      }
      else j=ir+1;       // position found; leave the loop
    }
    indx(i)=indxt;
  }

  return TRUE;

} // end of method IndexSort


#if  0
//************************************************************************************
//************************************************************************************
// METHOD     : IndexSort
// DESCRIPTION: From Numerical Recipes (largely), sorts array, arrin, in
//              ascending order. Buffer, indx is the buffer of indices, s.t.
//              arrin(indx(j)) is in ascending order for j = 0, 1, 2, ...
// ARGUMENTS  : arrin: input list to sort
//              indx : sorted indices
// RETURNS    : TRUE on success; else FALSE
//************************************************************************************
// NOTE: this definition lies inside an '#if 0' region, and so is not compiled.
//       It has the same signature and loop as the active
//       IndexSort(GNIDBuffer &, GIBuffer &) in this file, but holds the
//       key being sifted, q, as a GINT rather than a GNODEID.
GBOOL GlOp::IndexSort(GNIDBuffer  &arrin, GIBuffer  &indx)
{

  // The index buffer must have exactly one slot per input entry:
  if ( arrin.dim() != indx.dim() ) return FALSE;

  GINT  l, j, ir, indxt, i, n=arrin.dim();
  GINT  q;

  // Start from the identity permutation, then heap sort the indices;
  // arrin itself is never modified:
  for (j=0;j<n;j++) indx(j)=j;
  l=(n >> 1) + 1;
  ir=n-1;
  for (;;)
  {
    if (l > 0)
      q=arrin((indxt=indx(--l)));
    else {
      q=arrin((indxt=indx(ir)));
      indx(ir)=indx(0);
      if (--ir == 0)
      {
        indx(0)=indxt;
        return TRUE;
      }
    }
    // Sift the held index (indxt, with key q) down from position l:
    i=l;
    j=l << 1;
    while (j <= ir)
    {
      if (j < ir && arrin(indx(j)) < arrin(indx(j+1))) j++;
      if (q < arrin(indx(j)))
      {
        indx(i)=indx(j);
        i = j;
        j += MAX(j,1);
      }
      else j=ir+1;
    }
    indx(i)=indxt;
  }
  
  return TRUE;

} // end of method IndexSort
#endif


#if defined(GBUFF_DEF_GSHORT)
//************************************************************************************
//************************************************************************************
// METHOD     : IndexSort
// DESCRIPTION: From Numerical Recipes (largely), sorts array, arrin, in
//              ascending order. Buffer, indx is the buffer of indices, s.t.
//              arrin(indx(j)) is in ascending order for j = 0, 1, 2, ...
// ARGUMENTS  : arrin: input list to sort
//              indx : sorted indices
// RETURNS    : TRUE on success; else FALSE
//************************************************************************************
GBOOL GlOp::IndexSort(GSBuffer  &arrin, GIBuffer  &indx)
{

  // The index buffer must have exactly one slot per input entry:
  if ( arrin.dim() != indx.dim() ) return FALSE;

  GINT  l, j, ir, indxt, i, n=arrin.dim();
  GSHORT  q;

  // Start from the identity permutation:
  for (j=0;j<n;j++) indx(j)=j;

  // With fewer than two entries the identity is already the answer. The
  // heap loop below must not run in that case: its exit test (--ir == 0)
  // is never met when ir starts below 1, and it would index out of range.
  if ( n < 2 ) return TRUE;

  // Heap sort on the indices; arrin itself is never modified. While l > 0
  // the heap is being built; after that, each pass moves the current
  // largest entry's index to slot ir and shrinks the heap by one.
  l=(n >> 1) + 1;
  ir=n-1;
  for (;;)
  {
    if (l > 0)
      q=arrin((indxt=indx(--l)));
    else {
      q=arrin((indxt=indx(ir)));
      indx(ir)=indx(0);
      if (--ir == 0)
      {
        indx(0)=indxt;
        return TRUE;
      }
    }
    // Sift the held index (indxt, with key q) down from position l:
    i=l;
    j=l << 1;
    while (j <= ir)
    {
      if (j < ir && arrin(indx(j)) < arrin(indx(j+1))) j++;
      if (q < arrin(indx(j)))
      {
        indx(i)=indx(j);
        i = j;
        j += MAX(j,1);   // MAX keeps j advancing when j is 0
      }
      else j=ir+1;       // position found; leave the loop
    }
    indx(i)=indxt;
  }

  return TRUE;

} // end of method IndexSort


//************************************************************************************
//************************************************************************************
// METHOD     : IndexSort
// DESCRIPTION: Sorts input buffer ra in ascending order
// ARGUMENTS  : ra: input buffer to sort
// RETURNS    : TRUE on success; else FALSE
//************************************************************************************
GBOOL GlOp::IndexSort(GSBuffer  &ra)
{

  const GINT  count = ra.dim();
  if ( count < 2 ) return TRUE;       // nothing to reorder

  GINT    build = (count >> 1) + 1;   // heap is under construction while this is > 0
  GINT    last  = count - 1;          // last slot still belonging to the heap
  GINT    hole, child;
  GSHORT  held;                       // value being sifted into place

  for (;;)
  {
    if ( build > 0 )
    {
      // Construction phase: take the next entry to be sifted down.
      --build;
      held = ra(build);
    }
    else
    {
      // Extraction phase: the largest value, at the front, moves to the
      // end of the heap, and the value it displaces is sifted down.
      held     = ra(last);
      ra(last) = ra(0);
      --last;
      if ( last == 0 )
      {
        ra(0) = held;
        return TRUE;
      }
    }

    // Sift 'held' down from position 'build', pulling larger entries up:
    hole  = build;
    child = build << 1;
    while ( child <= last )
    {
      if ( child < last && ra(child) < ra(child+1) ) ++child;
      if ( !(held < ra(child)) ) break;   // 'held' belongs at 'hole'
      ra(hole) = ra(child);
      hole     = child;
      child   += MAX(child,1);
    }
    ra(hole) = held;
  }


  return TRUE;

} // end of method IndexSort

#endif


//************************************************************************************
//************************************************************************************
// METHOD     : IndexSort
// DESCRIPTION: Sorts input buffer ra in ascending order
// ARGUMENTS  : ra: input buffer to sort
// RETURNS    : TRUE on success; else FALSE
//************************************************************************************
GBOOL GlOp::IndexSort(GNIDBuffer  &ra)
{

  const GINT  count = ra.dim();
  if ( count < 2 ) return TRUE;       // nothing to reorder

  GINT    build = (count >> 1) + 1;   // heap is under construction while this is > 0
  GINT    last  = count - 1;          // last slot still belonging to the heap
  GINT    hole, child;
  GNODEID held;                       // value being sifted into place

  for (;;)
  {
    if ( build > 0 )
    {
      // Construction phase: take the next entry to be sifted down.
      --build;
      held = ra(build);
    }
    else
    {
      // Extraction phase: the largest value, at the front, moves to the
      // end of the heap, and the value it displaces is sifted down.
      held     = ra(last);
      ra(last) = ra(0);
      --last;
      if ( last == 0 )
      {
        ra(0) = held;
        return TRUE;
      }
    }

    // Sift 'held' down from position 'build', pulling larger entries up:
    hole  = build;
    child = build << 1;
    while ( child <= last )
    {
      if ( child < last && ra(child) < ra(child+1) ) ++child;
      if ( !(held < ra(child)) ) break;   // 'held' belongs at 'hole'
      ra(hole) = ra(child);
      hole     = child;
      child   += MAX(child,1);
    }
    ra(hole) = held;
  }


  return TRUE;

} // end of method IndexSort


//************************************************************************************
//************************************************************************************
// METHOD     : DSOp(1)
// DESCRIPTION: Performs direct stiffness summation/prod...
// ARGUMENTS  : 
//             **v   : array of pointers to vector objects. 
//                     There are nop array element pointers.
//                     The total of the vector sizes must equal
//                     the size of the id array used in call
//                     to Init.
//              nop  : number of pointer array elements in v.
//              seop : direct-stiffness operation to perform
//              hDSOp: GlOp handle derived from call to Glop::Init.
// RETURNS    : TRUE on success; else FALSE. A valid op is to
//              have a NULL handle.
GBOOL GlOp::DSOp(GVector **v, GINT  nop, G_OP seop, GCHandle hDSOp)
{
  GINT   i, nd, ntot;
  GSHORT  iHandle;

  if ( hDSOp == NULL_HANDLE || gmultiplicity == NULL )
  {
    return TRUE;  // return w/o doing anything--not an error
  }
  if ( v == NULL || nop <= 0 ) 
  {
    cout << "GlOp::DSOp: NULL invalid structured vector list" << endl;
    return FALSE;
  }

  // A handle that is not in the pool gives index -1, which must not be
  // used to index the per-handle arrays; nor can the operation proceed
  // without the handle's multiplicity object:
  iHandle = HandleIndex(hDSOp);
  if ( iHandle < 0 || gmultiplicity[iHandle] == NULL )
  {
    cout << "GlOp::DSOp: invalid or uninitialized handle" << endl;
    return FALSE;
  }

  // Total number of entries in the structured list:
  for ( i=0, ntot=0; i<nop; i++ )
  {
    if ( v[i] == NULL )
    {
      cout << "GlOp::DSOp: NULL invalid structured vector list" << endl;
      return FALSE;
    }
    ntot += v[i]->dim();
  }
 
  GComm::Synch();

  // The size check is made before anything is copied, so that a list of
  // the wrong size can never be written past the end of the flat vector:
  if ( ntot != gmultiplicity[iHandle]->dim() )
  {
    cout << "GlOp::DSOp: list unrolling failed" << endl;
    cout << "           total nodes: " << ntot << " multiplicity dimension: " << gmultiplicity[iHandle]->dim() << endl;
    return FALSE;
  }

  // The 'flat' vector is created on first use, and then kept with the handle:
  if ( vflat[iHandle] == NULL )
  {
    vflat[iHandle] = new GVector(ntot);
  }
  if ( ntot > vflat[iHandle]->dim() )
  {
    cout << "GlOp::DSOp: list unrolling failed" << endl;
    cout << "           total nodes: " << ntot << " multiplicity dimension: " << gmultiplicity[iHandle]->dim() << endl;
    return FALSE;
  }

  // copy from the 'structured' vector list into the 'flat'
  // vector, for use by DoOp:
  for ( i=0,ntot=0; i<nop; i++ )
  {
    nd = v[i]->dim();
    memcpy(vflat[iHandle]->Data()+ntot, v[i]->Data(), nd*sizeof(GDOUBLE));
    ntot += nd;
  }

  GComm::Synch();
  if ( !DoOp(vflat[iHandle]->Data(), vflat[iHandle]->dim(), seop, hDSOp) )
  {
    cout << "GlOp::DSOp: DoOp failed" << endl;
    return FALSE;
  }

  // copy back from gather/scattered 'flat' vector to structured list:
  GComm::Synch();
  for ( i=0,ntot=0; i<nop; i++ )
  {
    nd = v[i]->dim();
    memcpy(v[i]->Data(), vflat[iHandle]->Data()+ntot, nd*sizeof(GDOUBLE));
    ntot += nd;
  }
  return TRUE;
} // end of method DSOp(1)


//************************************************************************************
//************************************************************************************
// METHOD     : DSOp(2)
// DESCRIPTION: Performs direct stiffness summation/prod...
// ARGUMENTS  :
//             v     : structured vector linked-list. 
//                     The total of the vector sizes must equal
//                     the size of the id array used in call
//                     to GlOp::Init.
//              seop : direct-stiffness operation to perform
//              hDSOp: GlOp handle derived from call to Glop::Init.
// RETURNS    : TRUE on success; else FALSE. A valid op is to
//              have a NULL handle.
//************************************************************************************
GBOOL GlOp::DSOp(GVecList &vlist, G_OP seop, GCHandle hDSOp)
{
  GINT      i, nd, ntot, nop;
  GSHORT    iHandle;

  if ( hDSOp == NULL_HANDLE || gmultiplicity == NULL )
  {
    return TRUE;  // return w/o doing anything--not an error
  }
  nop = vlist.size();

  // A handle that is not in the pool gives index -1, which must not be
  // used to index the per-handle arrays; nor can the operation proceed
  // without the handle's multiplicity object:
  iHandle = HandleIndex(hDSOp);
  if ( iHandle < 0 || gmultiplicity[iHandle] == NULL )
  {
    cout << "GlOp::DSOp: invalid or uninitialized handle" << endl;
    return FALSE;
  }

  // Total number of entries in the structured list:
  vlist.start(NULL);
  for ( i=0, ntot=0; i<nop; i++ ) {
    ntot += vlist.member()->dim();
    vlist.next();
  }

  GComm::Synch();

  // The size check is made before anything is copied, so that a list of
  // the wrong size can never be written past the end of the flat vector:
  if ( ntot != gmultiplicity[iHandle]->dim() )
  {
    cout << "GlOp::DSOp: list unrolling failed" << endl;
    cout << "           total nodes: " << ntot << " multiplicity dimension: " << gmultiplicity[iHandle]->dim() << endl;
    return FALSE;
  }

  // The 'flat' vector is created on first use, and then kept with the handle:
  if ( vflat[iHandle] == NULL )
  {
    vflat[iHandle] = new GVector(ntot);
  }
  if ( ntot > vflat[iHandle]->dim() )
  {
    cout << "GlOp::DSOp: list unrolling failed" << endl;
    cout << "           total nodes: " << ntot << " multiplicity dimension: " << gmultiplicity[iHandle]->dim() << endl;
    return FALSE;
  }

  // copy from the 'structured' vector list into the 'flat'
  // vector, for use by DoOp:
  vlist.start(NULL);
  for ( i=0,ntot=0; i<nop; i++ )
  {
    nd = vlist.member()->dim();
    memcpy(vflat[iHandle]->Data()+ntot, vlist.member()->Data(), nd*sizeof(GDOUBLE));
    ntot += nd;
    vlist.next();
  }

  GComm::Synch();
  if ( !DoOp(vflat[iHandle]->Data(), vflat[iHandle]->dim(), seop, hDSOp) )
  {
    cout << "GlOp::DSOp: DoOp failed" << endl;
    return FALSE;
  }

  // copy back from gather/scattered 'flat' vector to structured list:
  GComm::Synch();
  vlist.start(NULL);
  for ( i=0,ntot=0; i<nop; i++ )
  {
    nd = vlist.member()->dim();
    memcpy(vlist.member()->Data(), vflat[iHandle]->Data()+ntot, nd*sizeof(GDOUBLE));
    ntot += nd;
    vlist.next();
  }

  return TRUE;
} // end of method DSOp(2)


#if defined(DO_GLOP_TIMING)
//************************************************************************************
//************************************************************************************
// METHOD     : GetTimes
// DESCRIPTION: Retrieves timer results
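// ARGUMENTS  : type: selects the timer result to return (SETTR_OP or SETTR_EXCH)
// RETURNS    : doop_time for SETTR_OP; exch_time for SETTR_EXCH; 0 for any other type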
//************************************************************************************
GDOUBLE GlOp::GetTimes(SETIMER_RESULT_T type)
{
  // Pick the timer member that matches the requested result type;
  // any other type yields 0.
  if ( type == SETTR_OP   ) return doop_time;
  if ( type == SETTR_EXCH ) return exch_time;

  return 0;

} // end of method GetTimes
#endif

