//************************************************************************************
// Module       : mtk.cpp
// Date         : 5/14/02 (DLR)
// Copyright    : 2002-2006 Copyright University Corporation for Atmospheric
//                Research
// Description  : Namespace encapsulating C-style linear algebra methods
// Derived From : none.
// Modifications:
//************************************************************************************
#include <string.h>
#include "stdlib.h"
#include "mtk.hpp"
#include "cff_wrappers.hpp"

// File-scope MTK data. GMTK_GLOBAL_DATA is defined here so that the definitions
// below are skipped if this guard has already been set.
#if !defined(GMTK_GLOBAL_DATA)
#define GMTK_GLOBAL_DATA
// Cache-block size: passed to the w_* kernels and read by cf_dxaxpby.
GINT     szCache_=szCACHE;
// Scratch buffer written by cf_dxaxpby; holds szCACHEMAX*szCACHEMAX entries.
GDOUBLE  stmp[szCACHEMAX*szCACHEMAX];
#endif

//************************************************************************************
//************************************************************************************
// METHOD     : fvec_add
// DESCRIPTION: Element-wise sum of two GDOUBLE vectors, aret = x + y.
// ARGUMENTS  : x    : first operand
//              y    : second operand; must have the same dim() as x
//              aret : result vector; must have the same dim() as x
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************

void MTK::fvec_add(GTVector<GDOUBLE> &x, GTVector<GDOUBLE> &y, GTVector<GDOUBLE> &aret)
{
  // All three vectors have to agree in length.
  if ( !( x.dim() == y.dim() && x.dim() == aret.dim() ) ) {
    cout << "MTK::fvec_add: incompatible vectors " << endl;
    exit(1);
  }

#ifdef DO_MTK_TIMING
  _MTKtstart = STK::Timer();
#endif

  // Both coefficients are 1.0, i.e. aret = 1.0*x + 1.0*y.
#ifdef CACHE_UNFRIENDLY
  basic_dzaxpby(aret.Data(), x.Data(), 1.0, y.Data(), 1.0, x.dim());
#else
  w_dzaxpby(aret.Data(), x.Data(), 1.0, y.Data(), 1.0, x.dim(), szCache_);
#endif

#ifdef DO_MTK_TIMING
  _MTKtime_result = STK::Timer() - _MTKtstart;
#endif

} // end of method fvec_add


//************************************************************************************
//************************************************************************************
// METHOD     : fvec_add_rep
// DESCRIPTION: In-place sum of two GDOUBLE vectors, x += y.
// ARGUMENTS  : x : operand that is overwritten with the result
//              y : vector added to x; must have the same dim() as x
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************

void MTK::fvec_add_rep(GTVector<GDOUBLE> &x, GTVector<GDOUBLE> &y)
{
  if ( !( x.dim() == y.dim() ) ) {
    cout << "MTK::fvec_add_rep: incompatible vectors " << endl;
    exit(1);
  }

#ifdef DO_MTK_TIMING
  _MTKtstart = STK::Timer();
#endif

  // x = 1.0*x + 1.0*y
#ifdef CACHE_UNFRIENDLY
  basic_dxaxpby(x.Data(), 1.0, y.Data(), 1.0, x.dim());
#else
  w_dxaxpby(x.Data(), 1.0, y.Data(), 1.0, x.dim(), szCache_);
#endif

#ifdef DO_MTK_TIMING
  _MTKtime_result = STK::Timer() - _MTKtstart;
#endif
} // end of method fvec_add_rep


//************************************************************************************
//************************************************************************************
// METHOD     : qvec_add
// DESCRIPTION: Element-wise sum of two GQUAD vectors, aret = x + y.
// ARGUMENTS  : x    : first operand
//              y    : second operand; must have the same dim() as x
//              aret : result vector; must have the same dim() as x
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************

void MTK::qvec_add(GTVector<GQUAD> &x, GTVector<GQUAD> &y, GTVector<GQUAD> &aret)
{
  // All three vectors have to agree in length.
  if ( !( x.dim() == y.dim() && x.dim() == aret.dim() ) ) {
    cout << "MTK::qvec_add: incompatible vectors " << endl;
    exit(1);
  }

#ifdef DO_MTK_TIMING
  _MTKtstart = STK::Timer();
#endif

  // Both coefficients are 1.0, i.e. aret = 1.0*x + 1.0*y.
#ifdef CACHE_UNFRIENDLY
  basic_qzaxpby(aret.Data(), x.Data(), 1.0, y.Data(), 1.0, x.dim());
#else
  cf_qzaxpby(aret.Data(), x.Data(), 1.0, y.Data(), 1.0, x.dim());
#endif

#ifdef DO_MTK_TIMING
  _MTKtime_result = STK::Timer() - _MTKtstart;
#endif

} // end of method qvec_add


//************************************************************************************
//************************************************************************************
// METHOD     : qvec_add_rep
// DESCRIPTION: In-place sum of two GQUAD vectors, x += y.
// ARGUMENTS  : x : operand that is overwritten with the result
//              y : vector added to x; must have the same dim() as x
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************

void MTK::qvec_add_rep(GTVector<GQUAD> &x, GTVector<GQUAD> &y)
{
  if ( !( x.dim() == y.dim() ) ) {
    cout << "MTK::qvec_add_rep: incompatible vectors " << endl;
    exit(1);
  }

#ifdef DO_MTK_TIMING
  _MTKtstart = STK::Timer();
#endif

  // x = 1.0*x + 1.0*y
#ifdef CACHE_UNFRIENDLY
  basic_qxaxpby(x.Data(), 1.0, y.Data(), 1.0, x.dim());
#else
  cf_qxaxpby(x.Data(), 1.0, y.Data(), 1.0, x.dim());
#endif

#ifdef DO_MTK_TIMING
  _MTKtime_result = STK::Timer() - _MTKtstart;
#endif
} // end of method qvec_add_rep



//************************************************************************************
//************************************************************************************
// METHOD     : fvec_sub
// DESCRIPTION: Element-wise difference of two GDOUBLE vectors, aret = x - y.
// ARGUMENTS  : x    : minuend
//              y    : subtrahend; must have the same dim() as x
//              aret : result vector; must have the same dim() as x
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************

void MTK::fvec_sub(GTVector<GDOUBLE> &x, GTVector<GDOUBLE> &y, GTVector<GDOUBLE> &aret)
{
  if ( x.dim() != y.dim() || x.dim() != aret.dim() ) {
    // Diagnostic names this method so a failure can be traced to its source.
    cout << "MTK::fvec_sub: incompatible vectors "<< endl;
    exit(1);
  }

#if defined(DO_MTK_TIMING)
  _MTKtstart = STK::Timer();
#endif

  // Subtraction is expressed as aret = 1.0*x + (-1.0)*y.
#if !defined(CACHE_UNFRIENDLY)
  w_dzaxpby   (aret.Data(), x.Data(), 1.0, y.Data(), -1.0, x.dim(), szCache_);
#else
  basic_dzaxpby(aret.Data(), x.Data(), 1.0, y.Data(), -1.0, x.dim());
#endif

#if defined(DO_MTK_TIMING)
  _MTKtime_result = STK::Timer() - _MTKtstart;
#endif

} // end of fvec_sub


//************************************************************************************
//************************************************************************************
// METHOD     : fvec_sub_rep
// DESCRIPTION: In-place difference of two GDOUBLE vectors, x -= y.
// ARGUMENTS  : x : operand that is overwritten with the result
//              y : vector subtracted from x; must have the same dim() as x
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************

void MTK::fvec_sub_rep(GTVector<GDOUBLE> &x, GTVector<GDOUBLE> &y)
{
  if ( !( x.dim() == y.dim() ) ) {
    cout << "MTK::fvec_sub_rep: incompatible vectors " << endl;
    exit(1);
  }

#ifdef DO_MTK_TIMING
  _MTKtstart = STK::Timer();
#endif

  // x = 1.0*x + (-1.0)*y
#ifdef CACHE_UNFRIENDLY
  basic_dxaxpby(x.Data(), 1.0, y.Data(), -1.0, x.dim());
#else
  w_dxaxpby(x.Data(), 1.0, y.Data(), -1.0, x.dim(), szCache_);
#endif

#ifdef DO_MTK_TIMING
  _MTKtime_result = STK::Timer() - _MTKtstart;
#endif

} // end of method fvec_sub_rep


//************************************************************************************
//************************************************************************************
// METHOD     : qvec_sub
// DESCRIPTION: Element-wise difference of two GQUAD vectors, aret = x - y.
// ARGUMENTS  : x    : minuend
//              y    : subtrahend; must have the same dim() as x
//              aret : result vector; must have the same dim() as x
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************

void MTK::qvec_sub(GTVector<GQUAD> &x, GTVector<GQUAD> &y, GTVector<GQUAD> &aret)
{
  if ( x.dim() != y.dim() || x.dim() != aret.dim() ) {
    // Diagnostic names this method so a failure can be traced to its source.
    cout << "MTK::qvec_sub: incompatible vectors "<< endl;
    exit(1);
  }

#if defined(DO_MTK_TIMING)
  _MTKtstart = STK::Timer();
#endif

  // Subtraction is expressed as aret = 1.0*x + (-1.0)*y.
#if !defined(CACHE_UNFRIENDLY)
  cf_qzaxpby   (aret.Data(), x.Data(), 1.0, y.Data(), -1.0, x.dim());
#else
  basic_qzaxpby(aret.Data(), x.Data(), 1.0, y.Data(), -1.0, x.dim());
#endif

#if defined(DO_MTK_TIMING)
  _MTKtime_result = STK::Timer() - _MTKtstart;
#endif

} // end of qvec_sub


//************************************************************************************
//************************************************************************************
// METHOD     : qvec_sub_rep
// DESCRIPTION: In-place difference of two GQUAD vectors, x -= y.
// ARGUMENTS  : x : operand that is overwritten with the result
//              y : vector subtracted from x; must have the same dim() as x
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************

void MTK::qvec_sub_rep(GTVector<GQUAD> &x, GTVector<GQUAD> &y)
{
  if ( !( x.dim() == y.dim() ) ) {
    cout << "MTK::qvec_sub_rep: incompatible vectors " << endl;
    exit(1);
  }

#ifdef DO_MTK_TIMING
  _MTKtstart = STK::Timer();
#endif

  // x = 1.0*x + (-1.0)*y
#ifdef CACHE_UNFRIENDLY
  basic_qxaxpby(x.Data(), 1.0, y.Data(), -1.0, x.dim());
#else
  cf_qxaxpby(x.Data(), 1.0, y.Data(), -1.0, x.dim());
#endif

#ifdef DO_MTK_TIMING
  _MTKtime_result = STK::Timer() - _MTKtstart;
#endif
} // end of method qvec_sub_rep


//************************************************************************************
//************************************************************************************
// METHOD     : afmatvec_prod
// DESCRIPTION: Matrix-vector product, aret = A x, where A is a GTMatrix and
//              x and aret are plain contiguous arrays.
// ARGUMENTS  : A    : matrix operand
//              x    : input array
//              nx   : length of x; must equal A.dim(2)
//              aret : output array
//              ny   : length of aret; must equal A.dim(1)
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************

void MTK::afmatvec_prod(GTMatrix<GDOUBLE> &A, GDOUBLE *x, GINT  nx, GDOUBLE *aret, GINT  ny)
{
  // Array lengths must match the shape of A.
  if ( !( A.dim(2) == nx && A.dim(1) == ny ) ) {
    cout << "MTK::afmatvec_prod: incompatible factors" << endl;
    exit(1);
  }

#ifdef CACHE_UNFRIENDLY
  basic_dmxv(aret, ny, A.Data(), x, nx);
#else
  w_dmxv(aret, ny, A.Data(), x, nx, szCache_);
#endif

} // end of method afmatvec_prod


//************************************************************************************
//************************************************************************************
// METHOD     : fmatvec_prod
// DESCRIPTION: Matrix-vector product for GDOUBLE data, aret = A x.
// ARGUMENTS  : A    : matrix operand
//              x    : input vector; x.dim() must equal A.dim(2)
//              aret : result vector; aret.dim() must equal A.dim(1)
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************

void MTK::fmatvec_prod(GTMatrix<GDOUBLE> &A, GTVector<GDOUBLE> &x, GTVector<GDOUBLE> &aret)
{
  // The timer is started before the shape check, so the check is part of
  // the timed region.
#ifdef DO_MTK_TIMING
  _MTKtstart = STK::Timer();
#endif

  if ( !( A.dim(2) == x.dim() && A.dim(1) == aret.dim() ) ) {
    cout << "MTK::fmatvec_prod: incompatible factors" << endl;
    exit(1);
  }

#ifdef CACHE_UNFRIENDLY
  basic_dmxv(aret.Data(), aret.dim(), A.Data(), x.Data(), x.dim());
#else
  w_dmxv(aret.Data(), aret.dim(), A.Data(), x.Data(), x.dim(), szCache_);
#endif

#ifdef DO_MTK_TIMING
  _MTKtime_result = STK::Timer() - _MTKtstart;
#endif

} // end of method fmatvec_prod


//************************************************************************************
//************************************************************************************
// METHOD     : qmatvec_prod
// DESCRIPTION: Matrix-vector product for GQUAD data, aret = A x.
// ARGUMENTS  : A    : matrix operand
//              x    : input vector; x.dim() must equal A.dim(2)
//              aret : result vector; aret.dim() must equal A.dim(1)
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************

void MTK::qmatvec_prod(GTMatrix<GQUAD> &A, GTVector<GQUAD> &x, GTVector<GQUAD> &aret)
{
  // The timer is started before the shape check, so the check is part of
  // the timed region.
#ifdef DO_MTK_TIMING
  _MTKtstart = STK::Timer();
#endif

  if ( !( A.dim(2) == x.dim() && A.dim(1) == aret.dim() ) ) {
    cout << "MTK::qmatvec_prod: incompatible factors" << endl;
    exit(1);
  }

#ifdef CACHE_UNFRIENDLY
  basic_qmxv(aret.Data(), aret.dim(), A.Data(), x.Data(), x.dim());
#else
  cf_qmxv(aret.Data(), aret.dim(), A.Data(), x.Data(), x.dim());
#endif

#ifdef DO_MTK_TIMING
  _MTKtime_result = STK::Timer() - _MTKtstart;
#endif

} // end of method qmatvec_prod


//************************************************************************************
//************************************************************************************
// METHOD     : fmatmat_prod
// DESCRIPTION: Matrix-matrix product for GDOUBLE data, C = A B.
// ARGUMENTS  : A : left factor
//              B : right factor; B.dim(1) must equal A.dim(2)
//              C : result; must be A.dim(1) x B.dim(2)
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************

void MTK::fmatmat_prod(GTMatrix<GDOUBLE> &A, GTMatrix<GDOUBLE> &B, GTMatrix<GDOUBLE> &C)
{
  // The timer is started before the shape check, so the check is part of
  // the timed region.
#ifdef DO_MTK_TIMING
  _MTKtstart = STK::Timer();
#endif

  // Inner dimensions must agree and C must have the shape of the product.
  if ( !( A.dim(2) == B.dim(1) && A.dim(1) == C.dim(1) && B.dim(2) == C.dim(2) ) ) {
    cout << "MTK::fmatmat_prod: incompatible factors" << endl;
    exit(1);
  }

#ifdef CACHE_UNFRIENDLY
  basic_dmxm(C.Data(), A.Data(), A.dim(1), A.dim(2), B.Data(), B.dim(1), B.dim(2));
#else
  w_dmxm(C.Data(), A.Data(), A.dim(1), A.dim(2), B.Data(), B.dim(1), B.dim(2), szCache_);
#endif

#ifdef DO_MTK_TIMING
  _MTKtime_result = STK::Timer() - _MTKtstart;
#endif

} // end of method fmatmat_prod

//************************************************************************************
//************************************************************************************
// METHOD     : qmatmat_prod
// DESCRIPTION: Matrix-matrix product for GQUAD data, C = A B.
// ARGUMENTS  : A : left factor
//              B : right factor; B.dim(1) must equal A.dim(2)
//              C : result; must be A.dim(1) x B.dim(2)
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************

void MTK::qmatmat_prod(GTMatrix<GQUAD> &A, GTMatrix<GQUAD> &B, GTMatrix<GQUAD> &C)
{
  // The timer is started before the shape check, so the check is part of
  // the timed region.
#ifdef DO_MTK_TIMING
  _MTKtstart = STK::Timer();
#endif

  // Inner dimensions must agree and C must have the shape of the product.
  if ( !( A.dim(2) == B.dim(1) && A.dim(1) == C.dim(1) && B.dim(2) == C.dim(2) ) ) {
    cout << "MTK::qmatmat_prod: incompatible factors" << endl;
    exit(1);
  }

#ifdef CACHE_UNFRIENDLY
  basic_qmxm(C.Data(), A.Data(), A.dim(1), A.dim(2), B.Data(), B.dim(1), B.dim(2));
#else
  cf_qmxm(C.Data(), A.Data(), A.dim(1), A.dim(2), B.Data(), B.dim(1), B.dim(2));
#endif

#ifdef DO_MTK_TIMING
  _MTKtime_result = STK::Timer() - _MTKtstart;
#endif

} // end of method qmatmat_prod


//************************************************************************************
//************************************************************************************
// METHOD     : fvec_dot
// DESCRIPTION: Dot product of two GDOUBLE vectors.
// ARGUMENTS  : x : first vector
//              y : second vector; must have the same dim() as x
// RETURNS    : the value the dot kernel stores through its output pointer.
//              On a dimension mismatch a message is written to cout and the
//              program terminates via exit(1).
//************************************************************************************
GDOUBLE  MTK::fvec_dot(GTVector<GDOUBLE> &x, GTVector<GDOUBLE> &y)
{
  GDOUBLE dotval;   // filled in by the kernel below

  if ( !( x.dim() == y.dim() ) ) {
    cout << "MTK::fvec_dot: incompatible factors" << endl;
    exit(1);
  }

#ifdef DO_MTK_TIMING
  _MTKtstart = STK::Timer();
#endif

#ifdef CACHE_UNFRIENDLY
  basic_ddot(&dotval, x.Data(), y.Data(), x.dim());
#else
  w_ddot(&dotval, x.Data(), y.Data(), x.dim(), szCache_);
#endif

#ifdef DO_MTK_TIMING
  _MTKtime_result = STK::Timer() - _MTKtstart;
#endif

  return dotval;

} // end of method fvec_dot


//************************************************************************************
//************************************************************************************
// METHOD     : qvec_dot
// DESCRIPTION: Dot product of two GQUAD vectors.
// ARGUMENTS  : x : first vector
//              y : second vector; must have the same dim() as x
// RETURNS    : the value the dot kernel stores through its output pointer.
//              On a dimension mismatch a message is written to cout and the
//              program terminates via exit(1).
//************************************************************************************
GQUAD  MTK::qvec_dot(GTVector<GQUAD> &x, GTVector<GQUAD> &y)
{
  GQUAD qret;   // filled in by the kernel below

  if ( x.dim() != y.dim() ) {
    // Diagnostic uses the method's real name (qvec_dot).
    cout << "MTK::qvec_dot: incompatible factors"<< endl;
    exit(1);
  }

#if defined(DO_MTK_TIMING)
  _MTKtstart = STK::Timer();
#endif

#if !defined(CACHE_UNFRIENDLY)
  cf_qdot   (&qret, x.Data(), y.Data(), x.dim());
#else
  basic_qdot(&qret, x.Data(), y.Data(), x.dim());
#endif

#if defined(DO_MTK_TIMING)
  _MTKtime_result = STK::Timer() - _MTKtstart;
#endif

  return qret;

} // end of method qvec_dot


//************************************************************************************
//************************************************************************************
// METHOD     : fvec_copy
// DESCRIPTION: Copies the GDOUBLE vector x into y.
// ARGUMENTS  : x : source vector
//              y : destination vector; must have the same dim() as x
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************
void  MTK::fvec_copy(GTVector<GDOUBLE> &x, GTVector<GDOUBLE> &y)
{
  if ( !( x.dim() == y.dim() ) ) {
    cout << "MTK::fvec_copy: incompatible factors" << endl;
    exit(1);
  }

  // The destination is the first kernel argument, the source the second.
#ifdef CACHE_UNFRIENDLY
  basic_dcopy(y.Data(), x.Data(), x.dim());
#else
  w_dcopy(y.Data(), x.Data(), x.dim(), szCache_);
#endif

} // end of method fvec_copy


//************************************************************************************
//************************************************************************************
// METHOD     : qvec_copy
// DESCRIPTION: Copies the GQUAD vector x into y.
// ARGUMENTS  : x : source vector
//              y : destination vector; must have the same dim() as x
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************
void  MTK::qvec_copy(GTVector<GQUAD> &x, GTVector<GQUAD> &y)
{
  if ( !( x.dim() == y.dim() ) ) {
    cout << "MTK::qvec_copy: incompatible factors" << endl;
    exit(1);
  }

  // The destination is the first kernel argument, the source the second.
#ifdef CACHE_UNFRIENDLY
  basic_qcopy(y.Data(), x.Data(), x.dim());
#else
  cf_qcopy(y.Data(), x.Data(), x.dim());
#endif

} // end of method qvec_copy


//************************************************************************************
//************************************************************************************
// METHOD     : fvec_const_prod
// DESCRIPTION: Scales the GDOUBLE vector x by the constant a, y = x * a.
// ARGUMENTS  : x : input vector
//              a : scale factor
//              y : result vector; must have the same dim() as x
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************
void  MTK::fvec_const_prod(GTVector<GDOUBLE> &x, GDOUBLE a, GTVector<GDOUBLE> &y)
{
  if ( !( x.dim() == y.dim() ) ) {
    cout << "MTK::fvec_const_prod: incompatible factors" << endl;
    exit(1);
  }

  // Written as y = 0.0*y + a*x through the in-place axpby kernel.
  // NOTE(review): the old contents of y still enter as 0.0*y[i]; with
  // basic_dxaxpby a NaN/Inf already in y would propagate -- confirm callers
  // always pass an initialized y.
#ifdef CACHE_UNFRIENDLY
  basic_dxaxpby(y.Data(), 0.0, x.Data(), a, x.dim());
#else
  w_dxaxpby(y.Data(), 0.0, x.Data(), a, x.dim(), szCache_);
#endif

}  // end of method fvec_const_prod


//************************************************************************************
//************************************************************************************
// METHOD     : qvec_const_prod
// DESCRIPTION: Scales the GQUAD vector x by the constant a, y = x * a.
// ARGUMENTS  : x : input vector
//              a : scale factor
//              y : result vector; must have the same dim() as x
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************
void  MTK::qvec_const_prod(GTVector<GQUAD> &x, GQUAD a, GTVector<GQUAD> &y)
{
  if ( x.dim() !=  y.dim() ) {
    // Diagnostic names this method, not its GDOUBLE counterpart.
    cout << "MTK::qvec_const_prod: incompatible factors"<< endl;
    exit(1);
  }

  // Written as y = 0.0*y + a*x through the in-place axpby kernel.
#if !defined(CACHE_UNFRIENDLY)
  cf_qxaxpby   (y.Data(), 0.0, x.Data(), a, x.dim());
#else
  basic_qxaxpby(y.Data(), 0.0, x.Data(), a, x.dim());
#endif

}  // end of method qvec_const_prod


//************************************************************************************
//************************************************************************************
// METHOD     : fvec_const_prod_rep
// DESCRIPTION: Scales the GDOUBLE vector x in place by the constant a, x *= a.
// ARGUMENTS  : x : vector that is overwritten with the scaled values
//              a : scale factor
// RETURNS    : none
//************************************************************************************
void  MTK::fvec_const_prod_rep(GTVector<GDOUBLE> &x, GDOUBLE a)
{
  // Written as x = a*x + 0.0*x: x is passed as both operands of the kernel.
#ifdef CACHE_UNFRIENDLY
  basic_dxaxpby(x.Data(), a, x.Data(), 0.0, x.dim());
#else
  w_dxaxpby(x.Data(), a, x.Data(), 0.0, x.dim(), szCache_);
#endif

}  // end of method fvec_const_prod_rep



//************************************************************************************
//************************************************************************************
// METHOD     : qvec_const_prod_rep
// DESCRIPTION: Scales the GQUAD vector x in place by the constant a, x *= a.
// ARGUMENTS  : x : vector that is overwritten with the scaled values
//              a : scale factor
// RETURNS    : none
//************************************************************************************
void  MTK::qvec_const_prod_rep(GTVector<GQUAD> &x, GQUAD a)
{
  // Written as x = a*x + 0.0*x: x is passed as both operands of the kernel.
#ifdef CACHE_UNFRIENDLY
  basic_qxaxpby(x.Data(), a, x.Data(), 0.0, x.dim());
#else
  cf_qxaxpby(x.Data(), a, x.Data(), 0.0, x.dim());
#endif

}  // end of method qvec_const_prod_rep


//************************************************************************************
//************************************************************************************
// METHOD     : fvec_const_prod_sum_rep
// DESCRIPTION: In-place linear combination of two GDOUBLE vectors:
//                 x = a x + b y , where a and b are constants
// ARGUMENTS  : x : first operand; overwritten with the result
//              y : second operand; must have the same dim() as x
//              a : coefficient applied to x
//              b : coefficient applied to y
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************
void  MTK::fvec_const_prod_sum_rep(GTVector<GDOUBLE> &x, GTVector<GDOUBLE> &y, GDOUBLE a, GDOUBLE b)
{
  if ( !( x.dim() == y.dim() ) ) {
    cout << "MTK::fvec_const_prod_sum_rep: incompatible factors" << endl;
    exit(1);
  }

#ifdef CACHE_UNFRIENDLY
  basic_dxaxpby(x.Data(), a, y.Data(), b, x.dim());
#else
  w_dxaxpby(x.Data(), a, y.Data(), b, x.dim(), szCache_);
#endif

}  // end of method fvec_const_prod_sum_rep


//************************************************************************************
//************************************************************************************
// METHOD     : fvec_point_prod
// DESCRIPTION: Point-wise product of two GDOUBLE vectors (the _diagonal_ of the
//              vector outer product), z_i = x_i * y_i.
// ARGUMENTS  : x : first factor
//              y : second factor; must have the same dim() as x
//              z : result vector; must have the same dim() as x
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************
void  MTK::fvec_point_prod(GTVector<GDOUBLE> &x, GTVector<GDOUBLE> &y, GTVector<GDOUBLE> &z)
{
  // All three vectors have to agree in length.
  if ( !( x.dim() == y.dim() && x.dim() == z.dim() ) ) {
    cout << "MTK::fvec_point_prod: incompatible factors" << endl;
    exit(1);
  }

#ifdef CACHE_UNFRIENDLY
  basic_dzvxv_point(z.Data(), x.Data(), y.Data(), x.dim());
#else
  w_dzvxvpt(z.Data(), x.Data(), y.Data(), x.dim(), szCache_);
#endif

} // end of method fvec_point_prod

//************************************************************************************
//************************************************************************************
// METHOD     : fvec_point_prod_rep
// DESCRIPTION: In-place point-wise product of two GDOUBLE vectors (the
//              _diagonal_ of the vector outer product), x_i *= y_i.
// ARGUMENTS  : x : first factor; overwritten with the result
//              y : second factor; must have the same dim() as x
// RETURNS    : none. On a dimension mismatch a message is written to cout and
//              the program terminates via exit(1).
//************************************************************************************
void  MTK::fvec_point_prod_rep(GTVector<GDOUBLE> &x, GTVector<GDOUBLE> &y)
{
  if ( x.dim() !=  y.dim() ) {
    // Diagnostic names the _rep variant so it is not mistaken for fvec_point_prod.
    cout << "MTK::fvec_point_prod_rep: incompatible factors"<< endl;
    exit(1);
  }

#if !defined(CACHE_UNFRIENDLY)
  w_dvvxvpt   (x.Data(), y.Data(), x.dim(), szCache_);
#else
  basic_dvvxv_point(x.Data(), y.Data(), x.dim());
#endif

} // end of method fvec_point_prod_rep


//************************************************************************************
//************************************************************************************
// METHOD     : I2_X_D1
// DESCRIPTION: Applies the tensor product operator y = (I2 X D1) x, where D1 is
//              a dense matrix on the 1-nodes and I2 is the identity in the
//              2-direction. Evaluated as the matrix product D1 * X, with X the
//              N1 x N2 matrix form of x.
//              NOTE:  all strides must = 1; else results are unpredictable.
// ARGUMENTS  : D1 : N1 X N1 matrix
//              x  : N1 x N2 vector, with x1-values changing most rapidly
//              N1 : 1-dimension
//              N2 : 2-dimension
//              y  : N1 x N2 vector.
// RETURNS    : none. If x or y does not have N1*N2 entries a message is written
//              to cout and the program terminates via exit(1).
//************************************************************************************
void  MTK::I2_X_D1(GMatrix &D1, GVector &x, GINT  N1, GINT  N2,  GVector &y)
{
  const GINT npts = N1*N2;   // required length of x and of y

  // Only the vector lengths are validated here; the shape of D1 is passed to
  // the kernel unchecked.
  if ( !( x.dim() == npts && y.dim() == npts ) ) {
    cout << "MTK::I2_X_D1: incompatible dimensions" << endl;
    exit(1);
  }

#ifdef CACHE_UNFRIENDLY
  basic_dmxm(y.Data(), D1.Data(), D1.dim(1), D1.dim(2), x.Data(), N1, N2);
#else
  w_dmxm(y.Data(), D1.Data(), D1.dim(1), D1.dim(2), x.Data(), N1, N2, szCache_);
#endif

} // end of method I2_X_D1


//************************************************************************************
//************************************************************************************
// METHOD     : D2_X_I1
// DESCRIPTION: Applies the tensor product operator y = (D2 X I1) x, where D2 is
//              a dense matrix on the 2-nodes and I1 is the identity in the
//              1-direction. Evaluated as the matrix product X * D2T, with X the
//              N1 x N2 matrix form of x.
//              NOTE:  all strides must = 1; else results are unpredictable.
// ARGUMENTS  : D2T: N2 X N2 matrix: the transpose of D2. This is required so that
//              stride is minimized.
//              x  : N1 x N2 vector, with x1-values changing most rapidly
//              N1 : 1-dimension
//              N2 : 2-dimension
//              y  : N1 x N2 vector.
// RETURNS    : none. If x or y does not have N1*N2 entries a message is written
//              to cout and the program terminates via exit(1).
//************************************************************************************
void  MTK::D2_X_I1(GMatrix &D2T, GVector &x, GINT  N1, GINT  N2, GVector &y)
{
  const GINT npts = N1*N2;   // required length of x and of y

  // Only the vector lengths are validated here; the shape of D2T is passed to
  // the kernel unchecked.
  if ( !( x.dim() == npts && y.dim() == npts ) ) {
    cout << "MTK::D2_X_I1: incompatible dimensions" << endl;
    exit(1);
  }

#ifdef CACHE_UNFRIENDLY
  basic_dmxm(y.Data(), x.Data(), N1, N2, D2T.Data(), D2T.dim(1), D2T.dim(2));
#else
  w_dmxm(y.Data(), x.Data(), N1, N2, D2T.Data(), D2T.dim(1), D2T.dim(2), szCache_);
#endif

} // end of method D2_X_I1


//************************************************************************************
//************************************************************************************
// METHOD     : D2_X_D1
// DESCRIPTION: Carries out operation of multiplying  tensor product operator,
//              D2 X D1 by vector x, returning result in y. D1 and D2 are
//              assumed to be dense, non-square matrices here.
//                    ( y = (D2 X D1 ) x )
//              Computed in two steps, with X the matrix form of x:
//                    tmp = D1 X ;  y = tmp D2T
//              NOTE: striding is _not_ allowed here. If stride != 1 for all
//              vectors/matrices, results are unpredictable.
// ARGUMENTS  : D1   : dense matrix containing 1-operator
//              D2T  : dense matrix containing transpose of 2-operator
//              x    : GVector argument
//              nx1,
//              nx2  : matrix size of x = nx1 x nx2. Must be D1(2) x D2T(1).
//              tmp  : GVector temp space of size nt1 x nt2
//              nt1,
//              nt2  : matrix size of tmp. Must be D1(1) x nx2.
//              y    : GVector result
//              ny1,
//              ny2  : matrix size of y. Must be D1(1) x D2T(2).
// RETURNS    : none. If the sizes are inconsistent a message is written to cout
//              and the program terminates via exit(1). Only the integer sizes
//              are checked; the dim() of x, tmp and y is not examined.
//************************************************************************************
void MTK::D2_X_D1(GMatrix &D1, GMatrix  &D2T, GVector &x, GINT nx1, GINT nx2, GVector &tmp, GINT nt1, GINT nt2, GVector &y, GINT ny1, GINT ny2)
{
  GINT     N11, N12, N21, N22;

  N11 = D1 .dim(1);
  N12 = D1 .dim(2);
  N21 = D2T.dim(1);
  N22 = D2T.dim(2);
  if ( ny1 != nt1 || ny2 != N22 ||
       nt1 != N11 || nt2 != N21 ||
       nx1 != N12 || nx2 != nt2
        ) {
    cout << "MTK::D2_X_D1: incompatible dimensions" << endl;
    exit(1);
  }

  // Past this point: nx1 == N12, nx2 == nt2 == N21, nt1 == ny1 == N11 and
  // ny2 == N22, so both branches below pass the same shapes to their kernels.

  // Compute y = D2_X_D1 x as: y = D1 X D2T, where X is x in matrix form:

#if !defined(CACHE_UNFRIENDLY)
  // tmp = I2_X_D1 * x == D1 X (in mat form):
  w_dmxm   (tmp.Data(), D1 .Data(), N11, N12, x.Data(), N12, N21, szCache_);
  // y = D2_X_I1 * tmp == TMP D2T (in mat form):
  w_dmxm   (y  .Data(), tmp.Data(), N11, N21, D2T.Data(), N21, N22, szCache_);
#else
  // tmp = I2_X_D1 * x == D1 X (in mat form). D1 is N11 x N12; its column
  // count must be N12 (not N21), which matters whenever D1 is non-square:
  basic_dmxm(tmp.Data(), D1 .Data(), N11, N12, x.  Data(), N12, N21);

  // y = D2_X_I1 * tmp == TMP D2T (in mat form):
  basic_dmxm(y  .Data(), tmp.Data(), N11, N21, D2T.Data(), N21, N22);
#endif

} // end of method D2_X_D1


//************************************************************************************
//************************************************************************************
// METHOD     : Dg2_X_D1
// DESCRIPTION: Carries out operation of multiplying  tensor product operator,
//              Dg2 X D1 by vector x, returning result in y. Dg2 is assumed to
//              be diagonal, and specified by a GVector, while D1 is assumed
//              to be a dense, square  matrix.
//                    y = (Diag(Dg2) X D1 ) x
//                      = (Diag(Dg2) X I )(I X D1) x
//              NOTE: striding is _not_ allowed here. If stride != 1 for all
//              vectors/matrices, results are unpredictable.
// ARGUMENTS  : D1   : dense matrix containing 1-operator
//              Dg2  : GVector containing diagonal values of 2-operator
//              x    : GVector argument; must have D1.dim(1)*Dg2.dim() entries
//              vtmp : GVector work space receiving the intermediate (I X D1) x.
//                     Its size is NOT checked here; presumably it must hold
//                     at least as many entries as x -- confirm with callers.
//              y    : GVector result; must have D1.dim(1)*Dg2.dim() entries
// RETURNS    : none. If x or y has the wrong length a message is written to
//              cout and the program terminates via exit(1).
//************************************************************************************
void MTK::Dg2_X_D1(GMatrix &D1, GVector &Dg2, GVector &x, GVector &vtmp, GVector &y)
{
  GINT     N1, N2, NN;

  N1 = D1.dim(1);
  N2 = Dg2.dim();
  NN = N1 * N2;
  if ( x.dim() != NN  || y.dim() != NN ) {
    cout << "MTK::Dg2_X_D1: incompatible vectors" << endl;
    exit(1);
  }
#if !defined(CACHE_UNFRIENDLY)
  // tmp = I2_X_D1 * x:
  w_dmxm   (vtmp.Data(), D1 .Data(), D1.dim(1), D1.dim(2), x.Data(), N1, N2, szCache_);
  // y = Diag(Dg2)_X_I1 * tmp: y_ij = tmp_ij * Dg2_j (no sum on j):
  w_dmxDm   (y.Data(), vtmp.Data(), N1, N2, Dg2.Data(), N2, szCache_);
#else
  // tmp = I2_X_D1 * x:
  basic_dmxm(vtmp.Data(), D1 .Data(), D1.dim(1), D1.dim(2), x.Data(), N1, N2);

  // y = Diag(Dg2)_X_I1 * tmp: y_ij = tmp_ij * Dg2_j (no sum on j):
  basic_dmxDm(y.Data(), vtmp.Data(), N1, N2, Dg2.Data(), N2);
#endif

}  // end of method Dg2_X_D1


//************************************************************************************
//************************************************************************************
// METHOD     : D2_X_Dg1
// DESCRIPTION: Carries out operation of multiplying  tensor product operator,
//              D2 X Dg1 by vector x, returning result in y. Dg1 is assumed to
//              be diagonal, and specified by a GVector, while D2 is assumed
//              to be a dense, square matrix.
//                    y = (D2 X Diag(D1) ) x
//                      = (D2 X I) ( I X Diag(D1)) x
//              NOTE: striding is _not_ allowed here. If stride != 1 for all
//              vectors/matrices, results are unpredictable.
// ARGUMENTS  : D2T  : transpose of dense matrix, D2, containing 2-operator
//              Dg1  : GVector containing diagonal values of 1-operator
//              x    : GVector argument; must have Dg1.dim()*D2T.dim(1) entries
//              vtmp : GVector work space receiving the intermediate X * D2T.
//                     Its size is NOT checked here; presumably it must hold
//                     at least as many entries as x -- confirm with callers.
//              y    : GVector result; must have Dg1.dim()*D2T.dim(1) entries
// RETURNS    : none. If x or y has the wrong length a message is written to
//              cout and the program terminates via exit(1).
//************************************************************************************
void MTK::D2_X_Dg1(GMatrix &D2T, GVector &Dg1, GVector &x, GVector &vtmp, GVector &y)
{
  GINT   N1, N2, NN;

  N1 = Dg1.dim();
  N2 = D2T.dim(1);
  NN = N1 * N2;
  if ( x.dim() != NN  || y.dim() != NN )
  {
    // Diagnostic names this method, not its sibling Dg2_X_D1.
    cout << "MTK::D2_X_Dg1: incompatible vectors" << endl;
    exit(1);
  }

#if !defined(CACHE_UNFRIENDLY)
  // tmp = D2_X_I1 * x: tmp_ij = x_ik * D2T^k_j :
  w_dmxm(vtmp.Data(), x.Data(), N1, N2, D2T.Data(), D2T.dim(1), D2T.dim(2), szCache_);

  // y = I2_X_Diag(D1) * tmp: y_ij = d1_i * tmp_ij (no sum on i)
  w_dDmxm(y.Data(), Dg1.Data(), N1, vtmp.Data(), N1, N2, szCache_);
#else
  // tmp = D2_X_I1 * x: tmp_ij = x_ik * D2T^k_j :
  basic_dmxm(vtmp.Data(), x.Data(), N1, N2, D2T.Data(), D2T.dim(1), D2T.dim(2) );

  // y = I2_X_Diag(D1) * tmp: y_ij = d1_i * tmp_ij (no sum on i)
  basic_dDmxm(y.Data(), Dg1.Data(), N1, vtmp.Data(), N1, N2);

#endif

}  // end of method D2_X_Dg1


#if defined(DO_MTK_TIMING)
//************************************************************************************
//************************************************************************************
// METHOD     : GetTime
// DESCRIPTION: Returns the timing result stored in _MTKtime_result. The timed
//              MTK methods set it to STK::Timer() minus the timer value taken
//              when they started. Only compiled when DO_MTK_TIMING is defined.
// ARGUMENTS  : none
// RETURNS    : current value of _MTKtime_result (in the units of STK::Timer(),
//              which are not visible from this file)
//************************************************************************************

GDOUBLE MTK::GetTime()
{
  return _MTKtime_result;
} // end of method GetTime()
#endif


//////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////// Basic Routines /////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////


//************************************************************************************
//************************************************************************************
// METHOD     : basic_dxaxpby
// DESCRIPTION: computes x = a*x + b*y, element by element, in place
// ARGUMENTS  : x   : array of nxy entries; overwritten with the result
//              a   : coefficient applied to x
//              y   : array of nxy entries; read only
//              b   : coefficient applied to y
//              nxy : number of entries to process; nothing is read or written
//                    when nxy <= 0
// RETURNS    : none
//************************************************************************************
void MTK::basic_dxaxpby(GDOUBLE x[], GDOUBLE a, GDOUBLE y[], GDOUBLE b, GINT nxy)
{
  GINT     i;
  GDOUBLE  sprod;

  // No element is touched outside the loop, so empty arrays (nxy == 0) are safe.
  for ( i=0; i<nxy; i++ ) {
    // The '+ 0.0' is kept on purpose: under default rounding it turns a -0.0
    // product into +0.0, so dropping it could change the sign of a zero result.
    sprod  = b*y[i] + 0.0;
    x  [i] = a*x[i] + sprod;
  }
} // end of method basic_dxaxpby


//************************************************************************************
//************************************************************************************
// METHOD     : cf_dxaxpby
// DESCRIPTION: computes x = a*x + b*y in place, processing the vectors in blocks of
//              NN = isz*isz elements, where isz = MIN(szCACHEMAX,szCache_). The
//              first nxy % NN elements are handled separately. The file-level
//              scratch array stmp is overwritten.
// ARGUMENTS  : x   : vector updated in place; nxy elements are read and written
//              a   : scalar multiplying x
//              y   : vector, nxy elements are read
//              b   : scalar multiplying y
//              nxy : number of elements processed
// RETURNS    : none
//************************************************************************************
void MTK::cf_dxaxpby(GDOUBLE x[], GDOUBLE a, GDOUBLE y[], GDOUBLE b, GINT nxy)
{
  GINT     i, ii, k, m, isz, NN;

  isz = MIN(szCACHEMAX,szCache_);
  NN  = isz*isz;   // used as a modulus below, so it must be nonzero

  // Leading remainder: elements [0, m)
  m = nxy % NN;
  for ( i=0; i<m; i++ ) {
    stmp [0] = b*y[i] + 0.0;
    x    [i] = a*x[i] + stmp[0];
  }

  // Elements [m, nxy), one block of NN at a time; stmp holds the b*y terms
  for ( i=m; i<nxy; i+=NN) {
    for ( ii=i; ii<MIN(i+NN,nxy); ii++ ) {
      k = ii-i;
      stmp [k]  = b*y  [ii] + 0.0;
      x   [ii]  = a*x  [ii] + stmp[k];
    }
  }

} // end of method cf_dxaxpby


//************************************************************************************
//************************************************************************************
// METHOD     : basic_qxaxpby
// DESCRIPTION: computes x = a*x + b*y, element by element, in place (GQUAD data)
// ARGUMENTS  : x   : vector updated in place; nxy elements are read and written
//              a   : scalar multiplying x
//              y   : vector, nxy elements are read
//              b   : scalar multiplying y
//              nxy : number of elements processed
// RETURNS    : none
//************************************************************************************
void MTK::basic_qxaxpby(GQUAD x[], GQUAD a, GQUAD y[], GQUAD b, GINT nxy)
{
  GINT     i;
  GQUAD    sprod;

  // No element is touched unless nxy > 0, so an empty input is safe.
  for ( i=0; i<nxy; i++ ) {
    sprod  = b*y[i] + 0.0;   // '+ 0.0' retained so the numerical results are unchanged
    x[i] = a*x[i] + sprod;
  }
 
} // end of method basic_qxaxpby


//************************************************************************************
//************************************************************************************
// METHOD     : cf_qxaxpby
// DESCRIPTION: computes x = a*x + b*y in place (GQUAD data), processing the vectors
//              in blocks of NN = isz*isz elements, isz = MIN(szCACHEMAX,szCache_).
//              The b*y term is held in a GQUAD local: the file-level scratch array
//              stmp is declared GDOUBLE, so passing the term through it would round
//              it to GDOUBLE (NOTE: matters only if GQUAD is wider than GDOUBLE).
// ARGUMENTS  : x   : vector updated in place; nxy elements are read and written
//              a   : scalar multiplying x
//              y   : vector, nxy elements are read
//              b   : scalar multiplying y
//              nxy : number of elements processed
// RETURNS    : none
//************************************************************************************
void MTK::cf_qxaxpby(GQUAD x[], GQUAD a, GQUAD y[], GQUAD b, GINT nxy)
{
  GINT     i, ii, m, isz, NN;
  GQUAD    bprod;
  
  isz = MIN(szCACHEMAX,szCache_);
  NN  = isz*isz;   // used as a modulus below, so it must be nonzero

  // Leading remainder: elements [0, m)
  m = nxy % NN;
  for ( i=0; i<m; i++ ) {
    bprod = b*y[i] + 0.0;
    x[i]  = a*x[i] + bprod;
  }

  // Elements [m, nxy), one block of NN at a time
  for ( i=m; i<nxy; i+=NN) {
    for ( ii=i; ii<MIN(i+NN,nxy); ii++ ) {
      bprod  = b*y[ii] + 0.0;
      x[ii]  = a*x[ii] + bprod;
    }
  }
} // end of method cf_qxaxpby


//************************************************************************************
//************************************************************************************
// METHOD     : basic_dzaxpby
// DESCRIPTION: forms z = a*x + b*y one element at a time
// ARGUMENTS  : z   : result vector, nxy elements are written
//              x   : first input vector, nxy elements are read
//              a   : scalar multiplying x
//              y   : second input vector, nxy elements are read
//              b   : scalar multiplying y
//              nxy : number of elements processed
// RETURNS    : none
//************************************************************************************
void MTK::basic_dzaxpby(GDOUBLE z[], GDOUBLE x[], GDOUBLE a, GDOUBLE y[], GDOUBLE b, GINT nxy)
{
  GINT idx = 0;

  while ( idx < nxy ) {
    // scaled y term first, then combined with the scaled x term
    const GDOUBLE scaledY = b*y[idx] + 0.0;
    z[idx] = a*x[idx] + scaledY;
    ++idx;
  }

} // end of method basic_dzaxpby


//************************************************************************************
//************************************************************************************
// METHOD     : cf_dzaxpby
// DESCRIPTION: computes z = a*x + b*y, processing the vectors in blocks of
//              NN = isz*isz elements, where isz = MIN(szCACHEMAX,szCache_). The
//              first nxy % NN elements are handled separately. The file-level
//              scratch array stmp is overwritten by the blocked part.
// ARGUMENTS  : z   : result vector, nxy elements are written
//              x   : first input vector, nxy elements are read
//              a   : scalar multiplying x
//              y   : second input vector, nxy elements are read
//              b   : scalar multiplying y
//              nxy : number of elements processed
// RETURNS    : none
//************************************************************************************
void MTK::cf_dzaxpby(GDOUBLE z[], GDOUBLE x[], GDOUBLE a, GDOUBLE y[], GDOUBLE b, GINT nxy)
{
  GINT     i, ii, k, m, isz, NN;

  isz = MIN(szCACHEMAX,szCache_);
  NN  = isz*isz;   // used as a modulus below, so it must be nonzero

  // Leading remainder: elements [0, m)
  m = nxy % NN;
  for ( i=0; i<m; i++ ) {
    z[i] = a*x[i] + b*y[i];
  }

  // Elements [m, nxy), one block of NN at a time; stmp holds the b*y terms
  for ( i=m; i<nxy; i+=NN ) {
    for ( ii=i; ii<MIN(i+NN,nxy); ii++ ) {
      k = ii-i;
      stmp [k]  = b*y  [ii] + 0.0;
      z   [ii]  = a*x  [ii] + stmp[k];
    }
  }

} // end of method cf_dzaxpby

//************************************************************************************
//************************************************************************************
// METHOD     : basic_qzaxpby
// DESCRIPTION: computes z = a*x + b*y, element by element (GQUAD data)
// ARGUMENTS  : z   : result vector, nxy elements are written (never read)
//              x   : first input vector, nxy elements are read
//              a   : scalar multiplying x
//              y   : second input vector, nxy elements are read
//              b   : scalar multiplying y
//              nxy : number of elements processed
// RETURNS    : none
//************************************************************************************
void MTK::basic_qzaxpby(GQUAD z[], GQUAD x[], GQUAD a, GQUAD y[], GQUAD b, GINT nxy)
{
  GINT     i;
  GQUAD    sprod;

  // No element is touched unless nxy > 0, so an empty input is safe.
  for ( i=0; i<nxy; i++ ) {
    sprod  = b*y[i] + 0.0;   // '+ 0.0' retained so the numerical results are unchanged
    z[i] = a*x[i] + sprod;
  }
 
} // end of method basic_qzaxpby


//************************************************************************************
//************************************************************************************
// METHOD     : cf_qzaxpby
// DESCRIPTION: computes z = a*x + b*y (GQUAD data), processing the vectors in blocks
//              of NN = isz*isz elements, isz = MIN(szCACHEMAX,szCache_). The first
//              nxy % NN elements are handled separately; they are stored to z like
//              every other element, and x is left unmodified. The b*y term is held
//              in a GQUAD local rather than the GDOUBLE scratch array stmp, so it
//              is not rounded to GDOUBLE.
// ARGUMENTS  : z   : result vector, nxy elements are written
//              x   : first input vector, nxy elements are read
//              a   : scalar multiplying x
//              y   : second input vector, nxy elements are read
//              b   : scalar multiplying y
//              nxy : number of elements processed
// RETURNS    : none
//************************************************************************************
void MTK::cf_qzaxpby(GQUAD z[], GQUAD x[], GQUAD a, GQUAD y[], GQUAD b, GINT nxy)
{   
  GINT     i, ii, m, isz, NN;
  GQUAD    bprod;

  isz = MIN(szCACHEMAX,szCache_);
  NN  = isz*isz;   // used as a modulus below, so it must be nonzero

  // Leading remainder: elements [0, m). The result goes to z, not x.
  m = nxy % NN;
  for ( i=0; i<m; i++ ) {
    bprod = b*y[i] + 0.0;
    z[i]  = a*x[i] + bprod;
  }
    
  // Elements [m, nxy), one block of NN at a time
  for ( i=m; i<nxy; i+=NN ) {
    for ( ii=i; ii<MIN(i+NN,nxy); ii++ ) {
      bprod  = b*y[ii] + 0.0;
      z[ii]  = a*x[ii] + bprod;
    }
  }

} // end of method cf_qzaxpby



//************************************************************************************
//************************************************************************************
// METHOD     : basic_dmxv
// DESCRIPTION: computes y = Ax. Element (i,j) of A is read from A[i+j*ny], i.e.
//              A is stored column by column with ny rows and nx columns.
// ARGUMENTS  : y   : result vector, ny elements are written
//              ny  : length of y; number of rows of A
//              A   : matrix data, ny*nx elements are read
//              x   : input vector, nx elements are read
//              nx  : length of x; number of columns of A
// RETURNS    : none
//************************************************************************************
void MTK::basic_dmxv(GDOUBLE y[], GINT ny, GDOUBLE A[], GDOUBLE x[], GINT nx)
{
  GINT     i, j;
  GDOUBLE  sum;
  
  // x is only dereferenced inside the j loop, so nx == 0 yields y = 0 safely.
  for ( i=0; i<ny; i++ ) {
    for ( j=0,sum=0.0; j<nx; j++ ) {
       sum  = sum + A[i+j*ny]*x[j];
    }
    y[i]  = sum;
  }
  
} // end of method  basic_dmxv


//************************************************************************************
//************************************************************************************
// METHOD     : cf_dmxv
// DESCRIPTION: forms y = Ax with the rows taken in groups of
//              MIN(szCACHEMAX,szCache_) and the columns traversed in tiles of the
//              same width. Row sums are accumulated in the file-level scratch array
//              stmp. Element (i,j) of A is read from A[i+j*ny].
// ARGUMENTS  : y   : result vector, ny elements are written
//              ny  : length of y; number of rows of A
//              A   : matrix data, addressed as A(ny,nx)
//              x   : input vector, nx elements are read
//              nx  : length of x; number of columns of A
// RETURNS    : none
//************************************************************************************
void MTK::cf_dmxv(GDOUBLE y[], GINT ny, GDOUBLE A[], GDOUBLE x[], GINT nx)
{
  const GINT blk  = MIN(szCACHEMAX,szCache_);
  const GINT head = ny % blk;
  GINT       row, col, colTile, colEnd, rowEnd, base, slot;

  // Leading remainder rows [0, head): each row is stored as soon as it is summed
  if ( head > 0 ) {
    for ( slot=0; slot<blk; slot++ ) stmp[slot] = 0.0;
    for ( row=0; row<head; row++ ) {
      for ( colTile=0; colTile<nx; colTile+=blk ) {
        colEnd = MIN(colTile+blk,nx);
        for ( col=colTile; col<colEnd; col++ ) {
          stmp[row] += A[row+col*ny]*x[col];
        }
      }
      y[row] = stmp[row];
    }
  }

  // Nothing more to do when there is not even one full group of rows
  if ( ny < blk ) return;

  // Full groups of blk rows: sum the whole group into stmp, then copy it to y
  for ( base=head; base<ny; base+=blk ) {
    for ( slot=0; slot<blk; slot++ ) stmp[slot] = 0.0;
    rowEnd = MIN(base+blk,ny);
    for ( row=base; row<rowEnd; row++ ) {
      slot = row - base;
      for ( colTile=0; colTile<nx; colTile+=blk ) {
        colEnd = MIN(colTile+blk,nx);
        for ( col=colTile; col<colEnd; col++ ) {
          stmp[slot] += A[row+col*ny]*x[col];
        }
      }
    }
    for ( slot=0; slot<blk; slot++ ) y[base+slot] = stmp[slot];
  }

} // end of method cf_dmxv


//************************************************************************************
//************************************************************************************
// METHOD     : basic_qmxv
// DESCRIPTION: computes y = Ax (GQUAD data). Element (i,j) of A is read from
//              A[i+j*ny], i.e. A is stored column by column with ny rows.
// ARGUMENTS  : y   : result vector, ny elements are written (never read)
//              ny  : length of y; number of rows of A
//              A   : matrix data, ny*nx elements are read
//              x   : input vector, nx elements are read
//              nx  : length of x; number of columns of A
// RETURNS    : none
//************************************************************************************
void MTK::basic_qmxv(GQUAD y[], GINT ny, GQUAD A[], GQUAD x[], GINT nx)
{
  GINT     i, j;
  GQUAD    sum;

  // x is only dereferenced inside the j loop, so nx == 0 yields y = 0 safely.
  for ( i=0; i<ny; i++ ) {
    sum   = 0.0;
    for ( j=0; j<nx; j++ ) {
       sum  = sum + A[i+j*ny]*x[j];
    }
    y[i]  = sum;
  }

} // end of method  basic_qmxv


//************************************************************************************
//************************************************************************************
// METHOD     : cf_qmxv
// DESCRIPTION: computes y = Ax (GQUAD data) with the rows taken in groups of
//              NN = MIN(szCACHEMAX,szCache_) and the columns traversed in tiles of
//              the same width. Each row sum is accumulated in a GQUAD local: the
//              file-level scratch array stmp is declared GDOUBLE, so accumulating
//              there would round every partial sum to GDOUBLE (NOTE: matters only
//              if GQUAD is wider than GDOUBLE). Element (i,j) of A is read from
//              A[i+j*ny].
// ARGUMENTS  : y   : result vector, ny elements are written
//              ny  : length of y; number of rows of A
//              A   : matrix data, addressed as A(ny,nx)
//              x   : input vector, nx elements are read
//              nx  : length of x; number of columns of A
// RETURNS    : none
//************************************************************************************
void MTK::cf_qmxv(GQUAD y[], GINT ny, GQUAD A[], GQUAD x[], GINT nx)
{
  GINT     i, ii, j, jj, m, NN=(MIN(szCACHEMAX,szCache_)) ;
  GQUAD    sum;

  // A->A(ny,nx)
  // Leading remainder rows [0, m)
  m = ny % NN;
  for ( ii=0; ii<m; ii++ ) {
    sum = 0.0;
    for ( j=0; j<nx; j+=NN ) {
      for ( jj=j; jj<MIN(j+NN,nx); jj++ ) {
        sum = sum + A[ii+jj*ny]*x[jj];
      }
    }
    y[ii] = sum;
  }

  // Rows [m, ny), one group of NN rows at a time
  for ( i=m; i<ny; i+=NN ) {
    for ( ii=i; ii<MIN(i+NN,ny); ii++ ) {
      sum = 0.0;
      for ( j=0; j<nx; j+=NN ) {
        for ( jj=j; jj<MIN(j+NN,nx); jj++ ) {
          sum = sum + A[ii+jj*ny]*x[jj];
        }
      }
      y[ii] = sum;
    }
  }
  
} // end of method  cf_qmxv


//************************************************************************************
//************************************************************************************
// METHOD     : basic_ddot
// DESCRIPTION: forms the dot product of x and y, summing the element products in
//              index order
// ARGUMENTS  : dot : receives the result; set to 0 when nxy <= 0
//              y   : vector, nxy elements are read
//              x   : vector, nxy elements are read
//              nxy : number of elements in the product
// RETURNS    : none (result is delivered through dot)
//************************************************************************************
void MTK::basic_ddot(GDOUBLE *dot, GDOUBLE y[], GDOUBLE x[], GINT nxy)
{
  GDOUBLE  total = 0.0;
  GINT     idx   = 0;

  while ( idx < nxy ) {
    const GDOUBLE term = x[idx] * y[idx];
    total = total + term;
    ++idx;
  }
  *dot = total;
 
} // end of method  basic_ddot

//************************************************************************************
//************************************************************************************
// METHOD     : cf_ddot
// DESCRIPTION: computes dot = x.y, walking the vectors in strips of
//              isz = MIN(szCACHEMAX,szCache_) elements and accumulating directly
//              into *dot
// ARGUMENTS  : dot : receives the result; set to 0 before accumulation starts
//              y   : vector, nxy elements are read
//              x   : vector, nxy elements are read
//              nxy : number of elements in the product
// RETURNS    : none (result is delivered through dot)
//************************************************************************************
void MTK::cf_ddot(GDOUBLE *dot, GDOUBLE y[], GDOUBLE x[], GINT nxy)
{
  GINT    i, ii, isz;

  isz  = MIN(szCACHEMAX,szCache_);   // strip length; loop stride, must be positive
  *dot = 0.0;

  for ( i=0; i<nxy; i+=isz ) {
    for ( ii=i; ii<MIN(i+isz,nxy); ii++ ) {
       *dot  = *dot + x[ii]*y[ii];
    }
  }
 
} // end of method cf_ddot


//************************************************************************************
//************************************************************************************
// METHOD     : basic_qdot
// DESCRIPTION: computes dot = x.y (GQUAD data), summing the element products in
//              index order
// ARGUMENTS  : dot : receives the result; set to 0 when nxy <= 0
//              y   : vector, nxy elements are read
//              x   : vector, nxy elements are read
//              nxy : number of elements in the product
// RETURNS    : none (result is delivered through dot)
//************************************************************************************
void MTK::basic_qdot(GQUAD *dot, GQUAD y[], GQUAD x[], GINT nxy)
{
  GINT  i ;
  GQUAD prod, sum;

  // No element is touched unless nxy > 0, so an empty input is safe.
  for ( i=0, sum=0.0; i<nxy; i++ ) {
    prod = x[i] * y[i];
    sum  = sum + prod;
  }
  *dot = sum;
} // end of method  basic_qdot

//************************************************************************************
//************************************************************************************
// METHOD     : cf_qdot
// DESCRIPTION: computes dot = x.y (GQUAD data), walking the vectors in strips of
//              isz = MIN(szCACHEMAX,szCache_) elements and accumulating directly
//              into *dot. Both the strip length and *dot are initialised before
//              the loop, so the result does not depend on the incoming *dot.
// ARGUMENTS  : dot : receives the result; set to 0 before accumulation starts
//              y   : vector, nxy elements are read
//              x   : vector, nxy elements are read
//              nxy : number of elements in the product
// RETURNS    : none (result is delivered through dot)
//************************************************************************************
void MTK::cf_qdot(GQUAD *dot, GQUAD y[], GQUAD x[], GINT nxy)
{
  GINT    i, ii, isz;

  isz  = MIN(szCACHEMAX,szCache_);   // strip length; loop stride, must be positive
  *dot = 0.0;

  for ( i=0; i<nxy; i+=isz ) {
    for ( ii=i; ii<MIN(i+isz,nxy); ii++ ) {
       *dot  = *dot + x[ii]*y[ii];
    }
  }
 
} // end of method  cf_qdot


//************************************************************************************
//************************************************************************************
// METHOD     : basic_dcopy
// DESCRIPTION: copies x into y: y = x, element by element in increasing index order
// ARGUMENTS  : y   : destination, nxy elements are written (never read)
//              x   : source, nxy elements are read
//              nxy : number of elements copied
// RETURNS    : none
//************************************************************************************
void MTK::basic_dcopy(GDOUBLE y[], GDOUBLE x[], GINT nxy)
{
  GINT      i;

  // No element is touched unless nxy > 0, so an empty input is safe.
  for ( i=0; i<nxy; i++ ) {
     y[i] = x[i];
  }
 
} // end of method  basic_dcopy


//************************************************************************************
//************************************************************************************
// METHOD     : cf_dcopy
// DESCRIPTION: copies x into y (y = x). The first nxy % chunk elements are copied
//              on their own, the rest in chunks of side*side elements, where
//              side = MIN(szCACHEMAX,szCache_).
// ARGUMENTS  : y   : destination, nxy elements are written
//              x   : source, nxy elements are read
//              nxy : number of elements copied
// RETURNS    : none
//************************************************************************************
void MTK::cf_dcopy(GDOUBLE y[], GDOUBLE x[], GINT nxy)
{
  const GINT side  = MIN(szCACHEMAX,szCache_);
  const GINT chunk = side*side;
  const GINT head  = nxy % chunk;
  GINT       pos, start, stop;

  // Leading remainder: elements [0, head)
  pos = 0;
  while ( pos < head ) {
    y[pos] = x[pos];
    ++pos;
  }

  // Elements [head, nxy), one chunk at a time
  for ( start=head; start<nxy; start+=chunk ) {
    stop = MIN(start+chunk,nxy);
    for ( pos=start; pos<stop; ++pos ) {
      y[pos] = x[pos];
    }
  }

} // end of method  cf_dcopy


//************************************************************************************
//************************************************************************************
// METHOD     : basic_qcopy
// DESCRIPTION: copies x into y: y = x (GQUAD data), element by element in
//              increasing index order
// ARGUMENTS  : y   : destination, nxy elements are written (never read)
//              x   : source, nxy elements are read
//              nxy : number of elements copied
// RETURNS    : none
//************************************************************************************
void MTK::basic_qcopy(GQUAD y[], GQUAD x[], GINT nxy)
{
  GINT   i;   

  // No element is touched unless nxy > 0, so an empty input is safe.
  for ( i=0; i<nxy; i++ ) { 
     y[i] = x[i]; 
  }
  
} // end of method  basic_qcopy


//************************************************************************************
//************************************************************************************
// METHOD     : cf_qcopy
// DESCRIPTION: copies x into y (y = x) for GQUAD data. The first nxy % chunk
//              elements are copied on their own, the rest in chunks of side*side
//              elements, where side = MIN(szCACHEMAX,szCache_).
// ARGUMENTS  : y   : destination, nxy elements are written
//              x   : source, nxy elements are read
//              nxy : number of elements copied
// RETURNS    : none
//************************************************************************************
void MTK::cf_qcopy(GQUAD y[], GQUAD x[], GINT nxy)
{
  const GINT side  = MIN(szCACHEMAX,szCache_);
  const GINT chunk = side*side;
  const GINT head  = nxy % chunk;
  GINT       pos, start, stop;

  // Leading remainder: elements [0, head)
  pos = 0;
  while ( pos < head ) {
    y[pos] = x[pos];
    ++pos;
  }

  // Elements [head, nxy), one chunk at a time
  for ( start=head; start<nxy; start+=chunk ) {
    stop = MIN(start+chunk,nxy);
    for ( pos=start; pos<stop; ++pos ) {
      y[pos] = x[pos];
    }
  }

} // end of method  cf_qcopy


//************************************************************************************
//************************************************************************************
// METHOD     : basic_dzvxv_point
// DESCRIPTION: computes z_j = x_j * y_j  for all j
// ARGUMENTS  : z   : result vector, nxy elements are written
//              x   : first factor, nxy elements are read
//              y   : second factor, nxy elements are read
//              nxy : number of elements processed
// RETURNS    : none
//************************************************************************************
void MTK::basic_dzvxv_point(GDOUBLE z[], GDOUBLE x[], GDOUBLE y[], GINT nxy)
{
  GINT     i;

  // No element is touched unless nxy > 0, so an empty input is safe.
  for ( i=0; i<nxy; i++ ) {
    z[i] = y[i] * x[i] ;
  }
} // end of method  basic_dzvxv_point


//************************************************************************************
//************************************************************************************
// METHOD     : cf_dzvxv_point
// DESCRIPTION: forms the pointwise product z_j = x_j * y_j for all j. The first
//              nxy % chunk elements are processed on their own, the rest in chunks
//              of side*side elements, where side = MIN(szCACHEMAX,szCache_).
// ARGUMENTS  : z   : result vector, nxy elements are written
//              x   : first factor, nxy elements are read
//              y   : second factor, nxy elements are read
//              nxy : number of elements processed
// RETURNS    : none
//************************************************************************************
void MTK::cf_dzvxv_point(GDOUBLE z[], GDOUBLE x[], GDOUBLE y[], GINT nxy)
{
  const GINT side  = MIN(szCACHEMAX,szCache_);
  const GINT chunk = side*side;
  const GINT head  = nxy % chunk;
  GINT       pos, start, stop;

  // Leading remainder: elements [0, head)
  pos = 0;
  while ( pos < head ) {
    z[pos] = x[pos] * y[pos] + 0.0;
    ++pos;
  }

  // Elements [head, nxy), one chunk at a time
  for ( start=head; start<nxy; start+=chunk ) {
    stop = MIN(start+chunk,nxy);
    for ( pos=start; pos<stop; ++pos ) {
      z[pos] = x[pos] * y[pos] + 0.0;
    }
  }
  
} // end of method  cf_dzvxv_point


//************************************************************************************
//************************************************************************************
// METHOD     : basic_dvvxv_point
// DESCRIPTION: computes x_j = x_j * y_j  for all j, in place
// ARGUMENTS  : x   : vector updated in place; nxy elements are read and written
//              y   : multiplier, nxy elements are read
//              nxy : number of elements processed
// RETURNS    : none
//************************************************************************************
void MTK::basic_dvvxv_point(GDOUBLE x[], GDOUBLE y[], GINT nxy)
{
  GINT     i; 
  
  // No element is touched unless nxy > 0, so an empty input is safe.
  for ( i=0; i<nxy; i++ ) {
    x[i] = y[i] * x[i] ;
  }
  
} // end of method  basic_dvvxv_point


//************************************************************************************
//************************************************************************************
// METHOD     : cf_dvvxv_point
// DESCRIPTION: scales x pointwise by y in place: x_j = x_j * y_j for all j. The
//              first nxy % chunk elements are processed on their own, the rest in
//              chunks of side*side elements, where side = MIN(szCACHEMAX,szCache_).
// ARGUMENTS  : x   : vector updated in place; nxy elements are read and written
//              y   : multiplier, nxy elements are read
//              nxy : number of elements processed
// RETURNS    : none
//************************************************************************************
void MTK::cf_dvvxv_point(GDOUBLE x[], GDOUBLE y[], GINT nxy)
{
  const GINT side  = MIN(szCACHEMAX,szCache_);
  const GINT chunk = side*side;
  const GINT head  = nxy % chunk;
  GINT       pos, start, stop;

  // Leading remainder: elements [0, head)
  pos = 0;
  while ( pos < head ) {
    x[pos] = x[pos] * y[pos] + 0.0;
    ++pos;
  }

  // Elements [head, nxy), one chunk at a time
  for ( start=head; start<nxy; start+=chunk ) {
    stop = MIN(start+chunk,nxy);
    for ( pos=start; pos<stop; ++pos ) {
      x[pos] = x[pos] * y[pos] + 0.0;
    }
  }

} // end of method  cf_dvvxv_point



//************************************************************************************
//************************************************************************************
// METHOD     : basic_dmxm
// DESCRIPTION: forms the matrix product C = A B. C is taken to be nai x nbj; its
//              size is not checked. The inner dimensions must agree (naj == nbi),
//              otherwise a message is printed and the program exits with status 1.
//              Element (i,k) of A is read from A[i+k*nai], element (k,j) of B from
//              B[k+j*nbi], and element (i,j) of C is written to C[i+j*nai].
// ARGUMENTS  : C   : returned, solution, of implied dimension nai x nbj
//              A   : of implied dimension nai x naj
//              nai : 1-dimension of A
//              naj : 2-dimension of A
//              B   : of implied dimension nbi x nbj
//              nbi : 1-dimension of B
//              nbj : 2-dimension of B
// RETURNS    : none
//************************************************************************************
void MTK::basic_dmxm(GDOUBLE C[], GDOUBLE A[], GINT nai, GINT naj, GDOUBLE B[], GINT nbi, GINT nbj)
{
  GINT     row, col, inner;
  GDOUBLE  acc;

  if ( naj != nbi ) {
    cout << "MTK::basic_dmxm: incompatible matrix" << endl;
    exit(1);
  }

  for ( col=0; col<nbj; ++col ) {
    const GINT bOff = col*nbi;   // start of column col in B
    const GINT cOff = col*nai;   // start of column col in C
    for ( row=0; row<nai; ++row ) {
      acc = 0.0;
      for ( inner=0; inner<naj; ++inner ) {
        acc += (A[row+inner*nai] * B[inner+bOff]);
      }
      C[row+cOff] = acc;
    }
  }

} // end of method basic_dmxm


//************************************************************************************
//************************************************************************************
// METHOD     : cf_dmxm
// DESCRIPTION: computes C = A B, where A & B are matrices, using tiles of
//              isz = MIN(szCACHEMAX,szCache_) in each of the three loop indices.
//              Dimensions of C assumed to be nai x nbj; checking is not performed
//              here (the naj == nbi test is compiled out). naj must equal nbi.
//              C is zeroed first and then accumulated into.
// ARGUMENTS  : C   : returned, solution, of implied dimension nai x nbj
//              A   : of implied dimension nai x naj
//              nai : 1-dimension of A
//              naj : 2-dimension of A (only referenced by the disabled check)
//              B   : of implied dimension nbi x nbj
//              nbi : 1-dimension of B; also used as the contraction length
//              nbj : 2-dimension of B
// RETURNS    : none
//************************************************************************************
void MTK::cf_dmxm(GDOUBLE C[], GDOUBLE A[], GINT nai, GINT naj, GDOUBLE B[], GINT nbi, GINT nbj)
{
  GINT     i, ii, isz, j, jj, js, k, kk, IJ;   

  isz = MIN(szCACHEMAX,szCache_);
#if 0
  if ( naj != nbi ) {
    cout << "MTK::cf_dmxm: incompatible matrix" << endl;
    exit(1);
  }
#endif

  // Every entry of C is built up by '+=' below, so it has to start from zero
  memset(C,'\0',nai*nbj*sizeof(GDOUBLE));
  for ( ii=0; ii<nai; ii+=isz ) {
    for ( jj=0; jj<nbj; jj+=isz ) {
      for ( kk=0; kk<nbi; kk+=isz ) {

        for ( i=ii; i<MIN(nai,ii+isz); i++ ) {
          for ( j=jj; j<MIN(nbj,jj+isz); j++ ) {
            js = j*nbi;      // start of column j in B
            IJ = i+j*nai;    // position of C(i,j); does not depend on k
            for ( k=kk; k<MIN(nbi,kk+isz); k++ ) {
              C[IJ] += A[i+k*nai] * B[k+js];
            }
          }
        }

      }
    }
  }

} // end of method cf_dmxm


//************************************************************************************
//************************************************************************************
// METHOD     : basic_qmxm
// DESCRIPTION: forms the matrix product C = A B for GQUAD data. C is taken to be
//              nai x nbj; its size is not checked. The inner dimensions must agree
//              (naj == nbi), otherwise a message is printed and the program exits
//              with status 1. Element (i,k) of A is read from A[i+k*nai], element
//              (k,j) of B from B[k+j*nbi], and C(i,j) is written to C[i+j*nai].
// ARGUMENTS  : C   : returned, solution, of implied dimension nai x nbj
//              A   : of implied dimension nai x naj
//              nai : 1-dimension of A
//              naj : 2-dimension of A
//              B   : of implied dimension nbi x nbj
//              nbi : 1-dimension of B
//              nbj : 2-dimension of B
// RETURNS    : none
//************************************************************************************
void MTK::basic_qmxm(GQUAD C[], GQUAD A[], GINT nai, GINT naj, GQUAD B[], GINT nbi, GINT nbj)
{
  if ( naj != nbi ) {
    cout << "MTK::basic_qmxm: incompatible matrix" << endl;
    exit(1);
  }

  GINT     row, col, inner;
  GQUAD    acc;

  for ( col=0; col<nbj; ++col ) {
    const GINT bOff = col*nbi;   // start of column col in B
    const GINT cOff = col*nai;   // start of column col in C
    for ( row=0; row<nai; ++row ) {
      acc = 0.0;
      for ( inner=0; inner<naj; ++inner ) {
        acc += A[row+inner*nai] * B[inner+bOff];
      }
      C[row+cOff] = acc;
    }
  }

  
} // end of method basic_qmxm


//************************************************************************************
//************************************************************************************
// METHOD     : cf_qmxm
// DESCRIPTION: computes C = A B for GQUAD data, visiting C in tiles of
//              isz x isz entries, isz = MIN(szCACHEMAX,szCache_). Each entry is the
//              full sum over the contraction index k = 0..naj-1, accumulated in a
//              GQUAD local (the GDOUBLE scratch array stmp is not used, so partial
//              sums are not rounded to GDOUBLE). C is taken to be nai x nbj; its
//              size is not checked. naj must equal nbi, otherwise a message is
//              printed and the program exits with status 1.
// ARGUMENTS  : C   : returned, solution, of implied dimension nai x nbj
//              A   : of implied dimension nai x naj
//              nai : 1-dimension of A
//              naj : 2-dimension of A
//              B   : of implied dimension nbi x nbj
//              nbi : 1-dimension of B
//              nbj : 2-dimension of B
// RETURNS    : none
//************************************************************************************
void MTK::cf_qmxm(GQUAD C[], GQUAD A[], GINT nai, GINT naj, GQUAD B[], GINT nbi, GINT nbj)
{
  GINT     i, ii, isz, j, jj, js, k;
  GQUAD    sum;

  isz = MIN(szCACHEMAX,szCache_);

  if ( naj != nbi ) {
    cout << "MTK::cf_qmxm: incompatible matrix" << endl;
    exit(1);
  }

  for ( ii=0; ii<nai; ii+=isz ) {
    for ( jj=0; jj<nbj; jj+=isz ) {
      for ( i=ii; i<MIN(nai,ii+isz); i++ ) {
        for ( j=jj; j<MIN(nbj,jj+isz); j++ ) {
          js = j*nbi;   // start of column j in B
          // Contract over the whole inner dimension, not just the current tile
          for ( k=0, sum=0.0; k<naj; k++ ) {
            sum += A[i+k*nai] * B[k+js];
          }
          C[i+j*nai] = sum;
        }
      }
    }
  }

} // end of method cf_qmxm


//************************************************************************************
//************************************************************************************
// METHOD     : basic_dmxDm
// DESCRIPTION: computes C = A * Diag(B). Dimensions of C assumed to be those of A.
//              Compute: 
//                       C_ij = A_ij * B_jj (no summing in j)
//              Element (i,j) of A and of C is addressed as [i+j*nai], the same
//              column-by-column layout used by the other routines in this file.
//              If naj != nb a message is printed and the program exits with
//              status 1.
// ARGUMENTS  : C   : returned, solution, of implied dimension nai x naj
//              A   : of dimension nai x naj
//              nai : 1-dimension of A
//              naj : 2-dimension of A
//              b   : Diag(B), of dimension nb
//              nb  : dimension of b
// RETURNS    : none
//************************************************************************************
void MTK::basic_dmxDm(GDOUBLE C[], GDOUBLE A[], GINT nai, GINT naj, GDOUBLE b[], GINT nb)
{
  if ( naj != nb ) {
    cout << "MTK::basic_dmxDm: incompatible matrix" << endl;
    exit(1);
  }

  GINT     i, j, k; 
  GDOUBLE  bjj;

  for ( j=0; j<naj; j++ ) {
    k   = j*nai;     // start of column j: each column holds nai entries
    bjj = b[j];
    for ( i=0; i<nai; i++ ) {
      C[i+k] = A[i+k] * bjj ;
    }
  }
  
} // end of method basic_dmxDm


//************************************************************************************
//************************************************************************************
// METHOD     : cf_dmxDm
// DESCRIPTION: computes C = A * Diag(B). Dimensions of C assumed to be those of A.
//              Compute: 
//                       C_ij = A_ij * B_jj (no summing in j)
//              Element (i,j) of A and of C is addressed as [i+j*nai], the same
//              column-by-column layout used by the other routines in this file.
//              If naj != nb a message is printed and the program exits with
//              status 1.
// ARGUMENTS  : C   : returned, solution, of implied dimension nai x naj
//              A   : of dimension nai x naj
//              nai : 1-dimension of A
//              naj : 2-dimension of A
//              b   : Diag(B), of implied dimension nb
//              nb  : dimension of b
// RETURNS    : none
//************************************************************************************
void MTK::cf_dmxDm(GDOUBLE C[], GDOUBLE A[], GINT nai, GINT naj, GDOUBLE b[], GINT nb)
{
  if ( naj != nb ) {
    cout << "MTK::cf_dmxDm: incompatible matrix" << endl;
    exit(1);
  }

  GINT     i, j, k; 
  GDOUBLE  bjj;

  for ( j=0; j<naj; j++ ) {
    k   = j*nai;     // start of column j: each column holds nai entries
    bjj = b[j];
    for ( i=0; i<nai; i++ ) {
      C[i+k] = A[i+k] * bjj ;
    }
  }

} // end of method cf_dmxDm


//************************************************************************************
//************************************************************************************
// METHOD     : basic_dDmxm
// DESCRIPTION: computes C = Diag(B) * A  Dimensions of C assumed to be those of A.
//              Compute: 
//                       C_ij = B_ii * A_ij  (no summing in j)
//              b is indexed by the row index i, so its length nb must equal nai;
//              otherwise a message is printed and the program exits with status 1.
//              Element (i,j) of A and of C is addressed as [i+j*nai].
// ARGUMENTS  : C   : returned, solution, of implied dimension nai x naj
//              b   : Diag(B), of dimension nb
//              nb  : dimension of b; must equal nai
//              A   : of dimension nai x naj
//              nai : 1-dimension of A
//              naj : 2-dimension of A
// RETURNS    : none
//************************************************************************************
void MTK::basic_dDmxm(GDOUBLE C[], GDOUBLE b[], GINT nb, GDOUBLE A[], GINT nai, GINT naj)
{
  // b[i] is read for i = 0..nai-1, so it is the row count that has to match nb
  if ( nai != nb ) {
    cout << "MTK::basic_dDmxm: incompatible matrix" << endl;
    exit(1);
  }

  GINT     i, j, k; 

  for ( j=0; j<naj; j++ ) {
    k = j*nai;       // start of column j
    for ( i=0; i<nai; i++ ) {
      C[i+k] = A[i+k] * b[i];
    }
  }
} // end of method basic_dDmxm


//************************************************************************************
//************************************************************************************
// METHOD     : cf_dDmxm
// DESCRIPTION: computes C = Diag(B) * A  Dimensions of C assumed to be those of A.
//              Compute: 
//                       C_ij = B_ii * A_ij  (no summing in j)
//              b is indexed by the row index i, so its length nb must equal nai;
//              otherwise a message is printed and the program exits with status 1.
//              Element (i,j) of A and of C is addressed as [i+j*nai].
// ARGUMENTS  : C   : returned, solution, of implied dimension nai x naj
//              b   : Diag(B), of implied dimension nb
//              nb  : dimension of b; must equal nai
//              A   : of dimension nai x naj
//              nai : 1-dimension of A
//              naj : 2-dimension of A
// RETURNS    : none
//************************************************************************************
void MTK::cf_dDmxm(GDOUBLE C[], GDOUBLE b[], GINT nb, GDOUBLE A[], GINT nai, GINT naj)
{
  // b[i] is read for i = 0..nai-1, so it is the row count that has to match nb
  if ( nai != nb ) {
    cout << "MTK::cf_dDmxm: incompatible matrix" << endl;
    exit(1);
  }

  GINT     i, j, k; 

  for ( j=0; j<naj; j++ ) {
    k = j*nai;       // start of column j
    for ( i=0; i<nai; i++ ) {
      C[i+k] = A[i+k] * b[i];
    }
  }
  
} // end of method cf_dDmxm

