# fields, Tools for spatial data
# Copyright 2004-2007, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html

"gcv.sreg" <-
function (out, lambda.grid = NA, cost = 1, nstep.cv = 80, rmse = NA, 
    offset = 0, trmin = NA, trmax = NA, verbose = FALSE, tol = 1e-05) 
{
    shat.pure.error <- out$shat.pure.error
    pure.ss <- out$pure.ss
    nt <- 2
    np <- out$np
    N <- out$N
    out$cost <- cost
    out$offset <- offset

# find good end points for lambda coarse grid. 

    if (is.na(trmin)) 
        trmin <- 2.05
    if (is.na(trmax)) 
        trmax <- out$np * 0.95

    if (verbose) {
        cat( "trmin and trmax", fill=TRUE)
        cat(trmin, trmax, fill = TRUE)
    }

    if (is.na(lambda.grid[1])) {
        l2 <- sreg.df.to.lambda(trmax, out$xM, out$weightsM)
        l1 <- sreg.df.to.lambda(trmin, out$xM, out$weightsM)
        lambda.grid <- exp(seq(log(l2), log(l1), , nstep.cv))
    }
    if (verbose) {
        cat( "endpoints of coarse lamdba grid", fill=TRUE)
        cat(l1, l2, fill = TRUE)
    }

# build up table of coarse grid serach results for lambda
# in the matrix gcv.grid

    nl <- length(lambda.grid)
    V <- V.model <- V.one <- trA <- MSE <- RSS.model <- rep(NA, 
        nl)

#   loop through lambda's and compute various quantities related to 
#   lambda and the fitted spline. 
    for (k in 1:nl) {
        temp <- sreg.fit(lambda.grid[k], out, verbose=verbose)
        RSS.model[k] <- temp$rss
        trA[k] <- temp$trace
        V[k] <- temp$gcv
        V.one[k] <- temp$gcv.one
        V.model[k] <- temp$gcv.model
    }

# adjustments to columns of gcv.grid
    RSS <- RSS.model + pure.ss
    shat <- sqrt(RSS/(N - trA))
    gcv.grid <- cbind(lambda.grid, trA, V, V.one, V.model, shat)
    dimnames(gcv.grid) <- list(NULL, c("lambda", "trA", "GCV", 
        "GCV.one", "GCV.model", "shat"))

    if (verbose) {
        cat( "Results of coarse grid search", fill=TRUE)
        print(gcv.grid)
    }

    lambda.est <- matrix(ncol = 4, nrow = 5, dimnames = list(c("GCV", 
        "GCV.model", "GCV.one", "RMSE", "pure error"), c("lambda", 
        "trA", "GCV", "shat")))

# now do various refinements for different flavors of finding 
# a good value for lambda the smoothing parameter

##### traditional leave-one-out
    lambda.est[1, 1] <- Krig.find.gcvmin(out, lambda.grid, gcv.grid[, 
        "GCV"], sreg.fgcv, tol = tol, verbose = verbose)
    if (verbose) {
        cat("leave one out GCV lambda", fill=TRUE)
        cat(lambda.est[1, 1], fill = TRUE)
    }

##### using GCV criterion adjusting for replicates
    if (!is.na(shat.pure.error)) {
        lambda.est[2, 1] <- Krig.find.gcvmin(out, lambda.grid, 
            gcv.grid[, "GCV.model"], sreg.fgcv.model, tol = tol, 
            verbose = verbose)
        if (verbose) {
            cat("results of GCV.model search", fill=TRUE) 
            cat(lambda.est[2, 1], fill = TRUE)
        }
    }
    lambda.est[3, 1] <- Krig.find.gcvmin(out, lambda.grid, gcv.grid[, 
        "GCV.one"], sreg.fgcv.one, tol = tol, verbose = verbose)

##### matching an external value of RMSE
    lambda.rmse <- NA
    lambda.pure.error <- NA
    if (!is.na(rmse)) {
        guess <- max(gcv.grid[gcv.grid[, "shat"] < rmse, "lambda"])

        if (verbose) {
            cat("trying to matching with RMSE", fill=TRUE) 
            cat("rmse", rmse, "guess", guess, fill=TRUE)
        }
   
     if (!is.na(guess)) {
            lambda.rmse <- find.upcross(sreg.fs2hat, out, upcross.level = rmse^2, 
                guess = guess, tol = tol * rmse^2)
            lambda.est[4, 1] <- lambda.rmse
   
        }
        else {
            warning("Value of rmse is outside possible range")
        }
    }

##### matching  sigma estimated from the replicates. 
    if (!is.na(shat.pure.error)) {
#       all shats smaller than pure error estimate
        guess <- gcv.grid[, "shat"] < shat.pure.error 

#       set to NA a bad guess!
        if( any( guess) ) { 
             guess<- max(  gcv.grid[guess, "lambda"]) }
        else{
           guess<- NA}

        if (verbose) {
            cat("#### trying to matching with sigma from pure error", 
fill=TRUE)
            cat("shat.pure", shat.pure.error, "guess", guess, fill=TRUE)
        }

        if (!is.na(guess)) {
            lambda.pure.error <- find.upcross(sreg.fs2hat, out, 
                upcross.level = shat.pure.error^2, guess = guess, 
                tol = tol * shat.pure.error^2)
            lambda.est[5, 1] <- lambda.pure.error
            if(verbose){
            cat("results of matching with pure error sigma", fill=TRUE) }
        }
        else {
            warning("Value of pure error estimate   
                             is outside possible range")
        }
    }
    if (verbose) {
        cat("All forms of estimated lambdas so far", fill=TRUE)
        print(lambda.est)
    }
    for (k in 1:5) {
        lam <- lambda.est[k, 1]
        if (!is.na(lam)) {
            temp <- sreg.fit(lam, out)
            lambda.est[k, 2] <- temp$trace
            if ((k == 1) | (k > 3)) {
                lambda.est[k, 3] <- temp$gcv
            }
            if (k == 2) {
                lambda.est[k, 3] <- temp$gcv.model
            }
            if (k == 3) {
                lambda.est[k, 3] <- temp$gcv.one
            }
            lambda.est[k, 4] <- temp$shat
        }
    }
    list(gcv.grid = gcv.grid, lambda.est = lambda.est)
}

# fields, Tools for spatial data
# Copyright 2004-2007, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html

"sreg" <-
function(x, y, lambda = NA, df = NA, offset = 0, 
        weights = rep(1, length(x)), cost = 1, 
	nstep.cv = 80, tol=1e-5,find.diagA = TRUE, 
        trmin = 2.01, trmax =NA, 
        lammin = NA, lammax = NA,
	verbose = FALSE, do.cv = TRUE, method = "GCV", rmse = NA, 
        na.rm=TRUE)
{
	call <- match.call()
	out <- list()
	out$call <- match.call()
	class(out) <- c("sreg")

	out$cost <- cost
	out$offset <- offset
	out$method <- method
#        
# some obscure components so that some of the Krig functions
# work without size of "null space"        
	out$nt <- 2
        out$knots<-NULL
#
# various checks on x and y including looking for NAs.
        out2<- Krig.check.xY( x,y,NULL, weights, na.rm, verbose=verbose)
        out<- c( out,out2)
        
# find duplicate rows of the x vector
# unique x values are now in out$xM and the means of 
# y are in out$yM.
        out<- Krig.replicates( out, verbose=verbose)
        out<- c( out,out2)

# number of unique locations
        out$np<- length( out$yM)
# now set maximum of trace for upper bound of GCV grid search
        if(is.na(trmax)){
          trmax<-out$np*.99}
        
        if( verbose) { print(out)}
#
# sorted unique values for prediction to make line plotting quick
        xgrid <- sort(out$xM)
	out$trace <- NA
        
#
# figure out if the GCV function should be minimized
# and which value of lambda should be used for the estimate
# old code used lam as argument, copy it over from lambda
		lam <- lambda

	if(is.na(lam[1]) & is.na(df[1])) {
		do.cv <- TRUE
	}
	else {
		do.cv <- FALSE
	}
#
# find lambda's if df's are given
	if(!is.na(df[1])) {
		lam <- rep(0, length(df))
		for(k in 1:length(df)) {
			lam[k] <- sreg.df.to.lambda(
                                df[k], out$xM, out$weightsM)
		}
	}

#
        if( verbose) { 
           cat("lambda grid",fill=TRUE)
           print(lam)}

	if(do.cv) {
		a <- gcv.sreg(out, lambda = lam, cost = cost, offset = offset,
			nstep.cv = nstep.cv, verbose = verbose, trmin = trmin,
			trmax = trmax,rmse = rmse)

		# if the spline is evaluated at the GCV solution 
                # wipe out lam grid
		# and just use GCV lambda.
		out$gcv.grid <- a$gcv.grid
		out$lambda.est <- a$lambda.est
		# 
		# save  GCV estimate if that is what is needed
		lam <- a$lambda.est[method, "lambda"]
		out$shat.GCV <- a$lambda.est[method, "shat"]
	}

#
# now evaluate spline at lambda either from specified grid or GCV value.
	b <- list()
	# lam can either be  a grid or just the GCV value 
	NL <- length(lam)
	NG <- length(xgrid)
	h <- log(lam)
	residuals <- matrix(NA, ncol = NL, nrow = length(out$y))
	predicted <- matrix(NA, ncol = NL, nrow = NG)
        trace<- rep( NA, NL)
	job <- as.integer(c(0, 3, 0))

	if(find.diagA) {
		diagA <- matrix(NA, ncol = NL, nrow = out$np)
		# add switch to find diag of A. 
		job <- as.integer(c(3, 3, 0))
	}

	for(k in 1:NL) {
		#
		# call cubic spline FORTRAN, this is nasty looking but fast.
                # note lambda is passed in log scale.
                # what the routine does is controlled by array job 
		# spline solution evaluated at xgrid
		# 
		b <- .Fortran("css",
			h = as.double(h[k]),
			npoint = as.integer(out$np),
			x = as.double(out$xM),
			y = as.double(out$yM),
			wt = as.double(1/sqrt(out$weightsM)),
			sy = as.double(rep(0, out$np)),
			trace = as.double(0),
			diag = as.double(c(cost, offset, rep(0, (out$np - 2)))),
       			cv = as.double(0),
			ngrid = as.integer(NG),
			xg = as.double(xgrid),
			yg = as.double(rep(0, NG)),
			job = as.integer(job),
			ideriv = as.integer(0),
			ierr = as.integer(0), PACKAGE="fields")
		if(find.diagA) {
		diagA[, k] <- b$diag}

# note distinction between yM and y, xM and x
# these are residuals at all data point locations not just the
# unique set. 
	        trace[k]<- b$trace
        	residuals[, k] <- out$y - splint(out$xM, b$sy, out$x)
		predicted[, k] <- b$yg }
	
        out$call <- call
	out$lambda <- lam
	out$do.cv <- do.cv

	out$residuals <- residuals
        out$trace<- trace
	out$fitted.values <- out$y - residuals
	out$predicted <- list(x = xgrid, y = predicted)

	if(length(lambda[1]) == 1) {
		out$eff.df <- out$trace[1]
	}

	if(find.diagA) {
		out$diagA <- diagA
	}

        class(out)<- "sreg"

	return(out)
}

# fields, Tools for spatial data
# Copyright 2004-2007, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html

"sreg.df.to.lambda" <-
function(df, x, wt, guess = 1, tol = 1.0e-05)
{

	if(is.na(df))
		return(NA)
	n <- length(unique(x))
	info <- list(x = x, wt = wt, df = df)
	if(df > n) {
		warning(" df too large to match a lambda value")
		return(NA)
	}
	h1 <- log(guess)

	########## find upper lambda
	for(k in 1:25) {
		tr <- sreg.trace(h1, info)
		if(tr <= df)
			break
		h1 <- h1 + 1.5
	}

	########## find lower lambda
	h2 <- log(guess)
	for(k in 1:25) {
		tr <- sreg.trace(h2, info)
		if(tr >= df)
			break
		h2 <- h2 - 1.5
	}
	out <- bisection.search(h1, h2, sreg.fdf, tol = tol, f.extra = info)$
		x
	exp(out)
}
# fields, Tools for spatial data
# Copyright 2004-2007, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html

"sreg.fdf" <-
function(h, info)
{
	sreg.trace(h, info) - info$df
}
# fields, Tools for spatial data
# Copyright 2004-2007, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html

"sreg.fgcv" <-
function(lam, obj)
{
	sreg.fit(lam, obj)$gcv
}
# fields, Tools for spatial data
# Copyright 2004-2007, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html

"sreg.fgcv.model" <-
function(lam, obj)
{
	sreg.fit(lam, obj)$gcv.model
}
# fields, Tools for spatial data
# Copyright 2004-2007, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html

"sreg.fgcv.one" <-
function(lam, obj)
{
	sreg.fit(lam, obj)$gcv.one
}
# fields, Tools for spatial data
# Copyright 2004-2007, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html

"sreg.fit" <-
function(lam, obj, verbose=FALSE)
{
	np <- obj$np
	N <- obj$N
	nt <- 2
	if(is.null(obj$cost)) {
		cost <- 1
	}
	else {
		cost <- obj$cost
	}
	if(is.null(obj$offset)) {
		offset <- 0
	}
	else {
		offset <- obj$offset
	}
	if(is.null(obj$shat.pure.error)) {
		shat.pure.error <- 0
	}
	else {
		shat.pure.error <- obj$shat.pure.error
	}
	if(is.null(obj$pure.ss)) {
		pure.ss <- 0
	}
	else {
		pure.ss <- obj$pure.ss
	}
	#print(np)
	#	NOTE h <- log(lam)
	temp <- .Fortran("css",
		h = as.double(log(lam)),
		npoint = as.integer(np),
		x = as.double(obj$xM),
		y = as.double(obj$yM),
		wt = as.double(sqrt(1/obj$weightsM)),
		sy = as.double(rep(0, np)),
		trace = as.double(0),
		diag = as.double(rep(0, np)),
		cv = as.double(0),
		ngrid = as.integer(0),
		xg = as.double(0),
		yg = as.double(0),
		job = as.integer(c(3, 0, 0)),
		ideriv = as.integer(0),
		ierr = as.integer(0),
                PACKAGE="fields")
	rss <- sum((temp$sy - obj$yM)^2 * obj$weightsM)
	MSE <- rss/np
	if((N - np) > 0) {
		MSE <- MSE + pure.ss/(N - np)
	}
	trA <- temp$trace
	den <- (1 - (cost * (trA - nt - offset) + nt)/np)
	den1 <- (1 - (cost * (trA - nt - offset) + nt)/N)

	# If the denominator is negative then flag this as a bogus case
	# by making the GCV function "infinity"
	#
	shat <- sqrt((rss + pure.ss)/(N - trA))
	GCV <- ifelse(den > 0, MSE/den^2, NA)
	gcv.model <- ifelse(
              (den > 0)&( (N-np)>0), 
              pure.ss/(N - np) + (rss/np)/(den^2), NA)
	gcv.one <- ifelse(den > 0, ((pure.ss + rss)/N)/(den1^2), NA)
	list(trace = trA, gcv = GCV, rss = rss, shat = shat, gcv.model = 
		gcv.model, gcv.one = gcv.one)
}

# fields, Tools for spatial data
# Copyright 2004-2007, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html

"sreg.fs2hat" <-
function(lam, obj)
{
	sreg.fit(lam, obj)$shat^2
}
# fields, Tools for spatial data
# Copyright 2004-2007, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html

"sreg.trace" <-
function(h, info)
{

	N <- length(info$x)
	#	h <- log(lam)
	temp <- (.Fortran("css",
		h = as.double(h),
		npoint = as.integer(N),
		x = as.double(info$x),
		y = as.double(rep(0, N)),
		wt = as.double(1/sqrt(info$wt)),
		sy = as.double(rep(0, N)),
		trace = as.double(0),
		diag = as.double(rep(0, N)),
		cv = as.double(0),
		ngrid = as.integer(0),
		xg = as.double(0),
		yg = as.double(0),
		job = as.integer(c(3, 0, 0)),
		ideriv = as.integer(0),
		ierr = as.integer(0),PACKAGE="fields")$trace)
	return(temp)
}