library(splines)
library(qmap)
library(fitdistrplus)

################
### Bias correction functions
### All functions take '...' so we can call them uniformly inside a loop.


### Oracle: perfect correction based on a priori knowledge of
### source distribution parameters.

oracle <- function(fut, cur.par, obs.par, distro, ...){
    rfun <- paste0("r", distro)
    dfun <- paste0("d", distro)
    pfun <- paste0("p", distro)
    qfun <- paste0("q", distro)
    
    if(exists(pfun) && exists(qfun)){
        return(.pmap(fut, cur.par, obs.par, distro))
    } else {

        x0 <- density(fut)$x
        
        dens.cur <- do.call(dfun, c(list(x=x0), cur.par))
        dens.obs <- do.call(dfun, c(list(x=x0), obs.par))
        
        cdf.cur <- trapezoid(dens.cur, diff(x0)[1])
        cdf.obs <- trapezoid(dens.obs, diff(x0)[1])
        
        p <- approx(x0, cdf.cur, c(fut))
        q <- approx(cdf.obs, x0, p$y)

        return(q$y)
    }
}



### Probability mapping: map from value to probability using
### distribution fitted to model data, then from probability back to
### value using distribution fitted to obs data.  Distribution must be
### specified a priori.

pmap <- function(cur, fut, obs, distro, estimator, ...){
    cur.par <- do.call(estimator, list(x=cur, distro=distro))
    obs.par <- do.call(estimator, list(x=obs, distro=distro))
    return(.pmap(fut, cur.par, obs.par, distro))
}


### Private function that just does the mapping bit
### Separated out because it's also used by oracle()

.pmap <- function(x, x.par, y.par, distro){
    p <- do.call(paste0("p", distro), c(list(q=x), x.par))
    q <- do.call(paste0("q", distro), c(list(p=p), y.par))    
    return(q)  
}


### Calculate distribution parameters using moment estimators (sample
### mean and sd) plus some algebra

smp <- function(x, distro, ...){
    mu    <- mean(x)
    sigma <- sd(x)
    if(distro=="norm"){
        return(list(mean=mu, sd=sigma))
    }
    if(distro=="gamma"){
        return(list(shape=mu^2/sigma^2, rate=mu/sigma^2))
    }
    stop("smp: don't know how to estimate parameters for distribution: ", distro)
}


### Maximum-Likelihood Estimate of distribution parameters

mle <- function(x, distro, ...){
    f <- fitdist(x, distro, "mle")$estimate  
    return(as.list(f))
}



### Quantile mapping: calculate quantiles of obs and mod, then
### apply tricubic transform to data in each quantile of mod so
### that the quantiles match obs.  Main parameter is number of
### quantiles to use.  Previous analysis suggests that more is
### better; here we use "few" (5 or 6 quantiles), "many" (5 or 6
### points per quantile), and "some" (roughly equal numbers of
### quantiles and points per quantile).  If number of quantiles
### equals number of points, this is called ECDF mapping.  fitQmap
### and doQmap functions come from the qmap library.

qmap <- function(cur, fut, obs, nbins, ...){
    q <- fitQmap(obs, cur, "QUANT", wet.day=FALSE, qstep=1/nbins)
    v <- doQmap(fut, q, method="tricub")
    return(v)
}



### ECDF (Empirical CDF matching): make the empirical CDFs match, 
### point-by-point.  Similar to qmap with Nbins = Ndata.

ecdf <- function(cur, fut, obs, mult=FALSE, ...){
    sort.order <- order(fut)
    if(mult){
        bias   <- sort(cur) / sort(obs)
        result <- sort(fut) / bias
    } else {
        bias   <- sort(cur) - sort(obs)
        result <- sort(fut) - sort(bias)
    }

    result[sort.order] <- result
    return(result)
}



### ARRM (Asynchronous Regional Regression Model): uses a
### piecewise regression function on the Q-Q plot.  It uses linear
### regression with QR decomposition (i.e., the R function 'lm')
### over a moving window of fixed width to find the points where
### the slope of the transfer function changes abruptly.  These
### breakpoints are used as the knots for a spline-based
### statistical model (i.e., a call to 'lm(y ~ ns(x))').  The
### width of the fixed window as a percentage of total number of
### points is set by the parameter 'fww'; the number of
### breakpoints is set by the parameter 'maxbreaks'.

arrm <- function(cur, fut, obs, bypass=FALSE, ...){

    # To speed things up during code dev and debug of the rest of the code
    if(bypass){
        warning("Bypassing ARRM! Dummy results from KDDM provided instead.")
        return(kddm(cur, fut, obs, ...))
    }

    x <- sort(cur)
    N <- length(x)
    y <- sort(obs[1:N])
    maxbreaks <- 6

    ## moving window widths

    n10 <- N * 10 / 100
    n90 <- N * 90 / 100

    fww <- (x[n90] - x[n10]) * 0.05
    tailw <- max(x[10] - x[1], x[N] - x[N-9])

    ## do linear regressions on moving window
    
    r2 <- array()
    windex <- list()
   
    for(w in n10:n90){
        upper <- x[w] + fww/2
        lower <- x[w] - fww/2
        window <- which(lower <= x & x <= upper)
        r2[w] <- summary(lm(y[window] ~ x[window]))$r.squared
        windex[[w]] <- window
    }

    ## for tails, linearly taper window size from fww to tailw
    ## lower tail
    for(w in 5:(n10-1)){
        ww <- fww + (tailw-fww) * (1 - (w-5)/(n10-6))
        upper <- x[w] + ww/2
        lower <- x[w] - ww/2
        window <- which(lower <= x & x <= upper)
        r2[w] <- summary(lm(y[window] ~ x[window]))$r.squared
        windex[[w]] <- window
    }

    ## upper tail
    for(w in (n90+1):(N-5)){
        upper <- x[w] + ww/2
        lower <- x[w] - ww/2
        window <- which(lower <= x & x <= upper)
        r2[w] <- summary(lm(y[window] ~ x[window]))$r.squared
        windex[[w]] <- window    
    }
 
    ## Now find breakpoints
    breaks <- array()

    for(b in 1:maxbreaks){
        w <- which.min(r2)    
        breaks[b] <- w
        window <- windex[[w]]
        r2[window] <- NA
    }

    ## check slopes for bad segments    
    breaks <- sort(breaks)
    flag <- TRUE
    removed <- 0
    while(flag){
        flag <- FALSE
        slopes <- c()
        bb <- c(1, breaks, N)
        for (s in 1:(length(breaks)+1)){
            xb <- x[bb[s]:bb[s+1]]
            yb <- y[bb[s]:bb[s+1]]
            slopes[s] <- coef(lm(yb ~ xb))[2]
        }

        if(any(slopes < 0.1)){
            flag <- TRUE
            removed <- removed + 1
            bad <- which(slopes < 0.1)
            if(bad[1] == 1){
                # remove first breakpoint if leftmost segment bad
                breaks <- breaks[-1]
            } else { 
                # remove left breakpoint of first bad segment
                breaks <- breaks[-(bad[1]-1)]
            }
        }
    }
    if(removed > 0){
        print(paste("breaks removed:",removed))
    }
    
    ## build piecewise linear statistical model using spline interpolations
    arrm <- lm(y ~ ns(x, knots=x[breaks]))
    z <- unname(predict(arrm, newdata=data.frame(x=fut)))
    return(z)
}



### KDDM (kernel density distribution mapping): like
### pmap/qmap/arrm, but rather than fitting the underlying
### distribution to a gaussian or approximating it with quantiles
### or piecewise linear fits to the Q-Q map, we estimate the
### distribution using kernel density.  We calculate a CDF from
### the resulting density object using the trapezoid rule.

trapezoid <- function(y, dx, normalize=TRUE){
    z <- (cumsum(y) - y/2)*dx
    if(normalize){z <- z / max(z)}
    return(z)
}


kddm <-function(cur, fut, obs, ...){
    d.cur <- density(cur, ...)
    d.obs <- density(obs, ...)
    cdf.cur <- trapezoid(d.cur$y, diff(d.cur$x)[1])
    cdf.obs <- trapezoid(d.obs$y, diff(d.obs$x)[1])
    p <- approx(d.cur$x, cdf.cur, fut, rule=2)
    q <- approx(cdf.obs, d.obs$x, p$y, rule=2)
    return(q$y)
}


# Copyright 2014 Univ. Corp for Atmos. Research
# Author: Seth McGinnis, mcginnis@ucar.edu
