source("mixture-distribution.R")
source("bc-techniques.R")

set.seed(22)

Ndata = 450 # number of data points per test case
#Niter <- 20  # number of iterations.  Use 20 for development, 500 for full run
Niter <- 1000


################

# storage arrays

all.time <- list()
all.data <- list()
all.result <- list()
xr <- yr <- c()

for (distro in c("norm","gamma","mixture")){

    print("************")
    print(distro)
    print("************")


    ### Set up distributions

    ### Two-sided distributions need to be centered (mean zero) for
    ### the oracle calculation to work properly.  Store non-zero means
    ### separately and apply after oracle data has been generated.


    # normal distribution

    if(distro == "norm"){
        obs.par <- list(mean=0, sd=1)
        cur.par <- list(mean=0, sd=2)
        fut.par <- list(mean=0, sd=2.5)

        obs.mean <- 10
        cur.mean <- 20
        fut.mean <- 25
        ora.mean <- obs.mean + (fut.mean - cur.mean)
        
        one.sided <- FALSE
    }


    # gamma distribution
    # if shape == rate, then mean == 1 for all cases

    if(distro == "gamma"){

        obs.par <- list(shape=5,   rate=5)
        cur.par <- list(shape=2.5, rate=2.5)
        fut.par <- list(shape=4,   rate=4)

        one.sided <- TRUE
    }


    # mixture distribution 
    # Mixture of two gaussians, which gives a bimodal PDF

    if(distro == "mixture"){
        
        obs.par <- list(mean2=-5.0,  sd2=2.5, mean1=2.5,  sd1=2.5, mix=1/3)
        cur.par <- list(mean2=-3.75, sd2=3.2, mean1=3.75, sd1=2.4, mix=1/2)
        fut.par <- list(mean2=-5.5,  sd2=2.5, mean1=2.75, sd1=2.1, mix=1/3)

        obs.mean <- 13
        cur.mean <- 11.5
        fut.mean <- 15
        ora.mean <- obs.mean + (fut.mean - cur.mean)
        
        one.sided <- FALSE
    }


    ################

    ### Generate raw data

    set.seed(4978139)

    obs <- cur <- fut <- ora <- array(dim=c(Niter, Ndata))

    for(i in 1:Niter){
        obs[i,] <- do.call(paste0("r", distro), c(list(n=Ndata), obs.par))
        cur[i,] <- do.call(paste0("r", distro), c(list(n=Ndata), cur.par))
        fut[i,] <- do.call(paste0("r", distro), c(list(n=Ndata), fut.par))
        ora[i,] <- oracle(fut[i,], cur.par, obs.par, distro=distro)
    }

    if(!one.sided){
        obs <- obs + obs.mean
        cur <- cur + cur.mean
        fut <- fut + fut.mean
        ora <- ora + ora.mean    
    }


    all.data[[distro]] <- list(obs=obs, cur=cur, fut=fut, ora=ora)
    

    ################

    ### Cases to evalute

    ### List of functions to call for bias correction (from techniques.R)

    bc.func <- list()
    bc.func[["ecdf"]]   <- ecdf
    bc.func[["q.few"]]  <- qmap
    bc.func[["q.some"]] <- qmap
    bc.func[["q.many"]] <- qmap
    bc.func[["arrm"]]   <- arrm
    bc.func[["kddm"]]   <- kddm
    bc.func[["p.smp"]]  <- pmap
    bc.func[["p.mle"]]  <- pmap

    ### Also need extra arguments for each method

    args <- list()
    args[["ecdf"]]    <- list(mult=FALSE)  # would be =one.sided w/o log xform
    args[["q.few"]]   <- list(nbins=5)
    args[["q.some"]]  <- list(nbins=round(sqrt(Ndata)))
    args[["q.many"]]  <- list(nbins=round(Ndata/5))
    args[["arrm"]]    <- list()
#    args[["arrm"]]    <- list(bypass=TRUE)  # speed things up for testing
    args[["kddm"]]    <- list(bw="SJ")
    if(one.sided){
        args[["p.smp"]] <- list(distro="gamma",estimator="smp")
        args[["p.mle"]] <- list(distro="gamma",estimator="mle")
    } else {
        args[["p.smp"]] <- list(distro="norm",estimator="smp")
        args[["p.mle"]] <- list(distro="norm",estimator="mle")
    }


    ################

    ### normalization functions

    normalize <- function(L, method){
        if(one.sided) {
            if(any(grep("^p", method) == 1)){
                return(L)
            } else {
                return(lapply(L, log))
            }
        }
        
        mu <- c()
        for(e in c("cur","fut","obs")) {
            mu[e] <- mean(L[[e]])
            L[[e]] <- (L[[e]] - mu[e])
        }
        # NB!  Using globals like this is dangerous and fragile
        # but it makes the code vastly simpler in this case
        norm.delta <<- unname(mu["obs"] + (mu["fut"] - mu["cur"]))
        return(L)
    }

    denormalize <- function(x, method){
        if(one.sided){
            if(any(grep("^p", method) == 1)){
                return(x)
            } else {
                return(exp(x))
            }
        }
        x <- x + norm.delta
        return(x)
    }
    
    
    ################

    ### Do bias correction with timing

    data <- list(uncorrected=fut, oracle=ora)
    timing <- c()


    for (m in names(bc.func)){
        result <- c()
        timing[m] <- 0
        print("")
        print(m)
        flush.console()
        pb <- txtProgressBar(1, Niter, style=3)
        start.time <- proc.time()
        for (i in (1:Niter)){
            call.args <- c(
                normalize(list(cur=cur[i,], fut=fut[i,], obs=obs[i,]), m),
                args[[m]])
            result <- rbind(result,
                            denormalize(do.call(bc.func[[m]], call.args), m))
            setTxtProgressBar(pb, i)
        }
        dt <- proc.time() - start.time
        timing[m] <- dt["elapsed"]
        data[[m]] <- result
        rm(result)
    }
    print("")

    all.time[[distro]] <- timing
    all.result[[distro]] <- data
}

save.image(paste0(Niter, "-run.RData"))
