# NOTE(review): clearing the global workspace inside a script is an
# anti-pattern, but it is kept to preserve this script's original behavior.
rm(list = ls())
library(car)
library(nlme)
library(fields)
set.seed(123)

############ Compute correlation matrix \Sigma for AR(1 or 2) process ############
# c1  : dimension of the (square) correlation matrix, i.e. number of time points
# phi : AR coefficients; length 1 -> AR(1), length 2 -> AR(2)
# Returns the c1 x c1 autocorrelation matrix of the stationary AR process.
Sigma <- function(c1, phi){
  ARorder <- length(phi)
  Sigma <- diag(c1)
  if (ARorder == 1){
    # AR(1): rho(k) = phi^|k| (vectorized over all lag pairs)
    Sigma <- phi^abs(outer(1:c1, 1:c1, "-"))
    #Sigma <- arResVar*Sigma/(1-phi^2)
  }
  if (ARorder == 2){
    phi1 <- phi[1]
    phi2 <- phi[2]
    #gamma0 <- arResVar*(1-phi2)/((1+phi2)*(phi1+phi2-1)*(phi2-phi1-1))
    # Yule-Walker recursion for the AR(2) autocorrelation function
    rho <- rep(0, c1)
    rho[1] <- 1
    rho[2] <- phi1/(1-phi2)
    for (i in 3:c1){
      rho[i] <- phi1*rho[i-1] + phi2*rho[i-2]
    }
    # dist() gives |i-j|; rho[lag+1] maps lag 0..c1-1 onto the matrix
    Sigma <- matrix(rho[as.matrix(dist(1:c1))+1], c1, c1)
    #Sigma <- gamma0*Sigma
  }
  # BUG FIX: the original function ended with the if-blocks, so for AR(1)
  # the value of the untaken `if (ARorder==2)` test (NULL) was returned.
  # Return the correlation matrix explicitly for both orders.
  Sigma
}

###################################################################################################
##### Compute conditional covariance matrix for AR(1 or 2) model to generate conditional errors ###
# c2     : size of matrix, i.e. the total years, c2 = 981 (1000-1980) in our data
# c3     : unknown times, i.e. the reconstruction period, c3 = 850 (1000-1849) in our data
# lsfit  : fitted model whose $resid covers the known (calibration) period
# phi    : AR coefficients; ResVar : residual variance
# Returns list(RR, muC): RR is the Cholesky factor of the conditional
# covariance and muC the conditional mean of the unknown-period errors
# given the observed residuals.
SigmaAR <- function(c2, c3, lsfit, phi, ResVar){
  SigmaAR <- ResVar*Sigma(c2, phi)
  Sigma11 <- SigmaAR[1:c3, 1:c3]              # unknown times
  Sigma12 <- SigmaAR[1:c3, (c3+1):c2]
  Sigma22 <- SigmaAR[(c3+1):c2, (c3+1):c2]    # known residuals
  SigmaM <- Sigma12 %*% solve(Sigma22)        # matrix for conditional mean
  SigmaC <- Sigma11 - SigmaM %*% t(Sigma12)   # conditional variance
  RR <- chol(SigmaC)                          # Cholesky decomposition
  mu <- mean(lsfit$resid)
  ee <- lsfit$resid
  muC <- mu + SigmaM %*% (ee - mu)
  list(RR = RR, muC = muC)
}

##########################################################################
############### running average for decadal mean #########################
# 10-year centered moving average of x with the NA padding that
# stats::filter() puts at the series ends removed.
decade <- function(x){
  runMean <- filter(x, rep(0.1, 10))
  # drop the NAs introduced at the boundaries (idiomatic replacement for
  # the original subset(runMean, runMean != 'NA'))
  runMean1 <- runMean[!is.na(runMean)]
  return(runMean1)
}
#############################################################################################
#### Read the instrumental and proxy data and preprocess them ###############################
# HadCRUT3v NH file: every other line is kept; characters 2-6 hold the year
# and 91-96 the annual value. NOTE(review): column positions assumed from the
# substring calls -- confirm against the data file format.
file <- readLines("hadcrut3vnh-c070221.dat")
newfile <- file[seq(1,length(file),2)]
temp <- cbind(as.numeric(substring(newfile, 2,6)), as.numeric(substring(newfile, 91,96)))
temp <- temp[-nrow(temp),]   # drop the last (presumably incomplete) year
colnames(temp) <- c('year','temperature')
temp <- as.data.frame(temp)
#base period 1961-1990
# NOTE(review): temp$temp relies on partial name matching for 'temperature';
# rows 112:141 are assumed to correspond to 1961-1990 -- verify.
temp$temperature <- temp$temp - mean(temp$temp[112:141])
proxy <- read.table("MBH99_data.dat", sep='', header=T)
tempCal <- subset(subset(temp, year<1981),select=-year)
proxyCal <- subset(subset(proxy, year>1849),select=-year)
c1 <- nrow(tempCal) #length of calibration period
#### OLS regression ##############
olsfit.Dat <- lm(tempCal$temp~as.matrix(proxyCal))
#### GLS regression with AR(2) errors ##############
ARorder <- 2
jointCal <- cbind(tempCal,proxyCal)
glsfit.Dat <- gls(temperature~urals+fenno+tasman+quel2.o18+quel2.acc+quel1.o18+quel1.acc+westgren+npatagon+fran010+moro008+namer_pc1fixed+namer_pc2+namer_pc3,jointCal,correlation=corARMA(p=ARorder),method='ML')
# Recover the fitted AR coefficients by parsing line 3 of the printed
# corStruct ($modelS partially matches $modelStruct). NOTE(review): the
# parsed vector may contain NAs from empty tokens (the bootstrap loop below
# filters them, this one does not) -- confirm phi has exactly 2 usable values.
temporary <- capture.output(glsfit.Dat$modelS$corStr)[3]
phi <- as.numeric(unlist(strsplit(temporary, " ")[[1]]))
######################### Bootstrap to find the sample of \hat\theta #########################################
# Parametric bootstrap: simulate temperature series from the fitted GLS model
# (mean X*beta plus correlated noise), refit, and collect beta, AR
# coefficients and residual variance for each replicate.
Sigma.B <- glsfit.Dat$sigma^2*Sigma(c1,phi)
eigenSigma <- eigen(Sigma.B) #eigenvalue decomposition
Pp <- eigenSigma$vectors #eigenvectors of Sigma.B
Sigma1 <- Pp%*%diag(eigenSigma$values^(-0.5))%*%t(Pp) #Sigma^(-0.5)
Sigma2 <- Pp%*%diag(eigenSigma$values^(0.5))%*%t(Pp) #Sigma^(0.5), used to color white noise
##########################
sim <- 1000
betaB <- matrix(0,sim,15)        # intercept + 14 proxy coefficients per replicate
rhoB <- matrix(0,sim,ARorder)    # AR coefficients per replicate
sigma2B <- rep(0,sim)            # residual variance per replicate
newMean <- as.matrix(cbind(rep(1,c1),proxyCal))%*%glsfit.Dat$coeff
for (i in 1:sim){
  tempB <- newMean+Sigma2%*%rnorm(c1)   # simulated temperatures: mean + Sigma^(1/2) * N(0,I)
  jointB <- cbind(tempB,proxyCal)
  glsfit.B <- gls(as.vector(tempB)~urals+fenno+tasman+quel2.o18+quel2.acc+quel1.o18+quel1.acc+westgren+npatagon+fran010+moro008+namer_pc1fixed+namer_pc2+namer_pc3,jointB, correlation=corARMA(p=ARorder),method='ML')
  temporary <- capture.output(glsfit.B$modelS$corStr)[3]
  phi.B <- as.numeric(unlist(strsplit(temporary, " ")[[1]]))
  # drop NA tokens from the string parse so exactly ARorder coefficients remain
  if (length(phi.B)>2) phi.B <- subset(phi.B, phi.B!='NA')
  betaB[i,] <- glsfit.B$coeff
  rhoB[i,] <- phi.B
  sigma2B[i] <- glsfit.B$sigma^2
  print(i)   # progress indicator
}
############################################ Test for overfitting ##################################################
### prediction error = var(e)+x0%*%solve(t(X)%*%solve(Sigma)%*%X)%*%t(x0) for linear model with correlated errors ###
########### prediction error^2/prediction error ~ N(0, 1) if there is no overfitting ###############################
# 10-fold cross-validation on the calibration period; `inflation` collects the
# mean squared standardized validation residual per fold, and its root mean is
# used later to inflate the ensemble noise.
inflation <- rep(0, 10)
grpSize <- trunc(c1/10)
grpAdd <- c1%%10
### If the calibration period is not a multiple of 10, add 1 to the size of the first grpAdd groups ###
### phi_1 and phi_2 are fixed
### Group size = grpSize+1 for the first grpAdd groups ####################################
if (grpAdd>0){
  for (i in 1:grpAdd){
    valIndex <-1:(grpSize+1)+(i-1)*(grpSize+1) #index of the left-out 10%
    calIndex <- (1:c1)[!(1:c1) %in% valIndex] #index of data used to fit the model
    print(valIndex)
    joint.O <- as.matrix(jointCal[calIndex,])
    proxy.O <- as.matrix(jointCal[valIndex,2:15])      # validation proxies
    proxy.Ocal <- as.matrix(jointCal[calIndex,2:15])   # training proxies
    temp.O <- jointCal[valIndex,1]                     # validation temperatures
    ### Sigma.Ot is used as a lookup table: Sigma.Ot[lag+1,1] = rho(lag)
    Sigma.Ot <- Sigma(c1,phi)
    corrCal <- matrix(Sigma.Ot[as.vector(as.matrix(dist(calIndex)))+1,1],length(calIndex),length(calIndex))
    corrCal.eig <- eigen(corrCal)
    # Whiten (GLS via transformation): multiply by corrCal^(-1/2), then OLS
    joint.scale <- corrCal.eig$vector%*%diag(corrCal.eig$value^(-0.5))%*%t(corrCal.eig$vector)%*%joint.O
    glsfit.O <- lm(joint.scale[,1]~joint.scale[,2:15])
    tempPred.O <- as.matrix(cbind(rep(1,length(valIndex)),proxy.O))%*%glsfit.O$coeff
    valRes <- tempPred.O-temp.O
    Sigma.O <- (sum(glsfit.O$res^2)/glsfit.O$df)*corrCal   # estimated error covariance on training rows
    # Cross-covariance between training errors and validation errors
    covEp0 <- Sigma.O[1,1]*matrix(Sigma.Ot[as.vector(as.matrix(dist(c(valIndex,calIndex)))[(length(valIndex)+1):c1,1:length(valIndex)])+1,1],length(calIndex),length(valIndex))
    temporary1 <- t(proxy.Ocal)%*%solve(Sigma.O)
    temporary2 <- solve(temporary1%*%proxy.Ocal)   # (X' Sigma^-1 X)^-1
    # Mean squared validation residual divided by its predicted variance
    inflation[i] <- mean(valRes^2/(Sigma.O[1,1]+diag(proxy.O%*%temporary2%*%t(proxy.O)) -2*diag(proxy.O%*%temporary2%*%temporary1%*%covEp0)))
  }
}
### Now every group size = grpSize (remaining folds) #########################
# NOTE(review): this loop duplicates the fold logic above except for the
# valIndex offset and the inflation index.
for (i in 1:(10-grpAdd)){
  valIndex <-1:grpSize+(i-1)*grpSize+grpAdd*(grpSize+1) #index of the left-out 10%
  calIndex <- (1:c1)[!(1:c1) %in% valIndex] #index of data used to fit the model
  print(valIndex)
  joint.O <- as.matrix(jointCal[calIndex,])
  proxy.O <- as.matrix(jointCal[valIndex,2:15])
  proxy.Ocal <- as.matrix(jointCal[calIndex,2:15])
  temp.O <- jointCal[valIndex,1]
  ### Sigma.Ot is used as a lookup table: Sigma.Ot[lag+1,1] = rho(lag)
  Sigma.Ot <- Sigma(c1,phi)
  corrCal <- matrix(Sigma.Ot[as.vector(as.matrix(dist(calIndex)))+1,1],length(calIndex),length(calIndex))
  corrCal.eig <- eigen(corrCal)
  joint.scale <- corrCal.eig$vector%*%diag(corrCal.eig$value^(-0.5))%*%t(corrCal.eig$vector)%*%joint.O
  glsfit.O <- lm(joint.scale[,1]~joint.scale[,2:15])
  tempPred.O <- as.matrix(cbind(rep(1,length(valIndex)),proxy.O))%*%glsfit.O$coeff
  valRes <- tempPred.O-temp.O
  Sigma.O <- (sum(glsfit.O$res^2)/glsfit.O$df)*corrCal
  covEp0 <- Sigma.O[1,1]*matrix(Sigma.Ot[as.vector(as.matrix(dist(c(valIndex,calIndex)))[(length(valIndex)+1):c1,1:length(valIndex)])+1,1],length(calIndex),length(valIndex))
  temporary1 <- t(proxy.Ocal)%*%solve(Sigma.O)
  temporary2 <- solve(temporary1%*%proxy.Ocal)
  inflation[i+grpAdd] <- mean(valRes^2/(Sigma.O[1,1]+diag(proxy.O%*%temporary2%*%t(proxy.O)) -2*diag(proxy.O%*%temporary2%*%temporary1%*%covEp0)))
}
# Root mean standardized squared error across folds; >1 indicates the model's
# claimed prediction variance is too small (overfitting).
inflation <- sqrt(mean(inflation))
#####################################################################################################################
########################################### Generate ensembles ####################################################
# --- Generate reconstruction ensembles and summary statistics ---------------
c2 <- 981   # total years (1000-1980)
c3 <- 850   # unknown/reconstruction years (1000-1849)
proxyP <- subset(subset(proxy, year<1850), select=-year)
nDecad <- c3-9
decadMax <- decadMaxYear <- rep(0,sim)   # decadal maximum and its corresponding year
decadMean <- matrix(0,sim,nDecad)        # decadal mean of ensemble members
eeC <- matrix(0,sim,c3)                  # ensemble of reconstructed temperatures
for (i in 1:sim){
  print(i)   # progress indicator
  # Draw one ensemble member using the i-th bootstrap parameter set
  phi.E <- rhoB[i,]
  ResVar.E <- sigma2B[i]
  Sigma.E <- SigmaAR(c2,c3,glsfit.Dat,phi.E,ResVar.E)
  RR <- Sigma.E$RR     # Cholesky factor of the conditional covariance
  muC <- Sigma.E$muC   # conditional mean given observed residuals
  tempPred.E <- as.matrix(cbind(rep(1,c3),proxyP))%*%betaB[i,]
  # crossprod(RR, z) = t(RR) %*% z colors the white noise; the noise is
  # scaled by the overfitting inflation factor computed above
  eeC[i,] <- tempPred.E+crossprod(RR,rnorm(c3))*inflation+muC
  decadMean[i,] <- decade(eeC[i,])
  decadSort <- sort(decadMean[i,],index.return=TRUE)
  decadMax[i] <- decadSort$x[nDecad]
  decadMaxYear[i] <- 1003+decadSort$ix[nDecad] # running mean starts at 1004
}
###### Probability of each year holding the decadal maximum (green curve in Fig. 3) ######
countYear <- rep(0,nDecad)
for (i in 1:sim){
  countYear[decadMaxYear[i]-1003] <- countYear[decadMaxYear[i]-1003]+1
}
countYear <- countYear/sim
### Decadal averaged instrumental temperature ##################
runMean <- filter(temp$temperature, rep(0.1,10))
# !is.na() replaces the original runMean!='NA' comparison (same result, clearer)
decadInstr <- runMean[!is.na(runMean)]       # running decadal mean of instrumental temperature
# BUG FIX: the original read `temp\$year`, a literal backslash before `$`,
# which is a syntax error in R (apparently a LaTeX-escape artifact).
yearInstr <- temp$year[!is.na(runMean)]      # years corresponding to decadInstr
#### max, min, 95% bounds of the ensemble for each year #############
maxT <- minT <- upperT <- lowerT <- rep(0,c3)
for (i in 1:c3){
  maxT[i] <- max(eeC[,i])
  minT[i] <- min(eeC[,i])
  upperT[i] <- quantile(eeC[,i],0.975)
  lowerT[i] <- quantile(eeC[,i],0.025)
}
### Decadal mean of ensemble members ##################
# NOTE(review): decadeEEC recomputes the same quantity as decadMean above;
# kept so the saved workspace contains both objects, as in the original.
decadeEEC <- matrix(0, sim, nDecad)
for (i in 1:sim){
  decadeEEC[i,] <- decade(eeC[i,])
}
#### max, min, 95% bounds of decadally averaged ensembles for each year ##########
maxDecadeEEC <- uppDecadeEEC <- lowDecadeEEC <- minDecadeEEC <- rep(0,nDecad)
for (i in 1:nDecad){
  maxDecadeEEC[i] <- max(decadeEEC[,i])
  uppDecadeEEC[i] <- quantile(decadeEEC[,i],0.975)
  lowDecadeEEC[i] <- quantile(decadeEEC[,i],0.025)
  minDecadeEEC[i] <- min(decadeEEC[,i])
}
save.image('uncertainty.RData')