# model fitting from MW(2010) # Minor change to highlight OLS PC4 instead of OLS PC5 my_pc_reg <- function(y, gpc, ppc, gpc_out, ppc_out, num_grid_pc, num_proxy_pc){ k = num_grid_pc p = num_proxy_pc tmpg <- data.frame(gpc[,1:k]) if(k==1){colnames(tmpg) <- "PC1"} yreg <- lm(y~., data=tmpg) tmpghat <- tmpg[1:dim(ppc_out)[1],] if(k==1){tmpghat <- data.frame(PC1=tmpghat)} greg <- list(NULL) tmpx <- data.frame(ppc[,1:p]) if(p==1){colnames(tmpx) <- "PC1"} for(i in 1:k){ greg[[i]] <- lm(tmpg[,i]~., data=tmpx) tmpghat[,i] <- predict(greg[[i]], newdata=ppc_out) } yhat <- predict(yreg, newdata=tmpghat) } # Run proxy pca: proxy_pc <- prcomp(proxy, center=TRUE, scale=TRUE) grid_pc <- prcomp(grid, center=TRUE, scale=TRUE) # Build insample dataframe: my_years <- intersect(rownames(temp), intersect(rownames(grid), rownames(proxy))) yy_in <- temp[my_years,1] names(yy_in) <- my_years proxy_in <- data.frame( proxy[my_years,] ) gpc_in <- data.frame( grid_pc$x[my_years,] ) ppc_in <- data.frame( proxy_pc$x[my_years,] ) # Build out of sample dataframe: proxy_out <- data.frame( proxy ) ppc_out <- data.frame( proxy_pc$x ) # FIT VARIOUS MODELS: library(MASS) library(lars) myfrac <- seq(0, 1, length=201) yhat <- list(NULL) # Intercept: mylm <- lm(yy_in~1) yhat[[1]] <- predict(mylm, data.frame(ppc_out)) # OLS_PC1: mylm <- lm(yy_in~., data=data.frame(x=ppc_in[,1])) yhat[[2]] <- predict(mylm, data.frame(x=ppc_out[,1])) # OLS_PC4: mylm <- lm(yy_in~., data=data.frame(ppc_in[,1:4])) yhat[[3]] <- predict(mylm, data.frame(ppc_out[,1:4])) # OLS_PC10: mylm <- lm(yy_in~., data=data.frame(ppc_in[,1:10])) yhat[[4]] <- predict(mylm, data.frame(ppc_out[,1:10])) # OLS_PC20: mylm <- lm(yy_in~., data=data.frame(ppc_in[,1:20])) yhat[[5]] <- predict(mylm, data.frame(ppc_out[,1:20])) # LASSO_PROXY95 # Cross-validate: cv.mean <- matrix(NA,length(myfrac),numcv_lars) cv.se <- matrix(NA,length(myfrac),numcv_lars) for(j in 1:numcv_lars){ tmp <- cv.lars(as.matrix(proxy_in), yy_in, K=K_lars, fraction=myfrac, type="lasso", use.Gram=FALSE, plot.it=FALSE) cv.mean[,j] <- tmp$cv cv.se[,j] <- tmp$cv.error } # Create Lasso Model tmp <- rowMeans(cv.mean) tmp2 <- (1:length(myfrac))[tmp==min(tmp)] cv.frac <- myfrac[tmp2] full_lasso <- lars(as.matrix(proxy_in), yy_in, type="lasso", use.Gram=FALSE) lasso_yhat <- predict(full_lasso, as.matrix(proxy_out), type="fit", s=cv.frac, mode="fraction") yhat[[6]] <- lasso_yhat$fit # LASSO_PC95 # Cross-validate: cv.mean <- matrix(NA,length(myfrac),numcv_lars) cv.se <- matrix(NA,length(myfrac),numcv_lars) for(j in 1:numcv_lars){ tmp <- cv.lars(as.matrix(ppc_in), yy_in, K=K_lars, fraction=myfrac, type="lasso", use.Gram=FALSE, plot.it=FALSE) cv.mean[,j] <- tmp$cv cv.se[,j] <- tmp$cv.error } # Create Lasso Model tmp <- rowMeans(cv.mean) tmp2 <- (1:length(myfrac))[tmp==min(tmp)] cv.frac <- myfrac[tmp2] full_lasso <- lars(as.matrix(ppc_in), yy_in, type="lasso", use.Gram=FALSE) lasso_yhat <- predict(full_lasso, as.matrix(ppc_out), type="fit", s=cv.frac, mode="fraction") yhat[[7]] <- lasso_yhat$fit # stepAIC_Proxy95: f0 <- lm(yy_in~1, data=proxy_in) f1 <- lm(yy_in~., data=proxy_in) f <- stepAIC(f0, scope=list(lower=f0, upper=f1), trace=FALSE) yhat[[8]] <- predict(f, data.frame(proxy_out)) # stepAIC_PC95: f0 <- lm(yy_in~1, data=ppc_in) f1 <- lm(yy_in~., data=ppc_in) f <- stepAIC(f0, scope=list(lower=f0, upper=f1), trace=FALSE) yhat[[9]] <- predict(f, data.frame(ppc_out)) # stepBIC_Proxy95: f0 <- lm(yy_in~1, data=proxy_in) f1 <- lm(yy_in~., data=proxy_in) f <- stepAIC(f0, scope=list(lower=f0, upper=f1), trace=FALSE, k=log(dim(proxy_in)[2])) yhat[[10]] <- predict(f, data.frame(proxy_out)) # stepBIC_PC95: f0 <- lm(yy_in~1, data=ppc_in) f1 <- lm(yy_in~., data=ppc_in) f <- stepAIC(f0, scope=list(lower=f0, upper=f1), trace=FALSE, k=log(dim(ppc_in)[2])) yhat[[11]] <- predict(f, data.frame(ppc_out)) cnt <- 12 for(aa in numgridPC){ for(bb in numproxyPC){ yhat[[cnt]] <- my_pc_reg(yy_in, gpc_in, ppc_in, gpc_out, ppc_out, aa, bb) cnt <- cnt+1 } } # smooth results (loess, span =1/20 ~ 50 years) x_in=as.numeric(rownames(ppc_out)) yhat_sm = yhat for(i in 2:length(yhat)){ tmplo <- loess(yhat[[i]]~x_in, span=.05) tmpy <- predict(tmplo, x_in, se=TRUE) yhat_sm[[i]]= tmpy$fit }