# the San Deiego demand data is from the Formatted_US_Demand.csv
+# filter the demand data
+setwd("/Users/YueGuo/Documents/Energy_Lab/data/")
+demand_files <- list.files("demand")
+demand_filter_results <- vector("character",length(demand_files))
+for(i in 1:length(demand_files)){
+ data = read.csv(paste("demand/",demand_files[i],sep=""))
+ demand_filter_results[i] = data_filter(data)
+}
+# filter the weather data
+setwd("/Users/YueGuo/Documents/Energy_Lab/data/")
+weather_files <- list.files("weather")
+weather_filter_results <- vector("character",length(weather_files))
+for(i in 1:length(weather_files)){
+ data = read.csv(paste("weather/",weather_files[i],sep=""))
+ weather_filter_results[i] = data_filter(data)
+}
+# compare the results
+city_names <- c("Abidjan_CotedIvoire","Accra_Ghana","Amman_Jordan","Antigua","Beirut_Lebanon",
+ "Chandigarh_India","Dakar_Senegal","Delhi_Discoms","Delhi_India","Kano_Nigeria",
+ "Kupang_Indonesia","Manila_Philippine","MbabaneCity_Swaziland","Nairobi_Kenya",
+ "NewSouthWales_Australia","NYC_US","Philadelphia_US","Queensland_Australia",
+ "SanDiego_US","Singapore_Singapore","SouthAustralia_Australia",
+ "Tema_Ghana","Tokyo_Japan","Victoria_Australia")
# Comment: but there exits years with less months; developed cities have better data
+
+
use cities with no less than 2-year demand data (1)more than 2-year demand data: for building a model(2 years) + test(1 year) (2)no less than 1-year demand data: for similarity analysis (3)NYC’s weather data’s detail is lost (4)drop Delhi data
+
+
delete_list <- c(2,4-1,8-2,9-3,10-4,11-5,16-6,22-7)
+for(i in 1:length(delete_list)){
+ demand_files = demand_files[-delete_list[i]]
+ weather_files = weather_files[-delete_list[i]]
+ city_names = city_names[-delete_list[i]]
+}
+# 16 cities left (9 developing cities, 7 developed cities)
data_Time <- function(data){
+ # drop NA's
+ data=na.omit(data)
+ # set time
+ if(names(data)[5]=="HR"){
+ #if(nchar(deparse(format(data$hours[1])))>16){
+ if(is.null(data$MIN)){
+ data$Time = paste(data$YR,"-",data$M,"-",data$D," ",data$HR,":00:00",sep="")
+ data$Time = as.POSIXct(data$Time, format="%Y-%m-%d %H:%M:%S")
+ } else {
+ data$Time = paste(data$YR,"-",data$M,"-",data$D," ",data$HR,":",data$MIN,":00",sep="")
+ data$Time = as.POSIXct(data$Time, format="%Y-%m-%d %H:%M:%S")
+ }
+ #else {data$Time = as.POSIXct(paste("20",sub('^..','',as.character(as.POSIXct(data$hours, format="%m/%d/%Y %H:%M"))),sep=""),format="%Y-%m-%d %H:%M:%S")}
+ }
+ else{
+ if(nchar(deparse(format(data$hours[1])))>16){
+ data$Time = as.POSIXct(data$hours, format="%Y-%m-%d %H:%M:%S")}
+ else {data$Time = as.POSIXct(paste("20",sub('^..','',as.character(as.POSIXct(data$hours, format="%m/%d/%Y %H:%M"))),sep=""),format="%Y-%m-%d %H:%M:%S")}
+ }
+ return(data)
+}
+# the date
+data_Date <- function(data){
+ data$Date <- as.POSIXct(substr(as.character(data$Time),1,10), format="%Y-%m-%d")
+ return(data)
+}
+# set Date and Time for all data
+for(i in 1:16){
+ data_list_weather[[i]] = data_Time(data_list_weather[[i]])
+ data_list_demand[[i]] = data_Time(data_list_demand[[i]])
+ data_list_weather[[i]] = data_Date(data_list_weather[[i]])
+ data_list_demand[[i]] = data_Date(data_list_demand[[i]])
+}
+
B. pair the cities
+
# The function to get the latest whole year weather data of each city
+latest_year_weaData <- function(data){
+ if(nrow(subset(data,data$YR==max(data$YR)))<364*24){
+ data_use <- subset(data,data$YR==(max(data$YR)-1))
+ }else{data_use <- subset(data,data$YR==max(data$YR))}
+}
+ly_data_list_weather <- vector("list",16)
+for(i in 1:16){
+ ly_data_list_weather[[i]] = latest_year_weaData(data_list_weather[[i]])
+ ly_data_list_weather[[i]]$Time_no_year = substr(as.character(ly_data_list_weather[[i]]$Time),6,19)
+}
+# The similarity score function:
+similarity <- function(data1_use,data2_use){
+ # merge the data according to the lastest year data
+ compare_data=merge(data1_use[, c("Time_no_year", "TEMP", "DEW.POINT")],
+ data2_use[, c("Time_no_year", "TEMP", "DEW.POINT")], by="Time_no_year")
+ # calculate the similarity score
+ simil_score = sum(compare_data[,2]*compare_data[,4])/(sum(compare_data[,2]^2))^.5/(sum(compare_data[,4]^2))^.5 + sum(compare_data[,3]*compare_data[,5])/(sum(compare_data[,3]^2))^.5/(sum(compare_data[,5]^2))^.5
+ return(simil_score)
+}
+
calculate all chosen cities’ similarity scores
+
similarity_matrix <- matrix(0,16,16)
+for(i in 1:15){
+ for(j in (i+1):16){
+ similarity_matrix[i,j] = similarity(ly_data_list_weather[[i]],ly_data_list_weather[[j]])
+ }
+}
+for(i in 1:16){
+ for(j in 1:i){
+ similarity_matrix[i,j] = similarity_matrix[j,i]
+ }
+}
+diag(similarity_matrix) <- rep(2,16)
+rownames(similarity_matrix) <- city_names
+colnames(similarity_matrix) <- city_names
+## developed - stable GDP growth rate
+## developing - unstable GDP growth rate
+
find the max scores for a developing city with all developed cities
## group developed_city developed_city_latitude mean_latitude mean_similarity_score climate_zone
+## 1 group1(5 cities) Singapore 1.417 1.34320 1.957062 -
+## 2 group2(3 cities) San Diego 32.734 32.04433 1.838980 -
+
## the temperature dominate the similarity score
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Energy_Lab/B-view_the_data.R b/Energy_Lab/B-view_the_data.R
new file mode 100644
index 0000000..25f8b92
--- /dev/null
+++ b/Energy_Lab/B-view_the_data.R
@@ -0,0 +1,71 @@
+library(ggplot2)
+### the function give the data's plot, and we decide which city to predict
+view_data <- function(demandData){
+ # delete rows with NA's
+ demandData=na.omit(demandData)
+ # the daily peak demand
+ dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max)
+ # give the data a date col
+ dailyMax$Date=as.Date(dailyMax[,1])
+ dailyMax=dailyMax[order(dailyMax$Date), ][,-1]
+ # name the col of demand
+ names(dailyMax)[1] <- "MW"
+ # plot the data
+ ggplot(dailyMax, aes(x=Date, y=MW))+geom_line(colour="#E69F00")+labs(title=demandData$city[1])
+}
+
+# view all the demand data after filtering the data
+plot_list <- vector("list",length(data_list_demand))
+for(i in 1:length(data_list_demand)){
+ plot_list[[i]] = view_data(data_list_demand[[i]])
+}
+
+### show multiple plots
+# code from http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
+# Multiple plot function
+#
+# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
+# - cols: Number of columns in layout
+# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
+#
+# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
+# then plot 1 will go in the upper left, 2 will go in the upper right, and
+# 3 will go all the way across the bottom.
+#
+multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
+ library(grid)
+
+ # Make a list from the ... arguments and plotlist
+ plots <- c(list(...), plotlist)
+
+ numPlots = length(plots)
+
+ # If layout is NULL, then use 'cols' to determine layout
+ if (is.null(layout)) {
+ # Make the panel
+ # ncol: Number of columns of plots
+ # nrow: Number of rows needed, calculated from # of cols
+ layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
+ ncol = cols, nrow = ceiling(numPlots/cols))
+ }
+
+ if (numPlots==1) {
+ print(plots[[1]])
+
+ } else {
+ # Set up the page
+ grid.newpage()
+ pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
+
+ # Make each plot, in the correct location
+ for (i in 1:numPlots) {
+ # Get the i,j matrix positions of the regions that contain this subplot
+ matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
+
+ print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
+ layout.pos.col = matchidx$col))
+ }
+ }
+}
+multiplot(plot_list[[1]], plot_list[[2]],plot_list[[3]],plot_list[[4]],plot_list[[5]], plot_list[[6]],plot_list[[7]],plot_list[[8]], plot_list[[9]],plot_list[[10]], plot_list[[11]],plot_list[[12]],plot_list[[13]],plot_list[[14]],plot_list[[15]], plot_list[[16]], cols=4)
+
diff --git a/Energy_Lab/B-view_the_data.Rmd b/Energy_Lab/B-view_the_data.Rmd
new file mode 100644
index 0000000..55c568c
--- /dev/null
+++ b/Energy_Lab/B-view_the_data.Rmd
@@ -0,0 +1,75 @@
+---
+title: "View the demand data"
+author: "Yue Guo"
+date: "May 26, 2015"
+output: html_document
+---
+```{r}
+setwd("/Users/YueGuo/Documents/Energy_Lab/data/")
+load("DemandData_use.RData")
+library(ggplot2)
+### the function give the data's plot, and we decide which city to predict
+view_data <- function(demandData){
+ # delete rows with NA's
+ demandData=na.omit(demandData)
+ # the daily peak demand
+ dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max)
+ # give the data a date col
+ dailyMax$Date=as.Date(dailyMax[,1])
+ dailyMax=dailyMax[order(dailyMax$Date), ][,-1]
+ # name the col of demand
+ names(dailyMax)[1] <- "MW"
+ # plot the data
+ ggplot(dailyMax, aes(x=Date, y=MW))+geom_line(colour="#E69F00")+labs(title=demandData$city[1])
+}
+
+# view all the demand data after filtering the data
+plot_list <- vector("list",length(data_list_demand))
+for(i in 1:length(data_list_demand)){
+ plot_list[[i]] = view_data(data_list_demand[[i]])
+}
+```
+Show multiple plots
+```{r}
+# code from http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
+# Multiple plot function
+multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
+ library(grid)
+
+ # Make a list from the ... arguments and plotlist
+ plots <- c(list(...), plotlist)
+
+ numPlots = length(plots)
+
+ # If layout is NULL, then use 'cols' to determine layout
+ if (is.null(layout)) {
+ # Make the panel
+ # ncol: Number of columns of plots
+ # nrow: Number of rows needed, calculated from # of cols
+ layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
+ ncol = cols, nrow = ceiling(numPlots/cols))
+ }
+
+ if (numPlots==1) {
+ print(plots[[1]])
+
+ } else {
+ # Set up the page
+ grid.newpage()
+ pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
+
+ # Make each plot, in the correct location
+ for (i in 1:numPlots) {
+ # Get the i,j matrix positions of the regions that contain this subplot
+ matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
+
+ print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
+ layout.pos.col = matchidx$col))
+ }
+ }
+}
+```
+All the demand data are as follows:
+```{r, fig.width = 20, fig.height = 20}
+multiplot(plot_list[[1]], plot_list[[2]],plot_list[[3]],plot_list[[4]],plot_list[[5]], plot_list[[6]],plot_list[[7]],plot_list[[8]], plot_list[[9]],plot_list[[10]], plot_list[[11]],plot_list[[12]],plot_list[[13]],plot_list[[14]],plot_list[[15]], plot_list[[16]], cols=4)
+```
\ No newline at end of file
diff --git a/Energy_Lab/B-view_the_data.html b/Energy_Lab/B-view_the_data.html
new file mode 100644
index 0000000..b7bbb30
--- /dev/null
+++ b/Energy_Lab/B-view_the_data.html
@@ -0,0 +1,159 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+View the demand data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
View the demand data
+
Yue Guo
+
May 26, 2015
+
+
+
+
setwd("/Users/YueGuo/Documents/Energy_Lab/data/")
+load("DemandData_use.RData")
+library(ggplot2)
+### the function give the data's plot, and we decide which city to predict
+view_data <- function(demandData){
+ # delete rows with NA's
+ demandData=na.omit(demandData)
+ # the daily peak demand
+ dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max)
+ # give the data a date col
+ dailyMax$Date=as.Date(dailyMax[,1])
+ dailyMax=dailyMax[order(dailyMax$Date), ][,-1]
+ # name the col of demand
+ names(dailyMax)[1] <- "MW"
+ # plot the data
+ ggplot(dailyMax, aes(x=Date, y=MW))+geom_line(colour="#E69F00")+labs(title=demandData$city[1])
+}
+
+# view all the demand data after filtering the data
+plot_list <- vector("list",length(data_list_demand))
+for(i in 1:length(data_list_demand)){
+ plot_list[[i]] = view_data(data_list_demand[[i]])
+}
+
Show multiple plots
+
# code from http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
+# Multiple plot function
+multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
+ library(grid)
+
+ # Make a list from the ... arguments and plotlist
+ plots <- c(list(...), plotlist)
+
+ numPlots = length(plots)
+
+ # If layout is NULL, then use 'cols' to determine layout
+ if (is.null(layout)) {
+ # Make the panel
+ # ncol: Number of columns of plots
+ # nrow: Number of rows needed, calculated from # of cols
+ layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
+ ncol = cols, nrow = ceiling(numPlots/cols))
+ }
+
+ if (numPlots==1) {
+ print(plots[[1]])
+
+ } else {
+ # Set up the page
+ grid.newpage()
+ pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
+
+ # Make each plot, in the correct location
+ for (i in 1:numPlots) {
+ # Get the i,j matrix positions of the regions that contain this subplot
+ matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
+
+ print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
+ layout.pos.col = matchidx$col))
+ }
+ }
+}
+
+
+
+
+
+
+
+
diff --git a/Energy_Lab/D-GDP_correction.R b/Energy_Lab/D-GDP_correction.R
new file mode 100644
index 0000000..88b9bed
--- /dev/null
+++ b/Energy_Lab/D-GDP_correction.R
@@ -0,0 +1,166 @@
+## we choose: (based on the data quality and GDP data resource)
+# Singapore - Manila_Philippines
+# Singapore - Dakar_Senegal
+# San Diego - Amman_Jordan (to be continued)
+## based on the GDP data we estimate the demand in 2013
+
+# GDP of countries (current US$)
+# source:World Bank
+Jodan_GDP_10_13 <- c(26425379437, 28840263380, 31015239545, 33678500148)
+Philippines_GDP_10_13 <- c(1.99589E+11, 2.24143E+11, 2.5024E+11, 2.72067E+11)
+Senegal_GDP_10_13 <- c(12932427724, 14440676498, 14045680427, 14791699009)
+Singapore_GDP_2011 <- 274065000000
+Singapore_GDP_2012 <- 286908000000
+Singapore_GDP_2013 <- 297941000000
+Queensland_GDP_2011 <- 267942000000
+Queensland_GDP_2012 <- 284441000000
+Queensland_GDP_2013 <- 290158000000
+# data issue 10-11,11-12,12-13
+# lack of Tokyo data
+
+# source: 'The global urban competitiveness report - 2011'
+Amman_GDP_2010 <- 6128640000*1.09
+Amman_GDP_2011 <- 7216940000*1.05
+Dakar_GDP_2010 <- 1363990000*1.09
+Dakar_GDP_2011 <- 1595730000*1.05
+Manila_GDP_2010 <- 3669550000*1.09
+Manila_GDP_2011 <- 4048010000*1.05
+
+# estimate data
+# calculate the city/country GDP
+Amman_in_Jodan = mean(Amman_GDP_2010/Jodan_GDP_10_13[1],Amman_GDP_2011/Jodan_GDP_10_13[2])
+Dakar_in_Senegal = mean(Dakar_GDP_2010/Senegal_GDP_10_13[1],Dakar_GDP_2011/Senegal_GDP_10_13[2])
+Manila_in_Philippines = mean(Manila_GDP_2010/Philippines_GDP_10_13[1],Manila_GDP_2011/Philippines_GDP_10_13[2])
+
+Amman_GDP_2012_est = Jodan_GDP_10_13[3]*Amman_in_Jodan
+Amman_GDP_2013_est = Jodan_GDP_10_13[4]*Amman_in_Jodan
+Amman_GDP_2014_est = Jodan_GDP_10_13[4]*1.03*Amman_in_Jodan
+Dakar_GDP_2012_est = Senegal_GDP_10_13[3]*Dakar_in_Senegal
+Dakar_GDP_2013_est = Senegal_GDP_10_13[4]*Dakar_in_Senegal
+Dakar_GDP_2014_est = Senegal_GDP_10_13[4]*1.045*Dakar_in_Senegal
+Manila_GDP_2012_est = Philippines_GDP_10_13[3]*Manila_in_Philippines
+Manila_GDP_2013_est = Philippines_GDP_10_13[4]*Manila_in_Philippines
+Manila_GDP_2014_est = Philippines_GDP_10_13[4]*1.06*Manila_in_Philippines
+# 2014 GDP growth rate forcast from World bank
+
+## the population data
+Amman_Popu_2014 = 1148000
+Dakar_Popu_2014 = 3393000
+Manila_Popu_2014 = 1745000 # NCR:12764000
+Tokyo_Popu_2014 = 37833000
+Singapore_Popu_2014 = 5517000
+# source: UN, Department of Economic and Social Affairs
+Queensland_Popu_2013 = 4690910
+Queensland_Popu_2014 = 4740927
+# http://www.qgso.qld.gov.au/products/reports/pop-growth-qld/index.php, Dec. 30
+
+# estimate the population of 2013
+Amman_Popu_2015_est = 1155000
+Dakar_Popu_2015_est = 3520000
+Manila_Popu_2015_est = 1745000*(1.661/1.581)^(1/7)
+Tokyo_Popu_2015_est = 38001000
+Singapore_Popu_2015_est = 5619000
+Amman_Popu_2013_est = (Amman_Popu_2014)^2/Amman_Popu_2015_est
+Dakar_Popu_2013_est = (Dakar_Popu_2014)^2/Dakar_Popu_2015_est
+Manila_Popu_2013_est = 1745000/(1.661/1.581)^(1/7)
+Tokyo_Popu_2013_est = (Tokyo_Popu_2014)^2/Tokyo_Popu_2015_est
+Singapore_Popu_2013_est = (Singapore_Popu_2014)^2/Singapore_Popu_2015_est # we have true value
+
+#########################
+# calculation
+#########################
+# GDP_correction function
+GDP_correction <- function(GDP_developing_2013,GDP_developing_2014,
+ Popu_developing_2013,Popu_developing_2014,
+ GDP_developed_2013,Popu_developed_2013,
+ demand_developing_2013,demand_developed_2013){
+ delta_PPP <- vector("numeric",365)
+ for(i in 1:365){
+ delta_PPP[i] = GDP_developing_2013/Popu_developing_2013*((((GDP_developing_2014/Popu_developing_2014)/(GDP_developing_2013/Popu_developing_2013))^((i-1)/365))-1)
+ }
+ delta_demand <- vector("numeric",365)
+ for(i in 1:365){
+ # delta_demand[i] = (demand_developed_2013[i]/Popu_developed_2013*Popu_developing_2013-demand_developing_2013[i])*delta_PPP[i]/(GDP_developed_2013/Popu_developed_2013-GDP_developing_2013/Popu_developing_2013)
+ delta_demand[i] = (demand_developed_2013[365]/Popu_developed_2013*Popu_developing_2013-demand_developing_2013[365])*delta_PPP[i]/(GDP_developed_2013/Popu_developed_2013-GDP_developing_2013/Popu_developing_2013)
+ }
+ return(delta_demand)
+}
+
+## Manila
+# the 2013 demand data of Manila
+demandData = data_list_demand[[6]]
+demandData = na.omit(demandData)
+dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max)
+dailyMax$YR = substr(as.factor(dailyMax[,1]),1,4)
+dailyMax = subset(dailyMax,dailyMax$YR==2013)
+demand_developing_2013 <- dailyMax[,2]
+
+# the 2013 demand data of Singapore
+demandData = data_list_demand[[13]]
+demandData = na.omit(demandData)
+dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max)
+dailyMax$YR = substr(as.factor(dailyMax[,1]),1,4)
+dailyMax = subset(dailyMax,dailyMax$YR==2013)
+demand_developed_2013 <- dailyMax[,2]
+
+# The GDP correction of Manila
+Manila_correction <- GDP_correction(Manila_GDP_2013_est,Manila_GDP_2014_est,
+ Manila_Popu_2013_est,Manila_Popu_2014,
+ Singapore_GDP_2013,Singapore_Popu_2013_est,
+ demand_developing_2013,demand_developed_2013)
+
+# Correction comparison (very little change)
+demandData = data_list_demand[[6]]
+demandData = na.omit(demandData)
+dailyMax_all=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max)
+dailyMax_all$YR = substr(as.factor(dailyMax_all[,1]),1,4)
+dailyMax_raw = rbind(subset(dailyMax_all,dailyMax_all$YR==2011),subset(dailyMax_all,dailyMax_all$YR==2012))
+dailyMax <- data.frame(Date = dailyMax_raw[,1],MW = dailyMax_raw[,2])
+dailyDataTs=ts(dailyMax$MW, start=c(2011), end=c(2013), frequency=365)
+forecast_stl <- forecast(stl(dailyDataTs, s.window="period"), h=365, method="arima")
+true_data <- subset(dailyMax_all,dailyMax_all$YR==2013)
+library(lubridate)
+ManilaPlot = merge(actual = as.zoo(forecast_stl$x),predicted=as.zoo(cbind(mean=forecast_stl$mean,
+ lower=forecast_stl$lower[,1],
+ upper=forecast_stl$upper[,1])),
+ GDP=forecast_stl_corr,
+ TURE=ts(true_data[,2],start = c(2013),end=c(2014), frequency=365))
+ManilaPlotxts=xts(ManilaPlot, date_decimal(index(ManilaPlot)))
+ManilaPlot_final=dygraph(ManilaPlotxts, "Manila Power Demand Data") %>%
+ dySeries("actual", label = "Actual") %>%
+ dySeries(c("lower", "mean", "upper"), label = "Predicted") %>%
+ dySeries("GDP", label="Revised according to GDP") %>%
+ dySeries("TURE", label="Ture value") %>%
+ dyRangeSelector()
+ManilaPlot_final
+
+## Dakar
+# the 2013 demand data of Dakar
+demandData = data_list_demand[[5]]
+demandData = na.omit(demandData)
+dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max)
+dailyMax$YR = substr(as.factor(dailyMax[,1]),1,4)
+dailyMax = subset(dailyMax,dailyMax$YR==2013)
+demand_developing_2013 <- dailyMax[,2]
+
+Dakar_correction <- GDP_correction(Dakar_GDP_2013_est,Dakar_GDP_2014_est,
+ Dakar_Popu_2013_est,Dakar_Popu_2014,
+ Singapore_GDP_2013,Singapore_Popu_2013_est,
+ demand_developing_2013,demand_developed_2013)
+
+# Correction comparison (very little change)
+library(lubridate)
+DakarPlot = merge(actual = as.zoo(forecast_stl$x),predicted=as.zoo(cbind(mean=forecast_stl$mean,
+ lower=forecast_stl$lower[,1],
+ upper=forecast_stl$upper[,1])),
+ GDP=forecast_stl_corr,
+ TURE=ts(true_data[,2],start = c(2013),end=c(2014), frequency=365))
+DakarPlotxts=xts(DakarPlot, date_decimal(index(DakarPlot)))
+DakarPlot_final=dygraph(DakarPlotxts, "Dakar Power Demand Data") %>%
+ dySeries("actual", label = "Actual") %>%
+ dySeries(c("lower", "mean", "upper"), label = "Predicted") %>%
+ dySeries("GDP", label="Revised according to GDP") %>%
+ dySeries("TURE", label="Ture value") %>%
+ dyRangeSelector()
+DakarPlot_final
+
diff --git a/Energy_Lab/D-GDP_correction.Rmd b/Energy_Lab/D-GDP_correction.Rmd
new file mode 100644
index 0000000..6b995d9
--- /dev/null
+++ b/Energy_Lab/D-GDP_correction.Rmd
@@ -0,0 +1,222 @@
+---
+title: "GDP correction of the forecast"
+author: "Yue Guo"
+date: "May 26, 2015"
+output: html_document
+---
+We choose: (based on the data quality and GDP data resource)
+(1) Singapore - Manila_Philippines
+(2) Singapore - Dakar_Senegal
+(3) San Diego - Amman_Jordan (to be continued)
+(based on the GDP data we estimate the demand in 2013)
+
+```{r,echo=FALSE}
+# GDP of countries (current US$)
+# source:World Bank
+Jodan_GDP_10_13 <- c(26425379437, 28840263380, 31015239545, 33678500148)
+Philippines_GDP_10_13 <- c(1.99589E+11, 2.24143E+11, 2.5024E+11, 2.72067E+11)
+Senegal_GDP_10_13 <- c(12932427724, 14440676498, 14045680427, 14791699009)
+Singapore_GDP_2011 <- 274065000000
+Singapore_GDP_2012 <- 286908000000
+Singapore_GDP_2013 <- 297941000000
+Queensland_GDP_2011 <- 267942000000
+Queensland_GDP_2012 <- 284441000000
+Queensland_GDP_2013 <- 290158000000
+# data issue 10-11,11-12,12-13
+# lack of Tokyo data
+
+# source: 'The global urban competitiveness report - 2011'
+Amman_GDP_2010 <- 6128640000*1.09
+Amman_GDP_2011 <- 7216940000*1.05
+Dakar_GDP_2010 <- 1363990000*1.09
+Dakar_GDP_2011 <- 1595730000*1.05
+Manila_GDP_2010 <- 3669550000*1.09
+Manila_GDP_2011 <- 4048010000*1.05
+
+# estimate data
+# calculate the city/country GDP
+Amman_in_Jodan = mean(Amman_GDP_2010/Jodan_GDP_10_13[1],Amman_GDP_2011/Jodan_GDP_10_13[2])
+Dakar_in_Senegal = mean(Dakar_GDP_2010/Senegal_GDP_10_13[1],Dakar_GDP_2011/Senegal_GDP_10_13[2])
+Manila_in_Philippines = mean(Manila_GDP_2010/Philippines_GDP_10_13[1],Manila_GDP_2011/Philippines_GDP_10_13[2])
+
+Amman_GDP_2012_est = Jodan_GDP_10_13[3]*Amman_in_Jodan
+Amman_GDP_2013_est = Jodan_GDP_10_13[4]*Amman_in_Jodan
+Amman_GDP_2014_est = Jodan_GDP_10_13[4]*1.03*Amman_in_Jodan
+Dakar_GDP_2012_est = Senegal_GDP_10_13[3]*Dakar_in_Senegal
+Dakar_GDP_2013_est = Senegal_GDP_10_13[4]*Dakar_in_Senegal
+Dakar_GDP_2014_est = Senegal_GDP_10_13[4]*1.045*Dakar_in_Senegal
+Manila_GDP_2012_est = Philippines_GDP_10_13[3]*Manila_in_Philippines
+Manila_GDP_2013_est = Philippines_GDP_10_13[4]*Manila_in_Philippines
+Manila_GDP_2014_est = Philippines_GDP_10_13[4]*1.06*Manila_in_Philippines
+# 2014 GDP growth rate forcast from World bank
+
+## the population data
+Amman_Popu_2014 = 1148000
+Dakar_Popu_2014 = 3393000
+Manila_Popu_2014 = 1745000 # NCR:12764000
+Tokyo_Popu_2014 = 37833000
+Singapore_Popu_2014 = 5517000
+# source: UN, Department of Economic and Social Affairs
+Queensland_Popu_2013 = 4690910
+Queensland_Popu_2014 = 4740927
+# http://www.qgso.qld.gov.au/products/reports/pop-growth-qld/index.php, Dec. 30
+
+# estimate the population of 2013
+Amman_Popu_2015_est = 1155000
+Dakar_Popu_2015_est = 3520000
+Manila_Popu_2015_est = 1745000*(1.661/1.581)^(1/7)
+Tokyo_Popu_2015_est = 38001000
+Singapore_Popu_2015_est = 5619000
+Amman_Popu_2013_est = (Amman_Popu_2014)^2/Amman_Popu_2015_est
+Dakar_Popu_2013_est = (Dakar_Popu_2014)^2/Dakar_Popu_2015_est
+Manila_Popu_2013_est = 1745000/(1.661/1.581)^(1/7)
+Tokyo_Popu_2013_est = (Tokyo_Popu_2014)^2/Tokyo_Popu_2015_est
+Singapore_Popu_2013_est = (Singapore_Popu_2014)^2/Singapore_Popu_2015_est # we have true value
+```
+The GDP_correction function:
+```{r}
+GDP_correction <- function(GDP_developing_2013,GDP_developing_2014,
+ Popu_developing_2013,Popu_developing_2014,
+ GDP_developed_2013,Popu_developed_2013,
+ demand_developing_2013,demand_developed_2013){
+ delta_PPP <- vector("numeric",365)
+ for(i in 1:365){
+ delta_PPP[i] = GDP_developing_2013/Popu_developing_2013*((((GDP_developing_2014/Popu_developing_2014)/(GDP_developing_2013/Popu_developing_2013))^((i-1)/365))-1)
+ }
+ delta_demand <- vector("numeric",365)
+ for(i in 1:365){
+ # delta_demand[i] = (demand_developed_2013[i]/Popu_developed_2013*Popu_developing_2013-demand_developing_2013[i])*delta_PPP[i]/(GDP_developed_2013/Popu_developed_2013-GDP_developing_2013/Popu_developing_2013)
+ delta_demand[i] = (demand_developed_2013[365]/Popu_developed_2013*Popu_developing_2013-demand_developing_2013[365])*delta_PPP[i]/(GDP_developed_2013/Popu_developed_2013-GDP_developing_2013/Popu_developing_2013)
+ }
+ return(delta_demand)
+}
+```
+1. Correction for Manila
+
+```{r,echo=FALSE}
+setwd("/Users/YueGuo/Documents/Energy_Lab/data/")
+load("DemandData_use.RData")
+# the 2013 demand data of Manila
+demandData = data_list_demand[[6]]
+demandData = na.omit(demandData)
+dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max)
+dailyMax$YR = substr(as.factor(dailyMax[,1]),1,4)
+dailyMax = subset(dailyMax,dailyMax$YR==2013)
+demand_developing_2013 <- dailyMax[,2]
+# the 2013 demand data of Singapore
+demandData = data_list_demand[[13]]
+demandData = na.omit(demandData)
+dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max)
+dailyMax$YR = substr(as.factor(dailyMax[,1]),1,4)
+dailyMax = subset(dailyMax,dailyMax$YR==2013)
+demand_developed_2013 <- dailyMax[,2]
+```
+```{r}
+Manila_correction <- GDP_correction(Manila_GDP_2013_est,Manila_GDP_2014_est,
+ Manila_Popu_2013_est,Manila_Popu_2014,
+ Singapore_GDP_2013,Singapore_Popu_2013_est,
+ demand_developing_2013,demand_developed_2013)
+
+```
+The Correction comparison: (very little change)
+```{r,include=FALSE}
+library(forecast)
+```
+```{r,echo=FALSE}
+demandData = data_list_demand[[6]]
+demandData = na.omit(demandData)
+dailyMax_all=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max)
+dailyMax_all$YR = substr(as.factor(dailyMax_all[,1]),1,4)
+dailyMax_raw = rbind(subset(dailyMax_all,dailyMax_all$YR==2011),subset(dailyMax_all,dailyMax_all$YR==2012))
+dailyMax <- data.frame(Date = dailyMax_raw[,1],MW = dailyMax_raw[,2])
+dailyDataTs=ts(dailyMax$MW, start=c(2011), end=c(2013), frequency=365)
+forecast_stl <- forecast(stl(dailyDataTs, s.window="period"), h=365, method="arima")
+true_data <- subset(dailyMax_all,dailyMax_all$YR==2013)
+```
+```{r}
+forecast_stl_corr <- forecast_stl$mean + Manila_correction
+MPE_Cal <- function(data_true,data_est){
+ return(1/length(data_true)*sum(abs(data_true-data_est)/data_true))
+}
+MPE_Cal(true_data[,2],forecast_stl_corr)
+# the original one: 0.07354473
+
+```
+```{r}
+library(lubridate)
+ManilaPlot = merge(actual = as.zoo(forecast_stl$x),predicted=as.zoo(cbind(mean=forecast_stl$mean,
+ lower=forecast_stl$lower[,1],
+ upper=forecast_stl$upper[,1])),
+ GDP=forecast_stl_corr,
+ TURE=ts(true_data[,2],start = c(2013),end=c(2014), frequency=365))
+```
+ManilaPlotxts=xts(ManilaPlot, date_decimal(index(ManilaPlot)))
+ManilaPlot_final=dygraph(ManilaPlotxts, "Manila Power Demand Data") %>%
+ dySeries("actual", label = "Actual") %>%
+ dySeries(c("lower", "mean", "upper"), label = "Predicted") %>%
+ dySeries("GDP", label="Revised according to GDP") %>%
+ dySeries("TURE", label="Ture value") %>%
+ dyRangeSelector()
+ManilaPlot_final
+
+
+
+
+2. Correction for Dakar
+
+```{r,echo=FALSE}
+# the 2013 demand data of Dakar
+demandData = data_list_demand[[5]]
+demandData = na.omit(demandData)
+dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max)
+dailyMax$YR = substr(as.factor(dailyMax[,1]),1,4)
+dailyMax = subset(dailyMax,dailyMax$YR==2013)
+demand_developing_2013 <- dailyMax[,2]
+```
+```{r}
+Dakar_correction <- GDP_correction(Dakar_GDP_2013_est,Dakar_GDP_2014_est,
+ Dakar_Popu_2013_est,Dakar_Popu_2014,
+ Singapore_GDP_2013,Singapore_Popu_2013_est,
+ demand_developing_2013,demand_developed_2013)
+
+```
+The Correction comparison: (very little change)
+```{r,echo=FALSE}
+demandData = data_list_demand[[5]]
+demandData = na.omit(demandData)
+dailyMax_all=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max)
+dailyMax_all$YR = substr(as.factor(dailyMax_all[,1]),1,4)
+dailyMax_raw = rbind(subset(dailyMax_all,dailyMax_all$YR==2011),subset(dailyMax_all,dailyMax_all$YR==2012))
+dailyMax <- data.frame(Date = dailyMax_raw[,1],MW = dailyMax_raw[,2])
+dailyDataTs=ts(dailyMax$MW, start=c(2011), end=c(2013), frequency=365)
+forecast_HW <- forecast(HoltWinters(dailyDataTs), h=365, method="arima")
+true_data <- subset(dailyMax_all,dailyMax_all$YR==2013)
+```
+```{r}
+forecast_HW_corr <- forecast_HW$mean + Dakar_correction
+MPE_Cal <- function(data_true,data_est){
+ return(1/length(data_true)*sum(abs(data_true-data_est)/data_true))
+}
+MPE_Cal(true_data[,2],forecast_HW_corr)
+# the original one: 0.04296551
+# NOT IMPROVED
+
+```
+```{r}
+library(lubridate)
+DakarPlot = merge(actual = as.zoo(forecast_HW$x),predicted=as.zoo(cbind(mean=forecast_HW$mean,
+ lower=forecast_HW$lower[,1],
+ upper=forecast_HW$upper[,1])),
+ GDP=forecast_HW_corr,
+ TURE=ts(true_data[,2],start = c(2013),end=c(2014), frequency=365))
+```
+DakarPlotxts=xts(DakarPlot, date_decimal(index(DakarPlot)))
+DakarPlot_final=dygraph(DakarPlotxts, "Dakar Power Demand Data") %>%
+ dySeries("actual", label = "Actual") %>%
+ dySeries(c("lower", "mean", "upper"), label = "Predicted") %>%
+ dySeries("GDP", label="Revised according to GDP") %>%
+ dySeries("TURE", label="Ture value") %>%
+ dyRangeSelector()
+DakarPlot_final
+
+
\ No newline at end of file
diff --git a/Energy_Lab/D-GDP_correction.html b/Energy_Lab/D-GDP_correction.html
new file mode 100644
index 0000000..e0ffcac
--- /dev/null
+++ b/Energy_Lab/D-GDP_correction.html
@@ -0,0 +1,159 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+GDP correction of the forecast
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
GDP correction of the forecast
+
Yue Guo
+
May 26, 2015
+
+
+
+
We choose: (based on the data quality and GDP data resource) (1) Singapore - Manila_Philippines (2) Singapore - Dakar_Senegal (3) San Diego - Amman_Jordan (to be continued) (based on the GDP data we estimate the demand in 2013)