diff --git a/Energy_Lab/A-Similarity_equal_temp_and_dewpoint.R b/Energy_Lab/A-Similarity_equal_temp_and_dewpoint.R new file mode 100644 index 0000000..9ea1363 --- /dev/null +++ b/Energy_Lab/A-Similarity_equal_temp_and_dewpoint.R @@ -0,0 +1,197 @@ +# Similarity analysis (with equal weights of temp and dew.point) +############################### +# A. filter the data +############################### +# 1. use cities with both demand and weather data (24) +data_filter <- function(data){ + if(length(levels(factor(data$YR)))<3) return(paste("Not enough data: ",length(levels(factor(data$YR))),"-year data",sep="")) + else return(paste("qualified data: ",length(levels(factor(data$YR))),"-year data",sep="")) +} +## Filter the data +# the San Deiego demand data is from the Formatted_US_Demand.csv +# filter the demand data +setwd("/Users/YueGuo/Documents/Energy_Lab/data/") +demand_files <- list.files("demand") +demand_filter_results <- vector("character",length(demand_files)) +for(i in 1:length(demand_files)){ + data = read.csv(paste("demand/",demand_files[i],sep="")) + demand_filter_results[i] = data_filter(data) +} +# filter the weather data +setwd("/Users/YueGuo/Documents/Energy_Lab/data/") +weather_files <- list.files("weather") +weather_filter_results <- vector("character",length(weather_files)) +for(i in 1:length(weather_files)){ + data = read.csv(paste("weather/",weather_files[i],sep="")) + weather_filter_results[i] = data_filter(data) +} +# compare the results +city_names <- c("Abidjan_CotedIvoire","Accra_Ghana","Amman_Jordan","Antigua","Beirut_Lebanon", + "Chandigarh_India","Dakar_Senegal","Delhi_Discoms","Delhi_India","Kano_Nigeria", + "Kupang_Indonesia","Manila_Philippine","MbabaneCity_Swaziland","Nairobi_Kenya", + "NewSouthWales_Australia","NYC_US","Philadelphia_US","Queensland_Australia","SanDiego_US", + "Singapore_Singapore","SouthAustralia_Australia", + "Tema_Ghana","Tokyo_Japan","Victoria_Australia") +# show the description of data size: 
+cbind(city_names,demand_filter_results,weather_filter_results) +# Comment: but there exits years with less months; developed cities have better data + +# 2. use cities with no less than 2-year demand data +# (1)more than 2-year demand data: for building a model(2 years) + test(1 year) +# (2)no less than 1-year demand data: for similarity analysis +# (3)NYC's weather data's detail is lost +# (4)drop Delhi data +delete_list <- c(2,4-1,8-2,9-3,10-4,11-5,16-6,22-7) +for(i in 1:length(delete_list)){ + demand_files = demand_files[-delete_list[i]] + weather_files = weather_files[-delete_list[i]] + city_names = city_names[-delete_list[i]] +} +# 16 cities left (8 developing cities, 8 developed cities) +# load the data of chosen cities +data_list_demand <- vector("list",16) +data_list_weather <- vector("list",16) +setwd("/Users/YueGuo/Documents/Energy_Lab/data/") +for(i in 1:16){ + data_list_demand[[i]] = read.csv(paste("demand/",demand_files[i],sep="")) + data_list_weather[[i]] = read.csv(paste("weather/",weather_files[i],sep="")) +} +# Set same format of data's time for merging +data_Time <- function(data){ + # drop NA's + data=na.omit(data) + # set time + if(names(data)[5]=="HR"){ + #if(nchar(deparse(format(data$hours[1])))>16){ + if(is.null(data$MIN)){ + data$Time = paste(data$YR,"-",data$M,"-",data$D," ",data$HR,":00:00",sep="") + data$Time = as.POSIXct(data$Time, format="%Y-%m-%d %H:%M:%S") + } else { + data$Time = paste(data$YR,"-",data$M,"-",data$D," ",data$HR,":",data$MIN,":00",sep="") + data$Time = as.POSIXct(data$Time, format="%Y-%m-%d %H:%M:%S") + } + #else {data$Time = as.POSIXct(paste("20",sub('^..','',as.character(as.POSIXct(data$hours, format="%m/%d/%Y %H:%M"))),sep=""),format="%Y-%m-%d %H:%M:%S")} + } + else{ + if(nchar(deparse(format(data$hours[1])))>16){ + data$Time = as.POSIXct(data$hours, format="%Y-%m-%d %H:%M:%S")} + else {data$Time = as.POSIXct(paste("20",sub('^..','',as.character(as.POSIXct(data$hours, format="%m/%d/%Y 
%H:%M"))),sep=""),format="%Y-%m-%d %H:%M:%S")} + } + return(data) +} +# the date +data_Date <- function(data){ + data$Date <- as.POSIXct(substr(as.character(data$Time),1,10), format="%Y-%m-%d") + return(data) +} +# set Date and Time for all data +for(i in 1:16){ + data_list_weather[[i]] = data_Time(data_list_weather[[i]]) + data_list_demand[[i]] = data_Time(data_list_demand[[i]]) + data_list_weather[[i]] = data_Date(data_list_weather[[i]]) + data_list_demand[[i]] = data_Date(data_list_demand[[i]]) +} + +############################### +# B. pair the cities +############################### +# The function to get the latest whole year weather data of each city +latest_year_weaData <- function(data){ + if(nrow(subset(data,data$YR==max(data$YR)))<364*24){ + data_use <- subset(data,data$YR==(max(data$YR)-1)) + }else{data_use <- subset(data,data$YR==max(data$YR))} +} +ly_data_list_weather <- vector("list",16) +for(i in 1:16){ + ly_data_list_weather[[i]] = latest_year_weaData(data_list_weather[[i]]) + ly_data_list_weather[[i]]$Time_no_year = substr(as.character(ly_data_list_weather[[i]]$Time),6,19) +} +# The similarity score function: +similarity <- function(data1_use,data2_use){ + # merge the data according to the lastest year data + compare_data=merge(data1_use[, c("Time_no_year", "TEMP", "DEW.POINT")], + data2_use[, c("Time_no_year", "TEMP", "DEW.POINT")], by="Time_no_year") + # calculate the similarity score + simil_score = sum(compare_data[,2]*compare_data[,4])/(sum(compare_data[,2]^2))^.5/(sum(compare_data[,4]^2))^.5 + sum(compare_data[,3]*compare_data[,5])/(sum(compare_data[,3]^2))^.5/(sum(compare_data[,5]^2))^.5 + return(simil_score) +} +# calculate all chosen cities' similarity scores +similarity_matrix <- matrix(0,16,16) +for(i in 1:15){ + for(j in (i+1):16){ + similarity_matrix[i,j] = similarity(ly_data_list_weather[[i]],ly_data_list_weather[[j]]) + } +} +for(i in 1:16){ + for(j in 1:i){ + similarity_matrix[i,j] = similarity_matrix[j,i] + } +} 
+diag(similarity_matrix) <- rep(2,16) +rownames(similarity_matrix) <- city_names +colnames(similarity_matrix) <- city_names +## developed - stable GDP growth rate +## developing - unstable GDP growth rate + +# find the max scores for a developing city with all developed cities +similar_city <- function(city_name){ + similarity_df <- data.frame(similarity_matrix) + command <- paste("similarity_df","$",city_name,sep="") + simils <- eval(parse(text = command))[9:16] + results <- vector("list",4) + results[[1]] <- city_names[which.max(simils)+8] + results[[2]] <- max(simils) + results[[3]] <- data_list_weather[[which.max(simils)+8]]$LONG[1] + results[[4]] <- data_list_weather[[which.max(simils)+8]]$LAT[1] + return(results) +} +similar_city_list <- vector("character",8) +similar_score_list <- vector("character",8) +similar_long_list <- vector("character",8) +similar_lat_list <- vector("character",8) +long_list <- vector("character",8) +lat_list <- vector("character",8) +for(i in 1:8){ + similar_city_list[i] = similar_city(city_names[i])[[1]] + similar_score_list[i] = similar_city(city_names[i])[[2]] + similar_long_list[i] = similar_city(city_names[i])[[3]] + long_list[i] = data_list_weather[[i]]$LONG[1] + similar_lat_list[i] = similar_city(city_names[i])[[4]] + lat_list[i] = data_list_weather[[i]]$LAT[1] +} + +# show the pairs +compare <- data.frame(cbind(city_names[1:8],long_list,lat_list,similar_long_list,similar_lat_list,similar_city_list,similar_score_list)) +names(compare) <- c("city","long","lat","center-long","center-lat","center_city","similar_score") +compare + +# show the groups on the map: +# 1.Singapore group +S_sim <- rbind(compare[1,],compare[5:8,]) +map_S_sim <- data.frame(cities = c(as.character(S_sim[,1]),"Singapore_Singapore"), + long = c(as.numeric(as.character(S_sim[,2])),103.867), + lat = c(as.numeric(as.character(S_sim[,3])),1.417) +) +leaflet(data = map_S_sim) %>% addTiles() %>% + addMarkers(~long, ~lat, popup = ~cities) + +# 2.San Diego group 
+SD_sim <- rbind(compare[2:4,]) +map_SD_sim <- data.frame(cities = c(as.character(SD_sim[,1]),"SanDiego_US"), + long = c(as.numeric(as.character(SD_sim[,2])),-117.1635), + lat = c(as.numeric(as.character(SD_sim[,3])),32.734) +) +leaflet(data = map_SD_sim) %>% addTiles() %>% + addMarkers(~long, ~lat, popup = ~cities) + +# the table of groups +Similarity_tabel <- data.frame(group = c("group1(5 cities)","group2(3 cities)"), + developed_city = c("Singapore","San Diego"), + developed_city_latitude = c(1.417,32.734), + mean_latitude =c(mean(as.numeric(as.character(S_sim$lat))),mean(as.numeric(as.character(SD_sim$lat)))), + mean_similarity_score = c(mean(as.numeric(as.character(S_sim$similar_score))),mean(as.numeric(as.character(SD_sim$similar_score)))), + climate_zone = c("-","-")) +## the temperature dominate the similarity score + +# output demand data that we'll use +save(data_list_demand, file = "DemandData_use.RData") diff --git a/Energy_Lab/A-Similarity_equal_temp_and_dewpoint.Rmd b/Energy_Lab/A-Similarity_equal_temp_and_dewpoint.Rmd new file mode 100644 index 0000000..3c549d8 --- /dev/null +++ b/Energy_Lab/A-Similarity_equal_temp_and_dewpoint.Rmd @@ -0,0 +1,224 @@ +--- +title:

Similarity analysis
+ (with equal weights of temp and dew.point)

+author: "Yue Guo" +date: "May 25, 2015" +output: html_document +--- +```{r set-options, echo=FALSE, cache=FALSE} +options(width = 150) +``` +A. filter the data +1. use cities with both demand and weather data (24) +```{r} +data_filter <- function(data){ + if(length(levels(factor(data$YR)))<3) return(paste("Not enough data: ",length(levels(factor(data$YR))),"-year data",sep="")) + else return(paste("qualified data: ",length(levels(factor(data$YR))),"-year data",sep="")) +} +``` +Filter the data +```{r} +# the San Deiego demand data is from the Formatted_US_Demand.csv +# filter the demand data +setwd("/Users/YueGuo/Documents/Energy_Lab/data/") +demand_files <- list.files("demand") +demand_filter_results <- vector("character",length(demand_files)) +for(i in 1:length(demand_files)){ + data = read.csv(paste("demand/",demand_files[i],sep="")) + demand_filter_results[i] = data_filter(data) +} +# filter the weather data +setwd("/Users/YueGuo/Documents/Energy_Lab/data/") +weather_files <- list.files("weather") +weather_filter_results <- vector("character",length(weather_files)) +for(i in 1:length(weather_files)){ + data = read.csv(paste("weather/",weather_files[i],sep="")) + weather_filter_results[i] = data_filter(data) +} +# compare the results +city_names <- c("Abidjan_CotedIvoire","Accra_Ghana","Amman_Jordan","Antigua","Beirut_Lebanon", + "Chandigarh_India","Dakar_Senegal","Delhi_Discoms","Delhi_India","Kano_Nigeria", + "Kupang_Indonesia","Manila_Philippine","MbabaneCity_Swaziland","Nairobi_Kenya", + "NewSouthWales_Australia","NYC_US","Philadelphia_US","Queensland_Australia", + "SanDiego_US","Singapore_Singapore","SouthAustralia_Australia", + "Tema_Ghana","Tokyo_Japan","Victoria_Australia") +``` +show the description of data size: +```{r} +cbind(city_names,demand_filter_results,weather_filter_results) +# Comment: but there exits years with less months; developed cities have better data +``` +2. 
use cities with no less than 2-year demand data + (1)more than 2-year demand data: for building a model(2 years) + test(1 year) + (2)no less than 1-year demand data: for similarity analysis + (3)NYC's weather data's detail is lost + (4)drop Delhi data +```{r} +delete_list <- c(2,4-1,8-2,9-3,10-4,11-5,16-6,22-7) +for(i in 1:length(delete_list)){ + demand_files = demand_files[-delete_list[i]] + weather_files = weather_files[-delete_list[i]] + city_names = city_names[-delete_list[i]] +} +# 16 cities left (9 developing cities, 7 developed cities) +``` +load the data of chosen cities +```{r} +data_list_demand <- vector("list",16) +data_list_weather <- vector("list",16) +setwd("/Users/YueGuo/Documents/Energy_Lab/data/") +for(i in 1:16){ + data_list_demand[[i]] = read.csv(paste("demand/",demand_files[i],sep="")) + data_list_weather[[i]] = read.csv(paste("weather/",weather_files[i],sep="")) +} +``` +Set same format of data's time for merging +```{r} +data_Time <- function(data){ + # drop NA's + data=na.omit(data) + # set time + if(names(data)[5]=="HR"){ + #if(nchar(deparse(format(data$hours[1])))>16){ + if(is.null(data$MIN)){ + data$Time = paste(data$YR,"-",data$M,"-",data$D," ",data$HR,":00:00",sep="") + data$Time = as.POSIXct(data$Time, format="%Y-%m-%d %H:%M:%S") + } else { + data$Time = paste(data$YR,"-",data$M,"-",data$D," ",data$HR,":",data$MIN,":00",sep="") + data$Time = as.POSIXct(data$Time, format="%Y-%m-%d %H:%M:%S") + } + #else {data$Time = as.POSIXct(paste("20",sub('^..','',as.character(as.POSIXct(data$hours, format="%m/%d/%Y %H:%M"))),sep=""),format="%Y-%m-%d %H:%M:%S")} + } + else{ + if(nchar(deparse(format(data$hours[1])))>16){ + data$Time = as.POSIXct(data$hours, format="%Y-%m-%d %H:%M:%S")} + else {data$Time = as.POSIXct(paste("20",sub('^..','',as.character(as.POSIXct(data$hours, format="%m/%d/%Y %H:%M"))),sep=""),format="%Y-%m-%d %H:%M:%S")} + } + return(data) +} +# the date +data_Date <- function(data){ + data$Date <- 
as.POSIXct(substr(as.character(data$Time),1,10), format="%Y-%m-%d") + return(data) +} +# set Date and Time for all data +for(i in 1:16){ + data_list_weather[[i]] = data_Time(data_list_weather[[i]]) + data_list_demand[[i]] = data_Time(data_list_demand[[i]]) + data_list_weather[[i]] = data_Date(data_list_weather[[i]]) + data_list_demand[[i]] = data_Date(data_list_demand[[i]]) +} +``` +B. pair the cities +```{r} +# The function to get the latest whole year weather data of each city +latest_year_weaData <- function(data){ + if(nrow(subset(data,data$YR==max(data$YR)))<364*24){ + data_use <- subset(data,data$YR==(max(data$YR)-1)) + }else{data_use <- subset(data,data$YR==max(data$YR))} +} +ly_data_list_weather <- vector("list",16) +for(i in 1:16){ + ly_data_list_weather[[i]] = latest_year_weaData(data_list_weather[[i]]) + ly_data_list_weather[[i]]$Time_no_year = substr(as.character(ly_data_list_weather[[i]]$Time),6,19) +} +# The similarity score function: +similarity <- function(data1_use,data2_use){ + # merge the data according to the lastest year data + compare_data=merge(data1_use[, c("Time_no_year", "TEMP", "DEW.POINT")], + data2_use[, c("Time_no_year", "TEMP", "DEW.POINT")], by="Time_no_year") + # calculate the similarity score + simil_score = sum(compare_data[,2]*compare_data[,4])/(sum(compare_data[,2]^2))^.5/(sum(compare_data[,4]^2))^.5 + sum(compare_data[,3]*compare_data[,5])/(sum(compare_data[,3]^2))^.5/(sum(compare_data[,5]^2))^.5 + return(simil_score) +} +``` +calculate all chosen cities' similarity scores +```{r} +similarity_matrix <- matrix(0,16,16) +for(i in 1:15){ + for(j in (i+1):16){ + similarity_matrix[i,j] = similarity(ly_data_list_weather[[i]],ly_data_list_weather[[j]]) + } +} +for(i in 1:16){ + for(j in 1:i){ + similarity_matrix[i,j] = similarity_matrix[j,i] + } +} +diag(similarity_matrix) <- rep(2,16) +rownames(similarity_matrix) <- city_names +colnames(similarity_matrix) <- city_names +## developed - stable GDP growth rate +## developing - unstable 
GDP growth rate +``` +find the max scores for a developing city with all developed cities +```{r} +similar_city <- function(city_name){ + similarity_df <- data.frame(similarity_matrix) + command <- paste("similarity_df","$",city_name,sep="") + simils <- eval(parse(text = command))[9:16] + results <- vector("list",4) + results[[1]] <- city_names[which.max(simils)+8] + results[[2]] <- max(simils) + results[[3]] <- data_list_weather[[which.max(simils)+8]]$LONG[1] + results[[4]] <- data_list_weather[[which.max(simils)+8]]$LAT[1] + return(results) +} +similar_city_list <- vector("character",8) +similar_score_list <- vector("character",8) +similar_long_list <- vector("character",8) +similar_lat_list <- vector("character",8) +long_list <- vector("character",8) +lat_list <- vector("character",8) +for(i in 1:8){ + similar_city_list[i] = similar_city(city_names[i])[[1]] + similar_score_list[i] = similar_city(city_names[i])[[2]] + similar_long_list[i] = similar_city(city_names[i])[[3]] + long_list[i] = data_list_weather[[i]]$LONG[1] + similar_lat_list[i] = similar_city(city_names[i])[[4]] + lat_list[i] = data_list_weather[[i]]$LAT[1] +} +``` +show the pairs +```{r} +compare <- data.frame(cbind(city_names[1:8],long_list,lat_list,similar_long_list,similar_lat_list,similar_city_list,similar_score_list)) +names(compare) <- c("city","long","lat","center-long","center-lat","center_city","similar_score") +compare +``` +show the groups on the map: + +1.Singapore group +```{r} +S_sim <- rbind(compare[1,],compare[5:8,]) +map_S_sim <- data.frame(cities = c(as.character(S_sim[,1]),"Singapore_Singapore"), + long = c(as.numeric(as.character(S_sim[,2])),103.867), + lat = c(as.numeric(as.character(S_sim[,3])),1.417) +) +``` +leaflet(data = map_S_sim) %>% addTiles() %>% + addMarkers(~long, ~lat, popup = ~cities) +![Singapore group](/Users/YueGuo/Documents/Energy_Lab/group_1.jpg) + +2.San Diego group +```{r} +SD_sim <- rbind(compare[2:4,]) +map_SD_sim <- data.frame(cities = 
c(as.character(SD_sim[,1]),"SanDiego_US"), + long = c(as.numeric(as.character(SD_sim[,2])),-117.1635), + lat = c(as.numeric(as.character(SD_sim[,3])),32.734) +) +``` +leaflet(data = map_SD_sim) %>% addTiles() %>% + addMarkers(~long, ~lat, popup = ~cities) +![San Diego group](/Users/YueGuo/Documents/Energy_Lab/group_2.jpg) + +Finally,the table of groups: +```{r} +Similarity_tabel <- data.frame(group = c("group1(5 cities)","group2(3 cities)"), + developed_city = c("Singapore","San Diego"), + developed_city_latitude = c(1.417,32.734), + mean_latitude =c(mean(as.numeric(as.character(S_sim$lat))),mean(as.numeric(as.character(SD_sim$lat)))), + mean_similarity_score = c(mean(as.numeric(as.character(S_sim$similar_score))),mean(as.numeric(as.character(SD_sim$similar_score)))), + climate_zone = c("-","-")) +Similarity_tabel +## the temperature dominate the similarity score +``` \ No newline at end of file diff --git a/Energy_Lab/A-Similarity_equal_temp_and_dewpoint.html b/Energy_Lab/A-Similarity_equal_temp_and_dewpoint.html new file mode 100644 index 0000000..ee3a6ee --- /dev/null +++ b/Energy_Lab/A-Similarity_equal_temp_and_dewpoint.html @@ -0,0 +1,312 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +

A. filter the data
1. use cities with both demand and weather data (24)

+
data_filter <- function(data){
  # Classify a city's dataset by how many distinct years it covers.
  # Modelling needs at least 3 years (2 to fit + 1 to test); fewer is flagged.
  #
  # @param data A data.frame with a YR (year) column.
  # @return A one-line character description of the data size.
  n_years <- length(unique(data$YR[!is.na(data$YR)]))
  if (n_years < 3) {
    paste0("Not enough data: ", n_years, "-year data")
  } else {
    paste0("qualified data: ", n_years, "-year data")
  }
}
+

Filter the data

+
+# the San Diego demand data is from the Formatted_US_Demand.csv
+# filter the demand data
+setwd("/Users/YueGuo/Documents/Energy_Lab/data/")
+demand_files <- list.files("demand")
+demand_filter_results <- vector("character",length(demand_files))
+for(i in 1:length(demand_files)){
+  data = read.csv(paste("demand/",demand_files[i],sep=""))
+  demand_filter_results[i] = data_filter(data)
+}
+# filter the weather data
+setwd("/Users/YueGuo/Documents/Energy_Lab/data/")
+weather_files <- list.files("weather")
+weather_filter_results <- vector("character",length(weather_files))
+for(i in 1:length(weather_files)){
+  data = read.csv(paste("weather/",weather_files[i],sep=""))
+  weather_filter_results[i] = data_filter(data)
+}
+# compare the results
+city_names <- c("Abidjan_CotedIvoire","Accra_Ghana","Amman_Jordan","Antigua","Beirut_Lebanon",
+                "Chandigarh_India","Dakar_Senegal","Delhi_Discoms","Delhi_India","Kano_Nigeria",
+                "Kupang_Indonesia","Manila_Philippine","MbabaneCity_Swaziland","Nairobi_Kenya",
+                "NewSouthWales_Australia","NYC_US","Philadelphia_US","Queensland_Australia",
+                "SanDiego_US","Singapore_Singapore","SouthAustralia_Australia",
+                "Tema_Ghana","Tokyo_Japan","Victoria_Australia")
+

show the description of data size:

+
cbind(city_names,demand_filter_results,weather_filter_results)
+
##       city_names                 demand_filter_results          weather_filter_results        
+##  [1,] "Abidjan_CotedIvoire"      "qualified data: 4-year data"  "qualified data: 4-year data" 
+##  [2,] "Accra_Ghana"              "Not enough data: 2-year data" "Not enough data: 2-year data"
+##  [3,] "Amman_Jordan"             "qualified data: 4-year data"  "qualified data: 4-year data" 
+##  [4,] "Antigua"                  "Not enough data: 1-year data" "Not enough data: 1-year data"
+##  [5,] "Beirut_Lebanon"           "qualified data: 4-year data"  "qualified data: 4-year data" 
+##  [6,] "Chandigarh_India"         "qualified data: 3-year data"  "qualified data: 3-year data" 
+##  [7,] "Dakar_Senegal"            "qualified data: 4-year data"  "qualified data: 4-year data" 
+##  [8,] "Delhi_Discoms"            "qualified data: 4-year data"  "qualified data: 3-year data" 
+##  [9,] "Delhi_India"              "Not enough data: 2-year data" "qualified data: 3-year data" 
+## [10,] "Kano_Nigeria"             "Not enough data: 1-year data" "Not enough data: 1-year data"
+## [11,] "Kupang_Indonesia"         "Not enough data: 2-year data" "Not enough data: 1-year data"
+## [12,] "Manila_Philippine"        "qualified data: 5-year data"  "qualified data: 5-year data" 
+## [13,] "MbabaneCity_Swaziland"    "qualified data: 3-year data"  "qualified data: 3-year data" 
+## [14,] "Nairobi_Kenya"            "qualified data: 3-year data"  "qualified data: 4-year data" 
+## [15,] "NewSouthWales_Australia"  "qualified data: 8-year data"  "qualified data: 7-year data" 
+## [16,] "NYC_US"                   "qualified data: 7-year data"  "qualified data: 7-year data" 
+## [17,] "Philadelphia_US"          "qualified data: 3-year data"  "qualified data: 3-year data" 
+## [18,] "Queensland_Australia"     "qualified data: 8-year data"  "qualified data: 7-year data" 
+## [19,] "SanDiego_US"              "qualified data: 8-year data"  "Not enough data: 1-year data"
+## [20,] "Singapore_Singapore"      "qualified data: 3-year data"  "qualified data: 3-year data" 
+## [21,] "SouthAustralia_Australia" "qualified data: 8-year data"  "qualified data: 7-year data" 
+## [22,] "Tema_Ghana"               "Not enough data: 1-year data" "Not enough data: 1-year data"
+## [23,] "Tokyo_Japan"              "qualified data: 7-year data"  "qualified data: 7-year data" 
+## [24,] "Victoria_Australia"       "qualified data: 8-year data"  "qualified data: 7-year data"
+
+# Comment: note that some years contain fewer months of data; developed cities have better data
+
    +
  1. use cities with no less than 2-year demand data
    (1)more than 2-year demand data: for building a model(2 years) + test(1 year)
    (2)no less than 1-year demand data: for similarity analysis
    (3)NYC’s weather data’s detail is lost
    (4)drop Delhi data
  2. +
+
# Indices (in the original 24-city ordering) of the cities to drop:
# Accra(2), Antigua(4), Delhi_Discoms(8), Delhi_India(9), Kano(10),
# Kupang(11), NYC(16), Tema(22).  Removing them in a single negative-index
# subset avoids the error-prone "shift each index after every deletion"
# arithmetic of the original loop (c(2, 4-1, 8-2, ...)).
delete_list <- c(2, 4, 8, 9, 10, 11, 16, 22)
demand_files <- demand_files[-delete_list]
weather_files <- weather_files[-delete_list]
city_names <- city_names[-delete_list]
# 16 cities left (8 developing cities at positions 1-8,
# 8 developed cities at positions 9-16 -- similar_city() relies on this split)
+

load the data of chosen cities

+
data_list_demand <- vector("list",16)
+data_list_weather <- vector("list",16)
+setwd("/Users/YueGuo/Documents/Energy_Lab/data/")
+for(i in 1:16){
+  data_list_demand[[i]] = read.csv(paste("demand/",demand_files[i],sep=""))
+  data_list_weather[[i]] = read.csv(paste("weather/",weather_files[i],sep=""))
+}
+

Set same format of data’s time for merging

+
# Normalise each dataset's timestamps into a single POSIXct "Time" column
# so demand and weather tables can later be merged on time.
# NOTE(review): the branch test assumes column 5 is named "HR" exactly for
# the separate year/month/day/hour layout -- confirm for every input file.
data_Time <- function(data){
  # drop NA's
  # na.omit() removes a row if ANY column is NA, not only the time fields.
  data=na.omit(data)
  # set time
  if(names(data)[5]=="HR"){
    #if(nchar(deparse(format(data$hours[1])))>16){
    # Separate Y/M/D/HR columns; minutes default to ":00" when absent.
    if(is.null(data$MIN)){
      data$Time = paste(data$YR,"-",data$M,"-",data$D," ",data$HR,":00:00",sep="")
      data$Time = as.POSIXct(data$Time, format="%Y-%m-%d %H:%M:%S")
    } else {
      data$Time = paste(data$YR,"-",data$M,"-",data$D," ",data$HR,":",data$MIN,":00",sep="")
      data$Time = as.POSIXct(data$Time, format="%Y-%m-%d %H:%M:%S")
    }
    #else {data$Time = as.POSIXct(paste("20",sub('^..','',as.character(as.POSIXct(data$hours, format="%m/%d/%Y %H:%M"))),sep=""),format="%Y-%m-%d %H:%M:%S")}
  }
  else{
    # Single "hours" timestamp column.  The nchar(deparse(format(...))) > 16
    # test presumably separates full "YYYY-MM-DD HH:MM:SS" strings from a
    # shorter "MM/DD/YY HH:MM" form -- TODO confirm against the raw files.
    if(nchar(deparse(format(data$hours[1])))>16){
      data$Time = as.POSIXct(data$hours, format="%Y-%m-%d %H:%M:%S")}
    # Two-digit-year path: strip the leading two characters of the
    # reparsed timestamp and prepend "20" to rebuild a 4-digit year.
    else {data$Time = as.POSIXct(paste("20",sub('^..','',as.character(as.POSIXct(data$hours, format="%m/%d/%Y %H:%M"))),sep=""),format="%Y-%m-%d %H:%M:%S")}
  }
  return(data)
}
+# the date
data_Date <- function(data) {
  # Add a Date column: the Time column truncated to day resolution and
  # re-parsed as a POSIXct timestamp at local midnight.
  day_string <- format(data$Time, "%Y-%m-%d")
  data$Date <- as.POSIXct(day_string, format = "%Y-%m-%d")
  data
}
+# set Date and Time for all data
+for(i in 1:16){
+  data_list_weather[[i]] = data_Time(data_list_weather[[i]])
+  data_list_demand[[i]] = data_Time(data_list_demand[[i]])
+  data_list_weather[[i]] = data_Date(data_list_weather[[i]])
+  data_list_demand[[i]] = data_Date(data_list_demand[[i]])
+}
+

B. pair the cities

+
# The function to get the latest whole year weather data of each city
latest_year_weaData <- function(data){
  # Return the most recent (nearly) complete year of hourly weather data.
  # A year counts as complete when it has at least 364*24 hourly rows;
  # otherwise fall back to the year before it.
  #
  # @param data A data.frame with a numeric YR (year) column.
  # @return The rows of the chosen year (returned explicitly; the original
  #   relied on the invisible value of an if/else assignment).
  latest <- max(data$YR)
  # which() drops NA matches, mirroring subset()'s NA handling.
  latest_rows <- data[which(data$YR == latest), ]
  if (nrow(latest_rows) < 364 * 24) {
    data[which(data$YR == latest - 1), ]
  } else {
    latest_rows
  }
}
+ly_data_list_weather <- vector("list",16)
+for(i in 1:16){
+  ly_data_list_weather[[i]] = latest_year_weaData(data_list_weather[[i]])
+  ly_data_list_weather[[i]]$Time_no_year = substr(as.character(ly_data_list_weather[[i]]$Time),6,19)
+}
+# The similarity score function:
similarity <- function(data1_use, data2_use){
  # Similarity score of two cities' hourly weather: the sum of the cosine
  # similarities of their temperature series and of their dew-point series
  # (equal weights).  Hours are aligned on the year-stripped timestamp so
  # different calendar years can be compared.
  #
  # @param data1_use,data2_use data.frames with Time_no_year, TEMP,
  #   DEW.POINT columns (one latest-year of hourly weather each).
  # @return A numeric score in [-2, 2]; 2 means both series are
  #   perfectly proportional.
  compare_data <- merge(data1_use[, c("Time_no_year", "TEMP", "DEW.POINT")],
                        data2_use[, c("Time_no_year", "TEMP", "DEW.POINT")],
                        by = "Time_no_year")
  # Cosine similarity of two numeric vectors.
  cosine <- function(a, b) sum(a * b) / sqrt(sum(a^2)) / sqrt(sum(b^2))
  # merge() suffixes duplicate columns with .x/.y; using those names replaces
  # the fragile positional indexing ([,2]...[,5]) of the original.
  cosine(compare_data$TEMP.x, compare_data$TEMP.y) +
    cosine(compare_data$DEW.POINT.x, compare_data$DEW.POINT.y)
}
+

calculate all chosen cities’ similarity scores

+
# Pairwise similarity of all 16 cities' latest-year weather.
# Each score lies in [-2, 2] (temperature + dew-point cosine terms).
similarity_matrix <- matrix(0,16,16)
# Fill the upper triangle only -- similarity() is symmetric in its arguments.
for(i in 1:15){
  for(j in (i+1):16){
    similarity_matrix[i,j] = similarity(ly_data_list_weather[[i]],ly_data_list_weather[[j]])
  }
}
# Mirror the upper triangle into the lower triangle.
for(i in 1:16){
  for(j in 1:i){
    similarity_matrix[i,j] = similarity_matrix[j,i]
  }
}
# A city is maximally similar to itself: 2 is the score's upper bound.
diag(similarity_matrix) <- rep(2,16)
rownames(similarity_matrix) <- city_names
colnames(similarity_matrix) <- city_names
## developed - stable GDP growth rate
## developing - unstable GDP growth rate
+

find the max scores for a developing city with all developed cities

+
similar_city <- function(city_name){
  # For one developing city (columns of similarity_matrix), find the most
  # similar developed city (rows 9-16 of the matrix).
  #
  # @param city_name One of the 16 names in city_names.
  # @return A 4-element list: best developed city's name, its similarity
  #   score, and its longitude and latitude.
  similarity_df <- data.frame(similarity_matrix)
  # [[ ]] lookup by name replaces the original eval(parse(text = ...)),
  # which is fragile and an injection hazard.
  simils <- similarity_df[[city_name]][9:16]
  # Index of the best developed city within the full 16-city ordering.
  best <- which.max(simils) + 8
  list(
    city_names[best],
    max(simils),
    data_list_weather[[best]]$LONG[1],
    data_list_weather[[best]]$LAT[1]
  )
}
similar_city_list <- vector("character", 8)
similar_score_list <- vector("character", 8)
similar_long_list <- vector("character", 8)
similar_lat_list <- vector("character", 8)
long_list <- vector("character", 8)
lat_list <- vector("character", 8)
# For each of the 8 developing cities (positions 1-8), record its best
# developed match plus both cities' coordinates.
for (i in seq_len(8)) {
  # Look the match up once per city; the original recomputed
  # similar_city() four times per iteration.
  res <- similar_city(city_names[i])
  similar_city_list[i] <- res[[1]]
  similar_score_list[i] <- res[[2]]
  similar_long_list[i] <- res[[3]]
  similar_lat_list[i] <- res[[4]]
  long_list[i] <- data_list_weather[[i]]$LONG[1]
  lat_list[i] <- data_list_weather[[i]]$LAT[1]
}
+

show the pairs

+
compare <- data.frame(cbind(city_names[1:8],long_list,lat_list,similar_long_list,similar_lat_list,similar_city_list,similar_score_list))
+names(compare) <- c("city","long","lat","center-long","center-lat","center_city","similar_score")
+compare
+
##                    city    long     lat center-long center-lat         center_city    similar_score
+## 1   Abidjan_CotedIvoire  -3.933    5.25     103.867      1.417 Singapore_Singapore 1.98708826179911
+## 2          Amman_Jordan  35.983  31.983   -117.1635     32.734         SanDiego_US  1.7291900241956
+## 3        Beirut_Lebanon  35.483  33.817   -117.1635     32.734         SanDiego_US 1.90191460408005
+## 4      Chandigarh_India  76.467  30.333   -117.1635     32.734         SanDiego_US 1.88583586880585
+## 5         Dakar_Senegal   -17.5  14.733     103.867      1.417 Singapore_Singapore 1.95405141295681
+## 6     Manila_Philippine 120.983  14.583     103.867      1.417 Singapore_Singapore 1.99100040649518
+## 7 MbabaneCity_Swaziland    31.3 -26.533     103.867      1.417 Singapore_Singapore 1.89241127213131
+## 8         Nairobi_Kenya  36.817  -1.317     103.867      1.417 Singapore_Singapore 1.96075932215374
+

show the groups on the map:

+

1.Singapore group

+
S_sim <- rbind(compare[1,],compare[5:8,])
+map_S_sim <- data.frame(cities = c(as.character(S_sim[,1]),"Singapore_Singapore"),
+                        long = c(as.numeric(as.character(S_sim[,2])),103.867),
+                        lat = c(as.numeric(as.character(S_sim[,3])),1.417)
+)
+

leaflet(data = map_S_sim) %>% addTiles() %>% addMarkers(~long, ~lat, popup = ~cities) Singapore group

+

2.San Diego group

+
SD_sim <- rbind(compare[2:4,])
+map_SD_sim <- data.frame(cities = c(as.character(SD_sim[,1]),"SanDiego_US"),
+                         long = c(as.numeric(as.character(SD_sim[,2])),-117.1635),
+                         lat = c(as.numeric(as.character(SD_sim[,3])),32.734)
+)
+

leaflet(data = map_SD_sim) %>% addTiles() %>% addMarkers(~long, ~lat, popup = ~cities) San Diego group

+

Finally,the table of groups:

+
Similarity_tabel <- data.frame(group = c("group1(5 cities)","group2(3 cities)"),
+                               developed_city = c("Singapore","San Diego"),
+                               developed_city_latitude = c(1.417,32.734),
+                               mean_latitude =c(mean(as.numeric(as.character(S_sim$lat))),mean(as.numeric(as.character(SD_sim$lat)))),
+                               mean_similarity_score = c(mean(as.numeric(as.character(S_sim$similar_score))),mean(as.numeric(as.character(SD_sim$similar_score)))),
+                               climate_zone = c("-","-"))
+Similarity_tabel
+
##              group developed_city developed_city_latitude mean_latitude mean_similarity_score climate_zone
+## 1 group1(5 cities)      Singapore                   1.417       1.34320              1.957062            -
+## 2 group2(3 cities)      San Diego                  32.734      32.04433              1.838980            -
+
+## the temperature dominates the similarity score
+ + +
+ + + + + + + + diff --git a/Energy_Lab/B-view_the_data.R b/Energy_Lab/B-view_the_data.R new file mode 100644 index 0000000..25f8b92 --- /dev/null +++ b/Energy_Lab/B-view_the_data.R @@ -0,0 +1,71 @@ +library(ggplot2) +### the function give the data's plot, and we decide which city to predict +view_data <- function(demandData){ + # delete rows with NA's + demandData=na.omit(demandData) + # the daily peak demand + dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max) + # give the data a date col + dailyMax$Date=as.Date(dailyMax[,1]) + dailyMax=dailyMax[order(dailyMax$Date), ][,-1] + # name the col of demand + names(dailyMax)[1] <- "MW" + # plot the data + ggplot(dailyMax, aes(x=Date, y=MW))+geom_line(colour="#E69F00")+labs(title=demandData$city[1]) +} + +# view all the demand data after filtering the data +plot_list <- vector("list",length(data_list_demand)) +for(i in 1:length(data_list_demand)){ + plot_list[[i]] = view_data(data_list_demand[[i]]) +} + +### show multiple plots +# code from http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/ +# Multiple plot function +# +# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects) +# - cols: Number of columns in layout +# - layout: A matrix specifying the layout. If present, 'cols' is ignored. +# +# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE), +# then plot 1 will go in the upper left, 2 will go in the upper right, and +# 3 will go all the way across the bottom. +# +multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { + library(grid) + + # Make a list from the ... 
arguments and plotlist + plots <- c(list(...), plotlist) + + numPlots = length(plots) + + # If layout is NULL, then use 'cols' to determine layout + if (is.null(layout)) { + # Make the panel + # ncol: Number of columns of plots + # nrow: Number of rows needed, calculated from # of cols + layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), + ncol = cols, nrow = ceiling(numPlots/cols)) + } + + if (numPlots==1) { + print(plots[[1]]) + + } else { + # Set up the page + grid.newpage() + pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) + + # Make each plot, in the correct location + for (i in 1:numPlots) { + # Get the i,j matrix positions of the regions that contain this subplot + matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) + + print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, + layout.pos.col = matchidx$col)) + } + } +} +multiplot(plot_list[[1]], plot_list[[2]],plot_list[[3]],plot_list[[4]],plot_list[[5]], plot_list[[6]],plot_list[[7]],plot_list[[8]], plot_list[[9]],plot_list[[10]], plot_list[[11]],plot_list[[12]],plot_list[[13]],plot_list[[14]],plot_list[[15]], plot_list[[16]], cols=4) + diff --git a/Energy_Lab/B-view_the_data.Rmd b/Energy_Lab/B-view_the_data.Rmd new file mode 100644 index 0000000..55c568c --- /dev/null +++ b/Energy_Lab/B-view_the_data.Rmd @@ -0,0 +1,75 @@ +--- +title: "View the demand data" +author: "Yue Guo" +date: "May 26, 2015" +output: html_document +--- +```{r} +setwd("/Users/YueGuo/Documents/Energy_Lab/data/") +load("DemandData_use.RData") +library(ggplot2) +### the function give the data's plot, and we decide which city to predict +view_data <- function(demandData){ + # delete rows with NA's + demandData=na.omit(demandData) + # the daily peak demand + dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max) + # give the data a date col + dailyMax$Date=as.Date(dailyMax[,1]) + dailyMax=dailyMax[order(dailyMax$Date), ][,-1] + # name the col of demand + names(dailyMax)[1] 
<- "MW" + # plot the data + ggplot(dailyMax, aes(x=Date, y=MW))+geom_line(colour="#E69F00")+labs(title=demandData$city[1]) +} + +# view all the demand data after filtering the data +plot_list <- vector("list",length(data_list_demand)) +for(i in 1:length(data_list_demand)){ + plot_list[[i]] = view_data(data_list_demand[[i]]) +} +``` +Show multiple plots +```{r} +# code from http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/ +# Multiple plot function +multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { + library(grid) + + # Make a list from the ... arguments and plotlist + plots <- c(list(...), plotlist) + + numPlots = length(plots) + + # If layout is NULL, then use 'cols' to determine layout + if (is.null(layout)) { + # Make the panel + # ncol: Number of columns of plots + # nrow: Number of rows needed, calculated from # of cols + layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), + ncol = cols, nrow = ceiling(numPlots/cols)) + } + + if (numPlots==1) { + print(plots[[1]]) + + } else { + # Set up the page + grid.newpage() + pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) + + # Make each plot, in the correct location + for (i in 1:numPlots) { + # Get the i,j matrix positions of the regions that contain this subplot + matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) + + print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, + layout.pos.col = matchidx$col)) + } + } +} +``` +All the demand data are as follows: +```{r, fig.width = 20, fig.height = 20} +multiplot(plot_list[[1]], plot_list[[2]],plot_list[[3]],plot_list[[4]],plot_list[[5]], plot_list[[6]],plot_list[[7]],plot_list[[8]], plot_list[[9]],plot_list[[10]], plot_list[[11]],plot_list[[12]],plot_list[[13]],plot_list[[14]],plot_list[[15]], plot_list[[16]], cols=4) +``` \ No newline at end of file diff --git a/Energy_Lab/B-view_the_data.html b/Energy_Lab/B-view_the_data.html new file mode 100644 index 0000000..b7bbb30 --- 
/dev/null +++ b/Energy_Lab/B-view_the_data.html @@ -0,0 +1,159 @@ + + + + + + + + + + + + + + +View the demand data + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
setwd("/Users/YueGuo/Documents/Energy_Lab/data/")
+load("DemandData_use.RData")
+library(ggplot2)
+### the function give the data's plot, and we decide which city to predict
+view_data <- function(demandData){
+  # delete rows with NA's
+  demandData=na.omit(demandData)
+  # the daily peak demand
+  dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max)
+  # give the data a date col
+  dailyMax$Date=as.Date(dailyMax[,1])
+  dailyMax=dailyMax[order(dailyMax$Date), ][,-1]
+  # name the col of demand
+  names(dailyMax)[1] <- "MW"
+  # plot the data
+  ggplot(dailyMax, aes(x=Date, y=MW))+geom_line(colour="#E69F00")+labs(title=demandData$city[1])
+}
+
+# view all the demand data after filtering the data
+plot_list <- vector("list",length(data_list_demand))
+for(i in 1:length(data_list_demand)){
+  plot_list[[i]] = view_data(data_list_demand[[i]])
+}
+

Show multiple plots

+
# code from http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
+# Multiple plot function
+multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
+  library(grid)
+  
+  # Make a list from the ... arguments and plotlist
+  plots <- c(list(...), plotlist)
+  
+  numPlots = length(plots)
+  
+  # If layout is NULL, then use 'cols' to determine layout
+  if (is.null(layout)) {
+    # Make the panel
+    # ncol: Number of columns of plots
+    # nrow: Number of rows needed, calculated from # of cols
+    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
+                     ncol = cols, nrow = ceiling(numPlots/cols))
+  }
+  
+  if (numPlots==1) {
+    print(plots[[1]])
+    
+  } else {
+    # Set up the page
+    grid.newpage()
+    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
+    
+    # Make each plot, in the correct location
+    for (i in 1:numPlots) {
+      # Get the i,j matrix positions of the regions that contain this subplot
+      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
+      
+      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
+                                      layout.pos.col = matchidx$col))
+    }
+  }
+}
+

All the demand data are as follows:

+
multiplot(plot_list[[1]], plot_list[[2]],plot_list[[3]],plot_list[[4]],plot_list[[5]], plot_list[[6]],plot_list[[7]],plot_list[[8]], plot_list[[9]],plot_list[[10]], plot_list[[11]],plot_list[[12]],plot_list[[13]],plot_list[[14]],plot_list[[15]], plot_list[[16]], cols=4)
+

+ + +
+ + + + + + + + diff --git a/Energy_Lab/C-Forecast_of_demand.R b/Energy_Lab/C-Forecast_of_demand.R new file mode 100644 index 0000000..2aa3a15 --- /dev/null +++ b/Energy_Lab/C-Forecast_of_demand.R @@ -0,0 +1,96 @@ + +# Manila +demandData = data_list_demand[[6]] +demandData = na.omit(demandData) +dailyMax_all=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max) +dailyMax_all$YR = substr(as.factor(dailyMax_all[,1]),1,4) +dailyMax_raw = rbind(subset(dailyMax_all,dailyMax_all$YR==2011),subset(dailyMax_all,dailyMax_all$YR==2012)) +dailyMax <- data.frame(Date = dailyMax_raw[,1],MW = dailyMax_raw[,2]) +dailyDataTs=ts(dailyMax$MW, start=c(2011), end=c(2013), frequency=365) + +# Decomposition +Decomp <- stl(dailyDataTs, s.window="period") +plot(Decomp) +# ARIMA +forecast_stl <- forecast(Decomp, h=365, method="arima") +plot(forecast_stl) +# HW +fit_HW <-HoltWinters(dailyDataTs) +# plot(fit_HW) +forecast_HW <- forecast(fit_HW, 365, method="arima") +plot(forecast_HW) +# true data +true_data <- subset(dailyMax_all,dailyMax_all$YR==2013) +plot(true_data[,2],type="l",,col="black",ylim=c(4000,9000)) + +plot(forecast_stl$mean,col="blue",ylim=c(4000,9000)) +par(new=T) +plot(forecast_HW$mean,col="red",ylim=c(4000,9000)) +par(new=T) +plot(true_data[,2],type="l",,col="black",ylim=c(4000,9000)) +# par(new=T) +# plot(forecast_stl_corr,type="l",,col="green",ylim=c(4000,9000)) + +# The MSE +MSE_Cal <- function(data_true,data_est){ + return(1/length(data_true)*sum((data_true-data_est)^2)) +} +MSE_Cal(true_data[,2],forecast_stl$mean) # 390698.7 +MSE_Cal(true_data[,2],forecast_HW$mean) # 519957.9 + +# The MPE +MPE_Cal <- function(data_true,data_est){ + return(1/length(data_true)*sum(abs(data_true-data_est)/data_true)) +} +MPE_Cal(true_data[,2],forecast_stl$mean) # 7.354% +MPE_Cal(true_data[,2],forecast_HW$mean) # 8.388% + + + +# Dakar +demandData = data_list_demand[[5]] +demandData = na.omit(demandData) +dailyMax_all=aggregate(demandData[, c("MW")], by=list(demandData$Date), 
FUN=max) +dailyMax_all$YR = substr(as.factor(dailyMax_all[,1]),1,4) +dailyMax_raw = rbind(subset(dailyMax_all,dailyMax_all$YR==2011),subset(dailyMax_all,dailyMax_all$YR==2012)) +dailyMax <- data.frame(Date = dailyMax_raw[,1],MW = dailyMax_raw[,2]) +dailyDataTs=ts(dailyMax$MW, start=c(2011), end=c(2013), frequency=365) + +# Decomposition +Decomp <- stl(dailyDataTs, s.window="period") +plot(Decomp) +# ARIMA +forecast_stl <- forecast(Decomp, h=365, method="arima") +plot(forecast_stl) +# HW +fit_HW <-HoltWinters(dailyDataTs) +# plot(fit_HW) +forecast_HW <- forecast(fit_HW, 365, method="arima") +plot(forecast_HW) +# true data +true_data <- subset(dailyMax_all,dailyMax_all$YR==2013) +plot(true_data[,2],type="l",,col="black",ylim=c(4000,9000)) + +# Three comparison +plot(forecast_stl$mean,col="blue",ylim=c(4000,9000)) +par(new=T) +plot(forecast_HW$mean,col="red",ylim=c(4000,9000)) +par(new=T) +plot(true_data[,2],type="l",,col="black",ylim=c(4000,9000)) +# par(new=T) +# plot(forecast_stl_corr,type="l",,col="green",ylim=c(4000,9000)) + +# The MSE +MSE_Cal <- function(data_true,data_est){ + return(1/length(data_true)*sum((data_true-data_est)^2)) +} +MSE_Cal(true_data[,2],forecast_stl$mean) # 576.8227 +MSE_Cal(true_data[,2],forecast_HW$mean) # 485.3078 + +# The MPE +MPE_Cal <- function(data_true,data_est){ + return(1/length(data_true)*sum(abs(data_true-data_est)/data_true)) +} +MPE_Cal(true_data[,2],forecast_stl$mean) ### 4.657% +MPE_Cal(true_data[,2],forecast_HW$mean) # 4.297% + diff --git a/Energy_Lab/C-Forecast_of_demand.Rmd b/Energy_Lab/C-Forecast_of_demand.Rmd new file mode 100644 index 0000000..8e0cbb5 --- /dev/null +++ b/Energy_Lab/C-Forecast_of_demand.Rmd @@ -0,0 +1,135 @@ +--- +title: "Forecast_demand" +author: "Yue Guo" +date: "May 26, 2015" +output: html_document +--- +```{r,include=FALSE} +setwd("/Users/YueGuo/Documents/Energy_Lab/data/") +load("DemandData_use.RData") +library(forecast) +``` +1. 
Forecast for Manila +```{r} +demandData = data_list_demand[[6]] +demandData = na.omit(demandData) +dailyMax_all=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max) +dailyMax_all$YR = substr(as.factor(dailyMax_all[,1]),1,4) +dailyMax_raw = rbind(subset(dailyMax_all,dailyMax_all$YR==2011),subset(dailyMax_all,dailyMax_all$YR==2012)) +dailyMax <- data.frame(Date = dailyMax_raw[,1],MW = dailyMax_raw[,2]) +dailyDataTs=ts(dailyMax$MW, start=c(2011), end=c(2013), frequency=365) +``` +First, Decomposition +```{r} +Decomp <- stl(dailyDataTs, s.window="period") +``` +Decomposition results: +```{r} +plot(Decomp) +``` + +Three methods to forecast: +(1) stl: use the results of decomposition; +(2) HW: Holt-Winters +And the better one in (1)and(2) with a GDP correction + +(1) ARIMA +```{r} +forecast_stl <- forecast(Decomp, h=365, method="arima") +plot(forecast_stl) +``` + +(2) Holt-Winters +```{r} +fit_HW <-HoltWinters(dailyDataTs) +forecast_HW <- forecast(fit_HW, 365, method="arima") +plot(forecast_HW) +``` + +Then compare them with the true data: +```{r} +# true data +true_data <- subset(dailyMax_all,dailyMax_all$YR==2013) +plot(forecast_stl$mean,col="blue",xlim=c(2013,2014),ylim=c(4000,9000),ylab="MW",xlab="Date") +par(new=T) +plot(forecast_HW$mean,col="red",xlim=c(2013,2014),ylim=c(4000,9000),ylab="",xlab="",xaxt="n") +par(new=T) +plot(true_data[,2],type="l",col="black",xlim=c(0,365),ylim=c(4000,9000),ylab="",xlab="",xaxt="n") +legend("bottomright",title = "Lines:",legend = c("stl","HW","true"), col=c("blue","red","black"),pch=20,cex=.8) +``` + +Calculate the mean percent error of methods above: +```{r} +MPE_Cal <- function(data_true,data_est){ + return(1/length(data_true)*sum(abs(data_true-data_est)/data_true)) +} +``` +MPE for ARIMA: +```{r} +MPE_Cal(true_data[,2],forecast_stl$mean) +``` +MPE for Holt-Winters: +```{r} +MPE_Cal(true_data[,2],forecast_HW$mean) +``` +The stl method is better. + +2. 
Forecast for Dakar +```{r} +demandData = data_list_demand[[5]] +demandData = na.omit(demandData) +dailyMax_all=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max) +dailyMax_all$YR = substr(as.factor(dailyMax_all[,1]),1,4) +dailyMax_raw = rbind(subset(dailyMax_all,dailyMax_all$YR==2011),subset(dailyMax_all,dailyMax_all$YR==2012)) +dailyMax <- data.frame(Date = dailyMax_raw[,1],MW = dailyMax_raw[,2]) +dailyDataTs=ts(dailyMax$MW, start=c(2011), end=c(2013), frequency=365) +``` +First, Decomposition +```{r} +Decomp <- stl(dailyDataTs, s.window="period") +``` +Decomposition results: +```{r} +plot(Decomp) +``` + +(1) ARIMA +```{r} +forecast_stl <- forecast(Decomp, h=365, method="arima") +plot(forecast_stl) +``` + +(2) Holt-Winters +```{r} +fit_HW <-HoltWinters(dailyDataTs) +forecast_HW <- forecast(fit_HW, 365, method="arima") +plot(forecast_HW) +``` + +Then compare them with the true data: +```{r} +# true data +true_data <- subset(dailyMax_all,dailyMax_all$YR==2013) +plot(forecast_stl$mean,col="blue",xlim=c(2013,2014),ylim=c(320,480),ylab="MW",xlab="Date") +par(new=T) +plot(forecast_HW$mean,col="red",xlim=c(2013,2014),ylim=c(320,480),ylab="",xlab="",xaxt="n") +par(new=T) +plot(true_data[,2],type="l",col="black",xlim=c(0,365),ylim=c(320,480),ylab="",xlab="",xaxt="n") +legend("bottomright",title = "Lines:",legend = c("stl","HW","true"), col=c("blue","red","black"),pch=20,cex=.6) +``` + +Calculate the mean percent error of methods above: +```{r} +MPE_Cal <- function(data_true,data_est){ + return(1/length(data_true)*sum(abs(data_true-data_est)/data_true)) +} +``` +MPE for ARIMA: +```{r} +MPE_Cal(true_data[,2],forecast_stl$mean) +``` +MPE for Holt-Winters: +```{r} +MPE_Cal(true_data[,2],forecast_HW$mean) +``` +The HW method is better. 
\ No newline at end of file diff --git a/Energy_Lab/C-Forecast_of_demand.html b/Energy_Lab/C-Forecast_of_demand.html new file mode 100644 index 0000000..d8dd358 --- /dev/null +++ b/Energy_Lab/C-Forecast_of_demand.html @@ -0,0 +1,194 @@ + + + + + + + + + + + + + + +Forecast_demand + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
    +
  1. Forecast for Manila
  2. +
+
demandData = data_list_demand[[6]]
+demandData = na.omit(demandData)
+dailyMax_all=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max)
+dailyMax_all$YR = substr(as.factor(dailyMax_all[,1]),1,4)
+dailyMax_raw = rbind(subset(dailyMax_all,dailyMax_all$YR==2011),subset(dailyMax_all,dailyMax_all$YR==2012))
+dailyMax <- data.frame(Date = dailyMax_raw[,1],MW = dailyMax_raw[,2])
+dailyDataTs=ts(dailyMax$MW, start=c(2011), end=c(2013), frequency=365)
+

First, Decomposition

+
Decomp <- stl(dailyDataTs, s.window="period")
+

Decomposition results:

+
plot(Decomp)
+

+

Three methods to forecast:
(1) stl: use the results of decomposition;
(2) HW: Holt-Winters; and (3) the better one of (1) and (2) with a GDP correction

+
    +
  1. ARIMA
  2. +
+
forecast_stl <- forecast(Decomp, h=365, method="arima")
+plot(forecast_stl)
+

+
    +
  1. Holt-Winters
  2. +
+
fit_HW <-HoltWinters(dailyDataTs)
+forecast_HW <- forecast(fit_HW, 365, method="arima")
+plot(forecast_HW)
+

+

Then compare them with the true data:

+
# true data
+true_data <- subset(dailyMax_all,dailyMax_all$YR==2013)
+plot(forecast_stl$mean,col="blue",xlim=c(2013,2014),ylim=c(4000,9000),ylab="MW",xlab="Date")
+par(new=T)
+plot(forecast_HW$mean,col="red",xlim=c(2013,2014),ylim=c(4000,9000),ylab="",xlab="",xaxt="n")
+par(new=T)
+plot(true_data[,2],type="l",col="black",xlim=c(0,365),ylim=c(4000,9000),ylab="",xlab="",xaxt="n")
+legend("bottomright",title = "Lines:",legend = c("stl","HW","true"), col=c("blue","red","black"),pch=20,cex=.8)
+

+

Calculate the mean percent error of methods above:

+
MPE_Cal <- function(data_true,data_est){
+  return(1/length(data_true)*sum(abs(data_true-data_est)/data_true))
+}
+

MPE for ARIMA:

+
MPE_Cal(true_data[,2],forecast_stl$mean)
+
## [1] 0.07354473
+

MPE for Holt-Winters:

+
MPE_Cal(true_data[,2],forecast_HW$mean)
+
## [1] 0.08387863
+

The stl method is better.

+
    +
  1. Forecast for Dakar
  2. +
+
demandData = data_list_demand[[5]]
+demandData = na.omit(demandData)
+dailyMax_all=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max)
+dailyMax_all$YR = substr(as.factor(dailyMax_all[,1]),1,4)
+dailyMax_raw = rbind(subset(dailyMax_all,dailyMax_all$YR==2011),subset(dailyMax_all,dailyMax_all$YR==2012))
+dailyMax <- data.frame(Date = dailyMax_raw[,1],MW = dailyMax_raw[,2])
+dailyDataTs=ts(dailyMax$MW, start=c(2011), end=c(2013), frequency=365)
+

First, Decomposition

+
Decomp <- stl(dailyDataTs, s.window="period")
+

Decomposition results:

+
plot(Decomp)
+

+
    +
  1. ARIMA
  2. +
+
forecast_stl <- forecast(Decomp, h=365, method="arima")
+plot(forecast_stl)
+

+
    +
  1. Holt-Winters
  2. +
+
fit_HW <-HoltWinters(dailyDataTs)
+forecast_HW <- forecast(fit_HW, 365, method="arima")
+plot(forecast_HW)
+

+

Then compare them with the true data:

+
# true data
+true_data <- subset(dailyMax_all,dailyMax_all$YR==2013)
+plot(forecast_stl$mean,col="blue",xlim=c(2013,2014),ylim=c(320,480),ylab="MW",xlab="Date")
+par(new=T)
+plot(forecast_HW$mean,col="red",xlim=c(2013,2014),ylim=c(320,480),ylab="",xlab="",xaxt="n")
+par(new=T)
+plot(true_data[,2],type="l",col="black",xlim=c(0,365),ylim=c(320,480),ylab="",xlab="",xaxt="n")
+legend("bottomright",title = "Lines:",legend = c("stl","HW","true"), col=c("blue","red","black"),pch=20,cex=.6)
+

+

Calculate the mean percent error of methods above:

+
MPE_Cal <- function(data_true,data_est){
+  return(1/length(data_true)*sum(abs(data_true-data_est)/data_true))
+}
+

MPE for ARIMA:

+
MPE_Cal(true_data[,2],forecast_stl$mean)
+
## [1] 0.04657039
+

MPE for Holt-Winters:

+
MPE_Cal(true_data[,2],forecast_HW$mean)
+
## [1] 0.04296551
+

The HW method is better.

+ + +
+ + + + + + + + diff --git a/Energy_Lab/D-GDP_correction.R b/Energy_Lab/D-GDP_correction.R new file mode 100644 index 0000000..88b9bed --- /dev/null +++ b/Energy_Lab/D-GDP_correction.R @@ -0,0 +1,166 @@ +## we choose: (based on the data quality and GDP data resource) +# Singapore - Manila_Philippines +# Singapore - Dakar_Senegal +# San Diego - Amman_Jordan (to be continued) +## based on the GDP data we estimate the demand in 2013 + +# GDP of countries (current US$) +# source:World Bank +Jodan_GDP_10_13 <- c(26425379437, 28840263380, 31015239545, 33678500148) +Philippines_GDP_10_13 <- c(1.99589E+11, 2.24143E+11, 2.5024E+11, 2.72067E+11) +Senegal_GDP_10_13 <- c(12932427724, 14440676498, 14045680427, 14791699009) +Singapore_GDP_2011 <- 274065000000 +Singapore_GDP_2012 <- 286908000000 +Singapore_GDP_2013 <- 297941000000 +Queensland_GDP_2011 <- 267942000000 +Queensland_GDP_2012 <- 284441000000 +Queensland_GDP_2013 <- 290158000000 +# data issue 10-11,11-12,12-13 +# lack of Tokyo data + +# source: 'The global urban competitiveness report - 2011' +Amman_GDP_2010 <- 6128640000*1.09 +Amman_GDP_2011 <- 7216940000*1.05 +Dakar_GDP_2010 <- 1363990000*1.09 +Dakar_GDP_2011 <- 1595730000*1.05 +Manila_GDP_2010 <- 3669550000*1.09 +Manila_GDP_2011 <- 4048010000*1.05 + +# estimate data +# calculate the city/country GDP +Amman_in_Jodan = mean(Amman_GDP_2010/Jodan_GDP_10_13[1],Amman_GDP_2011/Jodan_GDP_10_13[2]) +Dakar_in_Senegal = mean(Dakar_GDP_2010/Senegal_GDP_10_13[1],Dakar_GDP_2011/Senegal_GDP_10_13[2]) +Manila_in_Philippines = mean(Manila_GDP_2010/Philippines_GDP_10_13[1],Manila_GDP_2011/Philippines_GDP_10_13[2]) + +Amman_GDP_2012_est = Jodan_GDP_10_13[3]*Amman_in_Jodan +Amman_GDP_2013_est = Jodan_GDP_10_13[4]*Amman_in_Jodan +Amman_GDP_2014_est = Jodan_GDP_10_13[4]*1.03*Amman_in_Jodan +Dakar_GDP_2012_est = Senegal_GDP_10_13[3]*Dakar_in_Senegal +Dakar_GDP_2013_est = Senegal_GDP_10_13[4]*Dakar_in_Senegal +Dakar_GDP_2014_est = Senegal_GDP_10_13[4]*1.045*Dakar_in_Senegal 
+Manila_GDP_2012_est = Philippines_GDP_10_13[3]*Manila_in_Philippines +Manila_GDP_2013_est = Philippines_GDP_10_13[4]*Manila_in_Philippines +Manila_GDP_2014_est = Philippines_GDP_10_13[4]*1.06*Manila_in_Philippines +# 2014 GDP growth rate forcast from World bank + +## the population data +Amman_Popu_2014 = 1148000 +Dakar_Popu_2014 = 3393000 +Manila_Popu_2014 = 1745000 # NCR:12764000 +Tokyo_Popu_2014 = 37833000 +Singapore_Popu_2014 = 5517000 +# source: UN, Department of Economic and Social Affairs +Queensland_Popu_2013 = 4690910 +Queensland_Popu_2014 = 4740927 +# http://www.qgso.qld.gov.au/products/reports/pop-growth-qld/index.php, Dec. 30 + +# estimate the population of 2013 +Amman_Popu_2015_est = 1155000 +Dakar_Popu_2015_est = 3520000 +Manila_Popu_2015_est = 1745000*(1.661/1.581)^(1/7) +Tokyo_Popu_2015_est = 38001000 +Singapore_Popu_2015_est = 5619000 +Amman_Popu_2013_est = (Amman_Popu_2014)^2/Amman_Popu_2015_est +Dakar_Popu_2013_est = (Dakar_Popu_2014)^2/Dakar_Popu_2015_est +Manila_Popu_2013_est = 1745000/(1.661/1.581)^(1/7) +Tokyo_Popu_2013_est = (Tokyo_Popu_2014)^2/Tokyo_Popu_2015_est +Singapore_Popu_2013_est = (Singapore_Popu_2014)^2/Singapore_Popu_2015_est # we have true value + +######################### +# calculation +######################### +# GDP_correction function +GDP_correction <- function(GDP_developing_2013,GDP_developing_2014, + Popu_developing_2013,Popu_developing_2014, + GDP_developed_2013,Popu_developed_2013, + demand_developing_2013,demand_developed_2013){ + delta_PPP <- vector("numeric",365) + for(i in 1:365){ + delta_PPP[i] = GDP_developing_2013/Popu_developing_2013*((((GDP_developing_2014/Popu_developing_2014)/(GDP_developing_2013/Popu_developing_2013))^((i-1)/365))-1) + } + delta_demand <- vector("numeric",365) + for(i in 1:365){ + # delta_demand[i] = (demand_developed_2013[i]/Popu_developed_2013*Popu_developing_2013-demand_developing_2013[i])*delta_PPP[i]/(GDP_developed_2013/Popu_developed_2013-GDP_developing_2013/Popu_developing_2013) 
+ delta_demand[i] = (demand_developed_2013[365]/Popu_developed_2013*Popu_developing_2013-demand_developing_2013[365])*delta_PPP[i]/(GDP_developed_2013/Popu_developed_2013-GDP_developing_2013/Popu_developing_2013) + } + return(delta_demand) +} + +## Manila +# the 2013 demand data of Manila +demandData = data_list_demand[[6]] +demandData = na.omit(demandData) +dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max) +dailyMax$YR = substr(as.factor(dailyMax[,1]),1,4) +dailyMax = subset(dailyMax,dailyMax$YR==2013) +demand_developing_2013 <- dailyMax[,2] + +# the 2013 demand data of Singapore +demandData = data_list_demand[[13]] +demandData = na.omit(demandData) +dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max) +dailyMax$YR = substr(as.factor(dailyMax[,1]),1,4) +dailyMax = subset(dailyMax,dailyMax$YR==2013) +demand_developed_2013 <- dailyMax[,2] + +# The GDP correction of Manila +Manila_correction <- GDP_correction(Manila_GDP_2013_est,Manila_GDP_2014_est, + Manila_Popu_2013_est,Manila_Popu_2014, + Singapore_GDP_2013,Singapore_Popu_2013_est, + demand_developing_2013,demand_developed_2013) + +# Correction comparison (very little change) +demandData = data_list_demand[[6]] +demandData = na.omit(demandData) +dailyMax_all=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max) +dailyMax_all$YR = substr(as.factor(dailyMax_all[,1]),1,4) +dailyMax_raw = rbind(subset(dailyMax_all,dailyMax_all$YR==2011),subset(dailyMax_all,dailyMax_all$YR==2012)) +dailyMax <- data.frame(Date = dailyMax_raw[,1],MW = dailyMax_raw[,2]) +dailyDataTs=ts(dailyMax$MW, start=c(2011), end=c(2013), frequency=365) +forecast_stl <- forecast(stl(dailyDataTs, s.window="period"), h=365, method="arima") +true_data <- subset(dailyMax_all,dailyMax_all$YR==2013) +library(lubridate) +ManilaPlot = merge(actual = as.zoo(forecast_stl$x),predicted=as.zoo(cbind(mean=forecast_stl$mean, + lower=forecast_stl$lower[,1], + upper=forecast_stl$upper[,1])), + 
GDP=forecast_stl_corr, + TURE=ts(true_data[,2],start = c(2013),end=c(2014), frequency=365)) +ManilaPlotxts=xts(ManilaPlot, date_decimal(index(ManilaPlot))) +ManilaPlot_final=dygraph(ManilaPlotxts, "Manila Power Demand Data") %>% + dySeries("actual", label = "Actual") %>% + dySeries(c("lower", "mean", "upper"), label = "Predicted") %>% + dySeries("GDP", label="Revised according to GDP") %>% + dySeries("TURE", label="Ture value") %>% + dyRangeSelector() +ManilaPlot_final + +## Dakar +# the 2013 demand data of Dakar +demandData = data_list_demand[[5]] +demandData = na.omit(demandData) +dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max) +dailyMax$YR = substr(as.factor(dailyMax[,1]),1,4) +dailyMax = subset(dailyMax,dailyMax$YR==2013) +demand_developing_2013 <- dailyMax[,2] + +Dakar_correction <- GDP_correction(Dakar_GDP_2013_est,Dakar_GDP_2014_est, + Dakar_Popu_2013_est,Dakar_Popu_2014, + Singapore_GDP_2013,Singapore_Popu_2013_est, + demand_developing_2013,demand_developed_2013) + +# Correction comparison (very little change) +library(lubridate) +DakarPlot = merge(actual = as.zoo(forecast_stl$x),predicted=as.zoo(cbind(mean=forecast_stl$mean, + lower=forecast_stl$lower[,1], + upper=forecast_stl$upper[,1])), + GDP=forecast_stl_corr, + TURE=ts(true_data[,2],start = c(2013),end=c(2014), frequency=365)) +DakarPlotxts=xts(DakarPlot, date_decimal(index(DakarPlot))) +DakarPlot_final=dygraph(DakarPlotxts, "Dakar Power Demand Data") %>% + dySeries("actual", label = "Actual") %>% + dySeries(c("lower", "mean", "upper"), label = "Predicted") %>% + dySeries("GDP", label="Revised according to GDP") %>% + dySeries("TURE", label="Ture value") %>% + dyRangeSelector() +DakarPlot_final + diff --git a/Energy_Lab/D-GDP_correction.Rmd b/Energy_Lab/D-GDP_correction.Rmd new file mode 100644 index 0000000..6b995d9 --- /dev/null +++ b/Energy_Lab/D-GDP_correction.Rmd @@ -0,0 +1,222 @@ +--- +title: "GDP correction of the forecast" +author: "Yue Guo" +date: "May 26, 2015" 
+output: html_document +--- +We choose: (based on the data quality and GDP data resource) +(1) Singapore - Manila_Philippines +(2) Singapore - Dakar_Senegal +(3) San Diego - Amman_Jordan (to be continued) +(based on the GDP data we estimate the demand in 2013) + +```{r,echo=FALSE} +# GDP of countries (current US$) +# source:World Bank +Jodan_GDP_10_13 <- c(26425379437, 28840263380, 31015239545, 33678500148) +Philippines_GDP_10_13 <- c(1.99589E+11, 2.24143E+11, 2.5024E+11, 2.72067E+11) +Senegal_GDP_10_13 <- c(12932427724, 14440676498, 14045680427, 14791699009) +Singapore_GDP_2011 <- 274065000000 +Singapore_GDP_2012 <- 286908000000 +Singapore_GDP_2013 <- 297941000000 +Queensland_GDP_2011 <- 267942000000 +Queensland_GDP_2012 <- 284441000000 +Queensland_GDP_2013 <- 290158000000 +# data issue 10-11,11-12,12-13 +# lack of Tokyo data + +# source: 'The global urban competitiveness report - 2011' +Amman_GDP_2010 <- 6128640000*1.09 +Amman_GDP_2011 <- 7216940000*1.05 +Dakar_GDP_2010 <- 1363990000*1.09 +Dakar_GDP_2011 <- 1595730000*1.05 +Manila_GDP_2010 <- 3669550000*1.09 +Manila_GDP_2011 <- 4048010000*1.05 + +# estimate data +# calculate the city/country GDP +Amman_in_Jodan = mean(Amman_GDP_2010/Jodan_GDP_10_13[1],Amman_GDP_2011/Jodan_GDP_10_13[2]) +Dakar_in_Senegal = mean(Dakar_GDP_2010/Senegal_GDP_10_13[1],Dakar_GDP_2011/Senegal_GDP_10_13[2]) +Manila_in_Philippines = mean(Manila_GDP_2010/Philippines_GDP_10_13[1],Manila_GDP_2011/Philippines_GDP_10_13[2]) + +Amman_GDP_2012_est = Jodan_GDP_10_13[3]*Amman_in_Jodan +Amman_GDP_2013_est = Jodan_GDP_10_13[4]*Amman_in_Jodan +Amman_GDP_2014_est = Jodan_GDP_10_13[4]*1.03*Amman_in_Jodan +Dakar_GDP_2012_est = Senegal_GDP_10_13[3]*Dakar_in_Senegal +Dakar_GDP_2013_est = Senegal_GDP_10_13[4]*Dakar_in_Senegal +Dakar_GDP_2014_est = Senegal_GDP_10_13[4]*1.045*Dakar_in_Senegal +Manila_GDP_2012_est = Philippines_GDP_10_13[3]*Manila_in_Philippines +Manila_GDP_2013_est = Philippines_GDP_10_13[4]*Manila_in_Philippines +Manila_GDP_2014_est = 
Philippines_GDP_10_13[4]*1.06*Manila_in_Philippines +# 2014 GDP growth rate forcast from World bank + +## the population data +Amman_Popu_2014 = 1148000 +Dakar_Popu_2014 = 3393000 +Manila_Popu_2014 = 1745000 # NCR:12764000 +Tokyo_Popu_2014 = 37833000 +Singapore_Popu_2014 = 5517000 +# source: UN, Department of Economic and Social Affairs +Queensland_Popu_2013 = 4690910 +Queensland_Popu_2014 = 4740927 +# http://www.qgso.qld.gov.au/products/reports/pop-growth-qld/index.php, Dec. 30 + +# estimate the population of 2013 +Amman_Popu_2015_est = 1155000 +Dakar_Popu_2015_est = 3520000 +Manila_Popu_2015_est = 1745000*(1.661/1.581)^(1/7) +Tokyo_Popu_2015_est = 38001000 +Singapore_Popu_2015_est = 5619000 +Amman_Popu_2013_est = (Amman_Popu_2014)^2/Amman_Popu_2015_est +Dakar_Popu_2013_est = (Dakar_Popu_2014)^2/Dakar_Popu_2015_est +Manila_Popu_2013_est = 1745000/(1.661/1.581)^(1/7) +Tokyo_Popu_2013_est = (Tokyo_Popu_2014)^2/Tokyo_Popu_2015_est +Singapore_Popu_2013_est = (Singapore_Popu_2014)^2/Singapore_Popu_2015_est # we have true value +``` +The GDP_correction function: +```{r} +GDP_correction <- function(GDP_developing_2013,GDP_developing_2014, + Popu_developing_2013,Popu_developing_2014, + GDP_developed_2013,Popu_developed_2013, + demand_developing_2013,demand_developed_2013){ + delta_PPP <- vector("numeric",365) + for(i in 1:365){ + delta_PPP[i] = GDP_developing_2013/Popu_developing_2013*((((GDP_developing_2014/Popu_developing_2014)/(GDP_developing_2013/Popu_developing_2013))^((i-1)/365))-1) + } + delta_demand <- vector("numeric",365) + for(i in 1:365){ + # delta_demand[i] = (demand_developed_2013[i]/Popu_developed_2013*Popu_developing_2013-demand_developing_2013[i])*delta_PPP[i]/(GDP_developed_2013/Popu_developed_2013-GDP_developing_2013/Popu_developing_2013) + delta_demand[i] = (demand_developed_2013[365]/Popu_developed_2013*Popu_developing_2013-demand_developing_2013[365])*delta_PPP[i]/(GDP_developed_2013/Popu_developed_2013-GDP_developing_2013/Popu_developing_2013) + } + 
return(delta_demand) +} +``` +1. Correction for Manila + +```{r,echo=FALSE} +setwd("/Users/YueGuo/Documents/Energy_Lab/data/") +load("DemandData_use.RData") +# the 2013 demand data of Manila +demandData = data_list_demand[[6]] +demandData = na.omit(demandData) +dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max) +dailyMax$YR = substr(as.factor(dailyMax[,1]),1,4) +dailyMax = subset(dailyMax,dailyMax$YR==2013) +demand_developing_2013 <- dailyMax[,2] +# the 2013 demand data of Singapore +demandData = data_list_demand[[13]] +demandData = na.omit(demandData) +dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max) +dailyMax$YR = substr(as.factor(dailyMax[,1]),1,4) +dailyMax = subset(dailyMax,dailyMax$YR==2013) +demand_developed_2013 <- dailyMax[,2] +``` +```{r} +Manila_correction <- GDP_correction(Manila_GDP_2013_est,Manila_GDP_2014_est, + Manila_Popu_2013_est,Manila_Popu_2014, + Singapore_GDP_2013,Singapore_Popu_2013_est, + demand_developing_2013,demand_developed_2013) + +``` +The Correction comparison: (very little change) +```{r,include=FALSE} +library(forecast) +``` +```{r,echo=FALSE} +demandData = data_list_demand[[6]] +demandData = na.omit(demandData) +dailyMax_all=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max) +dailyMax_all$YR = substr(as.factor(dailyMax_all[,1]),1,4) +dailyMax_raw = rbind(subset(dailyMax_all,dailyMax_all$YR==2011),subset(dailyMax_all,dailyMax_all$YR==2012)) +dailyMax <- data.frame(Date = dailyMax_raw[,1],MW = dailyMax_raw[,2]) +dailyDataTs=ts(dailyMax$MW, start=c(2011), end=c(2013), frequency=365) +forecast_stl <- forecast(stl(dailyDataTs, s.window="period"), h=365, method="arima") +true_data <- subset(dailyMax_all,dailyMax_all$YR==2013) +``` +```{r} +forecast_stl_corr <- forecast_stl$mean + Manila_correction +MPE_Cal <- function(data_true,data_est){ + return(1/length(data_true)*sum(abs(data_true-data_est)/data_true)) +} +MPE_Cal(true_data[,2],forecast_stl_corr) +# the original 
one: 0.07354473 + +``` +```{r} +library(lubridate) +ManilaPlot = merge(actual = as.zoo(forecast_stl$x),predicted=as.zoo(cbind(mean=forecast_stl$mean, + lower=forecast_stl$lower[,1], + upper=forecast_stl$upper[,1])), + GDP=forecast_stl_corr, + TURE=ts(true_data[,2],start = c(2013),end=c(2014), frequency=365)) +``` +ManilaPlotxts=xts(ManilaPlot, date_decimal(index(ManilaPlot))) +ManilaPlot_final=dygraph(ManilaPlotxts, "Manila Power Demand Data") %>% + dySeries("actual", label = "Actual") %>% + dySeries(c("lower", "mean", "upper"), label = "Predicted") %>% + dySeries("GDP", label="Revised according to GDP") %>% + dySeries("TURE", label="Ture value") %>% + dyRangeSelector() +ManilaPlot_final + +![Manila comprison](/Users/YueGuo/Documents/Energy_Lab/Manila_correction.jpg) + + +2. Correction for Dakar + +```{r,echo=FALSE} +# the 2013 demand data of Dakar +demandData = data_list_demand[[5]] +demandData = na.omit(demandData) +dailyMax=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max) +dailyMax$YR = substr(as.factor(dailyMax[,1]),1,4) +dailyMax = subset(dailyMax,dailyMax$YR==2013) +demand_developing_2013 <- dailyMax[,2] +``` +```{r} +Dakar_correction <- GDP_correction(Dakar_GDP_2013_est,Dakar_GDP_2014_est, + Dakar_Popu_2013_est,Dakar_Popu_2014, + Singapore_GDP_2013,Singapore_Popu_2013_est, + demand_developing_2013,demand_developed_2013) + +``` +The Correction comparison: (very little change) +```{r,echo=FALSE} +demandData = data_list_demand[[5]] +demandData = na.omit(demandData) +dailyMax_all=aggregate(demandData[, c("MW")], by=list(demandData$Date), FUN=max) +dailyMax_all$YR = substr(as.factor(dailyMax_all[,1]),1,4) +dailyMax_raw = rbind(subset(dailyMax_all,dailyMax_all$YR==2011),subset(dailyMax_all,dailyMax_all$YR==2012)) +dailyMax <- data.frame(Date = dailyMax_raw[,1],MW = dailyMax_raw[,2]) +dailyDataTs=ts(dailyMax$MW, start=c(2011), end=c(2013), frequency=365) +forecast_HW <- forecast(HoltWinters(dailyDataTs), h=365, method="arima") +true_data <- 
subset(dailyMax_all,dailyMax_all$YR==2013) +``` +```{r} +forecast_HW_corr <- forecast_HW$mean + Dakar_correction +MPE_Cal <- function(data_true,data_est){ + return(1/length(data_true)*sum(abs(data_true-data_est)/data_true)) +} +MPE_Cal(true_data[,2],forecast_HW_corr) +# the original one: 0.04296551 +# NOT IMPROVED + +``` +```{r} +library(lubridate) +DakarPlot = merge(actual = as.zoo(forecast_HW$x),predicted=as.zoo(cbind(mean=forecast_HW$mean, + lower=forecast_HW$lower[,1], + upper=forecast_HW$upper[,1])), + GDP=forecast_HW_corr, + TURE=ts(true_data[,2],start = c(2013),end=c(2014), frequency=365)) +``` +DakarPlotxts=xts(DakarPlot, date_decimal(index(DakarPlot))) +DakarPlot_final=dygraph(DakarPlotxts, "Dakar Power Demand Data") %>% + dySeries("actual", label = "Actual") %>% + dySeries(c("lower", "mean", "upper"), label = "Predicted") %>% + dySeries("GDP", label="Revised according to GDP") %>% + dySeries("TURE", label="Ture value") %>% + dyRangeSelector() +DakarPlot_final + +![Dakar comprison](/Users/YueGuo/Documents/Energy_Lab/Dakar_correction.jpg) \ No newline at end of file diff --git a/Energy_Lab/D-GDP_correction.html b/Energy_Lab/D-GDP_correction.html new file mode 100644 index 0000000..e0ffcac --- /dev/null +++ b/Energy_Lab/D-GDP_correction.html @@ -0,0 +1,159 @@ + + + + + + + + + + + + + + +GDP correction of the forecast + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +

We choose the following city pairs, based on data quality and the availability of GDP data: (1) Singapore – Manila, Philippines; (2) Singapore – Dakar, Senegal; (3) San Diego – Amman, Jordan (to be continued). Based on the GDP data, we estimate the demand in 2013.

+

The GDP_correction function:

+
GDP_correction <- function(GDP_developing_2013,GDP_developing_2014,
+               Popu_developing_2013,Popu_developing_2014,
+               GDP_developed_2013,Popu_developed_2013,
+               demand_developing_2013,demand_developed_2013){
+  delta_PPP <- vector("numeric",365)
+  for(i in 1:365){
+    delta_PPP[i] = GDP_developing_2013/Popu_developing_2013*((((GDP_developing_2014/Popu_developing_2014)/(GDP_developing_2013/Popu_developing_2013))^((i-1)/365))-1)
+  }
+  delta_demand <- vector("numeric",365)
+  for(i in 1:365){
+    # delta_demand[i] = (demand_developed_2013[i]/Popu_developed_2013*Popu_developing_2013-demand_developing_2013[i])*delta_PPP[i]/(GDP_developed_2013/Popu_developed_2013-GDP_developing_2013/Popu_developing_2013)
+    delta_demand[i] = (demand_developed_2013[365]/Popu_developed_2013*Popu_developing_2013-demand_developing_2013[365])*delta_PPP[i]/(GDP_developed_2013/Popu_developed_2013-GDP_developing_2013/Popu_developing_2013)  
+  }
+  return(delta_demand)
+}
+
    +
  1. Correction for Manila
  2. +
+
Manila_correction <- GDP_correction(Manila_GDP_2013_est,Manila_GDP_2014_est,
+                                    Manila_Popu_2013_est,Manila_Popu_2014,
+                                    Singapore_GDP_2013,Singapore_Popu_2013_est,
+                                    demand_developing_2013,demand_developed_2013)
+

The Correction comparison: (very little change)

+
forecast_stl_corr <- forecast_stl$mean + Manila_correction
+MPE_Cal <- function(data_true,data_est){
+  return(1/length(data_true)*sum(abs(data_true-data_est)/data_true))
+}
+MPE_Cal(true_data[,2],forecast_stl_corr)
+
## [1] 0.07346054
+
# the original one: 0.07354473
+
library(lubridate)
+ManilaPlot = merge(actual = as.zoo(forecast_stl$x),predicted=as.zoo(cbind(mean=forecast_stl$mean,
+                                                                         lower=forecast_stl$lower[,1],
+                                                                         upper=forecast_stl$upper[,1])),
+                  GDP=forecast_stl_corr,
+                  TURE=ts(true_data[,2],start = c(2013),end=c(2014), frequency=365))
+

ManilaPlotxts=xts(ManilaPlot, date_decimal(index(ManilaPlot)))
ManilaPlot_final=dygraph(ManilaPlotxts, “Manila Power Demand Data”) %>%
dySeries(“actual”, label = “Actual”) %>%
dySeries(c(“lower”, “mean”, “upper”), label = “Predicted”) %>%
dySeries(“GDP”, label=“Revised according to GDP”) %>%
dySeries(“TURE”, label=“Ture value”) %>%
dyRangeSelector()
ManilaPlot_final

+

Manila comparison

+
    +
  1. Correction for Dakar
  2. +
+
Dakar_correction <- GDP_correction(Dakar_GDP_2013_est,Dakar_GDP_2014_est,
+                                    Dakar_Popu_2013_est,Dakar_Popu_2014,
+                                    Singapore_GDP_2013,Singapore_Popu_2013_est,
+                                    demand_developing_2013,demand_developed_2013)
+

The Correction comparison: (very little change)

+
forecast_HW_corr <- forecast_HW$mean + Dakar_correction
+MPE_Cal <- function(data_true,data_est){
+  return(1/length(data_true)*sum(abs(data_true-data_est)/data_true))
+}
+MPE_Cal(true_data[,2],forecast_HW_corr)
+
## [1] 0.04297823
+
# the original one: 0.04296551
+# NOT IMPROVED
+
library(lubridate)
+DakarPlot = merge(actual = as.zoo(forecast_HW$x),predicted=as.zoo(cbind(mean=forecast_HW$mean,
+                                                                          lower=forecast_HW$lower[,1],
+                                                                          upper=forecast_HW$upper[,1])),
+                  GDP=forecast_HW_corr,
+                  TURE=ts(true_data[,2],start = c(2013),end=c(2014), frequency=365))
+

DakarPlotxts=xts(DakarPlot, date_decimal(index(DakarPlot)))
DakarPlot_final=dygraph(DakarPlotxts, “Dakar Power Demand Data”) %>%
dySeries(“actual”, label = “Actual”) %>%
dySeries(c(“lower”, “mean”, “upper”), label = “Predicted”) %>%
dySeries(“GDP”, label=“Revised according to GDP”) %>%
dySeries(“TURE”, label=“Ture value”) %>%
dyRangeSelector()
DakarPlot_final

+

Dakar comparison

+ + +
+ + + + + + + +