diff --git a/doc/sphinxext/example_builder.py b/doc/sphinxext/example_builder.py index aff0418..c512489 100644 --- a/doc/sphinxext/example_builder.py +++ b/doc/sphinxext/example_builder.py @@ -447,12 +447,14 @@ def figure_contents(self, path, filelist): def subdir_contents(self, path, subdirs): subdirs = [os.path.join(path, subdir) for subdir in subdirs] - headlines = {'missingdata' : 'Missing Data', + headlines = {'benchmark' : 'Benchmark', + 'missingdata' : 'Missing Data', 'prediction' : 'Prediction', 'visualization' : 'Visualization', 'statistics' : 'Statistics'} - order = ['prediction', 'visualization', 'missingdata', 'statistics'] + order = ['prediction', 'visualization', 'missingdata', 'statistics',\ + 'benchmark'] toctree = ("\n\n" ".. toctree::\n" @@ -472,7 +474,7 @@ def subdir_contents(self, path, subdirs): rel_html = os.path.relpath(self.html_filename(f), path) toctree += " ./%s\n\n" % os.path.splitext(rel_html)[0] - + contents += (" .. figure:: ./%s\n" " :target: ./%s\n" "\n" diff --git a/examples/benchmark/comparison.py b/examples/benchmark/comparison.py new file mode 100644 index 0000000..281d9bc --- /dev/null +++ b/examples/benchmark/comparison.py @@ -0,0 +1,27 @@ +""" +MSE Comparison of Predictors +------------------------------------------------------------------------- + +This example shows a comparison of different predictors: Standard +spatio-temporal predictor with linear regression, univariate predictor +with linear regression (only based on target turbine measurements) and +the naive / persistance model. The testbed is QuickNDirty, +which is only used for the example page presentation. +""" + +# Author: Jendrik Poloczek +# Author: Nils A. Treiber +# License: BSD 3 clause + +from windml.benchmark.benchmark import Benchmark +from windml.prediction.std_linreg import StdLinreg +from windml.prediction.univariate_linreg import UnivariateLinreg +from windml.prediction.naive import Naive + +bench = Benchmark() +bench.run(StdLinreg(), 'QuickNDirty') +bench.run(UnivariateLinreg(), 'QuickNDirty') +bench.run(Naive(), 'QuickNDirty') + +bench.visualize_mse_on_parks() + diff --git a/examples/missingdata/mar_damaged.py b/examples/missingdata/mar_damaged.py index ba65c74..08c32c0 100644 --- a/examples/missingdata/mar_damaged.py +++ b/examples/missingdata/mar_damaged.py @@ -54,7 +54,7 @@ ax.grid(True) plt.ylim(-2, 32) -plt.ylabel("Corrected Power (MW), Wind Speed (m/s)") +plt.ylabel("Corrected Power [MW], Wind Speed [m/s]") plt.plot(d_time, y1, label = 'Power Production', color="b", alpha=0.5) plt.plot(d_time, y2, label = 'Wind Speed', color="g", alpha=0.5) diff --git a/examples/missingdata/mreg_knn_interpolation.py b/examples/missingdata/mreg_knn_interpolation.py index 690e023..bff2790 100644 --- a/examples/missingdata/mreg_knn_interpolation.py +++ b/examples/missingdata/mreg_knn_interpolation.py @@ -79,7 +79,7 @@ ax.grid(True) plt.ylim(-2, 32) -plt.ylabel("Corrected Power (MW), Wind Speed (m/s)") +plt.ylabel("Corrected Power [MW], Wind Speed [m/s]") plt.plot(d_time, y1, label = 'Power Production (interpolated)', color="b") plt.plot(d_time, y2, label = 'Wind Speed (interpolated)', color="g") diff --git a/examples/missingdata/mreg_lin_interpolation.py b/examples/missingdata/mreg_lin_interpolation.py index a95e4ce..d3e3c2f 100644 --- a/examples/missingdata/mreg_lin_interpolation.py +++ b/examples/missingdata/mreg_lin_interpolation.py @@ -78,7 +78,7 @@ ax.grid(True) plt.ylim(-2, 32) -plt.ylabel("Corrected Power (MW), Wind Speed (m/s)") +plt.ylabel("Corrected Power [MW], Wind Speed [m/s]") plt.plot(d_time, y1, label = 'Power Production (interpolated)', color="b") plt.plot(d_time, y2, label = 'Wind Speed (interpolated)', color="g") diff --git a/examples/missingdata/mreg_svr_interpolation.py b/examples/missingdata/mreg_svr_interpolation.py index ea53eb6..9c255cd 100644 --- a/examples/missingdata/mreg_svr_interpolation.py +++ b/examples/missingdata/mreg_svr_interpolation.py @@ -92,7 +92,7 @@ ax.grid(True) plt.ylim(-2, 32) -plt.ylabel("Corrected Power (MW), Wind Speed (m/s)") +plt.ylabel("Corrected Power [MW], Wind Speed [m/s]") plt.plot(d_time, y1, label = 'Power Production (interpolated)', color="b") plt.plot(d_time, y2, label = 'Wind Speed (interpolated)', color="g") diff --git a/examples/missingdata/reconstruction.py b/examples/missingdata/reconstruction.py index 31a841b..951571d 100644 --- a/examples/missingdata/reconstruction.py +++ b/examples/missingdata/reconstruction.py @@ -239,8 +239,8 @@ def to_percent(y, position): formatter = FuncFormatter(to_percent) plt.gca().xaxis.set_major_formatter(formatter) -plt.xlabel("Rate of Missing Data") -plt.ylabel("Reconstruction MSE") +plt.xlabel("Rate of Missing Data in Percent") +plt.ylabel("Reconstruction MSE of Power [MW]") plt.xlim([0.1, 0.9]) plt.ylim([0, 10]) plt.legend(loc="upper left") diff --git a/examples/missingdata/reconstruction_prediction.py b/examples/missingdata/reconstruction_prediction.py index 467a992..7ee01dc 100644 --- a/examples/missingdata/reconstruction_prediction.py +++ b/examples/missingdata/reconstruction_prediction.py @@ -251,8 +251,8 @@ def to_percent(y, position): formatter = FuncFormatter(to_percent) plt.gca().xaxis.set_major_formatter(formatter) -plt.xlabel("Rate of Missing Data") -plt.ylabel("Prediction MSE") +plt.xlabel("Rate of Missing Data in Percent") +plt.ylabel("Prediction MSE of Power [MW]") plt.xlim([0.1, 0.9]) plt.ylim([12.5, 16]) plt.legend(loc="upper left") diff --git a/examples/prediction/compare_regressors_param.py b/examples/prediction/compare_regressors_param.py index 810ce8a..902da1a 100644 --- a/examples/prediction/compare_regressors_param.py +++ b/examples/prediction/compare_regressors_param.py @@ -23,6 +23,7 @@ from numpy import zeros, float32 from windml.datasets.nrel import NREL from windml.mapping.power_mapping import PowerMapping +from windml.visualization.colorset import colorset from sklearn.grid_search import GridSearchCV from sklearn import linear_model @@ -90,12 +91,13 @@ def compute_mse(regressor, param): regressors = ['rf', 'knn'] params = [1,2,4,8,16,32,64,128] -marker = {'rf': 'go--', 'knn': 'ro--', 'naive': 'bo--'} +color = {'rf': colorset[0], 'knn': colorset[3], 'naive': colorset[2]} +marker = {'rf': 'o--', 'knn': 'o--', 'naive': 'o--'} labels = {'rf': 'Random Forest', 'knn': 'KNN', 'naive' : 'Naive'} plt.title("MSE depending on Algorithm Parameter") plt.xlabel("Algorithm Parameter (k for KNN, Number of Estimators for RF )") -plt.ylabel("MSE") +plt.ylabel("MSE of Power [MW]") plt.xlim([1, 128]) mse_naive_hats = [] @@ -106,9 +108,11 @@ def compute_mse(regressor, param): mse_y_hat, mse_naive_hat = compute_mse(regressor, param) mse.append(mse_y_hat) mse_naive_hats.append(mse_naive_hat) - plt.plot(params, mse, marker[regressor], label=labels[regressor]) + plt.plot(params, mse, marker[regressor], label=labels[regressor],\ + color=color[regressor]) -plt.plot(params, mse_naive_hats, marker['naive'], label=labels['naive']) +plt.plot(params, mse_naive_hats, marker['naive'], label=labels['naive'],\ + color=color['naive']) plt.legend(loc='upper right') plt.show() diff --git a/examples/prediction/horizon_mse.py b/examples/prediction/horizon_mse.py index 7193a8d..d50b0d1 100644 --- a/examples/prediction/horizon_mse.py +++ b/examples/prediction/horizon_mse.py @@ -23,6 +23,7 @@ from numpy import zeros, float32 from windml.datasets.nrel import NREL from windml.mapping.power_mapping import PowerMapping +from windml.visualization.colorset import colorset from sklearn.grid_search import GridSearchCV from sklearn import linear_model @@ -87,12 +88,13 @@ def compute_mse(regressor, horizon): regressors = ['linear', 'knn'] horizons = range(2, 18, 2) -marker = {'linear': 'go--', 'knn': 'ro--', 'naive': 'bo--'} +color = {'linear': colorset[0], 'knn': colorset[3], 'naive': colorset[2]} +marker = {'linear': 'o--', 'knn': 'o--', 'naive': 'o--'} labels = {'linear': 'Linear', 'knn': 'KNN', 'naive' : 'Naive'} plt.title("MSE depending on Forecast Horizon") -plt.xlabel("Forecast Horizon") -plt.ylabel("MSE") +plt.xlabel("Forecast Horizon Time [600s]") +plt.ylabel("MSE of Power [MW]") mse_naive_hats = [] for regressor in regressors: @@ -102,9 +104,11 @@ def compute_mse(regressor, horizon): mse_y_hat, mse_naive_hat = compute_mse(regressor, horizon) mse.append(mse_y_hat) mse_naive_hats.append(mse_naive_hat) - plt.plot(horizons, mse, marker[regressor], label=labels[regressor]) + plt.plot(horizons, mse, marker[regressor], color=color[regressor],\ + label=labels[regressor]) -plt.plot(horizons, mse_naive_hats, marker['naive'], label=labels['naive']) +plt.plot(horizons, mse_naive_hats, marker['naive'], label=labels['naive'],\ + color=color['naive']) plt.legend(loc='lower right') plt.show() diff --git a/examples/prediction/knn_regression_turbine.py b/examples/prediction/knn_regression_turbine.py index 4a5f777..13041bf 100644 --- a/examples/prediction/knn_regression_turbine.py +++ b/examples/prediction/knn_regression_turbine.py @@ -31,6 +31,7 @@ from numpy import zeros, float32 from windml.datasets.nrel import NREL from windml.mapping.power_mapping import PowerMapping +from windml.visualization.colorset import cmap, colorset from sklearn.neighbors import KNeighborsRegressor # get windpark and corresponding target. forecast is for the target turbine @@ -86,44 +87,52 @@ figure = plt.figure(figsize=(15, 10)) plot_abs = plt.subplot(2, 2, 1) -plt.title("Absolute Labels and True Measurements") +plt.title("Predicted and True Measurements") # Array of true labels for plotting. y = zeros(len(y_hat)) for i in range(0, len(y_hat)): y[i] = (Y[train_to + (i * test_step)]) +colors = {'predictor' : colorset[0], + 'naive' : colorset[1], + 'true' : colorset[3]} + time = range(0, len(y_hat)) -plt.plot(time, y, "g-", label="Measurement") -plt.plot(time, y_hat, "r-", label="KNN Label") -plt.plot(time, naive_hat, "b-", label="Naive Label") +plt.plot(time, y, color=colors['true'], label="Measurement") +plt.plot(time, y_hat, color=colors['predictor'], label="KNN-predicted") +plt.plot(time, naive_hat, color=colors['naive'], label="Naive-predicted") +plt.xlabel("Time [600s]") +plt.ylabel("Power [MW]") plt.xlim([9600, 9750]) plt.ylim([-30, 50]) plt.legend() plot_scatter = plt.subplot(2, 2, 2) -plt.title("Naive Label and True Measurement") +plt.title("Naive-predicted and True Measurement") col = abs(y - naive_hat) -plt.scatter(y, naive_hat, c=col, linewidth=0.0, cmap=plt.cm.jet) -plt.xlabel("Y") -plt.ylabel("Naive Label") +plt.scatter(y, naive_hat, c=col, linewidth=0.0, cmap=cmap) +plt.xlabel("True Measurement [MW]") +plt.ylabel("Naive-predicted Measurement [MW]") plt.xlim([0, 30]) plt.ylim([0, 30]) plot_abs = plt.subplot(2, 2, 3) plt.title("Absolute Difference") -plt.plot(time, (y_hat - y), "r-", label="KNN Label") -plt.plot(time, (naive_hat - y), "b-", label="Naive Label") +plt.plot(time, (y_hat - y), color=colors['predictor'], label="KNN-predicted") +plt.plot(time, (naive_hat - y), color=colors['true'], label="Naive-predicted") plt.xlim([9600, 9750]) plt.ylim([-20, 30]) +plt.xlabel("Time [600s]") +plt.ylabel("Deviation of True Power [MW]") plt.legend() plot_scatter = plt.subplot(2, 2, 4) -plt.title("KNN Label and True Measurement") +plt.title("KNN-predicted and True Measurement") col = abs(y - y_hat) -plt.scatter(y, y_hat, c=col, linewidth=0.0, cmap=plt.cm.jet) -plt.xlabel("Y") -plt.ylabel("KNN Label") +plt.scatter(y, y_hat, c=col, linewidth=0.0, cmap=cmap) +plt.xlabel("True Measurement [MW]") +plt.ylabel("KNN-predicted Measurement [MW]") plt.xlim([0, 30]) plt.ylim([0, 30]) diff --git a/examples/prediction/lin_regression_turbine.py b/examples/prediction/lin_regression_turbine.py index db4a0ed..af8398d 100644 --- a/examples/prediction/lin_regression_turbine.py +++ b/examples/prediction/lin_regression_turbine.py @@ -24,12 +24,10 @@ import math import matplotlib.pyplot as plt - from numpy import zeros, float32 from windml.datasets.nrel import NREL +from windml.visualization.colorset import cmap, colorset from windml.mapping.power_mapping import PowerMapping - -from sklearn.grid_search import GridSearchCV from sklearn import linear_model # get windpark and corresponding target. forecast is for the target turbine @@ -82,47 +80,54 @@ figure = plt.figure(figsize=(15, 10)) plot_abs = plt.subplot(2, 2, 1) -plt.title("Absolute Labels and True Measurements") +plt.title("Predicted and True Measurements") # Array of true labels for plotting. y = zeros(len(y_hat)) for i in range(0, len(y_hat)): y[i] = (Y[train_to + (i * test_step)]) +colors = {'predictor' : colorset[0], + 'naive' : colorset[1], + 'true' : colorset[3]} + time = range(0, len(y_hat)) -plt.plot(time, y, "g-", label="Measurement") -plt.plot(time, y_hat, "r-", label="Linear Label") -plt.plot(time, naive_hat, "b-", label="Naive Label") +plt.plot(time, y, color=colors['true'], label="Measurement") +plt.plot(time, y_hat, color=colors['predictor'], label="Linear-predicted") +plt.plot(time, naive_hat, color=colors['naive'], label="Naive-predicted") +plt.xlabel("Time [600s]") +plt.ylabel("Power [MW]") plt.xlim([9600, 9750]) plt.ylim([-30, 50]) plt.legend() plot_scatter = plt.subplot(2, 2, 2) -plt.title("Naive Label and True Measurement") +plt.title("Naive-predicted and True Measurement") col = abs(y - naive_hat) -plt.scatter(y, naive_hat, c=col, linewidth=0.0, cmap=plt.cm.jet) -plt.xlabel("Y") -plt.ylabel("Naive Label") +plt.scatter(y, naive_hat, c=col, linewidth=0.0, cmap=cmap) +plt.xlabel("True Measurement [MW]") +plt.ylabel("Naive-predicted Measurement [MW]") plt.xlim([0, 30]) plt.ylim([0, 30]) plot_abs = plt.subplot(2, 2, 3) plt.title("Absolute Difference") -plt.plot(time, (y_hat - y), "r-", label="Linear Label") -plt.plot(time, (naive_hat - y), "b-", label="Naive Label") +plt.plot(time, (y_hat - y), color=colors['predictor'], label="Linear-predicted") +plt.plot(time, (naive_hat - y), color=colors['true'], label="Naive-predicted") plt.xlim([9600, 9750]) plt.ylim([-20, 30]) +plt.xlabel("Time [600s]") +plt.ylabel("Deviation of True Power [MW]") plt.legend() plot_scatter = plt.subplot(2, 2, 4) -plt.title("Linear Label and True Measurement") +plt.title("Linear-predicted and True Measurement") col = abs(y - y_hat) -plt.scatter(y, y_hat, c=col, linewidth=0.0, cmap=plt.cm.jet) -plt.xlabel("Y") -plt.ylabel("Linear Label") +plt.scatter(y, y_hat, c=col, linewidth=0.0, cmap=cmap) +plt.xlabel("True Measurement [MW]") +plt.ylabel("Linear-predicted Measurement [MW]") plt.xlim([0, 30]) plt.ylim([0, 30]) plt.show() - diff --git a/examples/prediction/svr_response_curve.py b/examples/prediction/response_curve_quartett.py similarity index 74% rename from examples/prediction/svr_response_curve.py rename to examples/prediction/response_curve_quartett.py index afe405d..04582a9 100644 --- a/examples/prediction/svr_response_curve.py +++ b/examples/prediction/response_curve_quartett.py @@ -2,8 +2,10 @@ Response Curve of a Turbine -------------------------------------------------- -The response curve is the mapping from wind speed to wind power production. In -this example the response curve is learned via support vector regression. +The response curve illustrates the relationship between wind speed +and generated power. The response curve is fitted via k-nearest neighbor +regression. Distribution of wind speeds and frequencies of cut-out help +to interpret the response curve shape. """ # Author: Nils A. Treiber @@ -23,20 +25,18 @@ from sklearn.neighbors import KNeighborsRegressor from windml.datasets.nrel import NREL from windml.visualization.plot_response_curve import plot_response_curve - +from windml.visualization.colorset import colorset ds = NREL() -turbine = ds.get_turbine(NREL.park_id['palmsprings'], 2004, 2006) +turbine = ds.get_turbine(NREL.park_id['tehachapi'], 2004, 2006) timeseries = turbine.get_measurements() max_speed = 40 skip = 1 - # plot true values as blue points speed = [m[2] for m in timeseries[::skip]] score = [m[1] for m in timeseries[::skip]] - # Second Plot: KNN-Interpolation # Built patterns und labels X_train = speed[0:len(speed):1] @@ -45,13 +45,12 @@ # initialize KNN regressor from sklearn. k_neighbors = 30 -knn = KNeighborsRegressor(k_neighbors, 'uniform') +knn = KNeighborsRegressor(k_neighbors, 'uniform', warn_on_equidistant = False) # fitting the pattern-label pairs T = np.linspace(0, max_speed, 500)[:, np.newaxis] Y_hat = knn.fit(X_train_array, Y_train).predict(T) - # Last Plot start_speed = 15 threshold = 25 @@ -71,21 +70,20 @@ num_over_thres[elem] += 1 elem += 1 - fraction = np.zeros(max_speed, dtype=np.float32) for i in range(start_speed, len(fraction)): - if (np.float32(num_below_thres[i-start_speed]+num_over_thres[i-start_speed]) > 0): - fraction[i] = np.float32(num_below_thres[i-start_speed])/(num_below_thres[i-start_speed]+num_over_thres[i-start_speed]) + if (np.float32(num_below_thres[i-start_speed]+\ + num_over_thres[i-start_speed]) > 0): + fraction[i] = np.float32(num_below_thres[i-start_speed])/\ + (num_below_thres[i-start_speed]+\ + num_over_thres[i-start_speed]) else: fraction[i] = -1 -print fraction - - figure = plt.figure(figsize=(15, 10)) plot_abs = plt.subplot(2, 2, 1) plt.title("Measurements") -plt.scatter(speed, score, color="b") +plt.scatter(speed, score, color=colorset[0]) plt.xlim([-1, max_speed]) plt.xlabel("Windspeed [m/s]") plt.ylim([-2, 32]) @@ -93,7 +91,7 @@ plot_scatter = plt.subplot(2, 2, 2) plt.title("KNN Interpolation of the Response Curve") -plt.plot(T, Y_hat, color='b') +plt.plot(T, Y_hat, color=colorset[0]) plt.scatter(speed, score, color="#CCCCCC") plt.xlim([-1, max_speed]) plt.xlabel("Windspeed [m/s]") @@ -102,7 +100,8 @@ plot_abs = plt.subplot(2, 2, 3) plt.title("Distribution of Wind Speeds") -plt.hist( X_train, bins=np.float(max_speed), histtype='stepfilled', normed=True, color='b') +plt.hist(X_train, bins=np.float(max_speed), histtype='stepfilled',\ + normed=True, color=colorset[0], linewidth=0) plt.xlim([-1, max_speed]) plt.ylim(-0.01, 0.13) plt.xlabel("Windspeed [m/s]") @@ -111,12 +110,10 @@ plot_scatter = plt.subplot(2, 2, 4) plt.title("Frequency of Cut-Outs") steps = range(40) -plt.plot(steps, fraction, "o", color='b') -plt.xlim([-1,max_speed]) +plt.plot(steps, fraction, "o", color=colorset[0], linewidth=0) +plt.xlim([-1, max_speed]) plt.xlabel("Windspeed [m/s]") plt.ylim([-0.1, 1.1]) plt.ylabel("Probabilty of Cut-Out Events (Thres = 15)") plt.show() - - diff --git a/examples/prediction/svr_regression_turbine.py b/examples/prediction/svr_regression_turbine.py index 8360bab..5bba6c7 100644 --- a/examples/prediction/svr_regression_turbine.py +++ b/examples/prediction/svr_regression_turbine.py @@ -29,15 +29,12 @@ import math import matplotlib.pyplot as plt -from sklearn.grid_search import GridSearchCV -from sklearn.cross_validation import KFold from sklearn import __version__ as sklearn_version from sklearn.svm import SVR - from numpy import zeros, float32 from windml.datasets.nrel import NREL from windml.mapping.power_mapping import PowerMapping -from sklearn.neighbors import KNeighborsRegressor +from windml.visualization.colorset import cmap, colorset # get windpark and corresponding target. forecast is for the target turbine park_id = NREL.park_id['tehachapi'] @@ -118,44 +115,52 @@ figure = plt.figure(figsize=(15, 10)) plot_abs = plt.subplot(2, 2, 1) -plt.title("Absolute Labels and True Measurements") +plt.title("Predicted and True Measurements") # Array of true labels for plotting. y = zeros(len(y_hat)) for i in range(0, len(y_hat)): y[i] = (Y[train_to + (i * test_step)]) +colors = {'predictor' : colorset[0], + 'naive' : colorset[1], + 'true' : colorset[3]} + time = range(0, len(y_hat)) -plt.plot(time, y, "g-", label="Measurement") -plt.plot(time, y_hat, "r-", label="SVR Label") -plt.plot(time, naive_hat, "b-", label="Naive Label") +plt.plot(time, y, color=colors['true'], label="Measurement") +plt.plot(time, y_hat, color=colors['predictor'], label="SVR-predicted") +plt.plot(time, naive_hat, color=colors['naive'], label="Naive-predicted") plt.xlim([9600, 9750]) plt.ylim([-30, 50]) +plt.xlabel("Time [600s]") +plt.ylabel("Power [MW]") plt.legend() plot_scatter = plt.subplot(2, 2, 2) -plt.title("Naive Label and True Measurement") +plt.title("Naive-predicted and True Measurement") col = abs(y - naive_hat) -plt.scatter(y, naive_hat, c=col, linewidth=0.0, cmap=plt.cm.jet) -plt.xlabel("Y") -plt.ylabel("Naive Label") +plt.scatter(y, naive_hat, c=col, linewidth=0.0, cmap=cmap) +plt.xlabel("True Measurement [MW]") +plt.ylabel("Naive-predicted Measurement [MW]") plt.xlim([0, 30]) plt.ylim([0, 30]) plot_abs = plt.subplot(2, 2, 3) plt.title("Absolute Difference") -plt.plot(time, (y_hat - y), "r-", label="SVR Label") -plt.plot(time, (naive_hat - y), "b-", label="Naive Label") +plt.plot(time, (y_hat - y), color=colors['predictor'], label="SVR-predicted") +plt.plot(time, (naive_hat - y), color=colors['true'], label="Naive-predicted") plt.xlim([9600, 9750]) plt.ylim([-20, 30]) +plt.xlabel("Time [600s]") +plt.ylabel("Deviation of True Power [MW]") plt.legend() plot_scatter = plt.subplot(2, 2, 4) -plt.title("SVR Label and True Measurement") +plt.title("SVR-predicted and True Measurement") col = abs(y - y_hat) -plt.scatter(y, y_hat, c=col, linewidth=0.0, cmap=plt.cm.jet) -plt.xlabel("Y") -plt.ylabel("SVR Label") +plt.scatter(y, y_hat, c=col, linewidth=0.0, cmap=cmap) +plt.xlabel("True Measurement [MW]") +plt.ylabel("SVR-predicted Measurement [MW]") plt.xlim([0, 30]) plt.ylim([0, 30]) # y_hat (SVR) might be greater than 30 and smaller than 0. diff --git a/examples/preprocessing/smoothing.py b/examples/preprocessing/smoothing.py new file mode 100644 index 0000000..12e0918 --- /dev/null +++ b/examples/preprocessing/smoothing.py @@ -0,0 +1,32 @@ +""" +Smoothing of a Time Series +-------------------------------------------------- + +This example shows how to smooth a time series with the +smoothen preprocessing operator. +""" + +# Author: Nils A. Treiber +# Author: Jendrik Poloczek +# License: BSD 3 clause + +from matplotlib import dates +import matplotlib.pylab as plt +import numpy as np +import datetime, time + +from windml.datasets.nrel import NREL +from windml.visualization.plot_timeseries import plot_timeseries +from windml.preprocessing.preprocessing import smoothen + +ds = NREL() +turbine = ds.get_turbine(NREL.park_id['tehachapi'], 2004) +measurements = turbine.get_measurements() + +smoothed_measurements = smoothen(measurements, interval_length=11) +turbine.add_measurements(smoothed_measurements) + +plot_timeseries(turbine, 0, 288) + + + diff --git a/examples/statistics/prop_ramps.py b/examples/statistics/prop_ramps.py new file mode 100644 index 0000000..922d7bc --- /dev/null +++ b/examples/statistics/prop_ramps.py @@ -0,0 +1,44 @@ +""" +Propagating Wind Changes +-------------------------------------------------- +""" + +# Author: Jendrik Poloczek +# Author: Oliver Kramer +# License: BSD 3 clause + +import matplotlib.pyplot as plt +import numpy as np +from windml.datasets.nrel import NREL + +ds = NREL() + +park_id = NREL.park_id['tehachapi'] +windpark = NREL().get_windpark(park_id, 3, 2004) +turbines = windpark.get_turbines() +t1, t2 = turbines[0], turbines[1] + +i=7 +window_size = 200 +steps = 3 + +X1 = t1.get_measurements()['speed'] +X2 = t2.get_measurements()['speed'] +X1 = X1[i*window_size:i*window_size+window_size] +X2 = X2[i*window_size:i*window_size+window_size] + +x1= [] +x2 = [] +colors = [] +for i in xrange(len(X1)-steps): + x1.append(X1[i]) + x2.append(X2[i+steps]) + # depending on the distance to the diagonal, + # choose the color of the event + colors.append(abs(X1[i]-X2[i+steps])) + ax = plt.subplot(1, 1, 1) +plt.title("Propagating Ramps, Horizon = "+str(steps)) +ax.scatter(x1, x2, s=15, c=colors, linewidth=0.0, cmap=plt.cm.jet) +plt.xlabel("Wind Speed in time t (Turbine 1)") +plt.ylabel("Wind Speed in time t + 3 (Turbine 2)") +plt.show() diff --git a/examples/statistics/ramps.py b/examples/statistics/ramps.py index c4149d7..96fed84 100644 --- a/examples/statistics/ramps.py +++ b/examples/statistics/ramps.py @@ -8,6 +8,7 @@ """ # Author: Oliver Kramer +# Author: Jendrik Poloczek # License: BSD 3 clause import matplotlib.pyplot as plt @@ -41,6 +42,9 @@ colors.append(abs(X[i]-X[i+steps])) ax = plt.subplot(2, 2, j) plt.title("Ramps, Horizon = "+str(steps)) + plt.xlabel("Wind Speed [m/s] in time t") + plt.ylabel("Wind Speed [m/s] in time t + " +str(steps)) + ax.scatter(x1, x2, s=15, c=colors, linewidth=0.0, cmap=plt.cm.jet) j+=1 diff --git a/examples/statistics/windspeed_histogram.py b/examples/statistics/windspeed_histogram.py index 4ea1e5c..ee6dd6c 100644 --- a/examples/statistics/windspeed_histogram.py +++ b/examples/statistics/windspeed_histogram.py @@ -11,10 +11,14 @@ import matplotlib.pyplot as plt from pylab import plt from windml.datasets.nrel import NREL +from windml.visualization.colorset import colorset ds = NREL() turbine = ds.get_turbine(NREL.park_id['cheyenne'], 2004) speeds = map(lambda x : x[2], turbine.measurements) -plt.hist(speeds, color="#c4d8eb", bins=10, normed = 1) +plt.title("Histogram of Wind Speeds") +plt.xlabel("Wind Speed [m/s] in time t") +plt.ylabel("Relative Frequency") +plt.hist(speeds, color=colorset[0], bins=10, normed = 1) plt.show() diff --git a/examples/statistics/windspeed_joined.py b/examples/statistics/windspeed_joined.py new file mode 100644 index 0000000..077ed13 --- /dev/null +++ b/examples/statistics/windspeed_joined.py @@ -0,0 +1,23 @@ +""" +Joined Probability of Wind Speeds +------------------------------------------------------------------------- + +This example plots show the joined probability of wind speeds. +""" + +# Author: Jendrik Poloczek +# License: BSD 3 clause + +import numpy as np + +from windml.datasets.nrel import NREL +from windml.visualization.plot_wind_interdependecies import plot_joined + +ds = NREL() +windpark = ds.get_windpark(NREL.park_id['tehachapi'], 3, 2004) + +turbines = windpark.get_turbines() +turbine_a = turbines[0] +turbine_b = turbines[1] + +plot_joined(turbine_a, turbine_b) diff --git a/examples/visualization/sequence.py b/examples/visualization/sequence.py index 027acd3..a0123a1 100644 --- a/examples/visualization/sequence.py +++ b/examples/visualization/sequence.py @@ -2,9 +2,10 @@ Sequence Visualization Based on ISOMAP ------------------------------------------------------------------------- -This example allows to visualize high-dimensional wind time-series, employing dimensionality reduction. A wind speed sequence is mapped into a -3-dimensional latent space to monitor its intrinsic structure onto one time axis. -The values of the 3-dimensional latent space are normalized and mapped to RGB-values. +This example allows to visualize high-dimensional wind time-series, employing +dimensionality reduction. A wind speed sequence is mapped into a 3-dimensional +latent space to monitor its intrinsic structure onto one time axis. The values +of the 3-dimensional latent space are normalized and mapped to RGB-values. Thereby, the mapping maintains important properties of the original high-dimensional data so that varying wind conditions and seasonal changes can be monitored. @@ -19,6 +20,7 @@ from sklearn import manifold, decomposition from windml.datasets.nrel import NREL +from windml.visualization.colorset import cmap # load data and define parameters / training and test sequences K = 30 diff --git a/examples/visualization/wind_embeddings.py b/examples/visualization/wind_embeddings.py index 704e1c8..694bf23 100644 --- a/examples/visualization/wind_embeddings.py +++ b/examples/visualization/wind_embeddings.py @@ -19,6 +19,7 @@ from sklearn import manifold, datasets, decomposition, lda from windml.visualization.plot_timeseries import plot_timeseries +from windml.visualization.colorset import cmap from windml.datasets.nrel import NREL K = 30 @@ -40,7 +41,7 @@ def plot_embedding(X, title=None, j=1): ax = plt.subplot(2, 2, j) for i in range(X.shape[0]): plt.text(X[i, 0], X[i, 1], str(int(y[i])), - color = plt.cm.jet(y[i] / 30.), + color = cmap(y[i] / 30.), fontdict={'weight': 'bold', 'size': 9}) if title is not None: diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..79e29cb --- /dev/null +++ b/setup.py @@ -0,0 +1,5 @@ +from setuptools import setup, find_packages + +setup(name='windml', + packages=find_packages(exclude=['examples', 'tests']), + include_package_data=True) diff --git a/windml/benchmark/__init__.py b/windml/benchmark/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/windml/benchmark/benchmark.py b/windml/benchmark/benchmark.py new file mode 100644 index 0000000..227cba9 --- /dev/null +++ b/windml/benchmark/benchmark.py @@ -0,0 +1,123 @@ +""" +Copyright (c) 2013, +Fabian Gieseke, Justin P. Heinermann, Oliver Kramer, Jendrik Poloczek, +Nils A. Treiber +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + Neither the name of the Computational Intelligence Group of the University + of Oldenburg nor the names of its contributors may be used to endorse or + promote products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +from testbeds import NREL_4_parks, QuickNDirty +from numpy import arange +import matplotlib.pyplot as plt +from windml.visualization.colorset import colorset + +class Benchmark(): + + testbeds = {'NREL_4_parks' : NREL_4_parks, + 'QuickNDirty' : QuickNDirty} + results = {} + + def _mse(self, y_hat, y_test): + mse_y_hat = 0 + for i in range(0, len(y_hat)): + mse_y_hat += (y_hat[i] - y_test[i]) ** 2 + + return (1.0/len(y_hat)) * mse_y_hat + + def run(self, regressor, testbed_name, parallel=False): + parks = self.testbeds[testbed_name]() + + def run_park(job): + park, testbed = job + wp_train, wp_test = testbed + + regressor.fit(wp_train) + y_hat, y_test = regressor.predict(wp_test) + + statistics = {'mse': self._mse(y_hat, y_test)} + return {'park' : park, 'statistics' : statistics} + + jobs = [] + for k, v in parks.iteritems(): + jobs.append((k,v)) + + # parallel + if(parallel == True): + from playdoh import map as pmap + from multiprocessing import cpu_count + cpus = cpu_count() + self.results[regressor] = pmap(run_park, jobs, cpu = cpus) + else: + # sequential + self.results[regressor] = map(run_park, jobs) + + return self.results + + def print_results(self): + for regressor, rresults in self.results.iteritems(): + print "Results for regressor %s" % str(regressor) + for rresult in rresults: + park = rresult['park'] + statistics = rresult['statistics'] + print "%s MSE: %f" % (park, statistics['mse']) + + def visualize_mse_on_parks(self): + index = arange(len(self.results[self.results.keys()[0]])) + bar_width = 0.3 + + current_bar = 0 + for regressor, rresults in self.results.iteritems(): + + park_names = [] + mses = [] + + for result in rresults: + park = result['park'] + park_names.append(park) + mse = result['statistics']['mse'] + mses.append(mse) + + rects1 = plt.bar(index + current_bar * bar_width, + mses, + bar_width, + color=colorset[current_bar], + label=regressor.__class__.__name__) + + current_bar = current_bar + 1 + + plt.xlabel('Park') + plt.ylabel('MSE [MW]') + plt.title('MSE [MW] by Park') + + plt.xticks(index + (current_bar * bar_width) / 2.0,\ + park_names) + plt.legend() + + plt.tight_layout() + plt.show() + diff --git a/windml/benchmark/testbeds.py b/windml/benchmark/testbeds.py new file mode 100644 index 0000000..e9e5462 --- /dev/null +++ b/windml/benchmark/testbeds.py @@ -0,0 +1,117 @@ +""" +Copyright (c) 2013, +Fabian Gieseke, Justin P. Heinermann, Oliver Kramer, Jendrik Poloczek, +Nils A. Treiber +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + Neither the name of the Computational Intelligence Group of the University + of Oldenburg nor the names of its contributors may be used to endorse or + promote products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +def NREL_4_parks(): + """ Four parks, with 5 km radius, 2 years, horizon and feature + window equals 30 minutes.""" + + from windml.datasets.nrel import NREL + from windml.preprocessing.preprocessing import repair_nrel + + parks = {'palmsprings' : 1175, + 'reno' : 11637, + 'casper' : 23167, + 'carway' : 30498} + + testbed = {} + + # return X,y tuples + for park in parks.keys(): + + # training data ---- + windpark_train = NREL().get_windpark_nearest(parks[park], 5, 2004) + target = windpark_train.get_target() + + # repair NREL data + turbines = windpark_train.get_turbines() + for t in range(len(turbines)): + turbines[t].add_measurements(\ + repair_nrel(turbines[t].get_measurements()[:39457])) + + # test data ---- + windpark_test = NREL().get_windpark_nearest(parks[park], 5, 2005) + target = windpark_test.get_target() + + # repair NREL data + turbines = windpark_test.get_turbines() + for t in range(len(turbines)): + turbines[t].add_measurements(\ + repair_nrel(turbines[t].get_measurements()[:39457])) + + testbed[park] = (windpark_train, windpark_test) + + return testbed + +def QuickNDirty(): + """ Four parks, with 5 km radius, 2 years, horizon and feature + window equals 30 minutes.""" + + from windml.datasets.nrel import NREL + from windml.preprocessing.preprocessing import repair_nrel + + parks = {'palmsprings' : 1175, + 'reno' : 11637, + 'casper' : 23167, + 'carway' : 30498} + + + testbed = {} + + # return X,y tuples + for park in parks.keys(): + + # training data ---- + windpark_train = NREL().get_windpark_nearest(parks[park], 5, 2004) + target = windpark_train.get_target() + + # repair NREL data + turbines = windpark_train.get_turbines() + for t in range(len(turbines)): + turbines[t].add_measurements(\ + repair_nrel(turbines[t].get_measurements()[:3457])) + + # test data ---- + windpark_test = NREL().get_windpark_nearest(parks[park], 5, 2005) + target = windpark_test.get_target() + + # repair NREL data + turbines = windpark_test.get_turbines() + for t in range(len(turbines)): + turbines[t].add_measurements(\ + repair_nrel(turbines[t].get_measurements()[:3457])) + + testbed[park] = (windpark_train, windpark_test) + + return testbed + + diff --git a/windml/prediction/__init__.py b/windml/prediction/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/windml/prediction/naive.py b/windml/prediction/naive.py new file mode 100644 index 0000000..b3f6fe5 --- /dev/null +++ b/windml/prediction/naive.py @@ -0,0 +1,55 @@ +""" +Copyright (c) 2013, +Fabian Gieseke, Justin P. Heinermann, Oliver Kramer, Jendrik Poloczek, +Nils A. Treiber +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + Neither the name of the Computational Intelligence Group of the University + of Oldenburg nor the names of its contributors may be used to endorse or + promote products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +class Naive(): + """ Naive model """ + + def __init__(self): + self.horizon = 3 + + def fit(self, wp_train): + return # nothing to fit here + + def predict(self, wp_test): + target = wp_test.get_target() + measurements = target.get_measurements() + + y_predict = [] + y_test = [] + for i in range(measurements.shape[0] - self.horizon): + y_predict.append(measurements[i]['corrected_score']) + y_test.append(measurements[i + self.horizon]['corrected_score']) + + return y_predict, y_test + + diff --git a/windml/prediction/smoothed_linreg.py b/windml/prediction/smoothed_linreg.py new file mode 100644 index 0000000..bd84970 --- /dev/null +++ b/windml/prediction/smoothed_linreg.py @@ -0,0 +1,72 @@ +""" +Copyright (c) 2013, +Fabian Gieseke, Justin P. Heinermann, Oliver Kramer, Jendrik Poloczek, +Nils A. Treiber +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + Neither the name of the Computational Intelligence Group of the University + of Oldenburg nor the names of its contributors may be used to endorse or + promote products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +from sklearn import linear_model +from windml.mapping.power_mapping import PowerMapping +from windml.preprocessing.preprocessing import smoothen +from windml.visualization.colorset import colorset + +class SmoothedLinreg(): + """ Standard spatio-temporal model with smoothing preprocessing, + PowerMapping and linear regression. """ + + def __init__(self, smooth=3): + self.model = linear_model.LinearRegression() + self.feature_window = 3 + self.horizon = 3 + self.smooth = smooth + + def fit(self, wp_train): + target = wp_train.get_target() + + # smoothing of all time series + turbines = wp_train.get_turbines() + for turbine in turbines: + turbine.add_measurements(\ + smoothen(turbine.get_measurements(), interval_length=self.smooth)) + + mapping = PowerMapping() + X_train = mapping.get_features_park(wp_train, self.feature_window, self.horizon) + y_train = mapping.get_labels_turbine(target, self.feature_window, self.horizon) + + self.model.fit(X_train, y_train) + + def predict(self, wp_test): + target = wp_test.get_target() + mapping = PowerMapping() + X_test = mapping.get_features_park(wp_test, self.feature_window, self.horizon) + y_test = mapping.get_labels_turbine(target, self.feature_window, self.horizon) + + return self.model.predict(X_test), y_test + + diff --git a/windml/prediction/std_linreg.py b/windml/prediction/std_linreg.py new file mode 100644 index 0000000..a4a645f --- /dev/null +++ b/windml/prediction/std_linreg.py @@ -0,0 +1,62 @@ +""" +Copyright (c) 2013, +Fabian Gieseke, Justin P. Heinermann, Oliver Kramer, Jendrik Poloczek, +Nils A. Treiber +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + Neither the name of the Computational Intelligence Group of the University + of Oldenburg nor the names of its contributors may be used to endorse or + promote products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +from sklearn import linear_model +from windml.mapping.power_mapping import PowerMapping + +class StdLinreg(): + """ Standard spatio-temporal model with PowerMapping and + linear regression. """ + + def __init__(self): + self.model = linear_model.LinearRegression() + self.feature_window = 3 + self.horizon = 3 + + def fit(self, wp_train): + target = wp_train.get_target() + mapping = PowerMapping() + X_train = mapping.get_features_park(wp_train, self.feature_window, self.horizon) + y_train = mapping.get_labels_turbine(target, self.feature_window, self.horizon) + + self.model.fit(X_train, y_train) + + def predict(self, wp_test): + target = wp_test.get_target() + mapping = PowerMapping() + X_test = mapping.get_features_park(wp_test, self.feature_window, self.horizon) + y_test = mapping.get_labels_turbine(target, self.feature_window, self.horizon) + + return self.model.predict(X_test), y_test + + diff --git a/windml/prediction/univariate_linreg.py b/windml/prediction/univariate_linreg.py new file mode 100644 index 0000000..3b3fa48 --- /dev/null +++ b/windml/prediction/univariate_linreg.py @@ -0,0 +1,68 @@ +""" +Copyright (c) 2013, +Fabian Gieseke, Justin P. Heinermann, Oliver Kramer, Jendrik Poloczek, +Nils A. Treiber +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + Neither the name of the Computational Intelligence Group of the University + of Oldenburg nor the names of its contributors may be used to endorse or + promote products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +from sklearn import linear_model +from windml.mapping.power_mapping import PowerMapping + +class UnivariateLinreg(): + """ Univariate model with PowerMapping and linear regression. This model + only uses measurements of the target turbine for training """ + + def __init__(self): + self.model = linear_model.LinearRegression() + self.feature_window = 3 + self.horizon = 3 + + def fit(self, wp_train): + target = wp_train.get_target() + turbines = wp_train.get_turbines() + wp_train.turbines = [turbines[-1]] # only use target + + mapping = PowerMapping() + X_train = mapping.get_features_park(wp_train, self.feature_window, self.horizon) + y_train = mapping.get_labels_turbine(target, self.feature_window, self.horizon) + + self.model.fit(X_train, y_train) + + def predict(self, wp_test): + target = wp_test.get_target() + + turbines = wp_test.get_turbines() + wp_test.turbines = [turbines[-1]] # only use target + + mapping = PowerMapping() + X_test = mapping.get_features_park(wp_test, self.feature_window, self.horizon) + y_test = mapping.get_labels_turbine(target, self.feature_window, self.horizon) + + return self.model.predict(X_test), y_test + diff --git a/windml/preprocessing/preprocessing.py b/windml/preprocessing/preprocessing.py index 629b8a3..39e1de6 100644 --- a/windml/preprocessing/preprocessing.py +++ b/windml/preprocessing/preprocessing.py @@ -42,6 +42,10 @@ from windml.preprocessing.duplicate_remover import DuplicateRemover from windml.preprocessing.nrel_repair import NRELRepair from windml.preprocessing.mreg_interpolation import MRegInterpolation +from windml.preprocessing.smoother import Smoother + +def smoothen(timeseries, **args): + return Smoother().smooth(timeseries, args) def repair_nrel(timeseries): return NRELRepair().repair(timeseries) diff --git a/windml/preprocessing/smoother.py b/windml/preprocessing/smoother.py new file mode 100644 index 0000000..8cb644a --- /dev/null +++ b/windml/preprocessing/smoother.py @@ -0,0 +1,66 @@ +""" +Copyright (c) 2013, +Fabian Gieseke, Justin P. Heinermann, Oliver Kramer, Jendrik Poloczek, +Nils A. Treiber +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + Neither the name of the Computational Intelligence Group of the University + of Oldenburg nor the names of its contributors may be used to endorse or + promote products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +from numpy import zeros, int32, float32 + +class Smoother(object): + def smooth(self, timeseries, args): + ilen = args['interval_length'] + + new_amount = timeseries.shape[0] + smoothed_ts = zeros((new_amount,), dtype=[('date', int32),\ + ('corrected_score', float32),\ + ('speed', float32)]) + + if(ilen % 2 == 0 or ilen == 1): + raise Exception("interval length must be odd and not 1.") + + padding = int((ilen - 1) / 2.0) + + # copy data in the beginning and the end cannot be smoothed + # because of padding + for i in range(0, padding): + smoothed_ts[i] = timeseries[i] # copy old data + for i in range(len(timeseries) - padding, len(timeseries)): + smoothed_ts[i] = timeseries[i] # copy old data + + # smooth data + for i in range(padding, len(timeseries) - padding): + sp_mean = timeseries[(i - padding):(i + padding)]['speed'].mean() + cs_mean = timeseries[(i - padding):(i + padding)]['corrected_score'].mean() + + smoothed_ts[i] = timeseries[i] # copy old data + smoothed_ts[i]['corrected_score'] = cs_mean + smoothed_ts[i]['speed'] = sp_mean + + return smoothed_ts diff --git a/windml/visualization/colorset.py b/windml/visualization/colorset.py new file mode 100644 index 0000000..45b24aa --- /dev/null +++ b/windml/visualization/colorset.py @@ -0,0 +1,18 @@ +from pylab import * + +colorset = ["#6698cc","#c4d8eb","#cccccc","#69bb3e","#b8e1a5"] + +cdict = {'red': ((0.0, 0.4, 0.4), + (0.5, 0.72, 0.72), + (1.0, 0.41, 0.41)), + 'green': ((0.0, 0.6, 0.6), + (0.5, 0.88, 0.88), + (1.0, 0.73, 0.73)), + 'blue': ((0.0, 0.8, 0.8), + (0.5, 0.65, 0.65), + (1.0, 0.01, 0.01))} + +0.412, 0.733, 0.012 + +cmap = matplotlib.colors.LinearSegmentedColormap(\ + 'windml_colormap',cdict,256) diff --git a/windml/visualization/plot_multiple_timeseries.py b/windml/visualization/plot_multiple_timeseries.py index f6d3174..32168c2 100644 --- a/windml/visualization/plot_multiple_timeseries.py +++ b/windml/visualization/plot_multiple_timeseries.py @@ -72,11 +72,11 @@ def plot_multiple_timeseries(windpark, show = True): poly.set_alpha(0.7) ax.add_collection3d(poly, zs=zs, zdir='y') - ax.set_xlabel('Time') + ax.set_xlabel('Time [600s]') ax.set_xlim3d(0, length) ax.set_ylabel('Turbine') ax.set_ylim3d(-1, number_turbines) - ax.set_zlabel('Power') + ax.set_zlabel('Power [MW]') ax.set_zlim3d(0,30.) plt.title("Time Series Comparison") diff --git a/windml/visualization/plot_timeseries.py b/windml/visualization/plot_timeseries.py index ac557eb..a6732cf 100644 --- a/windml/visualization/plot_timeseries.py +++ b/windml/visualization/plot_timeseries.py @@ -67,7 +67,7 @@ def plot_timeseries(turbine, start, end, show = True): ax.grid(True) plt.ylim(-2, 32) - plt.ylabel("Corrected Power (MW), Wind Speed (m/s)") + plt.ylabel("Corrected Power [MW], Wind Speed [m/s]") plt.plot(d_time[start:end], y1[start:end], label = 'Power Production') plt.plot(d_time[start:end], y2[start:end], label = 'Wind Speed') plt.legend(loc='lower right') diff --git a/windml/visualization/plot_wind_interdependecies.py b/windml/visualization/plot_wind_interdependecies.py new file mode 100644 index 0000000..f4b29c5 --- /dev/null +++ b/windml/visualization/plot_wind_interdependecies.py @@ -0,0 +1,77 @@ +""" +Copyright (c) 2013, +Fabian Gieseke, Justin P. Heinermann, Oliver Kramer, Jendrik Poloczek, +Nils A. Treiber +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + Neither the name of the Computational Intelligence Group of the University + of Oldenburg nor the names of its contributors may be used to endorse or + promote products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" +from mpl_toolkits.mplot3d import axes3d +from matplotlib import cm +import matplotlib.pyplot as plt +import numpy as np +import time + +def plot_joined(turbine_a, turbine_b): + + speeds_a = turbine_a.get_measurements()['speed'] + speeds_b = turbine_b.get_measurements()['speed'] + + bins = (0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 25) + X_a, Y_a = np.histogram(speeds_a, bins = bins, density = True) + X_b, Y_b = np.histogram(speeds_b, bins = bins, density = True) + + akv = {k: v for k, v in zip(Y_a, X_a)} + bkv = {k: v for k, v in zip(Y_b, X_b)} + + def join(x,y): + return akv[x] * bkv[y] + + def generate(X,Y): + Z = np.zeros(X.shape) + for i in range(X.shape[0]): + Z[i] = map(join, X[i], Y[i]) + return Z + + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + + xs = range(0, 25, 2) + ys = range(0, 25, 2) + X, Y = np.meshgrid(xs, ys) + Z = generate(X, Y) + + plot = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, linewidth=0,\ + cmap=cm.coolwarm) + + ax.set_xlabel("Turbine 1") + ax.set_ylabel("Turbine 2") + ax.set_zlabel("Joined Windspeed Probability") + ax.set_title("Joined Windspeed Probability") + + plt.show() + diff --git a/windml/visualization/show_flip_book.py b/windml/visualization/show_flip_book.py index f8fe1d0..0466ca8 100644 --- a/windml/visualization/show_flip_book.py +++ b/windml/visualization/show_flip_book.py @@ -37,6 +37,7 @@ import time import math from mpl_toolkits.basemap import Basemap, shiftgrid, cm +from windml.visualization.colorset import cmap def show_flip_book(windpark, num_plots, start_time, diff_time, show=True): """Plots a flip book of a given windpark @@ -124,7 +125,8 @@ def show_flip_book(windpark, num_plots, start_time, diff_time, show=True): m.drawmeridians(meridians,labels=[True,False,False,True]) m.shadedrelief() m.drawcoastlines - m.scatter(rel_inputs_lon, rel_inputs_lat, c = zlist, s = 35, vmin = 0.0, vmax = 35) + m.scatter(rel_inputs_lon, rel_inputs_lat, c = zlist, s = 35, vmin = 0.0, vmax = 35,\ + cmap=cmap) plt.title(datetime.datetime.fromtimestamp(unix_timestamps[i-1]).strftime('%Y-%m-%d %H:%M:%S')) plt.colorbar()