Closed quockhanh1198 closed 2 years ago
import logging, urllib3, shutil
logging.basicConfig(level=logging.INFO)
def download_dataset(url, LOCAL_FILE_NAME):
    """Download ``url`` and stream the response body into ``LOCAL_FILE_NAME``.

    The body is copied to disk in chunks (``preload_content=False``) instead
    of being read into memory first.

    Args:
        url: Address fetched with an HTTP GET request.
        LOCAL_FILE_NAME: Path of the local file, opened in binary write mode
            (an existing file is overwritten).

    Raises:
        RuntimeError: If the server replies with a non-2xx status. The local
            file is not created or touched in that case, so an HTML error
            page is never saved as if it were the dataset.
    """
    # Silences urllib3's InsecureRequestWarning for the whole process.
    # NOTE(review): PoolManager() is created with default settings here, so
    # this suppression may be unnecessary -- confirm before removing.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    http = urllib3.PoolManager()
    with http.request("GET", url, preload_content=False) as res:
        if not 200 <= res.status < 300:
            raise RuntimeError(
                "Download of %s failed with HTTP status %s" % (url, res.status)
            )
        # Open the target only after the status check so a failed request
        # leaves no partial or bogus file behind.
        with open(LOCAL_FILE_NAME, "wb") as out_file:
            shutil.copyfileobj(res, out_file)
    logging.info("Download completed.")
# Remote location of the raw SMP/weather CSV and the local file it is saved to.
URL = 'https://raw.githubusercontent.com/chanelcolgate/hydroelectric-project/master/data/SMP_weather.csv'
LOCAL_FILE_NAME = "SMP_weather.csv"

# Announce the run, then fetch the file.
logging.info("Started download script")
download_dataset(URL, LOCAL_FILE_NAME)
from pandas import read_csv
# Load the downloaded CSV; the first column is parsed as timestamps and
# becomes the index, named 'datetime'.
# NOTE(review): `infer_datetime_format` and the dict form of `parse_dates`
# are deprecated in recent pandas releases -- confirm against the pandas
# version in use before relying on them.
dataset = read_csv(LOCAL_FILE_NAME, header=0, infer_datetime_format=True,
                   parse_dates={'datetime':[0]}, index_col=['datetime'])

# Select the September 2021 rows. Row selection by partial date string must
# go through `.loc`; plain `dataset['2021-09']` is treated as a column lookup
# by current pandas and raises KeyError.
dfpre = dataset.loc['2021-09']

# Number of rows the target is shifted by.
steps = 48
dataset_prediction = dfpre.copy()
# 'Actual' holds the SMP value `steps` rows later, i.e. the value to predict.
dataset_prediction['Actual'] = dataset_prediction['SMP'].shift(-steps)
# The last `steps` rows have no future value and are dropped, together with
# any other row containing NaN.
dataset_prediction = dataset_prediction.dropna()
dataset_prediction.to_csv('SMP_Pre3.csv')

# Read SMP_Pre3.csv back in, again indexed by the parsed 'datetime' column.
dataset_prediction = read_csv('SMP_Pre3.csv', header=0, infer_datetime_format=True,
                              parse_dates=['datetime'], index_col=['datetime'])
from sklearn.preprocessing import MinMaxScaler
from pandas import DataFrame
from pandas import concat
def table2lags(table, max_lag, min_lag=0, separator='_'):
    """Return a DataFrame containing lagged copies of every column of ``table``.

    For each lag ``i`` from ``min_lag`` to ``max_lag`` (both inclusive) the
    whole table is shifted down by ``i`` rows and its columns are renamed to
    ``<column><separator><i>``. The shifted tables are concatenated side by
    side in increasing lag order. Rows that have no value at a given lag
    contain NaN.

    Args:
        table: DataFrame whose columns are lagged.
        max_lag: Largest lag (in rows) to include.
        min_lag: Smallest lag (in rows) to include; defaults to 0.
        separator: String placed between the column name and the lag number.

    Returns:
        DataFrame with the same index as ``table`` and
        ``(max_lag - min_lag + 1) * len(table.columns)`` columns.

    Raises:
        ValueError: If ``max_lag`` is smaller than ``min_lag``, which would
            leave nothing to concatenate.
    """
    if max_lag < min_lag:
        raise ValueError(
            "max_lag (%s) must be >= min_lag (%s)" % (max_lag, min_lag)
        )

    lagged = []
    for lag in range(min_lag, max_lag + 1):
        # shift() already returns a new frame, so no extra copy is needed
        # before relabelling its columns.
        shifted = table.shift(lag)
        # str() lets non-string column labels (e.g. integers) be lagged too.
        shifted.columns = [str(name) + separator + str(lag) for name in table.columns]
        lagged.append(shifted)
    return concat(lagged, axis=1)
# Columns fed to the input scaler; 'Actual' is scaled with the rest and
# removed again before the lagged features are built.
feature_columns = ['SMP', 'Temperature', 'Wind Speed', 'Visibility', 'Cloud Cover', 'Actual']

# Scale the inputs to the [0, 1] range.
scaled_in = MinMaxScaler(feature_range=(0,1))
scaled_input = scaled_in.fit_transform(dataset_prediction[feature_columns])

# A separate scaler fitted on the target alone, kept so predictions can be
# mapped back to the original scale later.
scaled_out = MinMaxScaler(feature_range=(0,1))
scaled_output = scaled_out.fit_transform(dataset_prediction[['Actual']])

# Rebuild a labelled frame from the scaled array, aligned with the original
# datetime index, and discard the target column.
data_in = DataFrame(scaled_input, columns=feature_columns, index=dataset_prediction.index)
data_in = data_in.drop(columns='Actual')

# Lagged features for every lag from 48 to 336 rows (inclusive).
data_in_lagged = table2lags(data_in, 336, 48)

# Day 29 is used to predict day 30.
# NOTE(review): 1344 presumably equals 28 days x 48 rows per day, so the
# slice keeps the rows after the first 28 days -- confirm against the data.
X_test = data_in_lagged[1344:]
X_test.shape
!unzip "/content/RF_SMP.zip" -d "/"
!pip install joblib
from sklearn.externals import joblib
# load
loaded_rf = joblib.load("RF_SMP.joblib")
prediction = loaded_rf.predict(X_test)
prediction = scaled_out.inverse_transform(prediction.reshape(-1, 1))
actual = dataset['2021-09-30']['SMP']
import numpy as np
from matplotlib import pyplot
# Plot the observed values and the forecast on a shared x axis.
# The original had all of these statements fused onto a single line, which
# is not valid Python; they are separated here, one statement per line.
x1 = list(range(48))  # one x position per plotted point (48 points)
pyplot.plot(x1, actual, label='actual')
pyplot.plot(x1, prediction, label='predict')
pyplot.xticks([])  # hide the x tick marks
pyplot.legend()
pyplot.show()
- RMSE
```python
from math import sqrt
from sklearn.metrics import mean_squared_error
def measure_rmse(actual, predicted):
    """Return the root mean squared error (RMSE) of ``predicted`` against ``actual``.

    Args:
        actual: Observed values.
        predicted: Predicted values compared with ``actual``.

    Returns:
        The square root of ``mean_squared_error(actual, predicted)``.
    """
    mse = mean_squared_error(actual, predicted)
    return sqrt(mse)
# RMSE of the forecast against the observed values. This is a bare
# expression: the result is not stored or printed by the code itself.
measure_rmse(actual, prediction)
```

Colab notebook: https://colab.research.google.com/drive/1hNO8g2w5T1U2gLsunLgJZqsnrwq13uLK?usp=sharing