chanelcolgate / hydroelectric-project

0 stars 0 forks source link

RandomForest_SMP #24

Closed quockhanh1198 closed 2 years ago

quockhanh1198 commented 2 years ago

https://colab.research.google.com/drive/1hNO8g2w5T1U2gLsunLgJZqsnrwq13uLK?usp=sharing

chanelcolgate commented 2 years ago

Load Dataset

import logging, urllib3, shutil
# Configure the root logger at INFO level so the logging.info() calls in this
# script are emitted.
logging.basicConfig(level=logging.INFO)

def download_dataset(url, LOCAL_FILE_NAME):
  """Download ``url`` and save the response body to ``LOCAL_FILE_NAME``.

  The response is requested with ``preload_content=False`` and copied to the
  file with ``shutil.copyfileobj``, so the body is streamed rather than read
  into memory in one piece.

  Args:
    url: Address of the file to fetch with an HTTP GET request.
    LOCAL_FILE_NAME: Path of the local file, opened in ``"wb"`` mode.

  Raises:
    urllib3.exceptions.HTTPError: If the server answers with a non-2xx status.
  """
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
  http = urllib3.PoolManager()
  with http.request("GET", url, preload_content=False) as res:
    # Check the status before opening the output file: otherwise an error
    # page (404, 500, ...) would be saved as if it were the dataset.
    if not 200 <= res.status < 300:
      raise urllib3.exceptions.HTTPError(
          "Download of %s failed with HTTP status %s" % (url, res.status))
    with open(LOCAL_FILE_NAME, "wb") as out_file:
      shutil.copyfileobj(res, out_file)
  logging.info("Download completed.")

# Remote location of the CSV and the local file name it is saved under.
URL = 'https://raw.githubusercontent.com/chanelcolgate/hydroelectric-project/master/data/SMP_weather.csv'
LOCAL_FILE_NAME = "SMP_weather.csv"

# Announce the start, then fetch the file.
logging.info("Started download script")
download_dataset(URL, LOCAL_FILE_NAME)

Prepare Dataset

from pandas import read_csv

# Parse the first CSV column as timestamps and use it as the index, named
# 'datetime'. The list form of parse_dates is used because the dict form and
# the infer_datetime_format flag are deprecated in pandas 2.x.
dataset = read_csv(LOCAL_FILE_NAME, header=0, parse_dates=[0], index_col=0)
dataset.index.name = 'datetime'

# Take the September 2021 rows. Selecting rows by a partial date string has
# to go through .loc: dataset['2021-09'] is a column lookup in pandas >= 2.0
# and raises KeyError.
dfpre = dataset.loc['2021-09']
steps = 48
dataset_prediction = dfpre.copy()
# 'Actual' holds the SMP value found `steps` rows later in the frame.
dataset_prediction['Actual'] = dataset_prediction['SMP'].shift(-steps)
# The last `steps` rows have no later value and become NaN; dropna() removes
# them (together with any other row that contains a NaN).
dataset_prediction = dataset_prediction.dropna()
dataset_prediction.to_csv('SMP_Pre3.csv')

# Read the file SMP_Pre3.csv back in, again indexed by the 'datetime' column.
dataset_prediction = read_csv('SMP_Pre3.csv', header=0,
                              parse_dates=['datetime'], index_col=['datetime'])

Scale And Create Input

from sklearn.preprocessing import MinMaxScaler
from pandas import DataFrame
from pandas import concat

def table2lags(table, max_lag, min_lag=0, separator='_'):
    """Return a DataFrame holding lagged copies of every column of ``table``.

    For each lag from ``min_lag`` to ``max_lag`` (both inclusive) the whole
    table is shifted by that many rows and its columns are renamed to
    ``<column><separator><lag>``. The shifted tables are concatenated side by
    side, ordered by lag. Rows that have no value at a given lag contain NaN.

    Args:
        table: DataFrame whose columns are lagged. It is not modified.
        max_lag: Largest shift, in rows, to include.
        min_lag: Smallest shift, in rows, to include.
        separator: Text placed between the column label and the lag number.

    Returns:
        A DataFrame with the same index as ``table`` and one column per
        (lag, original column) pair.
    """
    lagged = []
    for lag in range(min_lag, max_lag + 1):
        # shift() already returns a new object, so no extra copy is needed.
        shifted = table.shift(lag)
        # Format the label instead of concatenating so that non-string column
        # labels (e.g. the default integer labels) do not raise TypeError.
        shifted.columns = [f"{col}{separator}{lag}" for col in table.columns]
        lagged.append(shifted)
    return concat(lagged, axis=1)

feature_columns = ['SMP', 'Temperature', 'Wind Speed', 'Visibility', 'Cloud Cover', 'Actual']

# Scale the five input columns together with 'Actual' into the range [0, 1].
scaled_in = MinMaxScaler(feature_range=(0,1))
scaled_input = scaled_in.fit_transform(dataset_prediction[feature_columns])

# A second scaler is fitted on the 'Actual' column alone.
scaled_out = MinMaxScaler(feature_range=(0,1))
scaled_output = scaled_out.fit_transform(dataset_prediction[['Actual']])

# Put the scaled array back into a labelled frame on the original datetime
# index, then keep only the input columns.
data_in = DataFrame(scaled_input, columns=feature_columns,
                    index=dataset_prediction.index)
data_in = data_in.drop(columns='Actual')

# Lags 48..336 of every input column.
data_in_lagged = table2lags(data_in, 336, 48)
# Day 29 is used to predict day 30.
# NOTE(review): 1344 presumably equals 28 days * 48 rows per day -- confirm.
X_test = data_in_lagged[1344:]
X_test.shape

Model

# Plot the actual and the predicted series against a 0..47 sample index.
# NOTE(review): `pyplot`, `actual` and `prediction` are not defined in the
# visible part of this page; they presumably come from the Model code that is
# missing here -- confirm before running.
x1 = list(range(48))
pyplot.plot(x1, actual, label='actual')
pyplot.plot(x1, prediction, label='predict')
pyplot.xticks([])  # an empty list removes the x-axis ticks
pyplot.legend()
pyplot.show()

- RMSE
```python
from math import sqrt
from sklearn.metrics import mean_squared_error
def measure_rmse(actual, predicted):
    """Return the root mean squared error (RMSE) of ``predicted`` vs ``actual``.

    The mean squared error is computed by sklearn's ``mean_squared_error`` and
    its square root is returned.
    """
    mse = mean_squared_error(actual, predicted)
    return sqrt(mse)

# Compute the RMSE of the forecast.
# NOTE(review): `actual` and `prediction` are not defined in the visible code;
# presumably they are produced by the Model step -- confirm.
measure_rmse(actual, prediction)

```

Tham khao

SMP6