Open dinarmalik37 opened 5 hours ago
# Inspect the parameters of the fitted linear-regression model.
# scikit-learn exposes learned parameters with a trailing underscore
# (`coef_`, `intercept_`); the underscores had been lost from this paste.
# NOTE(review): `lr`, `y_test` and `pred` are not defined above this point in
# the file; this cell presumably ran after the model-fitting cell - confirm.
print(lr.coef_)
print(lr.intercept_)

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np

# Report fit quality and error metrics for `pred` against `y_test`.
print('R² Score = ', r2_score(y_test, pred))
print('MAE = ', mean_absolute_error(y_test, pred))
# RMSE is the square root of the mean squared error.
print('RMSE = ', np.sqrt(mean_squared_error(y_test, pred)))

import matplotlib.pyplot as plt
# Scatter the predicted sales against the actual sales stored in `results`.
# NOTE(review): `results` is built further down in this file; this cell
# presumably ran after that one - confirm the intended execution order.
actual = results['Penjualan real (Actual)']
predicted = results['Penjualan real (Predict)']

plt.figure(figsize=(10, 5))
plt.scatter(actual, predicted, color='blue', label='Predicted vs Actual')
# Plotting actual against itself draws the y = x reference line.
plt.plot(actual, actual, color='red', linewidth=2, label='Perfect Prediction Line')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('Actual vs Predicted Values')
plt.legend()
plt.grid(True)
plt.show()
# Per-product error metrics.
data = {
    "Produk": ["Bio Solar", "Pertalite", "Pertamax Turbo", "Pertamax", "Pertamina Dex"],
    "RMSE": [4.93, 3.56, 0.24, 1.26, 0.23],
    "MAE": [4.19, 2.34, 0.16, 0.86, 0.18],
}

# Multiply every metric by 100 so it can be printed with a '%' suffix below.
# The `*` operators had been lost from this paste.
data["RMSE"] = [value * 100 for value in data["RMSE"]]
data["MAE"] = [value * 100 for value in data["MAE"]]

# One report line per product.
for produk, rmse_pct, mae_pct in zip(data["Produk"], data["RMSE"], data["MAE"]):
    print(f"{produk} - RMSE: {rmse_pct:.2f}%, MAE: {mae_pct:.2f}%")
# Express each MAPE value as a percentage share of the sum of all values.
# NOTE(review): four of the five values are on the order of 1e14 while the
# first is ~0.44, so the first entry's share is effectively 0% - confirm
# these inputs are what was intended.
mape_values = [
    0.4407406724661195,
    345576011805501.1,
    102775132738909.39,
    68609354818462.65,
    73154835778935.28,
]

total_sum = sum(mape_values)

percentages = [(value / total_sum) * 100 for value in mape_values]
# Bare expression: displays the list when run as a notebook cell.
percentages
import pandas as pd
# Show the available columns, then report the mean of the target column.
# NOTE(review): `data` has to be a DataFrame with an 'Actual' column here;
# presumably it is the Actual/Predicted frame built at the end of the file -
# confirm the intended execution order.
print(data.columns)

target_column = 'Actual'  # Replace 'Actual' with the correct column name
data_target = data[target_column].mean()
print(f'Rata-rata nilai target: {data_target}')
# Express the RMSE as a percentage of the average target value.
RMSE = 1.7084453018220232e-15
average_target_value = 0.4677528089887641

percentage_error = (RMSE / average_target_value) * 100
# Bare expression: displays the value when run as a notebook cell.
percentage_error
# Build a synthetic monthly series of random amounts in [0, 3500).
# The MonthEnd offset object is used instead of the 'M' string alias, which
# newer pandas releases deprecate in favour of 'ME'; the offset object is
# accepted by both old and new versions.
dates = pd.date_range(start='2022-01-01', end='2023-09-30', freq=pd.offsets.MonthEnd())
data = np.random.rand(len(dates)) * 3500

df = pd.DataFrame({
    'Date': dates,
    'Amount': data,
})

df['Date'] = pd.to_datetime(df['Date'])

# Keep only rows inside the inclusive reporting window.
start_date = '2022-01-01'
end_date = '2023-09-30'
filtered_df = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]

# Overall total and the per-calendar-month mean of the amounts.
total_transactions = filtered_df['Amount'].sum()
average_per_month = filtered_df.groupby(filtered_df['Date'].dt.to_period('M'))['Amount'].mean()

print(f"Total Transactions: {total_transactions}")
print("Rata-rata per Bulan:")
print(average_per_month)
# seaborn is only imported further down in this file, so import it here to
# keep this cell runnable in file order.
import seaborn as sns

# Scatter plot of the amounts in `filtered_df` over time.
plt.figure(figsize=(10, 6))
sns.scatterplot(x='Date', y='Amount', data=filtered_df)

plt.title('Scatter Plot untuk Transaksi Penjualan Pertalite dari Tahun 2022 hingga September 2023')
plt.xlabel('Date')
plt.ylabel('Amount')
plt.grid(True)
plt.show()

import matplotlib.pyplot as plt
actual = data  # If 'data' contains only actual values
# NOTE(review): `predicted` is uniform random noise in [0, 1), not model
# output, so the plot below does not reflect any fitted model.
predicted = np.random.rand(len(actual))

print(f"Total Transactions: {total_transactions}")
print("Rata-rata per Bulan:")
print(average_per_month)

plt.figure(figsize=(10, 5))
plt.scatter(actual, predicted, color='blue', label='Predicted vs Actual')
# Plotting actual against itself draws the y = x reference line.
plt.plot(actual, actual, color='red', linewidth=2, label='Perfect Prediction Line')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('Actual vs Predicted Values')
plt.legend()
plt.grid(True)
plt.show()
# Mean of the model predictions held in `pred`.
# NOTE(review): `pred` is assigned in another cell of this file - confirm
# that cell has run before this one.
average_prediction = pred.mean()
print(f'Rata-rata nilai prediksi: {average_prediction}')

import matplotlib.pyplot as plt
import seaborn as sns
# Collect the test features together with their actual and predicted targets.
results = X_test.copy()
results['Penjualan real (Actual)'] = y_test.values
results['Penjualan real (Predict)'] = y_pred

plt.figure(figsize=(10, 6))

sns.scatterplot(x='Penjualan real (Actual)', y='Penjualan real (Predict)', data=results, label='Data Points')

# Diagonal from (min, min) to (max, max) of the actual values: the line on
# which a perfect prediction would fall.
plt.plot(
    [results['Penjualan real (Actual)'].min(), results['Penjualan real (Actual)'].max()],
    [results['Penjualan real (Actual)'].min(), results['Penjualan real (Actual)'].max()],
    color='red',
    lw=2,
    label='Ideal Prediction',
)

plt.title('Actual vs Predicted Penjualan Real')
plt.xlabel('Penjualan Real (Actual)')
plt.ylabel('Penjualan Real (Predicted)')
plt.legend()
plt.grid(True)
plt.show()

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np
# Report fit quality and error metrics for `pred` against `y_test`.
print('R² Score = ', r2_score(y_test, pred))
print('MAE = ', mean_absolute_error(y_test, pred))
# RMSE is the square root of the mean squared error.
print('RMSE = ', np.sqrt(mean_squared_error(y_test, pred)))

import matplotlib.pyplot as plt
actual = data  # If 'data' contains only actual values
# NOTE(review): `predicted` is uniform random noise in [0, 1), not model
# output, so both the plot and the average below describe random numbers.
predicted = np.random.rand(len(actual))

plt.figure(figsize=(10, 5))
plt.scatter(actual, predicted, color='blue', label='Predicted vs Actual')
# Plotting actual against itself draws the y = x reference line.
plt.plot(actual, actual, color='red', linewidth=2, label='Perfect Prediction Line')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('Actual vs Predicted Values')
plt.legend()
plt.grid(True)
plt.show()

average_prediction = predicted.mean()
print(f'Rata-rata nilai prediksi: {average_prediction}')

from sklearn.preprocessing import MinMaxScaler
import pandas as pd
# Min-max normalize one probe value using the value range of the dataset.
# NOTE(review): `data` must be the DataFrame loaded from the CSV here, not
# one of the scalars/arrays also assigned to `data` in this file.
data_numeric = data.drop(['Transaction Date'], axis=1, errors='ignore')

min_values = data_numeric.min()
max_values = data_numeric.max()

scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data_numeric)

# One row holding 17.84 in every column. It is wrapped in a DataFrame with
# the fitted column names so the scaler does not warn about missing feature
# names when transforming.
input_values = pd.DataFrame(
    [[17.84] * len(data_numeric.columns)],
    columns=data_numeric.columns,
)
normalized_values = scaler.transform(input_values)
print(normalized_values)
# Turn a single value into a whole-number percentage and report it.
data = 3.5591632235523107

# Scale by 100, then round to the nearest integer.
percentage = 100 * data
rounded_percentage = round(percentage)

print("{} sebagai persentase adalah {}%".format(data, rounded_percentage))
# Sum of the five per-product values.
data = [
    4.927935266767451,
    3.5591632235523107,
    0.23995953264050024,
    1.2564941305530912,
    0.23080689494857123,
]

jumlah = sum(data)
# Bare expression: displays the total when run as a notebook cell.
jumlah
import numpy as np

# Root-mean-square of the five listed values.
# NOTE(review): if these are per-product RMSE figures, this is the RMS of
# those figures, not an RMSE pooled over the underlying samples - confirm
# which was intended.
values = np.array([4.93, 3.56, 0.24, 1.26, 0.23])

rmse = np.sqrt(np.mean(values**2))
# Bare expression: displays the value when run as a notebook cell.
rmse

# Mean of the absolute values of the five listed MAE figures.
values_mae = np.array([4.19, 2.34, 0.16, 0.86, 0.18])

mae = np.mean(np.abs(values_mae))
mae
# Mount Google Drive in the Colab runtime.
from google.colab import drive
drive.mount('/content/drives/', force_remount=True)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset.
# NOTE(review): the CSV path is under /content, not under the Drive mount
# point /content/drives/ - confirm where the file actually lives.
data = pd.read_csv('/content/PERTALITE_data.csv')
# Bare expression: displays the first rows when run as a notebook cell.
data.head()
# Number of missing values across the whole frame.
jumlah_missing_value = data.isnull().sum().sum()
print(f'Jumlah Missing Value: {jumlah_missing_value}')

# Number of duplicated rows.
jumlah_duplikasi = data.duplicated().sum()
print(f'Jumlah Duplikasi Data: {jumlah_duplikasi}')

# Encode the product name as a number. Any value not listed in the mapping
# becomes NaN after `.map`.
Produk_to_num = {
    'PERTALITE': 0,
}
data['Produk'] = data['Produk'].map(Produk_to_num)
# Convert every column to float64, except 'Transaction Date'.
for col in data.columns:
    if col != 'Transaction Date':  # Skip the conversion for 'Transaction Date'
        data[col] = data[col].astype('float64')

data.info()
print(data.head())
print(data.columns)
# Bare expression: displays the first rows when run as a notebook cell.
data.head()

from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
# Create the scaler object.
scaler = MinMaxScaler()

# Use the 'data' DataFrame instead of the undefined 'df_normalized'.
# Only 'Transaction Date' is dropped here (ignored if absent); 'Produk' and
# every other column stay in and are scaled.
data_numeric = data.drop(['Transaction Date'], axis=1, errors='ignore')

# Fit and transform the numeric data only. The original index is carried
# over so the concat below lines rows up with `data` even when its index is
# not the default 0..n-1 range.
df_normalized_numeric = pd.DataFrame(
    scaler.fit_transform(data_numeric),
    columns=data_numeric.columns,
    index=data_numeric.index,
)

# Put 'Transaction Date' back next to the normalized columns.
df_normalized = pd.concat([data[['Transaction Date']], df_normalized_numeric], axis=1)

# Show the normalized data.
print(df_normalized.head())

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder  # Import LabelEncoder
# Use the correct column name 'Transaction Date'.
data['Transaction Date'] = pd.to_datetime(data['Transaction Date'])

# Extract calendar features from 'Transaction Date'.
data['Year'] = data['Transaction Date'].dt.year
data['Month'] = data['Transaction Date'].dt.month
data['Day'] = data['Transaction Date'].dt.day

# Select the columns used by the model. `.copy()` makes X an independent
# frame so the 'Produk' assignment below does not write into a slice of
# `data` (which triggers pandas' SettingWithCopyWarning).
X = data[['Year', 'Month', 'Day', 'Produk']].copy()
y = data['Penjualan_Real (kl)']

# Label-encode the 'Produk' column.
label_encoder = LabelEncoder()
X['Produk'] = label_encoder.fit_transform(X['Produk'])

# Split into train and test sets (30% test, fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Preprocessing for the numeric columns (Year, Month, Day, Produk).
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['Year', 'Month', 'Day', 'Produk']),
    ]
)

# Build a pipeline that chains the preprocessing and the model.
pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', LinearRegression())])

# Train the model.
pipeline.fit(X_train, y_train)

# Make predictions.
y_pred = pipeline.predict(X_test)

# Build a DataFrame for displaying the results.
results = X_test.copy()
results['Penjualan real (Actual)'] = y_test.values
results['Penjualan real (Predict)'] = y_pred
# Show the linear-regression prediction results.
# The model is created and fitted first; the original order called
# `lr.predict` before `lr` existed.
lr = LinearRegression()
lr.fit(X_train, y_train)

# Calculate predictions on the test features.
pred = lr.predict(X_test)

# RMSE of `pred` (not the pipeline's `y_pred`) against the test targets.
rmse = np.sqrt(mean_squared_error(y_test, pred))
print(f'Root Mean Squared Error: {rmse}')

# NOTE(review): this rebinds `data` from the loaded dataset to a two-column
# Actual/Predicted frame - confirm nothing later needs the original dataset.
data = pd.DataFrame({'Actual': y_test, 'Predicted': pred})
# Bare expression: displays the frame when run as a notebook cell.
data