gabriel-joy opened this issue 4 weeks ago
How was "finetune_forecast_model" finetuned?
I just followed the exogenous-variables tutorial:
# Imports used in this example (as in the TTM tutorials)
import math
import os

import pandas as pd
from torch.optim import AdamW
from torch.optim.lr_scheduler import OneCycleLR
from transformers import EarlyStoppingCallback, Trainer, TrainingArguments

from tsfm_public import (
    TimeSeriesForecastingPipeline,
    TimeSeriesPreprocessor,
    TinyTimeMixerForPrediction,
    TrackingCallback,
    count_parameters,
    get_datasets,
)

# timestamp_column, id_columns, context_length, forecast_length, data,
# OUT_DIR, and TTM_MODEL_REVISION are defined earlier in my notebook
column_specifiers = {
    "timestamp_column": timestamp_column,
    "id_columns": id_columns,
    "target_columns": ["far_ea"],
    "control_columns": [
        "DayIndex",
        "Hour",
        "temperature",
        "prod_ea",
    ],
}
split_params = {"train": [0, 0.5], "valid": [0.5, 0.75], "test": [0.75, 1.0]}

tsp = TimeSeriesPreprocessor(
    **column_specifiers,
    context_length=context_length,
    prediction_length=forecast_length,
    scaling=True,
    encode_categorical=False,
    scaler_type="standard",
)

train_dataset, valid_dataset, test_dataset = get_datasets(
    tsp,
    data,
    split_params,
)
train_dataset[3]
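For reference, here is how I inspect one training sample; I'm assuming the dataset returns a plain dict of tensors, so I just print each key with its shape where one exists:

# Print each key of one sample and its tensor shape (or type, for non-tensors)
sample = train_dataset[3]
for key, value in sample.items():
    shape = getattr(value, "shape", None)
    print(key, shape if shape is not None else type(value).__name__)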
finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(
    "ibm-granite/granite-timeseries-ttm-v1",
    revision=TTM_MODEL_REVISION,
    num_input_channels=tsp.num_input_channels,
    decoder_mode="mix_channel",  # exog: set to mix_channel for mixing channels in history
    prediction_channel_indices=tsp.prediction_channel_indices,
    exogenous_channel_indices=tsp.exogenous_channel_indices,
    fcm_context_length=1,  # exog: lag length used in the exog fusion (e.g., if today's sales can be affected by a discount within +/- 2 days, set this to 2)
    fcm_use_mixer=True,  # exog: try True first, then False
    fcm_mix_layers=2,  # exog: number of layers for exog mixing
    enable_forecast_channel_mixing=True,  # exog: set True to enable exog mixing
    fcm_prepend_past=True,  # exog: set True to include lags from history during exog infusion
)
finetune_forecast_model
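As a sanity check, I also print the channel wiring the preprocessor produced (these are the same attributes passed to from_pretrained above):

print("num_input_channels:", tsp.num_input_channels)
print("prediction_channel_indices:", tsp.prediction_channel_indices)
print("exogenous_channel_indices:", tsp.exogenous_channel_indices)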
print(
    "Number of params before freezing backbone",
    count_parameters(finetune_forecast_model),
)

# Freeze the backbone of the model
for param in finetune_forecast_model.backbone.parameters():
    param.requires_grad = False

# Count params again
print(
    "Number of params after freezing the backbone",
    count_parameters(finetune_forecast_model),
)
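To confirm the freeze worked, a minimal check of which top-level modules still have trainable parameters (plain PyTorch, nothing tsfm-specific assumed):

# Collect the top-level module names that still require gradients
trainable = {
    name.split(".")[0]
    for name, param in finetune_forecast_model.named_parameters()
    if param.requires_grad
}
print("Trainable top-level modules:", trainable)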
# Important parameters
learning_rate = 0.000298364724028334
num_epochs = 50  # ideally we would use more epochs (preferably run offline on a GPU for faster computation)
batch_size = 64

print(f"Using learning rate = {learning_rate}")
finetune_forecast_args = TrainingArguments(
    output_dir=os.path.join(OUT_DIR, "output"),
    overwrite_output_dir=True,
    learning_rate=learning_rate,
    num_train_epochs=num_epochs,
    do_eval=True,
    evaluation_strategy="epoch",
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    dataloader_num_workers=8,
    report_to=None,
    save_strategy="epoch",
    logging_strategy="epoch",
    save_total_limit=1,
    logging_dir=os.path.join(OUT_DIR, "logs"),  # make sure to specify a logging directory
    load_best_model_at_end=True,  # load the best model when training ends
    metric_for_best_model="eval_loss",  # metric to monitor for early stopping
    greater_is_better=False,  # lower loss is better
)
# Create the early stopping callback
early_stopping_callback = EarlyStoppingCallback(
    early_stopping_patience=10,  # number of epochs with no improvement after which to stop
    early_stopping_threshold=0.0,  # minimum improvement required to count as an improvement
)
tracking_callback = TrackingCallback()

# Optimizer and scheduler
optimizer = AdamW(finetune_forecast_model.parameters(), lr=learning_rate)
scheduler = OneCycleLR(
    optimizer,
    learning_rate,  # max_lr for the one-cycle schedule
    epochs=num_epochs,
    steps_per_epoch=math.ceil(len(train_dataset) / batch_size),
)
finetune_forecast_trainer = Trainer(
    model=finetune_forecast_model,
    args=finetune_forecast_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    callbacks=[early_stopping_callback, tracking_callback],
    optimizers=(optimizer, scheduler),
)

# Fine-tune
finetune_forecast_trainer.train()
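After training, I also evaluate on the held-out test split (standard Trainer.evaluate, nothing tsfm-specific assumed):

# Report eval_loss and related metrics on the test split
finetune_forecast_trainer.evaluate(test_dataset)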
When I try to introduce known future data into the forecast, I get much higher values. I guess I'm missing something; please advise.
future_data = pd.read_csv(
    "~/Downloads/ebstestmures-futures-20241024.csv",
    parse_dates=[timestamp_column],
)

pipeline = TimeSeriesForecastingPipeline(
    model=finetune_forecast_model,
    timestamp_column=timestamp_column,
    target_columns=["far_ea"],
    observable_columns=["DayIndex", "Hour", "temperature", "prod_ea"],
    future_time_series=future_data,
    explode_forecasts=True,
    inverse_scale_outputs=True,
    freq="1h",
    id_columns=[],
    device="cuda",
)

fewshots_forecast = pipeline(data.iloc[-context_length:].copy())
Without future_time_series, the forecast values are in the expected range (plot attached).
With future_time_series included, the values are way too high (plot attached).
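Should I instead be passing the fitted preprocessor to the pipeline rather than re-declaring the columns, so that scaling and inverse scaling stay consistent with fine-tuning? A sketch of what I mean (treat feature_extractor=tsp as my assumption about the intended usage):

# Hypothetical variant: let the fitted TimeSeriesPreprocessor supply
# column roles, scaling, and inverse scaling inside the pipeline
pipeline = TimeSeriesForecastingPipeline(
    model=finetune_forecast_model,
    feature_extractor=tsp,  # assumption: reuse the preprocessor fitted above
    future_time_series=future_data,
    explode_forecasts=True,
    inverse_scale_outputs=True,
    freq="1h",
    device="cuda",
)
fewshots_forecast = pipeline(data.iloc[-context_length:].copy())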