awslabs / gluonts

Probabilistic time series modeling in Python
https://ts.gluon.ai
Apache License 2.0
4.62k stars 753 forks source link

A demo symbol block cannot run inference after being serialized and deserialized #2180

Closed Carkham closed 2 years ago

Carkham commented 2 years ago

Description

Hello, I tried to serialize my predictor in the symbol block format, but when I deserialize it and use it for prediction, I get the error shown below.

To Reproduce

# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

"""
This example shows how to fit a model and evaluate its predictions.
"""
import pprint
from functools import partial

import pandas as pd

from gluonts.dataset.repository.datasets import get_dataset
# from gluonts.mx.distribution.piecewise_linear import PiecewiseLinearOutput
from gluonts.evaluation import make_evaluation_predictions, Evaluator
from gluonts.model.deepar import DeepAREstimator
# from gluonts.model.transformer import TransformerEstimator
# from gluonts.mx import MQCNNEstimator
from gluonts.mx.trainer import Trainer
import os
import mxnet as mx
import numpy as np
from gluonts.mx.model import predictor as mxPredictor
from gluonts.model.predictor import Predictor as gPredictor

from pathlib import Path

from gluonts.mx.util import import_symb_block

# Names of the datasets to run; each one is passed to ``get_dataset`` inside
# ``evaluate``. Only the uncommented entries are active.
datasets = [
    "m4_hourly",
    # "m4_daily",
    # "m4_weekly",
    # "m4_monthly",
    # "m4_quarterly",
    # "m4_yearly",
]

# Training budget forwarded to ``Trainer`` inside ``evaluate``.
epochs = 10
num_batches_per_epoch = 5
# Estimator factories to run; ``evaluate`` calls each entry with
# ``prediction_length``, ``freq`` and ``trainer`` keyword arguments.
estimators = [
    # MQCNNEstimator,
    DeepAREstimator,
    # partial(
    #     DeepAREstimator,
    #     distr_output=PiecewiseLinearOutput(8),
    # ),
]

def evaluate(dataset_name, estimator):
    """Train an estimator and evaluate it after a symbol-block round trip.

    The trained predictor is converted with ``as_symbol_block_predictor``,
    serialized to disk, deserialized again, and the deserialized predictor is
    the one used to produce the evaluation forecasts.

    Parameters
    ----------
    dataset_name
        Name handed to ``get_dataset``.
    estimator
        Callable that builds an estimator when called with the keyword
        arguments ``prediction_length``, ``freq`` and ``trainer``.

    Returns
    -------
    The first element (``agg_metrics``) of the pair returned by calling
    ``Evaluator()`` on the forecasts. The function previously discarded the
    metrics and returned ``None``, so the caller's ``results`` list only
    ever collected ``None`` entries.
    """
    dataset = get_dataset(dataset_name)
    print("got dataset")
    estimator = estimator(
        prediction_length=dataset.metadata.prediction_length,
        freq=dataset.metadata.freq,
        # use_feat_static_cat=True,
        # cardinality=[
        #     feat_static_cat.cardinality
        #     for feat_static_cat in dataset.metadata.feat_static_cat
        # ],
        trainer=Trainer(
            epochs=epochs,
            num_batches_per_epoch=num_batches_per_epoch,
        ),
    )

    predictor = estimator.train(dataset.train)

    # Single definition of the output location so that the serialize and
    # deserialize steps cannot drift apart.
    output_dir = Path("/home/carkham/gluon-ts/gluon-ts-dev/examples/output")

    symbol_predictor: mxPredictor.SymbolBlockPredictor = (
        predictor.as_symbol_block_predictor(dataset=dataset.train)
    )
    symbol_predictor.serialize(output_dir)
    print("exported model")

    new_symbol_predictor = mxPredictor.SymbolBlockPredictor.deserialize(
        output_dir
    )
    print("load predictor success")
    forecast_it, ts_it = make_evaluation_predictions(
        dataset.test, predictor=new_symbol_predictor, num_samples=100
    )

    # The per-item metrics are not needed here; only the aggregate is returned.
    agg_metrics, _ = Evaluator()(
        ts_it, forecast_it, num_series=len(dataset.test)
    )
    return agg_metrics

if __name__ == "__main__":

    # One entry per (dataset, estimator) run that completed without raising.
    results = []
    for dataset_name in datasets:
        for estimator in estimators:
            # A failing run is reported and skipped so that the remaining
            # combinations are still evaluated.
            try:
                outcome = evaluate(dataset_name, estimator)
            except Exception as err:
                print(err)
            else:
                results.append(outcome)

Error message or code output

Parameter 'deeparpredictionnetwork0_featureembedder0_cat_0_embedding_weight' has not been initialized. Note that you should initialize parameters and create Trainer with Block.collect_params() instead of Block.params because the later does not include Parameters of nested child Blocks

Environment

(Add as much information about your environment as possible, e.g. dependencies versions.)

lostella commented 2 years ago

@Carkham thanks for filing the issue! I'm able to reproduce it on dev as well, we'll have to look into this.

The following is a simpler reproducer. Interestingly, skipping the as_symbol_block_predictor step and calling predictor.serialize directly appears to run fine.

from gluonts.dataset.repository.datasets import get_dataset
from gluonts.mx.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.mx.trainer import Trainer
from gluonts.mx.model.predictor import Predictor

from pathlib import Path

# Directory next to this script where the predictor is written and read back.
model_path = Path(__file__).parent / "model"

dataset = get_dataset("m4_hourly")

estimator = SimpleFeedForwardEstimator(
    prediction_length=dataset.metadata.prediction_length,
    trainer=Trainer(
        epochs=2,
        num_batches_per_epoch=5,
    ),
)

predictor = estimator.train(dataset.train)

# Convert to the symbol-block form; per the discussion, serializing the
# predictor without this conversion appears to work.
predictor = predictor.as_symbol_block_predictor(dataset=dataset.train)

# exist_ok=True keeps the script re-runnable: a plain mkdir() raises
# FileExistsError on the second run, before the actual issue is reached.
model_path.mkdir(exist_ok=True)
predictor.serialize(model_path)

new_symbol_predictor = Predictor.deserialize(model_path)

# Materialize the forecasts from the deserialized predictor.
# NOTE(review): presumably this is where the reported failure surfaces — confirm.
forecasts = list(new_symbol_predictor.predict(dataset.train))
lostella commented 2 years ago

Other observations:

Traceback (most recent call last):
  File "issues/2180/run.py", line 36, in <module>
    predictor.as_symbol_block_predictor(dataset=dataset.train).serialize(model_path)
  File "/Users/stellalo/gluon-ts/src/gluonts/mx/model/predictor.py", line 189, in serialize
    self.serialize_prediction_net(path)
  File "/Users/stellalo/gluon-ts/src/gluonts/mx/model/predictor.py", line 234, in serialize_prediction_net
    export_symb_block(self.prediction_net, path, "prediction_net")
  File "/Users/stellalo/gluon-ts/src/gluonts/mx/util.py", line 207, in export_symb_block
    hb.export(path=str(model_dir / model_name), epoch=epoch)
  File "/Users/stellalo/.virtualenvs/gluonts/lib/python3.7/site-packages/mxnet/gluon/block.py", line 1284, in export
    for is_arg, param in self._cached_op_args:
AttributeError: 'SymbolBlock' object has no attribute '_cached_op_args'
lostella commented 2 years ago

Size of the prediction_net-0000.params file, resulting from serializing the model in my snippet above:

So some stuff is missing there.