Cannot load tensorflow 2x model

intel-analytics / ipex-llm

Accelerate local LLM inference and finetuning (LLaMA, Mistral, ChatGLM, Qwen, Baichuan, Mixtral, Gemma, Phi, MiniCPM, etc.) on Intel XPU (e.g., local PC with iGPU and NPU, discrete GPU such as Arc, Flex and Max); seamlessly integrate with llama.cpp, Ollama, HuggingFace, LangChain, LlamaIndex, GraphRAG, DeepSpeed, vLLM, FastChat, Axolotl, etc.

Apache License 2.0

6.61k stars 1.26k forks source link

I followed this tutorial:

https://bigdl.readthedocs.io/en/latest/doc/Orca/QuickStart/orca-tf2keras-quickstart.html

However, I cannot load the model with this command: `new_model = tf.keras.models.load_model("/home/hadoop/model/re1/model.ckpt")`

It fails with this error:

---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-3-cc9d1c1bd9c2> in <module>
----> 1 new_model = tf.keras.models.load_model("/home/hadoop/model/re1/model.ckpt")

~/miniconda3/envs/pyenv/lib/python3.7/site-packages/keras/utils/traceback_utils.py in error_handler(*args, **kwargs)
     65     except Exception as e:  # pylint: disable=broad-except
     66       filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67       raise e.with_traceback(filtered_tb) from None
     68     finally:
     69       del filtered_tb

~/miniconda3/envs/pyenv/lib/python3.7/site-packages/h5py/_hl/files.py in __init__(self, name, mode, driver, libver, userblock_size, swmr, rdcc_nslots, rdcc_nbytes, rdcc_w0, track_order, fs_strategy, fs_persist, fs_threshold, fs_page_size, page_buf_size, min_meta_keep, min_raw_keep, locking, alignment_threshold, alignment_interval, **kwds)
    531                                  fs_persist=fs_persist, fs_threshold=fs_threshold,
    532                                  fs_page_size=fs_page_size)
--> 533                 fid = make_fid(name, mode, userblock_size, fapl, fcpl, swmr=swmr)
    534 
    535             if isinstance(libver, tuple):

~/miniconda3/envs/pyenv/lib/python3.7/site-packages/h5py/_hl/files.py in make_fid(name, mode, userblock_size, fapl, fcpl, swmr)
    224         if swmr and swmr_support:
    225             flags |= h5f.ACC_SWMR_READ
--> 226         fid = h5f.open(name, flags, fapl=fapl)
    227     elif mode == 'r+':
    228         fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)

h5py/_objects.pyx in h5py._objects.with_phil.wrapper()

h5py/_objects.pyx in h5py._objects.with_phil.wrapper()

h5py/h5f.pyx in h5py.h5f.open()

OSError: Unable to open file (file signature not found)

import torch
import tensorflow as tf
import numpy as np

# NOTE: `spark` and `train_hdfs_dir` are not defined in this snippet; they
# must already exist in the session that runs it.
spark.read.parquet(train_hdfs_dir).createOrReplaceTempView("train_view")

# Wrap the single `cos_dist` column in an array as `feature`, and remap
# label 0 to -1 (everything else to 1) so the labels suit the hinge loss below.
train_samples_bigdl = spark.sql(
    " select array(tv.cos_dist) as feature,"
    " cast((CASE WHEN tv.label = 0 THEN -1 ELSE 1 END) as int) as label"
    " from train_view as tv "
)
train_size = train_samples_bigdl.count()


def model_creator(config):
    """Build and compile a single-Dense-layer Keras model trained with hinge loss.

    Args:
        config: Not read by this function; it is called with ``None`` at the
            end of this script when rebuilding the model to load weights.

    Returns:
        A compiled ``tf.keras.Model`` taking inputs of shape ``(1,)`` and
        producing the output of a 10-unit ``Dense`` layer.
    """
    x_inputs = tf.keras.Input(shape=(1,))
    initializer = tf.keras.initializers.HeNormal()
    regularizer = tf.keras.regularizers.L2(0.001)
    svm_layer = tf.keras.layers.Dense(
        units=10,
        kernel_initializer=initializer,
        bias_initializer=initializer,
        kernel_regularizer=regularizer,
        bias_regularizer=regularizer,
    )

    def SVM_linear_loss(y_true, y_pred):
        """Return mean(max(0, 1 - y_true * y_pred)), i.e. the hinge loss."""
        # No trailing backslash after this statement: a continuation here
        # would glue it to the `return` below and raise a SyntaxError.
        loss_t = tf.math.maximum(
            0.,
            tf.math.subtract(
                1., tf.math.multiply(tf.cast(y_true, tf.float32), y_pred)
            ),
        )
        return tf.math.reduce_mean(loss_t)

    model = tf.keras.Model(inputs=x_inputs, outputs=svm_layer(x_inputs))
    model.compile(optimizer="rmsprop", loss=SVM_linear_loss)
    return model


from bigdl.orca.learn.tf2.ray_estimator import TensorFlow2Estimator

est = TensorFlow2Estimator(
    model_creator=model_creator,
    workers_per_node=10,
)

batch_size = 4096
est.fit(
    data=train_samples_bigdl,
    epochs=2,
    batch_size=batch_size,
    steps_per_epoch=train_size // batch_size,
    feature_cols=['feature'],
    label_cols=['label'],
)

# Save only the trained weights to an HDF5 file, then rebuild the same
# architecture with model_creator and load the weights into it.
est.get_model().save_weights("/home/hadoop/model/re1/model.h5")
new_model = model_creator(None)
new_model.load_weights("/home/hadoop/model/re1/model.h5")

intel-analytics / ipex-llm

Cannot load tensorflow 2x model #4964