keras-team / keras-hub

Pretrained model hub for Keras 3
Apache License 2.0

keras_nlp.models.from_preset gets Segmentation fault (core dumped) #1371

Open FCInter opened 11 months ago

FCInter commented 11 months ago

I'm using keras_nlp and I get a `Segmentation fault (core dumped)` when running the following code:

import os
os.environ["KERAS_BACKEND"] = "torch"  # "jax" or "tensorflow" or "torch" 

import keras_nlp
import keras_core as keras
import keras_core.backend as K

import torch
import tensorflow as tf

import numpy as np 
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib as mpl

cmap = mpl.cm.get_cmap('coolwarm')

class CFG:
    verbose = 0  # Verbosity

    wandb = True  # Weights & Biases logging
    competition = 'llm-detect-ai-generated-text'  # Competition name
    _wandb_kernel = 'awsaf49'  # WandB kernel
    comment = 'DebertaV3-MaxSeq_200-ext_s-torch'  # Comment description

    preset = "deberta_v3_base_en"  # Name of pretrained models
    sequence_length = 200  # Input sequence length

    device = 'TPU'  # Device

    seed = 42  # Random seed

    num_folds = 5  # Total folds
    selected_folds = [0, 1, 2]  # Folds to train on

    epochs = 3  # Training epochs
    batch_size = 3  # Batch size
    drop_remainder = True  # Drop incomplete batches
    cache = True  # Caches data after one iteration; use only with `TPU` to avoid OOM

    scheduler = 'cosine'  # Learning rate scheduler

    class_names = ["real", "fake"]  # Class names [A, B, C, D, E]
    num_classes = len(class_names)  # Number of classes
    class_labels = list(range(num_classes))  # Class labels [0, 1, 2, 3, 4]
    label2name = dict(zip(class_labels, class_names))  # Label to class name mapping
    name2label = {v: k for k, v in label2name.items()}  # Class name to label mapping

keras.utils.set_random_seed(CFG.seed)

def get_device():
    "Detect and intializes GPU/TPU automatically"
    try:
        # Connect to TPU
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect() 
        # Set TPU strategy
        strategy = tf.distribute.TPUStrategy(tpu)
        print('> Running on TPU', tpu.master(), end=' | ')
        print('Num of TPUs: ', strategy.num_replicas_in_sync)
        device = CFG.device
    except:
        # If TPU is not available, detect GPUs
        gpus = tf.config.list_logical_devices('GPU')
        ngpu = len(gpus)
        # Check number of GPUs
        if ngpu:
            # Set GPU strategy
            strategy = tf.distribute.MirroredStrategy(gpus)  # single-GPU or multi-GPU
            # Print GPU details
            print("> Running on GPU", end=' | ')
            print("Num of GPUs: ", ngpu)
            device = 'GPU'
        else:
            # If no GPUs are available, use CPU
            print("> Running on CPU")
            strategy = tf.distribute.get_strategy()
            device = 'CPU'
    return strategy, device

# Initialize GPU/TPU/TPU-VM
strategy, CFG.device = get_device()
CFG.replicas = strategy.num_replicas_in_sync
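# Note: with KERAS_BACKEND="torch", tf.distribute strategies do not drive
# Keras training; in this script, `strategy` is only used to detect the
# device and report the replica count.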

BASE_PATH = '/some/path/'

print(1)
preprocessor = keras_nlp.models.DebertaV3Preprocessor.from_preset(
    preset=CFG.preset, # Name of the model
    sequence_length=CFG.sequence_length, # Max sequence length, will be padded if shorter
)

print(2)
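
For reference, a stripped-down version that isolates the from_preset call; a sketch, assuming the crash happens in the preprocessor load itself rather than in the surrounding tf.distribute setup:

import os
os.environ["KERAS_BACKEND"] = "torch"

import keras_nlp

# Same preset and sequence length as in CFG above. If this alone
# segfaults on GPU, the TensorFlow device-detection code can be ruled out.
preprocessor = keras_nlp.models.DebertaV3Preprocessor.from_preset(
    "deberta_v3_base_en", sequence_length=200
)
print(preprocessor(["The quick brown fox."]))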

The complete log of the full script above is as follows:

$ python test.py
Using PyTorch backend.
/mypath/test.py:22: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
  cmap = mpl.cm.get_cmap('coolwarm')
> Running on GPU | Num of GPUs:  1
1
Segmentation fault (core dumped)

What's wrong with this? I'm using an NVIDIA A100 with 80 GB of memory, which should be large enough. It works fine on CPU; the segfault only happens on GPU.
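
One possible culprit: the script imports both TensorFlow (for device detection) and PyTorch (the actual backend), so both frameworks may be initializing CUDA in the same process. Below is a sketch of a workaround, assuming that conflict is the cause; tf.config.set_visible_devices is a standard TensorFlow API, but whether it fixes this particular segfault is an assumption:

import tensorflow as tf

# Hide all GPUs from TensorFlow so that only the torch backend touches CUDA.
# This must run before any TensorFlow op creates a GPU context.
tf.config.set_visible_devices([], 'GPU')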

Thank you all for helping me!!!

SuryanarayanaY commented 8 months ago

Hi @FCInter ,

I tested the given code with the latest keras_nlp and with Keras 3, and it executes fine. Please refer to the attached gist.
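
If the segfault persists in your environment, could you also share the package versions so we can compare against the gist? A quick check (each of these packages exposes __version__):

import keras
import keras_nlp
import tensorflow as tf
import torch

print("keras:", keras.__version__)
print("keras_nlp:", keras_nlp.__version__)
print("tensorflow:", tf.__version__)
print("torch:", torch.__version__)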