neuralmagic / sparseml

Libraries for applying sparsification recipes to neural networks with a few lines of code, enabling faster and smaller models
Apache License 2.0
2.01k stars 140 forks source link

[1.7] Enable one-shot flow for non-LLMs #2288

Closed dbogunowicz closed 1 month ago

dbogunowicz commented 1 month ago

Feature Description

Enabling one-shot for non-text-generation models in 1.7. Changes:

Example Use

from sparseml.transformers import oneshot, SparseAutoModel
from datasets import load_dataset
from transformers import AutoConfig, AutoTokenizer, AutoModel, AutoModelForSequenceClassification
from typing import Union
from evaluate import evaluator
from sparseml import export
import sparseml.core.session as session_manager

# Part 1: Setup
# Sentiment-classification model + dataset used to demonstrate the one-shot flow.
model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
dataset_name = "tweet_eval"
dataset_subname = "sentiment"
num_calibration_samples = 512 # number of samples to use for calibration
num_evaluation_samples = 3000 # number of samples to use for evaluation
save_dir_oneshot = "./oneshot_output" # directory to save the oneshot model

# Load the model, its config, and tokenizer from the Hugging Face hub.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Shuffle with a fixed seed so the calibration/evaluation subsets are reproducible,
# then take only the first N samples of each split.
dataset_train = load_dataset(dataset_name, dataset_subname, split="train").shuffle(seed=420).select(range(num_calibration_samples))
dataset_test = load_dataset(dataset_name, dataset_subname, split="test").shuffle(seed=420).select(range(num_evaluation_samples))

# Part 2: Preparing the recipe and applying one-shot
# Inline YAML sparsification recipe: quantizes the model (8-bit weights and
# activations for Linear layers, 8-bit weights only for Embedding) while
# skipping LayerNorm and GELU, and runs SparseGPT at 0.0 sparsity purely to
# drive the quantization calibration over the RoBERTa encoder layers.
# NOTE(review): the `targets` regex uses `\\\d` — after Python string
# processing this reaches YAML as `\\d`; confirm the loader resolves it to
# the regex `\d` as intended rather than a literal backslash + 'd'.
recipe = """
test_stage:
  obcq_modifiers:
    QuantizationModifier:
      ignore:
        - LayerNorm
        - GELUActivation
      scheme_overrides:
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false
        Linear:
          input_activations:
            num_bits: 8
            symmetric: false
          weights:
            num_bits: 8
            symmetric: true
    SparseGPTModifier:
      sparsity: 0.0
      quantize: true
      targets: ["re:roberta.encoder.layer.\\\d+$"]
"""

def preprocessing_func(data):
    """Tokenize a dataset record and expose its label under the key one-shot expects.

    Uses the module-level ``tokenizer``; pads/truncates every sample to the
    tokenizer's max length so the calibration batch shapes are uniform.
    """
    tokenized = tokenizer(data["text"], padding="max_length", truncation=True)
    # oneshot assumes that the labels are in the "labels" key
    tokenized["labels"] = data["label"]
    return tokenized

# Run the one-shot sparsification pass: calibrates on `dataset_train` using
# the YAML recipe above and writes the resulting model to `save_dir_oneshot`.
oneshot(
    model=model,
    dataset=dataset_train,
    recipe=recipe,
    preprocessing_func=preprocessing_func,
    output_dir=save_dir_oneshot,
    num_calibration_samples=num_calibration_samples,
    # remove the label column from the dataset (we have labels instead)
    remove_columns=["label"],
)