Enabling one-shot for non-text-generation models in 1.7. Changes:
Users can now override the labels by providing their own labels in the input dataset. This is crucial, since the labels must match the actual output shape of the model: text-generation models produce `(batch_size, num_tokens, vocab_size)` outputs, while other transformers may produce, for instance, `(batch_size, num_classes)`.
Example Use
from sparseml.transformers import oneshot, SparseAutoModel
from datasets import load_dataset
from transformers import AutoConfig, AutoTokenizer, AutoModel, AutoModelForSequenceClassification
from typing import Union
from evaluate import evaluator
from sparseml import export
import sparseml.core.session as session_manager
# Part 1: Setup
# Model/dataset identifiers for a sentiment-classification example —
# a non-text-generation transformer, which is the point of this feature.
model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
dataset_name = "tweet_eval"
dataset_subname = "sentiment"
num_calibration_samples = 512 # number of samples to use for calibration
num_evaluation_samples = 3000 # number of samples to use for evaluation
save_dir_oneshot = "./oneshot_output" # directory to save the oneshot model
# Load the pretrained sequence-classification model plus its config/tokenizer.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Shuffle with a fixed seed so the calibration/evaluation subsets are reproducible.
dataset_train = load_dataset(dataset_name, dataset_subname, split="train").shuffle(seed=420).select(range(num_calibration_samples))
dataset_test = load_dataset(dataset_name, dataset_subname, split="test").shuffle(seed=420).select(range(num_evaluation_samples))
# Part 2: Preparing the recipe and applying one-shot
# One-shot recipe: 8-bit quantization of Linear/Embedding layers (LayerNorm and
# GELU activations excluded), applied via SparseGPT (sparsity 0.0 = quantize only).
# NOTE: the YAML indentation below is significant — the extracted version of this
# example had it flattened, which made the recipe invalid YAML.
recipe = """
test_stage:
  obcq_modifiers:
    QuantizationModifier:
      ignore:
        - LayerNorm
        - GELUActivation
      scheme_overrides:
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false
        Linear:
          input_activations:
            num_bits: 8
            symmetric: false
          weights:
            num_bits: 8
            symmetric: true
    SparseGPTModifier:
      sparsity: 0.0
      quantize: true
      targets: ["re:roberta.encoder.layer.\\\d+$"]
"""
def preprocessing_func(data):
    """Tokenize a batch of examples and attach labels for calibration.

    Returns the tokenizer output with the raw ``label`` column copied into
    the ``labels`` key, which is where oneshot expects to find it.
    """
    encoded = tokenizer(data["text"], padding="max_length", truncation=True)
    # oneshot assumes that the labels are in the "labels" key
    encoded["labels"] = data["label"]
    return encoded
# Part 2 (cont.): apply the one-shot recipe, calibrating on the training
# subset and writing the sparsified/quantized model to save_dir_oneshot.
oneshot(
    model=model,
    dataset=dataset_train,
    recipe=recipe,
    preprocessing_func=preprocessing_func,
    output_dir=save_dir_oneshot,
    num_calibration_samples=num_calibration_samples,
    # drop the raw "label" column — preprocessing_func copied it to "labels"
    remove_columns=["label"],
)
Feature Description
Enabling one-shot for non-text-generation models in 1.7. Changes: users can now override the labels by providing their own `labels` in the input dataset. This is crucial, since the labels must match the actual output shape of the model: text-generation models expect `(batch_size, num_tokens, vocab_size)` outputs, while other transformers may expect, for instance, `(batch_size, num_classes)`.
Example Use