TimKoornstra / FinTwitBERT

FinTwitBERT: Specialized BERT Model for Financial Twitter Analysis. Trained on vast financial tweets, it's ideal for sentiment analysis, trend prediction, and financial NLP tasks.
MIT License
5 stars 1 forks source link

Add .ipynb notebook for analysis #19

Closed StephanAkkerman closed 8 months ago

StephanAkkerman commented 8 months ago

SHAP (note: this only works when run inside a notebook):

import shap
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from datasets import load_dataset

# SHAP walkthrough: explain FinTwitBERT-sentiment predictions on a couple of
# Financial PhraseBank sentences.

# Imported here so the snippet stays self-contained; only needed to probe for CUDA.
import torch

# Load the fine-tuned sentiment checkpoint and its tokenizer from the Hub
tokenizer = AutoTokenizer.from_pretrained("StephanAkkerman/FinTwitBERT-sentiment")
model = AutoModelForSequenceClassification.from_pretrained(
    "StephanAkkerman/FinTwitBERT-sentiment"
)

classifier = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    # Use the first GPU when one is present, otherwise run on CPU (-1) instead
    # of crashing on machines without CUDA.
    device=0 if torch.cuda.is_available() else -1,
    # top_k=None asks the pipeline for the score of every label, not just the best one
    top_k=None,
)

dataset = load_dataset(
    "financial_phrasebank",
    cache_dir="datasets/",
    split="train",
    name="sentences_50agree",
)

# Rename sentence to text
dataset = dataset.rename_column("sentence", "text")

# Slice the rows first so only 20 sentences are read, then cap each one at
# 500 characters.
short_data = [text[:500] for text in dataset[:20]["text"]]

# define the explainer
explainer = shap.Explainer(classifier)

# explain the predictions of the pipeline on the first two samples
shap_values = explainer(short_data[:2])

# Visualize the SHAP values for all explained samples at once (no indexing)
shap.plots.text(shap_values)

LIME

import torch.nn.functional as F
from lime.lime_text import LimeTextExplainer

from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the fine-tuned sentiment checkpoint and its tokenizer from the Hub
tokenizer = AutoTokenizer.from_pretrained("StephanAkkerman/FinTwitBERT-sentiment")
model = AutoModelForSequenceClassification.from_pretrained(
    "StephanAkkerman/FinTwitBERT-sentiment"
)
# LIME matches class_names[i] to column i of the predicted probabilities, so
# take the names from the checkpoint's own index -> label mapping rather than
# a hand-written list whose order may not match the logits.
class_names = [model.config.id2label[i] for i in range(model.config.num_labels)]

def predictor(texts, batch_size=32):
    """Return class probabilities for ``texts`` as a NumPy array.

    Intended as the ``classifier_fn`` for ``LimeTextExplainer``: it takes a
    list of strings and returns one row of softmax probabilities per string.

    Args:
        texts: A list of strings, or a single string (treated as a batch of one).
        batch_size: Number of texts per forward pass. LIME can hand over
            thousands of perturbed samples at once; running them in chunks
            keeps peak memory bounded.

    Returns:
        Array of shape ``(len(texts), n_classes)``; each row sums to 1.
    """
    # Function-scope import: the surrounding snippet only imports torch.nn.functional.
    import torch

    # Wrap a bare string so the slicing below cannot cut it into substrings.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    batches = []
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            encoded = tokenizer(
                texts[start : start + batch_size],
                return_tensors="pt",
                padding=True,
                # Ask the tokenizer to clip over-long inputs instead of
                # passing them through unbounded.
                truncation=True,
            )
            logits = model(**encoded).logits
            # Normalise over the class axis explicitly; the implicit-dim form
            # is deprecated.
            batches.append(F.softmax(logits, dim=-1))
    return torch.cat(batches).cpu().numpy()

# Sentence with mixed signals, used as the example to explain
str_to_predict = "surprising increase in revenue in spite of decrease in market share"

# LIME explainer that shows the sentiment label names in its output
explainer = LimeTextExplainer(class_names=class_names)

# Sampling settings: report up to 20 tokens, using 2000 perturbed samples
lime_settings = {"num_features": 20, "num_samples": 2000}
exp = explainer.explain_instance(str_to_predict, predictor, **lime_settings)

# Export the explanation twice: an HTML report and a bar-chart image
exp.save_to_file("temp.html")
fig = exp.as_pyplot_figure()
fig.savefig("lime_report.jpg")
StephanAkkerman commented 8 months ago

Add a .ipynb file for analysis of our model