Title - Githubissues

# Sentiment classification script: TF-IDF features + linear-kernel SVM.
#
# Loads a two-class ('pos' / 'neg') text dataset from disk, vectorizes it with
# TF-IDF (English stop words removed, vocabulary capped at 5000 terms), trains
# an SVC on 80% of the documents and prints accuracy plus a per-class report
# for the remaining 20%.
import nltk
from nltk.corpus import stopwords
from sklearn.datasets import load_files
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Download NLTK data
nltk.download('stopwords')

# Load the dataset. The path is a placeholder and must point at a directory
# containing one sub-folder per category ('pos' and 'neg').
reviews = load_files('path_to_imdb_dataset', categories=['pos', 'neg'])
X, y = reviews.data, reviews.target

# TfidfVectorizer accepts stop_words only as 'english', a list, or None;
# a set is rejected by scikit-learn's parameter validation, so pass a
# de-duplicated list instead.
stop_words = sorted(set(stopwords.words('english')))
tfidf_vectorizer = TfidfVectorizer(max_features=5000, stop_words=stop_words)

# Split the raw documents first so the vectorizer's vocabulary and IDF
# weights are learned from the training portion only (no test-set leakage).
X_train_docs, X_test_docs, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Keep the features sparse: SVC accepts sparse matrices, and densifying a
# documents x 5000 matrix wastes memory for no benefit.
X_train = tfidf_vectorizer.fit_transform(X_train_docs)
X_test = tfidf_vectorizer.transform(X_test_docs)

# Train a model
model = SVC(kernel='linear')
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# load_files numbers the classes by sorted folder name, so label 0 is 'neg'
# and label 1 is 'pos'; target_names must follow that order or the per-class
# rows of the report are mislabeled.
report = classification_report(
    y_test, y_pred, target_names=['negative', 'positive']
)
print("Classification Report:\n", report)

ELONISEVIL / amfora

Title #9