SarcasmDetection

- Developed a sarcasm detection system using Random Forest and SVM algorithms on a dataset of 1 million entries, achieving an accuracy of 85%.
- Implemented advanced data pre-processing techniques, including text cleaning, tokenization, and feature engineering, resulting in a 20% improvement in accuracy for the sarcasm detection models.

import pandas as pd from sklearn.feature_extraction.text import CountVectorizer from sklearn.ensemble import RandomForestClassifier from sklearn.svm import SVC from sklearn.metrics import accuracy_score, confusion_matrix, classification_report from sklearn.model_selection import train_test_split

# Load the dataset CSV from its hard-coded path and preview the first rows.
# (The bare expressions below only display output in an interactive session.)
df = pd.read_csv('/content/saarcasm.csv')
df.head()

# Column labels of the loaded frame.
df.columns

# Number of missing entries in each column.
missing_values = df.isna().sum()
print(missing_values)

# Impute missing numeric values with the mean of their column.
# `numeric_only=True` restricts the mean to numeric columns: without it,
# pandas >= 2.0 raises TypeError when the frame also holds text columns
# (e.g. the "comment" column cleaned later in this script). Non-numeric
# columns receive no fill value and are left untouched.
df.fillna(df.mean(numeric_only=True), inplace=True)

import re
import string

import nltk
from nltk.corpus import stopwords

# Fetch the NLTK stop-word corpus so that `stopwords.words` can read it.
nltk.download('stopwords')

# English stemmer and stop-word set used by the text-cleaning step.
stemmer = nltk.SnowballStemmer("english")
stopword = set(stopwords.words('english'))

def clean(comment):
    """Normalise one raw comment into a space-separated string of stems.

    The value is coerced to ``str`` and lower-cased, then stripped of
    square-bracketed spans, URLs, HTML-style tags, punctuation, newlines and
    any token that contains a digit. English stop words are dropped and each
    remaining token is stemmed.

    Relies on the module-level ``stopword`` set and ``stemmer`` object.

    Args:
        comment: The raw comment. Any value is accepted because it is passed
            through ``str`` first.

    Returns:
        The cleaned text, with the surviving tokens joined by spaces.
    """
    comment = str(comment).lower()
    # Regex patterns are raw strings so that backslash escapes reach `re`
    # unchanged instead of being treated as (invalid) string escapes.
    # Drop square-bracketed spans such as "[deleted]".
    comment = re.sub(r'\[.*?\]', '', comment)
    # Drop URLs; the dot after "www" is escaped so it matches only a literal dot.
    comment = re.sub(r'https?://\S+|www\.\S+', '', comment)
    # Drop HTML-style tags of any length, not just single-character ones.
    comment = re.sub(r'<.*?>+', '', comment)
    comment = re.sub('[%s]' % re.escape(string.punctuation), '', comment)
    # Newlines are deleted outright (not replaced by a space).
    comment = re.sub('\n', '', comment)
    # Drop every token that contains at least one digit.
    comment = re.sub(r'\w*\d\w*', '', comment)
    tokens = [word for word in comment.split(' ') if word not in stopword]
    return " ".join(stemmer.stem(word) for word in tokens)


# Clean the comment column in place before any modelling.
df["comment"] = df["comment"].apply(clean)

import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

# Join every comment into a single string for the word cloud.
comment = " ".join(i for i in df.comment)

# Kept under its own name: binding this set to `stopwords` would shadow the
# `nltk.corpus.stopwords` module imported earlier in the script.
wordcloud_stopwords = set(STOPWORDS)
wordcloud = WordCloud(
    stopwords=wordcloud_stopwords,
    background_color="white",
).generate(comment)

# Render the cloud without axes.
plt.figure(figsize=(15, 10))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

# Replace the numeric class codes with readable names; any value other than
# 0 or 1 has no entry in the mapping and becomes NaN.
label_names = {0: "Not Sarcastic", 1: "Sarcastic"}
df["label"] = df["label"].map(label_names)

# Keep only the two columns used for modelling, then preview the result.
df = df[["comment", "label"]]
print(df.head())

# Features are the comment text; targets are the label column.
X = df['comment']
y = df['label']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Bag-of-words counts: the vocabulary is learned from the training text only,
# and the test text is then encoded with that same vocabulary.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

# Fit a 100-tree random forest on the count features and predict the test split.
rfc = RandomForestClassifier(n_estimators=100, random_state=62)
rfc.fit(X_train, y_train)
y_pred_rfc = rfc.predict(X_test)

# Report accuracy, confusion matrix and per-class metrics for the forest.
print("Random Forest Classifier")
print("Accuracy:", accuracy_score(y_test, y_pred_rfc))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_rfc))
print("Classification Report:")
print(classification_report(y_test, y_pred_rfc))

# Fit a linear-kernel support vector classifier on the same count features.
# NOTE(review): scikit-learn documents SVC fit time as scaling at least
# quadratically with the number of samples — confirm this is acceptable for
# the size of the training split.
svm = SVC(kernel='linear', random_state=42)
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

# Report accuracy, confusion matrix and per-class metrics for the SVM.
print("SVM Classifier")
print("Accuracy:", accuracy_score(y_test, y_pred_svm))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_svm))
print("Classification Report:")
print(classification_report(y_test, y_pred_svm))

import matplotlib.pyplot as plt

# Test-split accuracy of each model.
rfc_score = accuracy_score(y_test, y_pred_rfc)
svm_score = accuracy_score(y_test, y_pred_svm)

# Bar chart comparing the two accuracies side by side.
fig, ax = plt.subplots()
ax.bar(['Random Forest', 'SVM'], [rfc_score, svm_score])
ax.set_ylabel('Accuracy')
ax.set_title('Comparison of Random Forest and SVM on Sarcasm Dataset')
plt.show()

# Classify two hand-written example comments with both trained models.
# Each comment is passed through `clean` before vectorizing: the vectorizer's
# vocabulary was fitted on cleaned (stop-word-free, stemmed) text, so raw
# words such as "Loving" would otherwise not match their stemmed vocabulary
# entries and would be silently ignored.
for new_comment in (
    "Hey it's okay they were rained on by debris and body bits coz Trump screwed him on the deal...right guys?",
    "Loving this West Ham implosion",
):
    new_comment_vectorized = vectorizer.transform([clean(new_comment)])
    rfc_predicted_label = rfc.predict(new_comment_vectorized)
    svm_predicted_label = svm.predict(new_comment_vectorized)
    print("Random Forest predicted label:", rfc_predicted_label)
    print("SVM predicted label:", svm_predicted_label)

vanipandit27 / sarcasmdetection Goto Github PK

sarcasmdetection's Introduction

SarcasmDetection

sarcasmdetection's People

Contributors

Watchers

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent