from sklearn.feature_extraction.text import CountVectorizer
# Filter positive and negative comments
data_positive = data[data['Sentiment'] == 'Positive']['Comment']
data_negative = data[data['Sentiment'] == 'Negative']['Comment']
# Function to extract most frequent words
def get_top_words(comments, num_words=10):
    # Build a bag-of-words matrix, dropping English stop words
    vec = CountVectorizer(stop_words='english').fit(comments)
    bag_of_words = vec.transform(comments)
    # Sum the count of each word across all comments
    sum_words = bag_of_words.sum(axis=0)
    words_freq = [(word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()]
    # Sort by frequency in descending order and keep the top entries
    words_freq = sorted(words_freq, key=lambda x: x[1], reverse=True)
    return words_freq[:num_words]
# Get top words for positive and negative comments
top_positive_words = get_top_words(data_positive)
top_negative_words = get_top_words(data_negative)
# Display the results
print('Positive Comments Top Words:', top_positive_words)
print('Negative Comments Top Words:', top_negative_words)
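
# Optional follow-up (a minimal sketch, assuming pandas is available -- it is
# already implied by the DataFrame filtering above -- and that both lists have
# the same length, which they do with the default num_words=10): tabulate the
# two word lists side by side so the sentiment classes are easier to compare.
import pandas as pd

top_words_df = pd.DataFrame({
    'Positive word': [word for word, _ in top_positive_words],
    'Positive count': [count for _, count in top_positive_words],
    'Negative word': [word for word, _ in top_negative_words],
    'Negative count': [count for _, count in top_negative_words],
})
print(top_words_df)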