"""Fit a TF-IDF + multinomial naive Bayes pipeline on newsgroup posts.

The script prints a few sample documents, trains the pipeline on four
newsgroup categories, predicts the held-out subset, and shows the resulting
confusion matrix as a heatmap.
"""

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# Exploratory look at the dataset as returned with default arguments.
text_data = fetch_20newsgroups()
print("Keys : \n{}".format(text_data.keys()))
print(text_data.target_names)
print(text_data.data[1])

# Restrict the experiment to four categories and load both subsets.
categories = [
    "sci.space",
    "rec.sport.hockey",
    "rec.motorcycles",
    "rec.sport.baseball",
]
train = fetch_20newsgroups(subset="train", categories=categories)
test = fetch_20newsgroups(subset="test", categories=categories)
print(train.data[5])
print(test.data[5])

# Vectorise the raw text and classify it in a single pipeline.
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(train.data, train.target)
labels = model.predict(test.data)

# The matrix is transposed before plotting so that the x axis carries the
# true labels and the y axis the predicted labels, matching the axis titles.
mat = confusion_matrix(test.target, labels)
# NOTE(review): the colour scale is clamped to [3, 8]; confirm this range is
# intended for the magnitude of the counts in the matrix.
sns.heatmap(
    mat.T,
    square=True,
    annot=True,
    fmt="d",
    cbar=True,
    cmap="YlGnBu",
    vmin=3,
    vmax=8,
    xticklabels=train.target_names,
    yticklabels=train.target_names,
)
plt.xlabel("true label")
plt.ylabel("predicted label")
plt.show()