from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_selection import mutual_info_classif
from sklearn.feature_extraction.text import CountVectorizer
# Score every vocabulary term by its mutual information with the newsgroup
# label, using a three-topic subset of the 20 Newsgroups training split.
categories = ['talk.religion.misc', 'comp.graphics', 'sci.space']

# NOTE: fetch_20newsgroups downloads the corpus on first use and caches it
# locally, so the first run needs network access.
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories)
X, Y = newsgroups_train.data, newsgroups_train.target

# Bag-of-words counts. Terms appearing in more than 95% of documents or in
# fewer than 2 documents are dropped, English stop words are removed, and the
# vocabulary is capped at the 10000 most frequent remaining terms.
cv = CountVectorizer(max_df=0.95, min_df=2, max_features=10000, stop_words='english')
X_vec = cv.fit_transform(X)

# CountVectorizer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2; get_feature_names_out() is its replacement. Fall back to the
# old method only when running against a pre-1.0 installation.
if hasattr(cv, 'get_feature_names_out'):
    feature_names = cv.get_feature_names_out()
else:
    feature_names = cv.get_feature_names()

# Term counts are integer-valued, hence discrete_features=True. The scores
# come back in the same column order as the vectorizer's vocabulary, so they
# can be zipped directly with the feature names.
mi_scores = mutual_info_classif(X_vec, Y, discrete_features=True)

# Mapping of term -> mutual information with the class label.
res = dict(zip(feature_names, mi_scores))
print(res)