## Libraries
``` python
``` python
#!pip install sklearn
#!pip install datasets
!pip install sklearn
!pip install datasets
import pandas as pd
import numpy as np
import string
import re
import matplotlib.pyplot as plt
import seaborn as sns
import unicodedata
from textblob import TextBlob,Word
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from datasets import load_dataset
import nltk
from nltk.tokenize import word_tokenize
# Download the NLTK 'wordnet' and 'punkt' resources.
nltk.download('wordnet')
nltk.download('punkt')
from nltk.corpus import stopwords
# Download the NLTK 'stopwords' corpus.
nltk.download('stopwords')
# Given logistic regression model
%% Cell type:markdown id:4f79f0c2 tags:
* The original model is a baseline sentiment analysis system that uses logistic regression.
* The model was trained on 80% of the dataset and tested on the remaining 20%.
* The model obtained a 77% accuracy score.
``` python
``` python
# Load the "imdb" dataset through the Hugging Face `datasets` loader.
raw_datasets = load_dataset("imdb")
``` python
``` python
# Shuffle the training split with a fixed seed and keep 25,000 examples.
train_dataset = raw_datasets['train'].shuffle(seed=42).select(range(25000))
# Parallel lists: review texts and their sentiment labels.
train_data = []
train_data_labels = []
for item in train_dataset:
    # Each IMDB record stores the review under 'text' and its class under 'label'.
    train_data.append(item['text'])
    train_data_labels.append(item['label'])
``` python
``` python
# Bag-of-words features: lowercased word counts, vocabulary capped at 200 terms.
vectorizer = CountVectorizer(
    analyzer='word',
    max_features=200,
    lowercase=True,
)
features = vectorizer.fit_transform(train_data)
# Convert the count matrix into a dense array.
features_nd = features.toarray()

``` python
``` python
# Keep 80% of the samples for training and hold out the rest for testing;
# the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features_nd,
    train_data_labels,
    train_size=0.8,
    random_state=123,
)
``` python
``` python
# Create the logistic-regression classifier with its default settings.
log_model = LogisticRegression()
``` python
``` python
# Fit the classifier on the training split; fit() returns the fitted estimator.
log_model = log_model.fit(X=X_train, y=y_train)
``` python
``` python
# Predict labels for the held-out test features.
y_pred = log_model.predict(X_test)
``` python
``` python