Back

Explore Courses Blog Tutorials Interview Questions
0 votes
2 views
in Data Science by (17.6k points)

I am at a standstill because my output keeps displaying 100% accuracy on both random forest and decision tree but not support vector machine

I beleive the issue resides on how the data is either trained or tested. I think it is testing on the training data and not the testing data. However, I have no idea how to fix it.

import pandas as pd

import numpy as np

import keras

from keras.models import Sequential

from keras.layers import Dense

import matplotlib.pyplot as plt

from sklearn.svm import SVC

from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import confusion_matrix, classification_report

import sklearn.metrics as metrics

import seaborn as sns

import warnings

from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split

warnings.filterwarnings("ignore")

heart_data = pd.read_csv('data1.csv')

heart_data.head()

y = heart_data.target.values

x_data = heart_data.drop(['target'], axis = 1)

x = (x_data - np.min(x_data)) / (np.max(x_data) - np.min(x_data)).values

n_cols = x.shape[1]

#Splitting Data

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20)

def regression_model():

    # create model

    model = Sequential()

    #inputs

    model.add(Dense(50, activation='relu', input_shape=(n_cols,)))

    model.add(Dense(50, activation='relu')) # activation function

    model.add(Dense(1))

    # compile model

    model.compile(optimizer='adam', loss='mean_squared_error')

    #loss measures the results and figures out how bad it did. Optimizer generates next guess.

    return model

# build the model

model = regression_model()

print (model)

# fit the model

history=model.fit(x_train, y_train, validation_data=(x_test,y_test), epochs=10, batch_size=10)

# summarize history for loss

plt.plot(history.history['loss'])

plt.plot(history.history['val_loss'])

plt.title('model loss')

plt.ylabel('loss')

plt.xlabel('epoch')

plt.legend(['train', 'test'], loc='upper left')

plt.show()

#Decision Tree

print ("Processing Decision Tree")

dtc = DecisionTreeClassifier()

dtc.fit(x_test,y_test)

print("Decision Tree Test Accuracy {:.2f}%".format(dtc.score(x_test, y_test)*100))

#Support Vector Machine

print ("Processing Support Vector Machine")

svm = SVC(random_state = 1)

svm.fit(x_test, y_test)

print("Test Accuracy of SVM Algorithm: {:.2f}%".format(svm.score(x_test,y_test)*100))

#Random Forest

print ("Processing Random Forest")

rf = RandomForestClassifier(n_estimators = 1000, random_state = 1)

rf.fit(x_test, y_test)

print("Random Forest Algorithm Accuracy Score : {:.2f}%".format(rf.score(x_test,y_test)*100))

I am hopeing to get +90% on the random forest. Any syntax advice or changes are greatly appreciated.

1 Answer

0 votes
by (17.6k points)

1.With x_train and y_train, train your model.

2.After that, validate them with the test data.

Example:

#Random Forest

print ("Processing Random Forest")

rf = RandomForestClassifier(n_estimators = 1000, random_state = 1)

rf.fit(x_train, y_train)

y_test_pred = rf.predict(x_test)

print("Random Forest Algorithm Accuracy Score : {:.2f}%".format(rf.score(x_test,y_test)*100))

Browse Categories

...