Back

Explore Courses Blog Tutorials Interview Questions
0 votes
2 views
in Data Science by (17.6k points)

I am getting the error stated in the title when trying to fit the model. The following script is supposed to classify between 3 types of traffic lights (red, green, yellow).

I have already printed the lengths of X_train and y_train, and they have the same length (both are 513), so I am confused about how to fix this error.

# Script from the question: loads images from one sub-folder per category,
# builds a small CNN and tries to fit it.
# NOTE(review): os, cv2, np and IMG_SIZE are used below but are not defined in
# this excerpt; presumably they are set up earlier in the notebook.

# Root folder; each category name below is joined onto it to find the images.
DATADIR = "/Users/path-to-data/"

CATEGORIES = ['green', 'yellow', 'red']

# Collects [image_array, class_index] pairs.
training_data = []

for category in CATEGORIES:

    path = os.path.join(DATADIR, category)

    # The label is the category's position in CATEGORIES (0, 1 or 2).
    class_num = CATEGORIES.index(category)

    print(class_num)

    for img in os.listdir(path):

        try:

            img_array = cv2.imread(os.path.join(path,img))

            new_array = cv2.resize(img_array,(IMG_SIZE, IMG_SIZE))

            # Adds a leading axis of length 1 to every individual image.
            new_array = np.expand_dims(new_array, axis=0)

            training_data.append([new_array, class_num])

        # Any failure while loading or resizing is ignored, so that file is
        # skipped without a message.
        except Exception as e:

            pass

import random

# Entries were appended category by category; shuffle them in place.
random.shuffle(training_data)

X = []

y = []

# Separate the pairs into a list of images and a list of labels.
for features, label in training_data:

    X.append(features)

    y.append(label)

from sklearn.model_selection import train_test_split

# 80/20 split. X and y are passed as plain Python lists here.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

import tensorflow as tf

from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

model = Sequential()

# Three Conv2D + MaxPooling2D stages followed by a dense classifier head.
model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(150,150, 3)))

model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, kernel_size=(3, 3),activation='relu'))

model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))

model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())

model.add(Dense(64, activation='relu'))

model.add(Dropout(0.5))

# NOTE(review): a single output unit with softmax, while CATEGORIES lists
# three classes.
model.add(Dense(1, activation='softmax'))

# NOTE(review): y holds integer class indices, not one-hot vectors; confirm
# that this matches what categorical_crossentropy expects.
model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])

# This is the call that raises the ValueError shown in the traceback.
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

Here is the full traceback:

ValueError                                

Traceback (most recent call last)

<ipython-input-14-3119fea43292> in <module>

      8 

      9 model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])

---> 10 model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)

    804         steps=steps_per_epoch,

    805         validation_split=validation_split,

--> 806         shuffle=shuffle)

    807 

    808     # Prepare validation data.

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, batch_size, check_steps, steps_name, steps, validation_split, shuffle, extract_tensors_from_dataset)

   2652 

   2653       if not self._distribution_strategy:

-> 2654         training_utils.check_array_lengths(x, y, sample_weights)

   2655         if self._is_graph_network and not self.run_eagerly:

   2656           # Additional checks to avoid users mistakenly using improper loss fns.

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_utils.py in check_array_lengths(inputs, targets, weights)

    445                      'the same number of samples as target arrays. '

    446                      'Found ' + str(list(set_x)[0]) + ' input samples '

--> 447                      'and ' + str(list(set_y)[0]) + ' target samples.')

    448   if len(set_w) > 1:

    449     raise ValueError('All sample_weight arrays should have '

ValueError: Input arrays should have the same number of samples as target arrays. Found 1 input samples and 513 target samples.

1 Answer

+1 vote
by (36.8k points)

Try the following changes:

  • Remove new_array = np.expand_dims(new_array, axis=0).
  • Add X = np.array(X) and y = np.array(y) before train_test_split function.
  • You are not using one-hot encoded labels, so, for the above code to work, change the loss function from categorical_crossentropy to sparse_categorical_crossentropy.
  • Change model.add(Dense(1, activation='softmax')) to model.add(Dense(3, activation='softmax')), since the number of classes is 3, i.e. green, yellow and red.
  • Remember that you need to normalize the data (for example, scale the pixel values to the range [0, 1]) before feeding it into the model.

The code is as follows:

# Train a small CNN that classifies traffic-light images as green, yellow or red.
#
# Expected layout: DATADIR/<category>/<image files>, one sub-folder per entry
# in CATEGORIES.

import os
import random

import cv2
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential

# Side length in pixels that every image is resized to; also used for the
# model's input shape so the two cannot drift apart.
IMG_SIZE = 150

DATADIR = "/Users/path-to-data/"

CATEGORIES = ['green', 'yellow', 'red']

# Collects [image_array, class_index] pairs.
training_data = []

# The label of a category is its position in CATEGORIES (0, 1 or 2).
for class_num, category in enumerate(CATEGORIES):
    path = os.path.join(DATADIR, category)
    print(class_num)

    for img in os.listdir(path):
        img_path = os.path.join(path, img)

        # cv2.imread does not raise on unreadable files (hidden files,
        # non-images, corrupt data); it returns None. Skip those explicitly
        # and say so, instead of hiding every error behind a bare `pass`.
        img_array = cv2.imread(img_path)
        if img_array is None:
            print(f"Skipping unreadable image: {img_path}")
            continue

        try:
            new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
        except cv2.error as e:
            print(f"Skipping {img_path}: {e}")
            continue

        # Keep each image as (IMG_SIZE, IMG_SIZE, 3). No extra leading axis:
        # the sample axis is created when the list is stacked into one array.
        training_data.append([new_array, class_num])

# Images were appended category by category; mix them up.
random.shuffle(training_data)

X = []
y = []

for features, label in training_data:
    X.append(features)
    y.append(label)

# X and y are Python lists; convert them to NumPy arrays so Keras sees one
# array with a leading sample axis. Scale the 0-255 pixel values to [0, 1]
# so the network trains on normalized inputs.
X = np.array(X, dtype="float32") / 255.0  # (num_samples, IMG_SIZE, IMG_SIZE, 3)
y = np.array(y)  # (num_samples,)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

model = Sequential()

model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))

# One output unit per class.
model.add(Dense(len(CATEGORIES), activation='softmax'))

# Labels are integer class indices (not one-hot), hence the sparse loss/metric.
model.compile(loss='sparse_categorical_crossentropy', optimizer='Adam', metrics=['sparse_categorical_accuracy'])

model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

If you want to know more about Data Science, check out the following Data Science course, which will help you understand Data Science from scratch.

by (130 points)
Hi, what would the predict function look like? Will it be similar to the code below? I am asking because I am getting the wrong output:

import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tensorflow.keras.models import load_model

# Class names used to label the prediction.
# NOTE(review): the order presumably has to match the label indices used when
# the model was trained — confirm against the training script.
CATEGORIES=['Blue','Red','Green']
# Path of the image to classify.
image=r'E:\colours_classifier\test\105.jpg'

def prepare(image):
    """Load the image file at ``image`` and shape it as a one-image batch.

    The file is read as a 3-channel colour image, resized to 150x150 pixels
    and reshaped to ``(-1, 150, 150, 3)``, i.e. a batch holding that single
    image.

    NOTE(review): pixel values are returned unscaled; confirm this matches
    the preprocessing that was applied to the training images.
    """
    side = 150
    target_size = (side, side)
    pixels = cv2.imread(image, cv2.IMREAD_COLOR)
    resized = cv2.resize(pixels, target_size)
    batch_shape = (-1, side, side, 3)
    return resized.reshape(batch_shape)


model = tf.keras.models.load_model(r"E:\colours_classifier\COLOURS_CNN.model")

# predict() returns one row per input image. Assuming the model ends in a
# softmax layer with one unit per class, each row holds one probability per
# entry of CATEGORIES.
prediction = model.predict(prepare(image))

# The predicted class is the index of the largest probability. The earlier
# int(prediction[0][0]) truncated the first class's probability to an integer,
# which is 0 for almost every input, so the first category was reported
# regardless of the image.
predicted_label = CATEGORIES[int(prediction[0].argmax())]
print(predicted_label)

# Show the original image with the predicted class as its title.
img = mpimg.imread(image)
imgplot = plt.imshow(img)
plt.title(predicted_label)
plt.show()

Browse Categories

...