ValueError: Input arrays should have the same number of samples as target arrays. Found 1 input samples and 513 target samples

Question

asked Jul 27, 2019 in Data Science by sourav (17.6k points)

I am getting the error stated in the title when trying to fit the model. The following script is supposed to classify between 3 types of traffic lights (red, green, yellow).

I have already printed the lengths of X_train and y_train, and they are the same length (both 513), so I am confused about how to fix this error.

# Script from the question, reproduced verbatim: it loads images from one folder per
# category, pairs each image with an integer label, and trains a small CNN with Keras.
# NOTE(review): the original indentation was lost when this snippet was pasted, and
# IMG_SIZE, os, cv2 and np are not defined here - presumably they are set up earlier.
DATADIR = "/Users/path-to-data/"
CATEGORIES = ['green', 'yellow', 'red']
# Accumulates [image_array, class_index] pairs.
training_data = []
for category in CATEGORIES:
path = os.path.join(DATADIR, category)
# The label is the category's position in CATEGORIES (0, 1 or 2).
class_num = CATEGORIES.index(category)
print(class_num)
for img in os.listdir(path):
try:
img_array = cv2.imread(os.path.join(path,img))
new_array = cv2.resize(img_array,(IMG_SIZE, IMG_SIZE))
# Prepends a length-1 axis, so every stored sample carries its own leading axis.
new_array = np.expand_dims(new_array, axis=0)
training_data.append([new_array, class_num])
# Any failure while reading or resizing a file is silently ignored.
except Exception as e:
pass
import random
random.shuffle(training_data)
# X and y are built as plain Python lists and are never converted to NumPy arrays.
X = []
y = []
for features, label in training_data:
X.append(features)
y.append(label)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
# Three Conv2D + MaxPooling2D stages followed by a dense classifier head.
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(150,150, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, kernel_size=(3, 3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
# The output layer has a single unit although CATEGORIES lists three classes.
model.add(Dense(1, activation='softmax'))
# The labels in y are integer class indices, while the loss chosen here is
# categorical_crossentropy.
model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])
# This is the call that raises the ValueError quoted in the title.
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

Here is the full traceback:

ValueError
Traceback (most recent call last)
<ipython-input-14-3119fea43292> in <module>
8
9 model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])
---> 10 model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
804 steps=steps_per_epoch,
805 validation_split=validation_split,
--> 806 shuffle=shuffle)
807
808 # Prepare validation data.
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, batch_size, check_steps, steps_name, steps, validation_split, shuffle, extract_tensors_from_dataset)
2652
2653 if not self._distribution_strategy:
-> 2654 training_utils.check_array_lengths(x, y, sample_weights)
2655 if self._is_graph_network and not self.run_eagerly:
2656 # Additional checks to avoid users mistakenly using improper loss fns.
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_utils.py in check_array_lengths(inputs, targets, weights)
445 'the same number of samples as target arrays. '
446 'Found ' + str(list(set_x)[0]) + ' input samples '
--> 447 'and ' + str(list(set_y)[0]) + ' target samples.')
448 if len(set_w) > 1:
449 raise ValueError('All sample_weight arrays should have '
ValueError: Input arrays should have the same number of samples as target arrays. Found 1 input samples and 513 target samples.

1 Answer

supriya · Answer 1 · 2020-05-15T04:24:58+0000

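Try the following:

Remove new_array = np.expand_dims(new_array, axis=0). It gives every image an extra leading axis, so each sample has shape (1, 150, 150, 3) instead of (150, 150, 3).
Add X = np.array(X) and y = np.array(y) before the train_test_split call. Keras treats a plain Python list of arrays as a list of separate model inputs rather than as one batch of samples, which is why it reports only 1 input sample.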
You are not using one-hot encoded labels, so for the above code to work change loss function from categorical_crossentropy to sparse_categorical_crossentropy.
Change model.add(Dense(1, activation='softmax')) to model.add(Dense(3, activation='softmax')), since there are 3 classes: green, yellow and red.
Remember to normalize the data before feeding it into the model, e.g. scale the pixel values to the range [0, 1] by dividing them by 255.

The code is as follows:

# Train a small CNN that classifies traffic-light images as green, yellow or red.
# Images are expected in one sub-folder of DATADIR per entry in CATEGORIES.
import os
import random

import cv2
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

IMG_SIZE = 150
DATADIR = "/Users/path-to-data/"
CATEGORIES = ['green', 'yellow', 'red']

# Collect [image, label] pairs; the label is the category's index in CATEGORIES.
training_data = []
for category in CATEGORIES:
    path = os.path.join(DATADIR, category)
    class_num = CATEGORIES.index(category)
    print(class_num)
    for img in os.listdir(path):
        img_path = os.path.join(path, img)
        img_array = cv2.imread(img_path)
        if img_array is None:
            # cv2.imread does not raise for files it cannot decode; it returns None.
            print("Skipping unreadable image:", img_path)
            continue
        try:
            new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
        except cv2.error as e:
            # Stay best-effort like the original, but report what was skipped.
            print("Skipping image that could not be resized:", img_path, e)
            continue
        # No np.expand_dims here: each sample must stay (IMG_SIZE, IMG_SIZE, 3).
        training_data.append([new_array, class_num])

if not training_data:
    raise ValueError("No readable images found under " + DATADIR)

random.shuffle(training_data)

# Keras needs one array per input, not a Python list of per-sample arrays, so stack
# the samples into NumPy arrays. Pixel values are scaled from [0, 255] to [0, 1];
# apply the same scaling to any image passed to model.predict later.
X = np.array([features for features, _ in training_data], dtype="float32") / 255.0  # (n_samples, IMG_SIZE, IMG_SIZE, 3)
y = np.array([label for _, label in training_data])  # (n_samples,)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
# One output unit per class; softmax turns the outputs into class probabilities.
model.add(Dense(len(CATEGORIES), activation='softmax'))
# The labels are integer class indices (not one-hot vectors), hence the sparse loss and metric.
model.compile(loss='sparse_categorical_crossentropy', optimizer='Adam', metrics=['sparse_categorical_accuracy'])
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

If you want to learn more about Data Science, check out the following Data Science resources, which will help you understand the subject from scratch.

Hi, what should the predict function look like? Will it be similar to the code below? I am asking because I am getting the wrong output:

import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tensorflow.keras.models import load_model

# Prediction script from the follow-up comment, reproduced verbatim: it loads a saved
# model, classifies one image and shows the image with the predicted label as title.
# NOTE(review): the original indentation of the function body was lost in the paste.
CATEGORIES=['Blue','Red','Green']
image=r'E:\colours_classifier\test\105.jpg'

# Reads the image file at path `image`, resizes it to img_size x img_size and returns
# it reshaped to (-1, img_size, img_size, 3), i.e. with a leading batch axis.
# NOTE(review): no pixel scaling is applied here - if the model was trained on scaled
# inputs, the same scaling is needed before predicting; verify against the training code.
def prepare(image):
img_size=150
img_array=cv2.imread(image,cv2.IMREAD_COLOR)
new_array=cv2.resize(img_array,(img_size,img_size))
return new_array.reshape(-1,img_size,img_size,3)

model = tf.keras.models.load_model(r"E:\colours_classifier\COLOURS_CNN.model")
prediction=model.predict([prepare(image)])
# NOTE(review): int(prediction[0][0]) truncates the FIRST value of the first prediction
# row and uses it as the list index. If the model ends in a softmax over the classes,
# that value is a probability in [0, 1], so this would almost always select index 0;
# taking the index of the largest value in prediction[0] (e.g. with np.argmax) is
# presumably what was intended - confirm against the model's output layer.
print(CATEGORIES[int(prediction[0][0])])
img=mpimg.imread(image)
imgplot=plt.imshow(img)
plt.title(CATEGORIES[int(prediction[0][0])])
plt.show()

— TYS2000, Jun 24, 2021

ValueError: Input arrays should have the same number of samples as target arrays. Found 1 input samples and 513 target samples

1 Answer

Related questions

Browse Categories