1. 라이브러리 로드

import os
import zipfile
import random
import tensorflow as tf
import shutil
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile
from os import getcwd

 

2. ImageDataGenerator 사용하기 위해서 파일 구조 변경하기

 

2-1. 데이터 압축 해제

# Location of the dataset archive, resolved relative to the current working
# directory.
path_cats_and_dogs = f"{getcwd()}/../tmp2/cats-and-dogs.zip"

# NOTE(review): this removes the ENTIRE /tmp tree, not just data from a
# previous run -- confirm that is acceptable on the machine running this.
try:
    shutil.rmtree('/tmp')
except FileNotFoundError:
    # Nothing to clean up when /tmp does not exist yet.
    pass

local_zip = path_cats_and_dogs
# The context manager closes the archive even when extraction raises.
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/tmp')

 

2-2. 각 디렉토리에 몇 개의 강아지, 고양이 이미지가 존재하는지 확인

# Print the number of entries in each extracted class folder (cats, then dogs).
for source_folder in ('/tmp/PetImages/Cat/', '/tmp/PetImages/Dog/'):
    entries = os.listdir(source_folder)
    print(len(entries))

# Expected Output:
# 1500
# 1500

 

2-3. 디렉토리 만들기

디렉토리의 구조는 다음과 같다

  • cats-v-dogs
    • training
      • dogs
      • cats
    • testing
      • dogs
      • cats
# Create the directory tree used for the split: cats-v-dogs, with training
# and testing subdirectories, each holding a 'dogs' and a 'cats' folder.
#
# os.makedirs creates the missing parents of every leaf, and exist_ok=True
# makes a re-run harmless. The previous single try/except around a chain of
# os.mkdir calls stopped at the first directory that already existed, leaving
# the remaining ones uncreated, and also hid unrelated OSErrors.
for leaf_dir in (
    "/tmp/cats-v-dogs/training/dogs",
    "/tmp/cats-v-dogs/training/cats",
    "/tmp/cats-v-dogs/testing/dogs",
    "/tmp/cats-v-dogs/testing/cats",
):
    os.makedirs(leaf_dir, exist_ok=True)

 

2-4. 이미지 복사하기

  • Cat 폴더에 있던 고양이 이미지를 training/cats 디렉토리와 testing/cats 디렉토리에 나누어 저장
  • Dog 폴더에 있던 강아지 이미지를 training/dogs 디렉토리와 testing/dogs 디렉토리에 나누어 저장
def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    """Randomly split the files in SOURCE between TRAINING and TESTING.

    Args:
        SOURCE: Directory whose entries are to be split.
        TRAINING: Existing directory that receives the training share.
        TESTING: Existing directory that receives the remaining files.
        SPLIT_SIZE: Fraction of the files copied to TRAINING;
            int(len(files) * SPLIT_SIZE) files go there, the rest to TESTING.

    Files are copied, not moved, so SOURCE is left untouched.
    """
    file_names = os.listdir(SOURCE)
    # random.sample returns a NEW shuffled list and leaves its argument alone.
    # The result must be kept, otherwise the split simply follows the
    # os.listdir order and is not random at all.
    shuffled = random.sample(file_names, len(file_names))

    num_of_train = int(len(shuffled) * SPLIT_SIZE)

    for index, file_name in enumerate(shuffled):
        # The first num_of_train shuffled names are training data.
        destination = TRAINING if index < num_of_train else TESTING
        copyfile(os.path.join(SOURCE, file_name),
                 os.path.join(destination, file_name))

# Source folders produced by extracting the archive.
CAT_SOURCE_DIR = "/tmp/PetImages/Cat/"
DOG_SOURCE_DIR = "/tmp/PetImages/Dog/"

# Destination folders for the training share of each class.
TRAINING_CATS_DIR = "/tmp/cats-v-dogs/training/cats/"
TRAINING_DOGS_DIR = "/tmp/cats-v-dogs/training/dogs/"

# Destination folders for the testing share of each class.
TESTING_CATS_DIR = "/tmp/cats-v-dogs/testing/cats/"
TESTING_DOGS_DIR = "/tmp/cats-v-dogs/testing/dogs/"

# Fraction of each class that goes to the training folders.
split_size = .9

# Split cats first, then dogs, using the same ratio for both.
for source_dir, training_dir, testing_dir in (
    (CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR),
    (DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR),
):
    split_data(source_dir, training_dir, testing_dir, split_size)

 

2-5. train / test 할 강아지, 고양이 이미지 수 확인

# Print the number of files in each split folder, in the order:
# training cats, training dogs, testing cats, testing dogs.
for split_folder in (
    '/tmp/cats-v-dogs/training/cats/',
    '/tmp/cats-v-dogs/training/dogs/',
    '/tmp/cats-v-dogs/testing/cats/',
    '/tmp/cats-v-dogs/testing/dogs/',
):
    entries = os.listdir(split_folder)
    print(len(entries))

# Expected output:
# 1350
# 1350
# 150
# 150

 

3. ๋ชจ๋ธ ์ •์˜ํ•˜๊ธฐ

  • ์—ฌ๊ธฐ์„œ๋Š” convolutional layer๋ฅผ ์‚ฌ์šฉํ•˜์˜€๋‹ค.
  • ์—ฌ๊ธฐ์„œ ์ฃผ์˜ํ•  ์ ์€ input_shape ์ง€์ •ํ•ด์ฃผ๋Š” ๊ฒƒ์ด๋‹ค.
  • ๊ฐ•์•„์ง€์™€ ๊ณ ์–‘์ด๋ฅผ ๋ถ„๋ฅ˜ํ•˜๋Š” ์ด์ง„ ๋ถ„๋ฅ˜ ๋ฌธ์ œ์ด๊ธฐ ๋•Œ๋ฌธ์— ๋งˆ์ง€๋ง‰ ๋ ˆ์ด์–ด์˜ node ์ˆ˜๋Š” 1๊ฐœ๋กœ, activation function์œผ๋กœ๋Š” sigmoid๋ฅผ ์‚ฌ์šฉํ•ด์ฃผ์—ˆ๋‹ค.
  • ์ด์ง„ ๋ถ„๋ฅ˜ ๋ฌธ์ œ์ด๊ธฐ ๋•Œ๋ฌธ์— loss๋Š” binary_crossentropy๋ฅผ ์‚ฌ์šฉํ•˜์˜€๋‹ค.
# Keras model that classifies cats vs. dogs.
# Four Conv2D + MaxPool2D stages (the exercise asks for at least three),
# then a single sigmoid unit for the binary decision.
model = tf.keras.models.Sequential([
    # input_shape must match the target_size used by the data generators.
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu', input_shape=(300, 300, 3)),
    tf.keras.layers.MaxPool2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPool2D((2, 2)),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPool2D((2, 2)),
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu'),
    tf.keras.layers.MaxPool2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
# The metric is registered as 'acc', so the training history exposes the
# keys 'acc' and 'val_acc'.
model.compile(optimizer=RMSprop(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['acc'])

 

4. data generator

  • ImageDataGenerator ๋งŒ๋“ ๋‹ค (ํ•™์Šต์ด ๋” ์ž˜ ๋  ์ˆ˜ ์žˆ๋„๋ก rescale=1./255๋ฅผ ์„ค์ •ํ•ด 0~1 ์‚ฌ์ด์˜ ๊ฐ’์œผ๋กœ ์ •๊ทœํ™” ํ•ด์ฃผ์—ˆ๋‹ค.)
  • train_datagen๊ณผ validation_datagen์—์„œ์˜ target_size๋Š” ๋ฌด์กฐ๊ฑด ๋™์ผํ•ด์•ผ ํ•œ๋‹ค.
  • class_mode๋Š” ์ด์ง„ ๋ถ„๋ฅ˜ ๋ฌธ์ œ์ด๊ธฐ ๋•Œ๋ฌธ์— 'binary'๋กœ ์„ค์ •ํ•ด์ฃผ์—ˆ๋‹ค. (์‹ค์ œ ๊ณ ์–‘์ด, ๊ฐ•์•„์ง€ ์ด 2๊ฐœ์˜ ํ•˜์œ„ํด๋”๋งŒ ์กด์žฌ)
# Options shared by both generators: identical target size, a batch size of
# 10 (required by the exercise for the train AND validation generators), and
# binary labels.
flow_options = dict(
    target_size=(300, 300),
    batch_size=10,
    class_mode='binary',
)

# Training images, with pixel values scaled by 1/255.
TRAINING_DIR = "/tmp/cats-v-dogs/training"
train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(TRAINING_DIR, **flow_options)

# Validation images, rescaled the same way as the training images.
VALIDATION_DIR = "/tmp/cats-v-dogs/testing"
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_directory(VALIDATION_DIR, **flow_options)


# Expected Output:
# Found 2700 images belonging to 2 classes.
# Found 300 images belonging to 2 classes.

 

5. ๋ชจ๋ธ ํ•™์Šต

  • ๋ชจ๋ธ ํ•™์Šต์„ ์œ„ํ•ด train data, epochs ์ง€์ •ํ•ด์ค€๋‹ค.
  • ์ถ”๊ฐ€๋กœ validation score ํ™•์ธํ•˜๊ธฐ ์œ„ํ•ด validation data๋ฅผ ์ง€์ •ํ•ด์ฃผ์—ˆ๋‹ค.
# Train for two epochs and evaluate on the validation generator after each.
# Model.fit accepts generators directly; Model.fit_generator is deprecated
# and has been removed from newer Keras releases.
history = model.fit(train_generator,
                    epochs=2,
                    verbose=1,
                    validation_data=validation_generator)

 

6. ๋ชจ๋ธ ๋ถ„์„

  • train data์—์„œ์˜ accuracy, loss ๊ทธ๋ฆฌ๊ณ  validation data์—์„œ์˜ accuracy, loss๋ฅผ ์ด์šฉํ•˜์—ฌ ๋ชจ๋ธ์„ ๊ฐœ์„ ํ•œ๋‹ค.
  • ์ด ๊ฐ’์„ ์ด์šฉํ•ด ํ˜„์žฌ overfitting์ด ์ผ์–ด๋‚˜๊ณ  ์žˆ๋Š”์ง€, underfitting์ด ์ผ์–ด๋‚˜๊ณ  ์žˆ๋Š”์ง€,
  • ์–ด๋– ํ•œ ํด๋ž˜์Šค๋ฅผ ์ž˜ ์˜ˆ์ธกํ•˜์ง€ ๋ชปํ•˜๋Š”์ง€ ๋“ฑ์„ ํŒŒ์•…ํ•  ์ˆ˜ ์žˆ๋‹ค.
# PLOT LOSS AND ACCURACY
%matplotlib inline

import matplotlib.image  as mpimg
import matplotlib.pyplot as plt

#-----------------------------------------------------------
# Retrieve a list of list results on training and test data
# sets for each training epoch
#-----------------------------------------------------------
# Per-epoch metrics recorded during training. The 'acc' / 'val_acc' keys
# follow from metrics=['acc'] in model.compile.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))  # One x position per recorded epoch

#------------------------------------------------
# Plot training and validation accuracy per epoch
#------------------------------------------------
# The legend text must be passed as `label=`: a bare positional string after
# the format string is interpreted by plt.plot as extra data to draw.
plt.plot(epochs, acc, 'r', label="Training Accuracy")
plt.plot(epochs, val_acc, 'b', label="Validation Accuracy")
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()  # Start a new figure so the loss curves get their own chart

#------------------------------------------------
# Plot training and validation loss per epoch
#------------------------------------------------
plt.plot(epochs, loss, 'r', label="Training Loss")
plt.plot(epochs, val_loss, 'b', label="Validation Loss")
plt.title('Training and validation loss')
plt.legend()

# Desired output. Charts with training and validation metrics. No crash :)

+ Recent posts