import matplotlib.pyplot as plt
import matplotlib.image as pimg
import tensorflow as tf
import pandas as pd
import numpy as np
import random
import os
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
Mounted at /content/drive
Problem Definition
We are trying to distinguish between images that are real from images that are fake. This will enable me or any firm planning to integrate this fish out fake image upload from users on their platform.
In order to achieve this we have downloaded a dataset of faces from kaggle.com/ciplab/real-and-fake-face-detection which is categorized into fake and real classes.
Steps we will take to get this project done are:
- Get the image data
- Visualize and get to understand what our data looks like
- Prepare our data for our neural network
- Build a CNN base model for prediction
- Evaluate and Improve our model
- Are we satisfied? if not, REPEAT 😅
Deployment
After building this model it will be deployed to google GCP (Google Cloud Platform) where it will be rendered as an API
Get the image data
path = '/content/drive/MyDrive/real_and_fake_face/'
image_classes = os.listdir(path)
# Lets checkout how many paths are there in our image_path
image_classes
['not_face', 'face']
# Our image path contains an extra class '.ipynb_checkpoints', so let's see if we can take it out
! rm -rf .ipynb_checkpoints*
image_classes
['not_face', 'face']
# Check the amount of data we are trying to work with
for dir in os.listdir(path):
print(f'There are at least {len(os.listdir(os.path.join(path, dir)))} {dir} images to work with.')
There are at least 960 not_face images to work with.
There are at least 1078 face images to work with.
Visualize our Images
So far we've gotten our image data, therefore in order to get a better understanding of what they look like, let's visualize them.
We will create a helper function to help us visualize what these images look like. Let's code.
def show_image(path='/content/drive/MyDrive/real_and_fake_face/', class_name=''):
if class_name == '':
print('Please pass a class name')
else:
try:
img_path = os.listdir(os.path.join(path, class_name))
selected_img = random.choice(img_path)
image = plt.imread(os.path.join(path, class_name, selected_img))
# Let's plot the image
plt.figure(figsize=(15, 5))
plt.imshow(image)
plt.axis('off')
plt.title(f'File Name: {selected_img} \n Class Name: {class_name} \n Shape: {image.shape}')
except FileNotFoundError:
print(f'Sorry no such class name "{str(class_name).capitalize()}" was found in path {path}')
# Let's Checkout some real images
show_image(class_name='face')
# Let's checkout some fake image
show_image(class_name='not_face')
Prepare our data for our neural network
We've visualized a fair few amount of images, now lets prepare our data, to feed to a neural network
from tensorflow.keras.preprocessing.image import ImageDataGenerator
BATCH_SIZE = 16
IMG_HEIGHT = 124
IMG_WIDTH = 124
PATH = '/content/drive/MyDrive/real_and_fake_face/'
# Instantiate ImageDataGenerator
image_gen = ImageDataGenerator(rescale=1./255, horizontal_flip=True, validation_split=0.2)
# scale the images
train_data = image_gen.flow_from_directory(directory=PATH,
target_size=(IMG_HEIGHT, IMG_WIDTH),
class_mode='binary',
subset='training',
batch_size=BATCH_SIZE,
seed=42,
shuffle=True)
test_data = image_gen.flow_from_directory(directory=PATH,
target_size=(IMG_HEIGHT,IMG_WIDTH),
class_mode='binary',
subset='validation',
batch_size=BATCH_SIZE,
seed=42,
shuffle=True)
Found 1630 images belonging to 2 classes.
Found 407 images belonging to 2 classes.
image_classes = [*train_data.class_indices]
image_classes
['face', 'not_face']
Build a CNN base model for prediction
Now let's build a model to try predict what class an image falls into, let's do this!!
We took a few look at what our processed image data looks like and we found out the image not yet normalized. Remember that neural network best perform well on normalized data. So we need to normalize the data before feeding it to our neural network
# Build our model
model = tf.keras.Sequential([
tf.keras.layers.InputLayer(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
tf.keras.layers.Conv2D(16, 2, activation='relu'),
tf.keras.layers.MaxPool2D(),
tf.keras.layers.Conv2D(32, 2, activation='relu'),
tf.keras.layers.MaxPool2D(),
tf.keras.layers.Conv2D(64,2, activation='relu'),
tf.keras.layers.Dense(32, activation='relu'),
tf.keras.layers.Flatten(),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(1, activation='sigmoid'),
])
# Compile our model
model.compile(loss=tf.keras.losses.binary_crossentropy,
optimizer=tf.keras.optimizers.Adam(),
metrics=['accuracy'])
# Fit the data to our model
history_1 = model.fit(train_data,
epochs=5,
validation_data=test_data,
validation_steps=len(test_data),
steps_per_epoch=len(train_data))
Epoch 1/5
102/102 [==============================] - 435s 4s/step - loss: 0.3324 - accuracy: 0.8472 - val_loss: 0.3306 - val_accuracy: 0.8624
Epoch 2/5
102/102 [==============================] - 20s 198ms/step - loss: 0.1399 - accuracy: 0.9466 - val_loss: 0.0866 - val_accuracy: 0.9607
Epoch 3/5
102/102 [==============================] - 20s 197ms/step - loss: 0.0904 - accuracy: 0.9699 - val_loss: 0.0980 - val_accuracy: 0.9631
Epoch 4/5
102/102 [==============================] - 20s 199ms/step - loss: 0.0696 - accuracy: 0.9779 - val_loss: 0.1108 - val_accuracy: 0.9582
Epoch 5/5
102/102 [==============================] - 19s 191ms/step - loss: 0.0553 - accuracy: 0.9822 - val_loss: 0.0677 - val_accuracy: 0.9803
Plot a history graph to visualize how our model is learning
def plot_history(history):
"""
Returns a plots of model learning history object
Args:
history: Model's learning history object
Returns:
A plot of of model learning history
"""
train_loss = history.history['loss']
val_loss = history.history['val_loss']
train_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']
# Plot the histories
plt.plot(train_loss)
plt.plot(val_loss)
plt.xlabel('Epochs')
plt.legend(['train loss','val loss']);
plot_history(history_1)
# Create a function to enable us check our model performance on unseen data
def decode_image(image_path):
image = tf.io.read_file(image_path)
decoded_image = tf.io.decode_image(image,channels=3)
# Resize the image
image = tf.image.resize(decoded_image, size=[IMG_HEIGHT, IMG_WIDTH])
# Normalize the image
image = image/255.
# Predict
prediction = model.predict(tf.expand_dims(image, axis=0))
print(prediction)
print(int(tf.round(prediction)))
prediction = image_classes[int(tf.round(prediction))]
# plot
plt.figure(figsize=(12, 8))
plt.imshow(decoded_image)
plt.title(prediction)
plt.axis('off')
image_classes
['face', 'not_face']
decode_image('/content/drive/MyDrive/me.jpg')
[[0.00042969]]
0
decode_image('/content/drive/MyDrive/cga-ai.png')
[[0.8060113]]
1