#import the required libraries
from tensorflow import keras
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import imageio
import cv2
import os
We will use the Deepfake Detection Challenge’s sample training data set available on Kaggle (Deepfake Detection Challenge Data, 2019) at https://www.kaggle.com/c/deepfake-detection-challenge/data.
This sample data contains 800 video files (real videos and fake videos included) in .mp4 format.
The videos are placed in two folders – training and testing with 400 videos each.
A metadata.json file provides four pieces of information for each video: the filename of the video file; a label indicating whether the video is REAL or FAKE; if the video is fake, original gives the name of the original video from which the fake was created; and split, which is train for all videos in the training folder.
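For illustration, each record in metadata.json is keyed by the video filename; a single entry looks roughly like the following (the values are taken from the first row of the metadata shown below, and the exact formatting of the file may differ):
{"aagfhgtpmv.mp4": {"label": "FAKE", "split": "train", "original": "vudstovrck.mp4"}}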
The code you will see below is inspired by the Keras blog on video classification (Paul, 2021) and by two Kaggle notebooks: the DeepFake Starter Kit (Deepfake Starter Kit, Kaggle, 2020) and (DB, 2022). Both notebooks were released under the Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0).
This code was implemented in Jupyter Notebook under Anaconda 3 Python Distribution (Python 3.11.9) on my Windows 11 PC.
#store the path to root folder containing all videos
data_dir = 'deepfake-detection-challenge'
#store the path to subfolder containing videos we will use for model training
train_dir = 'train_sample_videos'
#store the path to subfolder containing videos we will use for model testing
test_dir = 'test_videos'
#print the number of video samples in train and test folders
print(f"Number of Train samples: {len(os.listdir(os.path.join(data_dir, train_dir)))}")
print(f"Numbrt of Test samples: {len(os.listdir(os.path.join(data_dir, test_dir)))}")
Number of Train samples: 400 Numbrt of Test samples: 400
#reading the metadata.json file into a pandas dataframe
train_metadata = pd.read_json(os.path.join(data_dir, 'metadata.json')).T
train_metadata.head()
| | label | split | original |
|---|---|---|---|
| aagfhgtpmv.mp4 | FAKE | train | vudstovrck.mp4 |
| aapnvogymq.mp4 | FAKE | train | jdubbvfswz.mp4 |
| abarnvbtwb.mp4 | REAL | train | None |
| abofeumbvv.mp4 | FAKE | train | atvmxvwyns.mp4 |
| abqwwspghj.mp4 | FAKE | train | qzimuostzz.mp4 |
train_metadata.shape
(400, 3)
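In this sample the labels are heavily skewed towards FAKE videos, which is worth keeping in mind when interpreting the accuracy figures reported later. You can verify the exact class distribution with a one-liner (output not shown here):
#count how many REAL and FAKE videos are in the training metadata
train_metadata['label'].value_counts()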
#store the names of test videos as a dataframe
test_videos = pd.DataFrame(list(os.listdir(os.path.join(data_dir, test_dir))), columns=['video'])
# video frame properties and model training hyperparameters
#size of each image/video frame, we are using 224x224 pixels images
img_size = 224
#number of videos used in each training step
batch_size = 16
#number of epochs of training
no_of_epochs = 50
#maximum number of video frames considered from each video.
#this truncates longer videos and pads shorter ones
max_seq_length = 40
#number of features extracted from each video frame
num_features = 2048
#helper function to crop the center square portion of a video frame
def crop_frame_center(frame):
y, x = frame.shape[0:2]#frame dimensions
min_dim = min(y, x)#smaller dimension
start_x = (x // 2) - (min_dim // 2)#starting x coordinate of cropping square
    start_y = (y // 2) - (min_dim // 2)#starting y coordinate of cropping square
return frame[start_y : start_y + min_dim, start_x : start_x + min_dim]#return cropped area
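As a quick sanity check (an illustrative example, not part of the original notebook), cropping a dummy full-HD frame should return the centered square:
#sanity check crop_frame_center() on a dummy 1080x1920 frame (illustrative example)
dummy_frame = np.zeros((1080, 1920, 3), dtype=np.uint8)
print(crop_frame_center(dummy_frame).shape)#expected: (1080, 1080, 3)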
#helper function to load frames of a video located at a specified path
#each frame is resized to img_size x img_size
def load_video_frames(path, max_frames=0, resize=(img_size, img_size)):
cap = cv2.VideoCapture(path)#open a video using OpenCV
frames = []
try:
while True:
ret, frame = cap.read()#read a frame
if not ret:
break
frame = crop_frame_center(frame)#crop center square from the frame
frame = cv2.resize(frame, resize)#resize the cropped region
frame = frame[:, :, [2, 1, 0]]#reorder the color channels to RGB
frames.append(frame)#append the frame to frames list
if len(frames) == max_frames:#if max number of frames have been processed, break
break
finally:
cap.release() #release video capture
    return np.array(frames) #return a numpy array of video frames
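As an example of using this helper on its own, you could load the frames of one training video and inspect the resulting array shape. The filename below is taken from the metadata head shown earlier, and the path assumes the folder layout defined above:
#example: load up to max_seq_length frames from one training video (illustrative)
sample_frames = load_video_frames(os.path.join(data_dir, train_dir, 'aagfhgtpmv.mp4'), max_frames=max_seq_length)
print(sample_frames.shape)#e.g. (40, 224, 224, 3) if the video has at least 40 frames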
#helper function to build the InceptionV3-based feature extractor applied to each video frame
def extract_video_features():
"""
This function defines and builds a pre-trained feature extractor model
using InceptionV3 architecture.
Returns:
A compiled Keras model for feature extraction.
"""
# Load the InceptionV3 model pre-trained on ImageNet dataset
feature_extractor = keras.applications.InceptionV3(
weights="imagenet", # Load pre-trained weights from imagenet for feature extraction
include_top=False, # Exclude the classification layers (we only need features)
pooling="avg", # Use average pooling for feature representation
input_shape=(img_size, img_size, 3),# Specify the input shape of the video frames 224x224x3
)
# Access the pre-processing function for InceptionV3
preprocess_input = keras.applications.inception_v3.preprocess_input
# Define the model input layer
inputs = keras.Input((img_size, img_size, 3))
# Preprocess the input using the InceptionV3 function preprocess_input()
preprocessed = preprocess_input(inputs)
# Extract features using the pre-trained InceptionV3 model
outputs = feature_extractor(preprocessed)
# Create a Keras model with the defined inputs and feature extraction outputs
return keras.Model(inputs, outputs, name="feature_extractor")
# Call the function to build and instantiate the feature extractor model
feature_extractor = extract_video_features()
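To confirm that the extractor maps each frame to a num_features-dimensional vector (2048 for InceptionV3 with average pooling), you can pass a dummy frame through it; this is just a quick check and not part of the original pipeline:
#quick check: one dummy frame in, one 2048-dimensional feature vector out
dummy_input = np.zeros((1, img_size, img_size, 3), dtype="float32")
print(feature_extractor.predict(dummy_input, verbose=0).shape)#(1, 2048)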
def preprocess_videos(df, root_dir):
"""
This function preprocesses and prepares video data for training the model.
Args:
df (pandas.DataFrame): DataFrame containing video information (paths and labels).
root_dir (str): Root directory path containing the video files.
    Returns:
        tuple: A tuple of ((features, masks), labels), where:
            - features (numpy.ndarray): A 3D array of frame features, shaped (num_samples, max_seq_length, num_features).
            - masks (numpy.ndarray): A 2D boolean array of frame masks, shaped (num_samples, max_seq_length).
            - labels (numpy.ndarray): A 1D array of video labels (0 - real, 1 - fake).
"""
# Get the number of video samples and their paths from the DataFrame
num_samples = len(df)
video_paths = list(df.index)
# Extract labels and convert them to a binary numpy array (0 - real, 1 - fake)
labels = df["label"].values
labels = np.array(labels == 'FAKE').astype(int)
# Initialize arrays to store features and masks for all videos
frame_masks = np.zeros(shape=(num_samples, max_seq_length), dtype="bool")
frame_features = np.zeros(shape=(num_samples, max_seq_length, num_features), dtype="float32")
# Process each video in the DataFrame
for idx, path in enumerate(video_paths):
# Load all frames from the video and add a batch dimension
frames = load_video_frames(os.path.join(root_dir, path))
frames = frames[None, ...] # Add batch dimension
# Initialize temporary arrays for features and masks of the current video
temp_frame_mask = np.zeros(shape=(1, max_seq_length,), dtype="bool")
temp_frame_features = np.zeros(shape=(1, max_seq_length, num_features), dtype="float32")
# Extract features for each frame in the video
for i, batch in enumerate(frames):
video_length = batch.shape[0]
length = min(max_seq_length, video_length) # Truncate if longer than max_seq_length
for j in range(length):
# Extract features from a single frame using the feature extractor model
temp_frame_features[i, j, :] = feature_extractor.predict(batch[None, j, :])
# Create a mask for the current video (1 for valid frames, 0 for padding)
temp_frame_mask[i, :length] = 1
# Store the features and mask for the current video in the main arrays
frame_features[idx, :] = temp_frame_features.squeeze()
frame_masks[idx, :] = temp_frame_mask.squeeze()
# Return the preprocessed video features and corresponding labels
return (frame_features, frame_masks), labels
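One practical note on speed: the inner loop above calls feature_extractor.predict() once per frame, which is slow across hundreds of videos. A possible alternative (a sketch under this notebook's assumptions, not what the code above does) is to push all of a video's frames through the extractor in a single batch, for example with a hypothetical helper like this:
#hypothetical helper: extract features for all frames of one video in a single predict() call
def extract_features_batched(frames):
    length = min(max_seq_length, len(frames))
    features = np.zeros((max_seq_length, num_features), dtype="float32")
    mask = np.zeros((max_seq_length,), dtype="bool")
    features[:length] = feature_extractor.predict(frames[:length], verbose=0)#one call per video instead of per frame
    mask[:length] = True
    return features, mask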
from sklearn.model_selection import train_test_split
# Split the training data into training and testing sets
Train_set, Test_set = train_test_split(train_metadata,
test_size=0.1,
random_state=42,
stratify=train_metadata['label'])
# Print the shapes of the resulting training and testing sets
print(Train_set.shape, Test_set.shape)
(360, 3) (40, 3)
# Preprocess, extract features and create masks from all videos in the train split
# train_data is a tuple of (frame features, frame masks) for the training set.
# train_labels is a NumPy array containing labels (0 - real, 1 - fake) for the training set videos.
train_data, train_labels = preprocess_videos(Train_set, os.path.join(data_dir, train_dir))
# Preprocess, extract features and create masks from all videos in the test split
# (the test split was carved out of train_metadata, so its videos also live in the training folder)
# test_data is a tuple of (frame features, frame masks) for the test set.
# test_labels is a NumPy array containing labels (0 - real, 1 - fake) for the test set videos.
test_data, test_labels = preprocess_videos(Test_set, os.path.join(data_dir, train_dir))
# Print the shapes of training data components
# train_data[0] contains features and train_data[1] contains masks
print(f"Frame features in train set: {train_data[0].shape}")
print(f"Frame masks in train set: {train_data[1].shape}")
Frame features in train set: (360, 40, 2048)
Frame masks in train set: (360, 40)
#let us see what a mask looks like
train_data[1][2]
array([False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False])
# Define model inputs
frame_features_input = keras.Input((max_seq_length, num_features)) # Input for video features
mask_input = keras.Input((max_seq_length,), dtype="bool") # Input for frame masks
# mask_input takes frame masks containing boolean values, True for valid frames, False for padding
# First GRU layer with 16 units, returning entire output sequence for further processing by next GRU layer
# mask=mask_input to utilize the masking functionality based on the provided frame masks.
x = keras.layers.GRU(16, return_sequences=True)(frame_features_input, mask=mask_input)
# Masking (https://keras.io/api/layers/recurrent_layers/gru/) allows the GRU layer to handle sequences with variable lengths
# by ignoring padded elements based on the mask.
# Second GRU layer with 8 units processes the output from the first GRU layer
x = keras.layers.GRU(8)(x)
# Dropout layer with 40% dropout rate for regularization
#randomly drop 40% of the units' outputs during training to prevent overfitting.
x = keras.layers.Dropout(0.4)(x)
# Dense layer with 8 units and ReLU activation for non-linearity
x = keras.layers.Dense(8, activation="relu")(x)
# Output layer with 1 unit and sigmoid activation for binary classification (fake/real)
output = keras.layers.Dense(1, activation="sigmoid")(x)
# Create the model with specified inputs and output
model = keras.Model([frame_features_input, mask_input], output)
# Compile the model with binary crossentropy loss, Adam optimizer, and accuracy metric
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
# Print a summary of the model architecture
model.summary()
Model: "functional_4"
| Layer (type) | Output Shape | Param # | Connected to |
|---|---|---|---|
| input_layer_16 (InputLayer) | (None, 40, 2048) | 0 | - |
| input_layer_17 (InputLayer) | (None, 40) | 0 | - |
| gru_8 (GRU) | (None, 40, 16) | 99,168 | input_layer_16[0][0], input_layer_17[0][0] |
| gru_9 (GRU) | (None, 8) | 624 | gru_8[0][0] |
| dropout_4 (Dropout) | (None, 8) | 0 | gru_9[0][0] |
| dense_11 (Dense) | (None, 8) | 72 | dropout_4[0][0] |
| dense_12 (Dense) | (None, 1) | 9 | dense_11[0][0] |
Total params: 99,873 (390.13 KB)
Trainable params: 99,873 (390.13 KB)
Non-trainable params: 0 (0.00 B)
# Define ModelCheckpoint callback
checkpoint = keras.callbacks.ModelCheckpoint(
filepath='./videoclassification.weights.h5', # Path to save weights
save_weights_only=True, # Save only model weights
save_best_only=True # Save only the model with best validation performance
)
# Train the model
# The 'history' variable returned by model.fit() stores the training and validation loss (or other metrics) across epochs.
history = model.fit(
[train_data[0], train_data[1]], # Training features and masks
train_labels, # Training labels
validation_data=([test_data[0], test_data[1]], # Validation features and masks
test_labels), # Validation labels
callbacks=[checkpoint], # Include ModelCheckpoint callback to use model weight saving
epochs = no_of_epochs, # Number of training epochs
batch_size=8 # Batch size for training
)
Epoch 1/50
45/45 ━━━━━━━━━━━━━━━━━━━━ 3s 27ms/step - accuracy: 0.7796 - loss: 0.6899 - val_accuracy: 0.8000 - val_loss: 0.6811
Epoch 2/50
45/45 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - accuracy: 0.8145 - loss: 0.6775 - val_accuracy: 0.8000 - val_loss: 0.6697
Epoch 3/50
45/45 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - accuracy: 0.8180 - loss: 0.6654 - val_accuracy: 0.8000 - val_loss: 0.6589
...
Epoch 48/50
45/45 ━━━━━━━━━━━━━━━━━━━━ 1s 16ms/step - accuracy: 0.8323 - loss: 0.4667 - val_accuracy: 0.8000 - val_loss: 0.5040
Epoch 49/50
45/45 ━━━━━━━━━━━━━━━━━━━━ 1s 15ms/step - accuracy: 0.8208 - loss: 0.4793 - val_accuracy: 0.8000 - val_loss: 0.5036
Epoch 50/50
45/45 ━━━━━━━━━━━━━━━━━━━━ 1s 16ms/step - accuracy: 0.8625 - loss: 0.4291 - val_accuracy: 0.8000 - val_loss: 0.5032
# plot training accuracy, validation accuracy, training loss and validation loss over all 50 epochs
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(no_of_epochs)
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
#Evaluate the trained model on test data
model.evaluate([test_data[0], test_data[1]], test_labels)
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 0s/step - accuracy: 0.8042 - loss: 0.4982
[0.5032228231430054, 0.800000011920929]
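Note that model.evaluate() above uses the weights from the last training epoch. Because the ModelCheckpoint callback saved the best weights (by default, those with the lowest validation loss) to videoclassification.weights.h5, you can optionally restore them before evaluating or predicting; a minimal sketch:
#optionally restore the best weights saved by the ModelCheckpoint callback
model.load_weights('./videoclassification.weights.h5')
model.evaluate([test_data[0], test_data[1]], test_labels)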
def preprocess_a_testvideo(frames):
"""
This function preprocesses a single video for prediction.
Args:
frames: A NumPy array containing frames of a video.
Returns:
tuple: A tuple containing two elements:
- features (numpy.ndarray): A 3D array of video features,
shaped (1, max_seq_length, num_features).
- mask (numpy.ndarray): A 2D array of frame masks,
shaped (1, max_seq_length).
"""
# Add a batch dimension to the frames
frames = frames[None, ...]
# Initialize arrays for features and mask of the video
frame_mask = np.zeros(shape=(1, max_seq_length,), dtype="bool")
frame_features = np.zeros(shape=(1, max_seq_length, num_features), dtype="float32")
# Process each frame in the video
for i, batch in enumerate(frames):
video_length = batch.shape[0]
length = min(max_seq_length, video_length) # Truncate if longer than max_seq_length
for j in range(length):
# Extract features from a single frame using the feature extractor model
frame_features[i, j, :] = feature_extractor.predict(batch[None, j, :])
# Create a mask for the video (1 for valid frames, 0 for padding)
frame_mask[i, :length] = 1
# Return the preprocessed video features and mask
return frame_features, frame_mask
def classify_a_testvideo(path):
"""
This function predicts the class (fake or real) for a given video path.
Args:
path (str): Path to the video file.
Returns:
float: Predicted probability of the video being fake (between 0 and 1).
"""
# Load frames from the video
    frames = load_video_frames(os.path.join(data_dir, test_dir, path))
# Preprocess the video (features and mask)
frame_features, frame_mask = preprocess_a_testvideo(frames)
# Predict the class using the trained model
prediction = model.predict([frame_features, frame_mask])[0]
# Return the predicted probability for the fake class
return prediction
# Select a random test video path
test_video = np.random.choice(test_videos["video"].values.tolist())
print(f"Test video path: {test_video}")
# Predict the class (fake or real) for the chosen video
prediction = classify_a_testvideo(test_video)
if prediction >= 0.5:
print(f'The predicted class of the video is FAKE (probability: {prediction[0]:.2f})')
else:
print(f'The predicted class of the video is REAL (probability: {1-prediction[0]:.2f})')
Test video path: nthpnwylxo.mp4
The predicted class of the video is FAKE (probability: 0.77)