#import the required libraries
from tensorflow import keras
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import imageio
import cv2
import os
We will use the Deepfake Detection Challenge’s sample training data set available on Kaggle (Deepfake Detection Challenge Data, 2019) at https://www.kaggle.com/c/deepfake-detection-challenge/data.
This sample data contains 800 video files (real videos and fake videos included) in .mp4 format.
The videos are placed in two folders – training and testing with 400 videos each.
A metadata.json file provides four pieces of information for each video: the filename of the video file; a label indicating whether the video is REAL or FAKE; if the video is fake, original gives the name of the original video from which the fake was created; and split, which is train for all videos in the training folder.
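For illustration, each record in metadata.json is keyed by the video filename; a single entry looks roughly like the following (the values are taken from the first row of the metadata shown below, and the exact formatting of the file may differ):
{"aagfhgtpmv.mp4": {"label": "FAKE", "split": "train", "original": "vudstovrck.mp4"}}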
The code you will see below is inspired by the Keras blog on video classification (Paul, 2021) and by two Kaggle notebooks: the DeepFake Starter Kit (Deepfake Starter Kit, Kaggle, 2020) and (DB, 2022). Both notebooks were released under the Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0).
This code was implemented in Jupyter Notebook under Anaconda 3 Python Distribution (Python 3.11.9) on my Windows 11 PC.
#store the path to root folder containing all videos
data_dir = 'deepfake-detection-challenge'
#store the path to subfolder containing videos we will use for model training
train_dir = 'train_sample_videos'
#store the path to subfolder containing videos we will use for model testing
test_dir = 'test_videos'
#print the number of video samples in train and test folders
print(f"Number of Train samples: {len(os.listdir(os.path.join(data_dir, train_dir)))}")
print(f"Numbrt of Test samples: {len(os.listdir(os.path.join(data_dir, test_dir)))}")
Number of Train samples: 400 Numbrt of Test samples: 400
#reading the metadata.json file into a pandas dataframe
train_metadata = pd.read_json(os.path.join(data_dir, 'metadata.json')).T
train_metadata.head()
| | label | split | original |
|---|---|---|---|
| aagfhgtpmv.mp4 | FAKE | train | vudstovrck.mp4 |
| aapnvogymq.mp4 | FAKE | train | jdubbvfswz.mp4 |
| abarnvbtwb.mp4 | REAL | train | None |
| abofeumbvv.mp4 | FAKE | train | atvmxvwyns.mp4 |
| abqwwspghj.mp4 | FAKE | train | qzimuostzz.mp4 |
train_metadata.shape
(400, 3)
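In this sample the labels are heavily skewed towards FAKE videos, which is worth keeping in mind when interpreting the accuracy figures reported later. You can verify the exact class distribution with a one-liner (output not shown here):
#count how many REAL and FAKE videos are in the training metadata
train_metadata['label'].value_counts()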
#store the names of test videos as a dataframe
test_videos = pd.DataFrame(list(os.listdir(os.path.join(data_dir, test_dir))), columns=['video'])
# video frame properties and model training hyperparameters
#size of each image/video frame, we are using 224x224 pixels images
img_size = 224
#number of videos used in each training step
batch_size = 16
#number of epochs of training
no_of_epochs = 50
#maximum number of video frames considered from each video.
#this truncates longer videos and pads shorter ones
max_seq_length = 40
#number of features extracted from each video frame
num_features = 2048
#helper function to crop the center square portion of a video frame
def crop_frame_center(frame):
y, x = frame.shape[0:2]#frame dimensions
min_dim = min(y, x)#smaller dimension
start_x = (x // 2) - (min_dim // 2)#starting x coordinate of cropping square
    start_y = (y // 2) - (min_dim // 2)#starting y coordinate of cropping square
return frame[start_y : start_y + min_dim, start_x : start_x + min_dim]#return cropped area
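As a quick sanity check (an illustrative example, not part of the original notebook), cropping a dummy full-HD frame should return the centered square:
#sanity check crop_frame_center() on a dummy 1080x1920 frame (illustrative example)
dummy_frame = np.zeros((1080, 1920, 3), dtype=np.uint8)
print(crop_frame_center(dummy_frame).shape)#expected: (1080, 1080, 3)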
#helper function to load frames of a video located at a specified path
#each frame is resized to img_size x img_size
def load_video_frames(path, max_frames=0, resize=(img_size, img_size)):
cap = cv2.VideoCapture(path)#open a video using OpenCV
frames = []
try:
while True:
ret, frame = cap.read()#read a frame
if not ret:
break
frame = crop_frame_center(frame)#crop center square from the frame
frame = cv2.resize(frame, resize)#resize the cropped region
frame = frame[:, :, [2, 1, 0]]#reorder the color channels to RGB
frames.append(frame)#append the frame to frames list
if len(frames) == max_frames:#if max number of frames have been processed, break
break
finally:
cap.release() #release video capture
    return np.array(frames) #return a numpy array of video frames
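As an example of using this helper on its own, you could load the frames of one training video and inspect the resulting array shape. The filename below is taken from the metadata head shown earlier, and the path assumes the folder layout defined above:
#example: load up to max_seq_length frames from one training video (illustrative)
sample_frames = load_video_frames(os.path.join(data_dir, train_dir, 'aagfhgtpmv.mp4'), max_frames=max_seq_length)
print(sample_frames.shape)#e.g. (40, 224, 224, 3) if the video has at least 40 frames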
#helper function to build the InceptionV3-based feature extractor applied to each video frame
def extract_video_features():
"""
This function defines and builds a pre-trained feature extractor model
using InceptionV3 architecture.
Returns:
A compiled Keras model for feature extraction.
"""
# Load the InceptionV3 model pre-trained on ImageNet dataset
feature_extractor = keras.applications.InceptionV3(
weights="imagenet", # Load pre-trained weights from imagenet for feature extraction
include_top=False, # Exclude the classification layers (we only need features)
pooling="avg", # Use average pooling for feature representation
input_shape=(img_size, img_size, 3),# Specify the input shape of the video frames 224x224x3
)
# Access the pre-processing function for InceptionV3
preprocess_input = keras.applications.inception_v3.preprocess_input
# Define the model input layer
inputs = keras.Input((img_size, img_size, 3))
# Preprocess the input using the InceptionV3 function preprocess_input()
preprocessed = preprocess_input(inputs)
# Extract features using the pre-trained InceptionV3 model
outputs = feature_extractor(preprocessed)
# Create a Keras model with the defined inputs and feature extraction outputs
return keras.Model(inputs, outputs, name="feature_extractor")
# Call the function to build and instantiate the feature extractor model
feature_extractor = extract_video_features()
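To confirm that the extractor maps each frame to a num_features-dimensional vector (2048 for InceptionV3 with average pooling), you can pass a dummy frame through it; this is just a quick check and not part of the original pipeline:
#quick check: one dummy frame in, one 2048-dimensional feature vector out
dummy_input = np.zeros((1, img_size, img_size, 3), dtype="float32")
print(feature_extractor.predict(dummy_input, verbose=0).shape)#(1, 2048)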
def preprocess_videos(df, root_dir):
"""
This function preprocesses and prepares video data for training the model.
Args:
df (pandas.DataFrame): DataFrame containing video information (paths and labels).
root_dir (str): Root directory path containing the video files.
    Returns:
        tuple: A tuple of ((features, masks), labels), where:
            - features (numpy.ndarray): A 3D array of frame features, shaped (num_samples, max_seq_length, num_features).
            - masks (numpy.ndarray): A 2D boolean array of frame masks, shaped (num_samples, max_seq_length).
            - labels (numpy.ndarray): A 1D array of video labels (0 - real, 1 - fake).
"""
# Get the number of video samples and their paths from the DataFrame
num_samples = len(df)
video_paths = list(df.index)
# Extract labels and convert them to a binary numpy array (0 - real, 1 - fake)
labels = df["label"].values
labels = np.array(labels == 'FAKE').astype(int)
# Initialize arrays to store features and masks for all videos
frame_masks = np.zeros(shape=(num_samples, max_seq_length), dtype="bool")
frame_features = np.zeros(shape=(num_samples, max_seq_length, num_features), dtype="float32")
# Process each video in the DataFrame
for idx, path in enumerate(video_paths):
# Load all frames from the video and add a batch dimension
frames = load_video_frames(os.path.join(root_dir, path))
frames = frames[None, ...] # Add batch dimension
# Initialize temporary arrays for features and masks of the current video
temp_frame_mask = np.zeros(shape=(1, max_seq_length,), dtype="bool")
temp_frame_features = np.zeros(shape=(1, max_seq_length, num_features), dtype="float32")
# Extract features for each frame in the video
for i, batch in enumerate(frames):
video_length = batch.shape[0]
length = min(max_seq_length, video_length) # Truncate if longer than max_seq_length
for j in range(length):
# Extract features from a single frame using the feature extractor model
temp_frame_features[i, j, :] = feature_extractor.predict(batch[None, j, :])
# Create a mask for the current video (1 for valid frames, 0 for padding)
temp_frame_mask[i, :length] = 1
# Store the features and mask for the current video in the main arrays
frame_features[idx, :] = temp_frame_features.squeeze()
frame_masks[idx, :] = temp_frame_mask.squeeze()
# Return the preprocessed video features and corresponding labels
return (frame_features, frame_masks), labels
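One practical note on speed: the inner loop above calls feature_extractor.predict() once per frame, which is slow across hundreds of videos. A possible alternative (a sketch under this notebook's assumptions, not what the code above does) is to push all of a video's frames through the extractor in a single batch, for example with a hypothetical helper like this:
#hypothetical helper: extract features for all frames of one video in a single predict() call
def extract_features_batched(frames):
    length = min(max_seq_length, len(frames))
    features = np.zeros((max_seq_length, num_features), dtype="float32")
    mask = np.zeros((max_seq_length,), dtype="bool")
    features[:length] = feature_extractor.predict(frames[:length], verbose=0)#one call per video instead of per frame
    mask[:length] = True
    return features, mask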
from sklearn.model_selection import train_test_split
# Split the training data into training and testing sets
Train_set, Test_set = train_test_split(train_metadata,
test_size=0.1,
random_state=42,
stratify=train_metadata['label'])
# Print the shapes of the resulting training and testing sets
print(Train_set.shape, Test_set.shape)
(360, 3) (40, 3)
# Preprocess, extract features and create masks from all videos in the train split
# train_data is a tuple of (frame features, frame masks) for the training set.
# train_labels is a NumPy array containing labels (0 - real, 1 - fake) for the training set videos.
train_data, train_labels = preprocess_videos(Train_set, os.path.join(data_dir, train_dir))
# Preprocess, extract features and create masks from all videos in the test split
# (the test split was carved out of train_metadata, so its videos also live in the training folder)
# test_data is a tuple of (frame features, frame masks) for the test set.
# test_labels is a NumPy array containing labels (0 - real, 1 - fake) for the test set videos.
test_data, test_labels = preprocess_videos(Test_set, os.path.join(data_dir, train_dir))
# Print the shapes of training data components
# train_data[0] contains features and train_data[1] contains masks
print(f"Frame features in train set: {train_data[0].shape}")
print(f"Frame masks in train set: {train_data[1].shape}")
Frame features in train set: (360, 40, 2048)
Frame masks in train set: (360, 40)
#let us see what a mask looks like
train_data[1][2]
array([False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False])
# Define model inputs
frame_features_input = keras.Input((max_seq_length, num_features)) # Input for video features
mask_input = keras.Input((max_seq_length,), dtype="bool") # Input for frame masks
# mask_input takes frame masks containing boolean values, True for valid frames, False for padding
# First GRU layer with 16 units, returning entire output sequence for further processing by next GRU layer
# mask=mask_input to utilize the masking functionality based on the provided frame masks.
x = keras.layers.GRU(16, return_sequences=True)(frame_features_input, mask=mask_input)
# Masking (https://keras.io/api/layers/recurrent_layers/gru/) allows the GRU layer to handle sequences with variable lengths
# by ignoring padded elements based on the mask.
# Second GRU layer with 8 units processes the output from the first GRU layer
x = keras.layers.GRU(8)(x)
# Dropout layer with 40% dropout rate for regularization
#randomly drop 40% of the units' outputs during training to prevent overfitting.
x = keras.layers.Dropout(0.4)(x)
# Dense layer with 8 units and ReLU activation for non-linearity
x = keras.layers.Dense(8, activation="relu")(x)
# Output layer with 1 unit and sigmoid activation for binary classification (fake/real)
output = keras.layers.Dense(1, activation="sigmoid")(x)
# Create the model with specified inputs and output
model = keras.Model([frame_features_input, mask_input], output)
# Compile the model with binary crossentropy loss, Adam optimizer, and accuracy metric
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
# Print a summary of the model architecture
model.summary()
Model: "functional_4"
| Layer (type) | Output Shape | Param # | Connected to |
|---|---|---|---|
| input_layer_16 (InputLayer) | (None, 40, 2048) | 0 | - |
| input_layer_17 (InputLayer) | (None, 40) | 0 | - |
| gru_8 (GRU) | (None, 40, 16) | 99,168 | input_layer_16[0][0], input_layer_17[0][0] |
| gru_9 (GRU) | (None, 8) | 624 | gru_8[0][0] |
| dropout_4 (Dropout) | (None, 8) | 0 | gru_9[0][0] |
| dense_11 (Dense) | (None, 8) | 72 | dropout_4[0][0] |
| dense_12 (Dense) | (None, 1) | 9 | dense_11[0][0] |
Total params: 99,873 (390.13 KB)
Trainable params: 99,873 (390.13 KB)
Non-trainable params: 0 (0.00 B)
# Define ModelCheckpoint callback
checkpoint = keras.callbacks.ModelCheckpoint(
filepath='./videoclassification.weights.h5', # Path to save weights
save_weights_only=True, # Save only model weights
save_best_only=True # Save only the model with best validation performance
)
# Train the model
# The 'history' variable returned by model.fit() stores the training and validation loss (or other metrics) across epochs.
history = model.fit(
[train_data[0], train_data[1]], # Training features and masks
train_labels, # Training labels
validation_data=([test_data[0], test_data[1]], # Validation features and masks
test_labels), # Validation labels
callbacks=[checkpoint], # Include ModelCheckpoint callback to use model weight saving
epochs = no_of_epochs, # Number of training epochs
batch_size=8 # Batch size for training
)
Epoch 1/50
45/45 ━━━━━━━━━━━━━━━━━━━━ 3s 27ms/step - accuracy: 0.7796 - loss: 0.6899 - val_accuracy: 0.8000 - val_loss: 0.6811
Epoch 2/50
45/45 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - accuracy: 0.8145 - loss: 0.6775 - val_accuracy: 0.8000 - val_loss: 0.6697
Epoch 3/50
45/45 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - accuracy: 0.8180 - loss: 0.6654 - val_accuracy: 0.8000 - val_loss: 0.6589
...
Epoch 48/50
45/45 ━━━━━━━━━━━━━━━━━━━━ 1s 16ms/step - accuracy: 0.8323 - loss: 0.4667 - val_accuracy: 0.8000 - val_loss: 0.5040
Epoch 49/50
45/45 ━━━━━━━━━━━━━━━━━━━━ 1s 15ms/step - accuracy: 0.8208 - loss: 0.4793 - val_accuracy: 0.8000 - val_loss: 0.5036
Epoch 50/50
45/45 ━━━━━━━━━━━━━━━━━━━━ 1s 16ms/step - accuracy: 0.8625 - loss: 0.4291 - val_accuracy: 0.8000 - val_loss: 0.5032
# plot training accuracy, validation accuracy, training loss and validation loss over all 50 epochs
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(no_of_epochs)
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
#Evaluate the trained model on test data
model.evaluate([test_data[0], test_data[1]], test_labels)
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 0s/step - accuracy: 0.8042 - loss: 0.4982
[0.5032228231430054, 0.800000011920929]
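Note that model.evaluate() above uses the weights from the last training epoch. Because the ModelCheckpoint callback saved the best weights (by default, those with the lowest validation loss) to videoclassification.weights.h5, you can optionally restore them before evaluating or predicting; a minimal sketch:
#optionally restore the best weights saved by the ModelCheckpoint callback
model.load_weights('./videoclassification.weights.h5')
model.evaluate([test_data[0], test_data[1]], test_labels)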
def preprocess_a_testvideo(frames):
"""
This function preprocesses a single video for prediction.
Args:
frames: A NumPy array containing frames of a video.
Returns:
tuple: A tuple containing two elements:
- features (numpy.ndarray): A 3D array of video features,
shaped (1, max_seq_length, num_features).
- mask (numpy.ndarray): A 2D array of frame masks,
shaped (1, max_seq_length).
"""
# Add a batch dimension to the frames
frames = frames[None, ...]
# Initialize arrays for features and mask of the video
frame_mask = np.zeros(shape=(1, max_seq_length,), dtype="bool")
frame_features = np.zeros(shape=(1, max_seq_length, num_features), dtype="float32")
# Process each frame in the video
for i, batch in enumerate(frames):
video_length = batch.shape[0]
length = min(max_seq_length, video_length) # Truncate if longer than max_seq_length
for j in range(length):
# Extract features from a single frame using the feature extractor model
frame_features[i, j, :] = feature_extractor.predict(batch[None, j, :])
# Create a mask for the video (1 for valid frames, 0 for padding)
frame_mask[i, :length] = 1
# Return the preprocessed video features and mask
return frame_features, frame_mask
def classify_a_testvideo(path):
"""
This function predicts the class (fake or real) for a given video path.
Args:
path (str): Path to the video file.
Returns:
float: Predicted probability of the video being fake (between 0 and 1).
"""
# Load frames from the video
    frames = load_video_frames(os.path.join(data_dir, test_dir, path))
# Preprocess the video (features and mask)
frame_features, frame_mask = preprocess_a_testvideo(frames)
# Predict the class using the trained model
prediction = model.predict([frame_features, frame_mask])[0]
# Return the predicted probability for the fake class
return prediction
# Select a random test video path
test_video = np.random.choice(test_videos["video"].values.tolist())
print(f"Test video path: {test_video}")
# Predict the class (fake or real) for the chosen video
prediction = classify_a_testvideo(test_video)
if prediction >= 0.5:
print(f'The predicted class of the video is FAKE (probability: {prediction[0]:.2f})')
else:
print(f'The predicted class of the video is REAL (probability: {1-prediction[0]:.2f})')
Test video path: nthpnwylxo.mp4
The predicted class of the video is FAKE (probability: 0.77)