Ai
1 Star 0 Fork 1

Leo/Learning-OpenCV-4-Computer-Vision-with-Python-Third-Edition

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
detect_car_bow_svm_sliding_window.py 4.90 KB
一键复制 编辑 原始数据 按行查看 历史
import cv2
import numpy as np
import os
# In a Jupyter session `non_max_suppression_fast` is expected to be defined
# already; when it is missing from the global scope, import it here.
if 'non_max_suppression_fast' not in globals():
    from non_max_suppression import non_max_suppression_fast

# The training and test images live in the CarData folder; without it the
# script cannot do anything useful, so explain how to get it and stop.
car_data_available = os.path.isdir('CarData')
if not car_data_available:
    print('CarData folder not found. Please download and unzip '
          'https://github.com/gcr/arc-evaluator/raw/master/CarData.tar.gz '
          'into the same folder as this script.')
    exit(1)
# Number of images per class used to build the BoW vocabulary and to train
# the SVM, respectively.
BOW_NUM_TRAINING_SAMPLES_PER_CLASS = 10
SVM_NUM_TRAINING_SAMPLES_PER_CLASS = 110

# Size of the BoW vocabulary (number of k-means clusters).
BOW_NUM_CLUSTERS = 12

# Thresholds applied to detections: minimum SVM score and the overlap
# threshold handed to non-maximum suppression.
SVM_SCORE_THRESHOLD = 2.2
NMS_OVERLAP_THRESHOLD = 0.4

sift = cv2.SIFT_create()

# FLANN matcher configuration.
FLANN_INDEX_KDTREE = 1
index_params = {'algorithm': FLANN_INDEX_KDTREE, 'trees': 5}
search_params = {'checks': 50}
flann = cv2.FlannBasedMatcher(index_params, search_params)

# The trainer clusters SIFT descriptors into a vocabulary; the extractor
# uses SIFT and the FLANN matcher to compute BoW descriptors.
bow_kmeans_trainer = cv2.BOWKMeansTrainer(BOW_NUM_CLUSTERS)
bow_extractor = cv2.BOWImgDescriptorExtractor(sift, flann)
def get_pos_and_neg_paths(i):
    """Return the (positive, negative) training image paths for index i.

    The index is zero-based, while the file names are numbered from 1.
    """
    sample_number = i + 1
    return ('CarData/TrainImages/pos-%d.pgm' % sample_number,
            'CarData/TrainImages/neg-%d.pgm' % sample_number)
def add_sample(path):
    """Feed the SIFT descriptors of the image at `path` to the BoW trainer.

    The image is loaded as grayscale. Images for which SIFT produces no
    descriptors are silently skipped.
    """
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    _, sift_descriptors = sift.detectAndCompute(gray, None)
    if sift_descriptors is None:
        return
    bow_kmeans_trainer.add(sift_descriptors)
# Collect descriptors from the first few positive and negative training
# images (positive first, then negative, for each index).
for sample_index in range(BOW_NUM_TRAINING_SAMPLES_PER_CLASS):
    for sample_path in get_pos_and_neg_paths(sample_index):
        add_sample(sample_path)

# Cluster the collected descriptors and install the result as the
# vocabulary used by the BoW descriptor extractor.
voc = bow_kmeans_trainer.cluster()
bow_extractor.setVocabulary(voc)
def extract_bow_descriptors(img):
    """Compute the BoW descriptor of `img` from its SIFT keypoints.

    Returns whatever `bow_extractor.compute` returns; callers in this
    script check the result against None before using it.
    """
    keypoints = sift.detect(img)
    bow_descriptors = bow_extractor.compute(img, keypoints)
    return bow_descriptors
# Build the SVM training set: one BoW descriptor per usable image, labelled
# 1 for positive samples and -1 for negative samples.
training_data = []
training_labels = []
for i in range(SVM_NUM_TRAINING_SAMPLES_PER_CLASS):
    pos_path, neg_path = get_pos_and_neg_paths(i)
    for sample_path, label in ((pos_path, 1), (neg_path, -1)):
        sample_img = cv2.imread(sample_path, cv2.IMREAD_GRAYSCALE)
        sample_descriptors = extract_bow_descriptors(sample_img)
        # Images that yield no descriptor contribute nothing.
        if sample_descriptors is None:
            continue
        training_data.extend(sample_descriptors)
        training_labels.append(label)

# Train a C-support vector classifier on the collected samples.
svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)
svm.setC(50)
samples = np.array(training_data)
responses = np.array(training_labels)
svm.train(samples, cv2.ml.ROW_SAMPLE, responses)
def pyramid(img, scale_factor=1.05, min_size=(100, 40),
            max_size=(600, 240)):
    """Yield progressively smaller versions of a single-channel image.

    Args:
        img: 2-D image array.
        scale_factor: Divisor applied to width and height at every level.
        min_size: (width, height) below which iteration stops.
        max_size: (width, height) above which a level is skipped rather
            than yielded.

    Yields:
        The image at each level whose size lies within both bounds,
        starting with `img` itself when it already fits.
    """
    height, width = img.shape
    smallest_w, smallest_h = min_size
    largest_w, largest_h = max_size
    while not (width < smallest_w or height < smallest_h):
        fits_upper_bound = width <= largest_w and height <= largest_h
        if fits_upper_bound:
            yield img
        # Width and height are tracked as floats so that truncation to
        # whole pixels does not accumulate from level to level.
        width = width / scale_factor
        height = height / scale_factor
        next_size = (int(width), int(height))
        img = cv2.resize(img, next_size, interpolation=cv2.INTER_AREA)
def sliding_window(img, step=20, window_size=(100, 40)):
    """Yield every full-size window of a single-channel image.

    Args:
        img: 2-D image array indexed as img[row, column].
        step: Stride in pixels between window origins, used both
            horizontally and vertically.
        window_size: (width, height) of each window.

    Yields:
        (x, y, roi) tuples, where (x, y) is the window's top-left corner
        and roi is the corresponding slice of `img`. Windows are produced
        row by row; windows that would extend past the image edge are not
        produced.
    """
    img_h, img_w = img.shape
    window_w, window_h = window_size
    # y walks the rows (bounded by the image height) and x walks the
    # columns (bounded by the image width). Stopping at the last origin
    # where a whole window still fits guarantees every ROI is full-size.
    for y in range(0, img_h - window_h + 1, step):
        for x in range(0, img_w - window_w + 1, step):
            yield (x, y, img[y:y+window_h, x:x+window_w])
# Run the detector on each test image, draw the surviving detections, and
# show every result in its own window.
for test_img_path in ['CarData/TestImages/test-0.pgm',
                      'CarData/TestImages/test-1.pgm',
                      '../images/car.jpg',
                      '../images/haying.jpg',
                      '../images/statue.jpg',
                      '../images/woodcutters.jpg']:
    img = cv2.imread(test_img_path)
    if img is None:
        # cv2.imread returns None for a missing or unreadable file; skip
        # it with a message instead of failing inside cvtColor.
        print('Could not read image: %s' % test_img_path)
        continue
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    pos_rects = []
    for resized in pyramid(gray_img):
        # Factor that maps coordinates at this pyramid level back to the
        # original image; it is the same for every window of the level.
        scale = gray_img.shape[0] / float(resized.shape[0])
        for x, y, roi in sliding_window(resized):
            descriptors = extract_bow_descriptors(roi)
            if descriptors is None:
                continue
            prediction = svm.predict(descriptors)
            if prediction[1][0][0] == 1.0:
                raw_prediction = svm.predict(
                    descriptors, flags=cv2.ml.STAT_MODEL_RAW_OUTPUT)
                # The negated raw output is the score that is thresholded
                # here and drawn next to the rectangle below.
                score = -raw_prediction[1][0][0]
                if score > SVM_SCORE_THRESHOLD:
                    h, w = roi.shape
                    pos_rects.append([int(x * scale),
                                      int(y * scale),
                                      int((x+w) * scale),
                                      int((y+h) * scale),
                                      score])

    # Collapse overlapping detections before drawing.
    pos_rects = non_max_suppression_fast(
        np.array(pos_rects), NMS_OVERLAP_THRESHOLD)
    for x0, y0, x1, y1, score in pos_rects:
        cv2.rectangle(img, (int(x0), int(y0)), (int(x1), int(y1)),
                      (0, 255, 255), 2)
        text = '%.2f' % score
        cv2.putText(img, text, (int(x0), int(y0) - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
    cv2.imshow(test_img_path, img)

# Keep all result windows open until a key is pressed.
cv2.waitKey(0)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/akwkevin/Learning-OpenCV-4-Computer-Vision-with-Python-Third-Edition.git
git@gitee.com:akwkevin/Learning-OpenCV-4-Computer-Vision-with-Python-Third-Edition.git
akwkevin
Learning-OpenCV-4-Computer-Vision-with-Python-Third-Edition
Learning-OpenCV-4-Computer-Vision-with-Python-Third-Edition
master

搜索帮助