Consolidate the individual files

leaf 2022-07-02 09:34:42 +08:00
parent 55d9b207b1
commit 96cc01855c
6 changed files with 447 additions and 615 deletions

TM.py (94 changed lines)

@@ -134,65 +134,45 @@ class HandDetector:
 class Main:
     def __init__(self):
         self.detector = None
-        self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
-        self.camera.set(3, 1280)
-        self.camera.set(4, 720)
+        self.camera = None
+        # self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
+        # self.camera.set(3, 1280)
+        # self.camera.set(4, 720)
 
-    def gesture_recognition(self):
-        self.detector = HandDetector()
-        while True:
-            frame, img = self.camera.read()
-            img = self.detector.find_hands(img)
-            lm_list, bbox = self.detector.find_position(img)
-            if lm_list:
-                x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
-                x1, x2, x3, x4, x5 = self.detector.fingers_up()
-                if (np.linalg.norm(lm_list[4]-lm_list[8]) < 50) and (np.linalg.norm(lm_list[4]-lm_list[12]) < 50):
-                    cv2.putText(img, "7_SEVEN", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif (np.linalg.norm(lm_list[4]-lm_list[8]) < 50) and (x4 == 1 and x5 == 1 and x3 == 1):
-                    cv2.putText(img, "OK", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif (np.linalg.norm(lm_list[4]-lm_list[12]) < 50) and (x4 == 1 and x5 == 1 and x2 == 1):
-                    cv2.putText(img, "flip", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif (x2 == 1 and x3 == 1) and (x4 == 0 and x5 == 0 and x1 == 0):
-                    cv2.putText(img, "2_TWO", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif (x2 == 1 and x3 == 1 and x4 == 1) and (x1 == 0 and x5 == 0):
-                    cv2.putText(img, "3_THREE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif (x2 == 1 and x3 == 1 and x4 == 1 and x5 == 1) and (x1 == 0):
-                    cv2.putText(img, "4_FOUR", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif x1 == 1 and x2 == 1 and x3 == 1 and x4 == 1 and x5 == 1:
-                    cv2.putText(img, "5_FIVE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif (x2 == 1 and x1 == 0) and (x3 == 0 and x4 == 0 and x5 == 0):
-                    cv2.putText(img, "1_ONE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif (x1 == 1 and x2 == 1) and (x3 == 0 and x4 == 0 and x5 == 0):
-                    cv2.putText(img, "8_EIGHT", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif (x1 == 1 and x5 == 1) and (x3 == 0 and x4 == 0 and x2 == 0):
-                    cv2.putText(img, "6_SIX", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif x1 and (x2 == 0 and x3 == 0 and x4 == 0 and x5 == 0):
-                    cv2.putText(img, "GOOD!", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif (x1 == 1 and x2 == 1 and x5 == 1) and (x3 == 0 and x4 == 0):
-                    cv2.putText(img, "yo", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                else:
-                    cv2.putText(img, "unknown", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-            cv2.imshow("camera", img)
-            key = cv2.waitKey(1)
-            if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
-                break
-            elif key == 27:
-                break
+    def gesture_recognition(self, img, detector):
+        self.detector = detector
+        lm_list, bbox = detector.find_position(img)
+        if lm_list:
+            x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
+            x1, x2, x3, x4, x5 = detector.fingers_up()
+            if (np.linalg.norm(lm_list[4]-lm_list[8]) < 50) and (np.linalg.norm(lm_list[4]-lm_list[12]) < 50):
+                cv2.putText(img, "7_SEVEN", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            elif (x2 == 1 and x3 == 1) and (x4 == 0 and x5 == 0 and x1 == 0):
+                cv2.putText(img, "2_TWO", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            elif (x2 == 1 and x3 == 1 and x4 == 1) and (x1 == 0 and x5 == 0):
+                cv2.putText(img, "3_THREE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            elif (x2 == 1 and x3 == 1 and x4 == 1 and x5 == 1) and (x1 == 0):
+                cv2.putText(img, "4_FOUR", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            elif x1 == 1 and x2 == 1 and x3 == 1 and x4 == 1 and x5 == 1:
+                cv2.putText(img, "5_FIVE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            elif (x2 == 1 and x1 == 0) and (x3 == 0 and x4 == 0 and x5 == 0):
+                cv2.putText(img, "1_ONE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            elif (x1 == 1 and x2 == 1) and (x3 == 0 and x4 == 0 and x5 == 0):
+                cv2.putText(img, "8_EIGHT", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            elif (x1 == 1 and x5 == 1) and (x3 == 0 and x4 == 0 and x2 == 0):
+                cv2.putText(img, "6_SIX", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            else:
+                return 1
+        return 0
 
 if __name__ == '__main__':
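Note: after this change TM.Main no longer owns a camera. gesture_recognition classifies one already-captured frame per call and returns 1 when a hand is visible but none of the rules match, so a caller can fall back to another recognizer. A minimal driver loop for the new signature, modeled on how gr.py calls it (a sketch, not code from this commit):

import cv2
import TM

camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)  # the caller owns the capture now
detector = TM.HandDetector()
main = TM.Main()
while True:
    ok, img = camera.read()
    if not ok:
        break
    img = detector.find_hands(img)           # draw landmarks before matching
    main.gesture_recognition(img, detector)  # 1 = hand seen, no template matched
    cv2.imshow("camera", img)
    if cv2.waitKey(1) == 27:                 # Esc quits
        break
camera.release()
cv2.destroyAllWindows()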

ai.py (113 changed lines)

@@ -6,13 +6,12 @@
 @ by: Leaf
 @ date: 2022-05-28
 """
-import tkinter as tk
 import cv2
 import mediapipe as mp
 import torch
 import torch.nn as nn
 import numpy as np
+import tkinter as tk
 import shutil
 import math
 from scipy import stats
@@ -33,7 +32,7 @@ def rotate(angle, x, y, point_x, point_y):
 def normalize(x):
     max_x = np.max(x)
     min_x = np.min(x)
-    return (x-min_x)/(max_x-min_x)
+    return (x - min_x) / (max_x - min_x)
 
 
 class CNN(nn.Module):
@@ -57,7 +56,7 @@ class CNN(nn.Module):
             nn.MaxPool2d(2),
         )
         self.med = nn.Linear(32 * 11 * 2, 500)
-        self.med2 = nn.Linear(1*21*3, 100)
+        self.med2 = nn.Linear(1 * 21 * 3, 100)
         self.med3 = nn.Linear(100, 500)
         self.out = nn.Linear(500, m)  # fully connected layer, output 10 classes
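Aside: the in_features of self.med line up with the 22x4 landmark matrices this file trains on (len_x = 22, len_y = 4). conv1 ends in MaxPool2d(kernel_size=1), which is a no-op, and conv2's MaxPool2d(2) halves both spatial dimensions, leaving 32 channels of 11x2. A quick shape check (a sketch, not part of the commit):

import torch
import torch.nn as nn

x = torch.zeros(1, 1, 22, 4)  # one 22x4 landmark matrix as a 1-channel image
conv1 = nn.Sequential(nn.Conv2d(1, 16, 5, 1, 2), nn.ReLU(), nn.MaxPool2d(kernel_size=1))
conv2 = nn.Sequential(nn.Conv2d(16, 32, 5, 1, 2), nn.ReLU(), nn.MaxPool2d(2))
print(conv2(conv1(x)).shape)  # torch.Size([1, 32, 11, 2]) -> 32 * 11 * 2 = 704 features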
@@ -285,6 +284,23 @@ class Main:
         self.len_x = 22
         self.len_y = 4
         self.label = ''
+        self.result = []
+        self.disp = ""
+
+    def change_state(self):
+        self.label = self.entry.get()  # call get() to fetch the contents of the Entry
+        self.top1.quit()
+        if self.label == "":
+            self.top1.destroy()
+
+    def make_datasets(self, camera, datasets_dir="default", n=100):
+        if datasets_dir == "default":
+            return
+        if exists(datasets_dir):
+            shutil.rmtree(datasets_dir)
+        mkdir(datasets_dir)
+        self.camera = camera
+
         self.top1 = tk.Tk()
         self.top1.geometry('300x50')
         self.top1.title('Please enter a label')
@@ -293,22 +309,6 @@ class Main:
         self.entry.place(x=80, y=10)
         tk.Button(self.top1, text='OK', command=self.change_state).place(x=235, y=5)
 
-    def change_state(self):
-        self.label = self.entry.get()  # call get() to fetch the contents of the Entry
-        self.top1.quit()
-        if self.label == "":
-            self.top1.destroy()
-
-    def make_datasets(self, datasets_dir="default", n=100):
-        if datasets_dir == "default":
-            return
-        if exists(datasets_dir):
-            shutil.rmtree(datasets_dir)
-        mkdir(datasets_dir)
-        if self.camera is None:
-            self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
-            self.camera.set(3, 1280)
-            self.camera.set(4, 720)
         self.top1.mainloop()
         while not self.label == "":
             data = np.zeros([n, self.len_x, self.len_y])
@@ -349,7 +349,6 @@ class Main:
             open(datasets_dir + "/" + self.label + ".npz", "w")
             np.savez(datasets_dir + "/" + self.label + ".npz", label=self.label, data=data,
                      handtype=hand_type, shape=shape_list)
-
             self.top1.mainloop()
 
     def train(self, datasets_dir="default"):
@@ -359,21 +358,75 @@ class Main:
         ai.load_datasets()
         ai.train_cnn()
 
-    def gesture_recognition(self):
-        if self.camera is None:
-            self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
-            self.camera.set(3, 1280)
-            self.camera.set(4, 720)
-        self.detector = HandDetector()
-        cnn = torch.load("CNN.pkl")
-        out_label = cnn.out_label
-        result = []
-        disp = ""
-        while True:
-            frame, img = self.camera.read()
-            img = self.detector.find_hands(img)
-            lm_list, bbox = self.detector.find_position(img)
+    def gesture_recognition_camera(self, detector, img, cnn):
+        self.detector = detector
+        out_label = cnn.out_label
+        img = self.detector.find_hands(img)
+        lm_list, bbox = self.detector.find_position(img)
+        if lm_list.any():
+            x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
+            data = torch.Tensor(lm_list)
+            data = data.unsqueeze(0)
+            data = data.unsqueeze(0)
+            test_output = cnn(data)
+            self.result.append(torch.max(test_output, 1)[1].data.cpu().numpy()[0])
+            if len(self.result) > 5:
+                self.disp = str(out_label[stats.mode(self.result)[0][0]])
+                self.result = []
+            cv2.putText(img, self.disp, (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                        (0, 0, 255), 3)
+
+    def gesture_recognition_video(self, filedir):
+        self.detector = HandDetector()
+        cnn = torch.load("CNN.pkl")
+        out_label = cnn.out_label
+        result = []
+        disp = ""
+        cap = cv2.VideoCapture(filedir)
+        while True:
+            ret, img = cap.read()
+            img = self.detector.find_hands(img)
+            lm_list, bbox = self.detector.find_position(img)
+            if lm_list.any():
+                x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
+                data = torch.Tensor(lm_list)
+                data = data.unsqueeze(0)
+                data = data.unsqueeze(0)
+                test_output = cnn(data)
+                result.append(torch.max(test_output, 1)[1].data.cpu().numpy()[0])
+                if len(result) > 5:
+                    disp = str(out_label[stats.mode(result)[0][0]])
+                    result = []
+                cv2.putText(img, disp, (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            cv2.imshow("camera", img)
+            key = cv2.waitKey(1)
+            if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
+                break
+            elif key == 27:
+                break
+        cap.release()
+
+    def gesture_recognition_img(self, filedir):
+        self.detector = HandDetector()
+        cnn = torch.load("CNN.pkl")
+        out_label = cnn.out_label
+        result = []
+        disp = ""
+        img = cv2.imread(filedir)
+        img = self.detector.find_hands(img)
+        while True:
+            lm_list, bbox = self.detector.find_position(img)
             if lm_list.any():
                 x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
                 data = torch.Tensor(lm_list)
@@ -402,4 +455,8 @@ if __name__ == '__main__':
     my_datasets_dir = "test"
     solution.make_datasets(my_datasets_dir, 100)
     solution.train(my_datasets_dir)
-    solution.gesture_recognition()
+    dir_video = "C:/Users/Liar/Pictures/Camera Roll/WIN_20220630_20_11_47_Pro.mp4"
+    dir_img = "C:/Users/Liar/Pictures/Camera Roll/WIN_20220630_20_01_22_Pro.jpg"
+    # solution.gesture_recognition_camera()
+    # solution.gesture_recognition_video(dir_video)
+    # solution.gesture_recognition_img(dir_img)
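With this split, ai.Main no longer opens its own camera: gesture_recognition_camera classifies a single frame handed in by the caller, and the 5-frame majority vote now lives in self.result/self.disp so it survives across calls, while the video and image variants keep their own read loops. A minimal external driver, modeled on how gr.py calls it and assuming a CNN.pkl saved by an earlier train() run (sketch only):

import cv2
import torch
import ai

camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
detector = ai.HandDetector()
main = ai.Main()
cnn = torch.load("CNN.pkl")  # model written by AI.train_cnn()
while True:
    ok, img = camera.read()
    if not ok:
        break
    main.gesture_recognition_camera(detector, img, cnn)  # draws its own label
    cv2.imshow("camera", img)
    if cv2.waitKey(1) == 27:
        break
camera.release()
cv2.destroyAllWindows()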

ai_UI.py (476 lines deleted)

@@ -1,476 +0,0 @@
# -*- coding:utf-8 -*-
"""
Signal design course group project
@ by: Leaf
@ date: 2022-05-28
"""
import tkinter as tk
import cv2
import mediapipe as mp
import torch
import torch.nn as nn
import numpy as np
import shutil
import math
from scipy import stats
from os.path import exists
from os import mkdir
from pathlib import Path
from torch.utils.data import DataLoader, TensorDataset


# rotation helper
def rotate(angle, x, y, point_x, point_y):
    px = (x - point_x) * math.cos(angle) - (y - point_y) * math.sin(angle) + point_x
    py = (x - point_x) * math.sin(angle) + (y - point_y) * math.cos(angle) + point_y
    return px, py


# normalization
def normalize(x):
    max_x = np.max(x)
    min_x = np.min(x)
    return (x - min_x) / (max_x - min_x)


class CNN(nn.Module):
    def __init__(self, m):
        super(CNN, self).__init__()
        self.out_label = []
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=1),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.med = nn.Linear(32 * 11 * 2, 500)
        self.med2 = nn.Linear(1 * 21 * 3, 100)
        self.med3 = nn.Linear(100, 500)
        self.out = nn.Linear(500, m)  # fully connected layer, output 10 classes

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten the multi-dim conv maps to (batch_size, 32 * 7 * 7)
        x = self.med(x)
        # x = self.med2(x)
        # x = self.med3(x)
        output = self.out(x)
        return output


class HandDetector:
    """
    Finds hands using the mediapipe library and exports the landmarks in pixel format.
    Adds extra functionality, such as checking how many fingers are up or the distance
    between two fingers, and provides bounding-box info for the detected hand.
    """

    def __init__(self, mode=False, max_hands=2, detection_con=0.5, min_track_con=0.5):
        """
        :param mode: in static mode, detection runs on every image
        :param max_hands: maximum number of hands to detect
        :param detection_con: minimum detection confidence
        :param min_track_con: minimum tracking confidence
        """
        self.results = None
        self.mode = mode
        self.max_hands = max_hands
        self.modelComplex = 1
        self.detection_con = detection_con
        self.min_track_con = min_track_con

        # initialize the hand-recognition model
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(static_image_mode=self.mode,
                                        max_num_hands=self.max_hands,
                                        min_detection_confidence=self.detection_con,
                                        min_tracking_confidence=self.min_track_con)
        self.mpDraw = mp.solutions.drawing_utils  # initialize the drawing helper
        self.tipIds = [4, 8, 12, 16, 20]  # fingertip landmark ids
        self.fingers = []
        self.lmList = []
        self.re_lmList = []

    def find_hands(self, img, draw=True):
        """
        Finds hands in a (BGR) image.
        :param img: image to search for hands
        :param draw: flag to draw the output on the image
        :return: the image, with or without drawings
        """
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # convert the incoming image from OpenCV's BGR to RGB
        self.results = self.hands.process(img_rgb)
        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms,
                                               self.mpHands.HAND_CONNECTIONS)
        return img

    def find_position(self, img, hand_no=0, draw=True):
        """
        Finds the landmarks of a single hand and puts them in a list, in pixel format.
        Can also return the bounding box around the hand.
        :param img: main image to search
        :param hand_no: hand id when multiple hands are detected
        :param draw: flag to draw the output on the image (draws the rectangle by default)
        :return: list of hand joint positions in pixel format; hand bounding box
        """
        x_list = []
        y_list = []
        bbox_info = []
        self.lmList = []
        h, w, c = img.shape
        if self.results.multi_hand_landmarks:
            my_hand = self.results.multi_hand_landmarks[hand_no]
            for i, lm in enumerate(my_hand.landmark):
                px, py = int(lm.x * w), int(lm.y * h)
                x_list.append(px)
                y_list.append(py)
                self.lmList.append([lm.x, lm.y, 0])
                if draw:
                    cv2.circle(img, (px, py), 5, (255, 0, 255), cv2.FILLED)
            x_min, x_max = min(x_list), max(x_list)
            y_min, y_max = min(y_list), max(y_list)
            box_w, box_h = x_max - x_min, y_max - y_min
            bbox = x_min, y_min, box_w, box_h
            cx, cy = bbox[0] + (bbox[2] // 2), bbox[1] + (bbox[3] // 2)
            bbox_info = {"id": hand_no, "bbox": bbox, "center": (cx, cy), "shape": (h, w)}
            if draw:
                cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
                              (bbox[0] + bbox[2] + 20, bbox[1] + bbox[3] + 20),
                              (0, 255, 0), 2)
            self.revolve(img)
        self.re_lmList = np.array(self.re_lmList)
        if self.re_lmList.any():
            self.re_lmList = np.concatenate((np.zeros((21, 1)), self.re_lmList), axis=1)
            self.re_lmList = np.concatenate((self.re_lmList, np.zeros((1, 4))), axis=0)
        return self.re_lmList, bbox_info

    def revolve(self, img, draw=True):
        """
        Rotates the gesture-recognition landmarks.
        :param img: main image to search
        :param draw: flag to draw the output on the image (draws the rectangle by default)
        :return: list of hand joint positions in pixel format
        """
        h, w, c = img.shape
        if len(self.lmList) >= 21:
            # print(self.lmList)
            self.re_lmList = []
            point_x = self.lmList[0][0]
            point_y = self.lmList[0][1]
            delta_x = self.lmList[13][0] - point_x
            delta_y = self.lmList[13][1] - point_y
            if delta_y == 0:
                if delta_x < 0:
                    theta = math.pi / 2
                else:
                    theta = -math.pi / 2
            else:
                theta = math.atan(delta_x / delta_y)
                if delta_y > 0:
                    theta = theta + math.pi
            # print(theta*180/math.pi)
            for i in self.lmList:
                px, py = rotate(theta, i[0] * w, i[1] * h, point_x * w, point_y * h)
                self.re_lmList.append([px, py, 0])
                if draw:
                    cv2.circle(img, (int(px), int(py)), 5, (0, 0, 255), cv2.FILLED)
            # normalization
            x_array = normalize(np.array(self.re_lmList)[:, 0])
            # print(x_array)
            for i in range(len(x_array)):
                self.re_lmList[i][0] = x_array[i]
            y_array = normalize(np.array(self.re_lmList)[:, 1])
            for i in range(len(y_array)):
                self.re_lmList[i][1] = x_array[i]
        else:
            self.re_lmList = self.lmList
        return self.re_lmList

    def hand_type(self):
        """
        Checks whether the detected hand is left or right.
        :return: 1 or 0
        """
        if self.results.multi_hand_landmarks:
            if self.lmList[17][0] < self.lmList[5][0]:
                return 1
            else:
                return 0


class AI:
    def __init__(self, datasets_dir):
        self.EPOCH = 20
        self.BATCH_SIZE = 2
        self.LR = 10e-5
        self.DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.datasets_dir = datasets_dir
        self.train_loader = None
        self.m = 0
        self.out_label = []  # mapping from the CNN's numeric output labels back to string labels

    def load_datasets(self):
        train_data = []
        train_label = []
        self.m = 0
        for file in Path(self.datasets_dir).rglob("*.npz"):
            data = np.load(str(file))
            train_data.append(data["data"])
            label_number = np.ones(len(data["data"])) * len(self.out_label)
            train_label.append(label_number)
            self.out_label.append(data["label"])
            self.m += 1
        train_data = torch.Tensor(np.concatenate(train_data, axis=0))
        train_data = train_data.unsqueeze(1)
        train_label = torch.tensor(np.concatenate(train_label, axis=0)).long()
        dataset = TensorDataset(train_data, train_label)
        self.train_loader = DataLoader(dataset, batch_size=self.BATCH_SIZE, shuffle=True)
        return self.m

    def train_cnn(self):
        cnn = CNN(self.m).to(self.DEVICE)
        optimizer = torch.optim.Adam(cnn.parameters(), self.LR)  # optimize all cnn parameters
        loss_func = nn.CrossEntropyLoss()  # the target label is not one-hotted
        for epoch in range(self.EPOCH):
            for step, (data, target) in enumerate(self.train_loader):
                # dispatch batch data; normalize x while iterating train_loader
                data, target = data.to(self.DEVICE), target.to(self.DEVICE)
                output = cnn(data)  # cnn output
                loss = loss_func(output, target)  # cross entropy loss
                optimizer.zero_grad()  # clear gradients for this training step
                loss.backward()  # backpropagation, compute gradients
                optimizer.step()  # apply gradients
                if (step + 1) % 50 == 0:  # print progress
                    print(
                        "\r[Epoch: %d] [%d/%d (%0.f %%)][Loss: %f]"
                        % (
                            epoch + 1,
                            (step + 1) * len(data),
                            len(self.train_loader.dataset),
                            100. * (step + 1) / len(self.train_loader),
                            loss.item()
                        ), end="")
        cnn.out_label = self.out_label
        torch.save(cnn, 'CNN.pkl')
        print("Training finished")


class Main:
    def __init__(self):
        self.camera = None
        self.detector = HandDetector()
        self.default_datasets = "Datasets"
        self.len_x = 22
        self.len_y = 4
        self.label = ''

        self.top1 = tk.Tk()
        self.top1.geometry('300x50')
        self.top1.title('Please enter a label')
        tk.Label(self.top1, text='Label:').place(x=27, y=10)
        self.entry = tk.Entry(self.top1, width=15)
        self.entry.place(x=80, y=10)
        tk.Button(self.top1, text='OK', command=self.change_state).place(x=235, y=5)

    def change_state(self):
        self.label = self.entry.get()  # call get() to fetch the contents of the Entry
        self.top1.quit()
        if self.label == "":
            self.top1.destroy()

    def make_datasets(self, datasets_dir="default", n=100):
        if datasets_dir == "default":
            return
        if exists(datasets_dir):
            shutil.rmtree(datasets_dir)
        mkdir(datasets_dir)
        if self.camera is None:
            self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
            self.camera.set(3, 1280)
            self.camera.set(4, 720)
        self.top1.mainloop()
        while not self.label == "":
            data = np.zeros([n, self.len_x, self.len_y])
            shape_list = np.zeros([n, 2], dtype=np.int16)
            hand_type = np.zeros(n, dtype=np.int8)
            count = 0
            cv2.startWindowThread()
            while True:
                frame, img = self.camera.read()
                img = self.detector.find_hands(img)
                result = np.zeros((self.len_x, self.len_y))
                lm_list, bbox = self.detector.find_position(img)
                for i in range(len(lm_list)):
                    result[i] = np.array(lm_list[i])
                if result.sum() > 0:  # when the matrix is nonzero, i.e. a hand was captured
                    shape = bbox["shape"]
                    x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
                    data[count] = result
                    hand_type[count] = self.detector.hand_type()
                    shape_list[count] = np.array(shape)
                    count += 1
                    cv2.putText(img, str("{}/{}".format(count, n)), (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
                                (0, 255, 0), 3)
                cv2.imshow("camera", img)
                key = cv2.waitKey(100)
                if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
                    break
                elif key == 27:
                    break
                elif count == n - 1:
                    break
            cv2.destroyAllWindows()
            open(datasets_dir + "/" + self.label + ".npz", "w")
            np.savez(datasets_dir + "/" + self.label + ".npz", label=self.label, data=data,
                     handtype=hand_type, shape=shape_list)
            self.top1.mainloop()

    def train(self, datasets_dir="default"):
        if datasets_dir == "default":
            datasets_dir = self.default_datasets
        ai = AI(datasets_dir)
        ai.load_datasets()
        ai.train_cnn()

    def gesture_recognition_camera(self):
        if self.camera is None:
            self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
            self.camera.set(3, 1280)
            self.camera.set(4, 720)
        self.detector = HandDetector()
        cnn = torch.load("CNN.pkl")
        out_label = cnn.out_label
        result = []
        disp = ""
        while True:
            frame, img = self.camera.read()
            img = self.detector.find_hands(img)
            lm_list, bbox = self.detector.find_position(img)
            if lm_list.any():
                x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
                data = torch.Tensor(lm_list)
                data = data.unsqueeze(0)
                data = data.unsqueeze(0)
                test_output = cnn(data)
                result.append(torch.max(test_output, 1)[1].data.cpu().numpy()[0])
                if len(result) > 5:
                    disp = str(out_label[stats.mode(result)[0][0]])
                    result = []
                cv2.putText(img, disp, (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
                            (0, 0, 255), 3)
            cv2.imshow("camera", img)
            key = cv2.waitKey(1)
            if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
                break
            elif key == 27:
                break

    def gesture_recognition_video(self, filedir):
        self.detector = HandDetector()
        cnn = torch.load("CNN.pkl")
        out_label = cnn.out_label
        result = []
        disp = ""
        cap = cv2.VideoCapture(filedir)
        while True:
            ret, img = cap.read()
            img = self.detector.find_hands(img)
            lm_list, bbox = self.detector.find_position(img)
            if lm_list.any():
                x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
                data = torch.Tensor(lm_list)
                data = data.unsqueeze(0)
                data = data.unsqueeze(0)
                test_output = cnn(data)
                result.append(torch.max(test_output, 1)[1].data.cpu().numpy()[0])
                if len(result) > 5:
                    disp = str(out_label[stats.mode(result)[0][0]])
                    result = []
                cv2.putText(img, disp, (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
                            (0, 0, 255), 3)
            cv2.imshow("camera", img)
            key = cv2.waitKey(1)
            if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
                break
            elif key == 27:
                break
        cap.release()

    def gesture_recognition_img(self, filedir):
        self.detector = HandDetector()
        cnn = torch.load("CNN.pkl")
        out_label = cnn.out_label
        result = []
        disp = ""
        img = cv2.imread(filedir)
        img = self.detector.find_hands(img)
        while True:
            lm_list, bbox = self.detector.find_position(img)
            if lm_list.any():
                x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
                data = torch.Tensor(lm_list)
                data = data.unsqueeze(0)
                data = data.unsqueeze(0)
                test_output = cnn(data)
                result.append(torch.max(test_output, 1)[1].data.cpu().numpy()[0])
                if len(result) > 5:
                    disp = str(out_label[stats.mode(result)[0][0]])
                    result = []
                cv2.putText(img, disp, (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
                            (0, 0, 255), 3)
            cv2.imshow("camera", img)
            key = cv2.waitKey(1)
            if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
                break
            elif key == 27:
                break


if __name__ == '__main__':
    solution = Main()
    my_datasets_dir = "test"
    # solution.make_datasets(my_datasets_dir, 100)
    # solution.train(my_datasets_dir)
    dir_video = "C:/Users/Liar/Pictures/Camera Roll/WIN_20220630_20_11_47_Pro.mp4"
    dir_img = "C:/Users/Liar/Pictures/Camera Roll/WIN_20220630_20_01_22_Pro.jpg"
    solution.gesture_recognition_camera()
    # solution.gesture_recognition_video(dir_video)
    # solution.gesture_recognition_img(dir_img)

ai_two.py

@@ -36,9 +36,9 @@ def normalize(x):
     return (x-min_x)/(max_x-min_x)
 
 
-class CNN(nn.Module):
+class CNNTwo(nn.Module):
     def __init__(self, m):
-        super(CNN, self).__init__()
+        super(CNNTwo, self).__init__()
         self.out_label = []
         self.conv1 = nn.Sequential(
             nn.Conv2d(
@@ -250,7 +250,7 @@ class AI:
         return self.m
 
     def train_cnn(self):
-        cnn = CNN(self.m).to(self.DEVICE)
+        cnn = CNNTwo(self.m).to(self.DEVICE)
         optimizer = torch.optim.Adam(cnn.parameters(), self.LR)  # optimize all cnn parameters
         loss_func = nn.CrossEntropyLoss()  # the target label is not one-hotted
@@ -275,7 +275,7 @@ class AI:
                     ), end="")
 
         cnn.out_label = self.out_label
-        torch.save(cnn, 'CNN.pkl')
+        torch.save(cnn, 'CNN_two.pkl')
         print("Training finished")
@@ -287,13 +287,9 @@ class Main:
         self.len_x = 44
         self.len_y = 4
         self.label = ''
-        self.top1 = tk.Tk()
-        self.top1.geometry('300x50')
-        self.top1.title('Please enter a label')
-        tk.Label(self.top1, text='Label:').place(x=27, y=10)
-        self.entry = tk.Entry(self.top1, width=15)
-        self.entry.place(x=80, y=10)
-        tk.Button(self.top1, text='OK', command=self.change_state).place(x=235, y=5)
+        self.result = []
+        self.disp = ""
 
     def change_state(self):
         self.label = self.entry.get()  # call get() to fetch the contents of the Entry
@@ -301,16 +297,27 @@ class Main:
         if self.label == "":
             self.top1.destroy()
 
-    def make_datasets(self, datasets_dir="default", n=100):
+    def on_closing(self):
+        self.label = ""
+        self.top1.destroy()
+
+    def make_datasets(self, camera, datasets_dir="default", n=100):
         if datasets_dir == "default":
             return
         if exists(datasets_dir):
             shutil.rmtree(datasets_dir)
         mkdir(datasets_dir)
-        if self.camera is None:
-            self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
-            self.camera.set(3, 1280)
-            self.camera.set(4, 720)
+        self.camera = camera
+
+        self.top1 = tk.Tk()
+        self.top1.geometry('300x50')
+        self.top1.title('Please enter a label')
+        self.top1.protocol("WM_DELETE_WINDOW", self.on_closing)
+        tk.Label(self.top1, text='Label:').place(x=27, y=10)
+        self.entry = tk.Entry(self.top1, width=15)
+        self.entry.place(x=80, y=10)
+        tk.Button(self.top1, text='OK', command=self.change_state).place(x=235, y=5)
         self.top1.mainloop()
         while not self.label == "":
             data = np.zeros([n, self.len_x, self.len_y])
@@ -369,47 +376,34 @@ class Main:
         ai.load_datasets()
         ai.train_cnn()
 
-    def gesture_recognition(self):
-        if self.camera is None:
-            self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
-            self.camera.set(3, 1280)
-            self.camera.set(4, 720)
-        self.detector = HandDetector()
-        cnn = torch.load("CNN.pkl")
-        out_label = cnn.out_label
-        result = []
-        disp = ""
-        while True:
-            frame, img = self.camera.read()
-            img, is_two_hand = self.detector.find_hands(img)
-            if is_two_hand:
-                lm_list1, bbox1 = self.detector.find_position(img, 0)
-                lm_list2, bbox2 = self.detector.find_position(img, 1)
-                if lm_list1.any() and lm_list2.any():
-                    x_1, y_1 = bbox1["bbox"][0], bbox1["bbox"][1]
-                    x_2, y_2 = bbox2["bbox"][0], bbox2["bbox"][1]
-                    lm_list = np.concatenate((lm_list1, lm_list2), axis=0)
-                    data = torch.Tensor(lm_list)
-                    data = data.unsqueeze(0)
-                    data = data.unsqueeze(0)
-                    test_output = cnn(data)
-                    result.append(torch.max(test_output, 1)[1].data.cpu().numpy()[0])
-                    if len(result) > 4:
-                        disp = str(out_label[stats.mode(result)[0][0]])
-                        result = []
-                    cv2.putText(img, disp, (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                    cv2.putText(img, disp, (x_2, y_2), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-            cv2.imshow("camera", img)
-            key = cv2.waitKey(1)
-            if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
-                break
-            elif key == 27:
-                break
+    def gesture_recognition(self, detector, img, cnn):
+        self.detector = detector
+        out_label = cnn.out_label
+        img, is_two_hand = self.detector.find_hands(img)
+        if is_two_hand:
+            lm_list1, bbox1 = self.detector.find_position(img, 0)
+            lm_list2, bbox2 = self.detector.find_position(img, 1)
+            if lm_list1.any() and lm_list2.any():
+                x_1, y_1 = bbox1["bbox"][0], bbox1["bbox"][1]
+                x_2, y_2 = bbox2["bbox"][0], bbox2["bbox"][1]
+                lm_list = np.concatenate((lm_list1, lm_list2), axis=0)
+                data = torch.Tensor(lm_list)
+                data = data.unsqueeze(0)
+                data = data.unsqueeze(0)
+                test_output = cnn(data)
+                self.result.append(torch.max(test_output, 1)[1].data.cpu().numpy()[0])
+                if len(self.result) > 4:
+                    self.disp = str(out_label[stats.mode(self.result)[0][0]])
+                    self.result = []
+                cv2.putText(img, self.disp, (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+                cv2.putText(img, self.disp, (x_2, y_2), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+        else:
+            return 1
+        return 0
 
 if __name__ == '__main__':
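For the two-hand model, each call stacks the two 22x4 landmark matrices from find_position into one 44x4 matrix (hence len_x = 44) and shapes it as a batch of one single-channel image; CNNTwo's two MaxPool2d(2) stages then reduce 44x4 to 11x1, matching the 32 * 11 * 1 linear layer in gr.py. A sketch of that assembly with placeholder landmark values:

import numpy as np
import torch

lm_list1 = np.zeros((22, 4))  # landmarks of hand 0 (placeholder values)
lm_list2 = np.zeros((22, 4))  # landmarks of hand 1
lm_list = np.concatenate((lm_list1, lm_list2), axis=0)   # shape (44, 4)
data = torch.Tensor(lm_list).unsqueeze(0).unsqueeze(0)   # shape (1, 1, 44, 4) for CNNTwo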

gr.py (new file, 165 lines)

@@ -0,0 +1,165 @@
import TM
import ai
import ai_two
import cv2
import torch
import torch.nn as nn


class CNN(nn.Module):
    def __init__(self, m):
        super(CNN, self).__init__()
        self.out_label = []
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=1),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.med = nn.Linear(32 * 11 * 2, 500)
        self.med2 = nn.Linear(1 * 21 * 3, 100)
        self.med3 = nn.Linear(100, 500)
        self.out = nn.Linear(500, m)  # fully connected layer, output 10 classes

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten the multi-dim conv maps to (batch_size, 32 * 7 * 7)
        x = self.med(x)
        # x = self.med2(x)
        # x = self.med3(x)
        output = self.out(x)
        return output


class CNNTwo(nn.Module):
    def __init__(self, m):
        super(CNNTwo, self).__init__()
        self.out_label = []
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.med = nn.Linear(32 * 11 * 1, 500)
        self.med2 = nn.Linear(1 * 21 * 3, 100)
        self.med3 = nn.Linear(100, 500)
        self.out = nn.Linear(500, m)  # fully connected layer, output 10 classes

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten the multi-dim conv maps to (batch_size, 32 * 7 * 7)
        x = self.med(x)
        # x = self.med2(x)
        # x = self.med3(x)
        output = self.out(x)
        return output


class Main:
    def __init__(self):
        self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
        self.camera.set(3, 1280)
        self.camera.set(4, 720)
        self.tm_detector = TM.HandDetector()
        self.ai_detector = ai.HandDetector()
        self.at_detector = ai_two.HandDetector()
        self.tm_main = TM.Main()
        self.ai_main = ai.Main()
        self.at_main = ai_two.Main()

    def gr_img(self, filedir, diy):
        print(filedir)
        img = cv2.imread(filedir)
        if diy:
            cnn = torch.load("CNN.pkl")
            cnn_two = torch.load("CNN_two.pkl")
        tm_img = self.tm_detector.find_hands(img)
        while True:
            is_one_hand = self.at_main.gesture_recognition(self.at_detector, img, cnn_two)
            if is_one_hand:
                not_match = self.ai_main.gesture_recognition_camera(self.ai_detector, img, cnn)
                if not_match:
                    self.tm_main.gesture_recognition(tm_img, self.tm_detector)
            cv2.imshow("camera", img)
            key = cv2.waitKey(1)
            if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
                break
            elif key == 27:
                break

    def gr_video(self, filedir, diy):
        cap = cv2.VideoCapture(filedir)
        if diy:
            cnn = torch.load("CNN.pkl")
            cnn_two = torch.load("CNN_two.pkl")
        while True:
            ret, img = cap.read()
            tm_status = self.tm_main.gesture_recognition(self.tm_detector.find_hands(img), self.tm_detector)
            if tm_status and diy:
                is_one_hand = self.at_main.gesture_recognition(self.at_detector, img, cnn_two)
                if is_one_hand:
                    self.ai_main.gesture_recognition_camera(self.ai_detector, img, cnn)
            cv2.imshow("camera", img)
            key = cv2.waitKey(1)
            if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
                break
            elif key == 27:
                break
        cap.release()

    def gr_realtime(self, diy):
        if diy:
            cnn = torch.load("CNN.pkl")
            cnn_two = torch.load("CNN_two.pkl")
        while True:
            frame, img = self.camera.read()
            tm_status = self.tm_main.gesture_recognition(self.tm_detector.find_hands(img), self.tm_detector)
            if tm_status and diy:
                is_one_hand = self.at_main.gesture_recognition(self.at_detector, img, cnn_two)
                if is_one_hand:
                    self.ai_main.gesture_recognition_camera(self.ai_detector, img, cnn)
            cv2.imshow("camera", img)
            key = cv2.waitKey(1)
            if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
                break
            elif key == 27:
                break

    def ai_input(self):
        self.ai_main.make_datasets(self.camera, "ai_datasets", 100)
        self.ai_main.train("ai_datasets")
        self.at_main.make_datasets(self.camera, "ai_two_datasets", 100)
        self.at_main.train("ai_two_datasets")


if __name__ == '__main__':
    main = Main()
    main.gr_img("C:/Users/leafl/Pictures/图片1.png", 0)
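Taken together, gr.Main chains the three recognizers per frame: in the realtime and video paths the rule-based TM matcher runs first, and only unmatched frames (return value 1, with diy set) fall through to the two-hand CNN and then the one-hand CNN; gr_img tries the CNNs first and the rules last. A typical entry point, assuming CNN.pkl and CNN_two.pkl have been trained ("demo.mp4" is a hypothetical local file):

import gr

app = gr.Main()
app.gr_realtime(1)  # webcam; diy=1 loads CNN.pkl and CNN_two.pkl as fallbacks
# app.gr_video("demo.mp4", 1)  # same cascade over a video file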

main.py (new file, 112 lines)

@@ -0,0 +1,112 @@
# -*- coding:utf-8 -*-
"""
Signal design course group project
@ by: Leaf
@ date: 2022-05-28
"""
import gr
import tkinter as tk
from tkinter import filedialog, Button, Label, Frame, ACTIVE, LEFT
from PIL import Image, ImageTk


class DisplayImage:
    """Displays the selected image"""

    def __init__(self, master):
        self.master = master
        master.title("GUI")
        self.Text_lab0 = Label(master, text='Loaded image/video')
        self.Text_lab0.pack(pady=10)
        self.image_frame = Frame(master, bd=0, height=300, width=300, bg='white', highlightthickness=2,
                                 highlightbackground='gray', highlightcolor='black')
        self.image_frame.pack()
        self.Text_label = Label(master, text='Load an image/video to recognize')
        self.Text_label.place(x=60, y=410)
        self.Choose_image = Button(master, command=self.choose_img, text="Image",
                                   width=7, default=ACTIVE, borderwidth=0)
        self.Choose_image.place(x=50, y=450)
        self.Choose_image = Button(master, command=self.choose_video, text="Video",
                                   width=7, default=ACTIVE, borderwidth=0)
        self.Choose_image.place(x=120, y=450)
        self.Text_label2 = Label(master, text='Run gesture recognition')
        self.Text_label2.place(x=60, y=500)
        self.image_mosaic = Button(master, command=self.gesture_recognition, text="Gesture recognition",
                                   width=17, default=ACTIVE, borderwidth=0)
        self.image_mosaic.place(x=50, y=540)
        self.Text_label3 = Label(master, text='Run realtime gesture recognition')
        self.Text_label3.place(x=300, y=410)
        self.realtime = Button(master, command=self.realtime_gr, text="Realtime\n gesture recognition",
                               width=17, height=6, default=ACTIVE, borderwidth=0)
        self.realtime.place(x=300, y=450)
        self.Text_label4 = Label(master, text='Record a custom gesture')
        self.Text_label4.place(x=180, y=610)
        self.input = Button(master, command=self.input_image, text="Input gesture",
                            width=42, default=ACTIVE, borderwidth=0)
        self.input.place(x=60, y=650)
        self.gr = gr.Main()
        self.temp_dir = "temp"
        self.mode = 0
        self.directory = ""
        self.diy = 1

    def choose_img(self):
        self.mode = 1
        # clear the contents of the frame
        for widget in self.image_frame.winfo_children():
            widget.destroy()
        self.directory = filedialog.askopenfilename()
        # lay out the selected image
        img = Image.open(self.directory).resize((300, 300))
        img.save(self.temp_dir + "/photo.png")
        image = ImageTk.PhotoImage(image=img)
        label = Label(self.image_frame, highlightthickness=0, borderwidth=0)
        label.configure(image=image)
        label.pack(side=LEFT, expand=True)

    def choose_video(self):
        # clear the contents of the frame
        self.mode = 2
        for widget in self.image_frame.winfo_children():
            widget.destroy()
        self.directory = filedialog.askopenfilename()
        # lay out the selected image
        img = Image.open(self.temp_dir + "/video.jpg").resize((300, 300))
        img.save(self.temp_dir + "/photo.png")
        image = ImageTk.PhotoImage(image=img)
        label = Label(self.image_frame, highlightthickness=0, borderwidth=0)
        label.configure(image=image)
        label.pack(side=LEFT, expand=True)

    def gesture_recognition(self):
        if self.mode == 1:
            self.gr.gr_img(self.directory, self.diy)
        elif self.mode == 2:
            self.gr.gr_video(self.directory, self.diy)

    def realtime_gr(self):
        self.gr.gr_realtime(self.diy)

    def input_image(self):
        self.diy = 1
        self.gr.ai_input()


def main():
    window = tk.Tk()
    DisplayImage(window)
    window.title('Gesture Recognition')
    window.geometry('500x720')
    window.mainloop()


if __name__ == '__main__':
    main()