Compare commits

...

40 Commits
demo ... main

Author SHA1 Message Date
leaf
b7ee7599fb Fix bug 2022-07-02 16:37:41 +08:00
leaf
90c7612ea2 Fix bug 2022-07-02 14:30:38 +08:00
leaf
96cc01855c Consolidate into a single file 2022-07-02 09:34:42 +08:00
leaf
55d9b207b1
Merge pull request #6 from leafliber/TM
Tm
2022-07-01 11:10:21 +08:00
leaf
da3c89fb7e
Merge pull request #4 from leafliber/AI
Ai
2022-07-01 11:08:16 +08:00
leaf
eb72bd8723 Two-hand gesture recognition 2022-06-30 21:56:56 +08:00
leaf
2d1e26144d
Merge pull request #5 from leafliber/DM
Dm
2022-06-30 21:16:16 +08:00
Liar
763c37c9d8 Merge remote-tracking branch 'origin/AI' into AI 2022-06-30 21:11:52 +08:00
leaf
e2e37af9c0 Delete demo 2022-06-30 21:11:21 +08:00
Liar
9698cd8fa2 Add image recognition and video recognition 2022-06-30 20:45:23 +08:00
leaf
29960f24c2 Change: output unknown at low confidence 2022-06-29 19:56:55 +08:00
leaf
717f9fe606 Add UI
Optimization (zj)
2022-06-29 19:35:16 +08:00
leaf
7103d4a7aa Rename 2022-06-29 19:14:04 +08:00
leaf
1117a6a5ea Rename 2022-06-29 19:13:47 +08:00
Liar
67b9d8c91c Fix CNN display error 2022-06-29 19:07:38 +08:00
Liar
9ee836276c Fix CNN display error 2022-06-29 19:06:51 +08:00
leaf
96c6039474 Change: add majority-vote (mode) decision 2022-06-29 09:39:50 +08:00
leaf
8bfe21e11b Delete: original dataset 2022-06-25 13:41:11 +08:00
leaf
b3ef803adf Add: data preprocessing: rotation correction for the DM scheme 2022-06-25 13:38:29 +08:00
Tabs
85fb3ad83c Implement front-facing gesture test; new gestures can be added and labeled 2022-06-13 17:23:55 +08:00
Tabs
d0137634ed Modify the gesture model and improve knuckle recognition
Add a test framework
2022-06-13 15:48:59 +08:00
leaf
458fce4066 Add: user-defined gestures (rename) 2022-06-09 22:45:55 +08:00
leaf
ff20ecd6ff Add: user-defined gestures 2022-06-09 22:39:40 +08:00
leaf
5f9d6f5abc Add: CNN network test (basic pipeline complete)
Attached: training results
2022-06-09 13:03:13 +08:00
Tabs
25baacbca9 Add parameter documentation for the revolve function 2022-06-09 12:51:13 +08:00
leaf
ea6ee46971 Add: dataset loading and CNN training 2022-06-09 11:19:36 +08:00
Liar
c48b63903a Fix bug where rotating 90° crashed the program 2022-06-08 18:27:29 +08:00
leafiber
8fa99ecbeb Add: dataset readme 2022-06-08 17:29:04 +08:00
leaf
3dfee5a275
Update Readme.txt 2022-06-08 17:26:03 +08:00
leaf
7ad07fc8ea
Merge pull request #2 from leafliber/Dataset
Dataset 1.0
2022-06-08 17:22:13 +08:00
leafiber
4e1361e9ab Add: dataset of digit gestures 0-9 2022-06-08 17:13:36 +08:00
Tabs
cc18db847b Switch recognition to rotated data
Modify knuckle recognition
2022-06-08 17:03:46 +08:00
Tabs
76fe23b4f2 Merge remote-tracking branch 'origin/DM' into DM 2022-06-08 17:03:10 +08:00
Tabs
2f43f7fd8d Switch recognition to rotated data
Add knuckle recognition
2022-06-08 16:50:44 +08:00
leafiber
0b0dcb27f3 Add: MP version compatibility
(incidentally sets model complexity to 1)
2022-06-08 16:32:40 +08:00
leafiber
b435541723 Add: MP version compatibility
(incidentally sets model complexity to 1)
2022-06-08 16:18:25 +08:00
leafiber
3a363b8c54 Add: MP version compatibility
(incidentally sets model complexity to 1)
2022-06-08 16:14:59 +08:00
leafiber
0aac474cd0 Add: MP version compatibility
(incidentally sets model complexity to 1)
2022-06-08 15:53:12 +08:00
leafiber
4a85384e6e Add: MP version compatibility
(incidentally sets model complexity to 1)
2022-06-08 15:46:57 +08:00
Seikin33
8feabea649
Add files via upload 2022-05-30 22:35:23 +08:00
19 changed files with 1738 additions and 55 deletions

BIN
CNN.pkl Normal file

Binary file not shown.

318
DM.py Normal file

@@ -0,0 +1,318 @@
# -*- coding:utf-8 -*-
"""
Signal design course group project
@ by: Leaf
@ date: 2022-05-28
"""
import mediapipe as mp
import cv2
# import HandDetector
import math
from datetime import datetime
import time
import numpy as np
# Rotation function: rotate the point (x, y) around (point_x, point_y) by `angle` radians
def Rotate(angle, x, y, point_x, point_y):
px = (x - point_x) * math.cos(angle) - (y - point_y) * math.sin(angle) + point_x
py = (x - point_x) * math.sin(angle) + (y - point_y) * math.cos(angle) + point_y
return px, py
class HandDetector:
"""
Finds hands using the mediapipe library and exports landmarks in pixel format. Adds extra
features, such as counting how many fingers are up or the distance between two fingers, and
provides bounding-box information for the detected hand.
"""
def __init__(self, mode=False, max_hands=2, detection_con=0.5, min_track_con=0.5):
"""
:param mode: run detection on every image (static mode)
:param max_hands: maximum number of hands to detect
:param detection_con: minimum detection confidence
:param min_track_con: minimum tracking confidence
"""
self.results = None
self.mode = mode
self.max_hands = max_hands
self.modelComplex = 1
self.detection_con = detection_con
self.min_track_con = min_track_con
# Initialize the hand-detection model
self.mpHands = mp.solutions.hands
self.hands = self.mpHands.Hands(static_image_mode=self.mode,
max_num_hands=self.max_hands,
min_detection_confidence=self.detection_con,
min_tracking_confidence=self.min_track_con)
self.mpDraw = mp.solutions.drawing_utils  # Initialize the drawing utility
self.tipIds = [4, 8, 12, 16, 20]  # Fingertip landmark indices
# self.knuckles = {'0': [4, 3, 2, 1], "1": [8, 7, 6, 5], "2": [12, 11, 10, 9], "3": [16, 15, 14, 13],
# "4": [20, 19, 18, 17]}
self.fingers = []
self.lmList = []
self.re_lmList = []
def find_hands(self, img, draw=True):
"""
Find hands in a (BGR) image
:param img: image in which to look for hands
:param draw: flag to draw the output on the image
:return: image with or without drawings
"""
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert the incoming image from OpenCV's BGR to RGB
self.results = self.hands.process(img_rgb)
if self.results.multi_hand_landmarks:
for handLms in self.results.multi_hand_landmarks:
if draw:
self.mpDraw.draw_landmarks(img, handLms,
self.mpHands.HAND_CONNECTIONS)
return img
def find_position(self, img, hand_no=0, draw=True):
"""
Find the landmarks of a single hand and return them as a list in pixel format,
together with the hand's bounding box
:param img: main image to search
:param hand_no: hand id, if more than one hand is detected
:param draw: flag to draw the output on the image (a rectangle by default)
:return: list of hand-joint positions in pixel format; hand bounding box
"""
x_list = []
y_list = []
bbox_info = []
self.lmList = []
self.re_lmList = []
if self.results.multi_hand_landmarks:
my_hand = self.results.multi_hand_landmarks[hand_no]
for _, lm in enumerate(my_hand.landmark):
h, w, c = img.shape
px, py = int(lm.x * w), int(lm.y * h)
x_list.append(px)
y_list.append(py)
self.lmList.append([px, py])
if draw:
cv2.circle(img, (px, py), 5, (255, 0, 255), cv2.FILLED)
x_min, x_max = min(x_list), max(x_list)
y_min, y_max = min(y_list), max(y_list)
box_w, box_h = x_max - x_min, y_max - y_min
bbox = x_min, y_min, box_w, box_h
cx, cy = bbox[0] + (bbox[2] // 2), bbox[1] + (bbox[3] // 2)
bbox_info = {"id": hand_no, "bbox": bbox, "center": (cx, cy)}
if draw:
cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
(bbox[0] + bbox[2] + 20, bbox[1] + bbox[3] + 20),
(0, 255, 0), 2)
return self.lmList, bbox_info
def revolve(self, img, draw=True):
"""
Rotate the hand landmarks so the hand is upright (about the wrist, landmark 0,
using the direction to landmark 13)
:param img: main image to search
:param draw: flag to draw the output on the image (a rectangle by default)
:return: list of hand-joint positions in pixel format
"""
# print(self.lmList)
point_x = self.lmList[0][0]
point_y = self.lmList[0][1]
delta_x = self.lmList[13][0] - point_x
delta_y = self.lmList[13][1] - point_y
if delta_y == 0:
if delta_x < 0:
theta = math.pi / 2
else:
theta = -math.pi / 2
else:
theta = math.atan(delta_x / delta_y)
if delta_y > 0:
theta = theta + math.pi
# print(theta*180/math.pi)
for i in self.lmList:
px, py = Rotate(theta, i[0], i[1], point_x, point_y)
px = int(px)
py = int(py)
self.re_lmList.append([px, py])
if draw:
cv2.circle(img, (px, py), 5, (0, 0, 255), cv2.FILLED)
return self.re_lmList
def fingers_up(self):
"""
Count which fingers are up; left and right hands are handled separately
:return: list of raised fingers
"""
fingers = []
if self.results.multi_hand_landmarks:
my_hand_type = self.hand_type()
# Thumb
if my_hand_type == "Right":
if self.lmList[self.tipIds[0]][0] > self.lmList[self.tipIds[0] - 1][0]:
fingers.append(1)
else:
fingers.append(0)
else:
if self.lmList[self.tipIds[0]][0] < self.lmList[self.tipIds[0] - 1][0]:
fingers.append(1)
else:
fingers.append(0)
# 4 Fingers
for i in range(1, 5):
if self.lmList[self.tipIds[i]][1] < self.lmList[self.tipIds[i] - 2][1]:
fingers.append(1)
else:
fingers.append(0)
return fingers
def re_fingers_up(self):
"""
Count which fingers are up, using the rotated landmarks; left and right hands
are handled separately
:return: list of raised fingers
"""
fingers = []
if self.results.multi_hand_landmarks:
my_hand_type = self.hand_type()
# Thumb
if my_hand_type == "Right":
if self.re_lmList[self.tipIds[0]][0] > self.re_lmList[self.tipIds[0] - 1][0]:
fingers.append(1)
else:
fingers.append(0)
else:
if self.re_lmList[self.tipIds[0]][0] < self.re_lmList[self.tipIds[0] - 1][0]:
fingers.append(1)
else:
fingers.append(0)
# 4 Fingers
for i in range(1, 5):
if self.re_lmList[self.tipIds[i]][1] < self.re_lmList[self.tipIds[i] - 2][1]:
fingers.append(1)
else:
fingers.append(0)
return fingers
def knuckles_up(self):
"""
Determine the state of each knuckle; left and right hands are handled separately
:return: list of knuckle states (0, 1, or 2 per knuckle)
"""
knuckles = []
distan = 10
if self.results.multi_hand_landmarks:
my_hand_type = self.hand_type()
# Thumb
xx = self.re_lmList[self.tipIds[0]][0]
yy = self.re_lmList[self.tipIds[0] - 1][0]
if my_hand_type == "Right":
if -distan < xx - yy < distan:
knuckles.append(2)
elif xx > yy:
knuckles.append(1)
else:
knuckles.append(0)
else:
if -distan < xx - yy < distan:
knuckles.append(2)
elif xx < yy:
knuckles.append(1)
else:
knuckles.append(0)
# 12 knuckles
for i in range(1, 5):
for j in range(3):
xx = self.re_lmList[self.tipIds[i]-j][1]
yy = self.re_lmList[self.tipIds[i]-j - 1][1]
if -distan < xx - yy < distan:
knuckles.append(2)
elif xx < yy:
knuckles.append(1)
else:
knuckles.append(0)
return knuckles
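# The 13 states above (one thumb state plus 4 fingers x 3 joints each) form
# the feature vector that gesture_recognition() below collects into xl and
# matches by per-column mode.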
def hand_type(self):
"""
Check whether the detected hand is a left or a right hand
:return: "Right" or "Left"
"""
if self.results.multi_hand_landmarks:
if self.lmList[17][0] < self.lmList[5][0]:
return "Right"
else:
return "Left"
class Main:
def __init__(self):
self.detector = None
self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
self.camera.set(3, 1280)
self.camera.set(4, 720)
def gesture_recognition(self):
self.detector = HandDetector()
gesture_store = {}
startTime = time.time()
stored_round = 1
stored_flag = 0
xl = np.zeros((1, 13))  # feature-value buffer
while True:
frame, img = self.camera.read()
img = self.detector.find_hands(img)
lm_list, bbox = self.detector.find_position(img)
if lm_list:
re_lm_list = self.detector.revolve(img)
x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
knucks = self.detector.knuckles_up()
# x1, x2, x3, x4, x5 = self.detector.re_fingers_up()
#
# if (x2 == 1 and x3 == 1) and (x4 == 0 and x5 == 0 and x1 == 0):
# cv2.putText(img, "GOOD!", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
# (0, 0, 255), 3)
print(time.time() - startTime)
if (time.time() - startTime) < 3:  # gesture-recording window (seconds)
xl = np.vstack((xl, knucks))
cv2.putText(img, 'Please put the gesture to be stored in 1 second', (50, 50),
cv2.FONT_HERSHEY_PLAIN, 1.2, (255, 255, 255), 2)
else:  # start gesture recognition
self.detector.fingers = xl
value = ''
for j in range(13):
value = value + str(np.argmax(
np.bincount(xl[:, j].astype(int))))  # most frequent value in column j
gesture_store[value] = stored_round
stored_flag = 1
# startTime = time.time()
gesture_dete = ''.join(str(knuck) for knuck in knucks)
if gesture_dete in gesture_store:
cv2.putText(img, str(gesture_store[gesture_dete]), (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
(0, 0, 255), 3)
cv2.putText(img, 'Gesture stored, recognition started', (50, 50),
cv2.FONT_HERSHEY_PLAIN, 1.2, (255, 255, 255), 2)
else:
if stored_flag:
stored_round += 1
stored_flag = 0
startTime = time.time()  # reset gesture storage when no hand is detected
xl = np.zeros((1, 13))  # feature-value buffer
cv2.putText(img, 'Please put the gesture to be stored in 1 second', (50, 50), cv2.FONT_HERSHEY_PLAIN,
1.2, (255, 255, 255), 2)
cv2.imshow("camera", img)
key = cv2.waitKey(1)
if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
break
elif key == 27:
break
if __name__ == '__main__':
Solution = Main()
Solution.gesture_recognition()

15
Datasets/README.md Normal file

@@ -0,0 +1,15 @@
## GestureData v1.0
# File format:
Each dataset .npz file contains:
1 label (`label`): the gesture label; the entire dataset shares this one label
500 data samples (`data`): each sample is 21*3, i.e. 3-D data for 21 points — the lm.x, lm.y, lm.z from find_position() in demo.py
handedness (`handtype`): 0 for the left hand, 1 for the right hand
canvas size (`shape`): normally 720*1280, corresponding to w, h in find_position() in demo.py
# Notes:
1. Before using the data, it is recommended to get familiar with reading and writing .npz files (it is easy)
2. The last entry of `shape` in a dataset is [0, 0]; all the others are the normal [720, 1280]
3. Using the handedness field is not recommended, because a palm facing toward or away from the screen can make the program misjudge it.
# Changelog:
1. Saved gestures 0-9.
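# Example:
A minimal sketch of reading one of these files with numpy, assuming only the keys described above (the file name is one of the datasets in this PR):

```python
import numpy as np

npz = np.load("Datasets/five.npz")  # any of the digit files works the same way
print(npz["label"])                 # gesture label shared by the whole file, e.g. "five"
print(npz["data"].shape)            # (500, 21, 3): 500 samples of 21 landmarks (lm.x, lm.y, lm.z)
print(npz["handtype"][:5])          # 0 = left hand, 1 = right hand
print(npz["shape"][0])              # canvas size, normally [720, 1280]
```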

BIN
Datasets/eight.npz Normal file

Binary file not shown.

BIN
Datasets/five.npz Normal file

Binary file not shown.

BIN
Datasets/four.npz Normal file

Binary file not shown.

BIN
Datasets/nine.npz Normal file

Binary file not shown.

BIN
Datasets/one.npz Normal file

Binary file not shown.

BIN
Datasets/seven.npz Normal file

Binary file not shown.

BIN
Datasets/six.npz Normal file

Binary file not shown.

BIN
Datasets/three.npz Normal file

Binary file not shown.

BIN
Datasets/two.npz Normal file

Binary file not shown.

BIN
Datasets/zero.npz Normal file

Binary file not shown.


@@ -1,14 +1,8 @@
 # -*- coding:utf-8 -*-
-"""
-Signal design course group project
-@ by: Leaf
-@ date: 2022-05-28
-"""
 import cv2
 import mediapipe as mp
+import numpy as np


 class HandDetector:
@@ -33,8 +27,10 @@ class HandDetector:
         # Initialize the hand-detection model
         self.mpHands = mp.solutions.hands
-        self.hands = self.mpHands.Hands(self.mode, self.max_hands, self.modelComplex,
-                                        self.detection_con, self.min_track_con)
+        self.hands = self.mpHands.Hands(static_image_mode=self.mode,
+                                        max_num_hands=self.max_hands,
+                                        min_detection_confidence=self.detection_con,
+                                        min_tracking_confidence=self.min_track_con)
         self.mpDraw = mp.solutions.drawing_utils  # Initialize the drawing utility
         self.tipIds = [4, 8, 12, 16, 20]  # Fingertip landmark indices
         self.fingers = []
@@ -77,7 +73,7 @@ class HandDetector:
                 px, py = int(lm.x * w), int(lm.y * h)
                 x_list.append(px)
                 y_list.append(py)
-                self.lmList.append([px, py])
+                self.lmList.append(np.array([px, py]))
                 if draw:
                     cv2.circle(img, (px, py), 5, (255, 0, 255), cv2.FILLED)
             x_min, x_max = min(x_list), max(x_list)
@@ -115,7 +111,9 @@ class HandDetector:
                     fingers.append(0)
             # 4 Fingers
             for i in range(1, 5):
-                if self.lmList[self.tipIds[i]][1] < self.lmList[self.tipIds[i] - 2][1]:
+                # if self.lmList[self.tipIds[i]][1] < self.lmList[self.tipIds[i] - 2][1]:
+                if np.dot(self.lmList[self.tipIds[i]-2]-self.lmList[self.tipIds[i]-3],
+                          self.lmList[self.tipIds[i]-1]-self.lmList[self.tipIds[i]-2]) >= 0:
                     fingers.append(1)
                 else:
                     fingers.append(0)
@@ -136,52 +134,49 @@ class HandDetector:
 class Main:
     def __init__(self):
         self.detector = None
-        self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
-        self.camera.set(3, 1280)
-        self.camera.set(4, 720)
+        self.camera = None
+        # self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
+        # self.camera.set(3, 1280)
+        # self.camera.set(4, 720)

-    def gesture_recognition(self):
-        self.detector = HandDetector()
-        while True:
-            frame, img = self.camera.read()
-            img = self.detector.find_hands(img)
-            lm_list, bbox = self.detector.find_position(img)
-            if lm_list:
-                x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
-                x1, x2, x3, x4, x5 = self.detector.fingers_up()
-
-                if (x2 == 1 and x3 == 1) and (x4 == 0 and x5 == 0 and x1 == 0):
-                    cv2.putText(img, "2_TWO", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif (x2 == 1 and x3 == 1 and x4 == 1) and (x1 == 0 and x5 == 0):
-                    cv2.putText(img, "3_THREE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif (x2 == 1 and x3 == 1 and x4 == 1 and x5 == 1) and (x1 == 0):
-                    cv2.putText(img, "4_FOUR", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif x1 == 1 and x2 == 1 and x3 == 1 and x4 == 1 and x5 == 1:
-                    cv2.putText(img, "5_FIVE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif x2 == 1 and x1 == 0 and (x3 == 0, x4 == 0, x5 == 0):
-                    cv2.putText(img, "1_ONE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif x1 == 1 and x2 == 1 and (x3 == 0, x4 == 0, x5 == 0):
-                    cv2.putText(img, "8_EIGHT", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif x1 == 1 and x5 == 1 and (x3 == 0, x4 == 0, x5 == 0):
-                    cv2.putText(img, "6_SIX", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-                elif x1 and (x2 == 0, x3 == 0, x4 == 0, x5 == 0):
-                    cv2.putText(img, "GOOD!", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-                                (0, 0, 255), 3)
-
-            cv2.imshow("camera", img)
-            key = cv2.waitKey(1)
-            if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
-                break
-            elif key == 27:
-                break
+    def gesture_recognition(self, img, detector):
+        self.detector = detector
+        img = self.detector.find_hands(img)
+        lm_list, bbox = self.detector.find_position(img)
+        if lm_list:
+            x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
+            x1, x2, x3, x4, x5 = self.detector.fingers_up()
+            if (np.linalg.norm(lm_list[4]-lm_list[8]) < 50) and (np.linalg.norm(lm_list[4]-lm_list[12]) < 50):
+                cv2.putText(img, "7_SEVEN", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            elif (x2 == 1 and x3 == 1) and (x4 == 0 and x5 == 0 and x1 == 0):
+                cv2.putText(img, "2_TWO", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            elif (x2 == 1 and x3 == 1 and x4 == 1) and (x1 == 0 and x5 == 0):
+                cv2.putText(img, "3_THREE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            elif (x2 == 1 and x3 == 1 and x4 == 1 and x5 == 1) and (x1 == 0):
+                cv2.putText(img, "4_FOUR", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            elif x1 == 1 and x2 == 1 and x3 == 1 and x4 == 1 and x5 == 1:
+                cv2.putText(img, "5_FIVE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            elif (x2 == 1 and x1 == 0) and (x3 == 0 and x4 == 0 and x5 == 0):
+                cv2.putText(img, "1_ONE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            elif (x1 == 1 and x2 == 1) and (x3 == 0 and x4 == 0 and x5 == 0):
+                cv2.putText(img, "8_EIGHT", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            elif (x1 == 1 and x5 == 1) and (x3 == 0 and x4 == 0 and x2 == 0):
+                cv2.putText(img, "6_SIX", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            elif x1 == 0 and x5 == 0 and x3 == 0 and x4 == 0 and x2 == 0:
+                cv2.putText(img, "0_ZERO", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                            (0, 0, 255), 3)
+            else:
+                return 1
+        return 0

 if __name__ == '__main__':

472
ai.py Normal file

@@ -0,0 +1,472 @@
# -*- coding:utf-8 -*-
"""
Signal design course group project
@ by: Leaf
@ date: 2022-05-28
"""
import tkinter as tk
import cv2
import mediapipe as mp
import torch
import torch.nn as nn
import numpy as np
import shutil
import math
from scipy import stats
from os.path import exists
from os import mkdir
from pathlib import Path
from torch.utils.data import DataLoader, TensorDataset
# Rotation function: rotate the point (x, y) around (point_x, point_y) by `angle` radians
def rotate(angle, x, y, point_x, point_y):
px = (x - point_x) * math.cos(angle) - (y - point_y) * math.sin(angle) + point_x
py = (x - point_x) * math.sin(angle) + (y - point_y) * math.cos(angle) + point_y
return px, py
# Min-max normalization
def normalize(x):
max_x = np.max(x)
min_x = np.min(x)
return (x - min_x) / (max_x - min_x)
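# Note: if every value in x is equal, max_x == min_x and this divides by
# zero; the landmark arrays passed in are assumed never to be degenerate.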
class CNN(nn.Module):
def __init__(self, m):
super(CNN, self).__init__()
self.out_label = []
self.conv1 = nn.Sequential(
nn.Conv2d(
in_channels=1,
out_channels=16,
kernel_size=5,
stride=1,
padding=2,
),
nn.ReLU(),
nn.MaxPool2d(kernel_size=1),
)
self.conv2 = nn.Sequential(
nn.Conv2d(16, 32, 5, 1, 2),
nn.ReLU(),
nn.MaxPool2d(2),
)
self.med = nn.Linear(32 * 11 * 2, 500)
self.med2 = nn.Linear(1 * 21 * 3, 100)
self.med3 = nn.Linear(100, 500)
self.out = nn.Linear(500, m) # fully connected layer, output 10 classes
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = x.view(x.size(0), -1)  # Flatten the conv feature maps to (batch_size, 32 * 11 * 2)
x = self.med(x)
# x = self.med2(x)
# x = self.med3(x)
output = self.out(x)
return output
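# Shape walk-through, inferred from the layers above: the input is
# (batch, 1, 22, 4); conv1 preserves 22x4 (padding=2, MaxPool2d(1)),
# conv2 halves it to 11x2 with 32 channels, so the flattened vector has
# 32 * 11 * 2 = 704 features, matching self.med.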
class HandDetector:
"""
Finds hands using the mediapipe library and exports landmarks in pixel format. Adds extra
features, such as counting how many fingers are up or the distance between two fingers, and
provides bounding-box information for the detected hand.
"""
def __init__(self, mode=False, max_hands=2, detection_con=0.5, min_track_con=0.5):
"""
:param mode: run detection on every image (static mode)
:param max_hands: maximum number of hands to detect
:param detection_con: minimum detection confidence
:param min_track_con: minimum tracking confidence
"""
self.results = None
self.mode = mode
self.max_hands = max_hands
self.modelComplex = 1
self.detection_con = detection_con
self.min_track_con = min_track_con
# Initialize the hand-detection model
self.mpHands = mp.solutions.hands
self.hands = self.mpHands.Hands(static_image_mode=self.mode,
max_num_hands=self.max_hands,
min_detection_confidence=self.detection_con,
min_tracking_confidence=self.min_track_con)
self.mpDraw = mp.solutions.drawing_utils  # Initialize the drawing utility
self.tipIds = [4, 8, 12, 16, 20]  # Fingertip landmark indices
self.fingers = []
self.lmList = []
self.re_lmList = []
def find_hands(self, img, draw=True):
"""
Find hands in a (BGR) image
:param img: image in which to look for hands
:param draw: flag to draw the output on the image
:return: image with or without drawings
"""
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert the incoming image from OpenCV's BGR to RGB
self.results = self.hands.process(img_rgb)
if self.results.multi_hand_landmarks:
for handLms in self.results.multi_hand_landmarks:
if draw:
self.mpDraw.draw_landmarks(img, handLms,
self.mpHands.HAND_CONNECTIONS)
return img
def find_position(self, img, hand_no=0, draw=True):
"""
Find the landmarks of a single hand and return them together with the hand's bounding box
:param img: main image to search
:param hand_no: hand id, if more than one hand is detected
:param draw: flag to draw the output on the image (a rectangle by default)
:return: rotated and normalized hand-joint array; hand bounding-box info
"""
x_list = []
y_list = []
bbox_info = []
self.lmList = []
h, w, c = img.shape
if self.results.multi_hand_landmarks:
my_hand = self.results.multi_hand_landmarks[hand_no]
for i, lm in enumerate(my_hand.landmark):
px, py = int(lm.x * w), int(lm.y * h)
x_list.append(px)
y_list.append(py)
self.lmList.append([lm.x, lm.y, 0])
if draw:
cv2.circle(img, (px, py), 5, (255, 0, 255), cv2.FILLED)
x_min, x_max = min(x_list), max(x_list)
y_min, y_max = min(y_list), max(y_list)
box_w, box_h = x_max - x_min, y_max - y_min
bbox = x_min, y_min, box_w, box_h
cx, cy = bbox[0] + (bbox[2] // 2), bbox[1] + (bbox[3] // 2)
bbox_info = {"id": hand_no, "bbox": bbox, "center": (cx, cy), "shape": (h, w)}
if draw:
cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
(bbox[0] + bbox[2] + 20, bbox[1] + bbox[3] + 20),
(0, 255, 0), 2)
self.revolve(img)
self.re_lmList = np.array(self.re_lmList)
if self.re_lmList.any():
self.re_lmList = np.concatenate((np.zeros((21, 1)), self.re_lmList), axis=1)
self.re_lmList = np.concatenate((self.re_lmList, np.zeros((1, 4))), axis=0)
return self.re_lmList, bbox_info
def revolve(self, img, draw=True):
"""
Rotate the hand landmarks so the hand is upright (about the wrist, landmark 0,
using the direction to landmark 13)
:param img: main image to search
:param draw: flag to draw the output on the image (a rectangle by default)
:return: list of hand-joint positions
"""
h, w, c = img.shape
if len(self.lmList) >= 21:
# print(self.lmList)
self.re_lmList = []
point_x = self.lmList[0][0]
point_y = self.lmList[0][1]
delta_x = self.lmList[13][0] - point_x
delta_y = self.lmList[13][1] - point_y
if delta_y == 0:
if delta_x < 0:
theta = math.pi / 2
else:
theta = -math.pi / 2
else:
theta = math.atan(delta_x / delta_y)
if delta_y > 0:
theta = theta + math.pi
# print(theta*180/math.pi)
for i in self.lmList:
px, py = rotate(theta, i[0] * w, i[1] * h, point_x * w, point_y * h)
self.re_lmList.append([px, py, 0])
if draw:
cv2.circle(img, (int(px), int(py)), 5, (0, 0, 255), cv2.FILLED)
# Normalization
x_array = normalize(np.array(self.re_lmList)[:, 0])
# print(x_array)
for i in range(len(x_array)):
self.re_lmList[i][0] = x_array[i]
y_array = normalize(np.array(self.re_lmList)[:, 1])
for i in range(len(y_array)):
self.re_lmList[i][1] = y_array[i]  # use y_array for the y coordinate
else:
self.re_lmList = self.lmList
return self.re_lmList
def hand_type(self):
"""
Check whether the detected hand is a left or a right hand
:return: 1 for right, 0 for left
"""
if self.results.multi_hand_landmarks:
if self.lmList[17][0] < self.lmList[5][0]:
return 1
else:
return 0
class AI:
def __init__(self, datasets_dir):
self.EPOCH = 20
self.BATCH_SIZE = 2
self.LR = 10e-5
self.DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.datasets_dir = datasets_dir
self.train_loader = None
self.m = 0
self.out_label = []  # Maps the CNN's numeric output labels back to string labels
def load_datasets(self):
train_data = []
train_label = []
self.m = 0
for file in Path(self.datasets_dir).rglob("*.npz"):
data = np.load(str(file))
train_data.append(data["data"])
label_number = np.ones(len(data["data"])) * len(self.out_label)
train_label.append(label_number)
self.out_label.append(data["label"])
self.m += 1
train_data = torch.Tensor(np.concatenate(train_data, axis=0))
train_data = train_data.unsqueeze(1)
train_label = torch.tensor(np.concatenate(train_label, axis=0)).long()
dataset = TensorDataset(train_data, train_label)
self.train_loader = DataLoader(dataset, batch_size=self.BATCH_SIZE, shuffle=True)
return self.m
def train_cnn(self):
cnn = CNN(self.m).to(self.DEVICE)
optimizer = torch.optim.Adam(cnn.parameters(), self.LR) # optimize all cnn parameters
loss_func = nn.CrossEntropyLoss() # the target label is not one-hotted
for epoch in range(self.EPOCH):
for step, (data, target) in enumerate(self.train_loader):
# Move the batch data and targets to the training device
data, target = data.to(self.DEVICE), target.to(self.DEVICE)
output = cnn(data) # cnn output
loss = loss_func(output, target) # cross entropy loss
optimizer.zero_grad() # clear gradients for this training step
loss.backward() # backpropagation, compute gradients
optimizer.step() # apply gradients
if (step + 1) % 50 == 0:  # print training progress
print(
"\r[Epoch: %d] [%d/%d (%0.f %%)][Loss: %f]"
% (
epoch + 1,
(step + 1) * len(data),
len(self.train_loader.dataset),
100. * (step + 1) / len(self.train_loader),
loss.item()
), end="")
cnn.out_label = self.out_label
torch.save(cnn, 'CNN.pkl')
print("训练结束")
class Main:
def __init__(self):
self.camera = None
self.detector = HandDetector()
self.default_datasets = "Datasets"
self.len_x = 22
self.len_y = 4
self.label = ''
self.result = []
self.disp = ""
def change_state(self):
self.label = self.entry.get()  # Fetch the contents of the Entry widget
self.top1.quit()
if self.label == "":
self.top1.destroy()
def make_datasets(self, camera, datasets_dir="default", n=100):
if datasets_dir == "default":
return
if exists(datasets_dir):
shutil.rmtree(datasets_dir)
mkdir(datasets_dir)
self.camera = camera
self.top1 = tk.Tk()
self.top1.geometry('300x50')
self.top1.title('Enter label')
tk.Label(self.top1, text='Label:').place(x=27, y=10)
self.entry = tk.Entry(self.top1, width=15)
self.entry.place(x=80, y=10)
tk.Button(self.top1, text='OK', command=self.change_state).place(x=235, y=5)
self.top1.mainloop()
while not self.label == "":
data = np.zeros([n, self.len_x, self.len_y])
shape_list = np.zeros([n, 2], dtype=np.int16)
hand_type = np.zeros(n, dtype=np.int8)
count = 0
cv2.startWindowThread()
while True:
frame, img = self.camera.read()
img = self.detector.find_hands(img)
result = np.zeros((self.len_x, self.len_y))
lm_list, bbox = self.detector.find_position(img)
for i in range(len(lm_list)):
result[i] = np.array(lm_list[i])
if result.sum() > 0:  # when the matrix is non-zero, i.e. a hand was captured
shape = bbox["shape"]
x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
data[count] = result
hand_type[count] = self.detector.hand_type()
shape_list[count] = np.array(shape)
count += 1
cv2.putText(img, str("{}/{}".format(count, n)), (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
(0, 255, 0), 3)
cv2.imshow("camera", img)
key = cv2.waitKey(100)
if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
break
elif key == 27:
break
elif count == n - 1:
break
cv2.destroyAllWindows()
open(datasets_dir + "/" + self.label + ".npz", "w")
np.savez(datasets_dir + "/" + self.label + ".npz", label=self.label, data=data,
handtype=hand_type, shape=shape_list)
self.top1.mainloop()
def train(self, datasets_dir="default"):
if datasets_dir == "default":
datasets_dir = self.default_datasets
ai = AI(datasets_dir)
ai.load_datasets()
ai.train_cnn()
def gesture_recognition_camera(self, detector, img, cnn):
self.detector = detector
out_label = cnn.out_label
img = self.detector.find_hands(img)
lm_list, bbox = self.detector.find_position(img)
if lm_list.any():
x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
data = torch.Tensor(lm_list)
data = data.unsqueeze(0)
data = data.unsqueeze(0)
test_output = cnn(data)
test_np = test_output.detach().numpy()[0]
# normal_temp = normalize(test_np)
# temp = normal_temp[np.argpartition(normal_temp, -2)[-2:]]
temp = test_np[np.argpartition(test_np, -2)[-2:]]
print(temp[1]-temp[0])
if temp[1]-temp[0] < 5.5:
return 1
self.result.append(torch.max(test_output, 1)[1].data.cpu().numpy()[0])
if len(self.result) > 4:
self.disp = str(out_label[stats.mode(self.result)[0][0]])
self.result = []
cv2.putText(img, self.disp, (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
(0, 0, 255), 3)
return 0
def gesture_recognition_video(self, filedir):
self.detector = HandDetector()
cnn = torch.load("CNN.pkl")
out_label = cnn.out_label
result = []
disp = ""
cap = cv2.VideoCapture(filedir)
while True:
ret, img = cap.read()
img = self.detector.find_hands(img)
lm_list, bbox = self.detector.find_position(img)
if lm_list.any():
x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
data = torch.Tensor(lm_list)
data = data.unsqueeze(0)
data = data.unsqueeze(0)
test_output = cnn(data)
result.append(torch.max(test_output, 1)[1].data.cpu().numpy()[0])
if len(result) > 5:
disp = str(out_label[stats.mode(result)[0][0]])
result = []
cv2.putText(img, disp, (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
(0, 0, 255), 3)
cv2.imshow("camera", img)
key = cv2.waitKey(1)
if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
break
elif key == 27:
break
cap.release()
def gesture_recognition_img(self, filedir):
self.detector = HandDetector()
cnn = torch.load("CNN.pkl")
out_label = cnn.out_label
result = []
disp = ""
img = cv2.imread(filedir)
img = self.detector.find_hands(img)
while True:
lm_list, bbox = self.detector.find_position(img)
if lm_list.any():
x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
data = torch.Tensor(lm_list)
data = data.unsqueeze(0)
data = data.unsqueeze(0)
test_output = cnn(data)
result.append(torch.max(test_output, 1)[1].data.cpu().numpy()[0])
if len(result) > 5:
disp = str(out_label[stats.mode(result)[0][0]])
result = []
cv2.putText(img, disp, (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
(0, 0, 255), 3)
cv2.imshow("camera", img)
key = cv2.waitKey(1)
if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
break
elif key == 27:
break
if __name__ == '__main__':
solution = Main()
my_datasets_dir = "test"
solution.make_datasets(my_datasets_dir, 100)
solution.train(my_datasets_dir)
dir_video = "C:/Users/Liar/Pictures/Camera Roll/WIN_20220630_20_11_47_Pro.mp4"
dir_img = "C:/Users/Liar/Pictures/Camera Roll/WIN_20220630_20_01_22_Pro.jpg"
# solution.gesture_recognition_camera()
# solution.gesture_recognition_video(dir_video)
# solution.gesture_recognition_img(dir_img)

414
ai_two.py Normal file

@@ -0,0 +1,414 @@
# -*- coding:utf-8 -*-
"""
Signal design course group project
@ by: Leaf
@ date: 2022-05-28
"""
import cv2
import mediapipe as mp
import torch
import torch.nn as nn
import numpy as np
import tkinter as tk
import shutil
import math
from scipy import stats
from os.path import exists
from os import mkdir
from pathlib import Path
from torch.utils.data import DataLoader, TensorDataset
# Rotation function: rotate the point (x, y) around (point_x, point_y) by `angle` radians
def rotate(angle, x, y, point_x, point_y):
px = (x - point_x) * math.cos(angle) - (y - point_y) * math.sin(angle) + point_x
py = (x - point_x) * math.sin(angle) + (y - point_y) * math.cos(angle) + point_y
return px, py
# Min-max normalization
def normalize(x):
max_x = np.max(x)
min_x = np.min(x)
return (x-min_x)/(max_x-min_x)
class CNNTwo(nn.Module):
def __init__(self, m):
super(CNNTwo, self).__init__()
self.out_label = []
self.conv1 = nn.Sequential(
nn.Conv2d(
in_channels=1,
out_channels=16,
kernel_size=5,
stride=1,
padding=2,
),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
)
self.conv2 = nn.Sequential(
nn.Conv2d(16, 32, 5, 1, 2),
nn.ReLU(),
nn.MaxPool2d(2),
)
self.med = nn.Linear(32 * 11 * 1, 500)
self.med2 = nn.Linear(1*21*3, 100)
self.med3 = nn.Linear(100, 500)
self.out = nn.Linear(500, m) # fully connected layer, output 10 classes
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = x.view(x.size(0), -1)  # Flatten the conv feature maps to (batch_size, 32 * 11 * 1)
x = self.med(x)
# x = self.med2(x)
# x = self.med3(x)
output = self.out(x)
return output
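# Shape walk-through, inferred from the layers above: the input is
# (batch, 1, 44, 4) for two stacked hands; conv1's MaxPool2d(2) reduces it
# to 22x2, conv2's to 11x1 with 32 channels, so the flattened vector has
# 32 * 11 * 1 = 352 features, matching self.med.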
class HandDetector:
"""
Finds hands using the mediapipe library and exports landmarks in pixel format. Adds extra
features, such as counting how many fingers are up or the distance between two fingers, and
provides bounding-box information for the detected hand.
"""
def __init__(self, mode=False, max_hands=2, detection_con=0.5, min_track_con=0.5):
"""
:param mode: run detection on every image (static mode)
:param max_hands: maximum number of hands to detect
:param detection_con: minimum detection confidence
:param min_track_con: minimum tracking confidence
"""
self.results = None
self.mode = mode
self.max_hands = max_hands
self.modelComplex = 1
self.detection_con = detection_con
self.min_track_con = min_track_con
# Initialize the hand-detection model
self.mpHands = mp.solutions.hands
self.hands = self.mpHands.Hands(static_image_mode=self.mode,
max_num_hands=self.max_hands,
min_detection_confidence=self.detection_con,
min_tracking_confidence=self.min_track_con)
self.mpDraw = mp.solutions.drawing_utils  # Initialize the drawing utility
self.tipIds = [4, 8, 12, 16, 20]  # Fingertip landmark indices
self.fingers = []
self.lmList = []
self.re_lmList = []
def find_hands(self, img, draw=True):
"""
Find hands in a (BGR) image
:param img: image in which to look for hands
:param draw: flag to draw the output on the image
:return: image with or without drawings, plus whether two hands were found
"""
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert the incoming image from OpenCV's BGR to RGB
self.results = self.hands.process(img_rgb)
is_two_hand = False
if self.results.multi_hand_landmarks is not None and len(self.results.multi_hand_landmarks) >= 2:
is_two_hand = True
for handLms in self.results.multi_hand_landmarks:
if draw:
self.mpDraw.draw_landmarks(img, handLms,
self.mpHands.HAND_CONNECTIONS)
return img, is_two_hand
def find_position(self, img, hand_no=0, draw=True):
"""
Find the landmarks of a single hand and return them together with the hand's bounding box
:param img: main image to search
:param hand_no: hand id, if more than one hand is detected
:param draw: flag to draw the output on the image (a rectangle by default)
:return: rotated and normalized hand-joint array; hand bounding-box info
"""
x_list = []
y_list = []
bbox_info = []
self.lmList = []
h, w, c = img.shape
if self.results.multi_hand_landmarks:
my_hand = self.results.multi_hand_landmarks[hand_no]
for i, lm in enumerate(my_hand.landmark):
px, py = int(lm.x * w), int(lm.y * h)
x_list.append(px)
y_list.append(py)
self.lmList.append([lm.x, lm.y, 0])
if draw:
cv2.circle(img, (px, py), 5, (255, 0, 255), cv2.FILLED)
x_min, x_max = min(x_list), max(x_list)
y_min, y_max = min(y_list), max(y_list)
box_w, box_h = x_max - x_min, y_max - y_min
bbox = x_min, y_min, box_w, box_h
cx, cy = bbox[0] + (bbox[2] // 2), bbox[1] + (bbox[3] // 2)
bbox_info = {"id": hand_no, "bbox": bbox, "center": (cx, cy), "shape": (h, w)}
if draw:
cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
(bbox[0] + bbox[2] + 20, bbox[1] + bbox[3] + 20),
(0, 255, 0), 2)
self.revolve(img)
self.re_lmList = np.array(self.re_lmList)
if self.re_lmList.any():
self.re_lmList = np.concatenate((np.zeros((21, 1)), self.re_lmList), axis=1)
self.re_lmList = np.concatenate((self.re_lmList, np.zeros((1, 4))), axis=0)
return self.re_lmList, bbox_info
def revolve(self, img, draw=True):
"""
Rotate the hand landmarks so the hand is upright (about the wrist, landmark 0,
using the direction to landmark 13)
:param img: main image to search
:param draw: flag to draw the output on the image (a rectangle by default)
:return: list of hand-joint positions
"""
h, w, c = img.shape
if len(self.lmList) >= 21:
# print(self.lmList)
self.re_lmList = []
point_x = self.lmList[0][0]
point_y = self.lmList[0][1]
delta_x = self.lmList[13][0] - point_x
delta_y = self.lmList[13][1] - point_y
if delta_y == 0:
if delta_x < 0:
theta = math.pi / 2
else:
theta = -math.pi / 2
else:
theta = math.atan(delta_x / delta_y)
if delta_y > 0:
theta = theta + math.pi
# print(theta*180/math.pi)
for i in self.lmList:
px, py = rotate(theta, i[0] * w, i[1] * h, point_x * w, point_y * h)
self.re_lmList.append([px, py, 0])
if draw:
cv2.circle(img, (int(px), int(py)), 5, (0, 0, 255), cv2.FILLED)
# Normalization
x_array = normalize(np.array(self.re_lmList)[:, 0])
# print(x_array)
for i in range(len(x_array)):
self.re_lmList[i][0] = x_array[i]
y_array = normalize(np.array(self.re_lmList)[:, 1])
for i in range(len(y_array)):
self.re_lmList[i][1] = y_array[i]  # use y_array for the y coordinate
else:
self.re_lmList = self.lmList
return self.re_lmList
def hand_type(self):
"""
Check whether the detected hand is a left or a right hand
:return: 1 for right, 0 for left
"""
if self.results.multi_hand_landmarks:
if self.lmList[17][0] < self.lmList[5][0]:
return 1
else:
return 0
class AI:
def __init__(self, datasets_dir):
self.EPOCH = 100
self.BATCH_SIZE = 4
self.LR = 10e-5
self.DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.datasets_dir = datasets_dir
self.train_loader = None
self.m = 0
self.out_label = []  # Maps the CNN's numeric output labels back to string labels
def load_datasets(self):
train_data = []
train_label = []
self.m = 0
for file in Path(self.datasets_dir).rglob("*.npz"):
data = np.load(str(file))
train_data.append(data["data"])
label_number = np.ones(len(data["data"])) * len(self.out_label)
train_label.append(label_number)
self.out_label.append(data["label"])
self.m += 1
train_data = torch.Tensor(np.concatenate(train_data, axis=0))
train_data = train_data.unsqueeze(1)
train_label = torch.tensor(np.concatenate(train_label, axis=0)).long()
dataset = TensorDataset(train_data, train_label)
self.train_loader = DataLoader(dataset, batch_size=self.BATCH_SIZE, shuffle=True)
return self.m
def train_cnn(self):
cnn = CNNTwo(self.m).to(self.DEVICE)
optimizer = torch.optim.Adam(cnn.parameters(), self.LR) # optimize all cnn parameters
loss_func = nn.CrossEntropyLoss() # the target label is not one-hotted
for epoch in range(self.EPOCH):
for step, (data, target) in enumerate(self.train_loader):
# Move the batch data and targets to the training device
data, target = data.to(self.DEVICE), target.to(self.DEVICE)
output = cnn(data) # cnn output
loss = loss_func(output, target) # cross entropy loss
optimizer.zero_grad() # clear gradients for this training step
loss.backward() # backpropagation, compute gradients
optimizer.step() # apply gradients
if (step + 1) % 50 == 0:  # print training progress
print(
"\r[Epoch: %d] [%d/%d (%0.f %%)][Loss: %f]"
% (
epoch + 1,
(step + 1) * len(data),
len(self.train_loader.dataset),
100. * (step + 1) / len(self.train_loader),
loss.item()
), end="")
cnn.out_label = self.out_label
torch.save(cnn, 'CNN_two.pkl')
print("训练结束")
class Main:
def __init__(self):
self.camera = None
self.detector = HandDetector()
self.default_datasets = "Datasets"
self.len_x = 44
self.len_y = 4
self.label = ''
self.result = []
self.disp = ""
def change_state(self):
self.label = self.entry.get()  # Fetch the contents of the Entry widget
self.top1.quit()
if self.label == "":
self.top1.destroy()
def on_closing(self):
self.label = ""
self.top1.destroy()
def make_datasets(self, camera, datasets_dir="default", n=100):
if datasets_dir == "default":
return
if exists(datasets_dir):
shutil.rmtree(datasets_dir)
mkdir(datasets_dir)
self.camera = camera
self.top1 = tk.Tk()
self.top1.geometry('300x50')
self.top1.title('Enter label')
self.top1.protocol("WM_DELETE_WINDOW", self.on_closing)
tk.Label(self.top1, text='Label:').place(x=27, y=10)
self.entry = tk.Entry(self.top1, width=15)
self.entry.place(x=80, y=10)
tk.Button(self.top1, text='OK', command=self.change_state).place(x=235, y=5)
self.top1.mainloop()
while not self.label == "":
data = np.zeros([n, self.len_x, self.len_y])
shape_list = np.zeros([n, 2], dtype=np.int16)
hand_type = np.zeros(n, dtype=np.int8)
count = 0
cv2.startWindowThread()
while True:
frame, img = self.camera.read()
img, is_two_hand = self.detector.find_hands(img)
result = np.zeros((self.len_x, self.len_y))
if is_two_hand:
lm_list1, bbox1 = self.detector.find_position(img, 0)
lm_list2, bbox2 = self.detector.find_position(img, 1)
for i in range(len(lm_list1)):
result[i] = np.array(lm_list1[i])
for i in range(len(lm_list1), len(lm_list1)+len(lm_list2)):
result[i] = np.array(lm_list2[i-len(lm_list1)])
if result.sum() > 0:  # when the matrix is non-zero, i.e. a hand was captured
shape1 = bbox1["shape"]
x_1, y_1 = bbox1["bbox"][0], bbox1["bbox"][1]
shape2 = bbox2["shape"]
x_2, y_2 = bbox2["bbox"][0], bbox2["bbox"][1]
data[count] = result
hand_type[count] = self.detector.hand_type()
shape_list[count] = np.array(shape1)
count += 1
cv2.putText(img, str("{}/{}".format(count, n)), (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
(0, 255, 0), 3)
cv2.putText(img, str("{}/{}".format(count, n)), (x_2, y_2), cv2.FONT_HERSHEY_PLAIN, 3,
(0, 255, 0), 3)
cv2.imshow("camera", img)
key = cv2.waitKey(100)
if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
break
elif key == 27:
break
elif count == n - 1:
break
cv2.destroyAllWindows()
open(datasets_dir + "/" + self.label + ".npz", "w")
np.savez(datasets_dir + "/" + self.label + ".npz", label=self.label, data=data,
handtype=hand_type, shape=shape_list)
self.top1.mainloop()
def train(self, datasets_dir="default"):
if datasets_dir == "default":
datasets_dir = self.default_datasets
ai = AI(datasets_dir)
ai.load_datasets()
ai.train_cnn()
def gesture_recognition(self, detector, img, cnn):
self.detector = detector
out_label = cnn.out_label
img, is_two_hand = self.detector.find_hands(img)
if is_two_hand:
lm_list1, bbox1 = self.detector.find_position(img, 0)
lm_list2, bbox2 = self.detector.find_position(img, 1)
if lm_list1.any() and lm_list2.any():
x_1, y_1 = bbox1["bbox"][0], bbox1["bbox"][1]
x_2, y_2 = bbox2["bbox"][0], bbox2["bbox"][1]
lm_list = np.concatenate((lm_list1, lm_list2), axis=0)
data = torch.Tensor(lm_list)
data = data.unsqueeze(0)
data = data.unsqueeze(0)
test_output = cnn(data)
self.result.append(torch.max(test_output, 1)[1].data.cpu().numpy()[0])
if len(self.result) > 4:
self.disp = str(out_label[stats.mode(self.result)[0][0]])
self.result = []
cv2.putText(img, self.disp, (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
(0, 0, 255), 3)
cv2.putText(img, self.disp, (x_2, y_2), cv2.FONT_HERSHEY_PLAIN, 3,
(0, 0, 255), 3)
else:
return 1
return 0
if __name__ == '__main__':
solution = Main()
my_datasets_dir = "test-two"
solution.make_datasets(my_datasets_dir, 100)
solution.train(my_datasets_dir)
solution.gesture_recognition()

177
datatest.py Normal file

@@ -0,0 +1,177 @@
import cv2
import mediapipe as mp
import numpy as np
class HandDetector:
"""
Finds hands using the mediapipe library and exports landmarks in pixel format. Adds extra
features, such as counting how many fingers are up or the distance between two fingers, and
provides bounding-box information for the detected hand.
"""
def __init__(self, mode=False, max_hands=2, detection_con=0.5, min_track_con=0.5):
"""
:param mode: run detection on every image (static mode)
:param max_hands: maximum number of hands to detect
:param detection_con: minimum detection confidence
:param min_track_con: minimum tracking confidence
"""
self.results = None
self.mode = mode
self.max_hands = max_hands
self.modelComplex = 1
self.detection_con = detection_con
self.min_track_con = min_track_con
# Initialize the hand-detection model
self.mpHands = mp.solutions.hands
self.hands = self.mpHands.Hands(static_image_mode=self.mode,
max_num_hands=self.max_hands,
min_detection_confidence=self.detection_con,
min_tracking_confidence=self.min_track_con)
self.mpDraw = mp.solutions.drawing_utils  # Initialize the drawing utility
self.tipIds = [4, 8, 12, 16, 20]  # Fingertip landmark indices
self.fingers = []
self.lmList = []
def find_hands(self, img, draw=True):
"""
Find hands in a (BGR) image
:param img: image in which to look for hands
:param draw: flag to draw the output on the image
:return: image with or without drawings
"""
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert the incoming image from OpenCV's BGR to RGB
self.results = self.hands.process(img_rgb)
if self.results.multi_hand_landmarks:
for handLms in self.results.multi_hand_landmarks:
if draw:
self.mpDraw.draw_landmarks(img, handLms,
self.mpHands.HAND_CONNECTIONS)
return img
def find_position(self, img, hand_no=0, draw=True):
"""
Collect the landmarks of a single hand into an array
:param img: main image to search
:param hand_no: hand id, if more than one hand is detected
:param draw: flag to draw the output on the image (a rectangle by default)
:return: 21x3 array of (lm.x, lm.y, lm.z) landmarks; image shape (h, w)
"""
x_list = []
y_list = []
onedata = np.zeros([21,3])
zerodata = np.zeros([21,3])
h, w, c = img.shape
self.lmList = []
if self.results.multi_hand_landmarks:
my_hand = self.results.multi_hand_landmarks[hand_no]
for i, lm in enumerate(my_hand.landmark):
onedata[i] = np.array([lm.x, lm.y, lm.z])  # store the 3-D coordinates for this frame
px, py= int(lm.x * w), int(lm.y * h)
x_list.append(px)
y_list.append(py)
self.lmList.append([px, py])
if draw:
cv2.circle(img, (px, py), 5, (255, 0, 255), cv2.FILLED)
return onedata, (h, w)
def fingers_up(self):
"""
Count which fingers are up; left and right hands are handled separately
:return: list of raised fingers
"""
fingers = []
if self.results.multi_hand_landmarks:
my_hand_type = self.hand_type()
# Thumb
if my_hand_type == "Right":
if self.lmList[self.tipIds[0]][0] > self.lmList[self.tipIds[0] - 1][0]:
fingers.append(1)
else:
fingers.append(0)
else:
if self.lmList[self.tipIds[0]][0] < self.lmList[self.tipIds[0] - 1][0]:
fingers.append(1)
else:
fingers.append(0)
# 4 Fingers
for i in range(1, 5):
if self.lmList[self.tipIds[i]][1] < self.lmList[self.tipIds[i] - 2][1]:
fingers.append(1)
else:
fingers.append(0)
return fingers
def hand_type(self):
"""
Check whether the detected hand is a left or a right hand
:return: 1 for right, 0 for left
"""
if self.results.multi_hand_landmarks:
if self.lmList[17][0] < self.lmList[5][0]:
return 1
else:
return 0
class Main:
def __init__(self, label, N = 100):
self.detector = None
self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
self.camera.set(3, 1280)
self.camera.set(4, 720)
self.N = N
# Initialize the data buffers
self.label = label
self.data = np.zeros([N,21,3])
self.shape = np.zeros([N,2], dtype = np.int16)
self.handtype = np.zeros(N, dtype = np.int8)
def gesture_recognition(self):
self.detector = HandDetector()
# Initialize data
zerodata = np.zeros([21,3])
rezult = np.zeros([21,3])
count = 0
while True:
frame, img = self.camera.read()
img = self.detector.find_hands(img)
rezult,shape = self.detector.find_position(img)
if rezult.any():  # when the array is non-zero, i.e. a hand was captured
self.data[count] = rezult
self.handtype[count] = self.detector.hand_type()
self.shape[count] = np.array(shape)
count += 1
cv2.imshow("camera", img)
key = cv2.waitKey(1)
if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
break
elif key == 27:
break
elif count == self.N - 1:
break
np.savez('firstdata', label = self.label, data = self.data,
handtype = self.handtype, shape = self.shape)
if __name__ == '__main__':
Solution = Main(label = "five")
Solution.gesture_recognition()
npzfile = np.load('firstdata.npz')
#print(npzfile['data'][0])
#print(" ")
#print(npzfile['handtype'])
#print(npzfile['label'])
#print(npzfile['shape'])

180
gr.py Normal file

@@ -0,0 +1,180 @@
import TM
import ai
import ai_two
import cv2
import copy
import torch
import torch.nn as nn
class CNN(nn.Module):
def __init__(self, m):
super(CNN, self).__init__()
self.out_label = []
self.conv1 = nn.Sequential(
nn.Conv2d(
in_channels=1,
out_channels=16,
kernel_size=5,
stride=1,
padding=2,
),
nn.ReLU(),
nn.MaxPool2d(kernel_size=1),
)
self.conv2 = nn.Sequential(
nn.Conv2d(16, 32, 5, 1, 2),
nn.ReLU(),
nn.MaxPool2d(2),
)
self.med = nn.Linear(32 * 11 * 2, 500)
self.med2 = nn.Linear(1 * 21 * 3, 100)
self.med3 = nn.Linear(100, 500)
self.out = nn.Linear(500, m) # fully connected layer, output 10 classes
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = x.view(x.size(0), -1)  # Flatten the conv feature maps to (batch_size, 32 * 11 * 2)
x = self.med(x)
# x = self.med2(x)
# x = self.med3(x)
output = self.out(x)
return output
class CNNTwo(nn.Module):
def __init__(self, m):
super(CNNTwo, self).__init__()
self.out_label = []
self.conv1 = nn.Sequential(
nn.Conv2d(
in_channels=1,
out_channels=16,
kernel_size=5,
stride=1,
padding=2,
),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
)
self.conv2 = nn.Sequential(
nn.Conv2d(16, 32, 5, 1, 2),
nn.ReLU(),
nn.MaxPool2d(2),
)
self.med = nn.Linear(32 * 11 * 1, 500)
self.med2 = nn.Linear(1*21*3, 100)
self.med3 = nn.Linear(100, 500)
self.out = nn.Linear(500, m) # fully connected layer, output 10 classes
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = x.view(x.size(0), -1)  # Flatten the conv feature maps to (batch_size, 32 * 11 * 1)
x = self.med(x)
# x = self.med2(x)
# x = self.med3(x)
output = self.out(x)
return output
class Main:
def __init__(self):
self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
self.camera.set(3, 1280)
self.camera.set(4, 720)
self.tm_detector = TM.HandDetector()
self.ai_detector = ai.HandDetector()
self.at_detector = ai_two.HandDetector()
self.tm_main = TM.Main()
self.ai_main = ai.Main()
self.at_main = ai_two.Main()
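# Recognition cascade used by the methods below: the two-hand CNN (ai_two)
# runs first; when it reports a single hand, the one-hand CNN (ai) runs;
# when that match is rejected, the rule-based TM recognizer handles the frame.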
def gr_img(self, filedir, diy):
print(filedir)
if diy:
cnn = torch.load("CNN.pkl")
cnn_two = torch.load("CNN_two.pkl")
while True:
not_match = 0
img = cv2.imread(filedir)
img_tm = copy.deepcopy(img)
is_one_hand = self.at_main.gesture_recognition(self.at_detector, img, cnn_two)
if is_one_hand:
not_match = self.ai_main.gesture_recognition_camera(self.ai_detector, img, cnn)
if not_match:
self.tm_main.gesture_recognition(img_tm, self.tm_detector)
if not_match:
cv2.imshow("camera", img_tm)
else:
cv2.imshow("camera", img)
key = cv2.waitKey(1)
if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
break
elif key == 27:
break
def gr_video(self, filedir, diy):
cap = cv2.VideoCapture(filedir)
if diy:
cnn = torch.load("CNN.pkl")
cnn_two = torch.load("CNN_two.pkl")
while True:
ret, img = cap.read()
not_match = 0
img_tm = copy.deepcopy(img)
is_one_hand = self.at_main.gesture_recognition(self.at_detector, img, cnn_two)
if is_one_hand:
not_match = self.ai_main.gesture_recognition_camera(self.ai_detector, img, cnn)
if not_match:
self.tm_main.gesture_recognition(img_tm, self.tm_detector)
if not_match:
cv2.imshow("camera", img_tm)
else:
cv2.imshow("camera", img)
key = cv2.waitKey(1)
if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
break
elif key == 27:
break
cap.release()
def gr_realtime(self, diy):
if diy:
cnn = torch.load("CNN.pkl")
cnn_two = torch.load("CNN_two.pkl")
while True:
frame, img = self.camera.read()
not_match = 0
img_tm = copy.deepcopy(img)
is_one_hand = self.at_main.gesture_recognition(self.at_detector, img, cnn_two)
if is_one_hand:
not_match = self.ai_main.gesture_recognition_camera(self.ai_detector, img, cnn)
if not_match:
self.tm_main.gesture_recognition(img_tm, self.tm_detector)
if not_match:
cv2.imshow("camera", img_tm)
else:
cv2.imshow("camera", img)
key = cv2.waitKey(1)
if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
break
elif key == 27:
break
def ai_input(self):
self.ai_main.make_datasets(self.camera, "ai_datasets", 100)
self.ai_main.train("ai_datasets")
self.at_main.make_datasets(self.camera, "ai_two_datasets", 100)
self.at_main.train("ai_two_datasets")
if __name__ == '__main__':
main = Main()
main.gr_img("", 0)

112
main.py Normal file

@@ -0,0 +1,112 @@
# -*- coding:utf-8 -*-
"""
Signal design course group project
@ by: Leaf
@ date: 2022-05-28
"""
import gr
import tkinter as tk
from tkinter import filedialog, Button, Label, Frame, ACTIVE, LEFT
from PIL import Image, ImageTk
class DisplayImage:
"""用于展示选择的图片"""
def __init__(self, master):
self.master = master
master.title("GUI")
self.Text_lab0 = Label(master, text='Loaded image/video')
self.Text_lab0.pack(pady=10)
self.image_frame = Frame(master, bd=0, height=300, width=300, bg='white', highlightthickness=2,
highlightbackground='gray', highlightcolor='black')
self.image_frame.pack()
self.Text_label = Label(master, text='Load image/video to recognize')
self.Text_label.place(x=60, y=410)
self.Choose_image = Button(master, command=self.choose_img, text="Image",
width=7, default=ACTIVE, borderwidth=0)
self.Choose_image.place(x=50, y=450)
self.Choose_video = Button(master, command=self.choose_video, text="Video",
width=7, default=ACTIVE, borderwidth=0)
self.Choose_video.place(x=120, y=450)
self.Text_label2 = Label(master, text='Run gesture recognition')
self.Text_label2.place(x=60, y=500)
self.image_mosaic = Button(master, command=self.gesture_recognition, text="Gesture recognition",
width=17, default=ACTIVE, borderwidth=0)
self.image_mosaic.place(x=50, y=540)
self.Text_label3 = Label(master, text='Run real-time gesture recognition')
self.Text_label3.place(x=300, y=410)
self.realtime = Button(master, command=self.realtime_gr, text="Realtime\n gesture recognition",
width=17, height=6, default=ACTIVE, borderwidth=0)
self.realtime.place(x=300, y=450)
self.Text_label4 = Label(master, text='Record custom gestures')
self.Text_label4.place(x=180, y=610)
self.input = Button(master, command=self.input_image, text="Input gesture",
width=42, default=ACTIVE, borderwidth=0)
self.input.place(x=60, y=650)
self.gr = gr.Main()
self.temp_dir = "temp"
self.mode = 0
self.directory = ""
self.diy = 1
def choose_img(self):
self.mode = 1
# Clear the frame's contents
for widget in self.image_frame.winfo_children():
widget.destroy()
self.directory = filedialog.askopenfilename()
# Lay out the selected image
img = Image.open(self.directory).resize((300, 300))
img.save(self.temp_dir + "/photo.png")
image = ImageTk.PhotoImage(image=img)
label = Label(self.image_frame, highlightthickness=0, borderwidth=0)
label.configure(image=image)
label.pack(side=LEFT, expand=True)
def choose_video(self):
# Clear the frame's contents
self.mode = 2
for widget in self.image_frame.winfo_children():
widget.destroy()
self.directory = filedialog.askopenfilename()
# Lay out the placeholder image for the selected video
img = Image.open(self.temp_dir+"/video.jpg").resize((300, 300))
img.save(self.temp_dir + "/photo.png")
image = ImageTk.PhotoImage(image=img)
label = Label(self.image_frame, highlightthickness=0, borderwidth=0)
label.configure(image=image)
label.pack(side=LEFT, expand=True)
def gesture_recognition(self):
if self.mode == 1:
self.gr.gr_img(self.directory, self.diy)
elif self.mode == 2:
self.gr.gr_video(self.directory, self.diy)
def realtime_gr(self):
self.gr.gr_realtime(self.diy)
def input_image(self):
self.diy = 1
self.gr.ai_input()
def main():
window = tk.Tk()
DisplayImage(window)
window.title('Gesture Recognition')
window.geometry('500x720')
window.mainloop()
if __name__ == '__main__':
main()