8 changed files with 72 additions and 1558 deletions
--- a/CNN.pkl
+++ b/CNN.pkl
--- a/DM.py
+++ b/DM.py
@ -1,318 +0,0 @@
 # -*- coding:utf-8 -*-
 """
 信号设计课程小组设计
@ by: Leaf
@ date: 2022-05-28
 """
 import mediapipe as mp
 import cv2
 # import HandDetector
 import math
 from datetime import datetime
 import time
 import numpy as np
 # 旋转函数
 def Rotate(angle, x, y, point_x, point_y):
    """Rotate the point (x, y) by ``angle`` radians around (point_x, point_y).

    :param angle: rotation angle in radians
    :param x: x coordinate of the point to rotate
    :param y: y coordinate of the point to rotate
    :param point_x: x coordinate of the rotation centre
    :param point_y: y coordinate of the rotation centre
    :return: tuple ``(px, py)`` with the rotated coordinates
    """
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    # Offsets of the point relative to the rotation centre
    off_x = x - point_x
    off_y = y - point_y
    rotated_x = off_x * cos_a - off_y * sin_a + point_x
    rotated_y = off_x * sin_a + off_y * cos_a + point_y
    return rotated_x, rotated_y
 class HandDetector:
    """
    使用mediapipe库查找手。导出地标像素格式。添加了额外的功能。
    如查找方式，许多手指向上或两个手指之间的距离。而且提供找到的手的边界框信息。
    """
    def __init__(self, mode=False, max_hands=2, detection_con=0.5, min_track_con=0.5):
        """
        :param mode: 在静态模式下，对每个图像进行检测
        :param max_hands: 要检测的最大手数
        :param detection_con: 最小检测置信度
        :param min_track_con: 最小跟踪置信度
        """
        self.results = None
        self.mode = mode
        self.max_hands = max_hands
        self.modelComplex = 1
        self.detection_con = detection_con
        self.min_track_con = min_track_con
        # 初始化手部的识别模型
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(static_image_mode=self.mode,
                                        max_num_hands=self.max_hands,
                                        min_detection_confidence=self.detection_con,
                                        min_tracking_confidence=self.min_track_con)
        self.mpDraw = mp.solutions.drawing_utils  # 初始化绘图器
        self.tipIds = [4, 8, 12, 16, 20]  # 指尖列表
        # self.knuckles = {'0': [4, 3, 2, 1], "1": [8, 7, 6, 5], "2": [12, 11, 10, 9], "3": [16, 15, 14, 13],
        #                  "4": [20, 19, 18, 17]}
        self.fingers = []
        self.lmList = []
        self.re_lmList = []
    def find_hands(self, img, draw=True):
        """
        从图像(BRG)中找到手部。
        :param img: 用于查找手的图像。
        :param draw: 在图像上绘制输出的标志。
        :return: 带或不带图形的图像
        """
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # 将传入的图像由BGR模式转标准的Opencv模式——RGB模式，
        self.results = self.hands.process(img_rgb)
        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms,
                                               self.mpHands.HAND_CONNECTIONS)
        return img
    def find_position(self, img, hand_no=0, draw=True):
        """
        查找单手的地标并将其放入列表中像素格式。还可以返回手部的周围的边界框。
        :param img: 要查找的主图像
        :param hand_no: 如果检测到多只手，则为手部id
        :param draw: 在图像上绘制输出的标志。(默认绘制矩形框)
        :return: 像素格式的手部关节位置列表；手部边界框
        """
        x_list = []
        y_list = []
        bbox_info = []
        self.lmList = []
        self.re_lmList = []
        if self.results.multi_hand_landmarks:
            my_hand = self.results.multi_hand_landmarks[hand_no]
            for _, lm in enumerate(my_hand.landmark):
                h, w, c = img.shape
                px, py = int(lm.x * w), int(lm.y * h)
                x_list.append(px)
                y_list.append(py)
                self.lmList.append([px, py])
                if draw:
                    cv2.circle(img, (px, py), 5, (255, 0, 255), cv2.FILLED)
            x_min, x_max = min(x_list), max(x_list)
            y_min, y_max = min(y_list), max(y_list)
            box_w, box_h = x_max - x_min, y_max - y_min
            bbox = x_min, y_min, box_w, box_h
            cx, cy = bbox[0] + (bbox[2] // 2), bbox[1] + (bbox[3] // 2)
            bbox_info = {"id": hand_no, "bbox": bbox, "center": (cx, cy)}
            if draw:
                cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
                              (bbox[0] + bbox[2] + 20, bbox[1] + bbox[3] + 20),
                              (0, 255, 0), 2)
        return self.lmList, bbox_info
    def revolve(self, img, draw=True):
        """
            旋转手势识别点
            :param img: 要查找的主图像
            :param draw: 在图像上绘制输出的标志。(默认绘制矩形框)
            :return: 像素格式的手部关节位置列表
        """
        # print(self.lmList)
        point_x = self.lmList[0][0]
        point_y = self.lmList[0][1]
        delta_x = self.lmList[13][0] - point_x
        delta_y = self.lmList[13][1] - point_y
        if delta_y == 0:
            if delta_x < 0:
                theta = math.pi / 2
            else:
                theta = -math.pi / 2
        else:
            theta = math.atan(delta_x / delta_y)
            if delta_y > 0:
                theta = theta + math.pi
        # print(theta*180/math.pi)
        for i in self.lmList:
            px, py = Rotate(theta, i[0], i[1], point_x, point_y)
            px = int(px)
            py = int(py)
            self.re_lmList.append([px, py])
            if draw:
                cv2.circle(img, (px, py), 5, (0, 0, 255), cv2.FILLED)
        return self.re_lmList
    def fingers_up(self):
        """
        查找列表中打开并返回的手指数。会分别考虑左手和右手
        :return: 竖起手指的列表
        """
        fingers = []
        if self.results.multi_hand_landmarks:
            my_hand_type = self.hand_type()
            # Thumb
            if my_hand_type == "Right":
                if self.lmList[self.tipIds[0]][0] > self.lmList[self.tipIds[0] - 1][0]:
                    fingers.append(1)
                else:
                    fingers.append(0)
            else:
                if self.lmList[self.tipIds[0]][0] < self.lmList[self.tipIds[0] - 1][0]:
                    fingers.append(1)
                else:
                    fingers.append(0)
            # 4 Fingers
            for i in range(1, 5):
                if self.lmList[self.tipIds[i]][1] < self.lmList[self.tipIds[i] - 2][1]:
                    fingers.append(1)
                else:
                    fingers.append(0)
        return fingers
    def re_fingers_up(self):
        """
        查找列表中打开并返回的手指数。会分别考虑左手和右手
        :return: 竖起手指的列表
        """
        fingers = []
        if self.results.multi_hand_landmarks:
            my_hand_type = self.hand_type()
            # Thumb
            if my_hand_type == "Right":
                if self.re_lmList[self.tipIds[0]][0] > self.re_lmList[self.tipIds[0] - 1][0]:
                    fingers.append(1)
                else:
                    fingers.append(0)
            else:
                if self.re_lmList[self.tipIds[0]][0] < self.re_lmList[self.tipIds[0] - 1][0]:
                    fingers.append(1)
                else:
                    fingers.append(0)
            # 4 Fingers
            for i in range(1, 5):
                if self.re_lmList[self.tipIds[i]][1] < self.re_lmList[self.tipIds[i] - 2][1]:
                    fingers.append(1)
                else:
                    fingers.append(0)
        return fingers
    def knuckles_up(self):
        """
                查找列表中打开并返回的手指数。会分别考虑左手和右手
                :return: 竖起手指的列表
                """
        knuckles = []
        distan = 10
        if self.results.multi_hand_landmarks:
            my_hand_type = self.hand_type()
            # Thumb
            xx = self.re_lmList[self.tipIds[0]][0]
            yy = self.re_lmList[self.tipIds[0] - 1][0]
            if my_hand_type == "Right":
                if -distan < xx - yy < distan:
                    knuckles.append(2)
                elif xx > yy:
                    knuckles.append(1)
                else:
                    knuckles.append(0)
            else:
                if -distan < xx - yy < distan:
                    knuckles.append(2)
                elif xx < yy:
                    knuckles.append(1)
                else:
                    knuckles.append(0)
            # 12 knuckles
            for i in range(1, 5):
                for j in range(3):
                    xx = self.re_lmList[self.tipIds[i]-j][1]
                    yy = self.re_lmList[self.tipIds[i]-j - 1][1]
                    if -distan < xx - yy < distan:
                        knuckles.append(2)
                    elif xx < yy:
                        knuckles.append(1)
                    else:
                        knuckles.append(0)
        return knuckles
    def hand_type(self):
        """
        检查传入的手部是左还是右
        :return: "Right" 或 "Left"
        """
        if self.results.multi_hand_landmarks:
            if self.lmList[17][0] < self.lmList[5][0]:
                return "Right"
            else:
                return "Left"
 class Main:
    """Interactive webcam demo: record a gesture template, then recognise it live."""
    def __init__(self):
        """Open camera 0 through the DirectShow backend and request a 1280x720 frame size."""
        self.detector = None
        self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
        self.camera.set(3, 1280)  # property 3: frame width
        self.camera.set(4, 720)  # property 4: frame height
    def gesture_recognition(self):
        """
        Run the record-then-recognise loop until ESC is pressed or the window is closed.

        While no hand is visible the timer and the sample buffer are reset.
        During the first 3 seconds a hand is visible, the 13 knuckle codes of
        every frame are collected. After that the most frequent code per joint
        is stored as a template under the current round number, and the round
        number is drawn next to the hand whenever the live codes match a stored
        template. The round number advances each time the hand disappears after
        a template was stored.
        """
        self.detector = HandDetector()
        gesture_store = {}  # template string -> round number it was stored in
        startTime = time.time()
        stored_round = 1
        stored_flag = 0
        xl = np.zeros((1, 13))  # sample buffer; row 0 is only a seed row
        try:
            while True:
                ok, img = self.camera.read()
                if not ok or img is None:
                    # Camera unplugged / busy: stop instead of crashing in cvtColor
                    print("Failed to read a frame from the camera")
                    break
                img = self.detector.find_hands(img)
                lm_list, bbox = self.detector.find_position(img)
                if lm_list:
                    self.detector.revolve(img)
                    x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
                    knucks = self.detector.knuckles_up()
                    elapsed = time.time() - startTime
                    print(elapsed)
                    if elapsed < 3:  # still inside the recording window
                        xl = np.vstack((xl, knucks))
                        cv2.putText(img, 'Please put the gesture to be stored in 1 second', (50, 50),
                                    cv2.FONT_HERSHEY_PLAIN, 1.2, (255, 255, 255), 2)
                    else:  # recognition phase
                        if not stored_flag:
                            # Build the template once per recording instead of on every frame
                            self.detector.fingers = xl
                            # Leave the all-zero seed row out of the vote when real samples exist
                            samples = xl[1:] if len(xl) > 1 else xl
                            value = ''.join(
                                str(np.argmax(np.bincount(samples[:, j].astype(int))))
                                for j in range(13))  # most frequent code of each column
                            gesture_store[value] = stored_round
                            stored_flag = 1
                        gesture_dete = ''.join(str(knuck) for knuck in knucks)
                        if gesture_dete in gesture_store:
                            cv2.putText(img, str(gesture_store[gesture_dete]), (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
                                        (0, 0, 255), 3)
                        cv2.putText(img, 'Gesture stored, recognition started', (50, 50),
                                    cv2.FONT_HERSHEY_PLAIN, 1.2, (255, 255, 255), 2)
                else:
                    if stored_flag:
                        stored_round += 1
                        stored_flag = 0
                    startTime = time.time()  # no hand visible: restart the recording window
                    xl = np.zeros((1, 13))
                    cv2.putText(img, 'Please put the gesture to be stored in 1 second', (50, 50), cv2.FONT_HERSHEY_PLAIN,
                                1.2, (255, 255, 255), 2)
                cv2.imshow("camera", img)
                key = cv2.waitKey(1)
                if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
                    break
                elif key == 27:  # ESC
                    break
        finally:
            cv2.destroyAllWindows()
 if __name__ == '__main__':
    # Script entry point: open the camera and start the interactive gesture demo.
    Main().gesture_recognition()
--- a/Datasets/Readme.txt
+++ b/Datasets/Readme.txt
@ -1,15 +1,15 @@
-## GestureData 手势数据 v1.0
+GestureData 手势数据 v1.0
-# 文件格式：
+文件格式：
-每个数据集（npz文件）包含：
+每个数据集（npz文件）包含：
-1个标签label（手势标签，整个数据集都是这一个标签）；
+1个标签label（手势标签，整个数据集都是这一个标签）；
-500组数据data（每组数据是21*3，即21个点的3维数据，就是demo.py-find_position()中的lm.x, lm.y, lm.z）；
+500组数据data（每组数据是21*3，即21个点的3维数据，就是demo.py-find_position()中的lm.x, lm.y, lm.z）；
-左右手区分handtype（0为左手，1为右手）；
+左右手区分handtype（0为左手，1为右手）；
-画布大小shape（一般都是720*1280，对应demo.py-find_position()中的w, h）。
+画布大小shape（一般都是720*1280，对应demo.py-find_position()中的w, h）。
-
+
-# 注意事项：
+注意事项：
-1. 在使用之前建议先熟悉npz文件的读写与使用（很简单的）；
+1. 在使用之前建议先熟悉npz文件的读写与使用（很简单的）；
-2. 数据集shape类最后会保存一个[0, 0]，其他都是正常的[720, 1280]；
+2. 数据集shape类最后会保存一个[0, 0]，其他都是正常的[720, 1280]；
-3. 左右手不建议使用，因为面向屏幕的手心手背就可以导致程序的误判。
+3. 左右手不建议使用，因为面向屏幕的手心手背就可以导致程序的误判。
-
+
-# 更新说明：
+更新说明：
-1. 保存了0~9的手势。
+1. 保存了0~9的手势。
--- a/ai.py
+++ b/ai.py
@ -1,472 +0,0 @@
 # -*- coding:utf-8 -*-
 """
 信号设计课程小组设计
@ by: Leaf
@ date: 2022-05-28
 """
 import tkinter as tk
 import cv2
 import mediapipe as mp
 import torch
 import torch.nn as nn
 import numpy as np
 import shutil
 import math
 from scipy import stats
 from os.path import exists
 from os import mkdir
 from pathlib import Path
 from torch.utils.data import DataLoader, TensorDataset
 # 旋转函数
 def rotate(angle, x, y, point_x, point_y):
    """Rotate the point (x, y) by ``angle`` radians about the centre (point_x, point_y).

    :return: tuple ``(px, py)`` holding the rotated coordinates
    """
    sin_t, cos_t = math.sin(angle), math.cos(angle)
    # Work with coordinates relative to the rotation centre
    rel_x = x - point_x
    rel_y = y - point_y
    new_x = rel_x * cos_t - rel_y * sin_t + point_x
    new_y = rel_x * sin_t + rel_y * cos_t + point_y
    return new_x, new_y
 # 归一化
 def normalize(x):
    """Min-max scale ``x`` so that its smallest value maps to 0 and its largest to 1.

    :param x: array-like of numbers
    :return: float ndarray of the same shape; all zeros when every element of
             ``x`` is equal (the plain formula would divide by zero and give
             NaN), and an empty array for empty input
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        return x
    min_x = np.min(x)
    span = np.max(x) - min_x
    if span == 0:
        # Constant input: there is no range to scale by
        return np.zeros_like(x)
    return (x - min_x) / span
 class CNN(nn.Module):
    """Two convolution stages followed by two linear layers, producing ``m`` class scores.

    Both convolutions use kernel 5 / stride 1 / padding 2, so they keep the
    spatial size; only the second stage's 2x2 max-pool halves it. ``med``
    expects 32 * 11 * 2 flattened features, which matches a
    (batch, 1, 22, 4) input.
    """
    def __init__(self, m):
        """
        :param m: number of output classes
        """
        super().__init__()
        self.out_label = []  # filled by the trainer: class index -> label
        stage1_conv = nn.Conv2d(
            in_channels=1,
            out_channels=16,
            kernel_size=5,
            stride=1,
            padding=2,
        )
        # A max-pool with kernel size 1 leaves the feature map unchanged
        self.conv1 = nn.Sequential(stage1_conv, nn.ReLU(), nn.MaxPool2d(kernel_size=1))
        stage2_conv = nn.Conv2d(16, 32, 5, 1, 2)
        self.conv2 = nn.Sequential(stage2_conv, nn.ReLU(), nn.MaxPool2d(2))
        self.med = nn.Linear(32 * 11 * 2, 500)
        # med2 / med3 are created but not used by forward()
        self.med2 = nn.Linear(1 * 21 * 3, 100)
        self.med3 = nn.Linear(100, 500)
        self.out = nn.Linear(500, m)  # final layer: one score per class
    def forward(self, x):
        """Return the raw class scores of shape (batch, m) for input ``x``."""
        feature_map = self.conv2(self.conv1(x))
        # Flatten everything except the batch dimension
        flat = feature_map.view(feature_map.size(0), -1)
        hidden = self.med(flat)
        return self.out(hidden)
 class HandDetector:
    """
    Find hands with the MediaPipe Hands solution and turn one hand into a feature matrix.

    ``find_position`` returns the landmarks after rotating the hand upright and
    min-max normalising x and y separately, padded to a 22 x 4 matrix.
    """
    def __init__(self, mode=False, max_hands=2, detection_con=0.5, min_track_con=0.5):
        """
        :param mode: forwarded to MediaPipe as ``static_image_mode``
        :param max_hands: forwarded to MediaPipe as ``max_num_hands``
        :param detection_con: forwarded to MediaPipe as ``min_detection_confidence``
        :param min_track_con: forwarded to MediaPipe as ``min_tracking_confidence``
        """
        self.results = None  # output of the most recent find_hands() call
        self.mode = mode
        self.max_hands = max_hands
        self.modelComplex = 1  # only stored; not passed to MediaPipe by this class
        self.detection_con = detection_con
        self.min_track_con = min_track_con
        # Build the hand-landmark model
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(static_image_mode=self.mode,
                                        max_num_hands=self.max_hands,
                                        min_detection_confidence=self.detection_con,
                                        min_tracking_confidence=self.min_track_con)
        self.mpDraw = mp.solutions.drawing_utils  # landmark drawing helper
        self.tipIds = [4, 8, 12, 16, 20]  # fingertip landmark indices
        self.fingers = []
        self.lmList = []  # [lm.x, lm.y, 0] per landmark (MediaPipe's relative coordinates)
        self.re_lmList = []  # rotated + normalised landmarks
    def _has_hand(self):
        """Return True when the last find_hands() call reported at least one hand."""
        return self.results is not None and bool(self.results.multi_hand_landmarks)
    def find_hands(self, img, draw=True):
        """
        Run hand detection on a BGR image.

        :param img: BGR image to search for hands
        :param draw: when True, draw the detected landmarks and connections onto ``img``
        :return: the same image object (annotated in place when ``draw`` is True)
        """
        # MediaPipe is fed RGB here, while OpenCV frames are BGR
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(img_rgb)
        if draw and self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(img, handLms,
                                           self.mpHands.HAND_CONNECTIONS)
        return img
    def find_position(self, img, hand_no=0, draw=True):
        """
        Build the feature matrix and bounding box of one detected hand.

        :param img: image the landmarks refer to (used for its size and for drawing)
        :param hand_no: index of the hand to use when several were detected
        :param draw: when True, draw landmarks, rotated landmarks and a padded box onto ``img``
        :return: ``(features, bbox_info)``. With a hand, ``features`` is a 22 x 4
                 ndarray (column 0 and row 21 are zero padding around the 21 x 3
                 landmark block) and ``bbox_info`` a dict with keys ``id``, ``bbox``
                 (x, y, w, h in pixels), ``center`` and ``shape`` (h, w). Without a
                 hand, ``features`` is an empty ndarray and ``bbox_info`` is ``[]``.
        """
        bbox_info = []
        self.lmList = []
        h, w = img.shape[:2]
        hands = self.results.multi_hand_landmarks if self._has_hand() else []
        if -len(hands) <= hand_no < len(hands):
            x_list = []
            y_list = []
            for lm in hands[hand_no].landmark:
                px, py = int(lm.x * w), int(lm.y * h)
                x_list.append(px)
                y_list.append(py)
                self.lmList.append([lm.x, lm.y, 0])
                if draw:
                    cv2.circle(img, (px, py), 5, (255, 0, 255), cv2.FILLED)
            x_min, x_max = min(x_list), max(x_list)
            y_min, y_max = min(y_list), max(y_list)
            bbox = x_min, y_min, x_max - x_min, y_max - y_min
            cx, cy = bbox[0] + (bbox[2] // 2), bbox[1] + (bbox[3] // 2)
            bbox_info = {"id": hand_no, "bbox": bbox, "center": (cx, cy), "shape": (h, w)}
            if draw:
                # Bounding box is drawn with a 20 px margin on every side
                cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
                              (bbox[0] + bbox[2] + 20, bbox[1] + bbox[3] + 20),
                              (0, 255, 0), 2)
        # Honour the caller's draw flag for the rotated landmarks as well
        self.revolve(img, draw)
        self.re_lmList = np.array(self.re_lmList)
        if self.re_lmList.size:
            # Pad 21 x 3 -> 21 x 4 (leading zero column) -> 22 x 4 (trailing zero row)
            self.re_lmList = np.concatenate((np.zeros((21, 1)), self.re_lmList), axis=1)
            self.re_lmList = np.concatenate((self.re_lmList, np.zeros((1, 4))), axis=0)
        return self.re_lmList, bbox_info
    def revolve(self, img, draw=True):
        """
        Rotate the landmarks upright around landmark 0 and normalise them.

        The rotation is done in pixel space and makes landmark 13 lie straight
        above landmark 0. Afterwards the x and the y coordinates are each
        min-max normalised.

        NOTE: earlier revisions wrote the normalised x values into the y column
        and derived the angle from the relative (non-pixel) coordinates, so
        datasets and models created with them use different features and have
        to be regenerated / retrained.

        :param img: image the landmarks refer to (used for its size and for drawing)
        :param draw: when True, draw the rotated landmarks onto ``img``
        :return: list of ``[x_norm, y_norm, 0]`` (also stored in ``re_lmList``);
                 a copy of ``lmList`` when fewer than 21 landmarks are present
        """
        h, w = img.shape[:2]
        if len(self.lmList) < 21:
            self.re_lmList = list(self.lmList)
            return self.re_lmList
        # Rotation centre and reference vector, both in pixels
        origin_x = self.lmList[0][0] * w
        origin_y = self.lmList[0][1] * h
        delta_x = self.lmList[13][0] * w - origin_x
        delta_y = self.lmList[13][1] * h - origin_y
        if delta_y == 0:
            # Horizontal reference vector: atan() would divide by zero
            theta = math.pi / 2 if delta_x < 0 else -math.pi / 2
        else:
            theta = math.atan(delta_x / delta_y)
            if delta_y > 0:
                # Reference joint is below landmark 0: turn by another half circle
                theta = theta + math.pi
        rotated = []
        for lm in self.lmList:
            px, py = rotate(theta, lm[0] * w, lm[1] * h, origin_x, origin_y)
            rotated.append((px, py))
            if draw:
                cv2.circle(img, (int(px), int(py)), 5, (0, 0, 255), cv2.FILLED)
        # Normalise each axis on its own
        x_array = normalize(np.array([point[0] for point in rotated]))
        y_array = normalize(np.array([point[1] for point in rotated]))
        self.re_lmList = [[x_val, y_val, 0] for x_val, y_val in zip(x_array, y_array)]
        return self.re_lmList
    def hand_type(self):
        """
        Guess the hand side from the x order of landmarks 17 and 5 in ``lmList``.

        :return: 1 when landmark 17 is left of landmark 5, else 0;
                 ``None`` when no hand / landmarks are available
        """
        if not self._has_hand() or len(self.lmList) <= 17:
            return None
        return 1 if self.lmList[17][0] < self.lmList[5][0] else 0
 class AI:
    """Load the ``.npz`` gesture datasets of a directory and train the CNN on them."""
    def __init__(self, datasets_dir):
        """
        :param datasets_dir: directory that is searched recursively for ``*.npz`` files
        """
        self.EPOCH = 20
        self.BATCH_SIZE = 2
        self.LR = 10e-5  # numerically 1e-4
        self.DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.datasets_dir = datasets_dir
        self.train_loader = None
        self.m = 0  # number of classes, set by load_datasets()
        self.out_label = []  # class index -> label as stored in the dataset file
    def load_datasets(self):
        """
        Read every ``*.npz`` file below ``datasets_dir`` and build the training loader.

        Each file contributes one class: its ``data`` array supplies the samples
        and its ``label`` entry the class name. Files are processed in sorted
        path order so the class indices are reproducible.

        :return: number of classes found
        :raises ValueError: when no dataset file exists
        """
        train_data = []
        train_label = []
        # Start from scratch so that a second call does not duplicate labels
        self.out_label = []
        for file in sorted(Path(self.datasets_dir).rglob("*.npz")):
            # The context manager closes the archive once the arrays are read
            with np.load(str(file)) as data:
                samples = data["data"]
                label = data["label"]
            train_data.append(samples)
            train_label.append(np.full(len(samples), len(self.out_label)))
            self.out_label.append(label)
        self.m = len(self.out_label)
        if not train_data:
            raise ValueError(f"no .npz dataset found under {self.datasets_dir!r}")
        train_data = torch.Tensor(np.concatenate(train_data, axis=0))
        train_data = train_data.unsqueeze(1)  # add the channel dimension
        train_label = torch.tensor(np.concatenate(train_label, axis=0)).long()
        dataset = TensorDataset(train_data, train_label)
        self.train_loader = DataLoader(dataset, batch_size=self.BATCH_SIZE, shuffle=True)
        return self.m
    def train_cnn(self):
        """
        Train a fresh CNN with Adam and cross-entropy loss and save it to ``CNN.pkl``.

        The label list is attached to the model as ``out_label`` before saving.
        NOTE: ``torch.save`` pickles the whole module here, so loading the file
        needs the ``CNN`` class to be importable and must only be done with
        files from a trusted source.

        :raises RuntimeError: when load_datasets() has not been called
        """
        if self.train_loader is None:
            raise RuntimeError("call load_datasets() before train_cnn()")
        cnn = CNN(self.m).to(self.DEVICE)
        optimizer = torch.optim.Adam(cnn.parameters(), self.LR)
        loss_func = nn.CrossEntropyLoss()  # takes class indices, not one-hot targets
        for epoch in range(self.EPOCH):
            for step, (data, target) in enumerate(self.train_loader):
                data, target = data.to(self.DEVICE), target.to(self.DEVICE)
                output = cnn(data)
                loss = loss_func(output, target)
                optimizer.zero_grad()  # clear gradients of the previous step
                loss.backward()
                optimizer.step()
                if (step + 1) % 50 == 0:  # progress line every 50 batches
                    print(
                        "\r[Epoch: %d] [%d/%d (%0.f %%)][Loss: %f]"
                        % (
                            epoch + 1,
                            (step + 1) * len(data),
                            len(self.train_loader.dataset),
                            100. * (step + 1) / len(self.train_loader),
                            loss.item()
                        ), end="")
        cnn.out_label = self.out_label
        torch.save(cnn, 'CNN.pkl')
        print("训练结束")
 class Main:
    """Front end that records gesture datasets, trains the CNN and runs recognition."""
    def __init__(self):
        self.camera = None
        self.detector = HandDetector()
        self.default_datasets = "Datasets"
        self.len_x = 22  # rows of one feature matrix
        self.len_y = 4  # columns of one feature matrix
        self.label = ''
        self.result = []  # class votes collected by gesture_recognition_camera()
        self.disp = ""  # label currently shown by gesture_recognition_camera()
        self.top1 = None  # Tk window asking for the label
        self.entry = None  # Tk entry inside that window
    def change_state(self):
        """Button callback: read the label from the entry and leave the Tk main loop.

        An empty label additionally destroys the window, which ends the recording session.
        """
        self.label = self.entry.get()
        self.top1.quit()
        if self.label == "":
            self.top1.destroy()
    @staticmethod
    def _classify(cnn, lm_list):
        """Run ``cnn`` on one feature matrix and return its class scores as a 1-D ndarray."""
        data = torch.Tensor(lm_list).unsqueeze(0).unsqueeze(0)  # -> (1, 1, rows, cols)
        with torch.no_grad():
            output = cnn(data)
        return output.cpu().numpy()[0]
    @staticmethod
    def _majority(votes):
        """Return the most frequent class index in ``votes`` (smallest index on ties)."""
        return int(np.bincount(votes).argmax())
    def make_datasets(self, camera, datasets_dir="default", n=100):
        """
        Record one ``.npz`` dataset per label typed into a small Tk window.

        An existing ``datasets_dir`` is deleted first. For every non-empty label
        up to ``n`` frames containing a hand are captured from ``camera`` and
        saved as ``<datasets_dir>/<label>.npz`` with the entries ``label``,
        ``data``, ``handtype`` and ``shape``. Only the frames actually captured
        are saved; nothing is written when no frame was captured. The session
        ends when an empty label is confirmed or the window is closed.

        :param camera: opened capture object providing ``read()``
        :param datasets_dir: output directory; "default" makes the call a no-op
        :param n: number of samples to record per label
        """
        if datasets_dir == "default":
            return
        if exists(datasets_dir):
            shutil.rmtree(datasets_dir)
        mkdir(datasets_dir)
        self.camera = camera
        self.label = ""
        self.top1 = tk.Tk()
        self.top1.geometry('300x50')
        self.top1.title('请输入标签')
        tk.Label(self.top1, text='Label:').place(x=27, y=10)
        self.entry = tk.Entry(self.top1, width=15)
        self.entry.place(x=80, y=10)
        tk.Button(self.top1, text='确定', command=self.change_state).place(x=235, y=5)
        self.top1.mainloop()
        while self.label != "":
            data = np.zeros([n, self.len_x, self.len_y])
            shape_list = np.zeros([n, 2], dtype=np.int16)
            hand_type = np.zeros(n, dtype=np.int8)
            count = 0
            cv2.startWindowThread()
            while count < n:
                ok, img = self.camera.read()
                if not ok or img is None:
                    # No frame available: stop recording instead of crashing
                    break
                img = self.detector.find_hands(img)
                result = np.zeros((self.len_x, self.len_y))
                lm_list, bbox = self.detector.find_position(img)
                if len(lm_list):
                    result[:len(lm_list)] = lm_list
                if result.sum() > 0:  # non-zero matrix means a hand was captured
                    x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
                    data[count] = result
                    hand_type[count] = self.detector.hand_type()
                    shape_list[count] = np.array(bbox["shape"])
                    count += 1
                    cv2.putText(img, str("{}/{}".format(count, n)), (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
                                (0, 255, 0), 3)
                cv2.imshow("camera", img)
                key = cv2.waitKey(100)
                if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
                    break
                elif key == 27:  # ESC
                    break
            cv2.destroyAllWindows()
            if count:
                # Save only the rows that were filled, so no all-zero samples end up in the set
                np.savez(datasets_dir + "/" + self.label + ".npz", label=self.label, data=data[:count],
                         handtype=hand_type[:count], shape=shape_list[:count])
            # Forget the label so that closing the window ends the session
            self.label = ""
            self.top1.mainloop()
    def train(self, datasets_dir="default"):
        """Load the datasets below ``datasets_dir`` (default: ``self.default_datasets``) and train the CNN."""
        if datasets_dir == "default":
            datasets_dir = self.default_datasets
        ai = AI(datasets_dir)
        ai.load_datasets()
        ai.train_cnn()
    def gesture_recognition_camera(self, detector, img, cnn):
        """
        Classify the hand in one frame and draw the current label onto ``img``.

        A frame only votes when the best score exceeds the runner-up by at
        least 5.5; after five votes the majority class becomes the displayed label.

        :param detector: detector to use; it replaces ``self.detector``
        :param img: BGR frame, annotated in place
        :param cnn: trained model carrying an ``out_label`` list
        :return: 1 when the frame was rejected as too uncertain, otherwise 0
        """
        self.detector = detector
        out_label = cnn.out_label
        img = self.detector.find_hands(img)
        lm_list, bbox = self.detector.find_position(img)
        if lm_list.any():
            x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
            scores = self._classify(cnn, lm_list)
            if len(scores) >= 2:
                # Two largest scores in ascending order -> margin between best and runner-up
                temp = scores[np.argpartition(scores, -2)[-2:]]
                print(temp[1]-temp[0])
                if temp[1]-temp[0] < 5.5:
                    return 1
            self.result.append(int(np.argmax(scores)))
            if len(self.result) > 4:
                self.disp = str(out_label[self._majority(self.result)])
                self.result = []
            cv2.putText(img, self.disp, (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
                        (0, 0, 255), 3)
        return 0
    def gesture_recognition_video(self, filedir):
        """
        Play a video file and overlay the recognised gesture label.

        The label is refreshed with the majority class of every six classified
        frames. Playback stops at the end of the video, on ESC or when the
        window is closed.

        :param filedir: path of the video file
        """
        self.detector = HandDetector()
        # NOTE: CNN.pkl is a pickled module; only load files from a trusted source
        cnn = torch.load("CNN.pkl")
        out_label = cnn.out_label
        result = []
        disp = ""
        cap = cv2.VideoCapture(filedir)
        try:
            while True:
                ret, img = cap.read()
                if not ret or img is None:
                    # End of the video (or unreadable file)
                    break
                img = self.detector.find_hands(img)
                lm_list, bbox = self.detector.find_position(img)
                if lm_list.any():
                    x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
                    result.append(int(np.argmax(self._classify(cnn, lm_list))))
                    if len(result) > 5:
                        disp = str(out_label[self._majority(result)])
                        result = []
                    cv2.putText(img, disp, (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
                                (0, 0, 255), 3)
                cv2.imshow("camera", img)
                key = cv2.waitKey(1)
                if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
                    break
                elif key == 27:  # ESC
                    break
        finally:
            cap.release()
    def gesture_recognition_img(self, filedir):
        """
        Recognise the gesture in a single image and show it until ESC or window close.

        :param filedir: path of the image file
        :raises FileNotFoundError: when the image cannot be read
        """
        self.detector = HandDetector()
        # NOTE: CNN.pkl is a pickled module; only load files from a trusted source
        cnn = torch.load("CNN.pkl")
        out_label = cnn.out_label
        img = cv2.imread(filedir)
        if img is None:
            raise FileNotFoundError("cannot read image: {}".format(filedir))
        img = self.detector.find_hands(img)
        # The image never changes, so detect and classify once before the display loop
        lm_list, bbox = self.detector.find_position(img)
        if lm_list.any():
            x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
            disp = str(out_label[int(np.argmax(self._classify(cnn, lm_list)))])
            cv2.putText(img, disp, (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
                        (0, 0, 255), 3)
        while True:
            cv2.imshow("camera", img)
            key = cv2.waitKey(1)
            if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
                break
            elif key == 27:  # ESC
                break
 if __name__ == '__main__':
    # Script entry point: record a dataset from the webcam, then train on it.
    solution = Main()
    my_datasets_dir = "test"
    # make_datasets() expects the capture device as its first argument
    camera = cv2.VideoCapture(0)
    try:
        solution.make_datasets(camera, my_datasets_dir, 100)
    finally:
        camera.release()
    solution.train(my_datasets_dir)
    # Sample inputs for the recognition helpers below (machine-specific paths)
    dir_video = "C:/Users/Liar/Pictures/Camera Roll/WIN_20220630_20_11_47_Pro.mp4"
    dir_img = "C:/Users/Liar/Pictures/Camera Roll/WIN_20220630_20_01_22_Pro.jpg"
    # solution.gesture_recognition_video(dir_video)
    # solution.gesture_recognition_img(dir_img)
--- a/ai_two.py
+++ b/ai_two.py
@ -1,414 +0,0 @@
 # -*- coding:utf-8 -*-
 """
 信号设计课程小组设计
@ by: Leaf
@ date: 2022-05-28
 """
 import cv2
 import mediapipe as mp
 import torch
 import torch.nn as nn
 import numpy as np
 import tkinter as tk
 import shutil
 import math
 from scipy import stats
 from os.path import exists
 from os import mkdir
 from pathlib import Path
 from torch.utils.data import DataLoader, TensorDataset
 # Rotation helper
 def rotate(angle, x, y, point_x, point_y):
    """
    Rotate the point (x, y) by ``angle`` radians about (point_x, point_y).

    :param angle: rotation angle in radians
    :param x: x coordinate of the point to rotate
    :param y: y coordinate of the point to rotate
    :param point_x: x coordinate of the rotation centre
    :param point_y: y coordinate of the rotation centre
    :return: tuple ``(px, py)`` with the rotated coordinates
    """
    offset_x = x - point_x
    offset_y = y - point_y
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    rotated_x = offset_x * cos_a - offset_y * sin_a + point_x
    rotated_y = offset_x * sin_a + offset_y * cos_a + point_y
    return rotated_x, rotated_y
 # Min-max normalisation
 def normalize(x):
    """
    Rescale ``x`` linearly so that its minimum maps to 0 and its maximum to 1.

    :param x: array-like of numbers
    :return: array of the same shape as ``x``; all zeros when every element of
             ``x`` is equal (the plain formula would divide by zero there), and
             an empty float array when ``x`` is empty
    """
    values = np.asarray(x)
    if values.size == 0:
        return values.astype(float)
    min_x = np.min(values)
    span = np.max(values) - min_x
    if span == 0:
        # Constant input: avoid 0/0 producing NaN.
        return np.zeros(values.shape, dtype=float)
    return (values - min_x) / span
 class CNNTwo(nn.Module):
    """
    Small convolutional classifier with ``m`` output classes.

    Two conv/ReLU/max-pool stages are followed by two linear layers. The first
    linear layer expects 32 * 11 * 1 flattened features, which is what a
    single-channel 44x4 input yields after the two 2x2 poolings.
    """
    def __init__(self, m):
        """
        :param m: number of output classes
        """
        super().__init__()
        self.out_label = []  # class-index -> label mapping, filled in by the trainer
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.med = nn.Linear(in_features=32 * 11 * 1, out_features=500)
        # med2 / med3 are created but not used by forward().
        self.med2 = nn.Linear(in_features=1 * 21 * 3, out_features=100)
        self.med3 = nn.Linear(in_features=100, out_features=500)
        self.out = nn.Linear(in_features=500, out_features=m)
    def forward(self, x):
        """
        :param x: input batch with one channel
        :return: raw class scores, one row per batch element and ``m`` columns
        """
        features = self.conv2(self.conv1(x))
        # Flatten everything except the batch axis.
        flat = features.view(features.size(0), -1)
        hidden = self.med(flat)
        return self.out(hidden)
 class HandDetector:
    """
    Find hands with the MediaPipe Hands solution and export their landmarks.

    Besides the raw landmarks the detector produces a rotated, min-max
    normalised copy of them (see ``revolve``) and a pixel bounding box.
    """
    def __init__(self, mode=False, max_hands=2, detection_con=0.5, min_track_con=0.5):
        """
        :param mode: static-image mode; when true, detection runs on every image
        :param max_hands: maximum number of hands to detect
        :param detection_con: minimum detection confidence
        :param min_track_con: minimum tracking confidence
        """
        self.results = None
        self.mode = mode
        self.max_hands = max_hands
        self.modelComplex = 1  # stored only; not passed to Hands() below
        self.detection_con = detection_con
        self.min_track_con = min_track_con
        # Set up the hand-landmark model.
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(static_image_mode=self.mode,
                                        max_num_hands=self.max_hands,
                                        min_detection_confidence=self.detection_con,
                                        min_tracking_confidence=self.min_track_con)
        self.mpDraw = mp.solutions.drawing_utils  # landmark drawing helper
        self.tipIds = [4, 8, 12, 16, 20]  # fingertip landmark indices
        self.fingers = []
        self.lmList = []
        self.re_lmList = []
    def find_hands(self, img, draw=True):
        """
        Run hand detection on a BGR image.

        Landmarks are drawn only when at least two hands were detected.

        :param img: BGR image to search
        :param draw: whether to draw the detected landmarks onto ``img``
        :return: tuple ``(img, is_two_hand)``; ``is_two_hand`` is True when two
                 or more hands were found
        """
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # the model is fed RGB
        self.results = self.hands.process(img_rgb)
        hands_found = self.results.multi_hand_landmarks
        is_two_hand = hands_found is not None and len(hands_found) >= 2
        if is_two_hand and draw:
            for handLms in hands_found:
                self.mpDraw.draw_landmarks(img, handLms,
                                           self.mpHands.HAND_CONNECTIONS)
        return img, is_two_hand
    def find_position(self, img, hand_no=0, draw=True):
        """
        Collect the landmarks of one detected hand and its bounding box.

        ``find_hands`` must have been called first so that ``self.results`` is
        populated.

        :param img: image the landmarks refer to (also the drawing target)
        :param hand_no: index of the hand among the detected hands
        :param draw: whether to draw landmark dots and the bounding box
        :return: tuple ``(landmarks, bbox_info)``. For a detected hand,
                 ``landmarks`` is a (22, 4) array: the 21 rotated/normalised
                 rows from ``revolve`` with a zero column prepended, plus one
                 trailing zero row; ``bbox_info`` is a dict with keys ``id``,
                 ``bbox``, ``center`` and ``shape``. When no hand with index
                 ``hand_no`` is available, ``landmarks`` is an empty array and
                 ``bbox_info`` is an empty list.
        """
        x_list = []
        y_list = []
        bbox_info = []
        self.lmList = []
        h, w, c = img.shape
        hands_found = self.results.multi_hand_landmarks
        # Guard the index so that asking for a hand that was not detected
        # yields the "nothing found" result instead of an IndexError.
        if hands_found and hand_no < len(hands_found):
            my_hand = hands_found[hand_no]
            for lm in my_hand.landmark:
                px, py = int(lm.x * w), int(lm.y * h)
                x_list.append(px)
                y_list.append(py)
                # Keep the unscaled coordinates as reported by the model.
                self.lmList.append([lm.x, lm.y, 0])
                if draw:
                    cv2.circle(img, (px, py), 5, (255, 0, 255), cv2.FILLED)
            x_min, x_max = min(x_list), max(x_list)
            y_min, y_max = min(y_list), max(y_list)
            box_w, box_h = x_max - x_min, y_max - y_min
            bbox = x_min, y_min, box_w, box_h
            cx, cy = bbox[0] + (bbox[2] // 2), bbox[1] + (bbox[3] // 2)
            bbox_info = {"id": hand_no, "bbox": bbox, "center": (cx, cy), "shape": (h, w)}
            if draw:
                cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
                              (bbox[0] + bbox[2] + 20, bbox[1] + bbox[3] + 20),
                              (0, 255, 0), 2)
        # Honour the caller's draw flag for the rotated points as well.
        self.revolve(img, draw)
        self.re_lmList = np.array(self.re_lmList)
        if self.re_lmList.any():
            # Pad to the (22, 4) layout: zero column in front, zero row at the end.
            self.re_lmList = np.concatenate((np.zeros((21, 1)), self.re_lmList), axis=1)
            self.re_lmList = np.concatenate((self.re_lmList, np.zeros((1, 4))), axis=0)
        return self.re_lmList, bbox_info
    def revolve(self, img, draw=True):
        """
        Rotate the current landmarks about landmark 0 and normalise them.

        The rotation angle is chosen from the direction landmark 0 -> landmark
        13 so that this direction ends up along the negative y axis. The x and
        y columns are then min-max normalised independently.

        NOTE(review): the angle is derived from the unscaled landmark
        coordinates while the rotation itself is applied to pixel coordinates,
        so the alignment is exact only for square images - confirm whether
        that is intended.

        :param img: image the landmarks refer to (also the drawing target)
        :param draw: whether to draw the rotated points onto ``img``
        :return: list of 21 ``[x, y, 0]`` rows, or the raw landmark list
                 unchanged when fewer than 21 landmarks are available
        """
        h, w, c = img.shape
        if len(self.lmList) < 21:
            # Not a complete hand: pass the raw list through.
            self.re_lmList = self.lmList
            return self.re_lmList
        self.re_lmList = []
        point_x = self.lmList[0][0]
        point_y = self.lmList[0][1]
        delta_x = self.lmList[13][0] - point_x
        delta_y = self.lmList[13][1] - point_y
        if delta_y == 0:
            # Horizontal direction: atan(delta_x / delta_y) is undefined.
            theta = math.pi / 2 if delta_x < 0 else -math.pi / 2
        else:
            theta = math.atan(delta_x / delta_y)
            if delta_y > 0:
                # Flip by half a turn so the direction points to negative y.
                theta = theta + math.pi
        for point in self.lmList:
            px, py = rotate(theta, point[0] * w, point[1] * h, point_x * w, point_y * h)
            self.re_lmList.append([px, py, 0])
            if draw:
                cv2.circle(img, (int(px), int(py)), 5, (0, 0, 255), cv2.FILLED)
        # Normalise each coordinate column on its own. The y column previously
        # received the normalised x values by mistake; models trained on data
        # produced before this fix need to be retrained.
        rotated = np.array(self.re_lmList)
        x_array = normalize(rotated[:, 0])
        y_array = normalize(rotated[:, 1])
        for row, norm_x, norm_y in zip(self.re_lmList, x_array, y_array):
            row[0] = norm_x
            row[1] = norm_y
        return self.re_lmList
    def hand_type(self):
        """
        Classify the hand most recently processed by ``find_position``.

        The decision compares the x coordinates of landmarks 17 and 5.

        :return: 1 when landmark 17 lies left of landmark 5, otherwise 0;
                 None when no hand was detected
        """
        if self.results.multi_hand_landmarks:
            if self.lmList[17][0] < self.lmList[5][0]:
                return 1
            else:
                return 0
 class AI:
    """
    Load recorded ``.npz`` gesture datasets and train a ``CNNTwo`` classifier.
    """
    def __init__(self, datasets_dir):
        """
        :param datasets_dir: directory searched recursively for ``.npz`` datasets
        """
        self.EPOCH = 100
        self.BATCH_SIZE = 4
        self.LR = 10e-5
        self.DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.datasets_dir = datasets_dir
        self.train_loader = None
        self.m = 0
        self.out_label = []  # maps the network's class index to the stored label
    def load_datasets(self):
        """
        Read every ``.npz`` file below ``datasets_dir`` into one training set.

        Each file contributes one class: its ``data`` array supplies the
        samples and its ``label`` entry the class name. Files are visited in
        sorted path order so class indices are reproducible between runs.

        :return: number of classes found
        :raises ValueError: if no ``.npz`` file is found
        """
        train_data = []
        train_label = []
        self.m = 0
        # Start from a clean mapping so repeated calls do not duplicate labels.
        self.out_label = []
        for file in sorted(Path(self.datasets_dir).rglob("*.npz")):
            # The context manager closes the archive once the arrays are read.
            with np.load(str(file)) as data:
                samples = data["data"]
                label = data["label"]
            train_data.append(samples)
            train_label.append(np.ones(len(samples)) * self.m)
            self.out_label.append(label)
            self.m += 1
        if not train_data:
            raise ValueError("no .npz datasets found under {}".format(self.datasets_dir))
        train_data = torch.Tensor(np.concatenate(train_data, axis=0))
        train_data = train_data.unsqueeze(1)  # add the channel axis
        train_label = torch.tensor(np.concatenate(train_label, axis=0)).long()
        dataset = TensorDataset(train_data, train_label)
        self.train_loader = DataLoader(dataset, batch_size=self.BATCH_SIZE, shuffle=True)
        return self.m
    def train_cnn(self):
        """
        Train a ``CNNTwo`` on the loaded data and save it to ``CNN_two.pkl``.

        The datasets are loaded on demand when ``load_datasets`` has not been
        called yet. The whole model object (with ``out_label`` attached) is
        saved after being moved to the CPU, so it can be loaded and fed CPU
        tensors regardless of the device used for training.
        """
        if self.train_loader is None:
            self.load_datasets()
        cnn = CNNTwo(self.m).to(self.DEVICE)
        optimizer = torch.optim.Adam(cnn.parameters(), self.LR)
        loss_func = nn.CrossEntropyLoss()  # expects class indices, not one-hot targets
        for epoch in range(self.EPOCH):
            for step, (data, target) in enumerate(self.train_loader):
                data, target = data.to(self.DEVICE), target.to(self.DEVICE)
                output = cnn(data)
                loss = loss_func(output, target)
                optimizer.zero_grad()  # clear gradients from the previous step
                loss.backward()
                optimizer.step()
                if (step + 1) % 50 == 0:  # progress report every 50 batches
                    print(
                        "\r[Epoch: %d] [%d/%d (%0.f %%)][Loss: %f]"
                        % (
                            epoch + 1,
                            (step + 1) * len(data),
                            len(self.train_loader.dataset),
                            100. * (step + 1) / len(self.train_loader),
                            loss.item()
                        ), end="")
        cnn.out_label = self.out_label
        cnn.to("cpu")
        torch.save(cnn, 'CNN_two.pkl')
        print("训练结束")
 class Main:
    """
    Two-hand gesture workflow: record labelled landmark datasets from a
    camera, train the classifier and classify individual frames.
    """
    def __init__(self):
        self.camera = None
        self.detector = HandDetector()
        self.default_datasets = "Datasets"
        self.len_x = 44  # rows per sample
        self.len_y = 4   # columns per sample
        self.label = ''
        self.result = []  # recent class predictions awaiting a majority vote
        self.disp = ""    # label currently drawn on the frame
    @staticmethod
    def _majority_vote(votes):
        """Return the most frequent value in ``votes``; the smallest value wins ties."""
        return max(sorted(set(votes)), key=votes.count)
    def change_state(self):
        """Button callback: read the label entry and leave the dialog's main loop."""
        self.label = self.entry.get()
        self.top1.quit()
        if self.label == "":
            self.top1.destroy()
    def on_closing(self):
        """Window-close callback: clear the label and destroy the dialog."""
        self.label = ""
        self.top1.destroy()
    def make_datasets(self, camera, datasets_dir="default", n=100):
        """
        Interactively record labelled two-hand samples into ``datasets_dir``.

        A small dialog asks for a label; up to ``n`` frames showing two hands
        are then captured and written to ``<datasets_dir>/<label>.npz``. This
        repeats until an empty label is submitted or the dialog is closed. An
        existing ``datasets_dir`` is deleted first. Nothing happens when
        ``datasets_dir`` is ``"default"``.

        :param camera: opened capture object providing ``read()``
        :param datasets_dir: output directory
        :param n: number of samples to record per label
        """
        if datasets_dir == "default":
            return
        if exists(datasets_dir):
            shutil.rmtree(datasets_dir)
        mkdir(datasets_dir)
        self.camera = camera
        self.top1 = tk.Tk()
        self.top1.geometry('300x50')
        self.top1.title('请输入标签')
        self.top1.protocol("WM_DELETE_WINDOW", self.on_closing)
        tk.Label(self.top1, text='Label:').place(x=27, y=10)
        self.entry = tk.Entry(self.top1, width=15)
        self.entry.place(x=80, y=10)
        tk.Button(self.top1, text='确定', command=self.change_state).place(x=235, y=5)
        self.top1.mainloop()
        while not self.label == "":
            data = np.zeros([n, self.len_x, self.len_y])
            shape_list = np.zeros([n, 2], dtype=np.int16)
            hand_type = np.zeros(n, dtype=np.int8)
            count = 0
            cv2.startWindowThread()
            while True:
                frame, img = self.camera.read()
                if not frame:
                    # The camera delivered no image; stop recording this label.
                    break
                img, is_two_hand = self.detector.find_hands(img)
                result = np.zeros((self.len_x, self.len_y))
                if is_two_hand:
                    lm_list1, bbox1 = self.detector.find_position(img, 0)
                    lm_list2, bbox2 = self.detector.find_position(img, 1)
                    # Stack the rows of both hands into one sample.
                    for row, point in enumerate(lm_list1):
                        result[row] = np.array(point)
                    for row, point in enumerate(lm_list2, start=len(lm_list1)):
                        result[row] = np.array(point)
                    if result.sum() > 0:  # something was actually captured
                        shape1 = bbox1["shape"]
                        x_1, y_1 = bbox1["bbox"][0], bbox1["bbox"][1]
                        x_2, y_2 = bbox2["bbox"][0], bbox2["bbox"][1]
                        data[count] = result
                        hand_type[count] = self.detector.hand_type()
                        shape_list[count] = np.array(shape1)
                        count += 1
                        cv2.putText(img, str("{}/{}".format(count, n)), (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
                                    (0, 255, 0), 3)
                        cv2.putText(img, str("{}/{}".format(count, n)), (x_2, y_2), cv2.FONT_HERSHEY_PLAIN, 3,
                                    (0, 255, 0), 3)
                cv2.imshow("camera", img)
                key = cv2.waitKey(100)
                if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
                    break
                elif key == 27:
                    break
                elif count >= n:
                    # All n slots are filled (stopping at n - 1 left the last
                    # slot as an all-zero sample).
                    break
            cv2.destroyAllWindows()
            if count:
                # Save only the recorded rows so that unused zero rows do not
                # end up as training samples. np.savez creates the file itself.
                np.savez(datasets_dir + "/" + self.label + ".npz", label=self.label, data=data[:count],
                         handtype=hand_type[:count], shape=shape_list[:count])
            self.top1.mainloop()
    def train(self, datasets_dir="default"):
        """
        Train the classifier on the datasets found in ``datasets_dir``.

        :param datasets_dir: dataset directory; ``"default"`` selects
                             ``self.default_datasets``
        """
        if datasets_dir == "default":
            datasets_dir = self.default_datasets
        ai = AI(datasets_dir)
        ai.load_datasets()
        ai.train_cnn()
    def gesture_recognition(self, detector, img, cnn):
        """
        Classify a two-hand gesture in ``img`` and draw the label onto it.

        Predictions are collected over successive calls; after five of them
        the displayed label is updated by majority vote.

        :param detector: hand detector to use (stored in ``self.detector``)
        :param img: BGR frame; annotated in place
        :param cnn: trained model exposing ``out_label``
        :return: 1 when fewer than two hands were detected, otherwise 0
        """
        self.detector = detector
        out_label = cnn.out_label
        img, is_two_hand = self.detector.find_hands(img)
        if not is_two_hand:
            return 1
        lm_list1, bbox1 = self.detector.find_position(img, 0)
        lm_list2, bbox2 = self.detector.find_position(img, 1)
        if lm_list1.any() and lm_list2.any():
            x_1, y_1 = bbox1["bbox"][0], bbox1["bbox"][1]
            x_2, y_2 = bbox2["bbox"][0], bbox2["bbox"][1]
            lm_list = np.concatenate((lm_list1, lm_list2), axis=0)
            # Add batch and channel axes and run on the model's own device.
            data = torch.Tensor(lm_list).unsqueeze(0).unsqueeze(0)
            data = data.to(next(cnn.parameters()).device)
            with torch.no_grad():
                test_output = cnn(data)
            self.result.append(torch.max(test_output, 1)[1].data.cpu().numpy()[0])
            if len(self.result) > 4:
                # Plain-Python vote: the shape returned by scipy.stats.mode
                # differs between SciPy releases, which breaks [0][0] indexing.
                self.disp = str(out_label[self._majority_vote(self.result)])
                self.result = []
            cv2.putText(img, self.disp, (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
                        (0, 0, 255), 3)
            cv2.putText(img, self.disp, (x_2, y_2), cv2.FONT_HERSHEY_PLAIN, 3,
                        (0, 0, 255), 3)
        return 0
 # Script entry point: record two-hand datasets, train, then classify live frames.
 if __name__ == '__main__':
    solution = Main()
    my_datasets_dir = "test-two"
    # make_datasets() takes the capture object as its first argument.
    camera = cv2.VideoCapture(0)
    try:
        solution.make_datasets(camera, my_datasets_dir, 100)
        solution.train(my_datasets_dir)
        # NOTE(review): torch.load unpickles arbitrary objects - only load trusted model files.
        cnn = torch.load("CNN_two.pkl")
        # gesture_recognition() classifies one frame per call, so drive it from a capture loop.
        while True:
            ok, img = camera.read()
            if not ok:
                break
            solution.gesture_recognition(solution.detector, img, cnn)
            cv2.imshow("camera", img)
            key = cv2.waitKey(1)
            if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
                break
            elif key == 27:
                break
    finally:
        camera.release()
        cv2.destroyAllWindows()
--- a/demo.py
+++ b/demo.py
@ -1,8 +1,14 @@
 # -*- coding:utf-8 -*-
 """
 信号设计课程小组设计
@ by: Leaf
@ date: 2022-05-28
 """
 import cv2
 import mediapipe as mp
 import numpy as np
 class HandDetector:
@ -21,7 +27,7 @@ class HandDetector:
        self.results = None
        self.mode = mode
        self.max_hands = max_hands
-        self.modelComplex = False
+        self.modelComplex = 1
        self.detection_con = detection_con
        self.min_track_con = min_track_con
@ -73,7 +79,7 @@ class HandDetector:
                px, py = int(lm.x * w), int(lm.y * h)
                x_list.append(px)
                y_list.append(py)
-                self.lmList.append(np.array([px, py]))
+                self.lmList.append([px, py])
                if draw:
                    cv2.circle(img, (px, py), 5, (255, 0, 255), cv2.FILLED)
            x_min, x_max = min(x_list), max(x_list)
@ -111,9 +117,7 @@ class HandDetector:
                    fingers.append(0)
            # 4 Fingers
            for i in range(1, 5):
-                # if self.lmList[self.tipIds[i]][1] < self.lmList[self.tipIds[i] - 2][1]:
+                if self.lmList[self.tipIds[i]][1] < self.lmList[self.tipIds[i] - 2][1]:
                if np.dot(self.lmList[self.tipIds[i]-2]-self.lmList[self.tipIds[i]-3],
                          self.lmList[self.tipIds[i]-1]-self.lmList[self.tipIds[i]-2]) >= 0:
                    fingers.append(1)
                else:
                    fingers.append(0)
@ -134,49 +138,55 @@ class HandDetector:
 class Main:
    def __init__(self):
        self.detector = None
-        self.camera = None
+        self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
-        # self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
+        self.camera.set(3, 1280)
-        # self.camera.set(3, 1280)
+        self.camera.set(4, 720)
        # self.camera.set(4, 720)
-    def gesture_recognition(self, img, detector):
+    def gesture_recognition(self):
-        self.detector = detector
+        self.detector = HandDetector()
-        img = self.detector.find_hands(img)
+        while True:
-        lm_list, bbox = self.detector.find_position(img)
+            frame, img = self.camera.read()
            img = self.detector.find_hands(img)
            lm_list, bbox = self.detector.find_position(img)
-        if lm_list:
+            if lm_list:
-            x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
+                x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
-            x1, x2, x3, x4, x5 = self.detector.fingers_up()
+                x1, x2, x3, x4, x5 = self.detector.fingers_up()
-            if (np.linalg.norm(lm_list[4]-lm_list[8]) < 50) and (np.linalg.norm(lm_list[4]-lm_list[12]) < 50):
+
-                cv2.putText(img, "7_SEVEN", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                if (x2 == 1 and x3 == 1) and (x4 == 0 and x5 == 0 and x1 == 0):
-                            (0, 0, 255), 3)
+                    cv2.putText(img, "2_TWO", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-            elif (x2 == 1 and x3 == 1) and (x4 == 0 and x5 == 0 and x1 == 0):
+                                (0, 0, 255), 3)
-                cv2.putText(img, "2_TWO", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                elif x3 and x1 == 0 and x2 == 0 and (x4 == 0, x5 == 0):
-                            (0, 0, 255), 3)
+                    cv2.putText(img, "FUCK YOU!!", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-            elif (x2 == 1 and x3 == 1 and x4 == 1) and (x1 == 0 and x5 == 0):
+                                (0, 0, 255), 3)
-                cv2.putText(img, "3_THREE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                elif (x2 == 1 and x3 == 1 and x4 == 1) and (x1 == 0 and x5 == 0):
-                            (0, 0, 255), 3)
+                    cv2.putText(img, "3_THREE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-            elif (x2 == 1 and x3 == 1 and x4 == 1 and x5 == 1) and (x1 == 0):
+                                (0, 0, 255), 3)
-                cv2.putText(img, "4_FOUR", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                elif (x2 == 1 and x3 == 1 and x4 == 1 and x5 == 1) and (x1 == 0):
-                            (0, 0, 255), 3)
+                    cv2.putText(img, "4_FOUR", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-            elif x1 == 1 and x2 == 1 and x3 == 1 and x4 == 1 and x5 == 1:
+                                (0, 0, 255), 3)
-                cv2.putText(img, "5_FIVE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                elif x1 == 1 and x2 == 1 and x3 == 1 and x4 == 1 and x5 == 1:
-                            (0, 0, 255), 3)
+                    cv2.putText(img, "5_FIVE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-            elif (x2 == 1 and x1 == 0) and (x3 == 0 and x4 == 0 and x5 == 0):
+                                (0, 0, 255), 3)
-                cv2.putText(img, "1_ONE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                elif x2 == 1 and x1 == 0 and (x3 == 0, x4 == 0, x5 == 0):
-                            (0, 0, 255), 3)
+                    cv2.putText(img, "1_ONE", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-            elif (x1 == 1 and x2 == 1) and (x3 == 0 and x4 == 0 and x5 == 0):
+                                (0, 0, 255), 3)
-                cv2.putText(img, "8_EIGHT", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                elif x1 == 1 and x2 == 1 and (x3 == 0, x4 == 0, x5 == 0):
-                            (0, 0, 255), 3)
+                    cv2.putText(img, "8_EIGHT", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-            elif (x1 == 1 and x5 == 1) and (x3 == 0 and x4 == 0 and x2 == 0):
+                                (0, 0, 255), 3)
-                cv2.putText(img, "6_SIX", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                elif x1 == 1 and x5 == 1 and (x3 == 0, x4 == 0, x5 == 0):
-                            (0, 0, 255), 3)
+                    cv2.putText(img, "6_SIX", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-            elif x1 == 0 and x5 == 0 and x3 == 0 and x4 == 0 and x2 == 0:
+                                (0, 0, 255), 3)
-                cv2.putText(img, "0_ZERO", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
+                elif x1 and (x2 == 0, x3 == 0, x4 == 0, x5 == 0):
-                            (0, 0, 255), 3)
+                    cv2.putText(img, "GOOD!", (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
-            else:
+                                (0, 0, 255), 3)
-                return 1
+
-        return 0
+            cv2.imshow("camera", img)
            key = cv2.waitKey(1)
            if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
                break
            elif key == 27:
                break
 if __name__ == '__main__':
--- a/gr.py
+++ b/gr.py
@ -1,180 +0,0 @@
 import TM
 import ai
 import ai_two
 import cv2
 import copy
 import torch
 import torch.nn as nn
 class CNN(nn.Module):
    """
    Small convolutional classifier with ``m`` output classes.

    The first pooling layer uses a 1x1 window and therefore keeps the spatial
    size; only the second stage halves it. The first linear layer expects
    32 * 11 * 2 flattened features (e.g. a single-channel 22x4 input).
    """
    def __init__(self, m):
        """
        :param m: number of output classes
        """
        super().__init__()
        self.out_label = []  # class-index -> label mapping, stored with the model
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(1),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.med = nn.Linear(in_features=32 * 11 * 2, out_features=500)
        # med2 / med3 are created but not used by forward().
        self.med2 = nn.Linear(in_features=1 * 21 * 3, out_features=100)
        self.med3 = nn.Linear(in_features=100, out_features=500)
        self.out = nn.Linear(in_features=500, out_features=m)
    def forward(self, x):
        """
        :param x: input batch with one channel
        :return: raw class scores, one row per batch element and ``m`` columns
        """
        features = self.conv2(self.conv1(x))
        # Flatten everything except the batch axis.
        flat = features.view(features.size(0), -1)
        hidden = self.med(flat)
        return self.out(hidden)
 class CNNTwo(nn.Module):
    """
    Small convolutional classifier with ``m`` output classes.

    Both conv stages end in a 2x2 max-pool. The first linear layer expects
    32 * 11 * 1 flattened features, which is what a single-channel 44x4 input
    yields after the two poolings.
    """
    def __init__(self, m):
        """
        :param m: number of output classes
        """
        super().__init__()
        self.out_label = []  # class-index -> label mapping, stored with the model
        stage_one = [
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        self.conv1 = nn.Sequential(*stage_one)
        stage_two = [
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]
        self.conv2 = nn.Sequential(*stage_two)
        self.med = nn.Linear(in_features=32 * 11 * 1, out_features=500)
        # med2 / med3 are created but not used by forward().
        self.med2 = nn.Linear(in_features=1 * 21 * 3, out_features=100)
        self.med3 = nn.Linear(in_features=100, out_features=500)
        self.out = nn.Linear(in_features=500, out_features=m)
    def forward(self, x):
        """
        :param x: input batch with one channel
        :return: raw class scores, one row per batch element and ``m`` columns
        """
        x = self.conv1(x)
        x = self.conv2(x)
        batch = x.size(0)
        x = x.view(batch, -1)  # flatten everything except the batch axis
        return self.out(self.med(x))
 class Main:
    """
    Front end combining three recognisers: the two-hand network (``ai_two``),
    the single-hand network (``ai``) and the rule-based recogniser (``TM``).
    """
    def __init__(self):
        self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
        self.camera.set(3, 1280)
        self.camera.set(4, 720)
        self.tm_detector = TM.HandDetector()
        self.ai_detector = ai.HandDetector()
        self.at_detector = ai_two.HandDetector()
        self.tm_main = TM.Main()
        self.ai_main = ai.Main()
        self.at_main = ai_two.Main()
    def _load_models(self, diy):
        """Return ``(cnn, cnn_two)`` loaded from disk, or ``(None, None)`` when ``diy`` is falsy."""
        if not diy:
            return None, None
        # NOTE(review): torch.load unpickles arbitrary objects - only load trusted model files.
        return torch.load("CNN.pkl"), torch.load("CNN_two.pkl")
    def _recognize(self, img, cnn, cnn_two):
        """Run the recogniser cascade on one frame and return the frame to display."""
        img_tm = copy.deepcopy(img)
        if cnn is None or cnn_two is None:
            # No trained networks requested: use the rule-based recogniser only.
            self.tm_main.gesture_recognition(img_tm, self.tm_detector)
            return img_tm
        not_match = 0
        is_one_hand = self.at_main.gesture_recognition(self.at_detector, img, cnn_two)
        if is_one_hand:
            not_match = self.ai_main.gesture_recognition_camera(self.ai_detector, img, cnn)
            if not_match:
                # Fall back to the rule-based recogniser on the untouched copy.
                self.tm_main.gesture_recognition(img_tm, self.tm_detector)
        return img_tm if not_match else img
    def _show(self, frame):
        """Display ``frame``; return False once the window is closed or ESC is pressed."""
        cv2.imshow("camera", frame)
        key = cv2.waitKey(1)
        if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
            return False
        return key != 27
    def gr_img(self, filedir, diy):
        """
        Recognise gestures in a still image until the window is closed or ESC is pressed.

        :param filedir: path of the image file
        :param diy: truthy to use the trained networks from ``CNN.pkl`` and
                    ``CNN_two.pkl``; falsy to use only the rule-based recogniser
        :raises FileNotFoundError: if OpenCV cannot read an image from ``filedir``
        """
        print(filedir)
        cnn, cnn_two = self._load_models(diy)
        source = cv2.imread(filedir)
        if source is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError("cannot read image: {}".format(filedir))
        while True:
            # Work on a fresh copy each round; the recognisers draw on the frame.
            if not self._show(self._recognize(source.copy(), cnn, cnn_two)):
                break
    def gr_video(self, filedir, diy):
        """
        Recognise gestures frame by frame in a video file.

        Stops at the end of the video, when the window is closed or on ESC.

        :param filedir: path of the video file
        :param diy: see ``gr_img``
        """
        cnn, cnn_two = self._load_models(diy)
        cap = cv2.VideoCapture(filedir)
        try:
            while True:
                ret, img = cap.read()
                if not ret:
                    # End of stream or unreadable file.
                    break
                if not self._show(self._recognize(img, cnn, cnn_two)):
                    break
        finally:
            cap.release()
    def gr_realtime(self, diy):
        """
        Recognise gestures on the live camera until the window is closed or ESC is pressed.

        :param diy: see ``gr_img``
        """
        cnn, cnn_two = self._load_models(diy)
        while True:
            frame, img = self.camera.read()
            if not frame:
                # The camera delivered no image.
                break
            if not self._show(self._recognize(img, cnn, cnn_two)):
                break
    def ai_input(self):
        """Record and train custom gestures for the single-hand and then the two-hand network."""
        self.ai_main.make_datasets(self.camera, "ai_datasets", 100)
        self.ai_main.train("ai_datasets")
        self.at_main.make_datasets(self.camera, "ai_two_datasets", 100)
        self.at_main.train("ai_two_datasets")
 # Script entry point for running this module directly.
 if __name__ == '__main__':
    main = Main()
    # NOTE(review): an empty image path and diy=0 look like placeholders -
    # confirm the intended arguments.
    main.gr_img("", 0)
--- a/main.py
+++ b/main.py
@ -1,112 +0,0 @@
 # -*- coding:utf-8 -*-
 """
 信号设计课程小组设计
@ by: Leaf
@ date: 2022-05-28
 """
 import gr
 import tkinter as tk
 from tkinter import filedialog, Button, Label, Frame, ACTIVE, LEFT
 from PIL import Image, ImageTk
 class DisplayImage:
    """Main window: pick an image or video, preview it and start gesture recognition."""
    def __init__(self, master):
        """
        Build all widgets inside ``master`` and create the recognition back end.

        :param master: Tk root (or toplevel) window that hosts the widgets
        """
        self.master = master
        master.title("GUI")
        self.Text_lab0 = Label(master, text='已加载图像/视频')
        self.Text_lab0.pack(pady=10)
        self.image_frame = Frame(master, bd=0, height=300, width=300, bg='white', highlightthickness=2,
                                 highlightbackground='gray', highlightcolor='black')
        self.image_frame.pack()
        self.Text_label = Label(master, text='加载待识别影像/视频')
        self.Text_label.place(x=60, y=410)
        self.Choose_image = Button(master, command=self.choose_img, text="图像",
                                   width=7, default=ACTIVE, borderwidth=0)
        self.Choose_image.place(x=50, y=450)
        # Kept under its own name so the image button's reference is not overwritten.
        self.Choose_video = Button(master, command=self.choose_video, text="视频",
                                   width=7, default=ACTIVE, borderwidth=0)
        self.Choose_video.place(x=120, y=450)
        self.Text_label2 = Label(master, text='运行手势识别程序')
        self.Text_label2.place(x=60, y=500)
        self.image_mosaic = Button(master, command=self.gesture_recognition, text="Gesture recognition",
                                   width=17, default=ACTIVE, borderwidth=0)
        self.image_mosaic.place(x=50, y=540)
        self.Text_label3 = Label(master, text='运行实时手势识别程序')
        self.Text_label3.place(x=300, y=410)
        self.realtime = Button(master, command=self.realtime_gr, text="Realtime\n gesture recognition",
                               width=17, height=6, default=ACTIVE, borderwidth=0)
        self.realtime.place(x=300, y=450)
        self.Text_label4 = Label(master, text='录入自定义手势')
        self.Text_label4.place(x=180, y=610)
        self.input = Button(master, command=self.input_image, text="Input gesture",
                            width=42, default=ACTIVE, borderwidth=0)
        self.input.place(x=60, y=650)
        self.gr = gr.Main()
        self.temp_dir = "temp"
        self.mode = 0        # 0: nothing selected, 1: image, 2: video
        self.directory = ""  # path of the selected file
        self.diy = 1         # passed to the back end to enable the trained networks
    def _show_preview(self, img):
        """Replace the preview frame's content with ``img`` and save a copy in ``temp_dir``."""
        import os
        for widget in self.image_frame.winfo_children():
            widget.destroy()
        os.makedirs(self.temp_dir, exist_ok=True)
        img.save(self.temp_dir + "/photo.png")
        image = ImageTk.PhotoImage(image=img)
        label = Label(self.image_frame, highlightthickness=0, borderwidth=0)
        label.configure(image=image)
        # Tk does not hold a Python reference to the image; without this it can
        # be garbage-collected as soon as this method returns.
        label.image = image
        label.pack(side=LEFT, expand=True)
    def choose_img(self):
        """Ask for an image file, remember it and show a 300x300 preview."""
        path = filedialog.askopenfilename()
        if not path:
            # Dialog cancelled: keep the current selection.
            return
        self.mode = 1
        self.directory = path
        self._show_preview(Image.open(self.directory).resize((300, 300)))
    def choose_video(self):
        """Ask for a video file, remember it and show the placeholder picture from ``temp_dir``."""
        path = filedialog.askopenfilename()
        if not path:
            # Dialog cancelled: keep the current selection.
            return
        self.mode = 2
        self.directory = path
        # Videos are represented in the preview by a fixed placeholder image.
        self._show_preview(Image.open(self.temp_dir+"/video.jpg").resize((300, 300)))
    def gesture_recognition(self):
        """Run recognition on the selected image or video; does nothing if none is selected."""
        if self.mode == 1:
            self.gr.gr_img(self.directory, self.diy)
        elif self.mode == 2:
            self.gr.gr_video(self.directory, self.diy)
    def realtime_gr(self):
        """Run recognition on the live camera."""
        self.gr.gr_realtime(self.diy)
    def input_image(self):
        """Record and train custom gestures."""
        self.diy = 1
        self.gr.ai_input()
 def main():
    """Create the application window, attach the GUI and run the Tk event loop."""
    root = tk.Tk()
    DisplayImage(root)
    # Applied after the GUI is built, so these override the title set there.
    root.title('手势识别')
    root.geometry('500x720')
    root.mainloop()
 # Start the GUI only when this file is executed as a script.
 if __name__ == '__main__':
    main()