Browse Source

修改:增加众数判定

AI
leaf 2 years ago
parent
commit
96c6039474
  1. BIN
      CNN.pkl
  2. 58
      ai.py

BIN
CNN.pkl

Binary file not shown.

58
ai.py

@ -14,6 +14,7 @@ import torch.nn as nn
import numpy as np
import shutil
import math
from scipy import stats
from os.path import exists
from os import mkdir
from pathlib import Path
@ -27,6 +28,13 @@ def rotate(angle, x, y, point_x, point_y):
return px, py
# Min-max normalization helper used on landmark coordinate columns.
def normalize(x):
    """Rescale the values of *x* to the [0, 1] range (min-max scaling).

    Parameters
    ----------
    x : numpy.ndarray (or array_like accepted by np.max/np.min)
        Numeric values to rescale. Callers in this file pass one column
        of the landmark array, e.g. ``np.array(self.re_lmList)[:, 0]``.

    Returns
    -------
    numpy.ndarray
        ``(x - min) / (max - min)``. When every element of *x* is equal
        the original expression would divide by zero and produce
        NaN/inf; in that degenerate case an all-zero array is returned
        instead.
    """
    max_x = np.max(x)
    min_x = np.min(x)
    span = max_x - min_x
    if span == 0:
        # All values identical: avoid 0/0 -> NaN; zeros are a sane
        # "no spread" normalization result.
        return np.zeros_like(np.asarray(x, dtype=float))
    return (x - min_x) / span
class CNN(nn.Module):
def __init__(self, m):
super(CNN, self).__init__()
@ -45,9 +53,9 @@ class CNN(nn.Module):
self.conv2 = nn.Sequential(
nn.Conv2d(16, 32, 5, 1, 2),
nn.ReLU(),
nn.MaxPool2d(3),
nn.MaxPool2d(2),
)
self.med = nn.Linear(32 * 7 * 1, 500)
self.med = nn.Linear(32 * 11 * 2, 500)
self.med2 = nn.Linear(1*21*3, 100)
self.med3 = nn.Linear(100, 500)
self.out = nn.Linear(500, m) # fully connected layer, output 10 classes
@ -131,7 +139,7 @@ class HandDetector:
px, py = int(lm.x * w), int(lm.y * h)
x_list.append(px)
y_list.append(py)
self.lmList.append([lm.x, lm.y, lm.z])
self.lmList.append([lm.x, lm.y, 0])
if draw:
cv2.circle(img, (px, py), 5, (255, 0, 255), cv2.FILLED)
x_min, x_max = min(x_list), max(x_list)
@ -147,6 +155,10 @@ class HandDetector:
(0, 255, 0), 2)
self.revolve(img)
self.re_lmList = np.array(self.re_lmList)
if self.re_lmList.any():
self.re_lmList = np.concatenate((np.zeros((21, 1)), self.re_lmList), axis=1)
self.re_lmList = np.concatenate((self.re_lmList, np.zeros((1, 4))), axis=0)
return self.re_lmList, bbox_info
@ -176,11 +188,18 @@ class HandDetector:
theta = theta + math.pi
# print(theta*180/math.pi)
for i in self.lmList:
x, y = rotate(theta, i[0], i[1], point_x, point_y)
px, py = int(x * w), int(y * h)
self.re_lmList.append([x, y, i[2]])
px, py = rotate(theta, i[0] * w, i[1] * h, point_x * w, point_y * h)
self.re_lmList.append([px, py, 0])
if draw:
cv2.circle(img, (px, py), 5, (0, 0, 255), cv2.FILLED)
cv2.circle(img, (int(px), int(py)), 5, (0, 0, 255), cv2.FILLED)
# 归一化
x_array = normalize(np.array(self.re_lmList)[:, 0])
# print(x_array)
for i in range(len(x_array)):
self.re_lmList[i][0] = x_array[i]
y_array = normalize(np.array(self.re_lmList)[:, 1])
for i in range(len(y_array)):
self.re_lmList[i][1] = x_array[i]
else:
self.re_lmList = self.lmList
return self.re_lmList
@ -263,6 +282,8 @@ class Main:
self.camera = None
self.detector = HandDetector()
self.default_datasets = "Datasets"
self.len_x = 22
self.len_y = 4
def make_datasets(self, datasets_dir="default", n=100):
if datasets_dir == "default":
@ -276,24 +297,23 @@ class Main:
self.camera.set(4, 720)
label = input("label:")
while not label == "":
data = np.zeros([n, 21, 3])
data = np.zeros([n, self.len_x, self.len_y])
shape_list = np.zeros([n, 2], dtype=np.int16)
hand_type = np.zeros(n, dtype=np.int8)
zero_data = np.zeros([21, 3])
count = 0
cv2.startWindowThread()
while True:
frame, img = self.camera.read()
img = self.detector.find_hands(img)
result = np.zeros((21, 3))
result = np.zeros((self.len_x, self.len_y))
lm_list, bbox = self.detector.find_position(img)
for i in range(len(lm_list)):
result[i] = np.array(lm_list[i])
shape = bbox["shape"]
if result.sum() > 0: # 假设矩阵不为0,即捕捉到手部时
if result.all() != zero_data.all(): # 假设矩阵不为0,即捕捉到手部时
shape = bbox["shape"]
x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
data[count] = result
hand_type[count] = self.detector.hand_type()
@ -332,20 +352,26 @@ class Main:
self.detector = HandDetector()
cnn = torch.load("CNN.pkl")
out_label = cnn.out_label
result = []
disp = ""
while True:
frame, img = self.camera.read()
img = self.detector.find_hands(img)
lm_list, bbox = self.detector.find_position(img)
if lm_list:
if lm_list.any():
x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
data = torch.Tensor(lm_list)
data = data.unsqueeze(0)
data = data.unsqueeze(0)
test_output = cnn(data)
result = torch.max(test_output, 1)[1].data.cpu().numpy()[0]
cv2.putText(img, str(out_label[result]), (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
result.append(torch.max(test_output, 1)[1].data.cpu().numpy()[0])
if len(result) > 5:
disp = str(out_label[stats.mode(result)[0][0]])
result = []
cv2.putText(img, disp, (x_1, y_1), cv2.FONT_HERSHEY_PLAIN, 3,
(0, 0, 255), 3)
cv2.imshow("camera", img)
@ -359,6 +385,6 @@ class Main:
if __name__ == '__main__':
solution = Main()
my_datasets_dir = "test"
solution.make_datasets(my_datasets_dir, 50)
solution.make_datasets(my_datasets_dir, 100)
solution.train(my_datasets_dir)
solution.gesture_recognition()

Loading…
Cancel
Save