# MouseControl/HandTrackingModule.py (GitHub: lrongyi/MouseControl, branch main)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import cv2 as cv
import mediapipe as mp
import time

class handDetector():
    """Detect hands in BGR frames with MediaPipe Hands and expose their landmarks.

    Call ``findHands`` on a frame first; it stores the detection result that
    ``findPosition`` then converts into pixel coordinates.
    """

    # Landmark index that findPosition highlights (THUMB_TIP in MediaPipe's
    # hand landmark model).
    _HIGHLIGHT_ID = 4

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
        """Create the MediaPipe Hands solution and the drawing helper.

        Args:
            mode: Forwarded to ``Hands`` as ``static_image_mode``.
            maxHands: Forwarded as ``max_num_hands``.
            detectionCon: Forwarded as ``min_detection_confidence``.
            trackCon: Forwarded as ``min_tracking_confidence``.
        """
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        self.mpHands = mp.solutions.hands
        # Keyword arguments keep the call independent of the positional
        # parameter order of Hands().
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.trackCon
        )
        self.mpDraw = mp.solutions.drawing_utils

        # No frame has been processed yet; findPosition treats None as
        # "no hands" instead of failing on a missing attribute.
        self.results = None

    def findHands(self, img, draw=True):
        """Run hand detection on a frame and optionally draw the hand skeletons.

        Args:
            img: Frame in BGR channel order; it is converted to RGB before
                being handed to MediaPipe.
            draw: When true, draw the landmarks and their connections for
                every detected hand onto ``img``.

        Returns:
            The ``img`` object that was passed in.
        """
        imgRGB = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)

        detected = self.results.multi_hand_landmarks
        if draw and detected:
            for handLandmarks in detected:
                self.mpDraw.draw_landmarks(img, handLandmarks, self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNum=0, draw=True):
        """Return pixel coordinates of one detected hand's landmarks.

        Uses the result stored by the most recent ``findHands`` call.

        Args:
            img: The frame the landmarks belong to; its height and width
                scale the landmark coordinates. Two-dimensional (grayscale)
                and three-dimensional frames are both accepted.
            handNum: Index into the list of detected hands.
            draw: When true, draw a filled circle on ``img`` at the
                highlighted landmark (index 4).

        Returns:
            A list of ``[landmark_id, x, y]`` entries in pixel coordinates.
            The list is empty when no frame has been processed, no hand was
            detected, or ``handNum`` is out of range.
        """
        results = self.results
        detected = results.multi_hand_landmarks if results is not None else None
        if not detected:
            return []

        try:
            hand = detected[handNum]
        except IndexError:
            # Fewer hands are visible than the caller asked for.
            return []

        # The frame size is constant for the whole hand, so read it once;
        # slicing tolerates frames without a channel axis.
        h, w = img.shape[:2]

        landmarks = []
        for idx, landmark in enumerate(hand.landmark):
            # Scale the landmark's x/y by the frame size to get a pixel.
            centerx, centery = int(landmark.x * w), int(landmark.y * h)
            landmarks.append([idx, centerx, centery])

            if draw and idx == self._HIGHLIGHT_ID:
                cv.circle(img, (centerx, centery), 15, (255, 0, 0), cv.FILLED)

        return landmarks


def main():
    """Run a webcam demo that shows detected hands and the current frame rate.

    Frames are read from capture device 0, annotated by ``handDetector`` and
    displayed in a window titled 'Image'. The loop ends when the 'd' key is
    pressed or when a frame can no longer be read. The capture device and
    all windows are released on the way out, including when an error occurs.
    """
    cap = cv.VideoCapture(0)
    detector = handDetector()

    # perf_counter is monotonic, so the frame interval can never be negative.
    prevTime = time.perf_counter()

    try:
        while True:
            # Reading the webcam
            success, img = cap.read()
            if not success:
                # No frame was delivered (camera missing or stream ended);
                # img is not usable, so stop instead of crashing downstream.
                break

            img = detector.findHands(img)
            # Called for its drawing side effect; the returned list is not
            # needed in this demo.
            detector.findPosition(img)

            currTime = time.perf_counter()
            elapsed = currTime - prevTime
            # Guard against a zero interval on coarse clocks.
            fps = 1 / elapsed if elapsed > 0 else 0
            prevTime = currTime

            cv.putText(img, str(int(fps)), (10, 70), cv.FONT_HERSHEY_SIMPLEX, 3, (0, 255, 0), 3)
            cv.imshow('Image', img)

            if cv.waitKey(20) & 0xFF == ord('d'):
                break
    finally:
        cap.release()
        cv.destroyAllWindows()

# Start the webcam demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()