-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfingerspelling_to_pandas_singlehand_landmarks.py
109 lines (85 loc) · 3.7 KB
/
fingerspelling_to_pandas_singlehand_landmarks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import pathlib
from typing import Any, Dict, List, Tuple
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
import tqdm
from mediapipe.python.solution_base import SolutionBase
def extract_hand_point_cloud(
hand_result: SolutionBase,
) -> List[Tuple[float, float, float]]:
hand_num_landmarks = 21
# hand_point_cloud structure
# columns are x, y, z coordinates
# first 21 rows are left hand landmarks
# following 21 rows are right hand landmarks.
hand_point_cloud = [(np.nan, np.nan, np.nan)] * hand_num_landmarks
if hand_result.multi_hand_landmarks:
for hand_landmarks in hand_result.multi_hand_landmarks:
for landmark_index, landmark in enumerate(hand_landmarks.landmark):
point_coordinate = (landmark.x, landmark.y, landmark.z)
hand_point_cloud[landmark_index] = point_coordinate
return hand_point_cloud
def create_column_map(
    hand_point_cloud: List[Tuple[float, float, float]]
) -> Dict[str, float]:
    """Flatten a 21-point hand cloud into a {column_name: value} mapping.

    Column names follow the pattern "{coord}_hand_{index}",
    e.g. "x_hand_0" .. "z_hand_20", yielding 63 entries.

    Args:
        hand_point_cloud: Exactly 21 (x, y, z) coordinate tuples.

    Returns:
        Mapping of 63 column names to float32 coordinate values.

    Raises:
        ValueError: If the cloud does not contain exactly 21 points, or
            any point is not a 3-element coordinate.
    """
    num_landmarks = 21
    if len(hand_point_cloud) != num_landmarks:
        raise ValueError(
            f"Expected {num_landmarks} hand landmarks, "
            f"got {len(hand_point_cloud)}."
        )
    if not all(len(point) == 3 for point in hand_point_cloud):
        raise ValueError("Each hand landmark must be an (x, y, z) triple.")
    column_map = {}
    for point_ind, point in enumerate(hand_point_cloud):
        for coord_name, coord_value in zip(("x", "y", "z"), point):
            # float32 keeps the resulting DataFrame compact; precision is
            # ample for normalized landmark coordinates.
            column_map[f"{coord_name}_hand_{point_ind}"] = np.float32(coord_value)
    return column_map
def process_dataset(dataset_path: pathlib.Path) -> pd.DataFrame:
    """Extract single-hand landmarks for every frame image in a dataset tree.

    Expects the layout ``dataset_path/<person>/<letter>/frame_*`` and
    returns one row per frame with 63 landmark columns plus "person",
    "letter" and "img_file" (the last 4 path components of the frame).

    Args:
        dataset_path: Root directory of the recorded dataset.

    Returns:
        DataFrame with one row per processed frame image.

    Raises:
        FileNotFoundError: If a frame image cannot be read.
    """
    data = []
    for person_dir in tqdm.tqdm(
        list(dataset_path.iterdir()), desc="Persons", leave=False
    ):
        if not person_dir.is_dir():
            continue
        person = person_dir.name
        for letter_dir in tqdm.tqdm(
            list(person_dir.iterdir()), desc="Letters", leave=False
        ):
            if not letter_dir.is_dir():
                continue
            letter = letter_dir.name
            # Use a context manager so the per-letter Hands graph releases
            # its native resources instead of leaking one per directory.
            with mp.solutions.hands.Hands(
                static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5
            ) as mp_hands:
                file_paths = list(letter_dir.glob("frame_*"))
                # file_paths = list(letter_dir.glob("color_*"))
                # sort by filename as video is saved as series of frame images:
                # with 'static_image_mode' False, the landmark extractor
                # assumes a series of "smooth" transitions between frames.
                file_paths = sorted(file_paths)
                for file_path in tqdm.tqdm(file_paths, desc="Images", leave=False):
                    image = cv2.imread(str(file_path))
                    if image is None:
                        # cv2.imread signals failure by returning None; fail
                        # loudly instead of crashing later inside cvtColor.
                        raise FileNotFoundError(
                            f"Could not read image: {file_path}"
                        )
                    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    results = mp_hands.process(image_rgb)
                    hand_point_cloud = extract_hand_point_cloud(results)
                    column_map: Dict[str, Any] = create_column_map(hand_point_cloud)
                    column_map["person"] = person
                    column_map["letter"] = letter
                    column_map["img_file"] = str(pathlib.Path(*file_path.parts[-4:]))
                    data.append(column_map)
    return pd.DataFrame(data)
if __name__ == "__main__":
data_basepath = pathlib.Path.home() / "data"
# dataset_path = data_basepath / "fingerspelling5"
dataset_path = (
data_basepath / "recorded" / "asl_alphabet" / "images"
) # self recorded data
output_path = pathlib.Path(__file__).parent / "data"
# output_file = output_path / "fingerspelling5_singlehands_with_filepath_sorted.csv"
output_file = (
output_path / "recorded_asl_alphabet_singlehands_with_filepath_sorted.csv"
)
df = process_dataset(dataset_path=dataset_path)
df.to_csv(output_file, index=False)
print(f"Saved to {str(output_file)}")