scripts/resample_evalset.py

2.7 KB · 78 lines · python Raw

1	`import os`
2	`import random`
3	`import cv2`
4	`from datetime import datetime`
5	`import logging`
6
# Send INFO-and-above records to a log file (path is relative to the
# current working directory), each prefixed with timestamp and level.
log_file = "sample_images.log"
logging.basicConfig(
    filename=log_file,
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
11
# Lazily-created Haar cascade shared by every detect_faces() call.
_face_cascade = None


def _get_face_cascade():
    """Return the shared frontal-face Haar cascade, loading it on first use."""
    global _face_cascade
    if _face_cascade is None:
        _face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
    return _face_cascade


def detect_faces(image_path):
    """Report whether at least one frontal face is detected in an image.

    The image is read in grayscale and run through OpenCV's
    ``haarcascade_frontalface_default.xml`` cascade with
    ``scaleFactor=1.1``, ``minNeighbors=5`` and ``minSize=(30, 30)``.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        True if the cascade reports one or more detections; False if it
        reports none or if ``cv2.imread`` returns None for the path.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        return False

    # The classifier is loaded once and reused: re-parsing the cascade XML
    # for every image is pure overhead when scanning a whole folder tree.
    faces = _get_face_cascade().detectMultiScale(
        image, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
    )

    return len(faces) > 0
26
def sample_images(input_folder, output_folder, sample_rate=0.2):
    """Mirror ``input_folder`` into ``output_folder``, keeping only face images.

    Walks ``input_folder`` recursively, recreates every sub-directory under
    ``output_folder``, and hard-links each image file for which
    ``detect_faces`` returns True into the matching output sub-directory.
    Files are considered images when their name ends (case-insensitively)
    with .png, .jpg, .jpeg, .bmp or .gif.

    A progress line is printed after each directory, and each linked file,
    the total elapsed time and the final counts are written via ``logging``.

    Args:
        input_folder: Root directory to scan.
        output_folder: Root directory that receives the selected files;
            created if missing.
        sample_rate: Currently unused. Kept so existing callers that pass it
            keep working; selection is decided by face detection alone.

    Raises:
        OSError: If a directory cannot be created, or a file can be neither
            hard-linked nor copied.
    """
    import shutil  # local import: only needed for the hard-link fallback

    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    output_root = os.path.abspath(output_folder)

    os.makedirs(output_folder, exist_ok=True)

    total_files = 0
    sampled_files = 0
    start_time = datetime.now()

    for root, dirs, files in os.walk(input_folder):
        # If the output folder lives inside the input tree, do not descend
        # into it; otherwise already-selected files would be re-processed.
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != output_root
        ]

        relative_path = os.path.relpath(root, input_folder)
        output_subfolder = os.path.join(output_folder, relative_path)
        os.makedirs(output_subfolder, exist_ok=True)

        # Every file counts towards the total, not just images.
        total_files += len(files)

        for file in files:
            if not file.lower().endswith(image_extensions):
                continue

            input_file_path = os.path.join(root, file)
            if not detect_faces(input_file_path):
                continue

            sampled_files += 1
            output_file_path = os.path.join(output_subfolder, file)

            try:
                os.link(input_file_path, output_file_path)
            except FileExistsError:
                # Left over from a previous run: keep it rather than crash.
                logging.info(
                    f"Skipped {input_file_path}: {output_file_path} already exists"
                )
                continue
            except OSError:
                # Hard links fail across filesystems or where unsupported;
                # fall back to a real copy. A genuine I/O problem will still
                # surface from copy2 itself.
                shutil.copy2(input_file_path, output_file_path)

            logging.info(f"Sampled and copied {input_file_path} to {output_file_path}")

        elapsed_time = datetime.now() - start_time
        print(f"Processed {sampled_files}/{total_files} files in {elapsed_time}")

    end_time = datetime.now()
    total_time = end_time - start_time
    logging.info(f"Total time taken: {total_time}")
    logging.info(f"Sampled {sampled_files} out of {total_files} files.")
73
if __name__ == "__main__":
    # Script entry point: both folders are relative to the current
    # working directory.
    input_folder, output_folder = "EvalSet", "resampledEvalSet"
    sample_images(input_folder=input_folder, output_folder=output_folder)
78