scripts/quick_analysis.py

3.7 KB · 99 lines · python Raw

1	`import os`
2	`from PIL import Image`
3	`from collections import Counter`
4
def analyze_images(directory):
    """Collect per-folder image statistics for every folder under *directory*.

    The tree is walked with ``os.walk``. Every folder that directly contains
    at least one file gets an entry keyed by the folder's base name, so
    folders that share a base name at different depths are merged.

    Args:
        directory: Root of the tree to scan. A missing or empty directory
            produces an empty result.

    Returns:
        A dict mapping folder name to a dict with the keys:

        * ``'image_count'`` - number of files that could be opened as images,
        * ``'total_size'`` - combined on-disk size of those files in bytes,
        * ``'resolutions'`` - ``Counter`` of ``(width, height)`` tuples.
    """
    analysis_results = {}

    for root, _dirs, files in os.walk(directory):
        if not files:
            continue

        # normpath drops a trailing separator so a root given as "real/" is
        # reported as "real" instead of an empty name.
        model_folder_name = os.path.basename(os.path.normpath(root))
        stats = analysis_results.setdefault(
            model_folder_name,
            {
                'image_count': 0,
                'total_size': 0,
                'resolutions': Counter(),
            },
        )

        for file in files:
            file_path = os.path.join(root, file)

            try:
                with Image.open(file_path) as img:
                    width, height = img.size
                file_size = os.path.getsize(file_path)
            except Exception as e:
                # Best-effort scan: image decoders raise many unrelated
                # exception types on corrupt or non-image files, so report
                # the file and keep going rather than abort the whole walk.
                print(f"Error reading file {file_path}: {e}")
                continue

            # Update all three statistics together, and only for files that
            # really are readable images, so the count always agrees with
            # the size and resolution figures.
            stats['image_count'] += 1
            stats['total_size'] += file_size
            stats['resolutions'][(width, height)] += 1

    return analysis_results
38
def print_and_log_analysis_results(analysis_results, dataset_name, log_file):
    """Print a per-folder summary table and append the same text to a log.

    Args:
        analysis_results: Mapping of folder name to a dict holding
            ``'image_count'``, ``'total_size'`` (bytes) and ``'resolutions'``
            (a ``Counter`` of ``(width, height)`` tuples). May be empty, in
            which case only the title, header and separator are emitted.
        dataset_name: Label used in the "Analysis for ..." title line.
        log_file: Path of the text file the table is appended to.
    """
    # default=0 keeps an empty result set (e.g. a missing directory) from
    # raising ValueError; the column is always at least 20 characters wide.
    longest_name = max((len(model) for model in analysis_results), default=0)
    model_column_width = max(longest_name, 20)

    # Each width must be at least as long as its header label, otherwise the
    # header row ends up wider than the data rows.
    image_count_width = 12
    total_size_width = 15
    resolution_width = 25

    header = (
        f"{'Model':<{model_column_width}} | "
        f"{'Image Count':>{image_count_width}} | "
        f"{'Total Size (MB)':>{total_size_width}} | "
        f"{'Most Common Resolution':<{resolution_width}}"
    )
    # Derive the rule from the header so the two can never drift apart.
    separator = "-" * len(header)

    result_lines = [
        f"Analysis for {dataset_name}:\n",
        header + "\n",
        separator + "\n",
    ]

    for model, data in analysis_results.items():
        total_size_mb = data['total_size'] / (1024 * 1024)
        most_common_resolution = data['resolutions'].most_common(1)

        if most_common_resolution:
            (width, height), occurrences = most_common_resolution[0]
            common_res = f"{width}x{height} ({occurrences} images)"
        else:
            # No file in this folder could be read as an image.
            common_res = "None"

        result_lines.append(
            f"{model:<{model_column_width}} | "
            f"{data['image_count']:>{image_count_width}} | "
            f"{total_size_mb:>{total_size_width}.2f} | "
            f"{common_res:<{resolution_width}}\n"
        )

    result_lines.append("\n")

    # Print to console
    print("".join(result_lines), end='')

    # Append to the log; an explicit encoding keeps non-ASCII folder names
    # from failing under a narrow platform default.
    with open(log_file, 'a', encoding='utf-8') as f:
        f.writelines(result_lines)
78
def main():
    """Analyze the generated and the real image sets and log both reports.

    The log file is emptied first, then each dataset directory is scanned and
    its table is printed and appended to the log, generated images first.
    """
    log_file = 'analysis_results.txt'

    # (directory to scan, label shown in the report title), in report order.
    datasets = (
        ('resampledEvalSet', "Generated Images"),
        ('real', "Real Images"),
    )

    # Truncate the log so each run starts fresh (optional; remove this to
    # keep appending to the results of earlier runs).
    with open(log_file, 'w'):
        pass

    for image_dir, label in datasets:
        results = analyze_images(image_dir)
        print_and_log_analysis_results(results, label, log_file)
96
# Run the analysis only when this file is executed as a script, so importing
# the module for its functions has no side effects.
if __name__ == "__main__":
    main()
99