processor_config.json
1.6 KB · 76 lines · json Raw
1 {
2 "audio_ms_per_token": 40,
3 "audio_seq_length": 750,
4 "feature_extractor": {
5 "dither": 0.0,
6 "feature_extractor_type": "Gemma4AudioFeatureExtractor",
7 "feature_size": 128,
8 "fft_length": 512,
9 "fft_overdrive": false,
10 "frame_length": 320,
11 "hop_length": 160,
12 "input_scale_factor": 1.0,
13 "max_frequency": 8000.0,
14 "mel_floor": 0.001,
15 "min_frequency": 0.0,
16 "padding_side": "right",
17 "padding_value": 0.0,
18 "per_bin_mean": null,
19 "per_bin_stddev": null,
20 "preemphasis": 0.0,
21 "preemphasis_htk_flavor": true,
22 "return_attention_mask": true,
23 "sampling_rate": 16000
24 },
25 "image_processor": {
26 "do_convert_rgb": true,
27 "do_normalize": false,
28 "do_rescale": true,
29 "do_resize": true,
30 "image_mean": [
31 0.0,
32 0.0,
33 0.0
34 ],
35 "image_processor_type": "Gemma4ImageProcessor",
36 "image_seq_length": 280,
37 "image_std": [
38 1.0,
39 1.0,
40 1.0
41 ],
42 "max_soft_tokens": 280,
43 "patch_size": 16,
44 "pooling_kernel_size": 3,
45 "resample": 3,
46 "rescale_factor": 0.00392156862745098
47 },
48 "image_seq_length": 280,
49 "processor_class": "Gemma4Processor",
50 "video_processor": {
51 "do_convert_rgb": true,
52 "do_normalize": true,
53 "do_rescale": true,
54 "do_resize": true,
55 "do_sample_frames": true,
56 "image_mean": [
57 0.0,
58 0.0,
59 0.0
60 ],
61 "image_std": [
62 1.0,
63 1.0,
64 1.0
65 ],
66 "max_soft_tokens": 70,
67 "num_frames": 32,
68 "patch_size": 16,
69 "pooling_kernel_size": 3,
70 "resample": 3,
71 "rescale_factor": 0.00392156862745098,
72 "return_metadata": false,
73 "video_processor_type": "Gemma4VideoProcessor"
74 }
75 }
76