vae/config.json
1.7 KB · 129 lines · json Raw
1 {
2 "_class_name": "AutoencoderKLWan",
3 "_diffusers_version": "0.35.0.dev0",
4 "attn_scales": [],
5 "base_dim": 160,
6 "clip_output": false,
7 "decoder_base_dim": 256,
8 "dim_mult": [
9 1,
10 2,
11 4,
12 4
13 ],
14 "dropout": 0.0,
15 "in_channels": 12,
16 "is_residual": true,
17 "latents_mean": [
18 -0.2289,
19 -0.0052,
20 -0.1323,
21 -0.2339,
22 -0.2799,
23 0.0174,
24 0.1838,
25 0.1557,
26 -0.1382,
27 0.0542,
28 0.2813,
29 0.0891,
30 0.157,
31 -0.0098,
32 0.0375,
33 -0.1825,
34 -0.2246,
35 -0.1207,
36 -0.0698,
37 0.5109,
38 0.2665,
39 -0.2108,
40 -0.2158,
41 0.2502,
42 -0.2055,
43 -0.0322,
44 0.1109,
45 0.1567,
46 -0.0729,
47 0.0899,
48 -0.2799,
49 -0.123,
50 -0.0313,
51 -0.1649,
52 0.0117,
53 0.0723,
54 -0.2839,
55 -0.2083,
56 -0.052,
57 0.3748,
58 0.0152,
59 0.1957,
60 0.1433,
61 -0.2944,
62 0.3573,
63 -0.0548,
64 -0.1681,
65 -0.0667
66 ],
67 "latents_std": [
68 0.4765,
69 1.0364,
70 0.4514,
71 1.1677,
72 0.5313,
73 0.499,
74 0.4818,
75 0.5013,
76 0.8158,
77 1.0344,
78 0.5894,
79 1.0901,
80 0.6885,
81 0.6165,
82 0.8454,
83 0.4978,
84 0.5759,
85 0.3523,
86 0.7135,
87 0.6804,
88 0.5833,
89 1.4146,
90 0.8986,
91 0.5659,
92 0.7069,
93 0.5338,
94 0.4889,
95 0.4917,
96 0.4069,
97 0.4999,
98 0.6866,
99 0.4093,
100 0.5709,
101 0.6065,
102 0.6415,
103 0.4944,
104 0.5726,
105 1.2042,
106 0.5458,
107 1.6887,
108 0.3971,
109 1.06,
110 0.3943,
111 0.5537,
112 0.5444,
113 0.4089,
114 0.7468,
115 0.7744
116 ],
117 "num_res_blocks": 2,
118 "out_channels": 12,
119 "patch_size": 2,
120 "scale_factor_spatial": 16,
121 "scale_factor_temporal": 4,
122 "temperal_downsample": [
123 false,
124 true,
125 true
126 ],
127 "z_dim": 48
128 }
129