vae/config.json
1.3 KB · 82 lines · json Raw
1 {
2 "_class_name": "AutoencoderKLLTX2Video",
3 "_diffusers_version": "0.37.0.dev0",
4 "block_out_channels": [
5 256,
6 512,
7 1024,
8 2048
9 ],
10 "decoder_block_out_channels": [
11 256,
12 512,
13 1024
14 ],
15 "decoder_causal": false,
16 "decoder_inject_noise": [
17 false,
18 false,
19 false,
20 false
21 ],
22 "decoder_layers_per_block": [
23 5,
24 5,
25 5,
26 5
27 ],
28 "decoder_spatial_padding_mode": "reflect",
29 "decoder_spatio_temporal_scaling": [
30 true,
31 true,
32 true
33 ],
34 "down_block_types": [
35 "LTX2VideoDownBlock3D",
36 "LTX2VideoDownBlock3D",
37 "LTX2VideoDownBlock3D",
38 "LTX2VideoDownBlock3D"
39 ],
40 "downsample_type": [
41 "spatial",
42 "temporal",
43 "spatiotemporal",
44 "spatiotemporal"
45 ],
46 "encoder_causal": true,
47 "encoder_spatial_padding_mode": "zeros",
48 "in_channels": 3,
49 "latent_channels": 128,
50 "layers_per_block": [
51 4,
52 6,
53 6,
54 2,
55 2
56 ],
57 "out_channels": 3,
58 "patch_size": 4,
59 "patch_size_t": 1,
60 "resnet_norm_eps": 1e-06,
61 "scaling_factor": 1.0,
62 "spatial_compression_ratio": 32,
63 "spatio_temporal_scaling": [
64 true,
65 true,
66 true,
67 true
68 ],
69 "temporal_compression_ratio": 8,
70 "timestep_conditioning": false,
71 "upsample_factor": [
72 2,
73 2,
74 2
75 ],
76 "upsample_residual": [
77 true,
78 true,
79 true
80 ]
81 }
82