unet/config.json

1.7 KB · 73 lines · json Raw

1	`{`
2	`"_class_name": "UNet2DConditionModel",`
3	`"_diffusers_version": "0.24.0.dev0",`
4	`"act_fn": "silu",`
5	`"addition_embed_type": "text_time",`
6	`"addition_embed_type_num_heads": 64,`
7	`"addition_time_embed_dim": 256,`
8	`"attention_head_dim": [`
9	`5,`
10	`10,`
11	`20`
12	`],`
13	`"attention_type": "default",`
14	`"block_out_channels": [`
15	`320,`
16	`640,`
17	`1280`
18	`],`
19	`"center_input_sample": false,`
20	`"class_embed_type": null,`
21	`"class_embeddings_concat": false,`
22	`"conv_in_kernel": 3,`
23	`"conv_out_kernel": 3,`
24	`"cross_attention_dim": 2048,`
25	`"cross_attention_norm": null,`
26	`"down_block_types": [`
27	`"DownBlock2D",`
28	`"CrossAttnDownBlock2D",`
29	`"CrossAttnDownBlock2D"`
30	`],`
31	`"downsample_padding": 1,`
32	`"dropout": 0.0,`
33	`"dual_cross_attention": false,`
34	`"encoder_hid_dim": null,`
35	`"encoder_hid_dim_type": null,`
36	`"flip_sin_to_cos": true,`
37	`"freq_shift": 0,`
38	`"in_channels": 4,`
39	`"layers_per_block": 2,`
40	`"mid_block_only_cross_attention": null,`
41	`"mid_block_scale_factor": 1,`
42	`"mid_block_type": "UNetMidBlock2DCrossAttn",`
43	`"norm_eps": 1e-05,`
44	`"norm_num_groups": 32,`
45	`"num_attention_heads": null,`
46	`"num_class_embeds": null,`
47	`"only_cross_attention": false,`
48	`"out_channels": 4,`
49	`"projection_class_embeddings_input_dim": 2816,`
50	`"resnet_out_scale_factor": 1.0,`
51	`"resnet_skip_time_act": false,`
52	`"resnet_time_scale_shift": "default",`
53	`"reverse_transformer_layers_per_block": null,`
54	`"sample_size": 64,`
55	`"time_cond_proj_dim": null,`
56	`"time_embedding_act_fn": null,`
57	`"time_embedding_dim": null,`
58	`"time_embedding_type": "positional",`
59	`"timestep_post_act": null,`
60	`"transformer_layers_per_block": [`
61	`1,`
62	`2,`
63	`10`
64	`],`
65	`"up_block_types": [`
66	`"CrossAttnUpBlock2D",`
67	`"CrossAttnUpBlock2D",`
68	`"UpBlock2D"`
69	`],`
70	`"upcast_attention": null,`
71	`"use_linear_projection": true`
72	`}`
73