QuantFunc committed on
Commit
1a265b6
·
verified ·
1 Parent(s): d03ba50

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. README.md +201 -0
  3. assets/logo.webp +0 -0
  4. precision-config/50x-above-fp4-sample.json +12 -0
  5. precision-config/50x-below-int4-sample.json +12 -0
  6. prequant/qwen-image-edit-2509-50x-above.safetensors +3 -0
  7. prequant/qwen-image-edit-2511-50x-above.safetensors +3 -0
  8. prequant/qwen-image-edit-2511-50x-below.safetensors +3 -0
  9. qwen-image-edit-series-50x-above-base-model/model_index.json +28 -0
  10. qwen-image-edit-series-50x-above-base-model/processor/added_tokens.json +24 -0
  11. qwen-image-edit-series-50x-above-base-model/processor/chat_template.jinja +7 -0
  12. qwen-image-edit-series-50x-above-base-model/processor/merges.txt +0 -0
  13. qwen-image-edit-series-50x-above-base-model/processor/preprocessor_config.json +39 -0
  14. qwen-image-edit-series-50x-above-base-model/processor/special_tokens_map.json +31 -0
  15. qwen-image-edit-series-50x-above-base-model/processor/tokenizer.json +3 -0
  16. qwen-image-edit-series-50x-above-base-model/processor/tokenizer_config.json +208 -0
  17. qwen-image-edit-series-50x-above-base-model/processor/video_preprocessor_config.json +43 -0
  18. qwen-image-edit-series-50x-above-base-model/processor/vocab.json +0 -0
  19. qwen-image-edit-series-50x-above-base-model/quantfunc_config.json +15 -0
  20. qwen-image-edit-series-50x-above-base-model/scheduler/scheduler_config.json +11 -0
  21. qwen-image-edit-series-50x-above-base-model/text_encoder/config.json +132 -0
  22. qwen-image-edit-series-50x-above-base-model/text_encoder/model.safetensors +3 -0
  23. qwen-image-edit-series-50x-above-base-model/tokenizer/added_tokens.json +24 -0
  24. qwen-image-edit-series-50x-above-base-model/tokenizer/chat_template.jinja +54 -0
  25. qwen-image-edit-series-50x-above-base-model/tokenizer/merges.txt +0 -0
  26. qwen-image-edit-series-50x-above-base-model/tokenizer/special_tokens_map.json +31 -0
  27. qwen-image-edit-series-50x-above-base-model/tokenizer/tokenizer_config.json +207 -0
  28. qwen-image-edit-series-50x-above-base-model/tokenizer/vocab.json +0 -0
  29. qwen-image-edit-series-50x-above-base-model/vae/config.json +56 -0
  30. qwen-image-edit-series-50x-above-base-model/vae/diffusion_pytorch_model.safetensors +3 -0
  31. qwen-image-edit-series-50x-above-base-model/vision_encoder/config.json +18 -0
  32. qwen-image-edit-series-50x-above-base-model/vision_encoder/model.safetensors +3 -0
  33. qwen-image-edit-series-50x-below-base-model/model_index.json +28 -0
  34. qwen-image-edit-series-50x-below-base-model/quantfunc_config.json +15 -0
  35. qwen-image-edit-series-50x-below-base-model/scheduler/scheduler_config.json +11 -0
  36. qwen-image-edit-series-50x-below-base-model/text_encoder/config.json +135 -0
  37. qwen-image-edit-series-50x-below-base-model/text_encoder/model.safetensors +3 -0
  38. qwen-image-edit-series-50x-below-base-model/tokenizer/added_tokens.json +24 -0
  39. qwen-image-edit-series-50x-below-base-model/tokenizer/chat_template.jinja +54 -0
  40. qwen-image-edit-series-50x-below-base-model/tokenizer/merges.txt +0 -0
  41. qwen-image-edit-series-50x-below-base-model/tokenizer/special_tokens_map.json +31 -0
  42. qwen-image-edit-series-50x-below-base-model/tokenizer/tokenizer_config.json +207 -0
  43. qwen-image-edit-series-50x-below-base-model/tokenizer/vocab.json +0 -0
  44. qwen-image-edit-series-50x-below-base-model/vae/config.json +56 -0
  45. qwen-image-edit-series-50x-below-base-model/vae/diffusion_pytorch_model.safetensors +3 -0
  46. qwen-image-edit-series-50x-below-base-model/vision_encoder/config.json +18 -0
  47. qwen-image-edit-series-50x-below-base-model/vision_encoder/model.safetensors +3 -0
  48. transformer/config.json +18 -0
  49. transformer/qwen-image-2511-50x-above-lighting-4steps-prequant.safetensors +3 -0
  50. transformer/qwen-image-2511-50x-above-lighting-4steps.safetensors +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ qwen-image-edit-series-50x-above-base-model/processor/tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ license_name: quantfunc-model-license
4
+ tags:
5
+ - image-editing
6
+ - image-to-image
7
+ - diffusion
8
+ - quantized
9
+ - quantfunc
10
+ language:
11
+ - en
12
+ ---
13
+
14
+ # QuantFunc
15
+
16
+ <div align="center" style="margin-top: 50px;">
17
+ <img src="assets/logo.webp" width="300" alt="Logo">
18
+ </div>
19
+
20
+ # Qwen-Image-Edit-Series
21
+
22
+ Pre-quantized **Qwen-Image-Edit-2511** image editing model series by [QuantFunc](https://github.com/user/quantfunc), with both Lighting and SVDQ backend inference support.
23
+
24
+ ## Overview
25
+
26
+ Qwen-Image-Edit-2511 is an image editing diffusion model distilled from Alibaba Qwen team's image editing model. It can edit input images according to text instructions and supports multi-reference image inputs.
27
+
28
+ With the latest QuantFunc ComfyUI plugin, inference achieves a **2x–6x speedup** over mainstream frameworks — e.g., Qwen-Image-Edit 1K image inference in ComfyUI is reduced from 9.6s to 1.6s (tested on RTX 4090).
29
+
30
+ ## Hardware Requirements
31
+
32
+ - Supports NVIDIA RTX 30 series and above
33
+ - The RTX 20 series does not support BF16, which causes significant precision loss when quantizing Qwen-series models. Therefore, the RTX 20 series currently supports only Z-Image models.
34
+
35
+ ## Compatibility
36
+
37
+ - The base models in this repository are compatible with **any version** of Qwen-Image-Edit transformer weights
38
+ - The QuantFunc code plugin and ComfyUI plugin are **100% compatible** with previous versions of Qwen-Image-Edit models
39
+
40
+ ## Directory Structure
41
+
42
+ ```
43
+ Qwen-Image-Edit-Series/
44
+ ├── qwen-image-edit-series-50x-above-base-model/ # Base model, optimized for RTX 50 series and above
45
+ │ ├── text_encoder/ # Qwen2.5-VL text encoder (pre-quantized)
46
+ │ ├── vision_encoder/ # Qwen2.5-VL vision encoder (pre-quantized)
47
+ │ ├── vae/ # VAE encoder + decoder (~242MB)
48
+ │ ├── tokenizer/ # Tokenizer
49
+ │ ├── processor/ # Image preprocessor
50
+ │ ├── scheduler/ # Scheduler config
51
+ │ ├── model_index.json
52
+ │ └── quantfunc_config.json
53
+ ├── qwen-image-edit-series-50x-below-base-model/ # Base model, optimized for GPUs below the RTX 50 series (RTX 30/40)
54
+ │ └── (same structure as above)
55
+ ├── transformer/
56
+ │ ├── config.json
57
+ │ ├── qwen-image-2511-50x-above-lighting-4steps.safetensors # RTX 50+ Lighting 4-step
58
+ │ ├── qwen-image-2511-50x-above-lighting-4steps-prequant.safetensors # RTX 50+ Lighting pre-quantized
59
+ │ ├── qwen-image-2511-50x-above-svdq-4steps.safetensors # RTX 50+ SVDQ 4-step
60
+ │ ├── qwen-image-2511-50x-above-svdq.safetensors # RTX 50+ SVDQ full-step
61
+ │ ├── qwen-image-2511-50x-below-lighting-4steps.safetensors # RTX 30/40 Lighting 4-step
62
+ │ └── qwen-image-2511-50x-below-lighting-4steps-prequant.safetensors # RTX 30/40 Lighting pre-quantized
63
+ ├── prequant/ # Pre-quantized modulation weights
64
+ │ ├── qwen-image-edit-2511-50x-above.safetensors # RTX 50+ mod weights
65
+ │ ├── qwen-image-edit-2511-50x-below.safetensors # RTX 30/40 mod weights
66
+ │ └── qwen-image-edit-2509-50x-above.safetensors # Legacy 2509 mod weights
67
+ └── precision-config/ # Lighting precision config samples
68
+ ├── 50x-above-fp4-sample.json # FP4 config for RTX 50+
69
+ └── 50x-below-int4-sample.json # INT4 config for RTX 30/40
70
+ ```
71
+
72
+ ## Model Variants
73
+
74
+ ### By GPU Generation
75
+
76
+ | Variant | Target GPU | Description |
77
+ |---------|-----------|-------------|
78
+ | **50x-above** | RTX 50 series and above | Optimized for Blackwell architecture |
79
+ | **50x-below** | RTX 30/40 series | Broadly compatible |
80
+
81
+ ### By Inference Backend
82
+
83
+ | Backend | File Suffix | Features |
84
+ |---------|------------|----------|
85
+ | **Lighting 4-step** | `*-lighting-4steps.safetensors` | Fastest inference with fused operators |
86
+ | **SVDQ 4-step** | `*-svdq-4steps.safetensors` | 4-step distilled + SVDQ quantization, runtime LoRA support |
87
+ | **SVDQ full-step** | `*-svdq.safetensors` | Default step count inference, runtime LoRA support |
88
+
89
+ > The base-model and transformer must use the **same variant** (both above or both below).
90
+
91
+ ## Quick Start
92
+
93
+ ### Download
94
+
95
+ ```bash
96
+ pip install modelscope
97
+ ```
98
+
99
+ ```python
100
+ from modelscope import snapshot_download
101
+ model_dir = snapshot_download('QuantFunc/Qwen-Image-Edit-Series')
102
+ ```
103
+
104
+ ### Lighting Backend Inference
105
+
106
+ ```bash
107
+ quantfunc \
108
+ --model-dir Qwen-Image-Edit-Series/qwen-image-edit-series-50x-above-base-model \
109
+ --transformer Qwen-Image-Edit-Series/transformer/qwen-image-2511-50x-above-lighting-4steps.safetensors \
110
+ --auto-optimize --model-backend lighting \
111
+ --ref-image input.png \
112
+ --prompt "make the sky more purple and add stars" \
113
+ --output output.png --steps 4
114
+ ```
115
+
116
+ ### SVDQ Backend Inference
117
+
118
+ ```bash
119
+ quantfunc \
120
+ --model-dir Qwen-Image-Edit-Series/qwen-image-edit-series-50x-above-base-model \
121
+ --transformer Qwen-Image-Edit-Series/transformer/qwen-image-2511-50x-above-svdq-4steps.safetensors \
122
+ --auto-optimize --model-backend svdq \
123
+ --ref-image input.png \
124
+ --prompt "change the background to a beach scene" \
125
+ --output output.png --steps 4
126
+ ```
127
+
128
+ ### SVDQ + LoRA
129
+
130
+ ```bash
131
+ quantfunc \
132
+ --model-dir Qwen-Image-Edit-Series/qwen-image-edit-series-50x-above-base-model \
133
+ --transformer Qwen-Image-Edit-Series/transformer/qwen-image-2511-50x-above-svdq-4steps.safetensors \
134
+ --auto-optimize --model-backend svdq \
135
+ --lora /path/to/style_lora.safetensors:0.8 \
136
+ --ref-image input.png \
137
+ --prompt "apply anime style to the image" \
138
+ --output output.png --steps 4
139
+ ```
140
+
141
+ ## SVDQ and Lighting Backends
142
+
143
+ This repository provides both **Lighting** and **SVDQ** backend pre-quantized models:
144
+
145
+ | Feature | Lighting | SVDQ |
146
+ |---------|----------|------|
147
+ | **Quantization** | Per-layer mixed precision (FP4/INT4/FP8/INT8) | Nunchaku-based holistic pre-quantization + Rotation quantization |
148
+ | **LoRA Integration** | Real-time quantization — build a custom model in 5 minutes with zero speed loss, integrating any number of LoRAs | Runtime low-rank pathway |
149
+ | **Ecosystem** | QuantFunc native | Compatible with the widely-adopted Nunchaku ecosystem, enhanced with Rotation quantization and Auto Rank dynamic rank optimization |
150
+ | **Flexibility** | Per-layer precision control | Precision fixed at export time |
151
+ | **Use Cases** | Rapid personal model customization, batch LoRA integration | Leverage Nunchaku ecosystem, runtime dynamic LoRA |
152
+
153
+ ## Pre-quantized Modulation Weights (prequant/)
154
+
155
+ The `prequant/` directory contains **pre-quantized modulation (mod) weights** extracted from SVDQ models. These are used with the Lighting backend to provide high-quality modulation without runtime quantization overhead.
156
+
157
+ **Usage with Lighting backend:**
158
+
159
+ ```bash
160
+ quantfunc \
161
+ --model-dir Qwen-Image-Edit-Series/qwen-image-edit-series-50x-above-base-model \
162
+ --model-backend lighting \
163
+ --precision-config Qwen-Image-Edit-Series/precision-config/50x-above-fp4-sample.json \
164
+ --mod-weights Qwen-Image-Edit-Series/prequant/qwen-image-edit-2511-50x-above.safetensors \
165
+ --rotation-block-size 256 \
166
+ --ref-image input.png --prompt "edit instruction" \
167
+ --steps 4 --auto-optimize
168
+ ```
169
+
170
+ Alternatively, use the **pre-quantized Lighting transformer** for instant loading (no runtime quantization):
171
+
172
+ ```bash
173
+ quantfunc \
174
+ --model-dir Qwen-Image-Edit-Series/qwen-image-edit-series-50x-above-base-model \
175
+ --transformer Qwen-Image-Edit-Series/transformer/qwen-image-2511-50x-above-lighting-4steps-prequant.safetensors \
176
+ --model-backend lighting \
177
+ --ref-image input.png --prompt "edit instruction" \
178
+ --steps 4 --auto-optimize
179
+ ```
180
+
181
+ ## Precision Config (precision-config/)
182
+
183
+ Sample per-layer precision configurations for the Lighting backend:
184
+
185
+ | File | Target GPU | Precision |
186
+ |------|-----------|-----------|
187
+ | `50x-above-fp4-sample.json` | RTX 50+ | FP4 attention + AF8WF4 MLP fc2 + INT8 modulation |
188
+ | `50x-below-int4-sample.json` | RTX 30/40 | INT4 all layers + INT8 modulation |
189
+
190
+ These configs control the quantization precision of each transformer sub-layer. Customize them for your speed/quality trade-off.
191
+
192
+ ## Related Repositories
193
+
194
+ - [QuantFunc/Qwen-Image-Series](https://modelscope.cn/models/QuantFunc/Qwen-Image-Series) — Qwen-Image text-to-image (60 layers)
195
+ - [QuantFunc/Z-Image-Series](https://modelscope.cn/models/QuantFunc/Z-Image-Series) — Z-Image-Turbo text-to-image (lightweight, fast)
196
+
197
+ ## License
198
+
199
+ The pre-quantized model weights in this repository are derived from the original models. Users must comply with the original model's license agreement. The QuantFunc inference engine and its plugins (including the ComfyUI plugin) are licensed separately — see official QuantFunc channels for details.
200
+
201
+ For models quantized from commercially licensed models, users are responsible for obtaining the necessary commercial licenses from the original model providers.
assets/logo.webp ADDED
precision-config/50x-above-fp4-sample.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "transformer_blocks.attn.to_qkv": "f4",
3
+ "transformer_blocks.attn.add_qkv_proj": "f4",
4
+ "transformer_blocks.attn.to_out": "f4",
5
+ "transformer_blocks.attn.to_add_out": "f4",
6
+ "transformer_blocks.img_mlp.net.0.proj": "f4",
7
+ "transformer_blocks.img_mlp.net.2": "af8wf4",
8
+ "transformer_blocks.txt_mlp.net.0.proj": "f4",
9
+ "transformer_blocks.txt_mlp.net.2": "af8wf4",
10
+ "transformer_blocks.img_mod": "i8",
11
+ "transformer_blocks.txt_mod": "i8"
12
+ }
precision-config/50x-below-int4-sample.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "transformer_blocks.attn.to_qkv": "i4",
3
+ "transformer_blocks.attn.add_qkv_proj": "i4",
4
+ "transformer_blocks.attn.to_out": "i4",
5
+ "transformer_blocks.attn.to_add_out": "i4",
6
+ "transformer_blocks.img_mlp.net.0.proj": "i4",
7
+ "transformer_blocks.img_mlp.net.2": "i4",
8
+ "transformer_blocks.txt_mlp.net.0.proj": "i4",
9
+ "transformer_blocks.txt_mlp.net.2": "i4",
10
+ "transformer_blocks.img_mod": "i8",
11
+ "transformer_blocks.txt_mod": "i8"
12
+ }
prequant/qwen-image-edit-2509-50x-above.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0879c424a486adec58607aa2992a9dcbe55e69a567ac07f5d8d23bf1bfc19be2
3
+ size 3826539224
prequant/qwen-image-edit-2511-50x-above.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e89c7fb91f24919654afffe3c6a83b21d4f3d5896dd9efa7b5734baec4063bc8
3
+ size 3826550584
prequant/qwen-image-edit-2511-50x-below.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4ef632343b0211e5cf8e943cdd286734ed23cf2ad9c1ff0aab19a724e3b5841
3
+ size 3826550536
qwen-image-edit-series-50x-above-base-model/model_index.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "QwenImageEditPlusPipeline",
3
+ "_diffusers_version": "0.36.0.dev0",
4
+ "processor": [
5
+ "transformers",
6
+ "Qwen2VLProcessor"
7
+ ],
8
+ "scheduler": [
9
+ "diffusers",
10
+ "FlowMatchEulerDiscreteScheduler"
11
+ ],
12
+ "text_encoder": [
13
+ "transformers",
14
+ "Qwen2_5_VLForConditionalGeneration"
15
+ ],
16
+ "tokenizer": [
17
+ "transformers",
18
+ "Qwen2Tokenizer"
19
+ ],
20
+ "transformer": [
21
+ "diffusers",
22
+ "QwenImageTransformer2DModel"
23
+ ],
24
+ "vae": [
25
+ "diffusers",
26
+ "AutoencoderKLQwenImage"
27
+ ]
28
+ }
qwen-image-edit-series-50x-above-base-model/processor/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
qwen-image-edit-series-50x-above-base-model/processor/chat_template.jinja ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
2
+ You are a helpful assistant.<|im_end|>
3
+ {% endif %}<|im_start|>{{ message['role'] }}
4
+ {% if message['content'] is string %}{{ message['content'] }}<|im_end|>
5
+ {% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
6
+ {% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
7
+ {% endif %}
qwen-image-edit-series-50x-above-base-model/processor/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen-image-edit-series-50x-above-base-model/processor/preprocessor_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": null,
3
+ "data_format": "channels_first",
4
+ "default_to_square": true,
5
+ "device": null,
6
+ "disable_grouping": null,
7
+ "do_center_crop": null,
8
+ "do_convert_rgb": true,
9
+ "do_normalize": true,
10
+ "do_pad": null,
11
+ "do_rescale": true,
12
+ "do_resize": true,
13
+ "image_mean": [
14
+ 0.48145466,
15
+ 0.4578275,
16
+ 0.40821073
17
+ ],
18
+ "image_processor_type": "Qwen2VLImageProcessorFast",
19
+ "image_std": [
20
+ 0.26862954,
21
+ 0.26130258,
22
+ 0.27577711
23
+ ],
24
+ "input_data_format": null,
25
+ "max_pixels": 12845056,
26
+ "merge_size": 2,
27
+ "min_pixels": 3136,
28
+ "pad_size": null,
29
+ "patch_size": 14,
30
+ "processor_class": "Qwen2VLProcessor",
31
+ "resample": 3,
32
+ "rescale_factor": 0.00392156862745098,
33
+ "return_tensors": null,
34
+ "size": {
35
+ "longest_edge": 12845056,
36
+ "shortest_edge": 3136
37
+ },
38
+ "temporal_patch_size": 2
39
+ }
qwen-image-edit-series-50x-above-base-model/processor/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
qwen-image-edit-series-50x-above-base-model/processor/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
qwen-image-edit-series-50x-above-base-model/processor/tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "processor_class": "Qwen2VLProcessor",
205
+ "split_special_tokens": false,
206
+ "tokenizer_class": "Qwen2Tokenizer",
207
+ "unk_token": null
208
+ }
qwen-image-edit-series-50x-above-base-model/processor/video_preprocessor_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": null,
3
+ "data_format": "channels_first",
4
+ "default_to_square": true,
5
+ "device": null,
6
+ "do_center_crop": null,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "do_sample_frames": false,
12
+ "fps": null,
13
+ "image_mean": [
14
+ 0.48145466,
15
+ 0.4578275,
16
+ 0.40821073
17
+ ],
18
+ "image_std": [
19
+ 0.26862954,
20
+ 0.26130258,
21
+ 0.27577711
22
+ ],
23
+ "input_data_format": null,
24
+ "max_frames": 768,
25
+ "max_pixels": 12845056,
26
+ "merge_size": 2,
27
+ "min_frames": 4,
28
+ "min_pixels": 3136,
29
+ "num_frames": null,
30
+ "pad_size": null,
31
+ "patch_size": 14,
32
+ "processor_class": "Qwen2VLProcessor",
33
+ "resample": 3,
34
+ "rescale_factor": 0.00392156862745098,
35
+ "return_metadata": false,
36
+ "size": {
37
+ "longest_edge": 12845056,
38
+ "shortest_edge": 3136
39
+ },
40
+ "temporal_patch_size": 2,
41
+ "video_metadata": null,
42
+ "video_processor_type": "Qwen2VLVideoProcessor"
43
+ }
qwen-image-edit-series-50x-above-base-model/processor/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qwen-image-edit-series-50x-above-base-model/quantfunc_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "lighting",
3
+ "model_id": "a330be1d-cc9d-4e7b-aa0d-f65b11ae5108",
4
+ "obfuscated": true,
5
+ "text_encoder": {
6
+ "prequantized": true,
7
+ "text_precision": "fp4",
8
+ "use_rotation": true
9
+ },
10
+ "vision_encoder": {
11
+ "prequantized": true,
12
+ "vision_quant": "fp4",
13
+ "vision_rotation": true
14
+ }
15
+ }
qwen-image-edit-series-50x-above-base-model/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "FlowMatchEulerDiscreteScheduler",
3
+ "base_image_seq_len": 256,
4
+ "base_shift": 1.0986122886681098,
5
+ "max_image_seq_len": 8192,
6
+ "max_shift": 1.0986122886681098,
7
+ "num_train_timesteps": 1000,
8
+ "shift": 1.0,
9
+ "time_shift_type": "exponential",
10
+ "use_dynamic_shifting": true
11
+ }
qwen-image-edit-series-50x-above-base-model/text_encoder/config.json ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2_5_VLForConditionalGeneration"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "dtype": "bfloat16",
8
+ "eos_token_id": 151645,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 3584,
11
+ "image_token_id": 151655,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 18944,
14
+ "max_position_embeddings": 128000,
15
+ "max_window_layers": 28,
16
+ "model_type": "qwen2_5_vl",
17
+ "num_attention_heads": 28,
18
+ "num_hidden_layers": 28,
19
+ "num_key_value_heads": 4,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": {
22
+ "mrope_section": [
23
+ 16,
24
+ 24,
25
+ 24
26
+ ],
27
+ "rope_type": "default",
28
+ "type": "default"
29
+ },
30
+ "rope_theta": 1000000.0,
31
+ "sliding_window": 32768,
32
+ "text_config": {
33
+ "_name_or_path": "/cpfs01/haoyangzhang/pretrained_weights/Qwen2.5-VL",
34
+ "architectures": [
35
+ "Qwen2_5_VLForConditionalGeneration"
36
+ ],
37
+ "attention_dropout": 0.0,
38
+ "bos_token_id": 151643,
39
+ "dtype": "float32",
40
+ "eos_token_id": 151645,
41
+ "hidden_act": "silu",
42
+ "hidden_size": 3584,
43
+ "initializer_range": 0.02,
44
+ "intermediate_size": 18944,
45
+ "layer_types": [
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention",
60
+ "full_attention",
61
+ "full_attention",
62
+ "full_attention",
63
+ "full_attention",
64
+ "full_attention",
65
+ "full_attention",
66
+ "full_attention",
67
+ "full_attention",
68
+ "full_attention",
69
+ "full_attention",
70
+ "full_attention",
71
+ "full_attention",
72
+ "full_attention",
73
+ "full_attention"
74
+ ],
75
+ "max_position_embeddings": 128000,
76
+ "max_window_layers": 28,
77
+ "model_type": "qwen2_5_vl_text",
78
+ "num_attention_heads": 28,
79
+ "num_hidden_layers": 28,
80
+ "num_key_value_heads": 4,
81
+ "rms_norm_eps": 1e-06,
82
+ "rope_scaling": {
83
+ "mrope_section": [
84
+ 16,
85
+ 24,
86
+ 24
87
+ ],
88
+ "rope_type": "default",
89
+ "type": "default"
90
+ },
91
+ "rope_theta": 1000000.0,
92
+ "sliding_window": null,
93
+ "use_cache": true,
94
+ "use_sliding_window": false,
95
+ "vision_token_id": 151654,
96
+ "vocab_size": 152064
97
+ },
98
+ "tie_word_embeddings": false,
99
+ "transformers_version": "4.57.1",
100
+ "use_cache": true,
101
+ "use_sliding_window": false,
102
+ "video_token_id": 151656,
103
+ "vision_config": {
104
+ "depth": 32,
105
+ "dtype": "float32",
106
+ "fullatt_block_indexes": [
107
+ 7,
108
+ 15,
109
+ 23,
110
+ 31
111
+ ],
112
+ "hidden_act": "silu",
113
+ "hidden_size": 1280,
114
+ "in_channels": 3,
115
+ "in_chans": 3,
116
+ "initializer_range": 0.02,
117
+ "intermediate_size": 3420,
118
+ "model_type": "qwen2_5_vl",
119
+ "num_heads": 16,
120
+ "out_hidden_size": 3584,
121
+ "patch_size": 14,
122
+ "spatial_merge_size": 2,
123
+ "spatial_patch_size": 14,
124
+ "temporal_patch_size": 2,
125
+ "tokens_per_second": 2,
126
+ "window_size": 112
127
+ },
128
+ "vision_end_token_id": 151653,
129
+ "vision_start_token_id": 151652,
130
+ "vision_token_id": 151654,
131
+ "vocab_size": 152064
132
+ }
qwen-image-edit-series-50x-above-base-model/text_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7d7714228a7618ec4cd222a80fc9575ae8627829bcff70f9ad641de4bed677a
3
+ size 4761171927
qwen-image-edit-series-50x-above-base-model/tokenizer/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
qwen-image-edit-series-50x-above-base-model/tokenizer/chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
qwen-image-edit-series-50x-above-base-model/tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen-image-edit-series-50x-above-base-model/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
qwen-image-edit-series-50x-above-base-model/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
+ }
qwen-image-edit-series-50x-above-base-model/tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qwen-image-edit-series-50x-above-base-model/vae/config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKLQwenImage",
3
+ "_diffusers_version": "0.36.0.dev0",
4
+ "attn_scales": [],
5
+ "base_dim": 96,
6
+ "dim_mult": [
7
+ 1,
8
+ 2,
9
+ 4,
10
+ 4
11
+ ],
12
+ "dropout": 0.0,
13
+ "latents_mean": [
14
+ -0.7571,
15
+ -0.7089,
16
+ -0.9113,
17
+ 0.1075,
18
+ -0.1745,
19
+ 0.9653,
20
+ -0.1517,
21
+ 1.5508,
22
+ 0.4134,
23
+ -0.0715,
24
+ 0.5517,
25
+ -0.3632,
26
+ -0.1922,
27
+ -0.9497,
28
+ 0.2503,
29
+ -0.2921
30
+ ],
31
+ "latents_std": [
32
+ 2.8184,
33
+ 1.4541,
34
+ 2.3275,
35
+ 2.6558,
36
+ 1.2196,
37
+ 1.7708,
38
+ 2.6052,
39
+ 2.0743,
40
+ 3.2687,
41
+ 2.1526,
42
+ 2.8652,
43
+ 1.5579,
44
+ 1.6382,
45
+ 1.1253,
46
+ 2.8251,
47
+ 1.916
48
+ ],
49
+ "num_res_blocks": 2,
50
+ "temperal_downsample": [
51
+ false,
52
+ true,
53
+ true
54
+ ],
55
+ "z_dim": 16
56
+ }
qwen-image-edit-series-50x-above-base-model/vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c8bc8b758c649abef9ea407b95408389a3b2f610d0d10fcb054fe171d0a8344
3
+ size 253806966
qwen-image-edit-series-50x-above-base-model/vision_encoder/config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "depth": 32,
3
+ "fullatt_block_indexes": [
4
+ 7,
5
+ 15,
6
+ 23,
7
+ 31
8
+ ],
9
+ "hidden_size": 1280,
10
+ "in_channels": 3,
11
+ "intermediate_size": 3420,
12
+ "num_heads": 16,
13
+ "out_hidden_size": 3584,
14
+ "patch_size": 14,
15
+ "spatial_merge_size": 2,
16
+ "temporal_patch_size": 2,
17
+ "window_size": 112
18
+ }
qwen-image-edit-series-50x-above-base-model/vision_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49ba688655e579bb242441e3523939983927f739e90f095ae9fbaf3168742c64
3
+ size 382927193
qwen-image-edit-series-50x-below-base-model/model_index.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "QwenImageEditPipeline",
3
+ "_diffusers_version": "0.35.0.dev0",
4
+ "processor": [
5
+ "transformers",
6
+ "Qwen2VLProcessor"
7
+ ],
8
+ "scheduler": [
9
+ "diffusers",
10
+ "FlowMatchEulerDiscreteScheduler"
11
+ ],
12
+ "text_encoder": [
13
+ "transformers",
14
+ "Qwen2_5_VLForConditionalGeneration"
15
+ ],
16
+ "tokenizer": [
17
+ "transformers",
18
+ "Qwen2Tokenizer"
19
+ ],
20
+ "transformer": [
21
+ "diffusers",
22
+ "QwenImageTransformer2DModel"
23
+ ],
24
+ "vae": [
25
+ "diffusers",
26
+ "AutoencoderKLQwenImage"
27
+ ]
28
+ }
qwen-image-edit-series-50x-below-base-model/quantfunc_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "lighting",
3
+ "model_id": "54d5a9c6-806c-40d5-bf76-1683edace3f8",
4
+ "obfuscated": true,
5
+ "text_encoder": {
6
+ "prequantized": true,
7
+ "text_precision": "int4",
8
+ "use_rotation": true
9
+ },
10
+ "vision_encoder": {
11
+ "prequantized": true,
12
+ "vision_quant": "int4",
13
+ "vision_rotation": true
14
+ }
15
+ }
qwen-image-edit-series-50x-below-base-model/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "FlowMatchEulerDiscreteScheduler",
3
+ "base_image_seq_len": 256,
4
+ "base_shift": 1.0986122886681098,
5
+ "max_image_seq_len": 8192,
6
+ "max_shift": 1.0986122886681098,
7
+ "num_train_timesteps": 1000,
8
+ "shift": 1.0,
9
+ "time_shift_type": "exponential",
10
+ "use_dynamic_shifting": true
11
+ }
qwen-image-edit-series-50x-below-base-model/text_encoder/config.json ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2_5_VLForConditionalGeneration"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 3584,
10
+ "image_token_id": 151655,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 18944,
13
+ "max_position_embeddings": 128000,
14
+ "max_window_layers": 28,
15
+ "model_type": "qwen2_5_vl",
16
+ "num_attention_heads": 28,
17
+ "num_hidden_layers": 28,
18
+ "num_key_value_heads": 4,
19
+ "rms_norm_eps": 1e-06,
20
+ "rope_scaling": {
21
+ "mrope_section": [
22
+ 16,
23
+ 24,
24
+ 24
25
+ ],
26
+ "rope_type": "default",
27
+ "type": "default"
28
+ },
29
+ "rope_theta": 1000000.0,
30
+ "sliding_window": 32768,
31
+ "text_config": {
32
+ "architectures": [
33
+ "Qwen2_5_VLForConditionalGeneration"
34
+ ],
35
+ "attention_dropout": 0.0,
36
+ "bos_token_id": 151643,
37
+ "eos_token_id": 151645,
38
+ "hidden_act": "silu",
39
+ "hidden_size": 3584,
40
+ "image_token_id": null,
41
+ "initializer_range": 0.02,
42
+ "intermediate_size": 18944,
43
+ "layer_types": [
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention",
60
+ "full_attention",
61
+ "full_attention",
62
+ "full_attention",
63
+ "full_attention",
64
+ "full_attention",
65
+ "full_attention",
66
+ "full_attention",
67
+ "full_attention",
68
+ "full_attention",
69
+ "full_attention",
70
+ "full_attention",
71
+ "full_attention"
72
+ ],
73
+ "max_position_embeddings": 128000,
74
+ "max_window_layers": 28,
75
+ "model_type": "qwen2_5_vl_text",
76
+ "num_attention_heads": 28,
77
+ "num_hidden_layers": 28,
78
+ "num_key_value_heads": 4,
79
+ "rms_norm_eps": 1e-06,
80
+ "rope_scaling": {
81
+ "mrope_section": [
82
+ 16,
83
+ 24,
84
+ 24
85
+ ],
86
+ "rope_type": "default",
87
+ "type": "default"
88
+ },
89
+ "rope_theta": 1000000.0,
90
+ "sliding_window": null,
91
+ "torch_dtype": "float32",
92
+ "use_cache": true,
93
+ "use_sliding_window": false,
94
+ "video_token_id": null,
95
+ "vision_end_token_id": 151653,
96
+ "vision_start_token_id": 151652,
97
+ "vision_token_id": 151654,
98
+ "vocab_size": 152064
99
+ },
100
+ "tie_word_embeddings": false,
101
+ "torch_dtype": "bfloat16",
102
+ "transformers_version": "4.55.2",
103
+ "use_cache": true,
104
+ "use_sliding_window": false,
105
+ "video_token_id": 151656,
106
+ "vision_config": {
107
+ "depth": 32,
108
+ "fullatt_block_indexes": [
109
+ 7,
110
+ 15,
111
+ 23,
112
+ 31
113
+ ],
114
+ "hidden_act": "silu",
115
+ "hidden_size": 1280,
116
+ "in_channels": 3,
117
+ "in_chans": 3,
118
+ "initializer_range": 0.02,
119
+ "intermediate_size": 3420,
120
+ "model_type": "qwen2_5_vl",
121
+ "num_heads": 16,
122
+ "out_hidden_size": 3584,
123
+ "patch_size": 14,
124
+ "spatial_merge_size": 2,
125
+ "spatial_patch_size": 14,
126
+ "temporal_patch_size": 2,
127
+ "tokens_per_second": 2,
128
+ "torch_dtype": "float32",
129
+ "window_size": 112
130
+ },
131
+ "vision_end_token_id": 151653,
132
+ "vision_start_token_id": 151652,
133
+ "vision_token_id": 151654,
134
+ "vocab_size": 152064
135
+ }
qwen-image-edit-series-50x-below-base-model/text_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd766271590adcc936d8be074355850264a50f4321ad700621ad9e32be746c8
3
+ size 4557255789
qwen-image-edit-series-50x-below-base-model/tokenizer/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
qwen-image-edit-series-50x-below-base-model/tokenizer/chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
qwen-image-edit-series-50x-below-base-model/tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen-image-edit-series-50x-below-base-model/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
qwen-image-edit-series-50x-below-base-model/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
+ }
qwen-image-edit-series-50x-below-base-model/tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qwen-image-edit-series-50x-below-base-model/vae/config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKLQwenImage",
3
+ "_diffusers_version": "0.35.0.dev0",
4
+ "attn_scales": [],
5
+ "base_dim": 96,
6
+ "dim_mult": [
7
+ 1,
8
+ 2,
9
+ 4,
10
+ 4
11
+ ],
12
+ "dropout": 0.0,
13
+ "latents_mean": [
14
+ -0.7571,
15
+ -0.7089,
16
+ -0.9113,
17
+ 0.1075,
18
+ -0.1745,
19
+ 0.9653,
20
+ -0.1517,
21
+ 1.5508,
22
+ 0.4134,
23
+ -0.0715,
24
+ 0.5517,
25
+ -0.3632,
26
+ -0.1922,
27
+ -0.9497,
28
+ 0.2503,
29
+ -0.2921
30
+ ],
31
+ "latents_std": [
32
+ 2.8184,
33
+ 1.4541,
34
+ 2.3275,
35
+ 2.6558,
36
+ 1.2196,
37
+ 1.7708,
38
+ 2.6052,
39
+ 2.0743,
40
+ 3.2687,
41
+ 2.1526,
42
+ 2.8652,
43
+ 1.5579,
44
+ 1.6382,
45
+ 1.1253,
46
+ 2.8251,
47
+ 1.916
48
+ ],
49
+ "num_res_blocks": 2,
50
+ "temperal_downsample": [
51
+ false,
52
+ true,
53
+ true
54
+ ],
55
+ "z_dim": 16
56
+ }
qwen-image-edit-series-50x-below-base-model/vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c8bc8b758c649abef9ea407b95408389a3b2f610d0d10fcb054fe171d0a8344
3
+ size 253806966
qwen-image-edit-series-50x-below-base-model/vision_encoder/config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "depth": 32,
3
+ "fullatt_block_indexes": [
4
+ 7,
5
+ 15,
6
+ 23,
7
+ 31
8
+ ],
9
+ "hidden_size": 1280,
10
+ "in_channels": 3,
11
+ "intermediate_size": 3456,
12
+ "num_heads": 16,
13
+ "out_hidden_size": 3584,
14
+ "patch_size": 14,
15
+ "spatial_merge_size": 2,
16
+ "temporal_patch_size": 2,
17
+ "window_size": 112
18
+ }
qwen-image-edit-series-50x-below-base-model/vision_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:176999021e8db0ec8e6a39a17f45b14896c718dd51fd60577426b98a1b1a0bec
3
+ size 362827726
transformer/config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "QwenImageTransformer2DModel",
3
+ "_diffusers_version": "0.36.0.dev0",
4
+ "attention_head_dim": 128,
5
+ "axes_dims_rope": [
6
+ 16,
7
+ 56,
8
+ 56
9
+ ],
10
+ "guidance_embeds": false,
11
+ "in_channels": 64,
12
+ "joint_attention_dim": 3584,
13
+ "num_attention_heads": 24,
14
+ "num_layers": 60,
15
+ "out_channels": 16,
16
+ "patch_size": 2,
17
+ "zero_cond_t": true
18
+ }
transformer/qwen-image-2511-50x-above-lighting-4steps-prequant.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d60b8601d62feffad5368353821f77a8c8c5594ea5d369e48e773d1d1b5b0a9
3
+ size 11420095381
transformer/qwen-image-2511-50x-above-lighting-4steps.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e6c50da18bef20b5bb402bf54a072fe4fd04a4db0ab0ea47123e0e1ec4f7c5a
3
+ size 14498964669