QuantFunc committed on Mar 27

Commit

1a265b6

verified ·

1 Parent(s): d03ba50

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
README.md +201 -0
assets/logo.webp +0 -0
precision-config/50x-above-fp4-sample.json +12 -0
precision-config/50x-below-int4-sample.json +12 -0
prequant/qwen-image-edit-2509-50x-above.safetensors +3 -0
prequant/qwen-image-edit-2511-50x-above.safetensors +3 -0
prequant/qwen-image-edit-2511-50x-below.safetensors +3 -0
qwen-image-edit-series-50x-above-base-model/model_index.json +28 -0
qwen-image-edit-series-50x-above-base-model/processor/added_tokens.json +24 -0
qwen-image-edit-series-50x-above-base-model/processor/chat_template.jinja +7 -0
qwen-image-edit-series-50x-above-base-model/processor/merges.txt +0 -0
qwen-image-edit-series-50x-above-base-model/processor/preprocessor_config.json +39 -0
qwen-image-edit-series-50x-above-base-model/processor/special_tokens_map.json +31 -0
qwen-image-edit-series-50x-above-base-model/processor/tokenizer.json +3 -0
qwen-image-edit-series-50x-above-base-model/processor/tokenizer_config.json +208 -0
qwen-image-edit-series-50x-above-base-model/processor/video_preprocessor_config.json +43 -0
qwen-image-edit-series-50x-above-base-model/processor/vocab.json +0 -0
qwen-image-edit-series-50x-above-base-model/quantfunc_config.json +15 -0
qwen-image-edit-series-50x-above-base-model/scheduler/scheduler_config.json +11 -0
qwen-image-edit-series-50x-above-base-model/text_encoder/config.json +132 -0
qwen-image-edit-series-50x-above-base-model/text_encoder/model.safetensors +3 -0
qwen-image-edit-series-50x-above-base-model/tokenizer/added_tokens.json +24 -0
qwen-image-edit-series-50x-above-base-model/tokenizer/chat_template.jinja +54 -0
qwen-image-edit-series-50x-above-base-model/tokenizer/merges.txt +0 -0
qwen-image-edit-series-50x-above-base-model/tokenizer/special_tokens_map.json +31 -0
qwen-image-edit-series-50x-above-base-model/tokenizer/tokenizer_config.json +207 -0
qwen-image-edit-series-50x-above-base-model/tokenizer/vocab.json +0 -0
qwen-image-edit-series-50x-above-base-model/vae/config.json +56 -0
qwen-image-edit-series-50x-above-base-model/vae/diffusion_pytorch_model.safetensors +3 -0
qwen-image-edit-series-50x-above-base-model/vision_encoder/config.json +18 -0
qwen-image-edit-series-50x-above-base-model/vision_encoder/model.safetensors +3 -0
qwen-image-edit-series-50x-below-base-model/model_index.json +28 -0
qwen-image-edit-series-50x-below-base-model/quantfunc_config.json +15 -0
qwen-image-edit-series-50x-below-base-model/scheduler/scheduler_config.json +11 -0
qwen-image-edit-series-50x-below-base-model/text_encoder/config.json +135 -0
qwen-image-edit-series-50x-below-base-model/text_encoder/model.safetensors +3 -0
qwen-image-edit-series-50x-below-base-model/tokenizer/added_tokens.json +24 -0
qwen-image-edit-series-50x-below-base-model/tokenizer/chat_template.jinja +54 -0
qwen-image-edit-series-50x-below-base-model/tokenizer/merges.txt +0 -0
qwen-image-edit-series-50x-below-base-model/tokenizer/special_tokens_map.json +31 -0
qwen-image-edit-series-50x-below-base-model/tokenizer/tokenizer_config.json +207 -0
qwen-image-edit-series-50x-below-base-model/tokenizer/vocab.json +0 -0
qwen-image-edit-series-50x-below-base-model/vae/config.json +56 -0
qwen-image-edit-series-50x-below-base-model/vae/diffusion_pytorch_model.safetensors +3 -0
qwen-image-edit-series-50x-below-base-model/vision_encoder/config.json +18 -0
qwen-image-edit-series-50x-below-base-model/vision_encoder/model.safetensors +3 -0
transformer/config.json +18 -0
transformer/qwen-image-2511-50x-above-lighting-4steps-prequant.safetensors +3 -0
transformer/qwen-image-2511-50x-above-lighting-4steps.safetensors +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+qwen-image-edit-series-50x-above-base-model/processor/tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,201 @@

+---
+license: other
+license_name: quantfunc-model-license
+tags:
+  - image-editing
+  - image-to-image
+  - diffusion
+  - quantized
+  - quantfunc
+language:
+  - en
+---
+# QuantFunc
+<div align="center" style="margin-top: 50px;">
+  <img src="assets/logo.webp" width="300" alt="Logo">
+</div>
+# Qwen-Image-Edit-Series
+Pre-quantized **Qwen-Image-Edit-2511** image editing model series by [QuantFunc](https://github.com/user/quantfunc), with both Lighting and SVDQ backend inference support.
+## Overview
+Qwen-Image-Edit-2511 is an image editing diffusion model distilled from Alibaba Qwen team's image editing model. It can edit input images according to text instructions and supports multi-reference image inputs.
+With the latest QuantFunc ComfyUI plugin, inference achieves a **2x–6x speedup** over mainstream frameworks — e.g., Qwen-Image-Edit 1K image inference in ComfyUI is reduced from 9.6s to 1.6s (tested on RTX 4090).
+## Hardware Requirements
+- Supports NVIDIA RTX 30 series and above
+- The RTX 20 series does not support BF16, which causes significant precision loss in Qwen series model quantization scenarios. Therefore, the RTX 20 series currently supports only Z-Image models.
+## Compatibility
+- The base models in this repository are compatible with **any version** of Qwen-Image-Edit transformer weights
+- The QuantFunc code plugin and ComfyUI plugin are **100% compatible** with previous versions of Qwen-Image-Edit models
+## Directory Structure
+```
+Qwen-Image-Edit-Series/
+├── qwen-image-edit-series-50x-above-base-model/    # Base model, optimized for RTX 50 series and above
+│   ├── text_encoder/          # Qwen2.5-VL text encoder (pre-quantized)
+│   ├── vision_encoder/        # Qwen2.5-VL vision encoder (pre-quantized)
+│   ├── vae/                   # VAE encoder + decoder (~242MB)
+│   ├── tokenizer/             # Tokenizer
+│   ├── processor/             # Image preprocessor
+│   ├── scheduler/             # Scheduler config
+│   ├── model_index.json
+│   └── quantfunc_config.json
+├── qwen-image-edit-series-50x-below-base-model/    # Base model, optimized for RTX 30/40 series (below RTX 50)
+│   └── (same structure as above)
+├── transformer/
+│   ├── config.json
+│   ├── qwen-image-2511-50x-above-lighting-4steps.safetensors           # RTX 50+ Lighting 4-step
+│   ├── qwen-image-2511-50x-above-lighting-4steps-prequant.safetensors  # RTX 50+ Lighting pre-quantized
+│   ├── qwen-image-2511-50x-above-svdq-4steps.safetensors               # RTX 50+ SVDQ 4-step
+│   ├── qwen-image-2511-50x-above-svdq.safetensors                      # RTX 50+ SVDQ full-step
+│   ├── qwen-image-2511-50x-below-lighting-4steps.safetensors           # RTX 30/40 Lighting 4-step
+│   └── qwen-image-2511-50x-below-lighting-4steps-prequant.safetensors  # RTX 30/40 Lighting pre-quantized
+├── prequant/                                                # Pre-quantized modulation weights
+│   ├── qwen-image-edit-2511-50x-above.safetensors           # RTX 50+ mod weights
+│   ├── qwen-image-edit-2511-50x-below.safetensors           # RTX 30/40 mod weights
+│   └── qwen-image-edit-2509-50x-above.safetensors           # Legacy 2509 mod weights
+└── precision-config/                                        # Lighting precision config samples
+    ├── 50x-above-fp4-sample.json                            # FP4 config for RTX 50+
+    └── 50x-below-int4-sample.json                           # INT4 config for RTX 30/40
+```
+## Model Variants
+### By GPU Generation
+| Variant | Target GPU | Description |
+|---------|-----------|-------------|
+| **50x-above** | RTX 50 series and above | Optimized for Blackwell architecture |
+| **50x-below** | RTX 30/40 series | Broadly compatible |
+### By Inference Backend
+| Backend | File Suffix | Features |
+|---------|------------|----------|
+| **Lighting 4-step** | `*-lighting-4steps.safetensors` | Fastest inference with fused operators |
+| **SVDQ 4-step** | `*-svdq-4steps.safetensors` | 4-step distilled + SVDQ quantization, runtime LoRA support |
+| **SVDQ full-step** | `*-svdq.safetensors` | Default step count inference, runtime LoRA support |
+> The base-model and transformer must use the **same variant** (both above or both below).
+## Quick Start
+### Download
+```bash
+pip install modelscope
+```
+```python
+from modelscope import snapshot_download
+model_dir = snapshot_download('QuantFunc/Qwen-Image-Edit-Series')
+```
+### Lighting Backend Inference
+```bash
+quantfunc \
+  --model-dir Qwen-Image-Edit-Series/qwen-image-edit-series-50x-above-base-model \
+  --transformer Qwen-Image-Edit-Series/transformer/qwen-image-2511-50x-above-lighting-4steps.safetensors \
+  --auto-optimize --model-backend lighting \
+  --ref-image input.png \
+  --prompt "make the sky more purple and add stars" \
+  --output output.png --steps 4
+```
+### SVDQ Backend Inference
+```bash
+quantfunc \
+  --model-dir Qwen-Image-Edit-Series/qwen-image-edit-series-50x-above-base-model \
+  --transformer Qwen-Image-Edit-Series/transformer/qwen-image-2511-50x-above-svdq-4steps.safetensors \
+  --auto-optimize --model-backend svdq \
+  --ref-image input.png \
+  --prompt "change the background to a beach scene" \
+  --output output.png --steps 4
+```
+### SVDQ + LoRA
+```bash
+quantfunc \
+  --model-dir Qwen-Image-Edit-Series/qwen-image-edit-series-50x-above-base-model \
+  --transformer Qwen-Image-Edit-Series/transformer/qwen-image-2511-50x-above-svdq-4steps.safetensors \
+  --auto-optimize --model-backend svdq \
+  --lora /path/to/style_lora.safetensors:0.8 \
+  --ref-image input.png \
+  --prompt "apply anime style to the image" \
+  --output output.png --steps 4
+```
+## SVDQ and Lighting Backends
+This repository provides both **Lighting** and **SVDQ** backend pre-quantized models:
+| Feature | Lighting | SVDQ |
+|---------|----------|------|
+| **Quantization** | Per-layer mixed precision (FP4/INT4/FP8/INT8) | Nunchaku-based holistic pre-quantization + Rotation quantization |
+| **LoRA Integration** | Real-time quantization — build a custom model in 5 minutes with zero speed loss, integrating any number of LoRAs | Runtime low-rank pathway |
+| **Ecosystem** | QuantFunc native | Compatible with the widely-adopted Nunchaku ecosystem, enhanced with Rotation quantization and Auto Rank dynamic rank optimization |
+| **Flexibility** | Per-layer precision control | Precision fixed at export time |
+| **Use Cases** | Rapid personal model customization, batch LoRA integration | Leverage Nunchaku ecosystem, runtime dynamic LoRA |
+## Pre-quantized Modulation Weights (prequant/)
+The `prequant/` directory contains **pre-quantized modulation (mod) weights** extracted from SVDQ models. These are used with the Lighting backend to provide high-quality modulation without runtime quantization overhead.
+**Usage with Lighting backend:**
+```bash
+quantfunc \
+  --model-dir Qwen-Image-Edit-Series/qwen-image-edit-series-50x-above-base-model \
+  --model-backend lighting \
+  --precision-config Qwen-Image-Edit-Series/precision-config/50x-above-fp4-sample.json \
+  --mod-weights Qwen-Image-Edit-Series/prequant/qwen-image-edit-2511-50x-above.safetensors \
+  --rotation-block-size 256 \
+  --ref-image input.png --prompt "edit instruction" \
+  --steps 4 --auto-optimize
+```
+Alternatively, use the **pre-quantized Lighting transformer** for instant loading (no runtime quantization):
+```bash
+quantfunc \
+  --model-dir Qwen-Image-Edit-Series/qwen-image-edit-series-50x-above-base-model \
+  --transformer Qwen-Image-Edit-Series/transformer/qwen-image-2511-50x-above-lighting-4steps-prequant.safetensors \
+  --model-backend lighting \
+  --ref-image input.png --prompt "edit instruction" \
+  --steps 4 --auto-optimize
+```
+## Precision Config (precision-config/)
+Sample per-layer precision configurations for the Lighting backend:
+| File | Target GPU | Precision |
+|------|-----------|-----------|
+| `50x-above-fp4-sample.json` | RTX 50+ | FP4 attention and MLP fc1 + AF8WF4 MLP fc2 + INT8 modulation |
+| `50x-below-int4-sample.json` | RTX 30/40 | INT4 attention and MLP layers + INT8 modulation |
+These configs control the quantization precision of each transformer sub-layer. Customize them for your speed/quality trade-off.
+## Related Repositories
+- [QuantFunc/Qwen-Image-Series](https://modelscope.cn/models/QuantFunc/Qwen-Image-Series) — Qwen-Image text-to-image (60 layers)
+- [QuantFunc/Z-Image-Series](https://modelscope.cn/models/QuantFunc/Z-Image-Series) — Z-Image-Turbo text-to-image (lightweight, fast)
+## License
+The pre-quantized model weights in this repository are derived from the original models. Users must comply with the original model's license agreement. The QuantFunc inference engine and its plugins (including the ComfyUI plugin) are licensed separately — see official QuantFunc channels for details.
+For models quantized from commercially licensed models, users are responsible for obtaining the necessary commercial licenses from the original model providers.

assets/logo.webp ADDED Viewed

precision-config/50x-above-fp4-sample.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "transformer_blocks.attn.to_qkv": "f4",
+  "transformer_blocks.attn.add_qkv_proj": "f4",
+  "transformer_blocks.attn.to_out": "f4",
+  "transformer_blocks.attn.to_add_out": "f4",
+  "transformer_blocks.img_mlp.net.0.proj": "f4",
+  "transformer_blocks.img_mlp.net.2": "af8wf4",
+  "transformer_blocks.txt_mlp.net.0.proj": "f4",
+  "transformer_blocks.txt_mlp.net.2": "af8wf4",
+  "transformer_blocks.img_mod": "i8",
+  "transformer_blocks.txt_mod": "i8"
+}

precision-config/50x-below-int4-sample.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "transformer_blocks.attn.to_qkv": "i4",
+  "transformer_blocks.attn.add_qkv_proj": "i4",
+  "transformer_blocks.attn.to_out": "i4",
+  "transformer_blocks.attn.to_add_out": "i4",
+  "transformer_blocks.img_mlp.net.0.proj": "i4",
+  "transformer_blocks.img_mlp.net.2": "i4",
+  "transformer_blocks.txt_mlp.net.0.proj": "i4",
+  "transformer_blocks.txt_mlp.net.2": "i4",
+  "transformer_blocks.img_mod": "i8",
+  "transformer_blocks.txt_mod": "i8"
+}

prequant/qwen-image-edit-2509-50x-above.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0879c424a486adec58607aa2992a9dcbe55e69a567ac07f5d8d23bf1bfc19be2
+size 3826539224

prequant/qwen-image-edit-2511-50x-above.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e89c7fb91f24919654afffe3c6a83b21d4f3d5896dd9efa7b5734baec4063bc8
+size 3826550584

prequant/qwen-image-edit-2511-50x-below.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c4ef632343b0211e5cf8e943cdd286734ed23cf2ad9c1ff0aab19a724e3b5841
+size 3826550536

qwen-image-edit-series-50x-above-base-model/model_index.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "_class_name": "QwenImageEditPlusPipeline",
+  "_diffusers_version": "0.36.0.dev0",
+  "processor": [
+    "transformers",
+    "Qwen2VLProcessor"
+  ],
+  "scheduler": [
+    "diffusers",
+    "FlowMatchEulerDiscreteScheduler"
+  ],
+  "text_encoder": [
+    "transformers",
+    "Qwen2_5_VLForConditionalGeneration"
+  ],
+  "tokenizer": [
+    "transformers",
+    "Qwen2Tokenizer"
+  ],
+  "transformer": [
+    "diffusers",
+    "QwenImageTransformer2DModel"
+  ],
+  "vae": [
+    "diffusers",
+    "AutoencoderKLQwenImage"
+  ]
+}

qwen-image-edit-series-50x-above-base-model/processor/added_tokens.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

qwen-image-edit-series-50x-above-base-model/processor/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,7 @@

+{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
+You are a helpful assistant.<|im_end|>
+{% endif %}<|im_start|>{{ message['role'] }}
+{% if message['content'] is string %}{{ message['content'] }}<|im_end|>
+{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
+{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
+{% endif %}

qwen-image-edit-series-50x-above-base-model/processor/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

qwen-image-edit-series-50x-above-base-model/processor/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "crop_size": null,
+  "data_format": "channels_first",
+  "default_to_square": true,
+  "device": null,
+  "disable_grouping": null,
+  "do_center_crop": null,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_pad": null,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "Qwen2VLImageProcessorFast",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "input_data_format": null,
+  "max_pixels": 12845056,
+  "merge_size": 2,
+  "min_pixels": 3136,
+  "pad_size": null,
+  "patch_size": 14,
+  "processor_class": "Qwen2VLProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "return_tensors": null,
+  "size": {
+    "longest_edge": 12845056,
+    "shortest_edge": 3136
+  },
+  "temporal_patch_size": 2
+}

qwen-image-edit-series-50x-above-base-model/processor/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

qwen-image-edit-series-50x-above-base-model/processor/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
+size 11421896

qwen-image-edit-series-50x-above-base-model/processor/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,208 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "processor_class": "Qwen2VLProcessor",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

qwen-image-edit-series-50x-above-base-model/processor/video_preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "crop_size": null,
+  "data_format": "channels_first",
+  "default_to_square": true,
+  "device": null,
+  "do_center_crop": null,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "do_sample_frames": false,
+  "fps": null,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "input_data_format": null,
+  "max_frames": 768,
+  "max_pixels": 12845056,
+  "merge_size": 2,
+  "min_frames": 4,
+  "min_pixels": 3136,
+  "num_frames": null,
+  "pad_size": null,
+  "patch_size": 14,
+  "processor_class": "Qwen2VLProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "return_metadata": false,
+  "size": {
+    "longest_edge": 12845056,
+    "shortest_edge": 3136
+  },
+  "temporal_patch_size": 2,
+  "video_metadata": null,
+  "video_processor_type": "Qwen2VLVideoProcessor"
+}

qwen-image-edit-series-50x-above-base-model/processor/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

qwen-image-edit-series-50x-above-base-model/quantfunc_config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "backend": "lighting",
+  "model_id": "a330be1d-cc9d-4e7b-aa0d-f65b11ae5108",
+  "obfuscated": true,
+  "text_encoder": {
+    "prequantized": true,
+    "text_precision": "fp4",
+    "use_rotation": true
+  },
+  "vision_encoder": {
+    "prequantized": true,
+    "vision_quant": "fp4",
+    "vision_rotation": true
+  }
+}

qwen-image-edit-series-50x-above-base-model/scheduler/scheduler_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "_class_name": "FlowMatchEulerDiscreteScheduler",
+  "base_image_seq_len": 256,
+  "base_shift": 1.0986122886681098,
+  "max_image_seq_len": 8192,
+  "max_shift": 1.0986122886681098,
+  "num_train_timesteps": 1000,
+  "shift": 1.0,
+  "time_shift_type": "exponential",
+  "use_dynamic_shifting": true
+}

qwen-image-edit-series-50x-above-base-model/text_encoder/config.json ADDED Viewed

	@@ -0,0 +1,132 @@

+{
+  "architectures": [
+    "Qwen2_5_VLForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "dtype": "bfloat16",
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 3584,
+  "image_token_id": 151655,
+  "initializer_range": 0.02,
+  "intermediate_size": 18944,
+  "max_position_embeddings": 128000,
+  "max_window_layers": 28,
+  "model_type": "qwen2_5_vl",
+  "num_attention_heads": 28,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 4,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": {
+    "mrope_section": [
+      16,
+      24,
+      24
+    ],
+    "rope_type": "default",
+    "type": "default"
+  },
+  "rope_theta": 1000000.0,
+  "sliding_window": 32768,
+  "text_config": {
+    "_name_or_path": "/cpfs01/haoyangzhang/pretrained_weights/Qwen2.5-VL",
+    "architectures": [
+      "Qwen2_5_VLForConditionalGeneration"
+    ],
+    "attention_dropout": 0.0,
+    "bos_token_id": 151643,
+    "dtype": "float32",
+    "eos_token_id": 151645,
+    "hidden_act": "silu",
+    "hidden_size": 3584,
+    "initializer_range": 0.02,
+    "intermediate_size": 18944,
+    "layer_types": [
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention"
+    ],
+    "max_position_embeddings": 128000,
+    "max_window_layers": 28,
+    "model_type": "qwen2_5_vl_text",
+    "num_attention_heads": 28,
+    "num_hidden_layers": 28,
+    "num_key_value_heads": 4,
+    "rms_norm_eps": 1e-06,
+    "rope_scaling": {
+      "mrope_section": [
+        16,
+        24,
+        24
+      ],
+      "rope_type": "default",
+      "type": "default"
+    },
+    "rope_theta": 1000000.0,
+    "sliding_window": null,
+    "use_cache": true,
+    "use_sliding_window": false,
+    "vision_token_id": 151654,
+    "vocab_size": 152064
+  },
+  "tie_word_embeddings": false,
+  "transformers_version": "4.57.1",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "video_token_id": 151656,
+  "vision_config": {
+    "depth": 32,
+    "dtype": "float32",
+    "fullatt_block_indexes": [
+      7,
+      15,
+      23,
+      31
+    ],
+    "hidden_act": "silu",
+    "hidden_size": 1280,
+    "in_channels": 3,
+    "in_chans": 3,
+    "initializer_range": 0.02,
+    "intermediate_size": 3420,
+    "model_type": "qwen2_5_vl",
+    "num_heads": 16,
+    "out_hidden_size": 3584,
+    "patch_size": 14,
+    "spatial_merge_size": 2,
+    "spatial_patch_size": 14,
+    "temporal_patch_size": 2,
+    "tokens_per_second": 2,
+    "window_size": 112
+  },
+  "vision_end_token_id": 151653,
+  "vision_start_token_id": 151652,
+  "vision_token_id": 151654,
+  "vocab_size": 152064
+}

qwen-image-edit-series-50x-above-base-model/text_encoder/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7d7714228a7618ec4cd222a80fc9575ae8627829bcff70f9ad641de4bed677a
+size 4761171927

qwen-image-edit-series-50x-above-base-model/tokenizer/added_tokens.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

qwen-image-edit-series-50x-above-base-model/tokenizer/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,54 @@

+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0]['role'] == 'system' %}
+        {{- messages[0]['content'] }}
+    {%- else %}
+        {{- 'You are a helpful assistant.' }}
+    {%- endif %}
+    {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0]['role'] == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
+    {%- else %}
+        {{- '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- for message in messages %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
+        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {{- '<|im_start|>' + message.role }}
+        {%- if message.content %}
+            {{- '\n' + message.content }}
+        {%- endif %}
+        {%- for tool_call in message.tool_calls %}
+            {%- if tool_call.function is defined %}
+                {%- set tool_call = tool_call.function %}
+            {%- endif %}
+            {{- '\n<tool_call>\n{"name": "' }}
+            {{- tool_call.name }}
+            {{- '", "arguments": ' }}
+            {{- tool_call.arguments | tojson }}
+            {{- '}\n</tool_call>' }}
+        {%- endfor %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- message.content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+{%- endif %}

qwen-image-edit-series-50x-above-base-model/tokenizer/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

qwen-image-edit-series-50x-above-base-model/tokenizer/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

qwen-image-edit-series-50x-above-base-model/tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,207 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

qwen-image-edit-series-50x-above-base-model/tokenizer/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

qwen-image-edit-series-50x-above-base-model/vae/config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "_class_name": "AutoencoderKLQwenImage",
+  "_diffusers_version": "0.36.0.dev0",
+  "attn_scales": [],
+  "base_dim": 96,
+  "dim_mult": [
+    1,
+    2,
+    4,
+    4
+  ],
+  "dropout": 0.0,
+  "latents_mean": [
+    -0.7571,
+    -0.7089,
+    -0.9113,
+    0.1075,
+    -0.1745,
+    0.9653,
+    -0.1517,
+    1.5508,
+    0.4134,
+    -0.0715,
+    0.5517,
+    -0.3632,
+    -0.1922,
+    -0.9497,
+    0.2503,
+    -0.2921
+  ],
+  "latents_std": [
+    2.8184,
+    1.4541,
+    2.3275,
+    2.6558,
+    1.2196,
+    1.7708,
+    2.6052,
+    2.0743,
+    3.2687,
+    2.1526,
+    2.8652,
+    1.5579,
+    1.6382,
+    1.1253,
+    2.8251,
+    1.916
+  ],
+  "num_res_blocks": 2,
+  "temperal_downsample": [
+    false,
+    true,
+    true
+  ],
+  "z_dim": 16
+}

qwen-image-edit-series-50x-above-base-model/vae/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c8bc8b758c649abef9ea407b95408389a3b2f610d0d10fcb054fe171d0a8344
+size 253806966

qwen-image-edit-series-50x-above-base-model/vision_encoder/config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "depth": 32,
+  "fullatt_block_indexes": [
+    7,
+    15,
+    23,
+    31
+  ],
+  "hidden_size": 1280,
+  "in_channels": 3,
+  "intermediate_size": 3420,
+  "num_heads": 16,
+  "out_hidden_size": 3584,
+  "patch_size": 14,
+  "spatial_merge_size": 2,
+  "temporal_patch_size": 2,
+  "window_size": 112
+}

qwen-image-edit-series-50x-above-base-model/vision_encoder/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49ba688655e579bb242441e3523939983927f739e90f095ae9fbaf3168742c64
+size 382927193

qwen-image-edit-series-50x-below-base-model/model_index.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "_class_name": "QwenImageEditPipeline",
+  "_diffusers_version": "0.35.0.dev0",
+  "processor": [
+    "transformers",
+    "Qwen2VLProcessor"
+  ],
+  "scheduler": [
+    "diffusers",
+    "FlowMatchEulerDiscreteScheduler"
+  ],
+  "text_encoder": [
+    "transformers",
+    "Qwen2_5_VLForConditionalGeneration"
+  ],
+  "tokenizer": [
+    "transformers",
+    "Qwen2Tokenizer"
+  ],
+  "transformer": [
+    "diffusers",
+    "QwenImageTransformer2DModel"
+  ],
+  "vae": [
+    "diffusers",
+    "AutoencoderKLQwenImage"
+  ]
+}

qwen-image-edit-series-50x-below-base-model/quantfunc_config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "backend": "lighting",
+  "model_id": "54d5a9c6-806c-40d5-bf76-1683edace3f8",
+  "obfuscated": true,
+  "text_encoder": {
+    "prequantized": true,
+    "text_precision": "int4",
+    "use_rotation": true
+  },
+  "vision_encoder": {
+    "prequantized": true,
+    "vision_quant": "int4",
+    "vision_rotation": true
+  }
+}

qwen-image-edit-series-50x-below-base-model/scheduler/scheduler_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "_class_name": "FlowMatchEulerDiscreteScheduler",
+  "base_image_seq_len": 256,
+  "base_shift": 1.0986122886681098,
+  "max_image_seq_len": 8192,
+  "max_shift": 1.0986122886681098,
+  "num_train_timesteps": 1000,
+  "shift": 1.0,
+  "time_shift_type": "exponential",
+  "use_dynamic_shifting": true
+}

qwen-image-edit-series-50x-below-base-model/text_encoder/config.json ADDED Viewed

	@@ -0,0 +1,135 @@

+{
+  "architectures": [
+    "Qwen2_5_VLForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 3584,
+  "image_token_id": 151655,
+  "initializer_range": 0.02,
+  "intermediate_size": 18944,
+  "max_position_embeddings": 128000,
+  "max_window_layers": 28,
+  "model_type": "qwen2_5_vl",
+  "num_attention_heads": 28,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 4,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": {
+    "mrope_section": [
+      16,
+      24,
+      24
+    ],
+    "rope_type": "default",
+    "type": "default"
+  },
+  "rope_theta": 1000000.0,
+  "sliding_window": 32768,
+  "text_config": {
+    "architectures": [
+      "Qwen2_5_VLForConditionalGeneration"
+    ],
+    "attention_dropout": 0.0,
+    "bos_token_id": 151643,
+    "eos_token_id": 151645,
+    "hidden_act": "silu",
+    "hidden_size": 3584,
+    "image_token_id": null,
+    "initializer_range": 0.02,
+    "intermediate_size": 18944,
+    "layer_types": [
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention"
+    ],
+    "max_position_embeddings": 128000,
+    "max_window_layers": 28,
+    "model_type": "qwen2_5_vl_text",
+    "num_attention_heads": 28,
+    "num_hidden_layers": 28,
+    "num_key_value_heads": 4,
+    "rms_norm_eps": 1e-06,
+    "rope_scaling": {
+      "mrope_section": [
+        16,
+        24,
+        24
+      ],
+      "rope_type": "default",
+      "type": "default"
+    },
+    "rope_theta": 1000000.0,
+    "sliding_window": null,
+    "torch_dtype": "float32",
+    "use_cache": true,
+    "use_sliding_window": false,
+    "video_token_id": null,
+    "vision_end_token_id": 151653,
+    "vision_start_token_id": 151652,
+    "vision_token_id": 151654,
+    "vocab_size": 152064
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.55.2",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "video_token_id": 151656,
+  "vision_config": {
+    "depth": 32,
+    "fullatt_block_indexes": [
+      7,
+      15,
+      23,
+      31
+    ],
+    "hidden_act": "silu",
+    "hidden_size": 1280,
+    "in_channels": 3,
+    "in_chans": 3,
+    "initializer_range": 0.02,
+    "intermediate_size": 3420,
+    "model_type": "qwen2_5_vl",
+    "num_heads": 16,
+    "out_hidden_size": 3584,
+    "patch_size": 14,
+    "spatial_merge_size": 2,
+    "spatial_patch_size": 14,
+    "temporal_patch_size": 2,
+    "tokens_per_second": 2,
+    "torch_dtype": "float32",
+    "window_size": 112
+  },
+  "vision_end_token_id": 151653,
+  "vision_start_token_id": 151652,
+  "vision_token_id": 151654,
+  "vocab_size": 152064
+}

qwen-image-edit-series-50x-below-base-model/text_encoder/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ddd766271590adcc936d8be074355850264a50f4321ad700621ad9e32be746c8
+size 4557255789

qwen-image-edit-series-50x-below-base-model/tokenizer/added_tokens.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

qwen-image-edit-series-50x-below-base-model/tokenizer/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,54 @@

+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0]['role'] == 'system' %}
+        {{- messages[0]['content'] }}
+    {%- else %}
+        {{- 'You are a helpful assistant.' }}
+    {%- endif %}
+    {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0]['role'] == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
+    {%- else %}
+        {{- '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- for message in messages %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
+        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {{- '<|im_start|>' + message.role }}
+        {%- if message.content %}
+            {{- '\n' + message.content }}
+        {%- endif %}
+        {%- for tool_call in message.tool_calls %}
+            {%- if tool_call.function is defined %}
+                {%- set tool_call = tool_call.function %}
+            {%- endif %}
+            {{- '\n<tool_call>\n{"name": "' }}
+            {{- tool_call.name }}
+            {{- '", "arguments": ' }}
+            {{- tool_call.arguments | tojson }}
+            {{- '}\n</tool_call>' }}
+        {%- endfor %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- message.content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+{%- endif %}

qwen-image-edit-series-50x-below-base-model/tokenizer/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

qwen-image-edit-series-50x-below-base-model/tokenizer/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

qwen-image-edit-series-50x-below-base-model/tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,207 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

qwen-image-edit-series-50x-below-base-model/tokenizer/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

qwen-image-edit-series-50x-below-base-model/vae/config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "_class_name": "AutoencoderKLQwenImage",
+  "_diffusers_version": "0.35.0.dev0",
+  "attn_scales": [],
+  "base_dim": 96,
+  "dim_mult": [
+    1,
+    2,
+    4,
+    4
+  ],
+  "dropout": 0.0,
+  "latents_mean": [
+    -0.7571,
+    -0.7089,
+    -0.9113,
+    0.1075,
+    -0.1745,
+    0.9653,
+    -0.1517,
+    1.5508,
+    0.4134,
+    -0.0715,
+    0.5517,
+    -0.3632,
+    -0.1922,
+    -0.9497,
+    0.2503,
+    -0.2921
+  ],
+  "latents_std": [
+    2.8184,
+    1.4541,
+    2.3275,
+    2.6558,
+    1.2196,
+    1.7708,
+    2.6052,
+    2.0743,
+    3.2687,
+    2.1526,
+    2.8652,
+    1.5579,
+    1.6382,
+    1.1253,
+    2.8251,
+    1.916
+  ],
+  "num_res_blocks": 2,
+  "temperal_downsample": [
+    false,
+    true,
+    true
+  ],
+  "z_dim": 16
+}

qwen-image-edit-series-50x-below-base-model/vae/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c8bc8b758c649abef9ea407b95408389a3b2f610d0d10fcb054fe171d0a8344
+size 253806966

qwen-image-edit-series-50x-below-base-model/vision_encoder/config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "depth": 32,
+  "fullatt_block_indexes": [
+    7,
+    15,
+    23,
+    31
+  ],
+  "hidden_size": 1280,
+  "in_channels": 3,
+  "intermediate_size": 3456,
+  "num_heads": 16,
+  "out_hidden_size": 3584,
+  "patch_size": 14,
+  "spatial_merge_size": 2,
+  "temporal_patch_size": 2,
+  "window_size": 112
+}

qwen-image-edit-series-50x-below-base-model/vision_encoder/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:176999021e8db0ec8e6a39a17f45b14896c718dd51fd60577426b98a1b1a0bec
+size 362827726

transformer/config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "_class_name": "QwenImageTransformer2DModel",
+  "_diffusers_version": "0.36.0.dev0",
+  "attention_head_dim": 128,
+  "axes_dims_rope": [
+    16,
+    56,
+    56
+  ],
+  "guidance_embeds": false,
+  "in_channels": 64,
+  "joint_attention_dim": 3584,
+  "num_attention_heads": 24,
+  "num_layers": 60,
+  "out_channels": 16,
+  "patch_size": 2,
+  "zero_cond_t": true
+}

transformer/qwen-image-2511-50x-above-lighting-4steps-prequant.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d60b8601d62feffad5368353821f77a8c8c5594ea5d369e48e773d1d1b5b0a9
+size 11420095381

transformer/qwen-image-2511-50x-above-lighting-4steps.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2e6c50da18bef20b5bb402bf54a072fe4fd04a4db0ab0ea47123e0e1ec4f7c5a
+size 14498964669