mirror of
https://www.modelscope.cn/Qwen/Qwen3-VL-8B-Thinking.git
synced 2026-04-02 13:02:53 +08:00
Update README.md
This commit is contained in:
27
README.md
27
README.md
@ -58,10 +58,10 @@ This is the weight repository for Qwen3-VL-8B-Thinking.
|
||||
|
||||
**Multimodal performance**
|
||||
|
||||

|
||||

|
||||
|
||||
**Pure text performance**
|
||||

|
||||

|
||||
|
||||
## Quickstart
|
||||
|
||||
@ -128,6 +128,29 @@ output_text = processor.batch_decode(
|
||||
print(output_text)
|
||||
```
|
||||
|
||||
### Generation Hyperparameters
|
||||
#### VL
|
||||
```bash
|
||||
export greedy='false'
|
||||
export top_p=0.95
|
||||
export top_k=20
|
||||
export repetition_penalty=1.0
|
||||
export presence_penalty=0.0
|
||||
export temperature=1.0
|
||||
export out_seq_length=40960
|
||||
```
|
||||
|
||||
#### Text
|
||||
```bash
|
||||
export greedy='false'
|
||||
export top_p=0.95
|
||||
export top_k=20
|
||||
export repetition_penalty=1.0
|
||||
export presence_penalty=1.5
|
||||
export temperature=1.0
|
||||
export out_seq_length=32768  # for AIME, LCB, and GPQA, it is recommended to set this to 81920
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Citation
|
||||
|
||||
Reference in New Issue
Block a user