diff --git a/README.md b/README.md index cad3676..071bcf6 100644 --- a/README.md +++ b/README.md @@ -77,31 +77,32 @@ import requests from PIL import Image from transformers import AutoProcessor, AutoModelForCausalLM +from modelscope import snapshot_download +model_dir = snapshot_download("AI-ModelScope/Florence-2-large") -model = AutoModelForCausalLM.from_pretrained("microsoft/Florence-2-large", trust_remote_code=True) -processor = AutoProcessor.from_pretrained("microsoft/Florence-2-large", trust_remote_code=True) +model = AutoModelForCausalLM.from_pretrained(model_dir, trust_remote_code=True) +processor = AutoProcessor.from_pretrained(model_dir, trust_remote_code=True) -url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true" +prompt = "<OD>" + +url = "https://modelscope.oss-cn-beijing.aliyuncs.com/resource/car.jpg" image = Image.open(requests.get(url, stream=True).raw) -def run_example(task_prompt, text_input=None): - if text_input is None: - prompt = task_prompt - else: - prompt = task_prompt + text_input - inputs = processor(text=prompt, images=image, return_tensors="pt") - generated_ids = model.generate( - input_ids=inputs["input_ids"], - pixel_values=inputs["pixel_values"], - max_new_tokens=1024, - num_beams=3 - ) - generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0] +inputs = processor(text=prompt, images=image, return_tensors="pt") - parsed_answer = processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height)) +generated_ids = model.generate( + input_ids=inputs["input_ids"], + pixel_values=inputs["pixel_values"], + max_new_tokens=1024, + num_beams=3, + do_sample=False +) +generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0] - print(parsed_answer) +parsed_answer = processor.post_process_generation(generated_text, task="<OD>", image_size=(image.width, image.height)) + +print(parsed_answer) 
```