Description
Reminder
- I have read the README and searched the existing issues.
Reproduction
#run
CUDA_VISIBLE_DEVICES=0 llamafactory-cli webchat myconfig/inference/llama3_vllm.yaml
#config
model_name_or_path: /root/autodl-tmp/chat-main/app/serve/model_weight/Qwen-7B
template: qwen
infer_backend: vllm
vllm_enforce_eager: true
#error flow
Running on local URL: http://0.0.0.0:7860
To create a public link, set `share=True` in `launch()`.
Traceback (most recent call last):
  File "/root/condaenv/Qwen/lib/python3.10/site-packages/gradio/queueing.py", line 521, in process_events
    response = await route_utils.call_process_api(
  File "/root/condaenv/Qwen/lib/python3.10/site-packages/gradio/route_utils.py", line 276, in call_process_api
    output = await app.get_blocks().process_api(
  File "/root/condaenv/Qwen/lib/python3.10/site-packages/gradio/blocks.py", line 1945, in process_api
    result = await self.call_function(
  File "/root/condaenv/Qwen/lib/python3.10/site-packages/gradio/blocks.py", line 1525, in call_function
    prediction = await utils.async_iteration(iterator)
  File "/root/condaenv/Qwen/lib/python3.10/site-packages/gradio/utils.py", line 655, in async_iteration
    return await iterator.__anext__()
  File "/root/condaenv/Qwen/lib/python3.10/site-packages/gradio/utils.py", line 648, in __anext__
    return await anyio.to_thread.run_sync(
  File "/root/condaenv/Qwen/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "/root/condaenv/Qwen/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2177, in run_sync_in_worker_thread
    return await future
  File "/root/condaenv/Qwen/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 859, in run
    result = context.run(func, *args)
  File "/root/condaenv/Qwen/lib/python3.10/site-packages/gradio/utils.py", line 631, in run_sync_iterator_async
    return next(iterator)
  File "/root/condaenv/Qwen/lib/python3.10/site-packages/gradio/utils.py", line 814, in gen_wrapper
    response = next(iterator)
  File "/root/LLaMA-Factory/src/llamafactory/webui/chatter.py", line 124, in stream
    for new_text in self.stream_chat(
  File "/root/LLaMA-Factory/src/llamafactory/chat/chat_model.py", line 70, in stream_chat
    yield task.result()
  File "/root/condaenv/Qwen/lib/python3.10/concurrent/futures/_base.py", line 458, in result
    return self.__get_result()
  File "/root/condaenv/Qwen/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
    raise self._exception
  File "/root/LLaMA-Factory/src/llamafactory/chat/chat_model.py", line 82, in astream_chat
    async for new_token in self.engine.stream_chat(messages, system, tools, image, **input_kwargs):
  File "/root/LLaMA-Factory/src/llamafactory/chat/vllm_engine.py", line 208, in stream_chat
    generator = await self._generate(messages, system, tools, image, **input_kwargs)
  File "/root/LLaMA-Factory/src/llamafactory/chat/vllm_engine.py", line 160, in _generate
    result_generator = self.model.generate(
TypeError: AsyncLLMEngine.generate() got an unexpected keyword argument 'prompt'
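For reference, the same code path can be exercised without the Gradio UI through the Python API. This is only a minimal sketch, assuming ChatModel accepts the same keys as the YAML config above; it goes through chat_model.stream_chat into vllm_engine._generate, i.e. the path shown in the traceback.

```python
# Minimal sketch (assumption: ChatModel takes the same keys as the YAML config
# above). This exercises chat_model.stream_chat -> vllm_engine._generate,
# the same call chain as in the traceback, just without Gradio.
from llamafactory.chat import ChatModel

chat_model = ChatModel({
    "model_name_or_path": "/root/autodl-tmp/chat-main/app/serve/model_weight/Qwen-7B",
    "template": "qwen",
    "infer_backend": "vllm",
    "vllm_enforce_eager": True,
})

messages = [{"role": "user", "content": "hello"}]
for new_text in chat_model.stream_chat(messages):
    print(new_text, end="", flush=True)
```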
Expected behavior
The webchat should stream the model's reply. Instead, every generation request fails with TypeError: AsyncLLMEngine.generate() got an unexpected keyword argument 'prompt'.
System Info
- Python 3.10 (conda environment at /root/condaenv/Qwen)
- LLaMA-Factory run from source at /root/LLaMA-Factory
- infer_backend: vllm (vLLM version not recorded)
- Single GPU (CUDA_VISIBLE_DEVICES=0)
Others
The failing call is in src/llamafactory/chat/vllm_engine.py (line 160), which passes prompt= as a keyword argument to AsyncLLMEngine.generate(), but the installed vLLM no longer accepts that keyword. As far as I can tell, recent vLLM releases consolidated the prompt arguments of generate() into a single inputs parameter, so this looks like a version mismatch between LLaMA-Factory's vLLM engine wrapper and the installed vLLM.
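A quick way to check the installed vLLM against this assumption, using only standard-library inspection:

```python
# Print the installed vLLM version and the current signature of
# AsyncLLMEngine.generate() to confirm whether it still accepts 'prompt'.
import inspect

import vllm
from vllm import AsyncLLMEngine

print(vllm.__version__)
print(inspect.signature(AsyncLLMEngine.generate))
```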