Closed
Description
Traceback (most recent call last):
File "/data/disk2/ybZhang/LLaMA-Factory/src/cli_demo.py", line 49, in <module>
main()
File "/data/disk2/ybZhang/LLaMA-Factory/src/cli_demo.py", line 15, in main
chat_model = ChatModel()
File "/data/disk2/ybZhang/LLaMA-Factory/src/llmtuner/chat/chat_model.py", line 25, in __init__
self.engine: "BaseEngine" = VllmEngine(model_args, data_args, finetuning_args, generating_args)
File "/data/disk2/ybZhang/LLaMA-Factory/src/llmtuner/chat/vllm_engine.py", line 37, in __init__
self.model = AsyncLLMEngine.from_engine_args(engine_args)
File "/home/ybZhang/miniconda3/envs/glm-f/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 361, in from_engine_args
engine = cls(
File "/home/ybZhang/miniconda3/envs/glm-f/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 319, in __init__
self.engine = self._init_engine(*args, **kwargs)
File "/home/ybZhang/miniconda3/envs/glm-f/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 437, in _init_engine
return engine_class(*args, **kwargs)
File "/home/ybZhang/miniconda3/envs/glm-f/lib/python3.10/site-packages/vllm/engine/llm_engine.py", line 148, in __init__
self.model_executor = executor_class(
File "/home/ybZhang/miniconda3/envs/glm-f/lib/python3.10/site-packages/vllm/executor/executor_base.py", line 41, in __init__
self._init_executor()
File "/home/ybZhang/miniconda3/envs/glm-f/lib/python3.10/site-packages/vllm/executor/gpu_executor.py", line 22, in _init_executor
self._init_non_spec_worker()
File "/home/ybZhang/miniconda3/envs/glm-f/lib/python3.10/site-packages/vllm/executor/gpu_executor.py", line 50, in _init_non_spec_worker
self.driver_worker.init_device()
File "/home/ybZhang/miniconda3/envs/glm-f/lib/python3.10/site-packages/vllm/worker/worker.py", line 103, in init_device
_check_if_gpu_supports_dtype(self.model_config.dtype)
File "/home/ybZhang/miniconda3/envs/glm-f/lib/python3.10/site-packages/vllm/worker/worker.py", line 327, in _check_if_gpu_supports_dtype
raise ValueError(
ValueError: Bfloat16 is only supported on GPUs with compute capability of at least 8.0. Your Tesla V100-SXM2-32GB GPU has compute capability 7.0. You can use float16 instead by explicitly setting the `dtype` flag in CLI, for example: --dtype=half.
Activity