Skip to content

Commit 0bf8a07

Browse files
authored
yield an error when prompt processing raises an exception (#4574)
* yield an error when prompt processing raises an exception * fix
1 parent 6172fc2 commit 0bf8a07

1 file changed

Lines changed: 30 additions & 19 deletions

File tree

lmdeploy/serve/core/async_engine.py

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -398,25 +398,36 @@ async def generate(
398398
logger.warning('chat_template_kwargs["enable_thinking"] is already set, '
399399
'the value will not be overwritten by enable_thinking')
400400
if messages:
401-
prompt = messages
402-
self.request_logger.log_prompt(session, prompt=prompt)
403-
prompt_input = await self.prompt_processor.get_prompt_input(prompt=prompt,
404-
do_preprocess=do_preprocess,
405-
sequence_start=sequence_start,
406-
adapter_name=adapter_name,
407-
tools=tools,
408-
reasoning_effort=reasoning_effort,
409-
chat_template_kwargs=chat_template_kwargs,
410-
media_io_kwargs=media_io_kwargs,
411-
mm_processor_kwargs=mm_processor_kwargs,
412-
**kwargs)
413-
prompt = prompt_input.get('prompt')
414-
input_ids = prompt_input.get('input_ids')
415-
self.request_logger.log_inputs(session,
416-
prompt=prompt,
417-
prompt_token_ids=input_ids,
418-
gen_config=gen_config,
419-
adapter_name=adapter_name)
401+
try:
402+
prompt = messages
403+
self.request_logger.log_prompt(session, prompt=prompt)
404+
prompt_input = await self.prompt_processor.get_prompt_input(prompt=prompt,
405+
do_preprocess=do_preprocess,
406+
sequence_start=sequence_start,
407+
adapter_name=adapter_name,
408+
tools=tools,
409+
reasoning_effort=reasoning_effort,
410+
chat_template_kwargs=chat_template_kwargs,
411+
media_io_kwargs=media_io_kwargs,
412+
mm_processor_kwargs=mm_processor_kwargs,
413+
**kwargs)
414+
prompt = prompt_input.get('prompt')
415+
input_ids = prompt_input.get('input_ids')
416+
self.request_logger.log_inputs(session,
417+
prompt=prompt,
418+
prompt_token_ids=input_ids,
419+
gen_config=gen_config,
420+
adapter_name=adapter_name)
421+
except Exception:
422+
logger.exception('[generate] error in prompt processing')
423+
metrics_processor.increase_failed_requests('error')
424+
yield GenOut(response='in prompt processing error',
425+
history_token_len=session.step,
426+
input_token_len=len(input_ids) if input_ids is not None else 0,
427+
generate_token_len=0,
428+
finish_reason='error',
429+
token_ids=[])
430+
return
420431
else:
421432
# TODO(lvhan) VLM doesn't support input_ids as an argument.
422433
# Figure out a graceful way to handle the invalid input

0 commit comments

Comments
 (0)