@@ -398,25 +398,36 @@ async def generate(
398398 logger .warning ('chat_template_kwargs["enable_thinking"] is already set, '
399399 'the value will not be overwritten by enable_thinking' )
400400 if messages :
401- prompt = messages
402- self .request_logger .log_prompt (session , prompt = prompt )
403- prompt_input = await self .prompt_processor .get_prompt_input (prompt = prompt ,
404- do_preprocess = do_preprocess ,
405- sequence_start = sequence_start ,
406- adapter_name = adapter_name ,
407- tools = tools ,
408- reasoning_effort = reasoning_effort ,
409- chat_template_kwargs = chat_template_kwargs ,
410- media_io_kwargs = media_io_kwargs ,
411- mm_processor_kwargs = mm_processor_kwargs ,
412- ** kwargs )
413- prompt = prompt_input .get ('prompt' )
414- input_ids = prompt_input .get ('input_ids' )
415- self .request_logger .log_inputs (session ,
416- prompt = prompt ,
417- prompt_token_ids = input_ids ,
418- gen_config = gen_config ,
419- adapter_name = adapter_name )
401+ try :
402+ prompt = messages
403+ self .request_logger .log_prompt (session , prompt = prompt )
404+ prompt_input = await self .prompt_processor .get_prompt_input (prompt = prompt ,
405+ do_preprocess = do_preprocess ,
406+ sequence_start = sequence_start ,
407+ adapter_name = adapter_name ,
408+ tools = tools ,
409+ reasoning_effort = reasoning_effort ,
410+ chat_template_kwargs = chat_template_kwargs ,
411+ media_io_kwargs = media_io_kwargs ,
412+ mm_processor_kwargs = mm_processor_kwargs ,
413+ ** kwargs )
414+ prompt = prompt_input .get ('prompt' )
415+ input_ids = prompt_input .get ('input_ids' )
416+ self .request_logger .log_inputs (session ,
417+ prompt = prompt ,
418+ prompt_token_ids = input_ids ,
419+ gen_config = gen_config ,
420+ adapter_name = adapter_name )
421+ except Exception :
422+ logger .exception ('[generate] error in prompt processing' )
423+ metrics_processor .increase_failed_requests ('error' )
424+ yield GenOut (response = 'in prompt processing error' ,
425+ history_token_len = session .step ,
426+ input_token_len = len (input_ids ) if input_ids is not None else 0 ,
427+ generate_token_len = 0 ,
428+ finish_reason = 'error' ,
429+ token_ids = [])
430+ return
420431 else :
421432 # TODO(lvhan) VLM doesn't support input_ids as an argument.
422433 # Figure out a graceful way to handle the invalid input
0 commit comments