Skip to content

Commit 86a5399

Browse files
authored
Aworld update 0417 (#872)
* Feature/add multi-media skills * Feature/add multi-media skills * Feature/add multi-media skills
1 parent f98f137 commit 86a5399

4 files changed

Lines changed: 754 additions & 30 deletions

File tree

aworld-cli/src/aworld_cli/inner_plugins/smllc/agents/image/image.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,10 @@ async def async_policy(self, observation: Observation, info: Dict[str, Any] = {}
9797
- `image_format`, `image_size_bytes`, `usage`
9898
9999
Current behavior:
100-
- Text-to-image uses JSON `POST /v1/images/generations`
101-
- Image edits use multipart/form-data `POST /v1/images/edits`
100+
- Default backend (`llm_provider=image`, env `TEXT_TO_IMAGE_PROVIDER=image`): text-to-image uses JSON `POST /v1/images/generations`; single-image edits use multipart `POST /v1/images/edits`.
101+
- Kling backend (`llm_provider=kling_image`, env `TEXT_TO_IMAGE_PROVIDER=kling_image`): async task API — `POST /v1/images/generations` (text or one reference image) or `POST /v1/images/multi-image2image` (two or more reference images), then poll until images are ready.
102102
- The agent prefers `response_format=url` by default so upstream callers such as `Aworld` can receive the remote image link.
103-
- For edits, remote images use the `url` parameter; local images and `data:image/...` inputs use the `image` parameter with base64 content
103+
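- For the default (`llm_provider=image`) backend, edits pass remote images via the `url` parameter and local images or `data:image/...` inputs via the `image` parameter as base64 content; the Kling backend accepts URLs or raw base64 for reference images.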
104104
"""
105105
)
106106
def build_image_swarm():

aworld/agents/image_agent.py

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
from aworld.events.util import send_message
5252
from aworld.logs.util import logger
5353
from aworld.models.image_provider import ImageProvider
54+
from aworld.models.kling_image_provider import KlingImageProvider
5455
from aworld.models.model_response import ModelResponse
5556
from aworld.output.base import Output
5657

@@ -82,17 +83,16 @@ class ImageAgent(LLMAgent):
8283

8384
@staticmethod
8485
def _ensure_image_config(conf):
85-
"""Ensure the config uses image provider.
86+
"""Ensure the config uses a supported image backend.
8687
87-
This method forcibly sets the llm_provider to 'image' because
88-
ImageAgent only works with ImageProvider. If the user provided
89-
a different provider, it will be overridden with a warning.
88+
``llm_provider`` is set to ``image`` or ``kling_image``. Any other
89+
value is replaced with ``image`` and a warning is logged.
9090
9191
Args:
9292
conf: Input configuration (AgentConfig, dict, or ConfigDict)
9393
9494
Returns:
95-
A new config object with llm_provider set to 'image'
95+
A new config object with ``llm_provider`` set to ``image`` or ``kling_image``
9696
9797
Raises:
9898
ValueError: If conf is None
@@ -114,19 +114,22 @@ def _ensure_image_config(conf):
114114
elif isinstance(conf, dict):
115115
original_provider = conf.get('llm_provider')
116116

117-
# Log warning if overriding
118-
if original_provider and original_provider != "image":
117+
allowed = ("image", "kling_image")
118+
target_provider = "image"
119+
if original_provider in allowed:
120+
target_provider = original_provider
121+
elif original_provider:
119122
logger.warning(
120123
f"ImageAgent: Overriding llm_provider from '{original_provider}' "
121-
f"to 'image'. ImageAgent only works with ImageProvider."
124+
f"to 'image'. Use 'image' or 'kling_image' for image backends."
122125
)
123-
124-
# Create a new AgentConfig with image provider
126+
127+
# Create a new AgentConfig with resolved image provider
125128
if isinstance(conf, AgentConfig):
126129
# For AgentConfig, we need to modify llm_config
127130
# Get the llm_config dict
128131
llm_config_dict = conf.llm_config.model_dump(exclude_none=True)
129-
llm_config_dict['llm_provider'] = "image"
132+
llm_config_dict['llm_provider'] = target_provider
130133

131134
# Create new ModelConfig
132135
new_llm_config = ModelConfig(**llm_config_dict)
@@ -138,7 +141,7 @@ def _ensure_image_config(conf):
138141
return AgentConfig(**conf_dict)
139142
elif isinstance(conf, dict):
140143
# Modify dict directly
141-
conf['llm_provider'] = "image"
144+
conf['llm_provider'] = target_provider
142145
return conf
143146
else:
144147
# For other types (ConfigDict, etc.), try to handle gracefully
@@ -170,8 +173,8 @@ def __init__(
170173
Args:
171174
name: Agent name
172175
conf: AgentConfig specifying the image provider, API key, and base URL.
173-
Must not be None. The llm_provider will be forcibly set to
174-
'image' regardless of the input value.
176+
Must not be None. ``llm_provider`` is normalized to ``image`` or
177+
``kling_image`` (other values become ``image`` with a warning).
175178
desc: Agent description exposed as tool description
176179
agent_id: Explicit agent ID; auto-generated if None
177180
default_size: Default image size (e.g., "1024x1024", "1024x768", "768x1024")
@@ -200,16 +203,14 @@ def __init__(
200203
**kwargs,
201204
)
202205

203-
# Verify that the provider is ImageProvider
206+
# Verify that the provider is a supported image backend
204207
if self.llm and self.llm.provider:
205-
if not isinstance(self.llm.provider, ImageProvider):
208+
if not isinstance(self.llm.provider, (ImageProvider, KlingImageProvider)):
206209
error_msg = (
207-
f"[ImageAgent:{self.id()}] Expected ImageProvider, "
210+
f"[ImageAgent:{self.id()}] Expected ImageProvider or KlingImageProvider, "
208211
f"but got {type(self.llm.provider).__name__}. "
209-
f"ImageAgent only works with ImageProvider. "
210-
f"Config llm_provider was set to 'image', but provider "
211-
f"initialization failed. Please check your provider registry and "
212-
f"ensure ImageProvider is properly registered."
212+
f"Set llm_provider to 'image' or 'kling_image'. "
213+
f"Provider initialization may have failed; check provider registry and API config."
213214
)
214215
logger.error(error_msg)
215216
raise TypeError(error_msg)
@@ -240,12 +241,13 @@ def _env_or_default(env_key: str, default: Optional[str] = None) -> Optional[str
240241

241242
def _resolve_provider_runtime_config(
242243
self,
243-
provider: ImageProvider,
244+
provider: Any,
244245
*,
245246
has_input_images: bool,
246247
) -> Dict[str, Any]:
247248
prefix = "IMAGE_TO_IMAGE" if has_input_images else "TEXT_TO_IMAGE"
248249
legacy_prefix = "IMAGE" if not has_input_images else None
250+
default_llm_provider = "kling_image" if isinstance(provider, KlingImageProvider) else "image"
249251

250252
def pick(name: str, fallback: Optional[str]) -> Optional[str]:
251253
value = self._env_or_default(f"{prefix}_{name}")
@@ -257,7 +259,7 @@ def pick(name: str, fallback: Optional[str]) -> Optional[str]:
257259
"api_key": pick("API_KEY", provider.api_key),
258260
"base_url": pick("BASE_URL", provider.base_url),
259261
"model_name": pick("MODEL_NAME", provider.model_name),
260-
"provider": pick("PROVIDER", None) or "image",
262+
"provider": pick("PROVIDER", None) or default_llm_provider,
261263
}
262264
temp_value = pick("TEMPERATURE", None)
263265
if temp_value is not None:
@@ -269,7 +271,7 @@ def pick(name: str, fallback: Optional[str]) -> Optional[str]:
269271

270272
def _apply_provider_runtime_config(
271273
self,
272-
provider: ImageProvider,
274+
provider: Any,
273275
runtime_config: Dict[str, Any],
274276
) -> None:
275277
provider.api_key = runtime_config.get("api_key") or provider.api_key
@@ -544,11 +546,11 @@ async def _invoke_image_generation(
544546
provider = self.llm.provider
545547

546548
# Verify provider type
547-
if not isinstance(provider, ImageProvider):
549+
if not isinstance(provider, (ImageProvider, KlingImageProvider)):
548550
raise TypeError(
549-
f"ImageAgent requires ImageProvider, "
551+
f"ImageAgent requires ImageProvider or KlingImageProvider, "
550552
f"but got {type(provider).__name__}. "
551-
f"Please ensure conf.llm_provider is set to 'image'."
553+
f"Set conf.llm_provider to 'image' or 'kling_image'."
552554
)
553555

554556
# Check if provider has the required methods

0 commit comments

Comments
 (0)