Skip to content

Commit a1addec

Browse files
authored
Don't ensure ASCII in LLM-visible JSON strings (MemMachine#1378)
1 parent 49551ec commit a1addec

8 files changed

Lines changed: 349 additions & 8 deletions

File tree

packages/client/client_tests/test_format.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,59 @@ def test_content_json_escaped(self):
9999
result = format_episodes([ep])
100100
assert json.dumps('She said "hello"') in result
101101

102+
def test_non_ascii_content_preserved_literally(self):
103+
"""Non-ASCII characters must appear literally in the LLM context, not
104+
as ``\\uXXXX`` escapes — escaping bloats token counts and degrades
105+
recall on multilingual content."""
106+
ep = EpisodeResponse(
107+
uid="1",
108+
content="寿司 café 🍕 naïve résumé Привет",
109+
producer_id="user_1",
110+
producer_role="user",
111+
created_at=datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc),
112+
)
113+
result = format_episodes([ep])
114+
assert "寿司" in result
115+
assert "café" in result
116+
assert "🍕" in result
117+
assert "naïve" in result
118+
assert "résumé" in result
119+
assert "Привет" in result
120+
# Sanity: no \uXXXX escapes remain for the CJK, Cyrillic, accented, or emoji text.
121+
assert "\\u" not in result
122+
123+
def test_non_ascii_content_lossless_roundtrip(self):
124+
"""The JSON-encoded portion must round-trip back to the original
125+
string so downstream LLM consumers (and any client-side
126+
post-processing) still see correct text."""
127+
original = '日本語 — "quoted" + emoji 🎉'
128+
ep = EpisodeResponse(
129+
uid="1",
130+
content=original,
131+
producer_id="user_1",
132+
producer_role="user",
133+
created_at=None,
134+
)
135+
result = format_episodes([ep])
136+
json_part = result.removeprefix("user_1: ").rstrip("\n")
137+
assert json.loads(json_part) == original
138+
139+
def test_output_is_utf8_encodable(self):
140+
"""The LLM-visible string must be safe to send over UTF-8 transports
141+
(HTTP body, logging sinks). ``ensure_ascii=False`` produces
142+
unescaped surrogates only for malformed inputs; clean Unicode must
143+
encode without error."""
144+
ep = EpisodeResponse(
145+
uid="1",
146+
content="Mixed: ASCII + 中文 + 🚀",
147+
producer_id="user_1",
148+
producer_role="user",
149+
created_at=datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc),
150+
)
151+
result = format_episodes([ep])
152+
encoded = result.encode("utf-8")
153+
assert encoded.decode("utf-8") == result
154+
102155

103156
class TestFormatSemanticMemories:
104157
"""Tests for format_semantic_memories."""
@@ -146,6 +199,32 @@ def test_groups_by_tag(self):
146199
"background": {"role": "engineer"},
147200
}
148201

202+
def test_non_ascii_value_preserved_literally(self):
203+
feature = SemanticFeature(
204+
category="profile",
205+
tag="prefs",
206+
feature_name="favorite_food",
207+
value="寿司 🍣",
208+
)
209+
result = format_semantic_memories([feature])
210+
assert "寿司" in result
211+
assert "🍣" in result
212+
assert "\\u" not in result
213+
# And the JSON is still valid.
214+
assert json.loads(result) == {"prefs": {"favorite_food": "寿司 🍣"}}
215+
216+
def test_non_ascii_tag_and_feature_name_preserved(self):
217+
feature = SemanticFeature(
218+
category="profile",
219+
tag="préférences",
220+
feature_name="種類",
221+
value="ramen",
222+
)
223+
result = format_semantic_memories([feature])
224+
assert "préférences" in result
225+
assert "種類" in result
226+
assert json.loads(result) == {"préférences": {"種類": "ramen"}}
227+
149228
def test_metadata_excluded(self):
150229
feature = SemanticFeature(
151230
set_id="set_1",

packages/client/src/memmachine_client/format.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ def format_episodes(episodes: Iterable[EpisodeResponse | Episode]) -> str:
4343
if episode.created_at is not None:
4444
date_str = episode.created_at.strftime("%A, %B %d, %Y")
4545
time_str = episode.created_at.strftime("%I:%M %p")
46-
result += f"[{date_str} at {time_str}] {episode.producer_id}: {json.dumps(episode.content)}\n"
46+
result += f"[{date_str} at {time_str}] {episode.producer_id}: {json.dumps(episode.content, ensure_ascii=False)}\n"
4747
else:
48-
result += f"{episode.producer_id}: {json.dumps(episode.content)}\n"
48+
result += f"{episode.producer_id}: {json.dumps(episode.content, ensure_ascii=False)}\n"
4949
return result
5050

5151

@@ -66,7 +66,7 @@ def format_semantic_memories(features: Iterable[SemanticFeature]) -> str:
6666
structured: dict[str, dict[str, str]] = {}
6767
for feature in features:
6868
structured.setdefault(feature.tag, {})[feature.feature_name] = feature.value
69-
return json.dumps(structured)
69+
return json.dumps(structured, ensure_ascii=False)
7070

7171

7272
def format_search_result(result: SearchResult) -> str:

packages/server/server_tests/memmachine_server/common/episode_store/test_episode_model.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Test for the Episode models."""
22

3+
import json
34
from datetime import UTC, datetime
45

56
import pytest
@@ -60,3 +61,51 @@ def test_episodes_to_string_with_episode_response(base_episode_data):
6061
assert len(lines) == 1
6162
line0 = '[Wednesday, January 14, 2026 at 01:30 PM] user_1: "Hello world"'
6263
assert lines[0] == line0
64+
65+
66+
def test_episodes_to_string_message_preserves_non_ascii(base_episode_data):
67+
"""Non-ASCII content must appear literally in the LLM context, not as
68+
``\\uXXXX`` escapes — escapes inflate the prompt token count and
69+
obscure semantic content."""
70+
base_episode_data["content"] = "寿司 café 🍕 Привет"
71+
ep = Episode(**base_episode_data)
72+
result = episodes_to_string([ep])
73+
74+
assert "寿司" in result
75+
assert "café" in result
76+
assert "🍕" in result
77+
assert "Привет" in result
78+
assert "\\u" not in result
79+
80+
# The JSON-quoted content must round-trip back to the original string.
81+
line = result.rstrip("\n")
82+
json_part = line.split(": ", 1)[1]
83+
assert json.loads(json_part) == "寿司 café 🍕 Привет"
84+
85+
86+
def test_episodes_to_string_non_message_preserves_non_ascii(base_episode_data):
87+
"""The ``case _:`` fallback (e.g. an EpisodeResponse with no episode
88+
type) must also preserve Unicode literally."""
89+
fallback_data = {k: v for k, v in base_episode_data.items() if k != "session_key"}
90+
fallback_data["episode_type"] = None
91+
fallback_data["content"] = "要約: ☕ résumé"
92+
er = EpisodeResponse(**fallback_data)
93+
result = episodes_to_string([er])
94+
95+
assert "要約" in result
96+
assert "☕" in result
97+
assert "résumé" in result
98+
assert "\\u" not in result
99+
assert json.loads(result.rstrip("\n")) == "要約: ☕ résumé"
100+
101+
102+
def test_episodes_to_string_output_is_utf8_encodable(base_episode_data):
103+
"""The formatted string is the exact text fed to LanguageModel prompts;
104+
it must be losslessly UTF-8 encodable (no surrogate pairs from broken
105+
escaping)."""
106+
base_episode_data["content"] = "ASCII + 中文 + 🚀 + emoji modifier 👨‍👩‍👧‍👦"
107+
ep = Episode(**base_episode_data)
108+
result = episodes_to_string([ep])
109+
110+
encoded = result.encode("utf-8")
111+
assert encoded.decode("utf-8") == result
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
"""Unit tests for ``DeclarativeMemory.string_from_episode_context``.
2+
3+
These tests target the static formatter directly and do not need any of
4+
the heavyweight integration fixtures (Neo4j, embedders, rerankers) used
5+
by ``test_declarative_memory.py``.
6+
"""
7+
8+
import json
9+
from datetime import UTC, datetime
10+
11+
from memmachine_server.episodic_memory.declarative_memory import (
12+
ContentType,
13+
DeclarativeMemory,
14+
Episode,
15+
)
16+
17+
18+
def _make_episode(content):
19+
return Episode(
20+
uid="ep_1",
21+
timestamp=datetime(2026, 1, 14, 13, 30, tzinfo=UTC),
22+
source="user_1",
23+
content_type=ContentType.MESSAGE,
24+
content=content,
25+
)
26+
27+
28+
def test_ascii_content_baseline():
29+
"""Sanity check that ASCII content still formats as before — guards
30+
against regressions in the timestamp/source/JSON layout."""
31+
result = DeclarativeMemory.string_from_episode_context(
32+
[_make_episode("Hello world")]
33+
)
34+
assert result == '[Wednesday, January 14, 2026 at 01:30 PM] user_1: "Hello world"\n'
35+
36+
37+
def test_non_ascii_content_preserved_literally():
38+
"""Non-ASCII characters must reach the reranker / LLM as-is, not as
39+
``\\uXXXX`` escapes — escapes inflate token counts and obscure
40+
semantic content for the reranker."""
41+
result = DeclarativeMemory.string_from_episode_context(
42+
[_make_episode("寿司 café 🍕 Привет naïve")]
43+
)
44+
45+
assert "寿司" in result
46+
assert "café" in result
47+
assert "🍕" in result
48+
assert "Привет" in result
49+
assert "naïve" in result
50+
assert "\\u" not in result
51+
52+
53+
def test_non_ascii_content_lossless_roundtrip():
54+
"""The JSON-encoded content portion must round-trip back to the
55+
original string — the reranker scoring relies on the literal text
56+
matching the query distribution."""
57+
original = '日本語 — "quoted" + emoji 🎉'
58+
result = DeclarativeMemory.string_from_episode_context([_make_episode(original)])
59+
json_part = result.split(": ", 1)[1].rstrip("\n")
60+
assert json.loads(json_part) == original
61+
62+
63+
def test_output_is_utf8_encodable():
64+
"""The context string is fed to ``Reranker.score`` and (via siblings)
65+
into LLM prompts; it must encode to UTF-8 cleanly."""
66+
result = DeclarativeMemory.string_from_episode_context(
67+
[_make_episode("Mixed: ASCII + 中文 + 🚀 + 👨‍👩‍👧‍👦")]
68+
)
69+
assert result.encode("utf-8").decode("utf-8") == result
70+
71+
72+
def test_multiple_episodes_each_preserve_unicode():
73+
eps = [_make_episode("café"), _make_episode("寿司"), _make_episode("🚀")]
74+
result = DeclarativeMemory.string_from_episode_context(eps)
75+
lines = result.strip().split("\n")
76+
assert len(lines) == 3
77+
assert "café" in lines[0]
78+
assert "寿司" in lines[1]
79+
assert "🚀" in lines[2]

packages/server/server_tests/memmachine_server/semantic_memory/test_semantic_llm.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,3 +312,133 @@ def test_consolidation_format_preserves_all_fields(self, features_with_ids):
312312
assert entry["feature"] == "observer_fix"
313313
assert entry["value"] == "Fixed observer subagent bug"
314314
assert entry["metadata"] == {"id": "42"}
315+
316+
317+
class TestNonAsciiPromptSerialization:
318+
"""Both ``llm_feature_update`` and ``llm_consolidate_features`` embed
319+
the existing feature set into the user prompt via
320+
``json.dumps(..., ensure_ascii=False)``. The non-ASCII payload must
321+
survive into the prompt as literal Unicode (so the LLM sees
322+
``"寿司"`` and not ``"\\u5bff\\u53f8"``) and the prompt must remain
323+
a valid UTF-8 string."""
324+
325+
@pytest.fixture
326+
def non_ascii_features(self):
327+
return [
328+
SemanticFeature(
329+
category="Profile",
330+
tag="食べ物", # tag itself is non-ASCII
331+
feature_name="favorite_dish",
332+
value="寿司 🍣",
333+
metadata=SemanticFeature.Metadata(id="100"),
334+
),
335+
SemanticFeature(
336+
category="Profile",
337+
tag="préférences",
338+
feature_name="café",
339+
value="naïve résumé — Привет",
340+
metadata=SemanticFeature.Metadata(id="101"),
341+
),
342+
]
343+
344+
@pytest.mark.asyncio
345+
async def test_feature_update_prompt_preserves_non_ascii_literally(
346+
self,
347+
magic_mock_llm_model: MagicMock,
348+
non_ascii_features: list[SemanticFeature],
349+
):
350+
magic_mock_llm_model.generate_parsed_response.return_value = {"commands": []}
351+
352+
await llm_feature_update(
353+
features=non_ascii_features,
354+
message_content="I had 寿司 for lunch",
355+
model=magic_mock_llm_model,
356+
update_prompt="Update features",
357+
)
358+
359+
# The prompt is read from the 'user_prompt' keyword argument of the mocked call.
360+
call_kwargs = magic_mock_llm_model.generate_parsed_response.call_args.kwargs
361+
user_prompt = call_kwargs["user_prompt"]
362+
363+
# Literal Unicode reaches the LLM, no escape sequences.
364+
assert "食べ物" in user_prompt
365+
assert "寿司 🍣" in user_prompt
366+
assert "préférences" in user_prompt
367+
assert "café" in user_prompt
368+
assert "naïve résumé — Привет" in user_prompt
369+
assert "\\u" not in user_prompt
370+
371+
# The prompt is UTF-8 transport-safe.
372+
assert user_prompt.encode("utf-8").decode("utf-8") == user_prompt
373+
374+
@pytest.mark.asyncio
375+
async def test_consolidate_prompt_preserves_non_ascii_literally(
376+
self,
377+
magic_mock_llm_model: MagicMock,
378+
non_ascii_features: list[SemanticFeature],
379+
):
380+
magic_mock_llm_model.generate_parsed_response.return_value = {
381+
"consolidated_memories": [],
382+
"keep_memories": None,
383+
}
384+
385+
await llm_consolidate_features(
386+
features=non_ascii_features,
387+
model=magic_mock_llm_model,
388+
consolidate_prompt="Consolidate features",
389+
)
390+
391+
call_kwargs = magic_mock_llm_model.generate_parsed_response.call_args.kwargs
392+
user_prompt = call_kwargs["user_prompt"]
393+
394+
assert "食べ物" in user_prompt
395+
assert "寿司 🍣" in user_prompt
396+
assert "préférences" in user_prompt
397+
assert "naïve résumé — Привет" in user_prompt
398+
assert "\\u" not in user_prompt
399+
assert user_prompt.encode("utf-8").decode("utf-8") == user_prompt
400+
401+
# The consolidation prompt is bare JSON — verify it still parses
402+
# and round-trips losslessly.
403+
import json
404+
405+
parsed = json.loads(user_prompt)
406+
assert parsed[0]["tag"] == "食べ物"
407+
assert parsed[0]["value"] == "寿司 🍣"
408+
assert parsed[0]["metadata"] == {"id": "100"}
409+
assert parsed[1]["feature"] == "café"
410+
assert parsed[1]["value"] == "naïve résumé — Привет"
411+
412+
@pytest.mark.asyncio
413+
async def test_feature_update_prompt_old_profile_block_is_valid_json(
414+
self,
415+
magic_mock_llm_model: MagicMock,
416+
non_ascii_features: list[SemanticFeature],
417+
):
418+
"""The feature-update prompt wraps the JSON inside ``<OLD_PROFILE>``
419+
delimiters; the inner block must still parse as JSON so the LLM
420+
is shown structurally valid input."""
421+
magic_mock_llm_model.generate_parsed_response.return_value = {"commands": []}
422+
423+
await llm_feature_update(
424+
features=non_ascii_features,
425+
message_content="…",
426+
model=magic_mock_llm_model,
427+
update_prompt="Update features",
428+
)
429+
430+
user_prompt = magic_mock_llm_model.generate_parsed_response.call_args.kwargs[
431+
"user_prompt"
432+
]
433+
434+
start = user_prompt.index("<OLD_PROFILE>\n") + len("<OLD_PROFILE>\n")
435+
end = user_prompt.index("\n</OLD_PROFILE>")
436+
old_profile_json = user_prompt[start:end]
437+
438+
import json
439+
440+
parsed = json.loads(old_profile_json)
441+
assert parsed == {
442+
"食べ物": {"favorite_dish": "寿司 🍣"},
443+
"préférences": {"café": "naïve résumé — Привет"},
444+
}

0 commit comments

Comments
 (0)