Skip to content

Commit a3bd913

Browse files
committed
Add stdio daemon mode with configurable runtime logging
1 parent a732c8d commit a3bd913

16 files changed

Lines changed: 988 additions & 41 deletions

File tree

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,4 @@ jobs:
3333
run: |
3434
bitloops-embeddings --help
3535
bitloops-embeddings describe --model bge-m3
36+
bitloops-embeddings daemon --help

README.md

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
- a one-shot CLI for simple embedding requests
66
- a long-lived local HTTP server for repeated requests
7+
- a long-lived stdio daemon for process-managed IPC
78
- release packaging for major desktop and server operating systems
89

910
The first release is intentionally operational rather than retrieval-quality-complete. It focuses on a stable interface, model bootstrapping, hello-world inference, and releasable artefacts.
@@ -105,6 +106,15 @@ Override the bind target:
105106
bitloops-embeddings serve --model bge-m3 --host 127.0.0.1 --port 7719
106107
```
107108

109+
Configure logging for long-lived modes:
110+
111+
```bash
112+
bitloops-embeddings serve \
113+
--model bge-m3 \
114+
--log-level debug \
115+
--log-file ./bitloops-embeddings.log
116+
```
117+
108118
### HTTP API
109119

110120
Health:
@@ -146,6 +156,56 @@ Error shape:
146156
}
147157
```
148158

159+
## Daemon usage
160+
161+
Start the stdio daemon:
162+
163+
```bash
164+
bitloops-embeddings daemon --model bge-m3
165+
```
166+
167+
The daemon:
168+
169+
- loads the model once and keeps it warm
170+
- reads newline-delimited JSON requests from `stdin`
171+
- writes newline-delimited JSON protocol responses only to `stdout`
172+
- writes logs and diagnostics to the configured log sink, falling back to `stderr` if that sink is unavailable
173+
174+
Use a custom log file:
175+
176+
```bash
177+
bitloops-embeddings daemon \
178+
--model bge-m3 \
179+
--log-level info \
180+
--log-file ./bitloops-embeddings-daemon.log
181+
```
182+
183+
Ready event:
184+
185+
```json
186+
{"event":"ready","protocol":1,"capabilities":["embed","ping","health","shutdown"]}
187+
```
188+
189+
Example request:
190+
191+
```json
192+
{"id":"1","cmd":"embed","texts":["hello","world"],"model":"bge-m3"}
193+
```
194+
195+
Example response:
196+
197+
```json
198+
{"id":"1","ok":true,"vectors":[[0.12,0.98],[-0.44,0.07]],"model":"bge-m3"}
199+
```
200+
201+
Example error:
202+
203+
```json
204+
{"id":"7","ok":false,"error":{"code":"UNKNOWN_COMMAND","message":"unsupported cmd: frobnicate"}}
205+
```
206+
207+
The daemon exits cleanly on `shutdown` or when `stdin` reaches EOF.
208+
149209
## Cache directory resolution
150210

151211
Model cache resolution order:
@@ -198,5 +258,7 @@ The repository includes two workflows:
198258
## Troubleshooting
199259

200260
- The first `embed` or `serve` invocation downloads model files into the local cache. This can take a while on a cold machine.
261+
- The first `daemon` invocation also downloads model files into the local cache if they are not already present.
201262
- If model loading fails, check network access to Hugging Face and confirm the cache directory is writable.
263+
- Long-lived modes support `--log-level` and `--log-file`. Without `--log-file`, `serve` and `daemon` use a best-effort OS log sink and fall back to `stderr` if the native sink is unavailable.
202264
- The runtime does not log input texts by default.

scripts/real_backend_smoke.py

Lines changed: 102 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import socket
66
import subprocess
77
import sys
8+
import tempfile
89
import time
910
from pathlib import Path
1011
from urllib import error, request
@@ -24,6 +25,7 @@ def main() -> None:
2425

2526
run_embed_smoke(binary)
2627
run_server_smoke(binary, port)
28+
run_daemon_smoke(binary)
2729

2830

2931
def run_embed_smoke(binary: str) -> None:
@@ -48,39 +50,109 @@ def run_embed_smoke(binary: str) -> None:
4850

4951

5052
def run_server_smoke(binary: str, port: int) -> None:
    """Start ``serve`` on the loopback interface and check one ``/embed`` call.

    The server is launched with ``--log-file`` pointing into a temporary
    directory. Once the health wait returns, a single text is posted to
    ``/embed``; the response must name ``bge-m3`` and carry a non-empty first
    vector. The server process is always stopped afterwards.

    Args:
        binary: Executable to launch.
        port: TCP port passed to ``--port`` and used for the HTTP request.

    Raises:
        RuntimeError: If the response has another model id or an empty
            embedding.
    """
    with tempfile.TemporaryDirectory(prefix="bitloops-embeddings-serve-logs-") as temp_dir:
        command = [
            binary,
            "serve",
            "--model",
            "bge-m3",
            "--host",
            "127.0.0.1",
            "--port",
            str(port),
            "--log-file",
            str(Path(temp_dir) / "serve.log"),
        ]
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )

        try:
            wait_for_health(process, port)
            embed_response = http_post_json(
                f"http://127.0.0.1:{port}/embed",
                {"texts": ["Hello World"]},
            )
            if embed_response["model_id"] != "bge-m3":
                raise RuntimeError("Server smoke returned an unexpected model id.")
            embeddings = embed_response["embeddings"]
            if not (embeddings and embeddings[0]):
                raise RuntimeError("Server smoke returned an empty embedding vector.")
        finally:
            # Ask the server to stop first; escalate to kill if it lingers.
            process.terminate()
            try:
                process.wait(timeout=20)
            except subprocess.TimeoutExpired:
                process.kill()
                process.wait(timeout=5)
90+
91+
92+
def run_daemon_smoke(binary: str) -> None:
    """Drive the stdio daemon through ready, ping, embed and shutdown.

    The daemon is launched with ``--log-file`` pointing into a temporary
    directory. The function then expects, in order: a ``ready`` event, a
    ``pong`` reply to ``ping``, an ``embed`` reply naming ``bge-m3`` with a
    non-empty first vector, an ``ok`` reply to ``shutdown``, and a zero exit
    status.

    Args:
        binary: Executable to launch.

    Raises:
        RuntimeError: If any protocol message or the exit status differs from
            what is expected.
        subprocess.TimeoutExpired: If the daemon does not exit within 20
            seconds of acknowledging ``shutdown``.
    """
    with tempfile.TemporaryDirectory(prefix="bitloops-embeddings-daemon-logs-") as temp_dir:
        log_file = Path(temp_dir) / "daemon.log"
        stderr_path = Path(temp_dir) / "daemon.stderr"
        # Nothing in this function reads the child's stderr. An unread pipe can
        # fill up and block the child, so stderr goes to a file instead.
        # Using Popen as a context manager closes the stdin/stdout pipes on exit.
        with stderr_path.open("w", encoding="utf-8") as stderr_file, subprocess.Popen(
            [
                binary,
                "daemon",
                "--model",
                "bge-m3",
                "--log-file",
                str(log_file),
            ],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=stderr_file,
            text=True,
        ) as process:
            try:
                ready_event = read_json_line(process.stdout)
                if ready_event.get("event") != "ready":
                    raise RuntimeError("Daemon smoke did not emit a ready event.")

                write_json_line(process.stdin, {"id": "1", "cmd": "ping"})
                ping_response = read_json_line(process.stdout)
                if ping_response != {"id": "1", "ok": True, "pong": True}:
                    raise RuntimeError(f"Unexpected daemon ping response: {ping_response}")

                write_json_line(
                    process.stdin,
                    {"id": "2", "cmd": "embed", "texts": ["Hello World"]},
                )
                embed_response = read_json_line(process.stdout)
                if embed_response.get("model") != "bge-m3":
                    raise RuntimeError("Daemon smoke returned an unexpected model id.")
                if not embed_response.get("vectors") or not embed_response["vectors"][0]:
                    raise RuntimeError("Daemon smoke returned an empty embedding vector.")

                write_json_line(process.stdin, {"id": "3", "cmd": "shutdown"})
                shutdown_response = read_json_line(process.stdout)
                if shutdown_response != {"id": "3", "ok": True}:
                    raise RuntimeError(f"Unexpected daemon shutdown response: {shutdown_response}")
                if process.wait(timeout=20) != 0:
                    raise RuntimeError("Daemon smoke exited with a non-zero status.")
            finally:
                # Only intervene if the daemon is still running (a check failed
                # or it ignored shutdown); terminate first, then kill.
                if process.poll() is None:
                    process.terminate()
                    try:
                        process.wait(timeout=5)
                    except subprocess.TimeoutExpired:
                        process.kill()
                        process.wait(timeout=5)
144+
145+
146+
def write_json_line(stream, payload: dict[str, object]) -> None:
    """Write ``payload`` to ``stream`` as one JSON line and flush it.

    Args:
        stream: Writable text stream offering ``write`` and ``flush``.
        payload: Mapping serialised with ``json.dumps``.
    """
    encoded = json.dumps(payload)
    stream.write(f"{encoded}\n")
    # Flush right away so the line is handed to the stream's consumer now.
    stream.flush()
149+
150+
151+
def read_json_line(stream) -> dict[str, object]:
    """Read one line from ``stream`` and decode it as a JSON object.

    Args:
        stream: Readable text stream offering ``readline``.

    Returns:
        The decoded JSON object.

    Raises:
        RuntimeError: If the stream is at EOF, the line is not valid JSON, or
            the decoded value is not a JSON object. The offending line is
            included in the message for the latter two cases.
    """
    line = stream.readline()
    if not line:
        raise RuntimeError("Expected a protocol message but reached EOF.")
    try:
        message = json.loads(line)
    except json.JSONDecodeError as exc:
        # Report the raw line: a bare decode error does not show what was received.
        raise RuntimeError(
            f"Expected a JSON protocol message but received: {line!r}"
        ) from exc
    if not isinstance(message, dict):
        # Callers use dict methods on the result, so reject other JSON values here.
        raise RuntimeError(f"Expected a JSON object but received: {line!r}")
    return message
84156

85157

86158
def wait_for_health(process: subprocess.Popen[str], port: int, timeout_seconds: int = 180) -> None:

src/bitloops_embeddings/backend/base.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,11 @@ def backend_name(self) -> str: ...
1313
@property
1414
def dimensions(self) -> int: ...
1515

16+
@property
17+
def is_loaded(self) -> bool: ...
18+
1619
def load(self) -> None: ...
1720

1821
def embed(self, texts: list[str]) -> list[list[float]]: ...
1922

23+
def close(self) -> None: ...

src/bitloops_embeddings/backend/sentence_transformers_backend.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,12 @@ def backend_name(self) -> str:
3434
def dimensions(self) -> int:
3535
return self._dimensions
3636

37+
@property
def is_loaded(self) -> bool:
    """Report whether a model object is currently held in ``self._model``."""
    model = self._model
    return model is not None
40+
3741
def load(self) -> None:
38-
if self._model is not None:
42+
if self.is_loaded:
3943
return
4044

4145
try:
@@ -90,3 +94,6 @@ def embed(self, texts: list[str]) -> list[list[float]]:
9094
if hasattr(vectors, "tolist"):
9195
return vectors.tolist()
9296
return [[float(value) for value in vector] for vector in vectors]
97+
98+
def close(self) -> None:
    """Drop this backend's reference to the model.

    Sets ``self._model`` to ``None``; calling it again leaves it ``None``.
    """
    self._model = None

src/bitloops_embeddings/cli.py

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from bitloops_embeddings.backend.base import EmbeddingBackend
1010
from bitloops_embeddings.cache import ensure_cache_dir, resolve_cache_dir
11+
from bitloops_embeddings.daemon import run_daemon
1112
from bitloops_embeddings.errors import BitloopsEmbeddingsError
1213
from bitloops_embeddings.logging_utils import configure_logging, log_event
1314
from bitloops_embeddings.models import EmbeddingResponse, RuntimeInfo
@@ -39,6 +40,10 @@ class LogLevel(str, Enum):
3940
ERROR = "error"
4041

4142

43+
class Transport(str, Enum):
    """IPC transports accepted by the daemon's ``--transport`` option."""

    # Only member defined here; the value is the string used on the command line.
    STDIO = "stdio"
45+
46+
4247
def main() -> None:
4348
app(prog_name=RUNTIME_NAME)
4449

@@ -109,14 +114,22 @@ def serve(
109114
LogLevel,
110115
typer.Option("--log-level", help="Server log verbosity.", case_sensitive=False),
111116
] = LogLevel.INFO,
117+
log_file: Annotated[
118+
Optional[Path],
119+
typer.Option(
120+
"--log-file",
121+
help="Optional log file path. Defaults to the OS log sink for long-lived modes.",
122+
dir_okay=False,
123+
writable=True,
124+
),
125+
] = None,
112126
max_batch_size: Annotated[
113127
int,
114128
typer.Option("--max-batch-size", help="Maximum texts accepted by the /embed endpoint."),
115129
] = 32,
116130
) -> None:
117-
configure_logging(log_level.value)
118-
119131
try:
132+
configure_logging(log_level.value, log_file=log_file, prefer_os_log=True)
120133
backend = _build_backend(model=model, cache_dir=cache_dir)
121134
backend.load()
122135
app_instance = create_app(backend, max_batch_size=max_batch_size)
@@ -134,6 +147,53 @@ def serve(
134147
_exit_with_error(BitloopsEmbeddingsError(f"Unexpected runtime error: {exc}"))
135148

136149

150+
@app.command()
def daemon(
    model: Annotated[str, typer.Option("--model", help="Public model identifier.")],
    transport: Annotated[
        Transport,
        typer.Option("--transport", help="IPC transport.", case_sensitive=False),
    ] = Transport.STDIO,
    cache_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--cache-dir",
            help="Override the model cache directory.",
            file_okay=False,
            dir_okay=True,
            writable=True,
        ),
    ] = None,
    log_level: Annotated[
        LogLevel,
        typer.Option("--log-level", help="Daemon log verbosity.", case_sensitive=False),
    ] = LogLevel.INFO,
    log_file: Annotated[
        Optional[Path],
        typer.Option(
            "--log-file",
            help="Optional log file path. Defaults to the OS log sink for long-lived modes.",
            dir_okay=False,
            writable=True,
        ),
    ] = None,
) -> None:
    # NOTE: Typer uses a command function's docstring as its help text, so this
    # command is documented with comments to keep `--help` output unchanged.
    #
    # Flow: configure logging, validate the transport, build the backend, run
    # the daemon, and exit with the status code that run_daemon returns.
    try:
        configure_logging(log_level.value, log_file=log_file, prefer_os_log=True)
        if transport is not Transport.STDIO:
            raise typer.BadParameter("Only stdio transport is supported in v1.")
        exit_code = run_daemon(_build_backend(model=model, cache_dir=cache_dir))
        raise typer.Exit(code=exit_code)
    except (typer.BadParameter, typer.Exit):
        # Let Typer handle these itself; they must not reach the catch-all below.
        raise
    except BitloopsEmbeddingsError as exc:
        _exit_with_error(exc)
    except Exception as exc:
        _exit_with_error(BitloopsEmbeddingsError(f"Unexpected runtime error: {exc}"))
195+
196+
137197
@app.command()
138198
def describe(
139199
model: Annotated[str, typer.Option("--model", help="Public model identifier.")],
@@ -180,6 +240,9 @@ def _emit_json(payload: str, *, output: Optional[Path]) -> None:
180240

181241

182242
def _exit_with_error(exc: BitloopsEmbeddingsError) -> None:
    """Log ``exc`` as a ``fatal_error`` event, echo it, and exit with code 1.

    Args:
        exc: Error whose ``code`` and message are reported.

    Raises:
        typer.Exit: Always, with ``code=1``, after the message has been echoed.
    """
    try:
        log_event("fatal_error", code=exc.code, message=str(exc))
    except Exception:
        # Best effort only: a failing log call must not stop the error from
        # being echoed below.
        pass
    typer.echo(f"Error: {exc}", err=True)
    raise typer.Exit(code=1)

0 commit comments

Comments
 (0)