# imas-codex/pyproject.toml (main branch) — https://github.com/iterorganization/imas-codex

# Build backend: hatchling, with hatch-vcs available for VCS-derived versions
# (see [tool.hatch.version]).
# NOTE(review): the runtime-style packages listed after hatch-vcs are presumably
# imported by the custom build hook (hatch_build_hooks.py, registered under
# [tool.hatch.build.hooks.custom]) — confirm before trimming this list.
# The version bounds below repeat those in [project].dependencies and
# [dependency-groups].dev; change them together.
[build-system]
requires = [
    "hatchling",
    "hatch-vcs",
    "pydantic>=2.11.4,<3.0.0",
    "pint>=0.24.4,<0.25.0",
    "rich>=13.7.0,<14.0.0",
    "click>=8.0.0,<9.0.0",
    "networkx>=3.0,<4.0",
    "PyYAML>=6.0,<7.0",
    "ruamel-yaml>=0.18.17",
    "imas-data-dictionary>=4.1.0",
    "imas-data-dictionaries>=4.1.0",
    "imas-python>=2.0.1",
    "linkml>=1.9.3",
    "linkml-runtime>=1.9.5",
    "neo4j>=5.28.0,<6.0.0",
    "imas-standard-names @ git+https://github.com/Simon-McIntosh/IMAS-Standard-Names.git@v0.7.0rc37",
]
build-backend = "hatchling.build"

# Core package metadata. The version is not written here: it is declared
# dynamic and supplied by the build backend.
[project]
name = "imas-codex"
description = "An IMAS Data Dictionary MCP server"
readme = "README.md"
license.text = "CC BY-ND 4.0"
authors = [
    { name = "Simon McIntosh", email = "simon.mcintosh@iter.org" },
]
# Only the 3.12 series is supported.
requires-python = ">=3.12,<3.13"
dynamic = ["version"]
keywords = [
    "IMAS", "MCP", "Model Context Protocol", "Data Dictionary",
    "Fusion", "Plasma Physics", "ITER",
]
classifiers = [
    # Maturity and audience
    "Development Status :: 4 - Beta",
    "Intended Audience :: Science/Research",
    "Intended Audience :: Developers",
    # Licence and platform
    "License :: Other/Proprietary License",
    "Operating System :: OS Independent",
    # Language
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.12",
    # Topics
    "Topic :: Scientific/Engineering :: Physics",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Database :: Database Engines/Servers",
]
# Runtime requirements for the core feature set (MCP server, graph queries,
# search, CLI), listed alphabetically.
dependencies = [
    "anyio>=4.0.0,<5.0.0",
    "breame>=0.1.2",
    "cachetools>=5.3.0,<6.0.0",
    "click>=8.0.0,<9.0.0",
    "fastmcp>=3.2.0",
    "filelock>=3.0.0",
    "httpx>=0.27.0",
    "imas-data-dictionaries>=4.1.0",
    # Security: CVE fix requires lxml>=6.1.0 (indirect via python-pptx/python-docx)
    "lxml>=6.1.0",
    "neo4j>=5.28.0,<6.0.0",
    "nest-asyncio>=1.5.0,<2.0.0",
    "numpy>=2.3.1",
    "pint>=0.24.4,<0.25.0",
    "pydantic>=2.11.4,<3.0.0",
    "python-dotenv>=1.0.0",
    "PyYAML>=6.0,<7.0",
    "rapidfuzz>=3.0.0",
    "requests>=2.25.0",
    "rich>=13.7.0,<14.0.0",
    "ruamel-yaml>=0.18.17",
]

[project.optional-dependencies]
# Test-runner stack. torch is resolved from the CPU wheel index for this extra
# (see [tool.uv.sources]).
test = [
    "torch",
    "sentence-transformers>=5.0.0",
    "pytest>=8.3.5,<9.0.0",
    "pytest-cov>=6.1.1,<7.0.0",
    # Floor is 0.24.0: [tool.pytest.ini_options] sets
    # asyncio_default_fixture_loop_scope, which older releases do not register,
    # and --strict-config turns the resulting unknown-option warning into an error.
    "pytest-asyncio>=0.24.0,<1.0.0",
    "pytest-xdist>=3.0.0,<4.0.0",
    "pytest-benchmark>=4.0.0,<5.0.0",
    # Provides the `timeout` ini key used in [tool.pytest.ini_options].
    "pytest-timeout>=2.1.0,<3.0.0",
    "coverage>=7.0.0",
]
# Benchmark tooling; asv is the only requirement.
bench = ["asv>=0.6.0,<1.0.0"]
serve = [
    # Embedding server deps (GPU inference)
    "fastapi>=0.115.0",
    "uvicorn>=0.34.0",
    "uvloop>=0.21.0",
]
# PyTorch backend selection - choose one:
#   uv sync --extra cpu  (default, ~2GB, no CUDA)
#   uv sync --extra gpu  (CUDA 12.1, ~6GB, torch 2.5.1 max due to iter driver)
# Both include serve deps (embedding server) for single-extra deploys.
# cpu and gpu cannot be combined: [tool.uv].conflicts declares them mutually
# exclusive, and [tool.uv.sources] points torch at a different index for each.
cpu = [
    "torch",
    "sentence-transformers>=5.0.0",
    "imas-codex[serve]",
]
# LLM proxy runs via `uv tool run litellm[proxy]` — no venv deps needed.
gpu = [
    "torch==2.5.1",
    "accelerate>=1.0.0",
    "sentence-transformers>=5.0.0",
    "imas-codex[serve]",
]

# Development-only requirements, kept in a dependency group rather than in
# the package's own dependency lists.
[dependency-groups]
dev = [
    # --- Code quality ---
    "ruff>=0",
    "mypy>=1.15.0",
    "pre-commit>=4.2.0",
    "tqdm-stubs>=0.2.1",
    # --- Interactive dev ---
    "ipython>=9.2.0",
    "ipykernel>=6.29.5",
    # --- Standard Names catalog (install manually: uv pip install -e ../imas-standard-names) ---
    # NOTE(review): this heading has no entries, and imas-standard-names is also
    # pinned from git at the end of this list — confirm which install route is current.
    # --- LLM & Discovery ---
    "litellm>=1.81.0",
    # --- Graph build & schema ---
    "linkml>=1.9.3",
    "linkml-runtime>=1.9.5",
    "imas-python>=2.0.1",
    "networkx>=3.0,<4.0",
    "scikit-learn>=1.7.2",
    "hdbscan>=0.8.41",
    # --- Wiki & document parsing ---
    "beautifulsoup4>=4.14.3",
    "Pillow>=11.0.0",
    "python-docx>=1.1.2",
    "python-pptx>=1.0.2",
    "openpyxl>=3.1.5",
    "nbformat>=5.10.4",
    "xlrd>=2.0.2",
    # --- Auth & remote ---
    "keyring>=25.7.0",
    "secretstorage>=3.5.0",
    "fabric>=3.0.0",
    "pykakasi>=2.3.0",
    "jellyfish>=1.2.1",
    # huggingface_hub is a transitive dep of sentence-transformers.
    # Only the cpu/gpu extras need it directly (for model downloads).
    # The [hf_xet] extra installs native Rust bindings that crash on some
    # environments (ITER SDCC). HF_HUB_DISABLE_XET=1 is set in __init__.py.
    # --- Tree-sitter ---
    "tree-sitter>=0.25.2",
    "tree-sitter-language-pack>=0.13.0",
    "tree-sitter-gdl>=0.2.0",
    # --- Testing ---
    # These floors are higher than the ones in the `test` extra and carry no
    # upper bounds.
    "pytest>=8.4.2",
    "pytest-asyncio>=0.26.0",
    "pytest-cov>=6.1.1",
    "pytest-timeout>=2.1.0",
    "xlwt>=1.3.0",
    # --- Serve (embedding + LLM proxy) ---
    # NOTE(review): the uvicorn floor here (0.31.1) is lower than in the `serve`
    # extra (0.34.0) — confirm whether the difference is intentional.
    "fastapi>=0.115.0",
    "uvicorn>=0.31.1",
    # --- Catalog preview (mkdocs-driven via 'standard-names serve') ---
    "mkdocs>=1.6.1,<2.0.0",
    "mkdocs-material>=9.6.5,<10.0.0",
    # Same git tag as the entry in [build-system].requires.
    "imas-standard-names @ git+https://github.com/Simon-McIntosh/IMAS-Standard-Names.git@v0.7.0rc37",
]

# Project links; all point at the GitHub repository.
[project.urls]
Homepage = "https://github.com/iterorganization/imas-codex"
Repository = "https://github.com/iterorganization/imas-codex"
Documentation = "https://github.com/iterorganization/imas-codex#readme"
"Bug Tracker" = "https://github.com/iterorganization/imas-codex/issues"

# Console entry points, each written as "module:function". Only `imas-codex`
# lives in the imas_codex package; the others resolve to the top-level
# `scripts` package that [tool.hatch.build.targets.wheel] ships alongside it.
[project.scripts]
imas-codex = "imas_codex.cli:main"
dd-version = "scripts.dd_version:main"
build-schemas = "scripts.build_schemas:build_schemas"
map-ids-domains = "scripts.map_ids_domains:map_ids_domains"
build-clusters = "scripts.build_clusters:build_clusters"
build-embeddings = "scripts.build_embeddings:build_embeddings"
build-database = "scripts.build_database:build_database"
build-path-map = "scripts.build_path_map:build_path_map_cli"
build-models = "scripts.build_models:build_models"
gen-physics-domains = "scripts.gen_physics_domains:gen_physics_domains"
ingest-mdsplus = "scripts.ingest_mdsplus:ingest_mdsplus"


# Version is read from VCS metadata (hatch-vcs is listed in [build-system].requires).
[tool.hatch.version]
source = "vcs"

[tool.hatch.build.targets.wheel]
# Two top-level packages go into the wheel: the library itself and the
# `scripts` package that backs most [project.scripts] entry points.
packages = ["imas_codex", "scripts"]
exclude = ["imas_codex/resources/.gitkeep"]
# NOTE(review): these are presumably build-generated or VCS-ignored files that
# must still be packaged — confirm against hatch_build_hooks.py.
artifacts = [
    "imas_codex/resources/**",
    "imas_codex/core/physics_domain.py",
    "imas_codex/graph/models.py",
    "imas_codex/graph/schema_context_data.py",
]

[tool.hatch.build.hooks.custom]
# Custom build hook, loaded from the file named by `path`.
# NOTE(review): verbose, ids-filter and imas-dd-version are options consumed by
# the hook itself; the empty strings presumably mean "no filter" / "use the
# default version" — confirm in hatch_build_hooks.py.
path = "hatch_build_hooks.py"
verbose = true
ids-filter = ""
imas-dd-version = ""

[tool.imas-codex]

# Facility locations. Order is significant: a location's position in the list
# is the port offset applied to every service.
#   Neo4j:     bolt = 7687 + index, http = 7474 + index
#   Embed:     server = 18765 + index
#   LLM proxy: port = 18400 + index
# Tunneled connections add +10000 (e.g. iter tunnel → bolt 17687).
# Append new facilities at the end; inserting or reordering entries shifts the
# ports of every location that follows.
locations = [
    "iter",     # index 0
    "tcv",      # index 1
    "jt-60sa",  # index 2
    "jet",      # index 3
    "west",     # index 4
    "mast-u",   # index 5
    "asdex-u",  # index 6
    "east",     # index 7
    "diii-d",   # index 8
    "kstar",    # index 9
]

# SSH host aliases for remote services (graph, embedding, etc.).
# A location without an entry here uses its own name as the SSH alias, so
# list only the locations whose alias differs from the location name.
[tool.imas-codex.hosts]
# No overrides at present: iter, tcv, jt-60sa use location name as SSH alias (implicit)

[tool.imas-codex.graph]
# Location = where Neo4j runs. Use a facility name ("iter") for the login node,
# or a compute location ("titan") for SLURM-managed services.
# Compute locations are defined in {facility}.yaml under compute_locations.
# Override: IMAS_CODEX_GRAPH_LOCATION=local
location = "titan"
neo4j-version = "2026.01.4-community"  # Docker tag for Apptainer image
# NOTE(review): these credentials are committed in plain text; presumably a
# development default that real deployments override — confirm.
username = "neo4j"
password = "imas-codex"

# Explicit profile overrides (convention handles standard cases):
# [tool.imas-codex.graph.profiles.staging]
# location = "staging-server"      # Where Neo4j runs (SSH alias)
# bolt-port = 7700
# http-port = 7701
# data-dir = "/custom/path/neo4j-staging"

[tool.imas-codex.data-dictionary]
# Pinned Data Dictionary version; equal to the >=4.1.0 floor on
# imas-data-dictionaries in [project].dependencies.
version = "4.1.0"
include-ggd = true
include-error-fields = false

[tool.imas-codex.embedding]
model = "Qwen/Qwen3-Embedding-0.6B"
dimension = 256
# Where the embedding server runs. "titan" selects the compute node (P100 via
# SLURM). A login-node T4 is the alternative, presumably selected with a
# facility name such as "iter" — confirm.
location = "titan"

# Score gates for discovery path processing.
[tool.imas-codex.discovery]
# Minimum combined score for high-value path processing.
# Used by: enrichment auto-threshold, refinement gate, code CLI default.
# Override: IMAS_CODEX_DISCOVERY_THRESHOLD
threshold = 0.90
# Offset subtracted from threshold to get the triage gate.
# triage_threshold = threshold - triage-offset (default: 0.90 - 0.15 = 0.75).
# Override: IMAS_CODEX_TRIAGE_THRESHOLD
triage-offset = 0.15

[tool.imas-codex.logs]
# Where CLI commands run and write logs.
# MCP log tools (get_logs, list_logs, tail_logs) read from this location.
# "local" = read from local filesystem (default)
# "<facility>" = SSH to that host to read logs (e.g. "iter")
# Override: IMAS_CODEX_LOG_LOCATION
# Set to a facility here, so the log tools read over SSH rather than locally.
location = "iter"

# The five sections from [language] to [reasoning] each select the model for
# one task family. Every model id carries the openrouter/ prefix, for the
# reason given in the first comment under [language].
[tool.imas-codex.language]
# openrouter/ prefix required so direct-path bypass preserves cache_control
# and response_cost. Without it, calls silently route through the proxy,
# stripping cache discounts (often 80%+) and zeroing cost telemetry.
# Powers generate_docs (and any other bulk-content non-refine fanouts).
# refine_name and refine_docs were peeled off into [tool.imas-codex.refine]
# on 2026-05-03 after E3 telemetry showed flash-lite refines accept at
# ~5% vs ~42% for Sonnet-4.6 compose.
# 2026-05-03 (post-E3 audit): per user mandate, NO flash-lite in the SN
# pipeline. generate_docs is now on Sonnet 4.6 — bulk doc gen quality
# matters because it drives the docs-axis review and downstream YAML.
model = "openrouter/anthropic/claude-sonnet-4.6"
# NOTE(review): presumably the number of items sent per LLM call — confirm.
batch-size = 50

[tool.imas-codex.refine]
# openrouter/ prefix — see [language] note above.
# Powers process_refine_name_batch and process_refine_docs_batch.
# Routed onto Sonnet 4.6 (matches [sn-run] compose tier) on 2026-05-03
# because flash-lite refines accepted at ~5% vs ~42% for Sonnet compose
# in the E3 acceptance audit. Kept as a separate section so the refine
# tier can diverge from compose later (e.g. always-on Opus) without
# affecting the compose pass.
model = "openrouter/anthropic/claude-sonnet-4.6"

[tool.imas-codex.dd-enrichment]
# openrouter/ prefix required so direct-path bypass preserves cache_control
# and response_cost. Without it, calls silently route through the proxy,
# stripping cache discounts (often 80%+) and zeroing cost telemetry.
model = "openrouter/anthropic/claude-sonnet-4.6"

[tool.imas-codex.vision]
# openrouter/ prefix — see [language] note above.
# flash-lite is INTENTIONAL here: this section drives image OCR only
# (wiki/document page image captioning via discovery/wiki and related
# doc-ingestion paths). It is NOT used in the SN compose/review/refine
# pipeline. Flash-lite is adequate for OCR-quality transcription and
# keeps image-processing costs minimal. Any SN-pipeline model must go
# through [sn-run], [sn-enrich], [refine], or [sn.review.*] — never vision.
model = "openrouter/google/gemini-3.1-flash-lite-preview"

[tool.imas-codex.reasoning]
# openrouter/ prefix — see [dd-enrichment] note above.
model = "openrouter/anthropic/claude-sonnet-4.6"

[tool.imas-codex.sn-compose]
# Maximum concurrent LLM requests issued by the compose worker.
# Derived from the 2026-04-24 extended OpenRouter rate-limit probe (Wave 3):
#   All four working models (Haiku 4.5, Sonnet 4.6, Opus 4.6, GPT-5.4)
#   cleared N=128 with zero 429s.  True ceiling is ≥128 per account tier.
#   75% of lowest measured clean level (128): floor(0.75 * 128) = 96.
#   Case A applied: single global semaphore sufficient (variance <2× across models).
# See docs/ops/openrouter-rate-ceilings.md for probe details.
max-concurrency = 96

[tool.imas-codex.sn-run]
# openrouter/ prefix — see [dd-enrichment] note above.
# Compose pool was billing 358 calls on unprefixed name (cache_control
# stripped via proxy). Re-prefixed 2026-05-02 to restore cache discounts.
model = "openrouter/anthropic/claude-sonnet-4.6"
# Default compose batch size; the two keys below override it for specific modes.
batch-size = 25
# Batch size used when ``sn generate --name-only`` groups paths by
# ``(physics_domain × unit)`` rather than ``(cluster × unit)``.  The
# name-only mode trades per-item cluster context for breadth: wider
# batches (default 50) cut LLM-call count ~70% during bootstrap at
# the cost of richer per-candidate enrichment, which the subsequent
# review / enrichment passes restore.
name-only-batch-size = 50
# Batch size used when ``sn generate --target docs`` routes through the
# five-phase enrich pipeline. Enrichment packs ~800–1200 tokens per name
# (docs, exemplars, DD context) so batches should be smaller than the
# default compose batches. Mirrors ``[tool.imas-codex.sn-enrich].batch-size``
# but allows tuning the docs-generation pass independently when invoked
# through ``sn generate``.
docs-batch-size = 12
# Pre-flight token budget guard.  Batches whose estimated token count
# exceeds this value are binary-split before dispatch.  Set below the
# model's context window (200 k) to leave headroom for the response.
max-tokens = 150000

[tool.imas-codex.sn-enrich]
# NOTE(review): [tool.imas-codex.sn.review.names] records that OpenRouter prompt
# caching is currently ineffective for claude-opus-4.6; this section still
# selects Opus — confirm that the uncached cost is intended here.
model = "openrouter/anthropic/claude-opus-4.6"
# Enrich packs ~800–1200 tokens per name (docs, exemplars, DD context)
# so batches should be smaller than generate batches.
batch-size = 12
# Pre-flight token budget guard (same semantics as sn-run).
max-tokens = 150000

[tool.imas-codex.sn]
# Scored-example injection targets: score thresholds for selecting
# exemplar StandardName nodes into compose/review prompts.
example-target-scores = [1.0, 0.8, 0.65, 0.4]
# Tolerance band around each target when selecting the closest example.
example-tolerance = 0.05
# Maximum examples per score bucket.
example-per-bucket = 1
# Compose retry: max retry attempts on grammar/validation failure.
retry-attempts = 1
# Hybrid-search k expansion factor used on retry.
retry-k-expansion = 12

# Default staging directory for export/preview/publish (ephemeral).
# The leading "~" is stored literally (TOML does no expansion); the consuming
# code has to expand it.
staging-dir = "~/.cache/imas-codex/staging"
# Path to ISNC git checkout. Empty = auto-discover from sibling dirs.
isnc-dir = ""

[tool.imas-codex.sn.review]
# Shared review settings (apply to both axes).
# Disagreement detection: true iff N>=2 AND max(score) - min(score) >= threshold.
# The names and docs sections below each set this key again, to the same value.
disagreement-threshold = 0.20
# Maximum RD-quorum cycles (1 = primary only, 2 = blind pair, 3 = full quorum).
max-cycles = 3

[tool.imas-codex.sn.review.names]
# Reviewer model chain for the name-quality axis (grammar/semantic/convention/completeness).
# Length semantics:
#   1 model  → quorum disabled (single reviewer, mirrors legacy behaviour)
#   2 models → blind primary + blind secondary, no escalator
#   3 models → full RD-quorum: models[0] primary (blind), models[1] secondary (blind),
#              models[2] escalator (sees both, authoritative)
#   4+       → rejected at config load time (ValueError)
#
# 3-model RD-quorum chain (full escalator).
# Sonnet 4.6 primary, GPT-5.4 secondary (cross-vendor independence), Opus 4.6 escalator.
#
# Why Sonnet primary (not Opus): As of 2026-05, OpenRouter's prompt-cache
# discount is broken for `anthropic/claude-opus-4.6` across all upstream
# providers (Anthropic-native, Google Vertex, Amazon Bedrock all return
# cache_creation=0 / cache_read=0 despite cache_control blocks). Sonnet 4.6
# caches correctly (90% discount on warm calls). Probe: research/cache_probe.py.
# Reverting to Opus primary as soon as upstream caching is restored will
# require a single-line edit here.
# NOTE(review): [tool.imas-codex.sn.review.names.profiles.default] mirrors this
# list, so the same change has to be made there as well.
#
# Gemini 3.1-pro-preview removed: Phase 7 probe showed it returns prose despite
# response_format=json_object — structurally unreliable as a reviewer despite
# _sanitize_content prose-extraction.  GPT-5.4 provides cross-vendor bias signal
# while staying inside a validated structured-output family.
# Temperature: GPT-5.4 rejects temperature=0.0; llm.py auto-clamps to None.
models = [
  "openrouter/anthropic/claude-sonnet-4.6",  # cycle 0 primary (blind, cache-enabled)
  "openrouter/openai/gpt-5.4",               # cycle 1 secondary (blind, cross-vendor)
  "openrouter/anthropic/claude-opus-4.6",    # cycle 2 escalator (disputes only, no cache)
]
disagreement-threshold = 0.20

# ---------------------------------------------------------------------------
# Reviewer profiles — select with IMAS_CODEX_SN_REVIEW_PROFILE env var or
# the --reviewer-profile CLI flag on `sn run` / `sn review`.
# ---------------------------------------------------------------------------

[tool.imas-codex.sn.review.names.profiles.default]
# Mirror of the top-level [sn.review.names] keys; used when no profile is set.
# See top-level [sn.review.names] for rationale on Sonnet-primary ordering.
# Nothing links the two copies, so edit both together.
models = [
  "openrouter/anthropic/claude-sonnet-4.6",  # cycle 0 primary (blind, cache-enabled)
  "openrouter/openai/gpt-5.4",               # cycle 1 secondary (blind, cross-vendor)
  "openrouter/anthropic/claude-opus-4.6",    # cycle 2 escalator (disputes only, no cache)
]
disagreement-threshold = 0.20

[tool.imas-codex.sn.review.names.profiles.quality-cost-balanced]
# Quality-preserving cost reduction: Sonnet 4.6 primary + GPT-5.4 secondary
# (cross-vendor) + Opus 4.6 arbiter (disputes only, authoritative).
# Rationale: Sonnet 4.6 benchmarked at 76.5 avg compose quality vs Opus
# equivalent; reviewer benchmark expected similar.  Opus retained as
# escalator so disputed names still get the strongest model's verdict.
# Expected cost: ~70-80% reduction vs default (Opus on every name → Opus
# only on disagreements). NO Haiku — quality floor preserved.
# Validate on ≥30 already-accepted names (per-dim score divergence ≤0.05,
# tier agreement ≥90%, net cost ≤40% of default) before adopting as default.
# NOTE(review): the cost comparison above describes an Opus-primary default.
# The default profile in this file now lists the same three models in the
# same order with the same threshold, so this profile is currently identical
# to default — confirm whether it should be kept, and refresh the rationale.
models = [
  "openrouter/anthropic/claude-sonnet-4.6",  # cycle 0 primary (blind)
  "openrouter/openai/gpt-5.4",               # cycle 1 secondary (blind, cross-vendor)
  "openrouter/anthropic/claude-opus-4.6",    # cycle 2 escalator (disputes only, authoritative)
]
disagreement-threshold = 0.20

[tool.imas-codex.sn.review.names.profiles.opus-only]
# Single-model mode: no quorum, no escalator. Maximum quality, maximum cost.
# A one-element list disables the quorum (see the length semantics under
# [tool.imas-codex.sn.review.names]).
models = ["openrouter/anthropic/claude-opus-4.6"]
disagreement-threshold = 1.0  # never escalate (single-model)

[tool.imas-codex.sn.review.docs]
# Reviewer model chain for the documentation-quality axis.
# Same length semantics as [sn.review.names]. Sonnet primary for cache discount —
# see [sn.review.names] block for rationale.
# The order is the same as the names axis: primary, secondary, escalator.
# No reviewer profiles are defined for this axis in this file.
models = [
  "openrouter/anthropic/claude-sonnet-4.6",
  "openrouter/openai/gpt-5.4",
  "openrouter/anthropic/claude-opus-4.6",
]
disagreement-threshold = 0.20

[tool.imas-codex.sn.benchmark]
# Models to evaluate for standard name composition.
# Each is benchmarked independently; results are compared side-by-side.
# Format: OpenRouter model IDs (provider/model-slug).
# NOTE(review): these ids omit the "openrouter/" routing prefix that every
# other model setting in this file carries — presumably the benchmark code adds
# it or routes differently; confirm, since other sections warn that unprefixed
# ids lose cache discounts and cost telemetry.
compose-models = [
    "anthropic/claude-sonnet-4.6",
    "anthropic/claude-haiku-4.5",
    "openai/gpt-5.4",
    "openai/gpt-5.4-mini",
    "google/gemini-3.1-pro-preview",
    "google/gemini-3-flash-preview",
    "google/gemini-3.1-flash-lite-preview",
]
# Frontier model used as independent judge for quality scoring.
# Must be a strong reasoning model — evaluates 5 dimensions (0-100).
reviewer-model = "anthropic/claude-opus-4.6"

[tool.imas-codex.sn.fanout]
# Plan 39 — Structured fan-out for the SN compose pipeline.
# Master switch.  Phase 1B shipped at 6f0bb52b; enabled for smoke + rotation.
enabled = true
# Hard cap on Stage A query count (parse-time bound).
max-fan-degree = 3
# Per-runner and total Stage B timeouts.
function-timeout-s = 5.0
total-timeout-s = 12.0
# Per-result hit cap (renderer truncates each FanoutResult.hits).
result-hit-cap = 8
# Total evidence-block token caps (renderer truncates total evidence).
# NOTE(review): the escalation cap (800) is lower than the baseline cap (2000),
# whereas the escalation cost limits further down are higher than their
# baselines — confirm the direction is intended.
evidence-token-cap-baseline = 2000
evidence-token-cap-escalation = 800
# Stage A LLM model + sampling.
# NOTE: Anthropic's structured-output API rejects schemas with `oneOf`
# (FanoutPlan uses a discriminated union over fn_id). Gemini and OpenAI
# accept oneOf cleanly. Per 2026-05-03 user mandate (NO flash-lite in
# pipeline), proposer is now on Gemini 3.1 Pro Preview which supports
# oneOf and provides production-tier quality.
proposer-model = "openrouter/google/gemini-3.1-pro-preview"
proposer-temperature = 0.1
# Cost padding added to the parent lease at refine-cycle start (tiered, plan §7.3).
fanout-cost-estimate-baseline = 0.005
fanout-cost-estimate-escalation = 0.05
# Per-cycle hard cap on cumulative fan-out sub-event spend (tiered, plan §7.3).
fanout-max-charge-per-cycle-baseline = 0.02
fanout-max-charge-per-cycle-escalation = 0.10
# Refine trigger predicate.
# Keywords match the language reviewers actually use ("decomposition", "absorbed",
# "compound", "awkward") plus disambiguation cues. Case-insensitive substring match.
refine-trigger-keywords = ["unclear", "ambiguous", "duplicate", "consider", "compare", "decomposition", "absorbed", "compound", "awkward"]
# Reviewer-comment dim allow-list for the trigger predicate (plan §5.1 I3).
# MUST match the actual review schema: ('grammar', 'semantic', 'convention', 'completeness').
# We scan grammar + semantic because that is where decomposition/disambiguation
# defects surface (a previous default of ('clarity', 'disambiguation') silently
# disabled the trigger because those dims do not exist in the rubric).
refine-trigger-comment-dims = ["grammar", "semantic"]
# Comment excerpt total length cap (plan §5.1 S3).
refine-trigger-comment-chars = 800

[tool.imas-codex.sn.fanout.sites]
# Per-site enable flags.  Phase 1 flips refine_name to true after 1B
# wires the worker; the master `enabled` switch above gates everything.
# The flag is already true here, so fan-out at the refine_name site is
# governed solely by the master switch.
refine_name = true

[tool.imas-codex.llm]
# LiteLLM proxy settings. Port base = 18400, offset by location index.
# Runs on login node (needs outbound HTTPS for OpenRouter/Anthropic).
# "iter" is index 0 in [tool.imas-codex].locations, so the proxy port is 18400.
location = "iter"       # facility hosting LLM proxy (or "local" for in-process)

[tool.hatch.envs.test]
# A Hatch environment gets the project's base dependencies plus this list (no
# extras are requested), so every pytest plugin that [tool.pytest.ini_options]
# relies on must be named here: --strict-config makes pytest abort on ini keys
# that no installed plugin registers.
dependencies = [
    "pytest>=8.3.5,<9.0.0",
    "pytest-cov>=6.1.1,<7.0.0",
    # Registers asyncio_mode and asyncio_default_fixture_loop_scope; the latter
    # needs 0.24.0 or newer.
    "pytest-asyncio>=0.24.0,<1.0.0",
    # Registers the `timeout` ini key.
    "pytest-timeout>=2.1.0,<3.0.0",
    "coverage>=7.0.0",
]

[tool.hatch.envs.dev]
# Lint / typing / interactive tooling for a Hatch-managed dev environment.
# NOTE(review): black is listed here although formatting is configured under
# [tool.ruff.format] and [dependency-groups].dev does not include black —
# confirm it is still needed.
dependencies = [
    "black>=23.0.0",
    "ruff>=0",
    "ipython>=9.2.0",
    "ipykernel>=6.29.5",
    "mypy>=1.15.0",
    "pre-commit>=4.2.0",
]

# Global mypy settings: every function must be fully annotated.
[tool.mypy]
python_version = "3.12"
warn_return_any = true
# Global-only flag: report config sections that match no checked files.
warn_unused_configs = true
disallow_untyped_defs = true
disallow_incomplete_defs = true

# Third-party packages imported without usable type information.
[[tool.mypy.overrides]]
module = ["imas.*", "pint.*"]
ignore_missing_imports = true
# The strictness flags below repeat the global values for these modules.
# warn_unused_configs is deliberately absent: mypy honours it only in the
# global section and rejects it as a per-module flag.
warn_return_any = true
disallow_untyped_defs = true
disallow_incomplete_defs = true

# Optional dependency overrides (only when installed)
[[tool.mypy.overrides]]
module = ["packaging.*"]
ignore_missing_imports = true

[tool.hatch.metadata]
# Lets hatchling accept direct-reference requirements ("name @ url") in the
# project metadata.
# NOTE(review): the only "@ git+https" requirements in this file sit in
# [build-system].requires and [dependency-groups].dev, not in [project] —
# confirm this switch is still needed.
allow-direct-references = true

[tool.pytest.ini_options]
testpaths = ["tests"]
# Per-test timeout in seconds; this ini key is provided by pytest-timeout.
timeout = 30
# Seconds before faulthandler dumps all thread tracebacks for a stuck test.
faulthandler_timeout = 60
addopts = [
    # Unregistered markers and unknown ini keys are errors, so every plugin
    # whose keys appear in this table must be installed.
    "--strict-markers",
    "--strict-config",
    "--tb=short",
    "--no-header",
    "-q",
    "--durations=10",
    # Default selection skips slow, sn_health and graph tests; pass an explicit
    # -m expression on the command line to run them.
    "-m", "not slow and not sn_health and not graph",
]
# Both asyncio_* keys are provided by pytest-asyncio.
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "session"
markers = [
    "asyncio: marks tests as async",
    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
    "integration: marks tests as integration tests",
    "unit: marks tests as unit tests",
    "fast: marks tests as fast-running unit tests",
    "performance: marks tests that focus on performance",
    "timeout: marks tests with timeout constraints (requires pytest-timeout)",
    "graph: marks tests requiring a live Neo4j connection (auto-skipped when unavailable)",
    "graph_mcp: marks graph-native MCP tests requiring Neo4j with fixture data",
    "fixture_only: marks tests that require fixture data and should be skipped on production graphs",
    "sn_health: marks standard name health gate tests requiring a populated Neo4j graph (run with -m sn_health)",
    "requires_graph: marks tests requiring a live Neo4j connection with DD content",
]
# Deprecation warnings and unraisable-exception warnings are suppressed.
filterwarnings = [
    "ignore::DeprecationWarning",
    "ignore::PendingDeprecationWarning",
    "ignore::pytest.PytestUnraisableExceptionWarning",
]
# Cache location and test-discovery patterns, stated explicitly.
# NOTE(review): these look equivalent to pytest's built-in defaults rather than
# performance tuning — confirm whether they are needed.
cache_dir = ".pytest_cache"
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]

[tool.coverage.run]
# Measure only the library package; test modules are excluded from the data.
source = ["imas_codex"]
omit = ["tests/*", "*/tests/*", "**/test_*.py", "**/conftest.py"]

[tool.coverage.report]
# Lines matching any of these regular expressions are left out of the report.
exclude_lines = [
    "pragma: no cover",
    "def __repr__",
    "if self.debug:",
    "if settings.DEBUG",
    "raise AssertionError",
    "raise NotImplementedError",
    "if 0:",
    # The dots around __main__ match either quote character.
    "if __name__ == .__main__.:",
]
show_missing = true
precision = 2

[tool.coverage.html]
directory = "htmlcov"
precision = 2

# Ruff configuration
[tool.ruff]
target-version = "py312"
# The formatter wraps at 88 columns; E501 is ignored below, so an overlong
# line that cannot be wrapped is not reported as a lint error.
line-length = 88
indent-width = 4

[tool.ruff.format]
quote-style = "double"
indent-style = "space"
skip-magic-trailing-comma = false
line-ending = "auto"

[tool.ruff.lint]
select = [
    "E",   # pycodestyle errors
    "W",   # pycodestyle warnings
    "F",   # pyflakes
    "I",   # isort
    "B",   # flake8-bugbear
    "C4",  # flake8-comprehensions
    "UP",  # pyupgrade
]
# C901 is not covered by any prefix in `select`, so its entry below has no
# effect unless a complexity rule set is enabled later.
ignore = [
    "C901",  # too complex
    "E501",  # long lines
]

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]  # Allow unused imports in __init__.py
"tests/**/*" = ["B011", "F401", "F811"]  # Allow assert False and unused imports in tests
# Remote scripts run on Python 3.6+ systems - disable type hint and subprocess modernization
"imas_codex/remote/scripts/*" = ["UP006", "UP007", "UP021", "UP022", "UP031", "UP035", "UP038", "UP045"]

[tool.ruff.lint.isort]
known-first-party = ["imas_codex"]
force-single-line = false
combine-as-imports = true

# UV configuration: PyTorch index selection by extra
# Default sync uses cpu extra.
# NOTE(review): the original note pointed to a `default-groups` setting, but
# no such key is defined under [tool.uv] in this file — confirm how the
# default extra is selected.
# Both indexes are `explicit`: uv consults them only for packages routed to
# them in [tool.uv.sources], never for general resolution.
[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true

[[tool.uv.index]]
name = "pytorch-gpu"
url = "https://download.pytorch.org/whl/cu121"
explicit = true

[tool.uv.sources]
# torch comes from the CPU index for the cpu and test extras, and from the
# CUDA 12.1 index for the gpu extra.
torch = [
  { index = "pytorch-cpu", extra = "cpu" },
  { index = "pytorch-cpu", extra = "test" },
  { index = "pytorch-gpu", extra = "gpu" },
]


[tool.uv]
prerelease = "if-necessary-or-explicit"
# The gpu extra conflicts with both cpu and test: [tool.uv.sources] routes
# torch to the CUDA index for gpu and to the CPU index for the other two, so
# gpu must never be resolved together with either of them.
conflicts = [
  [
    { extra = "cpu" },
    { extra = "gpu" },
  ],
  [
    { extra = "test" },
    { extra = "gpu" },
  ],
]