Skip to content

Commit 2fc4c02

Browse files
authored
Better alignment of tracer tools with functional testing framework (#765)
1 parent c34ed62 commit 2fc4c02

5 files changed

Lines changed: 464 additions & 13 deletions

File tree

local-db-tracing/README.md

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,53 @@ python local-db-tracing/validate_rdb_selects.py --input-file local-db-tracing/ou
3232

3333
8. Review pass/fail details in `local-db-tracing/output/<paired-run>/rdb-selects-results.md`.
3434

35+
### Step-By-Step Checks
36+
37+
Dual capture runs also generate per-step artifacts so you can replay and validate one step at a time.
38+
39+
For a paired run such as `local-db-tracing/output/<paired-run>/`:
40+
41+
- replay SQL for each source step is written under `cdc-<database>/step-<N>/setup.sql`
42+
- target verification SQL for each target step is written under `logical-<database>/step-<N>/query.sql`
43+
- each `query.sql` is cumulative through that step, so step 2 reflects the expected target state after both step 1 and step 2 have been applied
44+
45+
Manual step-by-step workflow:
46+
47+
1. Run `cdc-<database>/step-1/setup.sql` against the source database.
48+
2. Wait until `nedss-datareporting-reporting-pipeline-service-1` reports "No ids to process from the topics."
49+
3. Run:
50+
51+
```powershell
52+
python local-db-tracing/validate_rdb_selects.py --input-file local-db-tracing/output/<paired-run>/logical-<database>/step-1/query.sql
53+
```
54+
55+
4. Review the generated Markdown report for step 1, `local-db-tracing/output/<paired-run>/logical-<database>/step-1/rdb-selects-results.md`.
56+
5. Run `cdc-<database>/step-2/setup.sql` against the source database, then wait again as in step 2.
57+
6. Run:
58+
59+
```powershell
60+
python local-db-tracing/validate_rdb_selects.py --input-file local-db-tracing/output/<paired-run>/logical-<database>/step-2/query.sql
61+
```
62+
63+
7. Review the generated Markdown report for step 2, `local-db-tracing/output/<paired-run>/logical-<database>/step-2/rdb-selects-results.md`.
64+
8. Repeat for later steps.
65+
66+
Example validator command for a step query file:
67+
68+
```powershell
69+
python local-db-tracing/validate_rdb_selects.py --input-file local-db-tracing/output/<paired-run>/logical-RDB_MODERN/step-2/query.sql
70+
```
71+
72+
When only `--input-file` is provided, the validator writes results next to that step query file using default names:
73+
74+
- `rdb-selects-results.json`
75+
- `rdb-selects-results.md`
76+
77+
For example, validating `logical-RDB_MODERN/step-2/query.sql` writes:
78+
79+
- `logical-RDB_MODERN/step-2/rdb-selects-results.json`
80+
- `logical-RDB_MODERN/step-2/rdb-selects-results.md`
81+
3582
## Overview
3683

3784
This toolkit supports two goals:
@@ -275,7 +322,9 @@ Dual-capture run (example `.../20260408-111218-NBS_ODSE-to-RDB_MODERN/`):
275322

276323
- `combined-manifest.json`: pointers to source/target artifacts for the action window
277324
- `cdc-<database>/`: CDC artifacts for source database
325+
- `cdc-<database>/step-<N>/setup.sql`: replay SQL for individual source steps
278326
- `logical-<database>/`: logical artifacts for target database
327+
- `logical-<database>/step-<N>/query.sql`: cumulative verification SQL for each target step
279328
- `rdb-selects.sql`: generated target verification queries with `-- EXPECTED_ROWS_JSON` comments
280329
- `rdb-selects-results.json`: machine-readable validation results (when validator is run)
281330
- `rdb-selects-results.md`: human-readable validation report (when validator is run)

local-db-tracing/generate_rdb_selects.py

Lines changed: 117 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,13 @@
2828
VAR_PLUS_OFFSET_PATTERN = re.compile(r"^(?P<left>@[A-Za-z0-9_]+)\s*\+\s*(?P<offset>-?\d+)$")
2929
OFFSET_PLUS_VAR_PATTERN = re.compile(r"^(?P<offset>-?\d+)\s*\+\s*(?P<right>@[A-Za-z0-9_]+)$")
3030

31+
# Column names that render_sql adds to both its SELECT exclusion set and its
# EXPECTED_ROWS_JSON exclusion set, on top of the per-table exclusions.
# NOTE(review): presumably listed because these timestamps differ between
# runs and cannot be compared reliably — confirm.
IGNORED_OUTPUT_COLUMNS = frozenset(
    ("RDB_LAST_REFRESH_TIME", "LAB_RPT_LAST_UPDATE_DT")
)
37+
3138

3239
@dataclass(frozen=True)
3340
class DeclareEntry:
@@ -871,6 +878,65 @@ def logical_change_step(change: dict[str, object]) -> int | None:
871878
return sorted(finalized, key=lambda item: (item.schema_name.lower(), item.table_name.lower(), item.where_fields))
872879

873880

881+
def logical_change_step(change: dict[str, object]) -> int | None:
    """Return the integer step stored at ``change["metadata"]["step"]``.

    Returns ``None`` when ``metadata`` is not a dict, or when the ``step``
    entry is missing or cannot be converted with ``int()``.
    """
    metadata = change.get("metadata")
    if isinstance(metadata, dict):
        try:
            return int(metadata.get("step"))
        except (TypeError, ValueError):
            # Missing (None) or non-numeric step: fall through to None.
            pass
    return None
890+
891+
892+
def step_numbers_from_manifest_or_changes(
    manifest: dict[str, object],
    logical_changes: list[dict[str, object]],
) -> list[int]:
    """Collect the distinct step numbers known for a run.

    Steps listed under ``manifest["steps"]`` come first, followed by any
    additional steps found on the logical changes. Each group keeps its
    first-seen order and duplicates are dropped. Manifest entries that are
    not dicts, or whose ``step`` cannot be converted with ``int()``, are
    skipped.
    """
    # A dict doubles as an insertion-ordered set of step numbers.
    discovered: dict[int, None] = {}

    declared_steps = manifest.get("steps")
    for entry in declared_steps if isinstance(declared_steps, list) else ():
        if not isinstance(entry, dict):
            continue
        try:
            number = int(entry.get("step"))
        except (TypeError, ValueError):
            continue
        discovered.setdefault(number, None)

    for change in logical_changes:
        number = logical_change_step(change)
        if number is not None:
            discovered.setdefault(number, None)

    return list(discovered)
921+
922+
923+
def build_renderable_scaffolds(
    logical_changes_obj: list[dict[str, object]],
    declare_entries: list[DeclareEntry],
) -> list[SelectScaffold]:
    """Build the scaffolds that should be rendered for a set of logical changes.

    Runs ``build_scaffolds``, then applies the ``known_tables`` entry of the
    ``known_lookup_keys.json`` file located next to this module (only when
    that file exists and contains a JSON object), then passes the result
    through ``consolidate_fk_scaffolds``. Scaffolds whose table name starts
    with ``nrt_`` (case-insensitive) are left out of the returned list.
    """
    result = build_scaffolds(logical_changes_obj, declare_entries)

    lookup_path = Path(__file__).with_name("known_lookup_keys.json")
    if lookup_path.exists():
        # Best effort: a lookup file that cannot be read or parsed only
        # produces a warning on stderr.
        try:
            lookup_obj = json.loads(lookup_path.read_text(encoding="utf-8"))
            if isinstance(lookup_obj, dict):
                result = apply_known_lookup_keys(result, lookup_obj.get("known_tables"))
        except (OSError, json.JSONDecodeError) as error:
            print(f"Warning: Could not load {lookup_path}: {error}", file=sys.stderr)

    renderable = []
    for scaffold in consolidate_fk_scaffolds(result):
        if scaffold.table_name.lower().startswith("nrt_"):
            continue
        renderable.append(scaffold)
    return renderable
938+
939+
874940
def apply_known_lookup_keys(
875941
scaffolds: list[SelectScaffold],
876942
known_lookup_keys: dict[str, dict[str, object]] | None,
@@ -998,6 +1064,41 @@ def display_path(path_value: str) -> str:
9981064
return path_value.replace("\\", "/")
9991065

10001066

1067+
def write_step_query_files(
    logical_output_dir: Path,
    manifest: dict[str, object],
    logical_changes_obj: list[dict[str, object]],
    declare_lines: list[str],
    declare_entries: list[DeclareEntry],
    columns_by_table: dict[tuple[str, str], list[str]] | None,
    primary_keys_by_table: dict[tuple[str, str], frozenset[str]] | None,
    foreign_keys_by_source: dict[tuple[str, str, str], tuple[str, str, str]] | None,
    generated_always_columns: set[tuple[str, str, str]] | None,
    auto_datetime_defaults: set[tuple[str, str, str]] | None,
) -> None:
    """Write one cumulative ``step-<N>/query.sql`` file per known step.

    For every step returned by ``step_numbers_from_manifest_or_changes`` a
    ``step-<N>`` directory is created under ``logical_output_dir`` and filled
    with the SQL rendered from all logical changes whose step is less than or
    equal to ``N``. The remaining arguments are forwarded unchanged to
    ``render_sql``.
    """
    step_numbers = step_numbers_from_manifest_or_changes(manifest, logical_changes_obj)

    # Changes without a usable step count as step 0, so they are part of
    # every step file.
    stepped_changes = [
        (logical_change_step(change) or 0, change) for change in logical_changes_obj
    ]

    for step_number in step_numbers:
        target_dir = logical_output_dir / f"step-{step_number}"
        target_dir.mkdir(parents=True, exist_ok=True)

        changes_so_far = [
            change for change_step, change in stepped_changes if change_step <= step_number
        ]
        sql_text = render_sql(
            manifest,
            declare_lines,
            declare_entries,
            build_renderable_scaffolds(changes_so_far, declare_entries),
            columns_by_table,
            primary_keys_by_table,
            foreign_keys_by_source,
            generated_always_columns,
            auto_datetime_defaults,
        )
        (target_dir / "query.sql").write_text(sql_text, encoding="utf-8")
1100+
1101+
10011102
def render_sql(
10021103
manifest: dict[str, object],
10031104
declare_lines: list[str],
@@ -1076,8 +1177,8 @@ def render_sql(
10761177
if normalize_identifier(s) == normalize_identifier(scaffold.schema_name)
10771178
and normalize_identifier(t) == normalize_identifier(scaffold.table_name)
10781179
)
1079-
select_excluded_columns = pk_columns | generated_excluded_for_table | auto_excluded_for_table
1080-
json_excluded_columns = pk_columns | generated_excluded_for_table | auto_excluded_for_table
1180+
select_excluded_columns = pk_columns | generated_excluded_for_table | auto_excluded_for_table | IGNORED_OUTPUT_COLUMNS
1181+
json_excluded_columns = pk_columns | generated_excluded_for_table | auto_excluded_for_table | IGNORED_OUTPUT_COLUMNS
10811182
select_columns: list[str] | None = None
10821183
if columns_by_table is not None:
10831184
all_columns = columns_by_table.get(table_key)
@@ -1116,17 +1217,7 @@ def generate_rdb_selects_from_manifest(
11161217

11171218
declare_lines = extract_declare_block(inserts_text)
11181219
declare_entries = parse_declare_entries(declare_lines)
1119-
scaffolds = build_scaffolds(logical_changes_obj, declare_entries)
1120-
known_lookup_keys_file = Path(__file__).with_name("known_lookup_keys.json")
1121-
if known_lookup_keys_file.exists():
1122-
try:
1123-
known_lookup_keys_obj = json.loads(known_lookup_keys_file.read_text(encoding="utf-8"))
1124-
if isinstance(known_lookup_keys_obj, dict):
1125-
scaffolds = apply_known_lookup_keys(scaffolds, known_lookup_keys_obj.get("known_tables"))
1126-
except (OSError, json.JSONDecodeError) as error:
1127-
print(f"Warning: Could not load {known_lookup_keys_file}: {error}", file=sys.stderr)
1128-
scaffolds = consolidate_fk_scaffolds(scaffolds)
1129-
scaffolds = [s for s in scaffolds if not s.table_name.lower().startswith("nrt_")]
1220+
scaffolds = build_renderable_scaffolds(logical_changes_obj, declare_entries)
11301221
logical_database = str(manifest.get("logical_database") or "RDB_MODERN")
11311222
columns_by_table, primary_keys_by_table, foreign_keys_by_source, generated_always_columns, auto_datetime_defaults = load_rdb_column_metadata(logical_database)
11321223
output_sql = render_sql(
@@ -1141,6 +1232,19 @@ def generate_rdb_selects_from_manifest(
11411232
auto_datetime_defaults,
11421233
)
11431234

1235+
write_step_query_files(
1236+
logical_changes_path.parent,
1237+
manifest,
1238+
logical_changes_obj,
1239+
declare_lines,
1240+
declare_entries,
1241+
columns_by_table,
1242+
primary_keys_by_table,
1243+
foreign_keys_by_source,
1244+
generated_always_columns,
1245+
auto_datetime_defaults,
1246+
)
1247+
11441248
final_output_path = output_path if output_path is not None else manifest_path.with_name("rdb-selects.sql")
11451249
final_output_path.write_text(output_sql, encoding="utf-8")
11461250
return final_output_path, len(scaffolds)

local-db-tracing/test_generate_rdb_selects.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,63 @@ def test_auto_datetime_columns_are_excluded_from_select_and_expected_json(self)
295295
self.assertNotIn("[updated_dttm]", sql)
296296
self.assertIn('-- EXPECTED_ROWS_JSON:\n-- [{"patient_uid":1234}]', sql)
297297

298+
def test_known_refresh_timestamps_are_excluded_from_select_and_expected_json(self) -> None:
    """The two refresh/update timestamp columns must not appear anywhere in the rendered SQL."""
    declare_statement = "DECLARE @dbo_Person_local_id nvarchar(40) = N'PSN1234GA01';"
    declare_entries = generate_rdb_selects.parse_declare_entries([declare_statement])

    lab_report_insert = {
        "schema_name": "dbo",
        "table_name": "LAB_REPORT",
        "operation": "insert",
        "stable_identity": {
            "strategy": "business_keys",
            "eligible_for_comparison": True,
            "fields": {"PATIENT_LOCAL_ID": "PSN1234GA01"},
        },
        "primary_key_values": {},
        "after": {
            "PATIENT_LOCAL_ID": "PSN1234GA01",
            "RDB_LAST_REFRESH_TIME": "2026-04-23T12:34:56",
            "LAB_RPT_LAST_UPDATE_DT": "2026-04-23T12:35:56",
            "RESULT_STATUS": "FINAL",
        },
    }
    scaffolds = generate_rdb_selects.build_scaffolds([lab_report_insert], declare_entries)

    manifest = {
        "logical_database": "RDB_MODERN",
        "cdc_summary_file": str(generate_rdb_selects.REPO_ROOT / "summary.txt"),
        "logical_changes_file": str(generate_rdb_selects.REPO_ROOT / "logical-changes.json"),
    }
    table_columns = {
        ("dbo", "LAB_REPORT"): [
            "PATIENT_LOCAL_ID",
            "RDB_LAST_REFRESH_TIME",
            "LAB_RPT_LAST_UPDATE_DT",
            "RESULT_STATUS",
        ],
    }

    sql = generate_rdb_selects.render_sql(
        manifest,
        [declare_statement],
        declare_entries,
        scaffolds,
        columns_by_table=table_columns,
    )

    # Columns that are kept show up in the SELECT list.
    self.assertIn("SELECT", sql)
    for kept_column in (" [PATIENT_LOCAL_ID],", " [RESULT_STATUS]"):
        self.assertIn(kept_column, sql)

    # The ignored columns are not selected ...
    for bracketed_name in ("[RDB_LAST_REFRESH_TIME]", "[LAB_RPT_LAST_UPDATE_DT]"):
        self.assertNotIn(bracketed_name, sql)

    # ... and are not part of the expected-rows payload or any other output.
    self.assertIn('-- EXPECTED_ROWS_JSON:\n-- [{"PATIENT_LOCAL_ID":"PSN1234GA01","RESULT_STATUS":"FINAL"}]', sql)
    for bare_name in ("RDB_LAST_REFRESH_TIME", "LAB_RPT_LAST_UPDATE_DT"):
        self.assertNotIn(bare_name, sql)
354+
298355
def test_expected_rows_json_maps_ambiguous_entity_uid_candidates_deterministically(self) -> None:
299356
declare_entries = generate_rdb_selects.parse_declare_entries(
300357
[
@@ -434,6 +491,94 @@ def test_render_sql_includes_steps_comment_for_multi_step_scaffold(self) -> None
434491

435492
self.assertIn("-- Steps: 1, 2", sql)
436493

494+
def test_generate_rdb_selects_writes_cumulative_step_query_files(self) -> None:
    """Each ``step-<N>/query.sql`` reflects the expected state after steps 1 through N."""

    def patient_change(operation: str, last_name: str, step: int) -> dict[str, object]:
        # One D_PATIENT logical change; only operation, last name and step vary.
        return {
            "schema_name": "dbo",
            "table_name": "D_PATIENT",
            "operation": operation,
            "stable_identity": {
                "strategy": "business_keys",
                "eligible_for_comparison": True,
                "fields": {"PATIENT_LOCAL_ID": "PSN1000GA01"},
            },
            "primary_key_values": {"PATIENT_KEY": 9},
            "after": {"PATIENT_KEY": 9, "PATIENT_LOCAL_ID": "PSN1000GA01", "PATIENT_LAST_NAME": last_name},
            "metadata": {"step": step},
        }

    with TemporaryDirectory() as temp_dir:
        paired_dir = Path(temp_dir) / "20260408-143320-NBS_ODSE-to-RDB_MODERN"
        cdc_dir = paired_dir / "cdc-NBS_ODSE"
        logical_dir = paired_dir / "logical-RDB_MODERN"
        for directory in (cdc_dir, logical_dir):
            directory.mkdir(parents=True)

        summary_path = cdc_dir / "summary.txt"
        summary_path.write_text(
            """Reconstructed SQL written to: inserts.sql\nRun inserts.sql directly against the source database to replay captured writes.\n""",
            encoding="utf-8",
        )

        inserts_path = cdc_dir / "inserts.sql"
        inserts_path.write_text(
            """USE [NBS_ODSE];\nDECLARE @dbo_Entity_entity_uid bigint = -1000;\nDECLARE @dbo_Person_local_id nvarchar(40) = N'PSN1000GA01';\n\n-- STEP 1: Create patient\n-- step: 1\nINSERT INTO [dbo].[Person] ([person_uid]) VALUES (@dbo_Entity_entity_uid);\n""",
            encoding="utf-8",
        )

        logical_changes = [
            patient_change("insert", "Alpha", 1),
            patient_change("update", "Beta", 2),
        ]
        logical_changes_path = logical_dir / "logical-changes.json"
        logical_changes_path.write_text(
            json.dumps(logical_changes, indent=2) + "\n",
            encoding="utf-8",
        )

        manifest = {
            "logical_database": "TEST_DB_NO_CACHE",
            "cdc_summary_file": str(summary_path),
            "cdc_inserts_file": str(inserts_path),
            "logical_changes_file": str(logical_changes_path),
            "steps": [
                {"step": 1, "description": "Create patient"},
                {"step": 2, "description": "Update patient"},
            ],
        }
        manifest_path = paired_dir / "combined-manifest.json"
        manifest_path.write_text(
            json.dumps(manifest, indent=2) + "\n",
            encoding="utf-8",
        )

        output_path, scaffold_count = generate_rdb_selects.generate_rdb_selects_from_manifest(manifest_path)

        step1_sql = (logical_dir / "step-1" / "query.sql").read_text(encoding="utf-8")
        step2_sql = (logical_dir / "step-2" / "query.sql").read_text(encoding="utf-8")
        final_sql = output_path.read_text(encoding="utf-8")

        alpha_fragment = '"PATIENT_LAST_NAME":"Alpha"'
        beta_fragment = '"PATIENT_LAST_NAME":"Beta"'

        self.assertEqual(scaffold_count, 1)
        # Step 1 only knows the inserted value.
        self.assertIn(alpha_fragment, step1_sql)
        self.assertNotIn(beta_fragment, step1_sql)
        # Step 2 and the overall output carry the updated value.
        self.assertIn(beta_fragment, step2_sql)
        self.assertIn(beta_fragment, final_sql)
581+
437582

438583
# Run the test suite when this module is executed directly.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)