Skip to content

Commit 86cc860

Browse files
committed
Cache Implementation
- Agency - Officer - Unit - Location
1 parent 7643489 commit 86cc860

23 files changed

Lines changed: 1481 additions & 21 deletions

backend/README.md

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,52 @@ To authenticate your API requests, include the JWT token in the `Authorization`
4949
```
5050
Authorization: Bearer <your_token>
5151
```
52+
53+
## Data Retrieval Benchmarking
54+
55+
### Unit Cache Benchmarking
56+
57+
There is a small benchmarking script at [backend/tasks/benchmark_unit_cache.py](tasks/benchmark_unit_cache.py) for measuring unit-heavy endpoints.
58+
59+
Example using an existing token:
60+
```bash
61+
python3 backend/tasks/benchmark_unit_cache.py \
62+
--base-url http://localhost:5000 \
63+
--token "$ACCESS_TOKEN" \
64+
--unit-id <unit-uid-1> \
65+
--unit-id <unit-uid-2> \
66+
--search-term "precinct 1" \
67+
--search-term "investigations" \
68+
--output-json /tmp/unit-cache-before.json
69+
```
70+
71+
Example logging in directly:
72+
```bash
73+
python3 backend/tasks/benchmark_unit_cache.py \
74+
--base-url http://localhost:5000 \
75+
--email you@example.com \
76+
--password "your-password" \
77+
--unit-id <unit-uid> \
78+
--search-term "precinct 1"
79+
```
80+
81+
To compare an after-run against a saved baseline:
82+
```bash
83+
python3 backend/tasks/benchmark_unit_cache.py \
84+
--base-url http://localhost:5000 \
85+
--token "$ACCESS_TOKEN" \
86+
--unit-id <unit-uid> \
87+
--search-term "precinct 1" \
88+
--compare-json /tmp/unit-cache-before.json \
89+
--output-json /tmp/unit-cache-after.json
90+
```
91+
92+
For Neo4j query-plan profiling, there is also [backend/tasks/profile_unit_cache_queries.py](tasks/profile_unit_cache_queries.py). It runs `PROFILE` against both the current cached query shape and a legacy traversal version of the same unit-related workload, then prints the DB-hit and timing deltas between the two.
93+
94+
Example:
95+
```bash
96+
python3 backend/tasks/profile_unit_cache_queries.py \
97+
--unit-id <unit-uid-1> \
98+
--unit-id <unit-uid-2> \
99+
--output-json /tmp/unit-query-profile.json
100+
```

backend/api.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,36 @@ def refresh_location_cache():
190190
result = LocationCacheService().refresh_location_richness_cache()
191191
click.echo(result)
192192

193+
@app.cli.command("refresh-agency-cache")
def refresh_agency_cache():
    """Refresh cached agency metric fields."""
    # Function-scope import: the service module is loaded only when this
    # command actually runs.
    from backend.services.agency_cache_service import (
        AgencyCacheService,
    )

    # Echo whatever the service returns so the operator sees the outcome.
    result = AgencyCacheService().refresh_agency_metrics_cache()
    click.echo(result)
202+
203+
@app.cli.command("refresh-unit-cache")
def refresh_unit_cache():
    """Refresh cached unit metric fields."""
    # Function-scope import: the service module is loaded only when this
    # command actually runs.
    from backend.services.unit_cache_service import (
        UnitCacheService,
    )

    # Echo whatever the service returns so the operator sees the outcome.
    result = UnitCacheService().refresh_unit_metrics_cache()
    click.echo(result)
212+
213+
@app.cli.command("refresh-officer-cache")
def refresh_officer_cache():
    """Refresh cached officer metric fields."""
    # Function-scope import: the service module is loaded only when this
    # command actually runs.
    from backend.services.officer_cache_service import (
        OfficerCacheService,
    )

    # Echo whatever the service returns so the operator sees the outcome.
    result = OfficerCacheService().refresh_officer_metrics_cache()
    click.echo(result)
222+
193223

194224
def register_routes(app: Flask):
195225
app.register_blueprint(sources_bp)

backend/database/models/agency.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from neomodel import (
88
db,
99
StructuredNode,
10+
DateTimeNeo4jFormatProperty,
11+
IntegerProperty,
1012
StringProperty,
1113
RelationshipTo,
1214
UniqueIdProperty,
@@ -44,7 +46,11 @@ class Unit(StructuredNode, HasCitations, JsonSerializable, SearchableMixin):
4446
"hq_address", "hq_city", "hq_state", "hq_zip",
4547
"agency", "date_established"
4648
]
47-
__hidden_properties__ = ["citations", "city_node"]
49+
__hidden_properties__ = [
50+
"citations", "city_node",
51+
"officer_count_cached", "complaint_count_cached",
52+
"allegation_count_cached", "metrics_updated_at",
53+
]
4854

4955
uid = UniqueIdProperty()
5056
name = StringProperty()
@@ -57,6 +63,10 @@ class Unit(StructuredNode, HasCitations, JsonSerializable, SearchableMixin):
5763
website_url = StringProperty()
5864
description = StringProperty()
5965
date_established = DateNeo4jFormatProperty()
66+
officer_count_cached = IntegerProperty(default=0, index=True)
67+
complaint_count_cached = IntegerProperty(default=0, index=True)
68+
allegation_count_cached = IntegerProperty(default=0, index=True)
69+
metrics_updated_at = DateTimeNeo4jFormatProperty()
6070

6171
# Relationships
6272
agency = RelationshipTo("Agency", "ESTABLISHED_BY", cardinality=One)
@@ -86,14 +96,7 @@ def total_officers(self):
8696
Returns:
8797
int: The total number of officers.
8898
"""
89-
cy = """
90-
MATCH (u:Unit {uid: $uid})-[]-(:Employment)-[]-(o:Officer)
91-
RETURN COUNT(o) AS total_officers
92-
"""
93-
result, meta = db.cypher_query(cy, {'uid': self.uid})
94-
if result:
95-
return result[0][0]
96-
return 0
99+
return self.officer_count_cached or 0
97100

98101
@property
99102
def current_commander(self):
@@ -156,8 +159,12 @@ class Agency(StructuredNode, HasCitations, JsonSerializable, SearchableMixin):
156159
"hq_address", "hq_city", "hq_state", "hq_zip",
157160
"jurisdiction", "date_established"
158161
]
159-
__hidden_properties__ = ["citations", "state_node",
160-
"county_node", "city_node"]
162+
__hidden_properties__ = [
163+
"citations", "state_node", "county_node", "city_node",
164+
"unit_count_cached", "officer_count_cached",
165+
"complaint_count_cached", "allegation_count_cached",
166+
"metrics_updated_at",
167+
]
161168
__virtual_relationships__ = ["units"]
162169

163170
uid = UniqueIdProperty()
@@ -172,6 +179,11 @@ class Agency(StructuredNode, HasCitations, JsonSerializable, SearchableMixin):
172179
description = StringProperty()
173180
date_established = DateNeo4jFormatProperty()
174181
jurisdiction = StringProperty(choices=Jurisdiction.choices())
182+
unit_count_cached = IntegerProperty(default=0, index=True)
183+
officer_count_cached = IntegerProperty(default=0, index=True)
184+
complaint_count_cached = IntegerProperty(default=0, index=True)
185+
allegation_count_cached = IntegerProperty(default=0, index=True)
186+
metrics_updated_at = DateTimeNeo4jFormatProperty()
175187

176188
# Relationships
177189
city_node = RelationshipTo(

backend/database/models/infra/locations.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
StructuredNode,
33
StringProperty,
44
IntegerProperty,
5+
DateTimeNeo4jFormatProperty,
56
FloatProperty,
67
UniqueIdProperty,
78
Relationship,
@@ -10,7 +11,6 @@
1011
One
1112
)
1213
from neomodel.contrib.spatial_properties import PointProperty
13-
from backend.database.properties.datetime import DateNeo4jFormatProperty
1414

1515

1616
STATE_INFO = {
@@ -102,7 +102,7 @@ class CountyNode(Place):
102102
officer_count_cached = IntegerProperty(default=0, index=True)
103103
complaint_count_cached = IntegerProperty(default=0, index=True)
104104
richness_score_cached = FloatProperty(default=0.0, index=True)
105-
richness_updated_at = DateNeo4jFormatProperty()
105+
richness_updated_at = DateTimeNeo4jFormatProperty()
106106

107107
# Relationships
108108
state = RelationshipTo("StateNode", "WITHIN_STATE", cardinality=One)
@@ -118,7 +118,7 @@ class CityNode(Place):
118118
officer_count_cached = IntegerProperty(default=0, index=True)
119119
complaint_count_cached = IntegerProperty(default=0, index=True)
120120
richness_score_cached = FloatProperty(default=0.0, index=True)
121-
richness_updated_at = DateNeo4jFormatProperty()
121+
richness_updated_at = DateTimeNeo4jFormatProperty()
122122

123123
# Relationships
124124
county = RelationshipTo("CountyNode", "WITHIN_COUNTY", cardinality=One)

backend/database/models/officer.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from neomodel import (
1010
db, StructuredNode, Relationship,
1111
StringProperty, IntegerProperty,
12+
DateTimeNeo4jFormatProperty,
1213
UniqueIdProperty, One
1314
)
1415

@@ -40,7 +41,13 @@ class Officer(StructuredNode, HasCitations, JsonSerializable):
4041
"last_name", "suffix", "ethnicity",
4142
"gender", "year_of_birth"
4243
]
43-
__hidden_properties__ = ["citations"]
44+
__hidden_properties__ = [
45+
"citations",
46+
"complaint_count_cached",
47+
"allegation_count_cached",
48+
"substantiated_count_cached",
49+
"metrics_updated_at",
50+
]
4451
__virtual_relationships__ = ["state_ids"]
4552

4653
uid = UniqueIdProperty()
@@ -51,6 +58,10 @@ class Officer(StructuredNode, HasCitations, JsonSerializable):
5158
ethnicity = StringProperty(choices=Ethnicity.choices())
5259
gender = StringProperty(choices=Gender.choices())
5360
year_of_birth = IntegerProperty()
61+
complaint_count_cached = IntegerProperty(default=0, index=True)
62+
allegation_count_cached = IntegerProperty(default=0, index=True)
63+
substantiated_count_cached = IntegerProperty(default=0, index=True)
64+
metrics_updated_at = DateTimeNeo4jFormatProperty()
5465

5566
def __repr__(self):
5667
return f"<Officer {self.uid}>"

backend/queries/agency_cache.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
from neomodel import db
2+
3+
4+
# Cypher that computes the raw metric counts for every Agency node.
#
# Three independent CALL subqueries run per agency so that each aggregate is
# computed on its own traversal:
#   1. distinct Units linked through ESTABLISHED_BY,
#   2. distinct Officers reached via Unit <-IN_UNIT- Employment -HELD_BY- Officer,
#   3. distinct Complaints and Allegations reached from those officers.
#
# Each result row is, in order:
#   agency_uid, unit_count, officer_count, complaint_count, allegation_count
# and rows are sorted by agency_uid ascending.
#
# NOTE(review): `CALL (a) { ... }` is the variable-scope subquery form; confirm
# the deployed Neo4j version supports it.
AGENCY_METRICS_INPUT_QUERY = """
MATCH (a:Agency)
CALL (a) {
    OPTIONAL MATCH (a)-[:ESTABLISHED_BY]-(u:Unit)
    RETURN count(DISTINCT u) AS unit_count
}
CALL (a) {
    OPTIONAL MATCH (a)-[:ESTABLISHED_BY]-(:Unit)<-[:IN_UNIT]
        -(:Employment)-[:HELD_BY]-(o:Officer)
    RETURN count(DISTINCT o) AS officer_count
}
CALL (a) {
    OPTIONAL MATCH (a)-[:ESTABLISHED_BY]-(:Unit)<-[:IN_UNIT]
        -(:Employment)-[:HELD_BY]-(:Officer)
        -[:ACCUSED_OF]->(al:Allegation)-[:ALLEGED]-(c:Complaint)
    RETURN
        count(DISTINCT c) AS complaint_count,
        count(DISTINCT al) AS allegation_count
}
RETURN
    a.uid AS agency_uid,
    unit_count,
    officer_count,
    complaint_count,
    allegation_count
ORDER BY agency_uid ASC
"""
31+
32+
# Cypher that writes precomputed metrics back onto Agency nodes in bulk.
#
# Expects a `$updates` parameter: a list of maps, each providing
#   agency_uid, unit_count_cached, officer_count_cached,
#   complaint_count_cached, allegation_count_cached, metrics_updated_at.
# `metrics_updated_at` is passed through Cypher's datetime(), so it must be a
# value that function accepts (presumably an ISO 8601 string - confirm against
# the caller).
#
# Rows whose agency_uid matches no Agency are skipped by the MATCH; the query
# returns a single `updated` count of the agencies that were matched and set.
UPDATE_AGENCY_METRICS_CACHE_QUERY = """
UNWIND $updates AS row
MATCH (a:Agency {uid: row.agency_uid})
SET
    a.unit_count_cached = row.unit_count_cached,
    a.officer_count_cached = row.officer_count_cached,
    a.complaint_count_cached = row.complaint_count_cached,
    a.allegation_count_cached = row.allegation_count_cached,
    a.metrics_updated_at = datetime(row.metrics_updated_at)
RETURN count(a) AS updated
"""
43+
44+
45+
class AgencyCacheQueries:
    """Thin query layer for reading and writing cached agency metrics."""

    def fetch_agency_metrics_inputs(self):
        """Run ``AGENCY_METRICS_INPUT_QUERY`` and return its result rows.

        Returns:
            The rows produced by ``db.cypher_query``; the accompanying
            metadata is discarded.
        """
        rows, _ = db.cypher_query(AGENCY_METRICS_INPUT_QUERY)
        return rows

    def update_agency_metrics_cache(self, updates: list[dict]) -> int:
        """Write a batch of cached metric values onto Agency nodes.

        Args:
            updates: One dict per agency, passed to the update query as its
                ``$updates`` parameter.

        Returns:
            The ``updated`` count reported by the query, or 0 when
            ``updates`` is empty or the query returns no rows.
        """
        # Nothing to write: skip the database round trip entirely.
        if not updates:
            return 0

        rows, _ = db.cypher_query(
            UPDATE_AGENCY_METRICS_CACHE_QUERY,
            {"updates": updates},
        )
        return rows[0][0] if rows else 0

backend/queries/location_cache.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@
6767
city.officer_count_cached = row.officer_count_cached,
6868
city.complaint_count_cached = row.complaint_count_cached,
6969
city.richness_score_cached = row.richness_score_cached,
70-
city.richness_updated_at = date(row.richness_updated_at)
70+
city.richness_updated_at = datetime(row.richness_updated_at)
7171
RETURN count(city) AS updated
7272
"""
7373

@@ -79,7 +79,7 @@
7979
county.officer_count_cached = row.officer_count_cached,
8080
county.complaint_count_cached = row.complaint_count_cached,
8181
county.richness_score_cached = row.richness_score_cached,
82-
county.richness_updated_at = date(row.richness_updated_at)
82+
county.richness_updated_at = datetime(row.richness_updated_at)
8383
RETURN count(county) AS updated
8484
"""
8585

backend/queries/officer_cache.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
from neomodel import db
2+
3+
4+
# Cypher that computes the raw metric counts for every Officer node.
#
# For each officer a CALL subquery follows ACCUSED_OF to Allegations and then
# ALLEGED to Complaints, and aggregates:
#   - complaint_count:      distinct complaints,
#   - allegation_count:     distinct allegations,
#   - substantiated_count:  distinct allegations whose `finding`, trimmed and
#                           lower-cased, equals "substantiated".
#
# Why the details matter:
#   - ALLEGED is matched without a direction, the same way the agency metrics
#     query matches it, so both caches traverse the relationship identically
#     regardless of which way it is stored.
#   - substantiated_count uses count(DISTINCT CASE ... THEN al END) rather
#     than summing 1 per row, so an allegation that appears on several matched
#     rows is counted once, consistent with allegation_count. The CASE has no
#     ELSE, so non-matching rows yield null and are ignored by count().
#
# Each result row is, in order:
#   officer_uid, complaint_count, allegation_count, substantiated_count
# and rows are sorted by officer_uid ascending.
OFFICER_METRICS_INPUT_QUERY = """
MATCH (o:Officer)
CALL (o) {
    OPTIONAL MATCH (o)-[:ACCUSED_OF]->(al:Allegation)-[:ALLEGED]-(c:Complaint)
    RETURN
        count(DISTINCT c) AS complaint_count,
        count(DISTINCT al) AS allegation_count,
        count(DISTINCT
            CASE
                WHEN toLower(trim(coalesce(al.finding, ""))) = "substantiated"
                THEN al
            END
        ) AS substantiated_count
}
RETURN
    o.uid AS officer_uid,
    complaint_count,
    allegation_count,
    substantiated_count
ORDER BY officer_uid ASC
"""
25+
26+
# Cypher that writes precomputed metrics back onto Officer nodes in bulk.
#
# Expects a `$updates` parameter: a list of maps, each providing
#   officer_uid, complaint_count_cached, allegation_count_cached,
#   substantiated_count_cached, metrics_updated_at.
# `metrics_updated_at` is passed through Cypher's datetime(), so it must be a
# value that function accepts (presumably an ISO 8601 string - confirm against
# the caller).
#
# Rows whose officer_uid matches no Officer are skipped by the MATCH; the
# query returns a single `updated` count of the officers that were matched
# and set.
UPDATE_OFFICER_METRICS_CACHE_QUERY = """
UNWIND $updates AS row
MATCH (o:Officer {uid: row.officer_uid})
SET
    o.complaint_count_cached = row.complaint_count_cached,
    o.allegation_count_cached = row.allegation_count_cached,
    o.substantiated_count_cached = row.substantiated_count_cached,
    o.metrics_updated_at = datetime(row.metrics_updated_at)
RETURN count(o) AS updated
"""
36+
37+
38+
class OfficerCacheQueries:
    """Thin query layer for reading and writing cached officer metrics."""

    def fetch_officer_metrics_inputs(self):
        """Run ``OFFICER_METRICS_INPUT_QUERY`` and return its result rows.

        Returns:
            The rows produced by ``db.cypher_query``; the accompanying
            metadata is discarded.
        """
        rows, _ = db.cypher_query(OFFICER_METRICS_INPUT_QUERY)
        return rows

    def update_officer_metrics_cache(self, updates: list[dict]) -> int:
        """Write a batch of cached metric values onto Officer nodes.

        Args:
            updates: One dict per officer, passed to the update query as its
                ``$updates`` parameter.

        Returns:
            The ``updated`` count reported by the query, or 0 when
            ``updates`` is empty or the query returns no rows.
        """
        # Nothing to write: skip the database round trip entirely.
        if not updates:
            return 0

        rows, _ = db.cypher_query(
            UPDATE_OFFICER_METRICS_CACHE_QUERY,
            {"updates": updates},
        )
        return rows[0][0] if rows else 0

0 commit comments

Comments
 (0)