Skip to content

Commit 27cc4cb

Browse files
committed
feat: add notifications sync and popularity scoring to typesense-sync
- Add ProjectNotification schema, transform, QUERY_BY, FACET_BY
  - Syncs name, description, proponent, location, region, type, subType, trigger, decision, pcp, dates, associatedProject
  - Resolves list IDs for type/region; strips HTML for search, preserves HTML in descriptionHtml for display
- Add popularity field (int32, sortable) to documents and projects schemas
- Add popularity-sync.js
  - Queries penguin-analytics for Search Result Clicked and Search Download Clicked events over a rolling window (default 30d)
  - Weights: downloads 3x, clicks 1x
  - Patches live Typesense collections via bulk update action
- Add popularity CronJob Helm template and values (disabled by default)
  - Scheduled at 3 AM, 1 hour after nightly full re-index
- Add pg dependency for PostgreSQL connectivity
1 parent 6c2b65e commit 27cc4cb

7 files changed

Lines changed: 497 additions & 10 deletions

File tree

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
{{- if .Values.popularity.enabled }}
2+
apiVersion: batch/v1
3+
kind: CronJob
4+
metadata:
5+
name: {{ include "typesense.fullname" . }}-popularity
6+
labels:
7+
{{- include "typesense.labels" . | nindent 4 }}
8+
spec:
9+
# Schedule comes from .Values.popularity.schedule; the default is 3 AM daily — 1 hour after the full re-index (2 AM).
10+
# Sequencing matters: the re-index swaps the alias to a fresh collection
11+
# (no popularity scores), then this job patches the scores in.
12+
schedule: {{ .Values.popularity.schedule | quote }}
13+
concurrencyPolicy: Forbid # Never overlap popularity runs
14+
successfulJobsHistoryLimit: 3
15+
failedJobsHistoryLimit: 1
16+
jobTemplate:
17+
spec:
18+
backoffLimit: {{ .Values.popularity.backoffLimit }}
19+
ttlSecondsAfterFinished: {{ .Values.popularity.ttlSecondsAfterFinished }}
20+
template:
21+
metadata:
22+
labels:
23+
role: api-eagle-epic # Grants MongoDB access via NetworkPolicy (reused for egress)
24+
spec:
25+
restartPolicy: OnFailure
26+
containers:
27+
- name: popularity
28+
image: "{{ .Values.popularity.image.repository }}:{{ .Values.popularity.image.tag }}"
29+
imagePullPolicy: {{ .Values.popularity.image.pullPolicy }}
30+
command: ["node", "src/popularity-sync.js"]
31+
env:
32+
# ── Typesense connection ────────────────────────────────────────
33+
- name: TYPESENSE_HOST
34+
value: {{ include "typesense.fullname" . }}
35+
- name: TYPESENSE_PORT
36+
value: {{ .Values.typesense.port | quote }}
37+
- name: TYPESENSE_API_KEY
38+
valueFrom:
39+
secretKeyRef:
40+
name: {{ .Values.typesense.existingSecret }}
41+
key: TYPESENSE_API_KEY
42+
# ── penguin-analytics PostgreSQL ────────────────────────────────
43+
- name: PENGUIN_DB_HOST
44+
valueFrom:
45+
secretKeyRef:
46+
name: {{ .Values.popularity.penguinSecret }}
47+
key: PENGUIN_DB_HOST
48+
- name: PENGUIN_DB_PORT
49+
valueFrom:
50+
secretKeyRef:
51+
name: {{ .Values.popularity.penguinSecret }}
52+
key: PENGUIN_DB_PORT
53+
- name: PENGUIN_DB_NAME
54+
valueFrom:
55+
secretKeyRef:
56+
name: {{ .Values.popularity.penguinSecret }}
57+
key: PENGUIN_DB_NAME
58+
- name: PENGUIN_DB_USER
59+
valueFrom:
60+
secretKeyRef:
61+
name: {{ .Values.popularity.penguinSecret }}
62+
key: PENGUIN_DB_USER
63+
- name: PENGUIN_DB_PASSWORD
64+
valueFrom:
65+
secretKeyRef:
66+
name: {{ .Values.popularity.penguinSecret }}
67+
key: PENGUIN_DB_PASSWORD
68+
# ── Tuning (optional) ───────────────────────────────────────────
69+
- name: POPULARITY_WINDOW_DAYS
70+
value: {{ .Values.popularity.windowDays | quote }}
71+
resources:
72+
{{- toYaml .Values.popularity.resources | nindent 14 }}
73+
{{- end }}

helm/typesense/values.yaml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,45 @@ reindex:
8686
backoffLimit: 2
8787
ttlSecondsAfterFinished: 3600
8888

89+
# ---------------------------------------------------------------------------
90+
# Popularity sync CronJob — patches click/download scores from penguin-analytics into
91+
# the Typesense popularity field. By default runs at 3 AM, 1 hour after the re-index.
92+
#
93+
# Requires a pre-created secret containing penguin-analytics DB credentials:
94+
# oc create secret generic penguin-analytics-db \
95+
# --from-literal=PENGUIN_DB_HOST=penguin-analytics-database \
96+
# --from-literal=PENGUIN_DB_PORT=5432 \
97+
# --from-literal=PENGUIN_DB_NAME=analytics \
98+
# --from-literal=PENGUIN_DB_USER=analytics_user \
99+
# --from-literal=PENGUIN_DB_PASSWORD=<password> \
100+
# -n <namespace>
101+
# ---------------------------------------------------------------------------
102+
popularity:
103+
enabled: false # Enable once penguin-analytics is deployed in this namespace
104+
schedule: "0 3 * * *" # 3:00 AM daily — after re-index completes at 2 AM
105+
106+
image:
107+
repository: image-registry.openshift-image-registry.svc:5000/6cdc9e-tools/typesense-sync
108+
tag: "latest"
109+
pullPolicy: Always
110+
111+
# Name of the secret containing penguin-analytics DB connection details
112+
penguinSecret: penguin-analytics-db
113+
114+
# Rolling window for click/download score aggregation (days)
115+
windowDays: 30
116+
117+
resources:
118+
requests:
119+
cpu: 50m
120+
memory: 128Mi
121+
limits:
122+
cpu: 200m
123+
memory: 256Mi
124+
125+
backoffLimit: 2
126+
ttlSecondsAfterFinished: 3600
127+
89128
# ---------------------------------------------------------------------------
90129
# MongoDB connection info (reads from existing eagle-api-mongodb secret)
91130
# ---------------------------------------------------------------------------

typesense-sync/package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@
66
"main": "src/index.js",
77
"scripts": {
88
"start": "node src/index.js",
9-
"full-sync": "node src/full-sync.js"
9+
"full-sync": "node src/full-sync.js",
10+
"popularity-sync": "node src/popularity-sync.js"
1011
},
1112
"engines": {
1213
"node": ">=24"
1314
},
1415
"dependencies": {
1516
"mongodb": "^7.0.0",
17+
"pg": "^8.20.0",
1618
"typesense": "^3.0.5"
1719
}
1820
}

typesense-sync/src/collections.js

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ const DOCUMENT_SCHEMA = {
3333
{ name: 'internalExt', type: 'string', optional: true },
3434
{ name: 'datePosted', type: 'int64', sort: true, optional: true },
3535
{ name: 'dateUploaded', type: 'int64', sort: true, optional: true },
36+
// Click/download score over a rolling window (30 days by default) — updated nightly by popularity-sync.js
37+
{ name: 'popularity', type: 'int32', sort: true, optional: true },
3638
],
3739
};
3840

@@ -57,6 +59,8 @@ const PROJECT_SCHEMA = {
5759
{ name: 'decisionDate', type: 'int64', sort: true, optional: true },
5860
// [lng, lat] centroid for map thumbnail in search results
5961
{ name: 'centroid', type: 'float[]', optional: true },
62+
// Click score over a rolling window (30 days by default) — updated nightly by popularity-sync.js
63+
{ name: 'popularity', type: 'int32', sort: true, optional: true },
6064
],
6165
};
6266

@@ -84,11 +88,40 @@ const RECENTACTIVITY_SCHEMA = {
8488
],
8589
};
8690

91+
/**
 * Typesense collection schema for ProjectNotification records.
 * The collection is named `notifications`; every field except `id` is optional.
 */
const PROJECTNOTIFICATION_SCHEMA = {
92+
name: 'notifications',
93+
fields: [
94+
{ name: 'id', type: 'string' },
95+
// Search fields — the same six fields, in the same order, as QUERY_BY.ProjectNotification
96+
{ name: 'name', type: 'string', index: true, optional: true },
97+
{ name: 'description', type: 'string', index: true, optional: true },
98+
{ name: 'proponent', type: 'string', index: true, optional: true },
99+
{ name: 'associatedProjectName', type: 'string', index: true, optional: true },
100+
{ name: 'region', type: 'string', facet: true, index: true, optional: true },
101+
{ name: 'location', type: 'string', index: true, optional: true },
102+
// Facet / filter fields (region, declared with the search fields above, is also faceted)
103+
{ name: 'type', type: 'string', facet: true, optional: true },
104+
{ name: 'subType', type: 'string', facet: true, optional: true },
105+
{ name: 'trigger', type: 'string', facet: true, optional: true },
106+
{ name: 'decision', type: 'string', facet: true, optional: true },
107+
{ name: 'pcp', type: 'string', facet: true, optional: true },
108+
// Dates — stored as sortable int64 (presumably epoch timestamps; confirm against the transform)
109+
{ name: 'notificationReceivedDate', type: 'int64', sort: true, optional: true },
110+
{ name: 'decisionDate', type: 'int64', sort: true, optional: true },
111+
// Metadata — neither listed in QUERY_BY nor requested in FACET_BY
112+
{ name: 'associatedProjectId', type: 'string', optional: true },
113+
{ name: 'centroid', type: 'float[]', optional: true },
114+
// Original HTML kept for display only; index: false keeps it out of the search index
115+
{ name: 'descriptionHtml', type: 'string', index: false, optional: true },
116+
],
117+
};
118+
87119
/** Map _schemaName → Typesense schema */
88120
const SCHEMAS = {
89-
Document: DOCUMENT_SCHEMA,
90-
Project: PROJECT_SCHEMA,
91-
RecentActivity: RECENTACTIVITY_SCHEMA,
121+
Document: DOCUMENT_SCHEMA,
122+
Project: PROJECT_SCHEMA,
123+
RecentActivity: RECENTACTIVITY_SCHEMA,
124+
ProjectNotification: PROJECTNOTIFICATION_SCHEMA,
92125
};
93126

94127
/**
@@ -108,15 +141,20 @@ const QUERY_BY = {
108141
fields: 'headline,content,notificationName',
109142
weights: '9000,8000,3000',
110143
},
144+
ProjectNotification: {
145+
fields: 'name,description,proponent,associatedProjectName,region,location',
146+
weights: '9000,8000,3000,2000,1500,1000',
147+
},
111148
};
112149

113150
/**
114151
* Facet fields to include in every search response, keyed by schemaName.
115152
*/
116153
const FACET_BY = {
117-
Document: 'type,milestone,documentAuthorType,projectPhase,legislation',
118-
Project: 'region,status,currentPhaseName,eacDecision,type,sector',
119-
RecentActivity: 'type',
154+
Document: 'type,milestone,documentAuthorType,projectPhase,legislation',
155+
Project: 'region,status,currentPhaseName,eacDecision,type,sector',
156+
RecentActivity: 'type',
157+
ProjectNotification: 'type,region,decision,pcp',
120158
};
121159

122160
module.exports = { SCHEMAS, QUERY_BY, FACET_BY };

0 commit comments

Comments
 (0)