Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/gcp_admin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ jobs:
region: ${{ env.REGION }}
image: gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}
revision_traffic: LATEST=100
# listUsers() scans ~112K Firebase Auth users for the Cumulative
# Users chart; 512Mi was OOMing (SIGABRT) during the scan.
flags: '--memory=1Gi --cpu=1'
secrets: |
FIREBASE_PROJECT_ID=WEB_ADMIN_FIREBASE_PROJECT_ID:latest
FIREBASE_CLIENT_EMAIL=WEB_ADMIN_FIREBASE_CLIENT_EMAIL:latest
Expand Down
45 changes: 35 additions & 10 deletions web/admin/app/api/omi/stats/daily-new-users/route.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { NextRequest, NextResponse } from "next/server";
import { getAdminAuth } from "@/lib/firebase/admin";
import { verifyAdmin } from "@/lib/auth";
import { getJsonCache, setJsonCache } from "@/lib/redis";

export const dynamic = "force-dynamic";

Expand All @@ -10,9 +11,16 @@ export const dynamic = "force-dynamic";
// `metadata.creationTime` is set by the Auth service on account creation
// and is the authoritative signup timestamp.
//
// Scanning every user via `listUsers()` pages 1000 at a time (112K ≈ 25s),
// so the full series is cached in module scope for 10 minutes. A pending
// rebuild is shared across concurrent requests so we don't fan out.
// Scanning every user via `listUsers()` pages 1000 at a time (112K ≈ 25s)
// and keeping UserRecord objects live is enough to OOM a 512Mi Cloud Run
// container. The fix is threefold:
// 1. Persist the computed series to Redis so only one instance pays
// the scan cost per TTL window, even across cold starts.
// 2. Keep an in-memory shadow of the Redis value so warm instances
// skip the round-trip.
// 3. Iterate pages one at a time, only retaining the date bucket map
// (~700 string keys + ints) and dropping each UserRecord batch to
// GC as soon as the loop moves on.

type DailyPoint = { date: string; users: number; cumulative: number };
type CachedSeries = {
Expand All @@ -21,7 +29,9 @@ type CachedSeries = {
generatedAt: number;
};

const CACHE_TTL_MS = 10 * 60 * 1000;
const REDIS_KEY = "admin:stats:daily-new-users:v1";
const REDIS_TTL_SECONDS = 30 * 60; // 30 min — matches the in-memory shadow
const LOCAL_TTL_MS = 30 * 60 * 1000;

let cachedSeries: CachedSeries | null = null;
let pendingBuild: Promise<CachedSeries> | null = null;
Expand All @@ -37,17 +47,20 @@ async function buildDailySeries(): Promise<CachedSeries> {
do {
const page = await auth.listUsers(1000, pageToken);
for (const user of page.users) {
const ct = user.metadata?.creationTime
? new Date(user.metadata.creationTime)
: null;
if (!ct || Number.isNaN(ct.getTime())) continue;
const rawCt = user.metadata?.creationTime;
if (!rawCt) continue;
const ct = new Date(rawCt);
if (Number.isNaN(ct.getTime())) continue;
const key = ct.toISOString().slice(0, 10);
countsByDate[key] = (countsByDate[key] || 0) + 1;
if (!earliest || ct < earliest) earliest = ct;
if (!latest || ct > latest) latest = ct;
total++;
}
pageToken = page.pageToken || undefined;
// Yield to the event loop between pages; this gives V8 an
// opportunity to collect the previous page's UserRecord objects
// before we request the next (best-effort — GC timing is not guaranteed).
await new Promise((r) => setImmediate(r));
} while (pageToken);

if (!earliest || !latest) {
Expand Down Expand Up @@ -80,13 +93,25 @@ async function buildDailySeries(): Promise<CachedSeries> {

async function getSeries(): Promise<CachedSeries> {
const now = Date.now();
if (cachedSeries && now - cachedSeries.generatedAt < CACHE_TTL_MS) {

// 1. Warm in-memory shadow.
if (cachedSeries && now - cachedSeries.generatedAt < LOCAL_TTL_MS) {
return cachedSeries;
}

// 2. Shared Redis cache — survives cold starts and cross-instance.
const fromRedis = await getJsonCache<CachedSeries>(REDIS_KEY);
if (fromRedis && now - fromRedis.generatedAt < LOCAL_TTL_MS) {
cachedSeries = fromRedis;
return fromRedis;
}

// 3. De-duplicate concurrent rebuilds inside the same instance.
if (pendingBuild) return pendingBuild;
pendingBuild = buildDailySeries()
.then((series) => {
.then(async (series) => {
cachedSeries = series;
await setJsonCache(REDIS_KEY, series, REDIS_TTL_SECONDS);
return series;
})
.finally(() => {
Expand Down
33 changes: 33 additions & 0 deletions web/admin/lib/redis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,39 @@ function getRedis(): Redis | null {
return _redis;
}

/**
* Read a JSON value from Redis. Returns null if Redis is unavailable,
* the key is missing, or the stored value can't be parsed. Errors are
* logged but never thrown — callers should treat this as a best-effort
* cache read and fall back to the source of truth.
*/
/**
 * Best-effort JSON read from Redis. Resolves to null when Redis is
 * unconfigured, the key does not exist, or the stored payload fails to
 * parse. Never throws: any failure is logged and the caller is expected
 * to fall back to the source of truth.
 */
export async function getJsonCache<T>(key: string): Promise<T | null> {
  const client = getRedis();
  if (client === null) return null;
  try {
    const payload = await client.get(key);
    // Missing key (null) and empty string both mean "no usable cache".
    return payload ? (JSON.parse(payload) as T) : null;
  } catch (err) {
    console.error(`Redis get ${key} failed:`, err);
    return null;
  }
}

/**
* Write a JSON value to Redis with a TTL (in seconds). Fail-open: any
* serialization or transport error is logged and swallowed.
*/
/**
 * Best-effort JSON write to Redis with an expiry (in seconds).
 * Fail-open: serialization and transport errors are logged and
 * swallowed so a cache write can never break the caller.
 */
export async function setJsonCache(key: string, value: unknown, ttlSeconds: number): Promise<void> {
  const client = getRedis();
  if (client === null) return;
  try {
    const serialized = JSON.stringify(value);
    await client.set(key, serialized, 'EX', ttlSeconds);
  } catch (err) {
    console.error(`Redis set ${key} failed:`, err);
  }
}

/**
* Delete the enforcement stage cache for a user.
* Matches backend's invalidate_enforcement_cache() in utils/fair_use.py.
Expand Down
Loading