# Workflow file for the run "Add product-tests: retry/close gates + scenario/chaos suite" (#15).

# Product-test rings: a CPU-only gate for pull requests and pushes to
# main/dev, plus a scheduled nightly run.
name: Product Tests

on:
  pull_request:
    branches: [main, dev]
  push:
    branches: [main, dev]
  schedule:
    # Nightly cloud ring — 09:00 UTC every day.
    - cron: '0 9 * * *'

# The group is keyed by event name as well as ref. Scheduled runs execute on
# the default branch ref, so a ref-only key would put the nightly run in the
# same group as pushes to that branch and `cancel-in-progress` would let one
# cancel the other mid-run. With the event in the key, a run is only ever
# superseded by a newer run of the same event on the same ref.
concurrency:
  group: product-tests-${{ github.event_name }}-${{ github.ref }}
  cancel-in-progress: true

# Workflow-wide default token scope. `pull-requests: write` is what allows the
# summary to be posted as a PR comment.
permissions:
  contents: read
  pull-requests: write
jobs:
  # ---------------------------------------------------------------------------
  # PR gate: CPU-only, passthrough pipeline, <25 min budget
  # ---------------------------------------------------------------------------
  # Detect touched paths so we can opt a small UI-multimodal subset into the
  # PR gate when onboarding/graph components changed, without paying the API
  # cost on every PR.
  path-filter:
    if: github.event_name != 'schedule'
    runs-on: ubuntu-latest
    name: Detect touched paths
    # Least privilege: this job only checks out the repo and reads the list
    # of changed files, so it does not need the workflow-level
    # `pull-requests: write` scope.
    permissions:
      contents: read
      pull-requests: read
    outputs:
      # 'true' when any path listed under the `ui` filter below changed.
      ui: ${{ steps.filter.outputs.ui }}
    steps:
      - uses: actions/checkout@v4
      - uses: dorny/paths-filter@v3
        id: filter
        with:
          filters: |
            ui:
              - 'frontend/src/components/onboarding/**'
              - 'frontend/src/components/graph/**'
# ---------------------------------------------------------------------------
# PR gate job: builds the frontend, runs the local-mode scenario and chaos
# suites on a CPU-only runner for every non-scheduled event, then publishes
# the test summary.
# ---------------------------------------------------------------------------
  pr-gate:
    if: github.event_name != 'schedule'
    needs: path-filter
    runs-on: ubuntu-latest
    name: Product Tests (PR gate, CPU)
    timeout-minutes: 25
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install ffmpeg
        run: sudo apt-get update && sudo apt-get install -y ffmpeg

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '22.19.0'
          cache: 'npm'
          cache-dependency-path: frontend/package-lock.json

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
          version: "0.9.11"

      - name: Build frontend
        working-directory: frontend
        run: |
          npm ci
          npm run build

      - name: Install product-tests deps
        run: uv sync --group product-tests

      - name: Install Playwright browser
        run: uv run playwright install --with-deps chromium

      - name: Check harness/testids.py is in sync with frontend
        working-directory: product-tests
        run: |
          uv run python -m harness.testids --check || {
            echo "::error::harness/testids.py is out of sync with frontend data-testid values."
            echo "::error::Run \`uv run python -m harness.testids --sync\` and commit the result."
            exit 1
          }

      - name: Run PR-gate scenarios (local mode)
        env:
          SCOPE_TEST_INSTRUMENTATION: "1"
          # Empty value hides any CUDA device so the run stays CPU-only.
          CUDA_VISIBLE_DEVICES: ""
          SCOPE_CLOUD_RING: "pr"
        run: |
          uv run pytest product-tests/scenarios/ \
            -v --tb=short -m "not cloud"

      - name: Run PR-gate chaos (local mode, fast subset)
        env:
          SCOPE_TEST_INSTRUMENTATION: "1"
          CUDA_VISIBLE_DEVICES: ""
        run: |
          uv run pytest product-tests/chaos/ \
            -v --tb=short -m "not slow" --chaos-seed="${{ github.sha }}"

      # NOTE: cloud-smoke step lives in docker-build.yml's `product-tests-cloud-smoke`
      # job, which `needs: deploy-pr` so it runs only after the per-PR fal app
      # is deployed. That consumes `needs.deploy-pr.outputs.livepeer_fal_app_id`
      # directly, so no `SCOPE_PR_FAL_APP_ID` secret is required.

      # Only the UI multimodal subset gets opted into the PR gate, and only
      # when the touched paths suggest it's relevant. Keeps the common-case
      # PR ring machine-only (fast, free) without punting visual coverage
      # entirely when the risky areas change.
      - name: Run UI multimodal (path-triggered, advisory)
        # NOTE: secrets cannot be referenced directly in `if:` — the step
        # always runs when the path filter matched, and the Python side
        # skips cleanly when ANTHROPIC_API_KEY is unset (verdict "uncertain"
        # instead of "fail"). Trailing `|| true` keeps this advisory during
        # the Slice 5 stabilization window.
        if: needs.path-filter.outputs.ui == 'true'
        env:
          SCOPE_TEST_INSTRUMENTATION: "1"
          CUDA_VISIBLE_DEVICES: ""
          SCOPE_MULTIMODAL_EVAL: "1"
          SCOPE_MULTIMODAL_BUDGET_USD: "0.50"
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        run: |
          uv run pytest product-tests/scenarios/ \
            -v --tb=short -m "multimodal and ui and not cloud" || true

      - name: Aggregate summary
        if: always()
        id: summary
        run: |
          # This step runs under `always()`, so the reports directory may not
          # exist yet (an earlier step can fail before any test runs); silence
          # find's error in that case. Sorting makes the pick deterministic
          # when more than one summary.md is present.
          summary=$(find product-tests/reports -name summary.md 2>/dev/null | sort | head -n 1)
          if [ -n "$summary" ]; then
            echo "SUMMARY_PATH=$summary" >> "$GITHUB_ENV"
            # The summary is arbitrary file content, so use a random delimiter
            # that cannot already appear on a line of its own inside it.
            delimiter="SUMMARY_EOF_$(od -An -N8 -tx1 /dev/urandom | tr -d ' \n')"
            # The closing delimiter MUST be on its own line. If the summary
            # file ends without a trailing newline, the delimiter would be
            # concatenated onto the last content line and GitHub would error
            # with "Matching delimiter not found", which breaks the
            # PR-comment post step. Force a newline before it.
            {
              echo "summary<<$delimiter"
              cat "$summary"
              printf '\n'
              echo "$delimiter"
            } >> "$GITHUB_OUTPUT"
            cat "$summary"
          else
            echo "No summary.md emitted" | tee -a "$GITHUB_STEP_SUMMARY"
          fi

      - name: Post summary as PR comment
        # Pull requests from forks run with a read-only GITHUB_TOKEN, so the
        # comment post would fail and turn an otherwise green gate red. Only
        # post when the PR head lives in this repository.
        if: >-
          always() &&
          github.event_name == 'pull_request' &&
          github.event.pull_request.head.repo.full_name == github.repository &&
          steps.summary.outputs.summary != ''
        uses: marocchino/sticky-pull-request-comment@v2
        with:
          # A stable header lets the action update the same comment on re-runs.
          header: product-tests-summary
          message: |
            ### Product Tests — ${{ job.status }}
            ${{ steps.summary.outputs.summary }}
            <sub>Run: [#${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})</sub>

      - name: Upload reports on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: product-tests-reports-${{ github.run_id }}
          path: product-tests/reports/
          retention-days: 14
# ---------------------------------------------------------------------------
# Nightly ring: cloud-backed (models on fal), <60 min budget.
# No GPU needed on the runner — every nightly test step sets
# SCOPE_CLOUD_APP_ID, so the runner just drives Scope + Playwright and talks
# to fal over WebRTC.
# NOTE(review): only the release step passes `-m cloud`; the scenarios + chaos
# and regression steps run without a marker filter — confirm that is intended.
# ---------------------------------------------------------------------------
  nightly:
    if: github.event_name == 'schedule'
    runs-on: ubuntu-latest
    name: Product Tests (Nightly, Cloud)
    timeout-minutes: 60
    # Least privilege: this job never comments on a pull request, so it does
    # not need the workflow-level `pull-requests: write` scope.
    permissions:
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install ffmpeg
        run: sudo apt-get update && sudo apt-get install -y ffmpeg

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
          version: "0.9.11"

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '22.19.0'
          # Same npm cache configuration as the PR gate job.
          cache: 'npm'
          cache-dependency-path: frontend/package-lock.json

      - name: Build frontend
        working-directory: frontend
        run: |
          npm ci
          npm run build

      - name: Install product-tests deps
        run: uv sync --group product-tests

      - name: Install Playwright browser
        run: uv run playwright install --with-deps chromium

      - name: Check harness/testids.py is in sync with frontend
        working-directory: product-tests
        run: uv run python -m harness.testids --check

      - name: Run scenarios + chaos (cloud)
        env:
          SCOPE_TEST_INSTRUMENTATION: "1"
          # The main-pinned app deployed by fal-deploy.yml on every push to
          # main. Stable, known, no secret needed. Tests append "/ws" in
          # the fixture when they open a connection.
          # NOTE(review): the value below already ends in "/ws" — confirm
          # whether the fixture still appends it or this comment is stale.
          SCOPE_CLOUD_APP_ID: "daydream/scope-livepeer/ws"
          # Scope client cloud auth — required for signer.daydream.live
          # to accept the discover_orchestrators call. Empty if secrets
          # not yet configured; cloud-marked tests will fail/skip
          # without leaking until the secrets are wired.
          SCOPE_CLOUD_API_KEY: ${{ secrets.SCOPE_CLOUD_API_KEY }}
          SCOPE_USER_ID: ${{ secrets.SCOPE_USER_ID }}
          SCOPE_CLOUD_RING: "nightly"
          SCOPE_CHURN_DURATION_SEC: "180"
          # Multimodal enabled in the nightly ring only — daily budget cap
          # prevents a runaway suite from burning unlimited API credit.
          SCOPE_MULTIMODAL_EVAL: "1"
          SCOPE_MULTIMODAL_TRIAGE: "1"
          SCOPE_MULTIMODAL_BUDGET_USD: "10.00"
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        run: |
          uv run pytest product-tests/scenarios/ product-tests/chaos/ \
            -v --tb=short --chaos-seed="${{ github.run_id }}"

      - name: Run release full-matrix (cloud, all starter workflows)
        env:
          SCOPE_TEST_INSTRUMENTATION: "1"
          # Same main-pinned app as the scenarios + chaos step: deployed by
          # fal-deploy.yml on every push to main, no secret needed.
          SCOPE_CLOUD_APP_ID: "daydream/scope-livepeer/ws"
          # Scope client cloud auth — required for signer.daydream.live
          # to accept the discover_orchestrators call. Empty if secrets
          # not yet configured; cloud-marked tests will fail/skip
          # without leaking until the secrets are wired.
          SCOPE_CLOUD_API_KEY: ${{ secrets.SCOPE_CLOUD_API_KEY }}
          SCOPE_USER_ID: ${{ secrets.SCOPE_USER_ID }}
          SCOPE_CLOUD_RING: "nightly"
        run: |
          uv run pytest product-tests/release/ -v --tb=short -m cloud

      - name: Run regression suite
        env:
          SCOPE_TEST_INSTRUMENTATION: "1"
          # Same main-pinned app as the scenarios + chaos step: deployed by
          # fal-deploy.yml on every push to main, no secret needed.
          SCOPE_CLOUD_APP_ID: "daydream/scope-livepeer/ws"
          # Scope client cloud auth — required for signer.daydream.live
          # to accept the discover_orchestrators call. Empty if secrets
          # not yet configured; cloud-marked tests will fail/skip
          # without leaking until the secrets are wired.
          SCOPE_CLOUD_API_KEY: ${{ secrets.SCOPE_CLOUD_API_KEY }}
          SCOPE_USER_ID: ${{ secrets.SCOPE_USER_ID }}
          SCOPE_CLOUD_RING: "nightly"
          SCOPE_MULTIMODAL_EVAL: "1"
          SCOPE_MULTIMODAL_TRIAGE: "1"
          SCOPE_MULTIMODAL_BUDGET_USD: "10.00"
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        run: |
          # Run pytest only once the directory holds at least one test_*.py
          # file; otherwise log a skip message and let the step succeed.
          if [ -d product-tests/regression ] && ls product-tests/regression/test_*.py >/dev/null 2>&1; then
            uv run pytest product-tests/regression/ -v --tb=short
          else
            echo "No regression tests yet — skipping."
          fi

      - name: Upload reports
        # Nightly reports are kept whether or not the suites passed.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: product-tests-nightly-${{ github.run_id }}
          path: product-tests/reports/
          retention-days: 30