Add product-tests: retry/close gates + scenario/chaos suite #15
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Product Tests workflow: a CPU-only gate on pull requests and pushes to
# main/dev, plus a scheduled nightly ring.
name: Product Tests

on:
  pull_request:
    branches: [main, dev]
  push:
    branches: [main, dev]
  schedule:
    # Nightly ring — 09:00 UTC every day
    - cron: '0 9 * * *'

# The event name is part of the group key on purpose: scheduled runs execute
# against the default branch, so keying on `github.ref` alone would put the
# nightly run in the same group as pushes to that branch, and
# `cancel-in-progress` would kill an in-flight nightly whenever someone
# pushed. Runs of the same event on the same ref still supersede each other.
concurrency:
  group: product-tests-${{ github.event_name }}-${{ github.ref }}
  cancel-in-progress: true

# Workflow-wide token scope. `pull-requests: write` is needed by the step
# that posts the test summary as a sticky PR comment.
permissions:
  contents: read
  pull-requests: write

jobs:
  # ---------------------------------------------------------------------------
  # PR gate: CPU-only, passthrough pipeline, <25 min budget
  # ---------------------------------------------------------------------------

  # Detect touched paths so we can opt a small UI-multimodal subset into the
  # PR gate when onboarding/graph components changed, without paying the API
  # cost on every PR.
  path-filter:
    if: github.event_name != 'schedule'
    runs-on: ubuntu-latest
    name: Detect touched paths
    outputs:
      ui: ${{ steps.filter.outputs.ui }}
    steps:
      - uses: actions/checkout@v4

      - uses: dorny/paths-filter@v3
        id: filter
        with:
          filters: |
            ui:
              - 'frontend/src/components/onboarding/**'
              - 'frontend/src/components/graph/**'

  pr-gate:
    # `!cancelled()` replaces the implicit `success()` check on `needs`: if
    # path detection itself fails, the gate must still run. Otherwise the job
    # would be skipped, and a skipped job satisfies a required status check.
    # With no filter output the path-triggered UI step below simply does not
    # run.
    if: ${{ !cancelled() && github.event_name != 'schedule' }}
    needs: path-filter
    runs-on: ubuntu-latest
    name: Product Tests (PR gate, CPU)
    timeout-minutes: 25
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install ffmpeg
        run: sudo apt-get update && sudo apt-get install -y ffmpeg

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '22.19.0'
          cache: 'npm'
          cache-dependency-path: frontend/package-lock.json

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
          version: "0.9.11"

      - name: Build frontend
        working-directory: frontend
        run: |
          npm ci
          npm run build

      - name: Install product-tests deps
        run: uv sync --group product-tests

      - name: Install Playwright browser
        run: uv run playwright install --with-deps chromium

      - name: Check harness/testids.py is in sync with frontend
        working-directory: product-tests
        run: |
          uv run python -m harness.testids --check || {
            echo "::error::harness/testids.py is out of sync with frontend data-testid values."
            echo "::error::Run \`uv run python -m harness.testids --sync\` and commit the result."
            exit 1
          }

      - name: Run PR-gate scenarios (local mode)
        env:
          SCOPE_TEST_INSTRUMENTATION: "1"
          # Empty value hides every GPU from the process (CPU-only gate).
          CUDA_VISIBLE_DEVICES: ""
          SCOPE_CLOUD_RING: "pr"
        run: |
          uv run pytest product-tests/scenarios/ \
            -v --tb=short -m "not cloud"

      - name: Run PR-gate chaos (local mode, fast subset)
        env:
          SCOPE_TEST_INSTRUMENTATION: "1"
          CUDA_VISIBLE_DEVICES: ""
        run: |
          uv run pytest product-tests/chaos/ \
            -v --tb=short -m "not slow" --chaos-seed="${{ github.sha }}"

      # NOTE: cloud-smoke step lives in docker-build.yml's `product-tests-cloud-smoke`
      # job, which `needs: deploy-pr` so it runs only after the per-PR fal app
      # is deployed. That consumes `needs.deploy-pr.outputs.livepeer_fal_app_id`
      # directly, so no `SCOPE_PR_FAL_APP_ID` secret is required.

      # Only the UI multimodal subset gets opted into the PR gate, and only
      # when the touched paths suggest it's relevant. Keeps the common-case
      # PR ring machine-only (fast, free) without punting visual coverage
      # entirely when the risky areas change.
      - name: Run UI multimodal (path-triggered, advisory)
        # NOTE: secrets cannot be referenced directly in `if:` — the step
        # always runs when the path filter matched, and the Python side
        # skips cleanly when ANTHROPIC_API_KEY is unset (verdict "uncertain"
        # instead of "fail"). Trailing `|| true` keeps this advisory during
        # the Slice 5 stabilization window.
        if: needs.path-filter.outputs.ui == 'true'
        env:
          SCOPE_TEST_INSTRUMENTATION: "1"
          CUDA_VISIBLE_DEVICES: ""
          SCOPE_MULTIMODAL_EVAL: "1"
          SCOPE_MULTIMODAL_BUDGET_USD: "0.50"
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        run: |
          uv run pytest product-tests/scenarios/ \
            -v --tb=short -m "multimodal and ui and not cloud" || true

      - name: Aggregate summary
        if: always()
        id: summary
        run: |
          summary=$(find product-tests/reports -name summary.md | head -1)
          if [ -n "$summary" ]; then
            echo "SUMMARY_PATH=$summary" >> "$GITHUB_ENV"
            # The closing heredoc delimiter MUST be on its own line. If the
            # summary file ends without a trailing newline, our closing
            # SUMMARY_EOF gets concatenated onto the last content line and
            # GitHub errors with "Matching delimiter not found 'SUMMARY_EOF'",
            # which silently breaks the PR-comment post step. Force a newline.
            {
              echo "summary<<SUMMARY_EOF"
              cat "$summary"
              printf '\n'
              echo "SUMMARY_EOF"
            } >> "$GITHUB_OUTPUT"
            cat "$summary"
          else
            echo "No summary.md emitted" | tee -a "$GITHUB_STEP_SUMMARY"
          fi

      - name: Post summary as PR comment
        # Skipped for pull requests opened from forks: their GITHUB_TOKEN is
        # read-only regardless of the workflow `permissions`, so posting the
        # comment would fail and turn an otherwise green gate red.
        if: >-
          always()
          && github.event_name == 'pull_request'
          && github.event.pull_request.head.repo.full_name == github.repository
          && steps.summary.outputs.summary != ''
        uses: marocchino/sticky-pull-request-comment@v2
        with:
          header: product-tests-summary
          # Blank lines keep the heading, the summary body and the footer as
          # separate markdown blocks whatever the summary ends with.
          message: |
            ### Product Tests — ${{ job.status }}

            ${{ steps.summary.outputs.summary }}

            <sub>Run: [#${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})</sub>

      - name: Upload reports on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: product-tests-reports-${{ github.run_id }}
          path: product-tests/reports/
          retention-days: 14

  # ---------------------------------------------------------------------------
  # Nightly ring: cloud-backed (models on fal), <60 min budget.
  # No GPU needed on the runner — every nightly test step sets
  # SCOPE_CLOUD_APP_ID, so the runner just drives Scope + Playwright and talks
  # to fal over WebRTC. Only the release full-matrix step additionally
  # restricts the selection with `-m cloud`.
  # ---------------------------------------------------------------------------
  nightly:
    if: github.event_name == 'schedule'
    runs-on: ubuntu-latest
    name: Product Tests (Nightly, Cloud)
    timeout-minutes: 60
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install ffmpeg
        run: sudo apt-get update && sudo apt-get install -y ffmpeg

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
          version: "0.9.11"

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '22.19.0'
          # Same npm cache configuration as the PR gate.
          cache: 'npm'
          cache-dependency-path: frontend/package-lock.json

      - name: Build frontend
        working-directory: frontend
        run: |
          npm ci
          npm run build

      - name: Install product-tests deps
        run: uv sync --group product-tests

      - name: Install Playwright browser
        run: uv run playwright install --with-deps chromium

      - name: Check harness/testids.py is in sync with frontend
        working-directory: product-tests
        run: uv run python -m harness.testids --check

      # Secrets are deliberately scoped to the individual test steps rather
      # than hoisted to job-level `env`, so that dependency installation and
      # third-party actions never see them.
      - name: Run scenarios + chaos (cloud)
        env:
          SCOPE_TEST_INSTRUMENTATION: "1"
          # The main-pinned app deployed by fal-deploy.yml on every push to
          # main. Stable, known, no secret needed. Tests append "/ws" in
          # the fixture when they open a connection.
          # NOTE(review): the value below already ends in "/ws" — confirm the
          # fixture does not end up connecting to ".../ws/ws".
          SCOPE_CLOUD_APP_ID: "daydream/scope-livepeer/ws"
          # Scope client cloud auth — required for signer.daydream.live
          # to accept the discover_orchestrators call. Empty if secrets
          # not yet configured; cloud-marked tests will fail/skip
          # without leaking until the secrets are wired.
          SCOPE_CLOUD_API_KEY: ${{ secrets.SCOPE_CLOUD_API_KEY }}
          SCOPE_USER_ID: ${{ secrets.SCOPE_USER_ID }}
          SCOPE_CLOUD_RING: "nightly"
          SCOPE_CHURN_DURATION_SEC: "180"
          # Multimodal enabled in the nightly ring only — daily budget cap
          # prevents a runaway suite from burning unlimited API credit.
          SCOPE_MULTIMODAL_EVAL: "1"
          SCOPE_MULTIMODAL_TRIAGE: "1"
          SCOPE_MULTIMODAL_BUDGET_USD: "10.00"
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        run: |
          uv run pytest product-tests/scenarios/ product-tests/chaos/ \
            -v --tb=short --chaos-seed="${{ github.run_id }}"

      - name: Run release full-matrix (cloud, all starter workflows)
        env:
          SCOPE_TEST_INSTRUMENTATION: "1"
          # Same main-pinned cloud app and client auth as the
          # scenarios + chaos step; multimodal evaluation is not enabled here.
          SCOPE_CLOUD_APP_ID: "daydream/scope-livepeer/ws"
          SCOPE_CLOUD_API_KEY: ${{ secrets.SCOPE_CLOUD_API_KEY }}
          SCOPE_USER_ID: ${{ secrets.SCOPE_USER_ID }}
          SCOPE_CLOUD_RING: "nightly"
        run: |
          uv run pytest product-tests/release/ -v --tb=short -m cloud

      - name: Run regression suite
        env:
          SCOPE_TEST_INSTRUMENTATION: "1"
          # Same main-pinned cloud app, client auth and multimodal budget as
          # the scenarios + chaos step.
          SCOPE_CLOUD_APP_ID: "daydream/scope-livepeer/ws"
          SCOPE_CLOUD_API_KEY: ${{ secrets.SCOPE_CLOUD_API_KEY }}
          SCOPE_USER_ID: ${{ secrets.SCOPE_USER_ID }}
          SCOPE_CLOUD_RING: "nightly"
          SCOPE_MULTIMODAL_EVAL: "1"
          SCOPE_MULTIMODAL_TRIAGE: "1"
          SCOPE_MULTIMODAL_BUDGET_USD: "10.00"
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        run: |
          # Run pytest only when at least one regression test file exists, so
          # the step does not fail on an empty or missing directory.
          if [ -d product-tests/regression ] && ls product-tests/regression/test_*.py >/dev/null 2>&1; then
            uv run pytest product-tests/regression/ -v --tb=short
          else
            echo "No regression tests yet — skipping."
          fi

      - name: Upload reports
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: product-tests-nightly-${{ github.run_id }}
          path: product-tests/reports/
          retention-days: 30