Skip to content

Upload to PMC

Upload to PMC #83

Workflow file for this run

name: Upload to PMC
# Required configuration
#   vars:
#     PMC_FTP_ADDRESS   FTP/SFTP endpoint for PMC uploads
#                       (defaults to ftp://ftp-private.ncbi.nlm.nih.gov/ when unset)
#     EMAIL_SUCCESS     comma-separated recipients for success mail
#     EMAIL_FAILURE     comma-separated recipients for failure mail
#     MAILGUN_DOMAIN    Mailgun domain (used by .github/scripts/mailgun.sh)
#     SLACK_CHANNEL     Slack channel ID for notifications (used by .github/scripts/slack.sh)
#   secrets:
#     PMC_USER          PMC FTP username
#     PMC_PASS          PMC FTP password
#     MAILGUN_API_KEY   Mailgun API key (used by .github/scripts/mailgun.sh)
#     SLACK_CLI_TOKEN   Slack CLI token (used by .github/scripts/slack.sh)
# Triggers: a cron schedule plus a manual dispatch that accepts an optional
# date override.
on:
  schedule:
    # Minute 15 of hour 8, every day.
    - cron: '15 8 * * *'
  workflow_dispatch:
    inputs:
      # Optional; when empty the workflow derives the date from its state file.
      datestamp:
        type: string
        required: false
        description: 'YYYY-MM-DD to override last run date'
# Single job: archive new articles, upload them to PMC, then notify.
# NOTE(review): the nesting indentation of these keys appears to have been
# stripped in this copy; restore it (upload under jobs, runs-on/env/steps under
# upload) before using the file.
jobs:
upload:
runs-on: ubuntu-latest
env:
# Job-wide FTP endpoint; falls back to the literal URL below when the
# PMC_FTP_ADDRESS repository variable is empty.
PMC_FTP_ADDRESS: ${{ vars.PMC_FTP_ADDRESS || 'ftp://ftp-private.ncbi.nlm.nih.gov/' }}
steps:
# Check out the repository; later steps read requirements.txt, .github/scripts
# and .github/state from the working tree.
- uses: actions/checkout@v6
- name: Validate required config
  # Fail fast, reporting every empty variable or secret in a single message.
  env:
    PMC_FTP_ADDRESS: ${{ env.PMC_FTP_ADDRESS }}
    EMAIL_SUCCESS: ${{ vars.EMAIL_SUCCESS }}
    EMAIL_FAILURE: ${{ vars.EMAIL_FAILURE }}
    MAILGUN_DOMAIN: ${{ vars.MAILGUN_DOMAIN }}
    SLACK_CHANNEL: ${{ vars.SLACK_CHANNEL }}
    PMC_USER: ${{ secrets.PMC_USER }}
    PMC_PASS: ${{ secrets.PMC_PASS }}
    MAILGUN_API_KEY: ${{ secrets.MAILGUN_API_KEY }}
    SLACK_CLI_TOKEN: ${{ secrets.SLACK_CLI_TOKEN }}
  run: |
    required=(
      PMC_FTP_ADDRESS EMAIL_SUCCESS EMAIL_FAILURE MAILGUN_DOMAIN SLACK_CHANNEL
      PMC_USER PMC_PASS MAILGUN_API_KEY SLACK_CLI_TOKEN
    )
    unset_names=()
    for name in "${required[@]}"; do
      # ${!name} is indirect expansion: the value of the variable named by $name.
      [[ -n "${!name:-}" ]] || unset_names+=("$name")
    done
    if [[ ${#unset_names[@]} -ne 0 ]]; then
      echo "Missing required configuration: ${unset_names[*]}" >&2
      exit 1
    fi
# Provision the Python toolchain with pip caching enabled.
- uses: actions/setup-python@v6
  with:
    cache: 'pip'
    python-version: '3.14'
# Install the pinned requirements, then this repository itself in editable mode.
- name: Install dependencies
  run: |
    pip install -r requirements.txt
    pip install -e .
# Decide the AFTER_DATE cutoff handed to pubarchiver and export it via
# GITHUB_ENV. Priority: a valid manual `datestamp` input, else the day before
# the recorded last run, else 1900-01-01 when no state file exists.
- name: Read the date stamp from the previous run
  env:
    # The input is user-controlled text. It is passed through the environment
    # rather than spliced into the script with ${{ }}, so it can never be
    # interpreted as shell code.
    DATESTAMP_INPUT: ${{ github.event.inputs.datestamp }}
  run: |
    STATE_FILE=".github/state/upload-to-pmc/last-run-date"
    AFTER_DATE=""
    # Check if a datestamp was provided as input
    if [[ -n "$DATESTAMP_INPUT" ]]; then
      # Validate the shape (YYYY-MM-DD) and that it is a real calendar date.
      if [[ "$DATESTAMP_INPUT" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]] \
          && date -d "$DATESTAMP_INPUT" +%Y-%m-%d >/dev/null 2>&1; then
        AFTER_DATE="$DATESTAMP_INPUT"
        echo "Using provided date stamp: $AFTER_DATE"
      else
        echo "Warning: datestamp '$DATESTAMP_INPUT' is not in YYYY-MM-DD format. Falling back to previous run date."
      fi
    fi
    # Single fallback path, shared by "no input" and "invalid input".
    if [[ -z "$AFTER_DATE" ]]; then
      if [[ -f "$STATE_FILE" ]]; then
        LAST_RUN_DATE=$(cat "$STATE_FILE")
        # Step back one day from the last recorded run date.
        AFTER_DATE=$(date -d "$LAST_RUN_DATE -1 days" +"%Y-%m-%d")
      else
        AFTER_DATE=1900-01-01
      fi
    fi
    echo "AFTER_DATE=$AFTER_DATE" >> "$GITHUB_ENV"
# Create a per-run scratch directory under RUNNER_TEMP, named after the current
# date and minute, and publish its path to later steps as ARTIFACT_DIR.
- name: Create artifact directory
  run: |
    ARTIFACT_DIR="$RUNNER_TEMP/$(date +%Y-%m-%d-%H%M)"
    mkdir -p "$ARTIFACT_DIR"
    echo "ARTIFACT_DIR=$ARTIFACT_DIR" >> "$GITHUB_ENV"
# Run pubarchiver, mirror its output into run.log, and export its exit status
# as PUBARCHIVER_STATUS (plus today's date as TODAY) for the later steps.
# The step itself does not fail on a pubarchiver error; the status-driven
# steps further down decide what to do with it.
- name: Run pubarchiver for new articles
  run: |
    set -uo pipefail
    TODAY=$(date +%Y-%m-%d)
    # Export TODAY up front so notification steps always have it.
    echo "TODAY=$TODAY" >> "$GITHUB_ENV"
    echo "=== Running pubarchiver for new articles ===" | tee "$ARTIFACT_DIR/run.log"
    # The runner's default shell is `bash -e`; together with pipefail a failing
    # pubarchiver would abort the script before its status is recorded.
    # Suspend errexit just for the pipeline.
    set +e
    pubarchiver -j micropublication -d pmc -C -a "$AFTER_DATE" \
      -o "$ARTIFACT_DIR" -r "$ARTIFACT_DIR/report" \
      -s csv,html -t "$TODAY" -@ "$ARTIFACT_DIR/debug.log" 2>&1 | tee -a "$ARTIFACT_DIR/run.log"
    # PIPESTATUS[0] is pubarchiver's own exit code, not tee's.
    status=${PIPESTATUS[0]}
    set -e
    echo "PUBARCHIVER_STATUS=$status" >> "$GITHUB_ENV"
# Derive three counters from report.csv and export them via GITHUB_ENV:
#   VALIDATION_ERRORS  lines matching validation|missing|failed (case-insensitive)
#   COMPLETE_COUNT     lines containing "complete"
#   TOTAL_LINES        total line count of the report
# All three stay 0 when the report file does not exist.
- name: Evaluate report and counts
  run: |
    set -euo pipefail
    report_csv="$ARTIFACT_DIR/report.csv"
    error_rows=0
    complete_rows=0
    line_total=0
    if [[ -f "$report_csv" ]]; then
      # grep -c exits 1 on zero matches (while still printing 0); `|| true`
      # keeps errexit from treating that as a failure.
      error_rows=$(grep -Eci "validation|missing|failed" "$report_csv" || true)
      complete_rows=$(grep -c "complete" "$report_csv" || true)
      line_total=$(wc -l < "$report_csv")
    fi
    {
      echo "VALIDATION_ERRORS=$error_rows"
      echo "COMPLETE_COUNT=$complete_rows"
      echo "TOTAL_LINES=$line_total"
    } >> "$GITHUB_ENV"
# Upload every generated zip archive to the PMC FTP endpoint and export curl's
# exit status as CURL_STATUS. Runs only when pubarchiver succeeded and the
# report showed no validation errors. CURL_STATUS stays 0 when there is
# nothing to upload.
- name: Upload to PMC FTP
  if: ${{ env.PUBARCHIVER_STATUS == '0' && env.VALIDATION_ERRORS == '0' }}
  env:
    # Credentials reach the script through the environment so that special
    # characters in them cannot be word-split or interpreted by the shell.
    PMC_USER: ${{ secrets.PMC_USER }}
    PMC_PASS: ${{ secrets.PMC_PASS }}
  run: |
    set -u
    CURL_STATUS=0
    # More than a header line in the report and at least one complete article.
    if [[ ${TOTAL_LINES:-0} -gt 1 && ${COMPLETE_COUNT:-0} -gt 0 ]]; then
      # nullglob makes the array empty (rather than the literal pattern) when
      # no archive exists; the array keeps paths with whitespace intact.
      shopt -s nullglob
      archives=("$ARTIFACT_DIR"/micropublication-org/*.zip)
      shopt -u nullglob
      if (( ${#archives[@]} > 0 )); then
        echo "=== FTP'ing file to PMC ===" | tee -a "$ARTIFACT_DIR/run.log"
        # curl's -T accepts a {a,b,c} list to upload several files to one URL.
        archive_list=$(IFS=,; echo "${archives[*]}")
        # Suspend errexit so a failed upload is recorded instead of aborting.
        set +e
        curl --retry 5 --user "$PMC_USER:$PMC_PASS" \
          -T "{$archive_list}" \
          "$PMC_FTP_ADDRESS" 2>&1 | tee -a "$ARTIFACT_DIR/run.log"
        CURL_STATUS=${PIPESTATUS[0]}
        set -e
        if [[ $CURL_STATUS -eq 0 ]]; then
          # Uploaded archives are dropped so they are not kept in the run artifact.
          rm -rf "${ARTIFACT_DIR}/micropublication-org"
        fi
      fi
    fi
    echo "CURL_STATUS=$CURL_STATUS" >> "$GITHUB_ENV"
# Record today's date as the last successful run. Runs only when every earlier
# stage (pubarchiver, validation, FTP upload) reported status 0.
- name: Update state files
  if: ${{ env.PUBARCHIVER_STATUS == '0' && env.VALIDATION_ERRORS == '0' && env.CURL_STATUS == '0' }}
  run: |
    STATE_DIR=".github/state/upload-to-pmc"
    mkdir -p "$STATE_DIR"
    echo "$TODAY" > "$STATE_DIR/last-run-date"
# Commit and push the state file back to the ref that triggered the run.
- name: Commit updated state files
  if: ${{ env.PUBARCHIVER_STATUS == '0' && env.VALIDATION_ERRORS == '0' && env.CURL_STATUS == '0' }}
  run: |
    git config user.name "github-actions[bot]"
    git config user.email "github-actions[bot]@users.noreply.github.com"
    # GITHUB_REF is the runner-provided equivalent of github.ref; using the
    # quoted variable keeps the ref out of the script text.
    git pull origin "$GITHUB_REF"
    git add .github/state/upload-to-pmc/last-run-date
    # A second run on the same day leaves the file unchanged; `git commit`
    # would then exit non-zero and fail an otherwise successful run.
    if git diff --cached --quiet; then
      echo "State file unchanged; nothing to commit."
    else
      git commit -m "🤖 update upload-to-pmc state files"
      git push origin "HEAD:$GITHUB_REF"
    fi
# Choose the recipient list, subject and body of the report mail from the
# recorded stage statuses, and export them as EMAIL_TO, EMAIL_SUBJECT and
# EMAIL_BODY_B64 (base64, single line) for the mail step. Always runs, so any
# status may be empty if an earlier step never executed.
- name: Set email variables
  if: always()
  env:
    EMAIL_FAILURE: ${{ vars.EMAIL_FAILURE }}
    EMAIL_SUCCESS: ${{ vars.EMAIL_SUCCESS }}
    COMPLETE_COUNT: ${{ env.COMPLETE_COUNT }}
    TOTAL_LINES: ${{ env.TOTAL_LINES }}
    TODAY: ${{ env.TODAY }}
    PUBARCHIVER_STATUS: ${{ env.PUBARCHIVER_STATUS }}
    VALIDATION_ERRORS: ${{ env.VALIDATION_ERRORS }}
    CURL_STATUS: ${{ env.CURL_STATUS }}
  run: |
    # Any status other than the literal '0' (including empty) counts as failure.
    if [[ $PUBARCHIVER_STATUS != '0' ]] || [[ $VALIDATION_ERRORS != '0' ]] || [[ $CURL_STATUS != '0' ]]; then
      EMAIL_TO="${EMAIL_FAILURE:-}"
      if [[ $PUBARCHIVER_STATUS != '0' ]]; then
        SUBJECT="PMC upload failed: pubarchiver error"
      elif [[ $VALIDATION_ERRORS != '0' ]]; then
        SUBJECT="PMC upload failed: ${VALIDATION_ERRORS} articles with validation errors"
      else
        SUBJECT="PMC upload failed: FTP upload error"
      fi
    else
      EMAIL_TO="${EMAIL_SUCCESS:-}"
      SUBJECT="PMC upload: ${COMPLETE_COUNT} total articles on ${TODAY}"
    fi
    if [[ $TOTAL_LINES -le 1 ]]; then
      BODY="No new articles were uploaded to PMC."
      # The wildcard must sit outside the quotes to expand; the guard avoids
      # globbing at the filesystem root when ARTIFACT_DIR was never set.
      if [[ -n "${ARTIFACT_DIR:-}" ]]; then
        rm -f "$ARTIFACT_DIR"/report*
      fi
    else
      # The CSV becomes the mail body, so the file itself is removed afterwards.
      BODY="$(cat "$ARTIFACT_DIR/report.csv")"
      rm -f "$ARTIFACT_DIR/report.csv"
    fi
    echo "EMAIL_TO=$EMAIL_TO" >> "$GITHUB_ENV"
    echo "EMAIL_SUBJECT=$SUBJECT" >> "$GITHUB_ENV"
    # Base64 on one line keeps a multi-line body valid in GITHUB_ENV.
    echo "EMAIL_BODY_B64=$(echo "$BODY" | base64 -w 0)" >> "$GITHUB_ENV"
# Hand the prepared recipient, subject and base64 body to the repository's
# Mailgun script, passing the artifact directory as its only argument.
# Always runs, regardless of earlier failures.
- name: Send email report
  if: always()
  env:
    EMAIL_TO: ${{ env.EMAIL_TO }}
    EMAIL_SUBJECT: ${{ env.EMAIL_SUBJECT }}
    EMAIL_BODY_B64: ${{ env.EMAIL_BODY_B64 }}
    MAILGUN_DOMAIN: ${{ vars.MAILGUN_DOMAIN }}
    MAILGUN_API_KEY: ${{ secrets.MAILGUN_API_KEY }}
  run: |
    bash .github/scripts/mailgun.sh "${{ env.ARTIFACT_DIR }}"
# Fetch slack-cli (shallow clone) into RUNNER_TEMP and put its src directory
# on PATH for the following steps.
- name: Install slack-cli
  if: always()
  run: |
    git clone --depth 1 https://github.com/caltechlibrary/slack-cli "$RUNNER_TEMP/slack-cli"
    echo "$RUNNER_TEMP/slack-cli/src" >> "$GITHUB_PATH"
# Run the repository's Slack script with the stage statuses in its environment
# and the artifact directory as its only argument. Always runs.
- name: Post to Slack
  if: always()
  env:
    RUN_NAME: "PMC upload for micropublications.org"
    PUBARCHIVER_STATUS: ${{ env.PUBARCHIVER_STATUS }}
    VALIDATION_ERRORS: ${{ env.VALIDATION_ERRORS }}
    CURL_STATUS: ${{ env.CURL_STATUS }}
    SLACK_CHANNEL: ${{ vars.SLACK_CHANNEL }}
    SLACK_CLI_TOKEN: ${{ secrets.SLACK_CLI_TOKEN }}
  run: |
    bash .github/scripts/slack.sh "${{ env.ARTIFACT_DIR }}"
# Keep whatever remains in the artifact directory as a workflow artifact, at
# maximum compression, even when earlier steps failed.
- name: Upload artifact
  if: always()
  uses: actions/upload-artifact@v6
  with:
    compression-level: 9
    path: ${{ env.ARTIFACT_DIR }}
# Final gates: after notifications have gone out, turn the first recorded
# failure (pubarchiver, then validation, then FTP upload) into a failed job.
# The conditions are mutually exclusive, so at most one of these steps runs.
- name: Fail if pubarchiver errored
  if: env.PUBARCHIVER_STATUS != '0'
  run: |
    echo "pubarchiver exited with status ${PUBARCHIVER_STATUS}" >&2
    exit 1
- name: Fail if validation errors found
  if: env.PUBARCHIVER_STATUS == '0' && env.VALIDATION_ERRORS != '0'
  run: |
    echo "Validation or missing/failed entries detected: ${VALIDATION_ERRORS}" >&2
    exit 1
- name: Fail if curl upload failed
  if: env.PUBARCHIVER_STATUS == '0' && env.VALIDATION_ERRORS == '0' && env.CURL_STATUS != '0'
  run: |
    echo "Curl FTP upload to PMC failed with status ${CURL_STATUS}" >&2
    exit 1