Examples Test #219
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scheduled end-to-end run of the repository examples; can also be started
# manually from the Actions tab.
name: Examples Test

# The workflow token only needs to read repository contents.
permissions:
  contents: read

on:
  workflow_dispatch:
  schedule:
    # Every day at 3 AM UTC+8
    - cron: "0 19 * * *"

jobs:
# -----------------------------------------------------------------------------
# Job: calc-x -- Calc-X sanity checks followed by three training variants.
# -----------------------------------------------------------------------------
  calc-x:
    name: Calc-X (Python ${{ matrix.python-version }}, ${{ matrix.setup-script }})
    runs-on: [self-hosted, 1ES.Pool=agl-runner-gpu]
    timeout-minutes: 90
    # Run every step with `bash --noprofile --norc -eo pipefail`. When no shell
    # is declared GitHub falls back to `bash -e`, which has no `pipefail`, so a
    # failure on the left-hand side of `... | tee` would be reported as success.
    defaults:
      run:
        shell: bash
    strategy:
      matrix:
        include:
          - python-version: '3.10'
            setup-script: 'legacy'
          - python-version: '3.12'
            setup-script: 'stable'
          - python-version: '3.13'
            setup-script: 'latest'
      fail-fast: false
    steps:
      - name: Check GPU status
        run: nvidia-smi
      - name: Check disk space
        run: df -h
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          python-version: ${{ matrix.python-version }}

      # The 'latest' leg refreshes the lockfile first and then installs from it.
      - name: Upgrade dependencies (latest)
        if: matrix.setup-script == 'latest'
        run: uv lock --upgrade
      - name: Sync dependencies (latest)
        if: matrix.setup-script == 'latest'
        run: |
          uv sync --frozen --no-default-groups --extra verl \
            --group dev --group experiment --group agents --group torch-gpu-stable
      - name: Sync dependencies (stable & legacy)
        if: matrix.setup-script != 'latest'
        run: |
          uv sync --frozen --no-default-groups --extra verl \
            --group dev --group experiment --group agents --group torch-gpu-${{ matrix.setup-script }}

      - name: Freeze dependencies
        run: |
          set -ex
          uv pip freeze | tee requirements-freeze.txt
          # Exported to every later step of this job through GITHUB_ENV.
          echo "UV_LOCKED=1" >> "$GITHUB_ENV"
          echo "UV_NO_SYNC=1" >> "$GITHUB_ENV"
      - name: Upload dependencies artifact
        uses: actions/upload-artifact@v4
        with:
          name: dependencies-calc-x-${{ matrix.python-version }}-${{ matrix.setup-script }}
          path: requirements-freeze.txt
          compression-level: 0

      - name: Launch LiteLLM Proxy
        run: |
          ./scripts/litellm_run.sh
        env:
          AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}

      - name: Prepare Calc-X dataset
        run: |
          set -ex
          cd examples/calc_x
          uv run gdown --fuzzy https://drive.google.com/file/d/1FQMyKLLd6hP9dw9rfZn1EZOWNvKaDsqw/view
          unzip calc-x-data.zip -d data
          rm calc-x-data.zip

      # The sanity checks talk to the local proxy on port 12306.
      - name: Calc-X MCP sanity check
        run: |
          set -ex
          cd examples/calc_x
          uv run tests/test_mcp_calculator.py
        env:
          OPENAI_API_BASE: http://localhost:12306/
          OPENAI_API_KEY: dummy
      - name: Calc-X sanity check
        run: |
          set -ex
          cd examples/calc_x
          uv run legacy_calc_agent_debug.py
        env:
          OPENAI_BASE_URL: http://localhost:12306/
          OPENAI_API_KEY: dummy

      # Calc-X training suddenly works after running the sanity check.
      # And it has to be run before Spider training.
      # The client side used to hang in many of my attempts.
      # Don't ask why. Don't touch this.
      - name: Calc-X training
        id: calc_x_train
        run: |
          set -ex
          source .venv/bin/activate
          cd examples/calc_x
          ../../scripts/restart_ray.sh
          sleep 5
          PYTHONUNBUFFERED=1 python train_calc_agent.py --val-file data/test_mini.parquet --ci
          sleep 10
        env:
          WANDB_BASE_URL: ${{ secrets.MSR_WANDB_BASE_URL }}
          WANDB_API_KEY: ${{ secrets.MSR_WANDB_API_KEY }}

      - name: Validate Calc-X training
        run: |
          set -ex
          # ${VAR:+"$VAR"} yields the quoted value, or no argument at all when
          # the output is empty, matching what direct interpolation produced.
          uv run scripts/validate_example_wandb.py ${TRAIN_PROJECT_NAME:+"$TRAIN_PROJECT_NAME"} ${TRAIN_RUN_NAME:+"$TRAIN_RUN_NAME"}
        env:
          # Step outputs are handed over as environment variables rather than
          # being pasted into the script text, so their content cannot be
          # interpreted as shell syntax.
          TRAIN_PROJECT_NAME: ${{ steps.calc_x_train.outputs.project_name }}
          TRAIN_RUN_NAME: ${{ steps.calc_x_train.outputs.run_name }}
          WANDB_BASE_URL: ${{ secrets.MSR_WANDB_BASE_URL }}
          WANDB_API_KEY: ${{ secrets.MSR_WANDB_API_KEY }}

      - name: Calc-X training LLM Proxy
        id: calc_x_train_llm_proxy
        run: |
          set -ex
          source .venv/bin/activate
          cd examples/calc_x
          ../../scripts/restart_ray.sh
          sleep 5
          PYTHONUNBUFFERED=1 python train_calc_agent.py --val-file data/test_mini.parquet --ci --llm-proxy
          sleep 10
        env:
          WANDB_BASE_URL: ${{ secrets.MSR_WANDB_BASE_URL }}
          WANDB_API_KEY: ${{ secrets.MSR_WANDB_API_KEY }}

      # Store, runner and algorithm run as three separate processes; the store
      # and the runner are backgrounded and cleaned up once the algorithm exits.
      - name: Calc-X training with external store
        id: calc_x_train_external_store
        run: |
          set -ex
          source .venv/bin/activate
          cd examples/calc_x
          ../../scripts/restart_ray.sh
          agl store --port 4747 &
          sleep 5
          AGL_MANAGED_STORE=0 AGL_CURRENT_ROLE=runner python train_calc_agent.py --external-store-address http://localhost:4747 --val-file data/test_mini.parquet --ci &
          sleep 5
          AGL_MANAGED_STORE=0 AGL_CURRENT_ROLE=algorithm python train_calc_agent.py --external-store-address http://localhost:4747 --val-file data/test_mini.parquet --ci
          # NOTE(review): `-f agl` matches any command line containing "agl",
          # not only the store started above -- confirm nothing else on the
          # runner matches, otherwise the wait loop below never ends.
          pkill -f agl && echo "SIGTERM sent to agl" || echo "No agl process found"
          while pgrep -f agl; do
            echo "Waiting for agl to finish..."
            sleep 5
          done
          pkill -f train_calc_agent.py && echo "SIGTERM sent to train_calc_agent.py" || echo "No train_calc_agent.py process found"
          while pgrep -f train_calc_agent.py; do
            echo "Waiting for train_calc_agent.py to finish..."
            sleep 5
          done
          echo "train_calc_agent.py has finished."
          sleep 10
        env:
          WANDB_BASE_URL: ${{ secrets.MSR_WANDB_BASE_URL }}
          WANDB_API_KEY: ${{ secrets.MSR_WANDB_API_KEY }}
# -----------------------------------------------------------------------------
# Job: spider -- Spider SQL agent sanity check, training and W&B validation.
# -----------------------------------------------------------------------------
  spider:
    name: Spider (Python ${{ matrix.python-version }}, ${{ matrix.setup-script }})
    runs-on: [self-hosted, 1ES.Pool=agl-runner-gpu]
    timeout-minutes: 60
    # Run every step with `bash --noprofile --norc -eo pipefail`. When no shell
    # is declared GitHub falls back to `bash -e`, which has no `pipefail`, so a
    # failure on the left-hand side of `... | tee` would be reported as success.
    defaults:
      run:
        shell: bash
    strategy:
      matrix:
        include:
          - python-version: '3.10'
            setup-script: 'legacy'
          - python-version: '3.12'
            setup-script: 'stable'
          - python-version: '3.13'
            setup-script: 'latest'
      fail-fast: false
    steps:
      - name: Check GPU status
        run: nvidia-smi
      - name: Check disk space
        run: df -h
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          python-version: ${{ matrix.python-version }}

      # The 'latest' leg refreshes the lockfile first and then installs from it.
      - name: Upgrade dependencies (latest)
        if: matrix.setup-script == 'latest'
        run: uv lock --upgrade
      - name: Sync dependencies (latest)
        if: matrix.setup-script == 'latest'
        run: |
          uv sync --frozen --no-default-groups --extra verl \
            --group dev --group experiment --group agents --group torch-gpu-stable
      - name: Sync dependencies (stable & legacy)
        if: matrix.setup-script != 'latest'
        run: |
          uv sync --frozen --no-default-groups --extra verl \
            --group dev --group experiment --group agents --group torch-gpu-${{ matrix.setup-script }}

      - name: Freeze dependencies
        run: |
          set -ex
          uv pip freeze | tee requirements-freeze.txt
          # Exported to every later step of this job through GITHUB_ENV.
          echo "UV_LOCKED=1" >> "$GITHUB_ENV"
          echo "UV_NO_SYNC=1" >> "$GITHUB_ENV"
      - name: Upload dependencies artifact
        uses: actions/upload-artifact@v4
        with:
          name: dependencies-spider-${{ matrix.python-version }}-${{ matrix.setup-script }}
          path: requirements-freeze.txt
          compression-level: 0

      - name: Launch LiteLLM Proxy
        run: |
          ./scripts/litellm_run.sh
        env:
          AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}

      - name: Prepare Spider dataset
        run: |
          set -ex
          cd examples/spider
          uv run gdown --fuzzy https://drive.google.com/file/d/1oi9J1jZP9TyM35L85CL3qeGWl2jqlnL6/view
          unzip -q spider-data.zip -d data
          rm spider-data.zip

      # Runs even when an earlier step failed (but not when the run was
      # cancelled).
      - name: Spider sanity check
        if: success() || failure()
        run: |
          set -ex
          cd examples/spider
          uv run sql_agent.py
        env:
          OPENAI_API_BASE: http://localhost:12306/
          OPENAI_API_KEY: dummy

      - name: Spider training
        id: spider_train
        run: |
          set -ex
          source .venv/bin/activate
          cd examples/spider
          ../../scripts/restart_ray.sh
          sleep 5
          PYTHONUNBUFFERED=1 python train_sql_agent.py fast
          sleep 10
        env:
          WANDB_BASE_URL: ${{ secrets.MSR_WANDB_BASE_URL }}
          WANDB_API_KEY: ${{ secrets.MSR_WANDB_API_KEY }}

      - name: Validate Spider training
        run: |
          set -ex
          # ${VAR:+"$VAR"} yields the quoted value, or no argument at all when
          # the output is empty, matching what direct interpolation produced.
          uv run scripts/validate_example_wandb.py ${TRAIN_PROJECT_NAME:+"$TRAIN_PROJECT_NAME"} ${TRAIN_RUN_NAME:+"$TRAIN_RUN_NAME"}
        env:
          # Step outputs are handed over as environment variables rather than
          # being pasted into the script text, so their content cannot be
          # interpreted as shell syntax.
          TRAIN_PROJECT_NAME: ${{ steps.spider_train.outputs.project_name }}
          TRAIN_RUN_NAME: ${{ steps.spider_train.outputs.run_name }}
          WANDB_BASE_URL: ${{ secrets.MSR_WANDB_BASE_URL }}
          WANDB_API_KEY: ${{ secrets.MSR_WANDB_API_KEY }}
# -----------------------------------------------------------------------------
# Job: apo -- APO examples (custom algorithm, debugger modes, built-in).
# -----------------------------------------------------------------------------
  apo:
    name: APO (Python ${{ matrix.python-version }}, ${{ matrix.setup-script }})
    # This job is run on GitHub hosted runners rather than self-hosted runners because it needs no GPU.
    runs-on: ubuntu-latest
    timeout-minutes: 30
    # Run every step with `bash --noprofile --norc -eo pipefail`. When no shell
    # is declared GitHub falls back to `bash -e`, which has no `pipefail`, so a
    # crash of the trainer piped into `tee` would be reported as success.
    defaults:
      run:
        shell: bash
    strategy:
      matrix:
        include:
          - python-version: '3.10'
            setup-script: 'legacy'
          - python-version: '3.12'
            setup-script: 'stable'
          - python-version: '3.13'
            setup-script: 'latest'
      fail-fast: false
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          python-version: ${{ matrix.python-version }}

      # The 'latest' leg refreshes the lockfile first and then installs from it.
      - name: Upgrade dependencies (latest)
        if: matrix.setup-script == 'latest'
        run: uv lock --upgrade
      - name: Sync dependencies (latest)
        if: matrix.setup-script == 'latest'
        run: |
          uv sync --frozen --no-default-groups --extra apo \
            --group dev --group experiment --group agents --group core-stable
      - name: Sync dependencies (stable & legacy)
        if: matrix.setup-script != 'latest'
        run: |
          uv sync --frozen --no-default-groups --extra apo \
            --group dev --group experiment --group agents --group core-${{ matrix.setup-script }}

      - name: Freeze dependencies
        run: |
          set -ex
          uv pip freeze | tee requirements-freeze.txt
          # Exported to every later step of this job through GITHUB_ENV.
          echo "UV_LOCKED=1" >> "$GITHUB_ENV"
          echo "UV_NO_SYNC=1" >> "$GITHUB_ENV"
      - name: Upload dependencies artifact
        uses: actions/upload-artifact@v4
        with:
          name: dependencies-apo-${{ matrix.python-version }}-${{ matrix.setup-script }}
          path: requirements-freeze.txt
          compression-level: 0

      - name: Launch LiteLLM Proxy
        run: |
          ./scripts/litellm_run.sh
        env:
          AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}

      - name: APO custom algorithm
        run: |
          set -ex
          cd examples/apo
          # With pipefail active, a non-zero exit of the trainer fails the step
          # even though its output is piped through tee.
          uv run apo_custom_algorithm_trainer.py | tee _ci_apo.log
          # Check whether the log contains "Best prompt found:"
          grep "Best prompt found:" _ci_apo.log
        env:
          # New versions follow OPENAI_BASE_URL instead of OPENAI_API_BASE
          OPENAI_BASE_URL: http://localhost:12306/
          OPENAI_API_KEY: dummy

      - name: APO custom algorithm debugger
        run: |
          set -ex
          cd examples/apo
          uv run apo_debug.py --mode runner
          uv run apo_debug.py --mode hook
          uv run apo_debug.py --mode trainer
        env:
          # New versions follow OPENAI_BASE_URL instead of OPENAI_API_BASE
          OPENAI_BASE_URL: http://localhost:12306/
          OPENAI_API_KEY: dummy

      # Skipped on the 'legacy' matrix leg.
      - name: APO built-in algorithm
        if: matrix.setup-script != 'legacy'
        run: |
          set -ex
          cd examples/apo
          uv run room_selector_apo.py
        env:
          OPENAI_BASE_URL: http://localhost:12306/
          OPENAI_API_KEY: dummy
# -----------------------------------------------------------------------------
# Job: unsloth -- Unsloth SFT examples (multi-process and all-in-one).
# -----------------------------------------------------------------------------
  unsloth:
    name: Unsloth (Python ${{ matrix.python-version }}, ${{ matrix.setup-script }})
    runs-on: [self-hosted, 1ES.Pool=agl-runner-gpu]
    timeout-minutes: 60
    # Run every step with `bash --noprofile --norc -eo pipefail`. The SFT steps
    # use `source`, which needs bash, and the implicit `bash -e` fallback has no
    # `pipefail`, so a failure before `| tee` would be reported as success.
    defaults:
      run:
        shell: bash
    strategy:
      matrix:
        # Legacy versions are not supported for Unsloth examples.
        include:
          - python-version: '3.12'
            setup-script: 'stable'
          - python-version: '3.13'
            setup-script: 'latest'
      fail-fast: false
    steps:
      - name: Check GPU status
        run: nvidia-smi
      - name: Check disk space
        run: df -h
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          python-version: ${{ matrix.python-version }}

      # The 'latest' leg refreshes the lockfile before the shared sync step.
      - name: Upgrade dependencies (latest)
        if: matrix.setup-script == 'latest'
        run: uv lock --upgrade
      - name: Sync dependencies
        run: |
          uv sync --frozen --no-default-groups --extra verl \
            --group dev --group experiment --group trl --group agents --group torch-gpu-stable

      - name: Freeze dependencies
        run: |
          set -ex
          uv pip freeze | tee requirements-freeze.txt
          # Exported to every later step of this job through GITHUB_ENV.
          echo "UV_LOCKED=1" >> "$GITHUB_ENV"
          echo "UV_NO_SYNC=1" >> "$GITHUB_ENV"
      - name: Upload dependencies artifact
        uses: actions/upload-artifact@v4
        with:
          name: dependencies-unsloth-${{ matrix.python-version }}-${{ matrix.setup-script }}
          path: requirements-freeze.txt
          compression-level: 0

      # Start from a clean models/ directory holding only the base checkpoint.
      - name: Prepare Unsloth model
        run: |
          set -ex
          cd examples/unsloth
          rm -rf models
          uv run hf download unsloth/Qwen3-4B-Instruct-2507 --local-dir models/version_0

      # Store and rollout runners are backgrounded; the algorithm runs in the
      # foreground and the helpers are terminated once it returns.
      - name: Unsloth SFT example
        run: |
          set -ex
          source .venv/bin/activate
          cd examples/unsloth
          agl store --port 4747 &
          sleep 5
          python sft_rollout_runners.py &
          sleep 5
          python sft_algorithm.py
          # NOTE(review): `-f agl` matches any command line containing "agl",
          # not only the store started above -- confirm nothing else on the
          # runner matches, otherwise the wait loop below never ends.
          pkill -f agl && echo "SIGTERM sent to agl" || echo "No agl process found"
          while pgrep -f agl; do
            echo "Waiting for agl to finish..."
            sleep 5
          done
          pkill -f sft_rollout_runners.py && echo "SIGTERM sent to sft_rollout_runners.py" || echo "No sft_rollout_runners.py process found"
          while pgrep -f sft_rollout_runners.py; do
            echo "Waiting for sft_rollout_runners.py to finish..."
            sleep 5
          done
          echo "sft_rollout_runners.py has finished."
          sleep 10
          # Check models/version_2 must exist
          if [ ! -d "models/version_2" ]; then
            echo "models/version_2 does not exist"
            exit 1
          fi
        env:
          WANDB_BASE_URL: ${{ secrets.MSR_WANDB_BASE_URL }}
          WANDB_API_KEY: ${{ secrets.MSR_WANDB_API_KEY }}

      # Removes the checkpoints left by the previous step so the existence
      # check below only passes if this run recreates models/version_2.
      - name: Unsloth SFT example all-in-one
        run: |
          set -ex
          source .venv/bin/activate
          cd examples/unsloth
          rm -rf models/version_1 models/version_2
          python sft_allinone.py
          if [ ! -d "models/version_2" ]; then
            echo "models/version_2 does not exist"
            exit 1
          fi
        env:
          WANDB_BASE_URL: ${{ secrets.MSR_WANDB_BASE_URL }}
          WANDB_API_KEY: ${{ secrets.MSR_WANDB_API_KEY }}
# -----------------------------------------------------------------------------
# Job: backward-compatibility -- legacy client-server style APO and Calc-X.
# -----------------------------------------------------------------------------
  backward-compatibility:
    name: Backward Compatibility (Python ${{ matrix.python-version }}, ${{ matrix.setup-script }})
    runs-on: [self-hosted, 1ES.Pool=agl-runner-gpu]
    timeout-minutes: 30
    # Run every step with `bash --noprofile --norc -eo pipefail`. When no shell
    # is declared GitHub falls back to `bash -e`, which has no `pipefail`, so a
    # failure on the left-hand side of `... | tee` would be reported as success.
    defaults:
      run:
        shell: bash
    strategy:
      matrix:
        include:
          - python-version: '3.10'
            setup-script: 'legacy'
          - python-version: '3.12'
            setup-script: 'stable'
      fail-fast: false
    steps:
      - name: Check GPU status
        run: nvidia-smi
      - name: Check disk space
        run: df -h
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          python-version: ${{ matrix.python-version }}

      - name: Sync dependencies
        run: |
          uv sync --frozen --no-default-groups --extra apo --extra verl \
            --group dev --group experiment --group agents --group torch-gpu-${{ matrix.setup-script }}

      - name: Freeze dependencies
        run: |
          set -ex
          uv pip freeze | tee requirements-freeze.txt
          # Exported to every later step of this job through GITHUB_ENV.
          echo "UV_LOCKED=1" >> "$GITHUB_ENV"
          echo "UV_NO_SYNC=1" >> "$GITHUB_ENV"
      - name: Upload dependencies artifact
        uses: actions/upload-artifact@v4
        with:
          name: dependencies-backward-compatibility-${{ matrix.python-version }}-${{ matrix.setup-script }}
          path: requirements-freeze.txt
          compression-level: 0

      - name: Launch LiteLLM Proxy
        run: |
          ./scripts/litellm_run.sh
        env:
          AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}

      - name: Prepare Calc-X dataset
        run: |
          set -ex
          cd examples/calc_x
          uv run gdown --fuzzy https://drive.google.com/file/d/1FQMyKLLd6hP9dw9rfZn1EZOWNvKaDsqw/view
          unzip calc-x-data.zip -d data
          rm calc-x-data.zip

      # The client is backgrounded, the server runs in the foreground, and the
      # client is terminated once the server returns.
      - name: APO example (legacy client-server style)
        run: |
          set -ex
          cd examples/apo
          uv run legacy_apo_client.py &
          sleep 3 # Wait for the client to be up
          uv run legacy_apo_server.py
          pkill -f legacy_apo_client.py && echo "SIGTERM sent to legacy_apo_client.py" || echo "No legacy_apo_client.py process found"
          while pgrep -f legacy_apo_client.py; do
            echo "Waiting for legacy_apo_client.py to finish..."
            sleep 5
          done
          echo "legacy_apo_client.py has finished."
          sleep 10
        env:
          OPENAI_API_BASE: http://localhost:12306/
          OPENAI_API_KEY: dummy

      - name: Calc-X MCP sanity check
        run: |
          set -ex
          cd examples/calc_x
          uv run tests/test_mcp_calculator.py
        env:
          OPENAI_API_BASE: http://localhost:12306/
          OPENAI_API_KEY: dummy
      - name: Calc-X sanity check
        run: |
          set -ex
          cd examples/calc_x
          uv run legacy_calc_agent_debug.py
        env:
          OPENAI_BASE_URL: http://localhost:12306/
          OPENAI_API_KEY: dummy

      # The legacy agent is backgrounded while legacy_train.sh drives training;
      # the agent is terminated once the training script returns.
      - name: Calc-X training (legacy client-server style)
        id: calc_x_train
        run: |
          set -ex
          source .venv/bin/activate
          cd examples/calc_x
          ../../scripts/restart_ray.sh
          sleep 5
          PYTHONUNBUFFERED=1 python legacy_calc_agent.py &
          bash legacy_train.sh
          pkill -f legacy_calc_agent.py && echo "SIGTERM sent to legacy_calc_agent.py" || echo "No legacy_calc_agent.py process found"
          while pgrep -f legacy_calc_agent.py; do
            echo "Waiting for legacy_calc_agent.py to finish..."
            sleep 5
          done
          echo "legacy_calc_agent.py has finished."
          sleep 10
        env:
          WANDB_BASE_URL: ${{ secrets.MSR_WANDB_BASE_URL }}
          WANDB_API_KEY: ${{ secrets.MSR_WANDB_API_KEY }}

      - name: Validate Calc-X training
        run: |
          set -ex
          # ${VAR:+"$VAR"} yields the quoted value, or no argument at all when
          # the output is empty, matching what direct interpolation produced.
          uv run scripts/validate_example_wandb.py ${TRAIN_PROJECT_NAME:+"$TRAIN_PROJECT_NAME"} ${TRAIN_RUN_NAME:+"$TRAIN_RUN_NAME"}
        env:
          # Step outputs are handed over as environment variables rather than
          # being pasted into the script text, so their content cannot be
          # interpreted as shell syntax.
          TRAIN_PROJECT_NAME: ${{ steps.calc_x_train.outputs.project_name }}
          TRAIN_RUN_NAME: ${{ steps.calc_x_train.outputs.run_name }}
          WANDB_BASE_URL: ${{ secrets.MSR_WANDB_BASE_URL }}
          WANDB_API_KEY: ${{ secrets.MSR_WANDB_API_KEY }}