Providing byte level offsets for effective alignment in Cross-Tokenizer On-Policy Distillation #3734
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Rust | |
| on: | |
| push: | |
| branches: | |
| - main | |
| pull_request: | |
| jobs: | |
| build: | |
| runs-on: ${{ matrix.os }} | |
| env: | |
| MACOSX_DEPLOYMENT_TARGET: 10.12 | |
| strategy: | |
| matrix: | |
| os: [ubuntu-latest, windows-latest, macOS-latest] | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Install Rust Stable | |
| uses: actions-rs/toolchain@v1 | |
| with: | |
| toolchain: stable | |
| components: rustfmt, clippy | |
| override: true | |
| # Necessary for now for the cargo cache: https://github.com/actions/cache/issues/133#issuecomment-599102035 | |
| - if: matrix.os == 'ubuntu-latest' | |
| run: sudo chown -R $(whoami):$(id -ng) ~/.cargo/ | |
| - name: Install cargo-readme for Ubuntu | |
| if: matrix.os == 'ubuntu-latest' | |
| uses: actions-rs/cargo@v1 | |
| with: | |
| command: install | |
| args: cargo-readme | |
| - name: Install audit | |
| uses: actions-rs/cargo@v1 | |
| with: | |
| command: install | |
| args: cargo-audit | |
| - name: Build | |
| uses: actions-rs/cargo@v1 | |
| with: | |
| command: build | |
| args: --all-targets --verbose --manifest-path ./tokenizers/Cargo.toml | |
| - name: Lint with RustFmt | |
| uses: actions-rs/cargo@v1 | |
| with: | |
| command: fmt | |
| args: --manifest-path ./tokenizers/Cargo.toml -- --check | |
| - name: Lint Benchmarks with RustFmt | |
| uses: actions-rs/cargo@v1 | |
| with: | |
| command: fmt | |
| args: --manifest-path ./tokenizers/Cargo.toml -- ./tokenizers/benches/bpe_benchmark.rs --check | |
| - name: Lint with Clippy | |
| uses: actions-rs/cargo@v1 | |
| with: | |
| command: clippy | |
| args: --manifest-path ./tokenizers/Cargo.toml --all-targets --all-features -- -D warnings | |
| - name: Run Tests | |
| if: matrix.os != 'windows-latest' | |
| shell: bash | |
| working-directory: ./tokenizers | |
| run: make test | |
| # Skip integration tests for now on Windows | |
| - name: Run lib Tests on Windows | |
| if: matrix.os == 'windows-latest' | |
| uses: actions-rs/cargo@v1 | |
| with: | |
| command: test | |
| args: --verbose --manifest-path ./tokenizers/Cargo.toml --lib | |
| - name: Run doc Tests on Windows | |
| if: matrix.os == 'windows-latest' | |
| uses: actions-rs/cargo@v1 | |
| with: | |
| command: test | |
| args: --verbose --manifest-path ./tokenizers/Cargo.toml --doc | |
| - name: Install cargo-audit | |
| run: cargo install cargo-audit | |
| - name: Run Audit | |
| uses: actions-rs/cargo@v1 | |
| with: | |
| command: audit | |
| args: -D warnings -f ./tokenizers/Cargo.lock --ignore RUSTSEC-2024-0436 --ignore RUSTSEC-2025-0014 | |
| # Verify that Readme.md is up to date. | |
| - name: Make sure, Readme generated from lib.rs matches actual Readme | |
| if: matrix.os == 'ubuntu-latest' | |
| shell: bash | |
| working-directory: ./tokenizers | |
| run: cargo readme > must_match_readme.md && diff must_match_readme.md README.md | |
| - name: Check semver | |
| if: matrix.os == 'ubuntu-latest' | |
| uses: obi1kenobi/cargo-semver-checks-action@v2 | |
| with: | |
| manifest-path: ./tokenizers/Cargo.toml |