Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,16 @@ option(XGRAMMAR_ENABLE_CPPTRACE
option(XGRAMMAR_ENABLE_COVERAGE "Enable code coverage with gcov" OFF)
option(XGRAMMAR_ENABLE_INTERNAL_CHECK "Enable internal checks" OFF)

# Backward-compatibility shim: honor the legacy BUILD_PYTHON_BINDINGS flag if a
# caller still passes it, mirroring its value into the renamed
# XGRAMMAR_BUILD_PYTHON_BINDINGS variable.
if(DEFINED BUILD_PYTHON_BINDINGS)
message(
STATUS "BUILD_PYTHON_BINDINGS is deprecated, please use XGRAMMAR_BUILD_PYTHON_BINDINGS instead."
)
# Mirror the deprecated value into the new (normal) variable ...
set(XGRAMMAR_BUILD_PYTHON_BINDINGS ${BUILD_PYTHON_BINDINGS})
# ... and also force the CACHE entry to the same value so a stale cached
# setting does not win on re-configure.
# NOTE(review): set_property(CACHE ...) requires the cache entry to already
# exist (e.g. created by an option() call elsewhere); confirm the ordering
# relative to any option(XGRAMMAR_BUILD_PYTHON_BINDINGS ...) declaration.
set_property(
CACHE XGRAMMAR_BUILD_PYTHON_BINDINGS PROPERTY VALUE ${XGRAMMAR_BUILD_PYTHON_BINDINGS}
)
endif()

set(XGRAMMAR_CUDA_ARCHITECTURES
native
CACHE STRING "CUDA architectures"
Expand Down
4 changes: 3 additions & 1 deletion cmake/config.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
set(CMAKE_BUILD_TYPE RelWithDebInfo)
set(XGRAMMAR_BUILD_PYTHON_BINDINGS ON)
# Default Python bindings to ON, but only when the user has not already set a
# value (e.g. via -DXGRAMMAR_BUILD_PYTHON_BINDINGS=OFF on the command line or
# through the deprecated BUILD_PYTHON_BINDINGS shim) — an unconditional set()
# here would silently override that choice.
if(NOT DEFINED XGRAMMAR_BUILD_PYTHON_BINDINGS)
set(XGRAMMAR_BUILD_PYTHON_BINDINGS ON)
endif()
set(XGRAMMAR_ENABLE_COVERAGE OFF)
set(XGRAMMAR_BUILD_CXX_TESTS OFF)
set(XGRAMMAR_ENABLE_CPPTRACE OFF)
Expand Down
7 changes: 0 additions & 7 deletions web/.eslintignore

This file was deleted.

9 changes: 0 additions & 9 deletions web/.eslintrc.cjs

This file was deleted.

2 changes: 1 addition & 1 deletion web/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set -euxo pipefail

mkdir -p build
cd build
emcmake cmake ../.. -DBUILD_PYTHON_BINDINGS=OFF\
emcmake cmake ../.. -DXGRAMMAR_BUILD_PYTHON_BINDINGS=OFF\
-DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-O3 -DCOMPILE_WASM_RUNTIME -DXGRAMMAR_LOG_CUSTOMIZE=1"
emmake make xgrammar -j8
cd ..
Expand Down
42 changes: 42 additions & 0 deletions web/eslint.config.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
const {
defineConfig,
globalIgnores,
} = require("eslint/config");

const tsParser = require("@typescript-eslint/parser");
const typescriptEslint = require("@typescript-eslint/eslint-plugin");
const js = require("@eslint/js");

const {
FlatCompat,
} = require("@eslint/eslintrc");

const compat = new FlatCompat({
baseDirectory: __dirname,
recommendedConfig: js.configs.recommended,
allConfig: js.configs.all
});

module.exports = defineConfig([{
extends: compat.extends("eslint:recommended", "plugin:@typescript-eslint/recommended"),

languageOptions: {
parser: tsParser,
},

plugins: {
"@typescript-eslint": typescriptEslint,
},

rules: {
"@typescript-eslint/no-explicit-any": "off",
},
}, globalIgnores([
"**/dist",
"**/debug",
"**/lib",
"**/build",
"**/node_modules",
"**/xgrammar_binding.js",
"**/.eslintrc.cjs",
])]);
6 changes: 3 additions & 3 deletions web/example/package.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
{
"name": "web-xgrammar-example",
"version": "0.1.0",
"version": "0.1.27",
"private": true,
"type": "module",
"scripts": {
"start": "parcel src/example.html --port 8888"
},
"browser": {},
"devDependencies": {
"@mlc-ai/web-xgrammar": "0.1.0",
"@mlc-ai/web-tokenizers": "^0.1.5",
"@mlc-ai/web-xgrammar": "^0.1.27",
"@mlc-ai/web-tokenizers": "^0.1.6",
"buffer": "^5.7.1",
"parcel": "^2.8.3",
"process": "^0.11.10",
Expand Down
87 changes: 84 additions & 3 deletions web/example/src/example.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Grammar, GrammarMatcher, TokenizerInfo, GrammarCompiler, CompiledGrammar, Testings } from "@mlc-ai/web-xgrammar"
import { GrammarMatcher, TokenizerInfo, GrammarCompiler, CompiledGrammar, Testings, StructuralTag } from "@mlc-ai/web-xgrammar"
import { Tokenizer } from "@mlc-ai/web-tokenizers";
import { Type, Static } from "@sinclair/typebox";

Expand Down Expand Up @@ -26,7 +26,7 @@ async function getTokenizerInfoAndTokenizerFromUrl(
}

async function jsonExample() {
console.log("json example");
console.log("Running JSON Example");
const result = await getTokenizerInfoAndTokenizerFromUrl(
"https://huggingface.co/mlc-ai/Llama-3.2-1B-Instruct-q4f16_0-MLC/raw/main/tokenizer.json",
"byte_level",
Expand All @@ -51,6 +51,7 @@ async function jsonExample() {
if (!grammarMatcher.isTerminated()) {
const bitmask = await grammarMatcher.getNextTokenBitmask();
// For debugging, we can check the rejected token IDs from the mask
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const rejectedIDs = await Testings.debugGetMaskedTokensFromBitmask(
bitmask,
tokenizerInfo.getVocabSize()
Expand All @@ -72,7 +73,7 @@ async function jsonExample() {
}

async function jsonSchemaExample() {
console.log("json schema example");
console.log("Running JSON Schema Example");
// 0. Prepare a schema
const T = Type.Object({
name: Type.String(),
Expand Down Expand Up @@ -144,6 +145,7 @@ async function jsonSchemaExample() {
if (!grammarMatcher.isTerminated()) {
const bitmask = await grammarMatcher.getNextTokenBitmask();
// For debugging, we can check the rejected token IDs from the mask
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const rejectedIDs = await Testings.debugGetMaskedTokensFromBitmask(
bitmask,
tokenizerInfo.getVocabSize()
Expand All @@ -165,9 +167,88 @@ async function jsonSchemaExample() {
}


/**
 * Demonstrates constrained generation with a structural tag: output is
 * unconstrained free text until a trigger string ("<function=") appears, after
 * which the tagged region must be a JSON object matching `weatherSchema`,
 * closed by "</function>".
 *
 * Throws if any token of the conforming sample input is rejected by the
 * matcher.
 */
async function structuralTagExample() {
  console.log("Running Structural Tag Example");

  // 1. Define the JSON schema for our function-call payload.
  const weatherSchema = Type.Object({
    city: Type.String(),
    is_celsius: Type.Boolean()
  });

  // 2. Create the structural tag for our function.
  const structuralTag: StructuralTag = {
    type: "structural_tag",
    format: {
      type: "triggered_tags",
      triggers: ["<function="],
      tags: [
        {
          type: "tag",
          begin: "<function=get_weather>",
          end: "</function>",
          content: { type: "json_schema", json_schema: weatherSchema },
        },
      ],
    },
  };

  // 3. Load tokenizer and tokenizer info.
  const result = await getTokenizerInfoAndTokenizerFromUrl(
    "https://huggingface.co/mlc-ai/Llama-3.2-1B-Instruct-q4f16_0-MLC/raw/main/tokenizer.json",
    "byte_level",
    false,
  );
  const tokenizerInfo = result[0];
  const tokenizer = result[1];
  // (Removed an unused loop that rebuilt the encoded vocabulary here: the
  // compiler only needs `tokenizerInfo`, so that O(vocab) work was dead code.)

  // 4. Create compiler and compile the structural tag grammar.
  const compiler = await GrammarCompiler.createGrammarCompiler(tokenizerInfo);
  const compiledGrammar = await compiler.compileStructuralTag(structuralTag);

  // 5. Create the grammar matcher.
  const matcher = await GrammarMatcher.createGrammarMatcher(compiledGrammar);

  // 6. Test with a sample input that conforms to the structural tag.
  const testInput = `I need to check the weather.<function=get_weather>{"city": "New York", "is_celsius": false}</function> Thanks!`;
  console.log("Testing with input:", testInput);

  // 7. Feed the input token by token; every token must be accepted.
  const encodedTokens = tokenizer.encode(testInput);
  for (let i = 0; i < encodedTokens.length; i++) {
    const bitmask = await matcher.getNextTokenBitmask();
    // For debugging, we can check the rejected token IDs from the mask
    // eslint-disable-next-line @typescript-eslint/no-unused-vars
    const rejectedIDs = await Testings.debugGetMaskedTokensFromBitmask(
      bitmask,
      tokenizerInfo.getVocabSize()
    );

    const curToken = encodedTokens[i];
    const accepted = matcher.acceptToken(curToken);
    if (!accepted) {
      throw new Error("Expect token to be accepted");
    }
  }

  // 8. Clean up wasm-side resources (not reclaimed by the JS GC).
  matcher.dispose();
  compiledGrammar.dispose();
  compiler.dispose();
  tokenizerInfo.dispose();
}

/**
 * Run all examples sequentially. Order is deliberate: each example logs its
 * own header, so interleaving would make the console output unreadable.
 */
async function testAll() {
  await jsonExample();
  await jsonSchemaExample();
  await structuralTagExample();
}

// Entry point. Attach a rejection handler so a failing example surfaces as a
// logged error instead of an unhandled promise rejection.
testAll().catch((err: unknown) => {
  console.error(err);
});
10 changes: 9 additions & 1 deletion web/jest.config.cjs
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
module.exports = {
preset: "ts-jest",
preset: "ts-jest/presets/default-esm",
testEnvironment: "node",
extensionsToTreatAsEsm: [".ts"],
transform: {
"^.+\\.(ts|tsx)$": ["ts-jest", { useESM: true, tsconfig: "tsconfig.json" }],
},
moduleNameMapper: {
"^(\\.{1,2}/.*)\\.js$": "$1",
"^@mlc-ai/web-tokenizers$": "<rootDir>/tests/web_tokenizers_shim.mjs",
},
};
Loading
Loading