diff --git a/.env.server b/.env.server index b52abbf5cd..a70facccd2 100644 --- a/.env.server +++ b/.env.server @@ -46,4 +46,5 @@ VECTOR_SIZES="384,512,768,1024,1536,3072" RUST_LOG="INFO" BM25_ACTIVE="true" FIRECRAWL_URL=https://api.firecrawl.dev -FIRECRAWL_API_KEY=fc-abdef************** \ No newline at end of file +FIRECRAWL_API_KEY=fc-abdef************** +PDF2MD_URL="http://localhost:8081" diff --git a/.github/ISSUE_TEMPLATE/issue-template.md b/.github/ISSUE_TEMPLATE/issue-template.md index ea5603bfc9..9b7050b5da 100644 --- a/.github/ISSUE_TEMPLATE/issue-template.md +++ b/.github/ISSUE_TEMPLATE/issue-template.md @@ -13,11 +13,7 @@ assignees: '' ### Target(s) - - -### Requirement to close - - + ### Community channels diff --git a/.github/workflows/push-pdf2md-server.yml b/.github/workflows/push-pdf2md-server.yml new file mode 100644 index 0000000000..e9ac0bd0e0 --- /dev/null +++ b/.github/workflows/push-pdf2md-server.yml @@ -0,0 +1,149 @@ +name: Create PDF2MD Docker Images + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref }} + cancel-in-progress: true + +on: + workflow_dispatch: + push: + branches: + - "main" + paths: + - "pdf2md/server/**" + +jobs: + pdf2md-server: + name: Push PDF2MD Server image + runs-on: ${{ matrix.runner }} + strategy: + matrix: + runner: [blacksmith-8vcpu-ubuntu-2204] + platform: [linux/amd64] + exclude: + - runner: blacksmith-8vcpu-ubuntu-2204 + platform: linux/arm64 + - runner: blacksmith-8vcpu-ubuntu-2204-arm + platform: linux/amd64 + steps: + - name: Checkout the repo + uses: actions/checkout@v4 + + - name: Setup buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: | + trieve/pdf2md-server + tags: | + type=raw,latest + type=sha + + - name: Build and push Docker image + uses: 
useblacksmith/build-push-action@v1.0.0-beta + with: + platforms: ${{ matrix.platform }} + context: pdf2md/ + file: ./pdf2md/server/Dockerfile.pdf2md-server + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + chunk-worker: + name: Push PDF2MD Chunk Worker image + runs-on: ${{ matrix.runner }} + strategy: + matrix: + runner: [blacksmith-8vcpu-ubuntu-2204] + platform: [linux/amd64] + exclude: + - runner: blacksmith-8vcpu-ubuntu-2204 + platform: linux/arm64 + - runner: blacksmith-8vcpu-ubuntu-2204-arm + platform: linux/amd64 + steps: + - name: Checkout the repo + uses: actions/checkout@v4 + + - name: Setup buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: | + trieve/chunk-worker + tags: | + type=raw,latest + type=sha + + - name: Build and push Docker image + uses: useblacksmith/build-push-action@v1.0.0-beta + with: + platforms: ${{ matrix.platform }} + context: pdf2md/ + file: ./pdf2md/server/Dockerfile.chunk-worker + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + supervisor-worker: + name: Push PDF2MD Supervisor Worker image + runs-on: ${{ matrix.runner }} + strategy: + matrix: + runner: [blacksmith-8vcpu-ubuntu-2204] + platform: [linux/amd64] + exclude: + - runner: blacksmith-8vcpu-ubuntu-2204 + platform: linux/arm64 + - runner: blacksmith-8vcpu-ubuntu-2204-arm + platform: linux/amd64 + steps: + - name: Checkout the repo + uses: actions/checkout@v4 + + - name: Setup buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + 
with: + images: | + trieve/supervisor-worker + tags: | + type=raw,latest + type=sha + + - name: Build and push Docker image + uses: useblacksmith/build-push-action@v1.0.0-beta + with: + platforms: ${{ matrix.platform }} + context: pdf2md/ + file: ./pdf2md/server/Dockerfile.supervisor-worker + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/.gitignore b/.gitignore index 912c59fbb8..4339d68830 100644 --- a/.gitignore +++ b/.gitignore @@ -18,7 +18,7 @@ story_html.zip testing.ipynb output.json temp.json -analytics/analytics-server/target +**/target server/target server/images server/tantivy @@ -92,4 +92,6 @@ server/migrations/2024-07-26-165058_move_config_to_table/down.sql server/migrations/2024-07-26-165058_move_config_to_table/up.sql dist/** + clients/python-sdk/dist +pdf2md/ch_migrations/chm.toml diff --git a/.vscode/settings.json b/.vscode/settings.json index 85250aaa02..6bf874db0b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,7 +2,11 @@ "[python]": { "editor.defaultFormatter": "ms-python.black-formatter" }, - "rust-analyzer.linkedProjects": ["./server/Cargo.toml"], + "rust-analyzer.linkedProjects": [ + "./server/Cargo.toml", + "./pdf2md/server/Cargo.toml", + "./pdf2md/cli/Cargo.toml" + ], "rust-analyzer.showUnlinkedFileNotification": false, "rust-analyzer.server.path": "~/.cargo/bin/rust-analyzer", "python.analysis.typeCheckingMode": "basic", diff --git a/pdf2md/.env.dist b/pdf2md/.env.dist new file mode 100644 index 0000000000..24cc230713 --- /dev/null +++ b/pdf2md/.env.dist @@ -0,0 +1,27 @@ +# Redis +REDIS_URL=redis://:thisredispasswordisverysecureandcomplex@localhost:6379 +REDIS_PASSWORD=thisredispasswordisverysecureandcomplex + +# Clickhouse +CLICKHOUSE_URL=http://localhost:8123 +CLICKHOUSE_DB=default +CLICKHOUSE_USER=clickhouse +CLICKHOUSE_PASSWORD=password + +# S3 +S3_ENDPOINT=http://127.0.0.1:9000 +S3_ACCESS_KEY=ZaaZZaaZZaaZZaaZZaaZ 
+S3_SECRET_KEY=ssssssssssssssssssssTTTTTTTTTTTTTTTTTTTT +S3_BUCKET=trieve + +# S3 dockerfile auto-configuration +MINIO_ROOT_USER=rootuser +MINIO_ROOT_PASSWORD=rootpassword + +# PDF2MD conversion worker services +LLM_BASE_URL=https://openrouter.ai/api/v1 +LLM_API_KEY= +LLM_MODEL=gpt-4o-mini + +# PDF2MD HTTP API server +API_KEY=admin diff --git a/pdf2md/CONTRIBUTING.md b/pdf2md/CONTRIBUTING.md new file mode 100644 index 0000000000..12501286a7 --- /dev/null +++ b/pdf2md/CONTRIBUTING.md @@ -0,0 +1,39 @@ +# Contributing to PDF2MD + +## Set up ENVs + +```bash +cd server +cp .env.dist .env +``` + +## Run dep processes + +```bash +docker compose --profile dev up -d +``` + +## Run Server + Workers + +We strongly recommend using tmux or another terminal multiplexer to manage the different processes. + +```bash +cargo watch -x run #HTTP server +cargo run --bin supervisor-worker +cargo run --bin chunk-worker +``` + +## CLI + +Make your changes then use the following to run: + +```bash +cd cli +cargo run -- help #or other command instead of help +``` + +## Run tailwindcss server for demo UI + +``` +npx tailwindcss -i ./static/in.css -o ./static/output.css --watch +``` diff --git a/pdf2md/cli/Cargo.lock b/pdf2md/cli/Cargo.lock new file mode 100644 index 0000000000..f3d922a408 --- /dev/null +++ b/pdf2md/cli/Cargo.lock @@ -0,0 +1,799 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing.
+version = 3 + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" +dependencies = [ + "anstyle", + "windows-sys 0.59.0", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "cc" +version = "1.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40545c26d092346d8a8dab71ee48e7685a7a9cba76e634790c215b41a4a7b4cf" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "4.5.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + +[[package]] +name = "cli" +version = "0.1.0" +dependencies = [ + "base64", + "clap", + "serde_json", + "ureq", +] + +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "flate2" +version = "1.0.34" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "libc" +version = "0.2.162" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" + +[[package]] +name = "litemap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "proc-macro2" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ring" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +dependencies = [ + "cc", + "cfg-if", + "getrandom", + "libc", + "spin", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustls" +version = "0.23.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e" +dependencies = [ + "log", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pki-types" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" + +[[package]] +name = "rustls-webpki" +version = "0.102.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "serde" +version = "1.0.214" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.214" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.132" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "2.0.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "ureq" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b74fc6b57825be3373f7054754755f03ac3a8f5d70015ccad699ba2029956f4a" +dependencies = [ + "base64", + "flate2", + "log", + "once_cell", + "rustls", + "rustls-pki-types", + "serde", + "serde_json", + "url", + "webpki-roots", +] + +[[package]] +name = "url" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "webpki-roots" +version = "0.26.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841c67bff177718f1d4dfefde8d8f0e78f9b6589319ba88312f567fc5841a958" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + +[[package]] +name = "yoke" +version = "0.7.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/pdf2md/cli/Cargo.toml b/pdf2md/cli/Cargo.toml new file mode 100644 index 0000000000..c753f0365b --- /dev/null +++ b/pdf2md/cli/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "cli" +version = "0.1.0" +edition = "2021" + +[dependencies] +base64 
= "0.22.1" +clap = { version = "4.5.20", features = ["derive", "env"] } +serde_json = "1.0.132" +ureq = { version = "2.10.1", features = ["json"] } diff --git a/pdf2md/cli/src/main.rs b/pdf2md/cli/src/main.rs new file mode 100644 index 0000000000..d621fc8ca4 --- /dev/null +++ b/pdf2md/cli/src/main.rs @@ -0,0 +1,76 @@ +use clap::{Args, Parser}; +use operators::{create_task::create_task, poll_task::poll_task}; + +pub mod operators; + +#[derive(Parser)] +#[command(author, version)] +#[command( + name = "tr-chunk", + about = "PDF2MD CLI - CLI for PDF2MD", + long_about = "PDF2MD CLI is a CLI for the PDF2MD. + + It allows you to interact with the PDF2MD from the command line by creating and polling tasks." +)] +#[command(arg_required_else_help(true))] +struct Cli { + #[command(subcommand)] + command: Option<Commands>, + + /// The base URL of the PDF2MD server + #[arg( + short, + long, + env = "PDF2MD_BASE_URL", + default_value = "http://localhost:8081" + )] + base_url: String, + + /// The API key to use for authentication + #[arg( + short, + long, + env = "PDF2MD_API_KEY", + default_value = "admin" + )] + api_key: String, +} + +#[derive(Parser)] +enum Commands { + #[command(name = "create", about = "Create a new chunking task")] + Create(Create), + + #[command(name = "poll", about = "Poll a chunking task")] + Poll(Poll), +} + +#[derive(Args)] +struct Create { + /// The path to the file to chunk + #[arg(short, long)] + file: String, +} + +#[derive(Args)] +struct Poll { + /// The task ID to poll + #[arg(short, long)] + task_id: String, +} + +fn main() { + let args = Cli::parse(); + + match args.command { + Some(Commands::Create(create)) => { + create_task(&create.file, &args.base_url, &args.api_key); + } + Some(Commands::Poll(poll)) => { + poll_task(&poll.task_id, &args.base_url, &args.api_key); + } + None => { + println!("No command provided"); + } + } +} diff --git a/pdf2md/cli/src/operators/create_task.rs b/pdf2md/cli/src/operators/create_task.rs new file mode 100644 index
0000000000..1481283d06 --- /dev/null +++ b/pdf2md/cli/src/operators/create_task.rs @@ -0,0 +1,18 @@ +use base64::Engine; + +pub fn create_task(file: &str, base_url: &str, api_key: &str) { + let file = std::fs::read(file).expect("Failed to read file"); + let file = base64::prelude::BASE64_STANDARD.encode(file); + + let request = ureq::post(format!("{}/api/task", base_url).as_str()) + .set("Content-Type", "application/json") + .set("Authorization", api_key) + .send_json(serde_json::json!({ + "base64_file": file, + })) + .expect("Failed to send request"); + + let response: serde_json::Value = request.into_json().expect("Failed to parse response"); + + println!("{}", response); +} diff --git a/pdf2md/cli/src/operators/mod.rs b/pdf2md/cli/src/operators/mod.rs new file mode 100644 index 0000000000..7cf08eb4b4 --- /dev/null +++ b/pdf2md/cli/src/operators/mod.rs @@ -0,0 +1,2 @@ +pub mod create_task; +pub mod poll_task; diff --git a/pdf2md/cli/src/operators/poll_task.rs b/pdf2md/cli/src/operators/poll_task.rs new file mode 100644 index 0000000000..e48b7ad4ef --- /dev/null +++ b/pdf2md/cli/src/operators/poll_task.rs @@ -0,0 +1,23 @@ +pub fn poll_task(task_id: &str, base_url: &str, api_key: &str) { + loop { + let request = ureq::get(format!("{}/api/task/{}", base_url, task_id).as_str()) + .set("Content-Type", "application/json") + .set("Authorization", api_key) + .call() + .expect("Failed to send request"); + + let response: serde_json::Value = request.into_json().expect("Failed to parse response"); + + if (response["status"] == "Completed" + || response["total_document_pages"].as_i64() != Some(0)) + && response["pages"].as_array() != Some(&vec![]) + { + println!("{}", response); + break; + } else { + println!("Task is still processing..."); + println!("{}", response); + std::thread::sleep(std::time::Duration::from_secs(5)); + } + } +} diff --git a/pdf2md/docker-compose.yml b/pdf2md/docker-compose.yml new file mode 100644 index 0000000000..7a742cb589 --- /dev/null +++ 
b/pdf2md/docker-compose.yml @@ -0,0 +1,113 @@ +services: + redis: + image: redis:7.2.2 + profiles: ["dev", "prod-deps"] + restart: always + healthcheck: + test: ["CMD", "redis-cli", "-a", "${REDIS_PASSWORD}", "ping"] + interval: 10s + timeout: 5s + retries: 10 + ports: + - "6379:6379" + volumes: + - redis-data:/data + networks: + - app-network + command: redis-server --requirepass ${REDIS_PASSWORD} + + s3: + image: minio/minio:RELEASE.2023-09-27T15-22-50Z + profiles: ["dev", "prod-deps"] + ports: + - 9000:9000 + - 42625:42625 + environment: + - MINIO_ROOT_USER=${MINIO_ROOT_USER} + - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD} + healthcheck: + test: ["CMD", "curl", "-f", "http://s3:9000/minio/health/live"] + interval: 10s + timeout: 5s + retries: 10 + volumes: + - s3-data:/data + command: server --console-address ":42625" /data + networks: + - app-network + + s3-client: + image: minio/mc + profiles: ["dev", "prod-deps"] + depends_on: + s3: + condition: service_healthy + restart: on-failure + networks: + - app-network + entrypoint: > + /bin/sh -c " + mc config host add myminio http://s3:9000 ${MINIO_ROOT_USER} ${MINIO_ROOT_PASSWORD}; + mc alias set myminio http://s3:9000 ${MINIO_ROOT_USER} ${MINIO_ROOT_PASSWORD}; + + mc admin user add myminio ${S3_ACCESS_KEY} ${S3_SECRET_KEY}; + mc admin policy attach myminio readwrite --user ${S3_ACCESS_KEY}; + + mc mb myminio/${S3_BUCKET}; + exit 0; + " + + clickhouse-db: + image: trieve/clickhouse:latest + profiles: ["dev", "prod-deps"] + restart: always + environment: + - CLICKHOUSE_USER=clickhouse + - CLICKHOUSE_PASSWORD=password + - CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 + volumes: + - clickhouse-data:/var/lib/clickhouse + ports: + - "8123:8123" + - "9001:9000" + - "9009:9009" + networks: + - app-network + + pdf2md-server: + image: trieve/pdf2md-server:latest + profiles: ["prod"] + network_mode: "host" + build: + context: ./server/ + dockerfile: Dockerfile.pdf2md-server + env_file: .env + + supervisor-worker: + image: 
trieve/supervisor-worker:latest + profiles: ["prod"] + network_mode: "host" + build: + context: ./server/ + dockerfile: Dockerfile.supervisor-worker + env_file: .env + + chunk-worker: + image: trieve/chunk-worker:latest + profiles: ["prod"] + network_mode: "host" + build: + context: ./server/ + dockerfile: Dockerfile.chunk-worker + env_file: .env + deploy: + replicas: 5 + +networks: + app-network: + driver: bridge + +volumes: + redis-data: + s3-data: + clickhouse-data: diff --git a/pdf2md/server/Cargo.lock b/pdf2md/server/Cargo.lock new file mode 100644 index 0000000000..7f6c3fe247 --- /dev/null +++ b/pdf2md/server/Cargo.lock @@ -0,0 +1,4075 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "actix-codec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f7b0a21988c1bf877cf4759ef5ddaac04c1c9fe808c9142ecb78ba97d97a28a" +dependencies = [ + "bitflags 2.6.0", + "bytes", + "futures-core", + "futures-sink", + "memchr", + "pin-project-lite", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "actix-cors" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9e772b3bcafe335042b5db010ab7c09013dad6eac4915c91d8d50902769f331" +dependencies = [ + "actix-utils", + "actix-web", + "derive_more 0.99.18", + "futures-util", + "log", + "once_cell", + "smallvec", +] + +[[package]] +name = "actix-http" +version = "3.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d48f96fc3003717aeb9856ca3d02a8c7de502667ad76eeacd830b48d2e91fac4" +dependencies = [ + "actix-codec", + "actix-rt", + "actix-service", + "actix-utils", + "ahash", + "base64", + "bitflags 2.6.0", + "brotli", + "bytes", + "bytestring", + "derive_more 0.99.18", + "encoding_rs", + "flate2", + "futures-core", + "h2 0.3.26", + "http 0.2.12", + "httparse", + "httpdate", + "itoa", + "language-tags", + 
"local-channel", + "mime", + "percent-encoding", + "pin-project-lite", + "rand", + "sha1", + "smallvec", + "tokio", + "tokio-util", + "tracing", + "zstd", +] + +[[package]] +name = "actix-macros" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01ed3140b2f8d422c68afa1ed2e85d996ea619c988ac834d255db32138655cb" +dependencies = [ + "quote", + "syn 2.0.87", +] + +[[package]] +name = "actix-router" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13d324164c51f63867b57e73ba5936ea151b8a41a1d23d1031eeb9f70d0236f8" +dependencies = [ + "bytestring", + "cfg-if", + "http 0.2.12", + "regex", + "regex-lite", + "serde", + "tracing", +] + +[[package]] +name = "actix-rt" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24eda4e2a6e042aa4e55ac438a2ae052d3b5da0ecf83d7411e1a368946925208" +dependencies = [ + "futures-core", + "tokio", +] + +[[package]] +name = "actix-server" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ca2549781d8dd6d75c40cf6b6051260a2cc2f3c62343d761a969a0640646894" +dependencies = [ + "actix-rt", + "actix-service", + "actix-utils", + "futures-core", + "futures-util", + "mio", + "socket2", + "tokio", + "tracing", +] + +[[package]] +name = "actix-service" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b894941f818cfdc7ccc4b9e60fa7e53b5042a2e8567270f9147d5591893373a" +dependencies = [ + "futures-core", + "paste", + "pin-project-lite", +] + +[[package]] +name = "actix-utils" +version = "3.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88a1dcdff1466e3c2488e1cb5c36a71822750ad43839937f85d2f4d9f8b705d8" +dependencies = [ + "local-waker", + "pin-project-lite", +] + +[[package]] +name = "actix-web" +version = "4.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9180d76e5cc7ccbc4d60a506f2c727730b154010262df5b910eb17dbe4b8cb38" +dependencies = [ + "actix-codec", + "actix-http", + "actix-macros", + "actix-router", + "actix-rt", + "actix-server", + "actix-service", + "actix-utils", + "actix-web-codegen", + "ahash", + "bytes", + "bytestring", + "cfg-if", + "cookie", + "derive_more 0.99.18", + "encoding_rs", + "futures-core", + "futures-util", + "impl-more", + "itoa", + "language-tags", + "log", + "mime", + "once_cell", + "pin-project-lite", + "regex", + "regex-lite", + "serde", + "serde_json", + "serde_urlencoded", + "smallvec", + "socket2", + "time", + "url", +] + +[[package]] +name = "actix-web-codegen" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f591380e2e68490b5dfaf1dd1aa0ebe78d84ba7067078512b4ea6e4492d622b8" +dependencies = [ + "actix-router", + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "aligned-vec" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1" + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys 0.59.0", +] 
+ +[[package]] +name = "anstyle-wincon" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" +dependencies = [ + "anstyle", + "windows-sys 0.59.0", +] + +[[package]] +name = "anyhow" +version = "1.0.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" + +[[package]] +name = "arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" + +[[package]] +name = "arc-swap" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" + +[[package]] +name = "arg_enum_proc_macro" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "async-trait" +version = "0.1.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "attohttpc" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a13149d0cf3f7f9b9261fad4ec63b2efbf9a80665f52def86282d26255e6331" 
+dependencies = [ + "http 1.1.0", + "log", + "native-tls", + "serde", + "serde_json", + "url", +] + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "av1-grain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6678909d8c5d46a42abcf571271e15fdbc0a225e3646cf23762cd415046c78bf" +dependencies = [ + "anyhow", + "arrayvec", + "log", + "nom", + "num-rational", + "v_frame", +] + +[[package]] +name = "avif-serialize" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e335041290c43101ca215eed6f43ec437eb5a42125573f600fc3fa42b9bddd62" +dependencies = [ + "arrayvec", +] + +[[package]] +name = "aws-creds" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f84143206b9c72b3c5cb65415de60c7539c79cd1559290fddec657939131be0" +dependencies = [ + "attohttpc", + "home", + "log", + "quick-xml", + "rust-ini", + "serde", + "thiserror", + "time", + "url", +] + +[[package]] +name = "aws-region" +version = "0.25.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9aed3f9c7eac9be28662fdb3b0f4d1951e812f7c64fed4f0327ba702f459b3b" +dependencies = [ + "thiserror", +] + +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bb8" +version = "0.8.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89aabfae550a5c44b43ab941844ffcd2e993cb6900b342debf59e9ea74acdb8" +dependencies = [ + "async-trait", + "futures-util", + "parking_lot", + "tokio", +] + +[[package]] +name = "bb8-redis" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1781f22daa0ae97d934fdf04a5c66646f154a164c4bdc157ec8d3c11166c05cc" +dependencies = [ + "async-trait", + "bb8", + "redis", +] + +[[package]] +name = "bit_field" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc827186963e592360843fb5ba4b973e145841266c1357f7180c43526f2e5b61" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "bitstream-io" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6099cdc01846bc367c4e7dd630dc5966dccf36b652fae7a74e17b640411a91b2" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "brotli" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "4.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bstr" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" +dependencies = [ + "memchr", +] + +[[package]] +name = "built" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c360505aed52b7ec96a3636c3f039d99103c37d1d9b4f7a8c743d3ea9ffcd03b" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "bytemuck" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + +[[package]] +name = "bytes" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" +dependencies = [ + "serde", +] + +[[package]] +name = "bytestring" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d80203ea6b29df88012294f62733de21cfeab47f17b41af3a38bc30a03ee72" +dependencies = [ + "bytes", +] + +[[package]] +name = "cc" +version = "1.1.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0f57c4b4da2a9d619dd035f27316d7a426305b75be93d09e92f2b9229c34feaf" +dependencies = [ + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-expr" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d067ad48b8650848b989a59a86c6c36a995d02d2bf778d45c3c5d57bc2718f02" +dependencies = [ + "smallvec", + "target-lexicon", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chm" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854dd9fd542191b5b357fb146aa09c0ae1db611701ca47a937dc554d8deaaaea" +dependencies = [ + "chrono", + "clap", + "clickhouse 0.11.6", + "derive_more 0.99.18", + "dotenvy", + "serde", + "time", + "tokio", + "toml", +] + +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets", +] + +[[package]] +name = "cityhash-rs" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93a719913643003b84bd13022b4b7e703c09342cd03b679c4641c7d2e50dc34d" + +[[package]] +name = "clap" +version = "4.5.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = 
"clap_derive" +version = "4.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + +[[package]] +name = "clickhouse" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0875e527e299fc5f4faba42870bf199a39ab0bb2dbba1b8aef0a2151451130f" +dependencies = [ + "bstr", + "bytes", + "clickhouse-derive 0.1.1", + "clickhouse-rs-cityhash-sys", + "futures", + "hyper 0.14.31", + "hyper-tls 0.5.0", + "lz4", + "sealed 0.4.0", + "serde", + "static_assertions", + "thiserror", + "time", + "tokio", + "url", +] + +[[package]] +name = "clickhouse" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2135bb9638e8c8c1e3d794f242099e57987059ba52e7e3de597e1d99b2c4a5a3" +dependencies = [ + "bstr", + "bytes", + "cityhash-rs", + "clickhouse-derive 0.2.0", + "futures", + "futures-channel", + "http-body-util", + "hyper 1.5.0", + "hyper-util", + "lz4_flex", + "replace_with", + "sealed 0.6.0", + "serde", + "static_assertions", + "thiserror", + "time", + "tokio", + "url", +] + +[[package]] +name = "clickhouse-derive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18af5425854858c507eec70f7deb4d5d8cec4216fcb086283a78872387281ea5" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals 0.26.0", + "syn 1.0.109", +] + +[[package]] +name = "clickhouse-derive" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d70f3e2893f7d3e017eeacdc9a708fbc29a10488e3ebca21f9df6a5d2b616dbb" +dependencies = [ + "proc-macro2", + "quote", + 
"serde_derive_internals 0.29.1", + "syn 2.0.87", +] + +[[package]] +name = "clickhouse-rs-cityhash-sys" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4baf9d4700a28d6cb600e17ed6ae2b43298a5245f1f76b4eab63027ebfd592b9" +dependencies = [ + "cc", +] + +[[package]] +name = "color_quant" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" + +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "futures-core", + "memchr", + "pin-project-lite", + "tokio", + "tokio-util", +] + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + +[[package]] +name = "cookie" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e859cd57d0710d9e06c381b550c06e76992472a8c6d527aecd2fc673dcc231fb" +dependencies = [ + "percent-encoding", + "time", + "version_check", 
+] + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "darling" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.87", +] + +[[package]] +name = "darling_macro" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", + "serde", +] + +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn 
2.0.87", +] + +[[package]] +name = "derive_more" +version = "0.99.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn 2.0.87", +] + +[[package]] +name = "derive_more" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "unicode-xid", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "dlv-list" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f" +dependencies = [ + "const-random", +] + +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "env_filter" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "humantime", + "log", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "exr" +version = "1.73.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83197f59927b46c04a183a619b7c29df34e63e63c7869320862268c0ef687e0" +dependencies = [ + "bit_field", + "half", + "lebe", + "miniz_oxide", + "rayon-core", + "smallvec", + "zune-inflate", +] + +[[package]] +name = "fastrand" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" + +[[package]] +name = "fdeflate" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07c6f4c64c1d33a3111c4466f7365ebdcc37c5bd1ea0d62aae2e3d722aacbedb" 
+dependencies = [ + "simd-adler32", +] + +[[package]] +name = "flate2" +version = "1.0.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gif" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb2d69b19215e18bb912fa30f7ce15846e301408695e44e0ef719f1da9e19f2" +dependencies = [ + "color_quant", + "weezl", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "h2" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.1.0", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + +[[package]] +name = "hashbrown" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" + +[[package]] +name = "heck" +version = "0.3.3" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + 
"pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.1.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "hyper" +version = "0.14.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c08302e8fa335b151b788c775ff56e7a03ae64ff85c548ee820fecb70356e85" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "h2 0.4.6", + "http 1.1.0", + "http-body 1.0.1", + "httparse", + "itoa", + "pin-project-lite", + 
"smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" +dependencies = [ + "futures-util", + "http 1.1.0", + "hyper 1.5.0", + "hyper-util", + "rustls", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper 0.14.31", + "native-tls", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper 1.5.0", + "hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", + "hyper 1.5.0", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "image" +version = "0.25.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd6f44aed642f18953a158afeb30206f4d50da59fbc66ecb53c66488de73563b" 
+dependencies = [ + "bytemuck", + "byteorder-lite", + "color_quant", + "exr", + "gif", + "image-webp", + "num-traits", + "png", + "qoi", + "ravif", + "rayon", + "rgb", + "tiff", + "zune-core", + "zune-jpeg", +] + +[[package]] +name = "image-webp" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e031e8e3d94711a9ccb5d6ea357439ef3dcbed361798bd4071dc4d9793fbe22f" +dependencies = [ + "byteorder-lite", + "quick-error", +] + +[[package]] +name = "imgref" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0263a3d970d5c054ed9312c0057b4f3bde9c0b33836d3637361d4a9e6e7a408" + +[[package]] +name = "impl-more" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae21c3177a27788957044151cc2800043d127acaa460a47ebb9b84dfa2c6aa0" + +[[package]] +name = "indexmap" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +dependencies = [ + "equivalent", + "hashbrown 0.15.1", + "serde", +] + +[[package]] +name = "interpolate_name" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "ipnet" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + +[[package]] +name = "jpeg-decoder" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0" + +[[package]] +name = "js-sys" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "language-tags" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4345964bb142484797b161f473a503a434de77149dd8c7427788c6e13379388" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lebe" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8" + +[[package]] +name = "libc" +version = "0.2.161" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" + +[[package]] +name = "libfuzzer-sys" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b9569d2f74e257076d8c6bfa73fb505b46b851e51ddaecc825944aa3bed17fa" +dependencies = [ + "arbitrary", + 
"cc", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "litemap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + +[[package]] +name = "local-channel" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6cbc85e69b8df4b8bb8b89ec634e7189099cea8927a276b7384ce5488e53ec8" +dependencies = [ + "futures-core", + "futures-sink", + "local-waker", +] + +[[package]] +name = "local-waker" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d873d7c67ce09b42110d801813efbc9364414e356be9935700d368351657487" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "loop9" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fae87c125b03c1d2c0150c90365d7d6bcc53fb73a9acaef207d2d065860f062" +dependencies = [ + "imgref", +] + +[[package]] +name = "lopdf" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5c8ecfc6c72051981c0459f75ccc585e7ff67c70829560cda8e647882a9abff" +dependencies = [ + "chrono", + "encoding_rs", + "flate2", + "indexmap", + "itoa", + "log", + "md-5", + "nom", + "rangemap", + "rayon", + "time", + "weezl", +] + +[[package]] +name = "lz4" +version = "1.28.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d1febb2b4a79ddd1980eede06a8f7902197960aa0383ffcfdd62fe723036725" +dependencies = [ + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.11.1+lz4-1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "lz4_flex" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" + +[[package]] +name = "maybe-async" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cf92c10c7e361d6b99666ec1c6f9805b0bea2c3bd8c78dc6fe98ac5bd78db11" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "maybe-rayon" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519" +dependencies = [ + "cfg-if", + "rayon", +] + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "memo-map" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d1115007560874e373613744c6fba374c17688327a71c1476d1a5954cc857b" + +[[package]] +name = "mime" +version = 
"0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "mime_guess" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" +dependencies = [ + "mime", + "unicase", +] + +[[package]] +name = "minidom" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f45614075738ce1b77a1768912a60c0227525971b03e09122a05b8a34a2a6278" +dependencies = [ + "rxml", +] + +[[package]] +name = "minijinja" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c37e1b517d1dcd0e51dc36c4567b9d5a29262b3ec8da6cb5d35e27a8fb529b5" +dependencies = [ + "memo-map", + "self_cell", + "serde", + "serde_json", +] + +[[package]] +name = "minijinja-embed" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46d70b7597f2d4149308210d5dc7ab79f1248238a27c1ab1a3eefd95d20c4cca" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "mio" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +dependencies = [ + "hermit-abi", + "libc", + "log", + "wasi", + "windows-sys 0.52.0", +] + +[[package]] +name = "native-tls" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "noop_proc_macro" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8" + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-derive" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "object" +version = "0.36.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "openai_dive" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3145b6053780214d0d872f204c92e2cf65706b8b78aa304d76567a8d3764d15" +dependencies = [ + "bytes", + "derive_builder", + "reqwest", + "serde", + "serde_json", + "tokio", + "tokio-util", +] + +[[package]] +name = "openssl" +version = "0.10.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" +dependencies = [ + "bitflags 2.6.0", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.104" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "ordered-multimap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79" +dependencies = [ + "dlv-list", + "hashbrown 0.14.5", +] + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pdf2image" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9c2fc99b8e21f00e9ba70f14944ea0f58356b7019e2238c7bdfee8cee4aff54" +dependencies = [ + "derive_builder", + "image", + "rayon", + "thiserror", +] + +[[package]] +name = "pdf2md-server" +version = "0.1.0" +dependencies = [ + "actix-cors", + "actix-web", + "base64", + "bb8-redis", + "chm", + "clickhouse 0.13.1", + "derive_more 1.0.0", + "dotenvy", + "env_logger", + "futures", + "image", + "lazy_static", + "log", + "lopdf", + "minijinja", + "minijinja-embed", + "openai_dive", + "pdf2image", + "redis", + "regex", + "reqwest", + "rust-s3", + "serde", + "serde_json", + "signal-hook", + "tokio", + "utoipa", + "utoipa-actix-web", + 
"utoipa-redoc", + "uuid", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pin-project-lite" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" + +[[package]] +name = "png" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f9d46a34a05a6a57566bc2bfae066ef07585a6e3fa30fbbdff5936380623f0" +dependencies = [ + "bitflags 1.3.2", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "profiling" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"afbdc74edc00b6f6a218ca6a5364d6226a259d4b8ea1af4a0ea063f27e179f4d" +dependencies = [ + "profiling-procmacros", +] + +[[package]] +name = "profiling-procmacros" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a65f2e60fbf1063868558d69c6beacf412dc755f9fc020f514b7955fc914fe30" +dependencies = [ + "quote", + "syn 2.0.87", +] + +[[package]] +name = "qoi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6d64c71eb498fe9eae14ce4ec935c555749aef511cca85b5568910d6e48001" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "quick-error" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" + +[[package]] +name = "quick-xml" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d3a6e5838b60e0e8fa7a43f22ade549a37d61f8bdbe636d0d7816191de969c2" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 
+dependencies = [ + "getrandom", +] + +[[package]] +name = "rangemap" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60fcc7d6849342eff22c4350c8b9a989ee8ceabc4b481253e8946b9fe83d684" + +[[package]] +name = "rav1e" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd87ce80a7665b1cce111f8a16c1f3929f6547ce91ade6addf4ec86a8dda5ce9" +dependencies = [ + "arbitrary", + "arg_enum_proc_macro", + "arrayvec", + "av1-grain", + "bitstream-io", + "built", + "cfg-if", + "interpolate_name", + "itertools", + "libc", + "libfuzzer-sys", + "log", + "maybe-rayon", + "new_debug_unreachable", + "noop_proc_macro", + "num-derive", + "num-traits", + "once_cell", + "paste", + "profiling", + "rand", + "rand_chacha", + "simd_helpers", + "system-deps", + "thiserror", + "v_frame", + "wasm-bindgen", +] + +[[package]] +name = "ravif" +version = "0.11.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2413fd96bd0ea5cdeeb37eaf446a22e6ed7b981d792828721e74ded1980a45c6" +dependencies = [ + "avif-serialize", + "imgref", + "loop9", + "quick-error", + "rav1e", + "rayon", + "rgb", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redis" +version = "0.27.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cccf17a692ce51b86564334614d72dcae1def0fd5ecebc9f02956da74352b5" +dependencies = [ + "arc-swap", + "async-trait", + "bytes", + "combine", + "futures-util", + "itoa", + 
"num-bigint", + "percent-encoding", + "pin-project-lite", + "rustls", + "rustls-native-certs", + "rustls-pemfile", + "rustls-pki-types", + "ryu", + "sha1_smol", + "socket2", + "tokio", + "tokio-rustls", + "tokio-util", + "url", +] + +[[package]] +name = "redox_syscall" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-lite" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "replace_with" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a8614ee435691de62bcffcf4a66d91b3594bf1428a5722e79103249a095690" + +[[package]] +name = "reqwest" +version = "0.12.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" +dependencies = [ + "base64", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2 0.4.6", + "http 1.1.0", + "http-body 1.0.1", + "http-body-util", 
+ "hyper 1.5.0", + "hyper-rustls", + "hyper-tls 0.6.0", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "mime_guess", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls-pemfile", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "system-configuration", + "tokio", + "tokio-native-tls", + "tokio-util", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "windows-registry", +] + +[[package]] +name = "rgb" +version = "0.8.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a" + +[[package]] +name = "ring" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +dependencies = [ + "cc", + "cfg-if", + "getrandom", + "libc", + "spin", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rust-ini" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e310ef0e1b6eeb79169a1171daf9abcb87a2e17c03bee2c4bb100b55c75409f" +dependencies = [ + "cfg-if", + "ordered-multimap", + "trim-in-place", +] + +[[package]] +name = "rust-s3" +version = "0.35.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3df3f353b1f4209dcf437d777cda90279c397ab15a0cd6fd06bd32c88591533" +dependencies = [ + "async-trait", + "aws-creds", + "aws-region", + "base64", + "bytes", + "cfg-if", + "futures", + "hex", + "hmac", + "http 0.2.12", + "hyper 0.14.31", + "hyper-tls 0.5.0", + "log", + "maybe-async", + "md5", + "minidom", + "native-tls", + "percent-encoding", + "quick-xml", + "serde", + "serde_derive", + "serde_json", + "sha2", + "thiserror", + "time", + "tokio", + "tokio-native-tls", + "tokio-stream", + "url", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "0.38.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "375116bee2be9ed569afe2154ea6a99dfdffd257f533f187498c2a8f5feaf4ee" +dependencies = [ + "bitflags 2.6.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustls" +version = "0.23.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e" +dependencies = [ + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-native-certs" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5" +dependencies = [ + "openssl-probe", + "rustls-pemfile", + "rustls-pki-types", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" + +[[package]] +name = "rustls-webpki" +version = "0.102.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +dependencies = [ + "ring", 
+ "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "rxml" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a98f186c7a2f3abbffb802984b7f1dfd65dac8be1aafdaabbca4137f53f0dff7" +dependencies = [ + "bytes", + "rxml_validation", + "smartstring", +] + +[[package]] +name = "rxml_validation" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22a197350ece202f19a166d1ad6d9d6de145e1d2a8ef47db299abe164dbd7530" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "schannel" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01227be5826fa0690321a2ba6c5cd57a19cf3f6a09e76973b58e61de6ab9d1c1" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "sealed" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b5e421024b5e5edfbaa8e60ecf90bda9dbffc602dbb230e6028763f85f0c68c" +dependencies = [ + "heck 0.3.3", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "sealed" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22f968c5ea23d555e670b449c1c5e7b2fc399fdaec1d304a17cd48e288abc107" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags 2.6.0", + "core-foundation", + "core-foundation-sys", + "libc", + 
"security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "self_cell" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d369a96f978623eb3dc28807c4852d6cc617fed53da5d3c400feff1ef34a714a" + +[[package]] +name = "semver" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" + +[[package]] +name = "serde" +version = "1.0.215" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.215" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "serde_derive_internals" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85bf8229e7920a9f636479437026331ce11aa132b4dde37d121944a44d6e5f3c" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "serde_json" +version = "1.0.132" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +dependencies = [ + "itoa", + "memchr", + 
"ryu", + "serde", +] + +[[package]] +name = "serde_spanned" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sha1_smol" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d" + +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801" +dependencies = [ + "libc", + "signal-hook-registry", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +dependencies = [ + "libc", +] + +[[package]] +name = "simd-adler32" +version = "0.3.7" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + +[[package]] +name = "simd_helpers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95890f873bec569a0362c235787f3aca6e1e887302ba4840839bcc6459c42da6" +dependencies = [ + "quote", +] + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "smartstring" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" +dependencies = [ + "autocfg", + "static_assertions", + "version_check", +] + +[[package]] +name = "socket2" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.11.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "system-configuration" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +dependencies = [ + "bitflags 2.6.0", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "system-deps" +version = 
"6.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e535eb8dded36d55ec13eddacd30dec501792ff23a0b1682c38601b8cf2349" +dependencies = [ + "cfg-expr", + "heck 0.5.0", + "pkg-config", + "toml", + "version-compare", +] + +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + +[[package]] +name = "tempfile" +version = "3.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +dependencies = [ + "cfg-if", + "fastrand", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + +[[package]] +name = "thiserror" +version = "1.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02dd99dc800bbb97186339685293e1cc5d9df1f8fae2d0aecd9ff1c77efea892" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7c61ec9a6f64d2793d8a45faba21efbe3ced62a886d44c36a009b2b519b4c7e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "tiff" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba1310fcea54c6a9a4fd1aad794ecc02c31682f6bfbecdf460bf19533eed1e3e" +dependencies = [ + "flate2", + "jpeg-decoder", + "weezl", +] + +[[package]] +name = "time" +version = "0.3.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tokio" +version = "1.41.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.52.0", +] + +[[package]] +name = "tokio-macros" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +dependencies = [ + "rustls", + "rustls-pki-types", + "tokio", +] + +[[package]] +name = 
"tokio-stream" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "toml" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "log", + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", +] + +[[package]] +name = "trim-in-place" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "343e926fc669bc8cde4fa3129ab681c63671bae288b1f1081ceee6d9d37904fc" + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicase" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e51b68083f157f853b6379db119d1c1be0e6e4dec98101079dec41f6f5cf6df" + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] 
+name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "utoipa" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "514a48569e4e21c86d0b84b5612b5e73c0b2cf09db63260134ba426d4e8ea714" +dependencies = [ + "indexmap", + "serde", + "serde_json", + "utoipa-gen", +] + +[[package]] +name = "utoipa-actix-web" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7eda9c23c05af0fb812f6a177514047331dac4851a2c8e9c4b895d6d826967f" +dependencies = [ + "actix-service", + "actix-web", + "utoipa", +] + +[[package]] +name = "utoipa-gen" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5629efe65599d0ccd5d493688cbf6e03aa7c1da07fe59ff97cf5977ed0637f66" +dependencies = [ + "proc-macro2", + "quote", + "regex", + "syn 2.0.87", + "uuid", +] + +[[package]] +name = "utoipa-redoc" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9218304bba9a0ea5e92085b0a427ccce5fd56eaaf6436d245b7578e6a95787e1" +dependencies = [ + "actix-web", + "serde", + "serde_json", + "utoipa", +] + +[[package]] +name = "uuid" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +dependencies = [ + "getrandom", + "serde", +] + +[[package]] +name = "v_frame" +version = "0.3.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6f32aaa24bacd11e488aa9ba66369c7cd514885742c9fe08cfe85884db3e92b" +dependencies = [ + "aligned-vec", + "num-traits", + "wasm-bindgen", +] + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version-compare" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852e951cb7832cb45cb1169900d19760cfa39b82bc0ea9c0e5a14ae88411c98b" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.87", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.45" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" + +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "web-sys" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "weezl" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = 
[ + "windows-targets", +] + +[[package]] +name = "windows-registry" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = 
"windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" +dependencies = [ + "memchr", +] + +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + +[[package]] +name = "zerovec" +version = "0.10.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "zstd" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.13+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +dependencies = [ + "cc", + "pkg-config", +] + +[[package]] +name = "zune-core" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" + +[[package]] +name = "zune-inflate" +version = "0.2.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73ab332fe2f6680068f3582b16a24f90ad7096d5d39b974d1c0aff0125116f02" +dependencies = [ + "simd-adler32", +] + +[[package]] +name = "zune-jpeg" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16099418600b4d8f028622f73ff6e3deaabdff330fb9a2a131dea781ee8b0768" +dependencies = [ + "zune-core", +] diff --git a/pdf2md/server/Cargo.toml b/pdf2md/server/Cargo.toml new file mode 100644 index 0000000000..355decb4ec --- /dev/null 
+++ b/pdf2md/server/Cargo.toml @@ -0,0 +1,57 @@ +[package] +name = "pdf2md-server" +version = "0.1.0" +edition = "2021" +default-run = "pdf2md-server" + +[[bin]] +name = "pdf2md-server" +path = "src/main.rs" + +[[bin]] +name = "supervisor-worker" +path = "src/workers/supervisor-worker.rs" + +[[bin]] +name = "chunk-worker" +path = "src/workers/chunk-worker.rs" + +[dependencies] +utoipa = { version = "5.2.0", features = ["actix_extras", "uuid", "chrono"] } +utoipa-redoc = { version = "5.0.0", features = ["actix-web"] } +actix-web = "4.9.0" +serde = "1.0.215" +serde_json = "1.0.132" +uuid = { version = "1", features = ["v4", "serde"] } +log = "0.4" +rust-s3 = "0.35.1" +derive_more = { version = "1.0.0", features = ["display"] } +dotenvy = "0.15.7" +signal-hook = "0.3.17" +redis = { version = "0.27.5", features = ["tokio-rustls-comp", "aio"] } +bb8-redis = "0.17.0" +tokio = "1.41.1" +lazy_static = "1.5.0" +actix-cors = "0.7.0" +reqwest = "0.12.9" +clickhouse = { version = "0.13.1", features = ["time"] } +chm = "0.1.17" +lopdf = "0.34.0" +base64 = "0.22.1" +pdf2image = "0.1.2" +image = "0.25.5" +openai_dive = "0.6.6" +env_logger = "0.11.5" +utoipa-actix-web = "0.1.2" +futures = "0.3.31" +regex = "1.11.1" +minijinja-embed = "2.5.0" +minijinja = { version = "2.5.0", features = ["loader", "json"] } + +[build-dependencies] +dotenvy = "0.15.7" +minijinja-embed = "2.2.0" + +[features] +default = [] +runtime-env = [] diff --git a/pdf2md/server/Dockerfile.chunk-worker b/pdf2md/server/Dockerfile.chunk-worker new file mode 100644 index 0000000000..2d12d1e41b --- /dev/null +++ b/pdf2md/server/Dockerfile.chunk-worker @@ -0,0 +1,28 @@ +FROM rust:1.81-slim-bookworm AS chef +# We only pay the installation cost once, +# it will be cached from the second build onwards +RUN apt-get update -y && apt-get -y install pkg-config libssl-dev g++ curl +RUN cargo install cargo-chef +WORKDIR app + +FROM chef AS planner +COPY . . 
+RUN cargo chef prepare --recipe-path recipe.json + +FROM chef AS builder +COPY --from=planner /app/recipe.json recipe.json +# Build dependencies - this is the caching Docker layer! +RUN cargo chef cook --release --recipe-path recipe.json --bin "chunk-worker" +# Build application +COPY . . +RUN cargo build --release --features "runtime-env" --bin "chunk-worker" + +FROM debian:bookworm-slim AS runtime +RUN apt-get update -y && apt-get -y install pkg-config libssl-dev ca-certificates +WORKDIR /app +COPY ./ch_migrations/ /app/ch_migrations +COPY --from=builder /app/target/release/chunk-worker /app/chunk-worker + + +EXPOSE 8090 +ENTRYPOINT ["/app/chunk-worker"] diff --git a/pdf2md/server/Dockerfile.pdf2md-server b/pdf2md/server/Dockerfile.pdf2md-server new file mode 100644 index 0000000000..1e5dad885c --- /dev/null +++ b/pdf2md/server/Dockerfile.pdf2md-server @@ -0,0 +1,38 @@ +FROM rust:1.81-slim-bookworm AS chef +# We only pay the installation cost once, +# it will be cached from the second build onwards +RUN apt-get update -y && apt-get -y install pkg-config libssl-dev g++ curl +RUN cargo install cargo-chef +WORKDIR app + +FROM chef AS planner +COPY . . +RUN cargo chef prepare --recipe-path recipe.json + +FROM chef AS builder +COPY --from=planner /app/recipe.json recipe.json +# Build dependencies - this is the caching Docker layer! +RUN cargo chef cook --release --recipe-path recipe.json --bin "pdf2md-server" +# Build application +COPY . . 
+RUN cargo build --release --features "runtime-env" --bin "pdf2md-server" + +FROM debian:bookworm-slim AS runtime +WORKDIR /app + +RUN apt-get update -y; \ + apt-get install -y \ + pkg-config \ + build-essential\ + libssl-dev \ + ca-certificates \ + ; \ + mkdir -p /app/tmp + + +COPY ./ch_migrations /app/ch_migrations +COPY --from=builder /app/static /app/static +COPY --from=builder /app/target/release/pdf2md-server /app/pdf2md-server + +EXPOSE 8090 +ENTRYPOINT ["/app/pdf2md-server"] diff --git a/pdf2md/server/Dockerfile.supervisor-worker b/pdf2md/server/Dockerfile.supervisor-worker new file mode 100644 index 0000000000..459ebe8ee8 --- /dev/null +++ b/pdf2md/server/Dockerfile.supervisor-worker @@ -0,0 +1,28 @@ +FROM rust:1.81-slim-bookworm AS chef +# We only pay the installation cost once, +# it will be cached from the second build onwards +RUN apt-get update -y && apt-get -y install pkg-config libssl-dev g++ curl +RUN cargo install cargo-chef +WORKDIR app + +FROM chef AS planner +COPY . . +RUN cargo chef prepare --recipe-path recipe.json + +FROM chef AS builder +COPY --from=planner /app/recipe.json recipe.json +# Build dependencies - this is the caching Docker layer! +RUN cargo chef cook --release --recipe-path recipe.json --bin "supervisor-worker" +# Build application +COPY . . 
+RUN cargo build --release --features "runtime-env" --bin "supervisor-worker" + +FROM debian:bookworm-slim AS runtime +RUN apt-get update -y && apt-get -y install pkg-config libssl-dev ca-certificates +WORKDIR /app +COPY ./ch_migrations/ /app/ch_migrations +COPY --from=builder /app/target/release/supervisor-worker /app/supervisor-worker + + +EXPOSE 8090 +ENTRYPOINT ["/app/supervisor-worker"] diff --git a/pdf2md/server/build.rs b/pdf2md/server/build.rs new file mode 100644 index 0000000000..4396a3f75f --- /dev/null +++ b/pdf2md/server/build.rs @@ -0,0 +1,33 @@ +use std::error::Error; + +#[cfg(not(feature = "runtime-env"))] +fn main() -> Result<(), Box> { + use std::{env, process::Command}; + dotenvy::dotenv().expect("Failed to read .env file. Did you `cp .env.dist .env` ?"); + + let output = Command::new("npx") + .arg("tailwindcss") + .arg("-i") + .arg("./static/in.css") + .arg("-o") + .arg("./static/output.css") + .output()?; + + // Stream output + println!("{}", String::from_utf8_lossy(&output.stdout)); + + for (key, value) in env::vars() { + println!("cargo:rustc-env={key}={value}"); + } + + println!("cargo:rerun-if-changed=.env"); + + minijinja_embed::embed_templates!("src/templates"); + Ok(()) +} + +#[cfg(feature = "runtime-env")] +fn main() -> Result<(), Box> { + minijinja_embed::embed_templates!("src/templates"); + Ok(()) +} diff --git a/pdf2md/server/ch_migrations/1731019991_initial_tables/down.sql b/pdf2md/server/ch_migrations/1731019991_initial_tables/down.sql new file mode 100644 index 0000000000..1d28473522 --- /dev/null +++ b/pdf2md/server/ch_migrations/1731019991_initial_tables/down.sql @@ -0,0 +1,2 @@ +DROP TABLE IF EXISTS file_chunks; +DROP TABLE IF EXISTS file_tasks; diff --git a/pdf2md/server/ch_migrations/1731019991_initial_tables/up.sql b/pdf2md/server/ch_migrations/1731019991_initial_tables/up.sql new file mode 100644 index 0000000000..7416e7e18e --- /dev/null +++ b/pdf2md/server/ch_migrations/1731019991_initial_tables/up.sql @@ -0,0 +1,25 @@ 
+CREATE TABLE IF NOT EXISTS file_tasks ( + id String, + pages UInt32, + chunks UInt32, + pages_processed UInt32, + created_at DateTime, + status String, +) ENGINE = MergeTree() +ORDER BY (id) +PARTITION BY + (toYYYYMM(created_at)) +TTL created_at + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS file_chunks ( + id String, + task_id String, + content String, + metadata String, + created_at DateTime, +) ENGINE = MergeTree() +ORDER BY (task_id, id) +PARTITION BY + (task_id) +TTL created_at + INTERVAL 30 DAY; + diff --git a/pdf2md/server/ch_migrations/1731447246_remove_chunks_processed_field/down.sql b/pdf2md/server/ch_migrations/1731447246_remove_chunks_processed_field/down.sql new file mode 100644 index 0000000000..9fb6e1e0e7 --- /dev/null +++ b/pdf2md/server/ch_migrations/1731447246_remove_chunks_processed_field/down.sql @@ -0,0 +1 @@ +ALTER TABLE file_tasks ADD COLUMN IF NOT EXISTS chunks_processed UInt32; diff --git a/pdf2md/server/ch_migrations/1731447246_remove_chunks_processed_field/up.sql b/pdf2md/server/ch_migrations/1731447246_remove_chunks_processed_field/up.sql new file mode 100644 index 0000000000..984a9020de --- /dev/null +++ b/pdf2md/server/ch_migrations/1731447246_remove_chunks_processed_field/up.sql @@ -0,0 +1 @@ +ALTER TABLE file_tasks DROP COLUMN IF EXISTS chunks; diff --git a/pdf2md/server/ch_migrations/chm.toml b/pdf2md/server/ch_migrations/chm.toml new file mode 100644 index 0000000000..529393eec3 --- /dev/null +++ b/pdf2md/server/ch_migrations/chm.toml @@ -0,0 +1,4 @@ +url = "http://localhost:8123" +user = "clickhouse" +password = "password" +database = "default" diff --git a/pdf2md/server/src/errors.rs b/pdf2md/server/src/errors.rs new file mode 100644 index 0000000000..d6be340f19 --- /dev/null +++ b/pdf2md/server/src/errors.rs @@ -0,0 +1,137 @@ +use actix_web::{ + error::{JsonPayloadError, ResponseError}, + HttpResponse, +}; +use derive_more::Display; +use serde::{Deserialize, Serialize}; +use std::convert::From; +use utoipa::ToSchema; +use 
uuid::Error as ParseError; + +#[derive(Serialize, Deserialize, Debug, Display, ToSchema)] +#[schema(example = json!({"message": "Bad Request"}))] +pub struct ErrorResponseBody { + pub message: String, +} + +#[derive(Debug, Display, Clone)] +pub enum ServiceError { + #[display("Internal Server Error: {_0}")] + InternalServerError(String), + + #[display("BadRequest: {_0}")] + BadRequest(String), + + #[display("BadRequest: Duplicate Tracking Id Found")] + DuplicateTrackingId(String), + + #[display("Unauthorized")] + Unauthorized, + + #[display("Forbidden")] + Forbidden, + + #[display("Not Found")] + NotFound(String), + + #[display("Json Deserialization Error: {_0}")] + JsonDeserializeError(String), + + #[display("Payload Too Large")] + PayloadTooLarge(String), +} + +// impl ResponseError trait allows to convert our errors into http responses with appropriate data +impl ResponseError for ServiceError { + fn error_response(&self) -> HttpResponse { + match self { + ServiceError::InternalServerError(ref message) => HttpResponse::InternalServerError() + .json(ErrorResponseBody { + message: message.to_string(), + }), + ServiceError::BadRequest(ref message) => { + HttpResponse::BadRequest().json(ErrorResponseBody { + message: message.to_string(), + }) + } + ServiceError::DuplicateTrackingId(ref id) => { + HttpResponse::BadRequest().json(ErrorResponseBody { + message: format!("Stoped overwriting data, Duplicate Tracking Id {:?}", id), + }) + } + ServiceError::Unauthorized => HttpResponse::Unauthorized().json(ErrorResponseBody { + message: "Unauthorized".to_string(), + }), + ServiceError::Forbidden => HttpResponse::Forbidden().json(ErrorResponseBody { + message: "Forbidden".to_string(), + }), + ServiceError::NotFound(ref message) => { + HttpResponse::NotFound().json(ErrorResponseBody { + message: format!("Not Found: {}", message), + }) + } + ServiceError::JsonDeserializeError(ref message) => { + HttpResponse::BadRequest().json(ErrorResponseBody { + message: format!("Json 
Deserialization Error: {}", message), + }) + } + ServiceError::PayloadTooLarge(ref message) => { + HttpResponse::PayloadTooLarge().json(ErrorResponseBody { + message: message.to_string(), + }) + } + } + } +} + +// we can return early in our handlers if UUID provided by the user is not valid +// and provide a custom message +impl From for ServiceError { + fn from(_: ParseError) -> ServiceError { + ServiceError::BadRequest("Invalid UUID".into()) + } +} + +pub fn custom_json_error_handler( + err: JsonPayloadError, + _req: &actix_web::HttpRequest, +) -> actix_web::Error { + let (error_message, solution) = match &err { + JsonPayloadError::ContentType => ( + "Content type header error", + "Ensure the content type request header of the HTTP request is set as `Content-Type: application/json`." + ), + JsonPayloadError::Payload(_) => ( + "Payload error", + "Check that the JSON payload matches the expected structure." + ), + JsonPayloadError::Deserialize(deserialize_err) => match deserialize_err.classify() { + serde_json::error::Category::Io => ( + "I/O error while reading JSON", + "Verify that the server has sufficient permissions to access the file or data source." + ), + serde_json::error::Category::Syntax => ( + "Syntax error in JSON", + "Fix syntax errors in the JSON payload to adhere to JSON formatting rules." + ), + serde_json::error::Category::Data => ( + "Data error in JSON", + "Ensure that the data in the JSON payload is valid and consistent with the expected schema." + ), + serde_json::error::Category::Eof => ( + "Unexpected end of JSON input", + "Ensure that the JSON payload is complete and not truncated." + ), + }, + _ => ( + "Other JSON payload error", + "Inspect the JSON payload and the server's handling of JSON requests for any issues." 
+ ), + }; + + let detailed_error_message = format!( + "*Type* : {} | *Message* : {} | {}", + error_message, err, solution + ); + ServiceError::JsonDeserializeError(detailed_error_message).into() +} diff --git a/pdf2md/server/src/lib.rs b/pdf2md/server/src/lib.rs new file mode 100644 index 0000000000..2ee6424821 --- /dev/null +++ b/pdf2md/server/src/lib.rs @@ -0,0 +1,190 @@ +use actix_web::{ + get, + middleware::Logger, + web::{self, PayloadConfig}, + App, HttpResponse, HttpServer, +}; +use chm::tools::migrations::{run_pending_migrations, SetupArgs}; +use errors::{custom_json_error_handler, ErrorResponseBody}; +use routes::{create_task::create_task, get_task::get_task, jinja_templates}; +use utoipa::{ + openapi::security::{ApiKey, ApiKeyValue, SecurityScheme}, + Modify, OpenApi, +}; +use utoipa_actix_web::AppExt; +use utoipa_redoc::{Redoc, Servable}; + +pub mod errors; +pub mod middleware; +pub mod models; +pub mod operators; +pub mod routes; + +/// Health Check +/// +/// Confirmation that the service is healthy and can make embedding vectors +#[utoipa::path( + get, + path = "/health", + context_path = "/api", + tag = "Health", + responses( + (status = 200, description = "Confirmation that the service is healthy and can make embedding vectors"), + (status = 400, description = "Service error relating to making an embedding or overall service health", body = ErrorResponseBody), + ), +)] +#[get("")] +pub async fn health_check() -> Result { + Ok(HttpResponse::Ok().finish()) +} + +#[macro_export] +#[cfg(not(feature = "runtime-env"))] +macro_rules! get_env { + ($name:expr, $message:expr) => {{ + lazy_static::lazy_static! { + static ref ENV_VAR: String = { + std::env::var($name).expect($message) + }; + } + ENV_VAR.as_str() + }}; +} + +#[macro_export] +#[cfg(feature = "runtime-env")] +macro_rules! get_env { + ($name:expr, $message:expr) => {{ + lazy_static::lazy_static! 
{ + static ref ENV_VAR: String = { + std::env::var($name).expect($message) + }; + } + ENV_VAR.as_str() + }}; +} + +pub type Templates<'a> = web::Data>; + +#[actix_web::main] +pub async fn main() -> std::io::Result<()> { + dotenvy::dotenv().ok(); + + #[derive(OpenApi)] + #[openapi(info( + title = "PDF2MD API", + description = "PDF2MD OpenAPI Specification. This document describes all of the operations available through the PDF2MD API.", + contact( + name = "Trieve Team", + url = "https://trieve.ai", + email = "developers@trieve.ai", + ), + license( + name = "BSL", + url = "https://github.com/devflowinc/trieve/blob/main/LICENSE.txt", + ), + version = "0.0.0"), + modifiers(&SecurityAddon), + tags( + (name = "Task", description = "Task operations. Allow you to interact with tasks."), + ))] + struct ApiDoc; + + struct SecurityAddon; + + impl Modify for SecurityAddon { + fn modify(&self, openapi: &mut utoipa::openapi::OpenApi) { + let components = openapi.components.as_mut().unwrap(); // we can unwrap safely since there already is components registered. 
+ components.add_security_scheme( + "api_key", + SecurityScheme::ApiKey(ApiKey::Header(ApiKeyValue::new("Authorization"))), + ) + } + } + + env_logger::builder() + .target(env_logger::Target::Stdout) + .filter_level(log::LevelFilter::Info) + .init(); + + let redis_url = get_env!("REDIS_URL", "REDIS_URL should be set"); + + let args = SetupArgs { + url: Some(std::env::var("CLICKHOUSE_URL").unwrap_or("http://localhost:8123".to_string())), + user: Some(std::env::var("CLICKHOUSE_USER").unwrap_or("default".to_string())), + password: Some(std::env::var("CLICKHOUSE_PASSWORD").unwrap_or("password".to_string())), + database: Some(std::env::var("CLICKHOUSE_DB").unwrap_or("default".to_string())), + }; + + let clickhouse_client = clickhouse::Client::default() + .with_url(args.url.as_ref().unwrap()) + .with_user(args.user.as_ref().unwrap()) + .with_password(args.password.as_ref().unwrap()) + .with_database(args.database.as_ref().unwrap()) + .with_option("async_insert", "1") + .with_option("wait_for_async_insert", "0"); + + let _ = run_pending_migrations(args.clone()).await.map_err(|err| { + log::error!("Failed to run clickhouse migrations: {:?}", err); + }); + + log::info!("Connecting to redis"); + + let redis_manager = + bb8_redis::RedisConnectionManager::new(redis_url).expect("Failed to connect to redis"); + + let redis_connections: u32 = std::env::var("REDIS_CONNECTIONS") + .unwrap_or("200".to_string()) + .parse() + .unwrap_or(200); + + let redis_pool = bb8_redis::bb8::Pool::builder() + .max_size(redis_connections) + .build(redis_manager) + .await + .expect("Failed to create redis pool"); + + let json_cfg = web::JsonConfig::default() + .limit(134200000) + .error_handler(custom_json_error_handler); + + HttpServer::new(move || { + let mut jinja_env = minijinja::Environment::new(); + minijinja_embed::load_templates!(&mut jinja_env); + + App::new() + .wrap(actix_cors::Cors::permissive()) + .wrap( + // Set up logger, but avoid logging hot status endpoints + Logger::new("%r %s %b 
%{Referer}i %{User-Agent}i %T") + .exclude("/") + .exclude("/api/health") + .exclude("/metrics"), + ) + .wrap(middleware::api_key_middleware::ApiKeyMiddlewareFactory) + .into_utoipa_app() + .openapi(ApiDoc::openapi()) + .app_data(json_cfg.clone()) + .app_data(PayloadConfig::new(134200000)) + .app_data(web::Data::new(jinja_env)) + .app_data(web::Data::new(redis_pool.clone())) + .app_data(web::Data::new(clickhouse_client.clone())) + .service(utoipa_actix_web::scope("/api/task").configure(|config| { + config.service(create_task).service(get_task); + })) + .service(utoipa_actix_web::scope("/static").configure(|config| { + config.service(jinja_templates::static_files); + })) + .service(utoipa_actix_web::scope("/health").configure(|config| { + config.service(health_check); + })) + .openapi_service(|api| Redoc::with_url("/redoc", api)) + .service(utoipa_actix_web::scope("").configure(|config| { + config.service(jinja_templates::public_page); + })) + .into_app() + }) + .bind(("127.0.0.1", 8081))? 
+ .run() + .await +} diff --git a/pdf2md/server/src/main.rs b/pdf2md/server/src/main.rs new file mode 100644 index 0000000000..7d0e0112c4 --- /dev/null +++ b/pdf2md/server/src/main.rs @@ -0,0 +1,3 @@ +fn main() -> std::io::Result<()> { + pdf2md_server::main() +} diff --git a/pdf2md/server/src/middleware/api_key_middleware.rs b/pdf2md/server/src/middleware/api_key_middleware.rs new file mode 100644 index 0000000000..ddbf0386ef --- /dev/null +++ b/pdf2md/server/src/middleware/api_key_middleware.rs @@ -0,0 +1,88 @@ +use crate::{errors::ServiceError, get_env}; +use actix_web::{ + dev::{Payload, Service, ServiceRequest, ServiceResponse, Transform}, + FromRequest, HttpMessage, HttpRequest, +}; +use futures::future::LocalBoxFuture; +use std::{ + future::{ready, Ready}, + rc::Rc, +}; + +#[derive(Clone, Debug)] +pub struct ApiKey; + +impl FromRequest for ApiKey { + type Error = ServiceError; + type Future = Ready>; + + #[inline] + fn from_request(req: &HttpRequest, _: &mut Payload) -> Self::Future { + let ext = req.extensions(); + + match ext.get::() { + Some(_) => ready(Ok(Self)), + None => ready(Err(ServiceError::Unauthorized)), + } + } +} + +pub struct ApiKeyMiddlewareFactory; + +impl Transform for ApiKeyMiddlewareFactory +where + S: Service, Error = actix_web::Error> + 'static, + S::Future: 'static, + B: 'static, +{ + type Response = ServiceResponse; + type Error = actix_web::Error; + type InitError = (); + type Transform = ApiKeyMiddleware; + type Future = Ready>; + + fn new_transform(&self, service: S) -> Self::Future { + ready(Ok(ApiKeyMiddleware { + service: Rc::new(service), + })) + } +} + +pub struct ApiKeyMiddleware { + service: Rc, +} + +impl Service for ApiKeyMiddleware +where + S: Service, Error = actix_web::Error>, + S::Future: 'static, +{ + type Response = ServiceResponse; + type Error = actix_web::Error; + type Future = LocalBoxFuture<'static, Result>; + + fn poll_ready( + &self, + ctx: &mut core::task::Context<'_>, + ) -> std::task::Poll> { + 
self.service.poll_ready(ctx) + } + + fn call(&self, req: ServiceRequest) -> Self::Future { + let api_key = get_env!("API_KEY", "API_KEY should be set"); + if req + .headers() + .get("Authorization") + .is_some_and(|v| v == api_key) + { + req.extensions_mut().insert(ApiKey); + } + + let future = self.service.call(req); + + Box::pin(async move { + let response = future.await?; + Ok(response) + }) + } +} diff --git a/pdf2md/server/src/middleware/mod.rs b/pdf2md/server/src/middleware/mod.rs new file mode 100644 index 0000000000..1b9fac6a24 --- /dev/null +++ b/pdf2md/server/src/middleware/mod.rs @@ -0,0 +1 @@ +pub mod api_key_middleware; diff --git a/pdf2md/server/src/models.rs b/pdf2md/server/src/models.rs new file mode 100644 index 0000000000..f8a9d08228 --- /dev/null +++ b/pdf2md/server/src/models.rs @@ -0,0 +1,225 @@ +use derive_more::derive::Display; +use s3::creds::time::OffsetDateTime; +use utoipa::ToSchema; + +pub type RedisPool = bb8_redis::bb8::Pool; + +pub trait TaskMessage { + fn increment_attempt(&mut self); + fn get_attempts(&self) -> u8; + fn has_remaining_attempts(&self) -> bool { + self.get_attempts() < 3 + } + fn get_task_id(&self) -> uuid::Uuid; +} + +#[derive(serde::Deserialize, serde::Serialize, Clone, Debug)] +pub struct FileTask { + pub task_id: uuid::Uuid, + pub upload_file_data: UploadFileReqPayload, + pub attempt_number: u8, +} + +impl TaskMessage for FileTask { + fn increment_attempt(&mut self) { + self.attempt_number += 1; + } + fn get_attempts(&self) -> u8 { + self.attempt_number + } + fn get_task_id(&self) -> uuid::Uuid { + self.task_id + } +} + +#[derive(serde::Deserialize, serde::Serialize, Clone, Debug)] +pub struct ChunkingTask { + pub task_id: uuid::Uuid, + pub file_name: String, + pub page_range: (u32, u32), + pub model_params: ModelParams, + pub attempt_number: u8, +} + +impl TaskMessage for ChunkingTask { + fn increment_attempt(&mut self) { + self.attempt_number += 1; + } + fn get_attempts(&self) -> u8 { + self.attempt_number + } + 
fn get_task_id(&self) -> uuid::Uuid { + self.task_id + } +} + +#[derive(serde::Deserialize, serde::Serialize, Clone, Debug, ToSchema)] +pub struct CreateFileTaskResponse { + pub task_id: uuid::Uuid, + pub status: FileTaskStatus, + pub pos_in_queue: String, +} + +#[derive(serde::Deserialize, serde::Serialize, Clone, Debug, ToSchema)] +pub struct UploadFileReqPayload { + /// Base64 encoded file. This is the standard base64 encoding. + pub base64_file: String, + /// The name of the llm model to use for the task. If not provided, the default model will be used. We support all models from (OpenRouter)[https://openrouter.ai/models] + pub llm_model: Option, + /// The API key to use for the llm being used. + pub llm_api_key: Option, + /// The System prompt that will be used for the conversion of the file. + pub system_prompt: Option, +} + +#[derive(serde::Deserialize, serde::Serialize, Clone, Debug)] +pub struct ModelParams { + pub llm_model: Option, + pub llm_api_key: Option, + pub system_prompt: Option, +} + +impl From for ModelParams { + fn from(payload: UploadFileReqPayload) -> Self { + Self { + llm_model: payload.llm_model, + llm_api_key: payload.llm_api_key, + system_prompt: payload.system_prompt, + } + } +} + +#[derive(Debug, serde::Serialize, serde::Deserialize, clickhouse::Row, Clone)] +pub struct FileTaskClickhouse { + pub id: String, + pub pages: u32, + pub pages_processed: u32, + pub status: String, + #[serde(with = "clickhouse::serde::time::datetime")] + pub created_at: OffsetDateTime, +} + +#[derive(Debug, serde::Serialize, serde::Deserialize, clickhouse::Row, Clone)] +pub struct ChunkClickhouse { + pub id: String, + pub task_id: String, + pub content: String, + pub metadata: String, + #[serde(with = "clickhouse::serde::time::datetime")] + pub created_at: OffsetDateTime, +} + +#[derive(Debug, serde::Serialize, serde::Deserialize, Clone, ToSchema)] +pub struct Chunk { + pub id: String, + pub task_id: String, + pub content: String, + pub metadata: 
serde_json::Value, + pub created_at: String, +} + +impl From for Chunk { + fn from(c: ChunkClickhouse) -> Self { + Self { + id: c.id, + task_id: c.task_id, + content: c.content, + metadata: serde_json::from_str(&c.metadata).unwrap(), + created_at: c.created_at.to_string(), + } + } +} + +#[derive(Debug, serde::Serialize, serde::Deserialize, Clone)] +pub struct GetTaskRequest { + pub pagination_token: Option, + pub limit: Option, +} + +#[derive(Debug, serde::Serialize, serde::Deserialize, Clone, ToSchema)] +pub struct GetTaskResponse { + pub id: String, + pub total_document_pages: u32, + pub pages_processed: u32, + pub status: String, + pub created_at: String, + pub pages: Option>, + pub pagination_token: Option, +} + +impl GetTaskResponse { + pub fn new(task: FileTaskClickhouse) -> Self { + Self { + id: task.id.clone(), + total_document_pages: task.pages, + pages_processed: task.pages_processed, + status: task.status, + created_at: task.created_at.to_string(), + pagination_token: None, + pages: None, + } + } + pub fn new_with_pages(task: FileTaskClickhouse, pages: Vec) -> Self { + Self { + id: task.id.clone(), + total_document_pages: task.pages, + pages_processed: task.pages_processed, + status: task.status, + created_at: task.created_at.to_string(), + pagination_token: pages.last().map(|c| c.id.clone()), + pages: Some(pages.into_iter().map(Chunk::from).collect()), + } + } +} + +#[derive(Debug, serde::Serialize, serde::Deserialize, Display, Clone, PartialEq, Eq, ToSchema)] +pub enum FileTaskStatus { + #[display("Created")] + Created, + #[display("Processing {_0} pages")] + ProcessingFile(u32), + #[display("Processed {_0} pages")] + ChunkingFile(u32), + #[display("Completed")] + Completed, + #[display("Failed")] + Failed, +} + +impl FileTaskStatus { + pub fn get_pages_processed(&self) -> Option { + match self { + FileTaskStatus::ChunkingFile(pages) => Some(*pages), + _ => None, + } + } +} + +impl From for FileTaskStatus { + fn from(s: String) -> Self { + match 
s.as_str() { + "Created" => FileTaskStatus::Created, + "Completed" => FileTaskStatus::Completed, + "Failed" => FileTaskStatus::Failed, + _ => { + // Try to parse processing or pageing status + if let Some(pages) = s + .strip_prefix("Processed ") + .and_then(|s| s.strip_suffix(" pages")) + { + if let Ok(pages) = pages.parse::() { + return FileTaskStatus::ChunkingFile(pages); + } + } else if let Some(pages) = s + .strip_prefix("Processing ") + .and_then(|s| s.strip_suffix(" pages")) + { + if let Ok(pages) = pages.parse::() { + return FileTaskStatus::ProcessingFile(pages); + } + } + FileTaskStatus::Failed + } + } + } +} diff --git a/pdf2md/server/src/operators/clickhouse.rs b/pdf2md/server/src/operators/clickhouse.rs new file mode 100644 index 0000000000..4004856981 --- /dev/null +++ b/pdf2md/server/src/operators/clickhouse.rs @@ -0,0 +1,187 @@ +use crate::{ + errors::ServiceError, + models::{ + ChunkClickhouse, ChunkingTask, FileTaskClickhouse, FileTaskStatus, GetTaskResponse, + RedisPool, + }, +}; + +pub async fn insert_task( + task: FileTaskClickhouse, + clickhouse_client: &clickhouse::Client, +) -> Result<(), ServiceError> { + let mut task_inserter = clickhouse_client.insert("file_tasks").map_err(|e| { + log::error!("Error inserting recommendations: {:?}", e); + ServiceError::InternalServerError(format!("Error inserting task: {:?}", e)) + })?; + + task_inserter.write(&task).await.map_err(|e| { + log::error!("Error inserting recommendations: {:?}", e); + ServiceError::InternalServerError(format!("Error inserting task: {:?}", e)) + })?; + + task_inserter.end().await.map_err(|e| { + log::error!("Error inserting recommendations: {:?}", e); + ServiceError::InternalServerError(format!("Error inserting task: {:?}", e)) + })?; + + Ok(()) +} + +pub async fn insert_page( + task: ChunkingTask, + page: ChunkClickhouse, + clickhouse_client: &clickhouse::Client, + redis_pool: &RedisPool, +) -> Result<(), ServiceError> { + let mut page_inserter = 
clickhouse_client.insert("file_chunks").map_err(|e| { + log::error!("Error getting page_inserter: {:?}", e); + ServiceError::InternalServerError(format!("Error getting page_inserter: {:?}", e)) + })?; + + page_inserter.write(&page).await.map_err(|e| { + log::error!("Error inserting page: {:?}", e); + ServiceError::InternalServerError(format!("Error inserting page: {:?}", e)) + })?; + + page_inserter.end().await.map_err(|e| { + log::error!("Error terminating connection: {:?}", e); + ServiceError::InternalServerError(format!("Error inserting task: {:?}", e)) + })?; + + let mut redis_conn = redis_pool.get().await.map_err(|e| { + log::error!("Failed to get redis connection: {:?}", e); + ServiceError::InternalServerError("Failed to get redis connection".to_string()) + })?; + + let total_pages_processed = redis::cmd("incr") + .arg(format!("{}:count", task.task_id)) + .query_async::(&mut *redis_conn) + .await + .map_err(|e| { + log::error!("Failed to push task to chunks_to_process: {:?}", e); + ServiceError::InternalServerError( + "Failed to push task to chunks_to_process".to_string(), + ) + })?; + + let prev_task = get_task(task.task_id, clickhouse_client).await?; + + log::info!( + "total_pages: {} pages processed: {}", + total_pages_processed, + prev_task.pages + ); + + if total_pages_processed >= prev_task.pages { + update_task_status(task.task_id, FileTaskStatus::Completed, clickhouse_client).await?; + } else { + update_task_status( + task.task_id, + FileTaskStatus::ProcessingFile(total_pages_processed), + clickhouse_client, + ) + .await?; + } + + Ok(()) +} + +pub async fn update_task_status( + task_id: uuid::Uuid, + status: FileTaskStatus, + clickhouse_client: &clickhouse::Client, +) -> Result<(), ServiceError> { + let query = match status { + FileTaskStatus::ProcessingFile(pages) => { + format!( + "ALTER TABLE file_tasks UPDATE + status = '{status}', + pages = {pages} + WHERE id = '{task_id}'", + status = status, + pages = pages, + task_id = task_id + ) + } + 
FileTaskStatus::ChunkingFile(pages) => { + format!( + "ALTER TABLE file_tasks UPDATE + status = '{status}', + pages_processed = {pages} + WHERE id = '{task_id}'", + status = status, + task_id = task_id, + pages = pages + ) + } + _ => { + format!( + "ALTER TABLE file_tasks UPDATE status = '{status}' WHERE id = '{task_id}'", + status = status, + task_id = task_id + ) + } + }; + + log::info!("Update Task Sttaus Query: {}", query); + + clickhouse_client + .query(&query) + .execute() + .await + .map_err(|err| { + log::error!("Failed to update task status {:?}", err); + ServiceError::BadRequest("Failed to update task status".to_string()) + })?; + + Ok(()) +} + +pub async fn get_task( + task_id: uuid::Uuid, + clickhouse_client: &clickhouse::Client, +) -> Result { + let task: FileTaskClickhouse = clickhouse_client + .query("SELECT ?fields FROM file_tasks WHERE id = ?") + .bind(task_id) + .fetch_one() + .await + .map_err(|err| { + log::error!("Failed to get task {:?}", err); + ServiceError::BadRequest("Failed to get task".to_string()) + })?; + + Ok(task) +} + +pub async fn get_task_pages( + task: FileTaskClickhouse, + limit: Option, + offset_id: Option, + clickhouse_client: &clickhouse::Client, +) -> Result { + if FileTaskStatus::from(task.status.clone()) == FileTaskStatus::Completed || task.pages > 0 { + let limit = limit.unwrap_or(20); + + log::info!("offset id {:?}", offset_id); + + let pages: Vec = clickhouse_client + .query( + "SELECT ?fields FROM file_chunks WHERE task_id = ? AND id > ? 
ORDER BY id LIMIT ?", + ) + .bind(task.id.clone()) + .bind(offset_id.unwrap_or(uuid::Uuid::nil())) + .bind(limit) + .fetch_all() + .await + .map_err(|err| { + log::error!("Failed to get pages {:?}", err); + ServiceError::BadRequest("Failed to get pages".to_string()) + })?; + + return Ok(GetTaskResponse::new_with_pages(task, pages)); + } + + Ok(GetTaskResponse::new(task)) +} diff --git a/pdf2md/server/src/operators/mod.rs b/pdf2md/server/src/operators/mod.rs new file mode 100644 index 0000000000..d61efabdf3 --- /dev/null +++ b/pdf2md/server/src/operators/mod.rs @@ -0,0 +1,4 @@ +pub mod clickhouse; +pub mod pdf_chunk; +pub mod redis; +pub mod s3; diff --git a/pdf2md/server/src/operators/pdf_chunk.rs b/pdf2md/server/src/operators/pdf_chunk.rs new file mode 100644 index 0000000000..541316261c --- /dev/null +++ b/pdf2md/server/src/operators/pdf_chunk.rs @@ -0,0 +1,214 @@ +use crate::models::RedisPool; +use crate::{ + errors::ServiceError, + get_env, + models::{ChunkClickhouse, ChunkingTask, ModelParams}, + operators::clickhouse::insert_page, +}; +use base64::Engine; +use image::{codecs::png::PngEncoder, ImageEncoder}; +use openai_dive::v1::{ + api::Client, + resources::chat::{ + ChatCompletionParametersBuilder, ChatMessage, ChatMessageContent, + ChatMessageImageContentPart, ImageUrlType, + }, +}; +use pdf2image::{image::DynamicImage, PDF}; +use regex::Regex; +use s3::creds::time::OffsetDateTime; + +const CHUNK_SYSTEM_PROMPT: &str = " + Convert the following PDF page to markdown. + Return only the markdown with no explanation text. 
+ Do not exclude any content from the page."; + +fn get_data_url_from_image(img: DynamicImage) -> Result { + let mut encoded = Vec::new(); + + let png_encoder = PngEncoder::new(&mut encoded); + + png_encoder + .write_image( + img.as_bytes(), + img.width(), + img.height(), + image::ExtendedColorType::Rgb8, + ) + .map_err(|_| ServiceError::BadRequest("Failed to encode image".to_string()))?; + + // Encode result base64 - utf-8 + + let encoded = base64::prelude::BASE64_STANDARD.encode(encoded); + + let prefix = "data:image/png;base64,"; + + let final_encoded = format!("{prefix}{encoded}"); + + Ok(final_encoded) +} + +fn get_llm_client(params: ModelParams) -> Client { + let base_url = get_env!("LLM_BASE_URL", "LLM_BASE_URL should be set").into(); + + let llm_api_key: String = params.llm_api_key.unwrap_or( + get_env!( + "LLM_API_KEY", + "LLM_API_KEY for openrouter or self-hosted should be set" + ) + .into(), + ); + + Client { + headers: None, + project: None, + api_key: llm_api_key, + http_client: reqwest::Client::new(), + base_url, + organization: None, + } +} + +async fn get_pages_from_image( + img: DynamicImage, + prev_md_doc: Option, + page: u32, + task: ChunkingTask, + client: Client, +) -> Result { + let llm_model: String = task + .model_params + .llm_model + .unwrap_or(get_env!("LLM_MODEL", "LLM_MODEL should be set").into()); + + let data_url = get_data_url_from_image(img)?; + + let mut messages = vec![ + ChatMessage::System { + content: (ChatMessageContent::Text( + task.model_params + .system_prompt + .unwrap_or(CHUNK_SYSTEM_PROMPT.to_string()), + )), + name: None, + }, + ChatMessage::User { + content: ChatMessageContent::ImageContentPart(vec![ChatMessageImageContentPart { + r#type: "image_url".to_string(), + image_url: ImageUrlType { + url: data_url, + detail: None, + }, + }]), + name: None, + }, + ]; + + if let Some(prev_md_doc) = prev_md_doc { + let prev_md_doc_message = ChatMessage::System { + content: ChatMessageContent::Text(format!( + "Markdown must 
maintain consistent formatting with the following page: \n\n {}", + prev_md_doc + )), + name: None, + }; + + messages.insert(1, prev_md_doc_message); + } + + let params = ChatCompletionParametersBuilder::default() + .model(llm_model) + .messages(messages) + .build() + .map_err(|_| { + ServiceError::BadRequest("Failed to build chat completion parameters".to_string()) + })?; + + let response = client.chat().create(params).await.map_err(|e| { + ServiceError::InternalServerError( + format!("Failed to get chat completion response: {:?}", e).to_string(), + ) + })?; + + let message_response = response + .choices + .first() + .ok_or(ServiceError::InternalServerError( + "No choices in chat completion response".to_string(), + ))?; + + let content = match &message_response.message { + ChatMessage::Assistant { + content: Some(ChatMessageContent::Text(content)), + .. + } => content.clone(), + + _ => { + return Err(ServiceError::InternalServerError( + "Unexpected message response".to_string(), + )) + } + }; + + let mut metadata = serde_json::json!({ + "page": page, + }); + if let Some(usage) = response.usage { + metadata["usage"] = serde_json::json!(usage); + } + + Ok(ChunkClickhouse { + id: uuid::Uuid::new_v4().to_string(), + task_id: task.task_id.to_string().clone(), + content: format_markdown(&content), + metadata: metadata.to_string(), + created_at: OffsetDateTime::now_utc(), + }) +} + +fn format_markdown(text: &str) -> String { + let formatted_markdown = Regex::new(r"(?m)^```[a-z]*\n([\s\S]*?)\n```$") + .unwrap() + .replace_all(text, "$1"); + let formatted_markdown = Regex::new(r"(?m)^```\n([\s\S]*?)\n```$") + .unwrap() + .replace_all(&formatted_markdown, "$1"); + formatted_markdown.into_owned() +} + +pub async fn chunk_sub_pages( + data: Vec, + task: ChunkingTask, + clickhouse_client: &clickhouse::Client, + redis_pool: &RedisPool, +) -> Result, ServiceError> { + let pdf = PDF::from_bytes(data) + .map_err(|_| ServiceError::BadRequest("Failed to open PDF 
file".to_string()))?; + + let pages = pdf + .render(pdf2image::Pages::All, None) + .map_err(|_| ServiceError::BadRequest("Failed to render PDF file".to_string()))?; + + let mut result_pages = vec![]; + + let client = get_llm_client(task.model_params.clone()); + let mut prev_md_doc = None; + + for (page_image, page_num) in pages.into_iter().zip(task.page_range.0..task.page_range.1) { + let page = get_pages_from_image( + page_image, + prev_md_doc, + page_num, + task.clone(), + client.clone(), + ) + .await?; + prev_md_doc = Some(page.content.clone()); + insert_page(task.clone(), page.clone(), clickhouse_client, redis_pool).await?; + log::info!("Page {} processed", page_num); + + result_pages.push(page); + } + + Ok(result_pages) +} diff --git a/pdf2md/server/src/operators/redis.rs b/pdf2md/server/src/operators/redis.rs new file mode 100644 index 0000000000..9e6176c0f3 --- /dev/null +++ b/pdf2md/server/src/operators/redis.rs @@ -0,0 +1,147 @@ +use crate::{ + errors::ServiceError, + models::{FileTaskStatus, TaskMessage}, + operators::clickhouse::update_task_status, +}; + +#[macro_export] +macro_rules! 
process_task_with_retry { + ($redis_conn:expr, &$clickhouse_client:expr, $queue_name:expr, $process_fn:expr, $task_type:ty) => { + let should_terminate = Arc::new(AtomicBool::new(false)); + signal_hook::flag::register(SIGTERM, Arc::clone(&should_terminate)) + .expect("Failed to register shutdown hook"); + + loop { + if should_terminate.load(Ordering::Relaxed) { + log::info!("Shutting down"); + break; + } + + let task = listen_to_redis::<$task_type>($redis_conn.clone(), $queue_name).await; + + match task { + Some(task) => { + log::info!("Processing task: {:?}", task.task_id); + let result = $process_fn(task.clone()).await; + + if let Err(err) = result { + log::error!("Task processing failed: {:?}", err); + + // Requeue the failed task + if let Err(requeue_err) = pdf2md_server::operators::redis::readd_to_queue( + task, + err, + $queue_name, + $redis_conn.clone(), + &$clickhouse_client, + ) + .await + { + log::error!("Failed to requeue task: {:?}", requeue_err); + } else { + log::info!("Successfully requeued failed task"); + } + } + } + None => { + // Optional: Add delay or other handling for when no task is available + tokio::time::sleep(std::time::Duration::from_secs(1)).await; + } + } + } + }; +} + +pub async fn listen_to_redis serde::Deserialize<'a>>( + redis_connection: redis::aio::MultiplexedConnection, + queue_name: &str, +) -> Option { + let payload_result: Result, redis::RedisError> = redis::cmd("brpoplpush") + .arg(queue_name) + .arg(format!("{}_processing", queue_name)) + .arg(1.0) + .query_async(&mut redis_connection.clone()) + .await; + + let serialized_message = if let Ok(payload) = payload_result { + if payload.is_empty() { + return None; + } + + payload + .first() + .expect("Payload must have a first element") + .clone() + } else { + log::error!("Unable to process {:?}", payload_result); + return None; + }; + + let worker_message: T = + serde_json::from_str(&serialized_message).expect("Failed to parse file message"); + + Some(worker_message) +} + +pub 
async fn readd_to_queue serde::Serialize + TaskMessage>( + mut payload: T, + error: ServiceError, + queue_name: &str, + mut redis_connection: redis::aio::MultiplexedConnection, + clickhouse_client: &clickhouse::Client, +) -> Result<(), ServiceError> { + let old_payload_message = serde_json::to_string(&payload).map_err(|_| { + ServiceError::InternalServerError("Failed to reserialize input for retry".to_string()) + })?; + + payload.increment_attempt(); + + let _ = redis::cmd("LREM") + .arg(format!("{}_processing", queue_name)) + .arg(1) + .arg(old_payload_message.clone()) + .query_async::(&mut redis_connection) + .await; + + if !payload.has_remaining_attempts() { + log::error!("Message failed 3 times quitting {:?}", error); + + update_task_status( + payload.get_task_id(), + FileTaskStatus::Failed, + clickhouse_client, + ) + .await?; + + redis::cmd("lpush") + .arg(format!("{}_failed", queue_name)) + .arg(old_payload_message) + .query_async::(&mut redis_connection) + .await + .map_err(|err| ServiceError::BadRequest(err.to_string()))?; + + return Err(ServiceError::InternalServerError(format!( + "Message failed 3 times {:?}", + error + ))); + } + + let new_payload_message = serde_json::to_string(&payload).map_err(|_| { + ServiceError::InternalServerError("Failed to reserialize input for retry".to_string()) + })?; + + log::error!( + "Message failed, re-adding {:?} retry: {:?}", + error, + payload.get_attempts() + ); + + redis::cmd("lpush") + .arg(queue_name) + .arg(&new_payload_message) + .query_async::(&mut redis_connection) + .await + .map_err(|err| ServiceError::BadRequest(err.to_string()))?; + + Ok(()) +} diff --git a/pdf2md/server/src/operators/s3.rs b/pdf2md/server/src/operators/s3.rs new file mode 100644 index 0000000000..ab885838ab --- /dev/null +++ b/pdf2md/server/src/operators/s3.rs @@ -0,0 +1,37 @@ +use s3::{creds::Credentials, Bucket, Region}; + +use crate::{errors::ServiceError, get_env}; + +pub fn get_aws_bucket() -> Result { + let aws_region_name = 
std::env::var("AWS_REGION").unwrap_or("".to_string()); + let s3_endpoint = get_env!("S3_ENDPOINT", "S3_ENDPOINT should be set").into(); + let s3_bucket_name = get_env!("S3_BUCKET", "S3_BUCKET should be set"); + + let aws_region = Region::Custom { + region: aws_region_name, + endpoint: s3_endpoint, + }; + + let aws_credentials = if let Ok(creds) = Credentials::from_instance_metadata() { + creds + } else { + let s3_access_key = get_env!("S3_ACCESS_KEY", "S3_ACCESS_KEY should be set").into(); + let s3_secret_key = get_env!("S3_SECRET_KEY", "S3_SECRET_KEY should be set").into(); + Credentials { + access_key: Some(s3_access_key), + secret_key: Some(s3_secret_key), + security_token: None, + session_token: None, + expiration: None, + } + }; + + let aws_bucket = Bucket::new(s3_bucket_name, aws_region, aws_credentials) + .map_err(|e| { + log::error!("Could not create or get bucket {:?}", e); + ServiceError::BadRequest("Could not create or get bucket".to_string()) + })? + .with_path_style(); + + Ok(*aws_bucket) +} diff --git a/pdf2md/server/src/routes/create_task.rs b/pdf2md/server/src/routes/create_task.rs new file mode 100644 index 0000000000..c7e2131599 --- /dev/null +++ b/pdf2md/server/src/routes/create_task.rs @@ -0,0 +1,71 @@ +use crate::{ + errors::{ErrorResponseBody, ServiceError}, + middleware::api_key_middleware::ApiKey, + models::{self, CreateFileTaskResponse, FileTask, FileTaskStatus, RedisPool}, +}; +use actix_web::{post, web, HttpResponse}; +use s3::creds::time::OffsetDateTime; + +/// Create a new File Task +/// +/// This endpoint creates a new task to convert a file to markdown. The task is added to a queue in Redis for processing. 
+#[utoipa::path( + post, + path = "/task", + tag = "Task", + context_path = "/api", + request_body(content = models::UploadFileReqPayload, description = "JSON request payload to create a new task", content_type = "application/json"), + responses( + (status = 200, description = "JSON response payload containing the created task", body = models::CreateFileTaskResponse), + (status = 400, description = "Error typically due to deserialization issues", body = ErrorResponseBody), + ), + security( + ("api_key" = []) + ) +)] +#[post("")] +async fn create_task( + req: web::Json, + redis_pool: web::Data, + clickhouse_client: web::Data, + _api_key: ApiKey, +) -> Result { + let clickhouse_task = models::FileTaskClickhouse { + id: uuid::Uuid::new_v4().to_string(), + pages: 0, + pages_processed: 0, + status: "CREATED".to_string(), + created_at: OffsetDateTime::now_utc(), + }; + + crate::operators::clickhouse::insert_task(clickhouse_task.clone(), &clickhouse_client) + .await + .map_err(|err| ServiceError::BadRequest(err.to_string()))?; + + let task = FileTask { + task_id: clickhouse_task.id.parse().unwrap(), + upload_file_data: req.into_inner(), + attempt_number: 0, + }; + + let mut redis_conn = redis_pool + .get() + .await + .map_err(|err| ServiceError::BadRequest(err.to_string()))?; + + let serialized_message: String = serde_json::to_string(&task) + .map_err(|_| ServiceError::BadRequest("Failed to Serialize FileTask".to_string()))?; + + let pos_in_queue = redis::cmd("lpush") + .arg("files_to_process") + .arg(&serialized_message) + .query_async::(&mut *redis_conn) + .await + .map_err(|err| ServiceError::BadRequest(err.to_string()))?; + + Ok(HttpResponse::Ok().json(CreateFileTaskResponse { + task_id: task.task_id, + status: FileTaskStatus::Created, + pos_in_queue, + })) +} diff --git a/pdf2md/server/src/routes/get_task.rs b/pdf2md/server/src/routes/get_task.rs new file mode 100644 index 0000000000..254d6f5af7 --- /dev/null +++ b/pdf2md/server/src/routes/get_task.rs @@ -0,0 +1,47 
@@ +use crate::{ + errors::{ErrorResponseBody, ServiceError}, + middleware::api_key_middleware::ApiKey, + models::{self, GetTaskRequest}, +}; +use actix_web::{get, web, HttpResponse}; + +/// Retieve a File Task by ID +/// +/// This endpoint retrieves a task by its id. The task is returned along with the pages that have been created, if the file chunking has been completed. +#[utoipa::path( + get, + path = "/task/{task_id}", + tag = "Task", + context_path = "/api", + params( + ("task_id" = uuid::Uuid, Path, description = "The id of the task you want to retrieve."), + ("limit" = Option, Query, description = "The number of pages to return."), + ("pagination_token" = Option, Query, description = "The pagination token to use for the next request."), + ), + responses( + (status = 200, description = "JSON response payload containing the created pages", body = models::GetTaskResponse), + (status = 400, description = "Error typically due to deserialization issues", body = ErrorResponseBody), + ), + security( + ("api_key" = []) + ) +)] +#[get("/{task_id}")] +async fn get_task( + task_id: web::Path, + data: web::Query, + clickhouse_client: web::Data, + _api_key: ApiKey, +) -> Result { + let task_id = task_id.into_inner(); + let task = crate::operators::clickhouse::get_task(task_id, &clickhouse_client).await?; + let result = crate::operators::clickhouse::get_task_pages( + task, + data.limit, + data.pagination_token, + &clickhouse_client, + ) + .await?; + + Ok(HttpResponse::Ok().json(result)) +} diff --git a/pdf2md/server/src/routes/jinja_templates.rs b/pdf2md/server/src/routes/jinja_templates.rs new file mode 100644 index 0000000000..a032445858 --- /dev/null +++ b/pdf2md/server/src/routes/jinja_templates.rs @@ -0,0 +1,48 @@ +use crate::{ + errors::{ErrorResponseBody, ServiceError}, + get_env, Templates, +}; +use actix_web::{get, web, HttpResponse}; +use minijinja::context; + +#[utoipa::path( + get, + path = "/", + context_path = "/", + tag = "UI", + responses( + (status = 200, 
description = "UI meant for public consumption"), + (status = 400, description = "Service error relating to loading the public page", body = ErrorResponseBody), + ), +)] +#[get("/")] +pub async fn public_page(templates: Templates<'_>) -> Result { + let templ = templates.get_template("demo-ui.html").unwrap(); + let trieve_api_key = get_env!("API_KEY", "API_KEY should be set"); + let response_body = templ + .render(context! { + trieve_api_key + }) + .unwrap(); + + Ok(HttpResponse::Ok().body(response_body)) +} + +#[utoipa::path( + get, + path = "/static/{file_name}", + context_path = "/static", + tag = "UI", + responses( + (status = 200, description = "File"), + (status = 400, description = "Service error relating to getting the file", body = ErrorResponseBody), + ), + )] +#[get("/{file_name}")] +pub async fn static_files(file_name: web::Path) -> Result { + let sanitized_file_name = file_name.replace("..", ""); + let file = std::fs::read_to_string(format!("./static/{}", sanitized_file_name)) + .map_err(|_| ServiceError::InternalServerError("Failed to read file".to_string()))?; + + Ok(HttpResponse::Ok().body(file)) +} diff --git a/pdf2md/server/src/routes/mod.rs b/pdf2md/server/src/routes/mod.rs new file mode 100644 index 0000000000..46ceeb21ff --- /dev/null +++ b/pdf2md/server/src/routes/mod.rs @@ -0,0 +1,3 @@ +pub mod create_task; +pub mod get_task; +pub mod jinja_templates; diff --git a/pdf2md/server/src/templates/demo-ui.html b/pdf2md/server/src/templates/demo-ui.html new file mode 100644 index 0000000000..114b8c9373 --- /dev/null +++ b/pdf2md/server/src/templates/demo-ui.html @@ -0,0 +1,53 @@ +{% extends "skeleton.html" %} {% block body %} +
+
+
+

+ OCR With Intelligence +

+

+ Convert any PDF to LLM-ready Markdown using latest-gen vision models + like GPT-4o. +

+
+
+
+ +
+ +

or drag and drop

+
+

PDF

+
+
+
+
+{% endblock %} diff --git a/pdf2md/server/src/templates/skeleton.html b/pdf2md/server/src/templates/skeleton.html new file mode 100644 index 0000000000..80554b2e07 --- /dev/null +++ b/pdf2md/server/src/templates/skeleton.html @@ -0,0 +1,186 @@ + + + + + + + + + + + + + + + + + + + Trieve PDF2MD + + + + + + +
+ {% block navbar %} +
+ +
+ {% endblock %} {% block body %} +
This is the homepage
+ {% endblock %} +
+ + diff --git a/pdf2md/server/src/workers/chunk-worker.rs b/pdf2md/server/src/workers/chunk-worker.rs new file mode 100644 index 0000000000..3d317d93d0 --- /dev/null +++ b/pdf2md/server/src/workers/chunk-worker.rs @@ -0,0 +1,128 @@ +use chm::tools::migrations::{run_pending_migrations, SetupArgs}; +use pdf2md_server::{ + errors::ServiceError, + get_env, + models::{ChunkingTask, RedisPool}, + operators::{pdf_chunk::chunk_sub_pages, redis::listen_to_redis, s3::get_aws_bucket}, + process_task_with_retry, +}; +use signal_hook::consts::SIGTERM; +use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, +}; + +#[tokio::main] +async fn main() { + dotenvy::dotenv().ok(); + + env_logger::builder() + .target(env_logger::Target::Stdout) + .filter_level(log::LevelFilter::Info) + .init(); + + let redis_url = get_env!("REDIS_URL", "REDIS_URL is not set"); + let redis_connections: u32 = std::env::var("REDIS_CONNECTIONS") + .unwrap_or("2".to_string()) + .parse() + .unwrap_or(2); + + let redis_manager = + bb8_redis::RedisConnectionManager::new(redis_url).expect("Failed to connect to redis"); + + let redis_pool = bb8_redis::bb8::Pool::builder() + .max_size(redis_connections) + .connection_timeout(std::time::Duration::from_secs(2)) + .build(redis_manager) + .await + .expect("Failed to create redis pool"); + + let args = SetupArgs { + url: Some(std::env::var("CLICKHOUSE_URL").unwrap_or("http://localhost:8123".to_string())), + user: Some(std::env::var("CLICKHOUSE_USER").unwrap_or("default".to_string())), + password: Some(std::env::var("CLICKHOUSE_PASSWORD").unwrap_or("password".to_string())), + database: Some(std::env::var("CLICKHOUSE_DB").unwrap_or("default".to_string())), + }; + + let clickhouse_client = clickhouse::Client::default() + .with_url(args.url.as_ref().unwrap()) + .with_user(args.user.as_ref().unwrap()) + .with_password(args.password.as_ref().unwrap()) + .with_database(args.database.as_ref().unwrap()) + .with_option("async_insert", "1") + 
.with_option("wait_for_async_insert", "0"); + + let _ = run_pending_migrations(args.clone()).await.map_err(|err| { + log::error!("Failed to run clickhouse migrations: {:?}", err); + }); + + let should_terminate = Arc::new(AtomicBool::new(false)); + signal_hook::flag::register(SIGTERM, Arc::clone(&should_terminate)) + .expect("Failed to register shutdown hook"); + + let mut redis_conn_sleep = std::time::Duration::from_secs(1); + + #[allow(unused_assignments)] + let mut opt_redis_connection = None; + + loop { + let borrowed_redis_connection = match redis_pool.get().await { + Ok(redis_connection) => Some(redis_connection), + Err(err) => { + log::error!("Failed to get redis connection outside of loop: {:?}", err); + None + } + }; + + if borrowed_redis_connection.is_some() { + opt_redis_connection = borrowed_redis_connection; + break; + } + + tokio::time::sleep(redis_conn_sleep).await; + redis_conn_sleep = std::cmp::min(redis_conn_sleep * 2, std::time::Duration::from_secs(300)); + } + + let redis_connection = + opt_redis_connection.expect("Failed to get redis connection outside of loop"); + + log::info!("Starting chunking worker"); + + process_task_with_retry!( + redis_connection, + &clickhouse_client.clone(), + "files_to_chunk", + |task| chunk_sub_pdf(task, clickhouse_client.clone(), redis_pool.clone()), + ChunkingTask + ); +} + +pub async fn chunk_sub_pdf( + task: ChunkingTask, + clickhouse_client: clickhouse::Client, + redis_pool: RedisPool, +) -> Result<(), pdf2md_server::errors::ServiceError> { + let bucket = get_aws_bucket()?; + let file_data = bucket + .get_object(task.file_name.clone()) + .await + .map_err(|e| { + log::error!("Could not get file from S3 {:?}", e); + ServiceError::BadRequest("File is not present in s3".to_string()) + })? 
+ .as_slice() + .to_vec(); + + let result = chunk_sub_pages( + file_data, + task.clone(), + task.page_range, + &clickhouse_client, + &redis_pool, + ) + .await?; + + log::info!("Got {} pages for {:?}", result.len(), task.task_id); + + Ok(()) +} diff --git a/pdf2md/server/src/workers/supervisor-worker.rs b/pdf2md/server/src/workers/supervisor-worker.rs new file mode 100644 index 0000000000..9ae997b1fc --- /dev/null +++ b/pdf2md/server/src/workers/supervisor-worker.rs @@ -0,0 +1,293 @@ +use base64::Engine; +use chm::tools::migrations::{run_pending_migrations, SetupArgs}; +use lopdf::{Document, Object, ObjectId}; +use pdf2md_server::{ + errors::ServiceError, + get_env, + models::{self, FileTask, FileTaskStatus}, + operators::{clickhouse::update_task_status, redis::listen_to_redis, s3::get_aws_bucket}, + process_task_with_retry, +}; +use signal_hook::consts::SIGTERM; +use std::{ + collections::BTreeMap, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, +}; + +#[tokio::main] +async fn main() { + dotenvy::dotenv().ok(); + + env_logger::builder() + .target(env_logger::Target::Stdout) + .filter_level(log::LevelFilter::Info) + .init(); + + let redis_url = get_env!("REDIS_URL", "REDIS_URL is not set"); + let redis_connections: u32 = std::env::var("REDIS_CONNECTIONS") + .unwrap_or("2".to_string()) + .parse() + .unwrap_or(2); + + let redis_manager = + bb8_redis::RedisConnectionManager::new(redis_url).expect("Failed to connect to redis"); + + let redis_pool = bb8_redis::bb8::Pool::builder() + .max_size(redis_connections) + .connection_timeout(std::time::Duration::from_secs(2)) + .build(redis_manager) + .await + .expect("Failed to create redis pool"); + + let args = SetupArgs { + url: Some(std::env::var("CLICKHOUSE_URL").unwrap_or("http://localhost:8123".to_string())), + user: Some(std::env::var("CLICKHOUSE_USER").unwrap_or("default".to_string())), + password: Some(std::env::var("CLICKHOUSE_PASSWORD").unwrap_or("password".to_string())), + database: 
Some(std::env::var("CLICKHOUSE_DB").unwrap_or("default".to_string())), + }; + + let clickhouse_client = clickhouse::Client::default() + .with_url(args.url.as_ref().unwrap()) + .with_user(args.user.as_ref().unwrap()) + .with_password(args.password.as_ref().unwrap()) + .with_database(args.database.as_ref().unwrap()) + .with_option("async_insert", "1") + .with_option("wait_for_async_insert", "0"); + + let _ = run_pending_migrations(args.clone()).await.map_err(|err| { + log::error!("Failed to run clickhouse migrations: {:?}", err); + }); + + let should_terminate = Arc::new(AtomicBool::new(false)); + signal_hook::flag::register(SIGTERM, Arc::clone(&should_terminate)) + .expect("Failed to register shutdown hook"); + + let mut redis_conn_sleep = std::time::Duration::from_secs(1); + + #[allow(unused_assignments)] + let mut opt_redis_connection = None; + + loop { + let borrowed_redis_connection = match redis_pool.get().await { + Ok(redis_connection) => Some(redis_connection), + Err(err) => { + log::error!("Failed to get redis connection outside of loop: {:?}", err); + None + } + }; + + if borrowed_redis_connection.is_some() { + opt_redis_connection = borrowed_redis_connection; + break; + } + + tokio::time::sleep(redis_conn_sleep).await; + redis_conn_sleep = std::cmp::min(redis_conn_sleep * 2, std::time::Duration::from_secs(300)); + } + + let redis_connection = + opt_redis_connection.expect("Failed to get redis connection outside of loop"); + + log::info!("Starting supervisor worker"); + + process_task_with_retry!( + redis_connection, + &clickhouse_client.clone(), + "files_to_process", + |task| chunk_pdf(task, redis_connection.clone(), clickhouse_client.clone()), + FileTask + ); +} + +pub async fn chunk_pdf( + task: FileTask, + mut redis_connection: redis::aio::MultiplexedConnection, + clickhouse_client: clickhouse::Client, +) -> Result<(), ServiceError> { + let estimated_size = (task.upload_file_data.base64_file.len() * 3) / 4; + let mut decoded_file_data = 
Vec::with_capacity(estimated_size); + base64::prelude::BASE64_STANDARD + .decode_vec( + task.upload_file_data.base64_file.as_bytes(), + &mut decoded_file_data, + ) + .map_err(|_e| ServiceError::BadRequest("Could not decode base64 file".to_string()))?; + + let doc = lopdf::Document::load_mem(&decoded_file_data) + .map_err(|e| ServiceError::BadRequest(format!("Could not load pdf: {}", e)))?; + + let all_pages = doc.get_pages(); + let max_page_num = *all_pages.keys().last().unwrap(); + let pages_per_doc = 10; + let num_docs = (max_page_num as f64 / pages_per_doc as f64).ceil() as u32; + + let bucket = get_aws_bucket()?; + let mut buffer = Vec::new(); + + // Process each chunk + for i in 0..num_docs { + let start_page = i * pages_per_doc + 1; + let end_page = std::cmp::min((i + 1) * pages_per_doc, max_page_num); + + // Split the document + let mut split_doc = split_pdf(doc.clone(), start_page, end_page) + .map_err(|e| ServiceError::BadRequest(format!("Failed to split PDF: {}", e)))?; + + // Clear and reuse buffer + buffer.clear(); + + // Save to reused buffer + split_doc + .save_to(&mut buffer) + .map_err(|_e| ServiceError::BadRequest("Could not save pdf to buffer".to_string()))?; + + let file_name = format!("{}part{}.pdf", task.task_id, i + 1); + bucket + .put_object(file_name.clone(), buffer.as_slice()) + .await + .map_err(|e| { + log::error!("Could not upload file to S3 {:?}", e); + ServiceError::BadRequest("Could not upload file to S3".to_string()) + })?; + + let chunking_task = serde_json::to_string(&models::ChunkingTask { + task_id: task.task_id, + file_name, + page_range: (start_page, end_page), + model_params: task.upload_file_data.clone().into(), + attempt_number: 0, + }) + .map_err(|_e| ServiceError::BadRequest("Failed to serialize chunking task".to_string()))?; + + redis::cmd("lpush") + .arg("files_to_chunk") + .arg(&chunking_task) + .query_async::(&mut redis_connection) + .await + .map_err(|err| ServiceError::BadRequest(err.to_string()))?; + + 
log::info!("Uploaded part {} of {} to S3", i + 1, num_docs); + } + + update_task_status( + task.task_id, + FileTaskStatus::ProcessingFile(num_docs * pages_per_doc), + &clickhouse_client, + ) + .await?; + + Ok(()) +} + +pub fn split_pdf(doc: Document, start_page: u32, end_page: u32) -> Result { + let mut new_document = Document::with_version(doc.version.clone()); + let page_numbers_to_keep: Vec = (start_page..=end_page).collect(); + + // Get mapping of page numbers to object IDs + let page_map = doc.get_pages(); + + // Collect only the pages we want to keep + let mut documents_pages = BTreeMap::new(); + let mut documents_objects = BTreeMap::new(); + + // Filter and collect pages we want to keep + for page_num in page_numbers_to_keep { + if let Some(&object_id) = page_map.get(&page_num) { + if let Ok(page_object) = doc.get_object(object_id) { + documents_pages.insert(object_id, page_object.clone()); + } + } + } + + // Collect all objects from original document + documents_objects.extend(doc.objects.clone()); + + // "Catalog" and "Pages" are mandatory + let mut catalog_object: Option<(ObjectId, Object)> = None; + let mut pages_object: Option<(ObjectId, Object)> = None; + + // Process all objects except "Page" type + for (object_id, object) in documents_objects.iter() { + match object.type_name().unwrap_or("") { + "Catalog" => { + catalog_object = Some(( + if let Some((id, _)) = catalog_object { + id + } else { + *object_id + }, + object.clone(), + )); + } + "Pages" => { + if let Ok(dictionary) = object.as_dict() { + pages_object = Some(( + if let Some((id, _)) = pages_object { + id + } else { + *object_id + }, + Object::Dictionary(dictionary.clone()), + )); + } + } + "Page" => {} // Handled separately + _ => { + // Copy other necessary objects (resources, fonts, etc.) 
+ new_document.objects.insert(*object_id, object.clone()); + } + } + } + + // If no "Pages" object found, abort + let pages_object = pages_object.ok_or_else(|| "Pages root not found".to_string())?; + let catalog_object = catalog_object.ok_or_else(|| "Catalog root not found".to_string())?; + + // Add pages to new document + for (object_id, object) in documents_pages.iter() { + if let Ok(dictionary) = object.as_dict() { + let mut dictionary = dictionary.clone(); + dictionary.set("Parent", pages_object.0); + new_document + .objects + .insert(*object_id, Object::Dictionary(dictionary)); + } + } + + // Build new "Pages" object + if let Ok(dictionary) = pages_object.1.as_dict() { + let mut dictionary = dictionary.clone(); + dictionary.set("Count", documents_pages.len() as u32); + dictionary.set( + "Kids", + documents_pages + .into_keys() + .map(Object::Reference) + .collect::>(), + ); + new_document + .objects + .insert(pages_object.0, Object::Dictionary(dictionary)); + } + + // Build new "Catalog" object + if let Ok(dictionary) = catalog_object.1.as_dict() { + let mut dictionary = dictionary.clone(); + dictionary.set("Pages", pages_object.0); + dictionary.remove(b"Outlines"); // Remove outlines as we're splitting + new_document + .objects + .insert(catalog_object.0, Object::Dictionary(dictionary)); + } + + // Set up trailer and document structure + new_document.trailer.set("Root", catalog_object.0); + new_document.max_id = new_document.objects.len() as u32; + new_document.renumber_objects(); + new_document.compress(); + + Ok(new_document) +} diff --git a/pdf2md/server/static/in.css b/pdf2md/server/static/in.css new file mode 100644 index 0000000000..b5c61c9567 --- /dev/null +++ b/pdf2md/server/static/in.css @@ -0,0 +1,3 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; diff --git a/pdf2md/server/static/output.css b/pdf2md/server/static/output.css new file mode 100644 index 0000000000..1dd563dce9 --- /dev/null +++ b/pdf2md/server/static/output.css @@ -0,0 
+1,830 @@ +/* +! tailwindcss v3.4.10 | MIT License | https://tailwindcss.com +*/ + +/* +1. Prevent padding and border from affecting element width. (https://github.com/mozdevs/cssremedy/issues/4) +2. Allow adding a border to an element by just adding a border-width. (https://github.com/tailwindcss/tailwindcss/pull/116) +*/ + +*, +::before, +::after { + box-sizing: border-box; + /* 1 */ + border-width: 0; + /* 2 */ + border-style: solid; + /* 2 */ + border-color: #e5e7eb; + /* 2 */ +} + +::before, +::after { + --tw-content: ''; +} + +/* +1. Use a consistent sensible line-height in all browsers. +2. Prevent adjustments of font size after orientation changes in iOS. +3. Use a more readable tab size. +4. Use the user's configured `sans` font-family by default. +5. Use the user's configured `sans` font-feature-settings by default. +6. Use the user's configured `sans` font-variation-settings by default. +7. Disable tap highlights on iOS +*/ + +html, +:host { + line-height: 1.5; + /* 1 */ + -webkit-text-size-adjust: 100%; + /* 2 */ + -moz-tab-size: 4; + /* 3 */ + -o-tab-size: 4; + tab-size: 4; + /* 3 */ + font-family: Quicksand, system-ui, sans-serif; + /* 4 */ + font-feature-settings: normal; + /* 5 */ + font-variation-settings: normal; + /* 6 */ + -webkit-tap-highlight-color: transparent; + /* 7 */ +} + +/* +1. Remove the margin in all browsers. +2. Inherit line-height from `html` so users can set them as a class directly on the `html` element. +*/ + +body { + margin: 0; + /* 1 */ + line-height: inherit; + /* 2 */ +} + +/* +1. Add the correct height in Firefox. +2. Correct the inheritance of border color in Firefox. (https://bugzilla.mozilla.org/show_bug.cgi?id=190655) +3. Ensure horizontal rules are visible by default. +*/ + +hr { + height: 0; + /* 1 */ + color: inherit; + /* 2 */ + border-top-width: 1px; + /* 3 */ +} + +/* +Add the correct text decoration in Chrome, Edge, and Safari. 
+*/ + +abbr:where([title]) { + -webkit-text-decoration: underline dotted; + text-decoration: underline dotted; +} + +/* +Remove the default font size and weight for headings. +*/ + +h1, +h2, +h3, +h4, +h5, +h6 { + font-size: inherit; + font-weight: inherit; +} + +/* +Reset links to optimize for opt-in styling instead of opt-out. +*/ + +a { + color: inherit; + text-decoration: inherit; +} + +/* +Add the correct font weight in Edge and Safari. +*/ + +b, +strong { + font-weight: bolder; +} + +/* +1. Use the user's configured `mono` font-family by default. +2. Use the user's configured `mono` font-feature-settings by default. +3. Use the user's configured `mono` font-variation-settings by default. +4. Correct the odd `em` font sizing in all browsers. +*/ + +code, +kbd, +samp, +pre { + font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; + /* 1 */ + font-feature-settings: normal; + /* 2 */ + font-variation-settings: normal; + /* 3 */ + font-size: 1em; + /* 4 */ +} + +/* +Add the correct font size in all browsers. +*/ + +small { + font-size: 80%; +} + +/* +Prevent `sub` and `sup` elements from affecting the line height in all browsers. +*/ + +sub, +sup { + font-size: 75%; + line-height: 0; + position: relative; + vertical-align: baseline; +} + +sub { + bottom: -0.25em; +} + +sup { + top: -0.5em; +} + +/* +1. Remove text indentation from table contents in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=999088, https://bugs.webkit.org/show_bug.cgi?id=201297) +2. Correct table border color inheritance in all Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=935729, https://bugs.webkit.org/show_bug.cgi?id=195016) +3. Remove gaps between table borders by default. +*/ + +table { + text-indent: 0; + /* 1 */ + border-color: inherit; + /* 2 */ + border-collapse: collapse; + /* 3 */ +} + +/* +1. Change the font styles in all browsers. +2. 
Remove the margin in Firefox and Safari. +3. Remove default padding in all browsers. +*/ + +button, +input, +optgroup, +select, +textarea { + font-family: inherit; + /* 1 */ + font-feature-settings: inherit; + /* 1 */ + font-variation-settings: inherit; + /* 1 */ + font-size: 100%; + /* 1 */ + font-weight: inherit; + /* 1 */ + line-height: inherit; + /* 1 */ + letter-spacing: inherit; + /* 1 */ + color: inherit; + /* 1 */ + margin: 0; + /* 2 */ + padding: 0; + /* 3 */ +} + +/* +Remove the inheritance of text transform in Edge and Firefox. +*/ + +button, +select { + text-transform: none; +} + +/* +1. Correct the inability to style clickable types in iOS and Safari. +2. Remove default button styles. +*/ + +button, +input:where([type='button']), +input:where([type='reset']), +input:where([type='submit']) { + -webkit-appearance: button; + /* 1 */ + background-color: transparent; + /* 2 */ + background-image: none; + /* 2 */ +} + +/* +Use the modern Firefox focus style for all focusable elements. +*/ + +:-moz-focusring { + outline: auto; +} + +/* +Remove the additional `:invalid` styles in Firefox. (https://github.com/mozilla/gecko-dev/blob/2f9eacd9d3d995c937b4251a5557d95d494c9be1/layout/style/res/forms.css#L728-L737) +*/ + +:-moz-ui-invalid { + box-shadow: none; +} + +/* +Add the correct vertical alignment in Chrome and Firefox. +*/ + +progress { + vertical-align: baseline; +} + +/* +Correct the cursor style of increment and decrement buttons in Safari. +*/ + +::-webkit-inner-spin-button, +::-webkit-outer-spin-button { + height: auto; +} + +/* +1. Correct the odd appearance in Chrome and Safari. +2. Correct the outline style in Safari. +*/ + +[type='search'] { + -webkit-appearance: textfield; + /* 1 */ + outline-offset: -2px; + /* 2 */ +} + +/* +Remove the inner padding in Chrome and Safari on macOS. +*/ + +::-webkit-search-decoration { + -webkit-appearance: none; +} + +/* +1. Correct the inability to style clickable types in iOS and Safari. +2. 
Change font properties to `inherit` in Safari. +*/ + +::-webkit-file-upload-button { + -webkit-appearance: button; + /* 1 */ + font: inherit; + /* 2 */ +} + +/* +Add the correct display in Chrome and Safari. +*/ + +summary { + display: list-item; +} + +/* +Removes the default spacing and border for appropriate elements. +*/ + +blockquote, +dl, +dd, +h1, +h2, +h3, +h4, +h5, +h6, +hr, +figure, +p, +pre { + margin: 0; +} + +fieldset { + margin: 0; + padding: 0; +} + +legend { + padding: 0; +} + +ol, +ul, +menu { + list-style: none; + margin: 0; + padding: 0; +} + +/* +Reset default styling for dialogs. +*/ + +dialog { + padding: 0; +} + +/* +Prevent resizing textareas horizontally by default. +*/ + +textarea { + resize: vertical; +} + +/* +1. Reset the default placeholder opacity in Firefox. (https://github.com/tailwindlabs/tailwindcss/issues/3300) +2. Set the default placeholder color to the user's configured gray 400 color. +*/ + +input::-moz-placeholder, textarea::-moz-placeholder { + opacity: 1; + /* 1 */ + color: #9ca3af; + /* 2 */ +} + +input::placeholder, +textarea::placeholder { + opacity: 1; + /* 1 */ + color: #9ca3af; + /* 2 */ +} + +/* +Set the default cursor for buttons. +*/ + +button, +[role="button"] { + cursor: pointer; +} + +/* +Make sure disabled buttons don't get the pointer cursor. +*/ + +:disabled { + cursor: default; +} + +/* +1. Make replaced elements `display: block` by default. (https://github.com/mozdevs/cssremedy/issues/14) +2. Add `vertical-align: middle` to align replaced elements more sensibly by default. (https://github.com/jensimmons/cssremedy/issues/14#issuecomment-634934210) + This can trigger a poorly considered lint error in some tools but is included by design. +*/ + +img, +svg, +video, +canvas, +audio, +iframe, +embed, +object { + display: block; + /* 1 */ + vertical-align: middle; + /* 2 */ +} + +/* +Constrain images and videos to the parent width and preserve their intrinsic aspect ratio. 
(https://github.com/mozdevs/cssremedy/issues/14) +*/ + +img, +video { + max-width: 100%; + height: auto; +} + +/* Make elements with the HTML hidden attribute stay hidden by default */ + +[hidden] { + display: none; +} + +*, ::before, ::after { + --tw-border-spacing-x: 0; + --tw-border-spacing-y: 0; + --tw-translate-x: 0; + --tw-translate-y: 0; + --tw-rotate: 0; + --tw-skew-x: 0; + --tw-skew-y: 0; + --tw-scale-x: 1; + --tw-scale-y: 1; + --tw-pan-x: ; + --tw-pan-y: ; + --tw-pinch-zoom: ; + --tw-scroll-snap-strictness: proximity; + --tw-gradient-from-position: ; + --tw-gradient-via-position: ; + --tw-gradient-to-position: ; + --tw-ordinal: ; + --tw-slashed-zero: ; + --tw-numeric-figure: ; + --tw-numeric-spacing: ; + --tw-numeric-fraction: ; + --tw-ring-inset: ; + --tw-ring-offset-width: 0px; + --tw-ring-offset-color: #fff; + --tw-ring-color: rgb(59 130 246 / 0.5); + --tw-ring-offset-shadow: 0 0 #0000; + --tw-ring-shadow: 0 0 #0000; + --tw-shadow: 0 0 #0000; + --tw-shadow-colored: 0 0 #0000; + --tw-blur: ; + --tw-brightness: ; + --tw-contrast: ; + --tw-grayscale: ; + --tw-hue-rotate: ; + --tw-invert: ; + --tw-saturate: ; + --tw-sepia: ; + --tw-drop-shadow: ; + --tw-backdrop-blur: ; + --tw-backdrop-brightness: ; + --tw-backdrop-contrast: ; + --tw-backdrop-grayscale: ; + --tw-backdrop-hue-rotate: ; + --tw-backdrop-invert: ; + --tw-backdrop-opacity: ; + --tw-backdrop-saturate: ; + --tw-backdrop-sepia: ; + --tw-contain-size: ; + --tw-contain-layout: ; + --tw-contain-paint: ; + --tw-contain-style: ; +} + +::backdrop { + --tw-border-spacing-x: 0; + --tw-border-spacing-y: 0; + --tw-translate-x: 0; + --tw-translate-y: 0; + --tw-rotate: 0; + --tw-skew-x: 0; + --tw-skew-y: 0; + --tw-scale-x: 1; + --tw-scale-y: 1; + --tw-pan-x: ; + --tw-pan-y: ; + --tw-pinch-zoom: ; + --tw-scroll-snap-strictness: proximity; + --tw-gradient-from-position: ; + --tw-gradient-via-position: ; + --tw-gradient-to-position: ; + --tw-ordinal: ; + --tw-slashed-zero: ; + --tw-numeric-figure: ; + 
--tw-numeric-spacing: ; + --tw-numeric-fraction: ; + --tw-ring-inset: ; + --tw-ring-offset-width: 0px; + --tw-ring-offset-color: #fff; + --tw-ring-color: rgb(59 130 246 / 0.5); + --tw-ring-offset-shadow: 0 0 #0000; + --tw-ring-shadow: 0 0 #0000; + --tw-shadow: 0 0 #0000; + --tw-shadow-colored: 0 0 #0000; + --tw-blur: ; + --tw-brightness: ; + --tw-contrast: ; + --tw-grayscale: ; + --tw-hue-rotate: ; + --tw-invert: ; + --tw-saturate: ; + --tw-sepia: ; + --tw-drop-shadow: ; + --tw-backdrop-blur: ; + --tw-backdrop-brightness: ; + --tw-backdrop-contrast: ; + --tw-backdrop-grayscale: ; + --tw-backdrop-hue-rotate: ; + --tw-backdrop-invert: ; + --tw-backdrop-opacity: ; + --tw-backdrop-saturate: ; + --tw-backdrop-sepia: ; + --tw-contain-size: ; + --tw-contain-layout: ; + --tw-contain-paint: ; + --tw-contain-style: ; +} + +.sr-only { + position: absolute; + width: 1px; + height: 1px; + padding: 0; + margin: -1px; + overflow: hidden; + clip: rect(0, 0, 0, 0); + white-space: nowrap; + border-width: 0; +} + +.relative { + position: relative; +} + +.mx-auto { + margin-left: auto; + margin-right: auto; +} + +.mt-2 { + margin-top: 0.5rem; +} + +.mt-4 { + margin-top: 1rem; +} + +.block { + display: block; +} + +.inline { + display: inline; +} + +.flex { + display: flex; +} + +.size-12 { + width: 3rem; + height: 3rem; +} + +.h-12 { + height: 3rem; +} + +.h-\[75vh\] { + height: 75vh; +} + +.w-12 { + width: 3rem; +} + +.max-w-7xl { + max-width: 80rem; +} + +.max-w-md { + max-width: 28rem; +} + +.cursor-pointer { + cursor: pointer; +} + +.flex-wrap { + flex-wrap: wrap; +} + +.items-center { + align-items: center; +} + +.justify-center { + justify-content: center; +} + +.justify-between { + justify-content: space-between; +} + +.gap-x-4 { + -moz-column-gap: 1rem; + column-gap: 1rem; +} + +.gap-y-6 { + row-gap: 1.5rem; +} + +.self-center { + align-self: center; +} + +.whitespace-nowrap { + white-space: nowrap; +} + +.text-balance { + text-wrap: balance; +} + +.text-pretty { + text-wrap: 
pretty; +} + +.rounded-lg { + border-radius: 0.5rem; +} + +.rounded-md { + border-radius: 0.375rem; +} + +.border { + border-width: 1px; +} + +.border-dashed { + border-style: dashed; +} + +.border-gray-900\/25 { + border-color: rgb(17 24 39 / 0.25); +} + +.bg-white { + --tw-bg-opacity: 1; + background-color: rgb(255 255 255 / var(--tw-bg-opacity)); +} + +.p-6 { + padding: 1.5rem; +} + +.p-8 { + padding: 2rem; +} + +.px-4 { + padding-left: 1rem; + padding-right: 1rem; +} + +.px-6 { + padding-left: 1.5rem; + padding-right: 1.5rem; +} + +.py-10 { + padding-top: 2.5rem; + padding-bottom: 2.5rem; +} + +.pl-1 { + padding-left: 0.25rem; +} + +.text-center { + text-align: center; +} + +.text-4xl { + font-size: 2.25rem; + line-height: 2.5rem; +} + +.text-lg { + font-size: 1.125rem; + line-height: 1.75rem; +} + +.text-lg\/8 { + font-size: 1.125rem; + line-height: 2rem; +} + +.text-sm\/6 { + font-size: 0.875rem; + line-height: 1.5rem; +} + +.text-xs\/5 { + font-size: 0.75rem; + line-height: 1.25rem; +} + +.font-medium { + font-weight: 500; +} + +.font-semibold { + font-weight: 600; +} + +.tracking-tight { + letter-spacing: -0.025em; +} + +.text-gray-300 { + --tw-text-opacity: 1; + color: rgb(209 213 219 / var(--tw-text-opacity)); +} + +.text-gray-600 { + --tw-text-opacity: 1; + color: rgb(75 85 99 / var(--tw-text-opacity)); +} + +.text-gray-900 { + --tw-text-opacity: 1; + color: rgb(17 24 39 / var(--tw-text-opacity)); +} + +.text-magenta-600 { + --tw-text-opacity: 1; + color: rgb(125 48 139 / var(--tw-text-opacity)); +} + +.focus-within\:outline-none:focus-within { + outline: 2px solid transparent; + outline-offset: 2px; +} + +.focus-within\:ring-2:focus-within { + --tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color); + --tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(2px + var(--tw-ring-offset-width)) var(--tw-ring-color); + box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000); 
+} + +.focus-within\:ring-magenta-600:focus-within { + --tw-ring-opacity: 1; + --tw-ring-color: rgb(125 48 139 / var(--tw-ring-opacity)); +} + +.focus-within\:ring-offset-2:focus-within { + --tw-ring-offset-width: 2px; +} + +.hover\:text-magenta-500:hover { + --tw-text-opacity: 1; + color: rgb(163 62 181 / var(--tw-text-opacity)); +} + +@media (min-width: 640px) { + .sm\:text-5xl { + font-size: 3rem; + line-height: 1; + } +} + +@media (min-width: 768px) { + .md\:text-2xl { + font-size: 1.5rem; + line-height: 2rem; + } +} + +@media (min-width: 1024px) { + .lg\:gap-x-12 { + -moz-column-gap: 3rem; + column-gap: 3rem; + } + + .lg\:px-8 { + padding-left: 2rem; + padding-right: 2rem; + } +} diff --git a/pdf2md/server/static/pdf2md.js b/pdf2md/server/static/pdf2md.js new file mode 100644 index 0000000000..f9783a3751 --- /dev/null +++ b/pdf2md/server/static/pdf2md.js @@ -0,0 +1,37 @@ +const fileUploadInput = document.getElementById("file-upload"); + +fileUploadInput.addEventListener("change", (event) => { + const file = event.target.files[0]; + if (!file) { + console.error("No file selected"); + return; + } + + const reader = new FileReader(); + reader.onload = (event) => { + const base64 = event.target.result; + console.log(base64); + + const formData = { + base64_file: base64, + }; + + fetch("/api/task", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: window.TRIEVE_API_KEY, + }, + body: JSON.stringify(formData), + }) + .then((response) => response.json()) + .then((data) => { + console.log(data); + }) + .catch((error) => { + console.error("Error:", error); + }); + }; + + reader.readAsDataURL(file); +}); diff --git a/pdf2md/server/tailwind.config.js b/pdf2md/server/tailwind.config.js new file mode 100644 index 0000000000..64a55a32a0 --- /dev/null +++ b/pdf2md/server/tailwind.config.js @@ -0,0 +1,29 @@ +/** @type {import('tailwindcss').Config} */ +module.exports = { + content: ["./src/templates/**/*.html"], + theme: { + fontFamily: { 
+ sans: ["Quicksand", "system-ui", "sans-serif"], + verdana: ["Verdana", "Geneva", "sans-serif"], + }, + extend: { + colors: { + magenta: { + DEFAULT: "#A33EB5", + 50: "#E4C1EA", + 100: "#DDB2E5", + 200: "#CF93DA", + 300: "#C275D0", + 400: "#B557C5", + 500: "#A33EB5", + 600: "#7D308B", + 700: "#582161", + 800: "#321338", + 900: "#0C050E", + 950: "#000000", + }, + }, + }, + }, + plugins: [], +}; diff --git a/server/Cargo.toml b/server/Cargo.toml index 43620431b1..8a1a9f27a1 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -90,7 +90,7 @@ async-stripe = { version = "0.37.1", features = [ "billing", ] } chrono = { version = "0.4.20", features = ["serde"] } -derive_more = { version = "0.99.7" } +derive_more = { version = "0.99.7", features = ["display"] } diesel = { version = "2", features = [ "uuid", "chrono", diff --git a/server/Dockerfile.ingestion-worker b/server/Dockerfile.ingestion-worker index 0fafdbe800..a78e881205 100644 --- a/server/Dockerfile.ingestion-worker +++ b/server/Dockerfile.ingestion-worker @@ -17,7 +17,7 @@ RUN cargo chef cook --release --recipe-path recipe.json --bin "ingestion-worker" COPY . . RUN cargo build --release --features "runtime-env" --bin "ingestion-worker" -FROM debian:bookworm-slim as runtime +FROM debian:bookworm-slim AS runtime RUN apt-get update -y && apt-get -y install pkg-config libssl-dev libpq-dev ca-certificates WORKDIR /app COPY ./migrations/ /app/migrations diff --git a/server/Dockerfile.server b/server/Dockerfile.server index 804d7d9093..a6ca97c724 100644 --- a/server/Dockerfile.server +++ b/server/Dockerfile.server @@ -17,7 +17,7 @@ RUN cargo chef cook --release --recipe-path recipe.json --bin "trieve-server" COPY . . 
RUN cargo build --release --features "runtime-env" --bin "trieve-server" -FROM debian:bookworm-slim as runtime +FROM debian:bookworm-slim AS runtime WORKDIR /app RUN apt-get update -y; \ diff --git a/server/src/bin/delete-worker.rs b/server/src/bin/delete-worker.rs index 7dc0ff944b..f1dc8f0568 100644 --- a/server/src/bin/delete-worker.rs +++ b/server/src/bin/delete-worker.rs @@ -413,6 +413,7 @@ pub async fn bulk_delete_chunks( bulk_delete_chunks_query( chunk_delete_message.filter, + chunk_delete_message.deleted_at, chunk_delete_message.dataset_id, dataset_config, web_pool.clone(), diff --git a/server/src/bin/file-worker.rs b/server/src/bin/file-worker.rs index 3a669d0ed3..8a8e611b2f 100644 --- a/server/src/bin/file-worker.rs +++ b/server/src/bin/file-worker.rs @@ -1,3 +1,4 @@ +use base64::Engine; use diesel_async::pooled_connection::{AsyncDieselConnectionManager, ManagerConfig}; use redis::aio::MultiplexedConnection; use sentry::{Hub, SentryFutureExt}; @@ -8,13 +9,17 @@ use std::sync::{ }; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Layer}; use trieve_server::{ - data::models::{self, FileWorkerMessage}, + data::models::{self, ChunkGroup, FileWorkerMessage}, errors::ServiceError, establish_connection, get_env, + handlers::chunk_handler::ChunkReqPayload, operators::{ clickhouse_operator::{ClickHouseEvent, EventQueue}, dataset_operator::get_dataset_and_organization_from_dataset_id_query, - file_operator::{create_file_chunks, create_file_query, get_aws_bucket}, + file_operator::{ + create_file_chunks, create_file_query, get_aws_bucket, preprocess_file_to_chunks, + }, + group_operator::{create_group_from_file_query, create_groups_query}, }, }; @@ -252,7 +257,7 @@ async fn file_worker( .query_async::(&mut *redis_connection) .await; } - Ok(None) => { + Ok(_) => { log::info!( "File was uploaded with specification to not create chunks for it: {:?}", file_worker_message.file_id @@ -275,6 +280,42 @@ async fn file_worker( } } 
+#[derive(serde::Deserialize, serde::Serialize, Clone, Debug)] +pub struct CreateFileTaskResponse { + pub task_id: uuid::Uuid, + pub status: FileTaskStatus, + pub pos_in_queue: String, +} + +#[derive(Debug, serde::Serialize, serde::Deserialize, Clone, PartialEq, Eq)] +pub enum FileTaskStatus { + Created, + ProcessingFile(u32), + ChunkingFile(u32), + Completed, + Failed, +} + +#[derive(Debug, serde::Serialize, serde::Deserialize, Clone)] +pub struct PollTaskResponse { + pub id: String, + pub total_document_pages: u32, + pub pages_processed: u32, + pub status: String, + pub created_at: String, + pub pages: Option>, + pub pagination_token: Option, +} + +#[derive(Debug, serde::Serialize, serde::Deserialize, Clone)] +pub struct PdfToMdChunk { + pub id: String, + pub task_id: String, + pub content: String, + pub metadata: serde_json::Value, + pub created_at: String, +} + async fn upload_file( file_worker_message: FileWorkerMessage, web_pool: actix_web::web::Data, @@ -303,6 +344,126 @@ async fn upload_file( get_file_span.finish(); + let file_name = file_worker_message.upload_file_data.file_name.clone(); + + let dataset_org_plan_sub = get_dataset_and_organization_from_dataset_id_query( + models::UnifiedId::TrieveUuid(file_worker_message.dataset_id), + None, + web_pool.clone(), + ) + .await?; + + if file_name.ends_with(".pdf") { + // Send file to router PDF2MD + let pdf2md_url = std::env::var("PDF2MD_URL") + .expect("PDF2MD_URL must be set") + .to_string(); + + let pdf2md_auth = std::env::var("PDF2MD_AUTH").unwrap_or("".to_string()); + + let pdf2md_client = reqwest::Client::new(); + let encoded_file = base64::prelude::BASE64_STANDARD.encode(file_data.clone()); + + let json_value = serde_json::json!({ + "base64_file": encoded_file.clone() + }); + + let pdf2md_response = pdf2md_client + .post(format!("{}/api/task", pdf2md_url)) + .header("Content-Type", "application/json") + .header("Authorization", &pdf2md_auth) + .json(&json_value) + .send() + .await + .map_err(|err| { + 
log::error!("Could not send file to pdf2md {:?}", err); + ServiceError::BadRequest("Could not send file to pdf2md".to_string()) + })?; + + let response = pdf2md_response.json::().await; + + let task_id = match response { + Ok(response) => response.task_id, + Err(err) => { + log::error!("Could not parse task_id from pdf2md {:?}", err); + return Err(ServiceError::BadRequest(format!( + "Could not parse task_id from pdf2md {:?}", + err + ))); + } + }; + + log::info!("Waiting on Task {}", task_id); + let mut completed_task: Option = None; + + loop { + let request = pdf2md_client + .get(format!("{}/api/task/{}", pdf2md_url, task_id).as_str()) + .header("Content-Type", "application/json") + .header("Authorization", &pdf2md_auth) + .send() + .await + .map_err(|err| { + log::error!("Could not send poll request to pdf2md {:?}", err); + ServiceError::BadRequest(format!("Could not send request to pdf2md {:?}", err)) + })?; + + let response = request.json::().await.map_err(|err| { + log::error!("Could not parse response from pdf2md {:?}", err); + ServiceError::BadRequest(format!("Could not parse response from pdf2md {:?}", err)) + })?; + + if (response.status == "Completed" && response.total_document_pages != 0) + && response.pages.is_some() + { + log::info!("Got job back from task {}", task_id); + completed_task = Some(response); + break; + } else { + log::info!("Polling on task {}... 
{:?}", task_id, response); + tokio::time::sleep(std::time::Duration::from_secs(5)).await; + continue; + } + } + + if let Some(task) = completed_task { + // Poll Chunks from pdf chunks from service + let file_size_mb = (file_data.len() as f64 / 1024.0 / 1024.0).round() as i64; + let created_file = create_file_query( + file_id, + file_size_mb, + file_worker_message.upload_file_data.clone(), + file_worker_message.dataset_id, + web_pool.clone(), + ) + .await?; + + let mut chunk_htmls: Vec = vec![]; + + log::info!("Chunks got {:?}", task); + if let Some(pages) = task.pages { + for page in pages { + chunk_htmls.push(page.content.clone()); + } + } + + log::info!("Chunks got {}", chunk_htmls.len()); + + create_file_chunks( + created_file.id, + file_worker_message.upload_file_data, + chunk_htmls, + dataset_org_plan_sub, + web_pool.clone(), + event_queue.clone(), + redis_conn, + ) + .await?; + + return Ok(Some(file_id)); + } + } + let tika_url = std::env::var("TIKA_URL") .expect("TIKA_URL must be set") .to_string(); @@ -369,10 +530,16 @@ async fn upload_file( ) .await?; + let Ok(chunk_htmls) = + preprocess_file_to_chunks(html_content, file_worker_message.upload_file_data.clone()) + else { + return Err(ServiceError::BadRequest("Could not parse file".to_string())); + }; + create_file_chunks( created_file.id, file_worker_message.upload_file_data, - html_content, + chunk_htmls, dataset_org_plan_sub, web_pool.clone(), event_queue.clone(), diff --git a/server/src/bin/ingestion-worker.rs b/server/src/bin/ingestion-worker.rs index 95fd3a799d..e088fb21a2 100644 --- a/server/src/bin/ingestion-worker.rs +++ b/server/src/bin/ingestion-worker.rs @@ -20,10 +20,13 @@ use trieve_server::handlers::chunk_handler::{ use trieve_server::handlers::group_handler::dataset_owns_group; use trieve_server::operators::chunk_operator::{ bulk_insert_chunk_metadata_query, bulk_revert_insert_chunk_metadata_query, - insert_chunk_metadata_query, update_chunk_metadata_query, + 
get_row_count_for_organization_id_query, insert_chunk_metadata_query, + update_chunk_metadata_query, }; use trieve_server::operators::clickhouse_operator::{ClickHouseEvent, EventQueue}; -use trieve_server::operators::dataset_operator::get_dataset_by_id_query; +use trieve_server::operators::dataset_operator::{ + get_dataset_and_organization_from_dataset_id_query, get_dataset_by_id_query, +}; use trieve_server::operators::group_operator::get_groups_from_group_ids_query; use trieve_server::operators::model_operator::{ get_bm25_embeddings, get_dense_vector, get_dense_vectors, get_sparse_vectors, @@ -416,6 +419,34 @@ pub async fn bulk_upload_chunks( "precomputing some important data before insert", ); + let unlimited = std::env::var("UNLIMITED").unwrap_or("false".to_string()); + if unlimited == "false" { + let dataset_org_plan_sub = get_dataset_and_organization_from_dataset_id_query( + models::UnifiedId::TrieveUuid(payload.dataset_id), + None, + web_pool.clone(), + ) + .await?; + + let chunk_count = get_row_count_for_organization_id_query( + dataset_org_plan_sub.organization.organization.id, + web_pool.clone(), + ) + .await?; + + if chunk_count + payload.ingestion_messages.len() + > dataset_org_plan_sub + .organization + .plan + .unwrap_or_default() + .chunk_count as usize + { + return Err(ServiceError::BadRequest( + "Chunk count exceeds plan limit".to_string(), + )); + } + } + // Being blocked out because it is difficult to create multiple split_avg embeddings in batch let split_average_being_used = payload .ingestion_messages diff --git a/server/src/handlers/chunk_handler.rs b/server/src/handlers/chunk_handler.rs index 50bc6cee75..9367e85ca8 100644 --- a/server/src/handlers/chunk_handler.rs +++ b/server/src/handlers/chunk_handler.rs @@ -527,6 +527,7 @@ pub async fn bulk_delete_chunk( dataset_id: dataset_org_plan_sub.dataset.id, attempt_number: 0, filter: chunk_filter.into_inner().filter, + deleted_at: chrono::Utc::now().naive_utc(), }; let serialized_message = 
serde_json::to_string(&DeleteMessage::ChunkDelete(message)) diff --git a/server/src/handlers/file_handler.rs b/server/src/handlers/file_handler.rs index 32232f3559..99f540bfd9 100644 --- a/server/src/handlers/file_handler.rs +++ b/server/src/handlers/file_handler.rs @@ -11,7 +11,8 @@ use crate::{ middleware::auth_middleware::verify_member, operators::{ file_operator::{ - delete_file_query, get_aws_bucket, get_dataset_file_query, get_file_query, + delete_file_query, get_aws_bucket, get_dataset_file_query, + get_file_query, }, organization_operator::get_file_size_sum_org, }, diff --git a/server/src/handlers/page_handler.rs b/server/src/handlers/page_handler.rs index a51d907c46..97dc160976 100644 --- a/server/src/handlers/page_handler.rs +++ b/server/src/handlers/page_handler.rs @@ -1,5 +1,8 @@ -use std::env; - +use super::{ + auth_handler::LoggedUser, + chunk_handler::{ChunkFilter, ScoringOptions}, +}; +use crate::data::models::Templates; use crate::{ data::models::{DatasetConfiguration, Pool, SearchMethod, SortOptions, TypoOptions, UnifiedId}, errors::ServiceError, @@ -9,15 +12,9 @@ use crate::{ use actix_web::{web, HttpMessage, HttpRequest, HttpResponse}; use minijinja::context; use serde::{Deserialize, Serialize}; +use std::env; use utoipa::ToSchema; -use crate::data::models::Templates; - -use super::{ - auth_handler::LoggedUser, - chunk_handler::{ChunkFilter, ScoringOptions}, -}; - #[derive(Serialize, Deserialize, Debug, Clone, ToSchema, Default)] pub enum PublicPageTheme { #[default] diff --git a/server/src/operators/chunk_operator.rs b/server/src/operators/chunk_operator.rs index 79a9a97b80..23e5efd65a 100644 --- a/server/src/operators/chunk_operator.rs +++ b/server/src/operators/chunk_operator.rs @@ -605,6 +605,7 @@ pub async fn get_metadata_from_tracking_ids_query( pub async fn bulk_delete_chunks_query( filter: ChunkFilter, + deleted_at: chrono::NaiveDateTime, dataset_id: uuid::Uuid, dataset_config: DatasetConfiguration, pool: web::Data, @@ -629,40 +630,48 @@ 
pub async fn bulk_delete_chunks_query( log::info!("Deleting {:?} chunks with point_ids", point_ids.len()); - let transaction_result = conn + let deleted_point_ids = conn .transaction::<_, diesel::result::Error, _>(|conn| { async move { { - // if there were no collisions, just delete the chunk_metadata without issue - let deleted_chunks = diesel::delete( + let deleted_ids_uuids: Vec<(uuid::Uuid, uuid::Uuid)> = diesel::delete( chunk_metadata_columns::chunk_metadata .filter( chunk_metadata_columns::qdrant_point_id .eq_any(point_ids.clone()), ) - .filter(chunk_metadata_columns::dataset_id.eq(dataset_id)), + .filter(chunk_metadata_columns::dataset_id.eq(dataset_id)) + .filter(chunk_metadata_columns::created_at.le(deleted_at)), ) - .returning(chunk_metadata_columns::id) - .get_results::(conn) + .returning(( + chunk_metadata_columns::id, + chunk_metadata_columns::qdrant_point_id, + )) + .get_results::<(uuid::Uuid, uuid::Uuid)>(conn) .await?; + let (deleted_ids, deleted_point_ids): (Vec, Vec) = + deleted_ids_uuids.into_iter().unzip(); + diesel::delete( - chunk_group_bookmarks_columns::chunk_group_bookmarks.filter( - chunk_group_bookmarks_columns::chunk_metadata_id - .eq_any(deleted_chunks.clone()), - ), + chunk_group_bookmarks_columns::chunk_group_bookmarks + .filter( + chunk_group_bookmarks_columns::chunk_metadata_id + .eq_any(deleted_ids.clone()), + ) + .filter(chunk_group_bookmarks_columns::created_at.le(deleted_at)), ) .execute(conn) .await?; - Ok(point_ids) + Ok(deleted_point_ids) } } .scope_boxed() }) .await; - match transaction_result { + match deleted_point_ids { Ok(point_ids) => { delete_points_from_qdrant(point_ids, qdrant_collection.clone()).await?; } diff --git a/server/src/operators/dataset_operator.rs b/server/src/operators/dataset_operator.rs index f1925fc607..ea1e0d1611 100644 --- a/server/src/operators/dataset_operator.rs +++ b/server/src/operators/dataset_operator.rs @@ -221,6 +221,7 @@ pub struct ChunkDeleteMessage { pub dataset_id: uuid::Uuid, pub 
attempt_number: usize, pub filter: ChunkFilter, + pub deleted_at: chrono::NaiveDateTime, } #[derive(Serialize, Deserialize, Clone, Debug)] diff --git a/server/src/operators/file_operator.rs b/server/src/operators/file_operator.rs index 3e3daf7672..4c1f52c064 100644 --- a/server/src/operators/file_operator.rs +++ b/server/src/operators/file_operator.rs @@ -94,26 +94,21 @@ pub async fn create_file_query( .values(&new_file) .get_result(&mut conn) .await - .map_err(|_| ServiceError::BadRequest("Could not create file, try again".to_string()))?; + .map_err(|err| { + ServiceError::BadRequest(format!("Could not create file {:?}", err)) + })?; Ok(created_file) } -#[allow(clippy::too_many_arguments)] -#[tracing::instrument(skip(pool, redis_conn, event_queue))] -pub async fn create_file_chunks( - created_file_id: uuid::Uuid, - upload_file_data: UploadFileReqPayload, +#[tracing::instrument] +pub fn preprocess_file_to_chunks( html_content: String, - dataset_org_plan_sub: DatasetAndOrgWithSubAndPlan, - pool: web::Data, - event_queue: web::Data, - mut redis_conn: MultiplexedConnection, -) -> Result<(), ServiceError> { + upload_file_data: UploadFileReqPayload, +) -> Result, ServiceError> { let file_text = convert_html_to_text(&html_content); - let split_regex: Option = upload_file_data - .split_delimiters + let split_regex: Option = upload_file_data.split_delimiters .map(|delimiters| { build_chunking_regex(delimiters).map_err(|e| { log::error!("Could not parse chunking delimiters {:?}", e); @@ -132,9 +127,24 @@ pub async fn create_file_chunks( target_splits_per_chunk, ); + return Ok(chunk_htmls); +} + +#[allow(clippy::too_many_arguments)] +#[tracing::instrument(skip(pool, redis_conn, event_queue))] +pub async fn create_file_chunks( + created_file_id: uuid::Uuid, + upload_file_data: UploadFileReqPayload, + chunk_htmls: Vec, + dataset_org_plan_sub: DatasetAndOrgWithSubAndPlan, + pool: web::Data, + event_queue: web::Data, + mut redis_conn: MultiplexedConnection, +) -> Result<(), 
ServiceError> { + let mut chunks: Vec = [].to_vec(); - let name = format!("Group for file {}", upload_file_data.file_name); + let name = format!("{}", upload_file_data.file_name); let chunk_group = ChunkGroup::from_details( Some(name.clone()),