Skip to content

Commit fb009a5

Browse files
tae898 authored and robfrank committed
Enhance Python bindings for ArcadeDB (#3466)
* Enhance Python bindings for ArcadeDB
  - Updated `pyproject.toml` to clarify the requirement for `py7zr`.
  - Added `build_graph_now` parameter to `create_vector_index` method in `Database` class to control immediate graph building.
  - Modified `DatabaseFactory` and `create_database` functions to accept optional JVM arguments for better configuration.
  - Enhanced `Importer` class to support JVM arguments, improving memory management during data import.
  - Refined JVM startup logic in `jvm.py` to allow for more flexible configuration and deduplication of heap size arguments.
  - Introduced tests for OLTP mixed workload scenarios and CSV import handling of complex data types, null values, and performance.
  - Updated vector index creation tests to verify eager graph building behavior.
  - Improved documentation and comments throughout the code for clarity and maintainability.
* fix(python): correct gremlin labels and robust JVM arg parsing

(cherry picked from commit 650e2bb)
1 parent c94c6b0 commit fb009a5

24 files changed

+2657
-7116
lines changed

.github/workflows/test-python-bindings.yml

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -121,24 +121,29 @@ jobs:
121121
# Don't cancel other matrix jobs if one fails
122122
fail-fast: false
123123
matrix:
124-
python-version: ['3.10', '3.11', '3.12', '3.13', '3.14']
125-
# Temporarily limit to three platforms (skip macOS x86_64 and all Windows)
124+
python-version:
125+
- '3.10'
126+
- '3.11'
127+
- '3.12'
128+
- '3.13'
129+
# - '3.14' # Temporarily disabled to reduce wheel storage usage
130+
# Temporarily limit to four platforms (skip macOS x86_64, Windows ARM64)
126131
# platform: ['linux/amd64', 'linux/arm64', 'darwin/amd64', 'darwin/arm64', 'windows/amd64', 'windows/arm64']
127-
platform: ['linux/amd64', 'linux/arm64', 'darwin/arm64']
132+
platform: ['linux/amd64', 'linux/arm64', 'darwin/arm64', 'windows/amd64']
128133
include:
129134
- platform: linux/amd64
130135
runs-on: ubuntu-24.04
131136
- platform: linux/arm64
132137
runs-on: ubuntu-24.04-arm
133138
- platform: darwin/arm64
134139
runs-on: macos-15
140+
- platform: windows/amd64
141+
runs-on: windows-2025
135142
# - platform: darwin/amd64
136143
# runs-on: macos-15-intel
137-
# - platform: windows/amd64
138-
# runs-on: windows-2025
139144
# - platform: windows/arm64
140145
# runs-on: windows-11-arm
141-
# macOS x86_64 and all Windows temporarily disabled
146+
# macOS x86_64 and Windows ARM64 temporarily disabled
142147

143148
steps:
144149
- name: Checkout code
@@ -153,6 +158,8 @@ jobs:
153158
shell: bash
154159
run: |
155160
curl -LsSf https://astral.sh/uv/install.sh | sh
161+
echo "$HOME/.local/bin" >> "$GITHUB_PATH"
162+
export PATH="$HOME/.local/bin:$PATH"
156163
uv --version
157164
158165
- name: Download ArcadeDB JARs artifact
@@ -168,7 +175,7 @@ jobs:
168175
if: matrix.platform != 'linux/amd64' && matrix.platform != 'linux/arm64'
169176
uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0
170177
with:
171-
distribution: 'temurin'
178+
distribution: 'corretto'
172179
java-version: '25'
173180

174181
- name: Set up Docker Buildx (Linux only)
@@ -180,7 +187,14 @@ jobs:
180187
run: |
181188
uv pip install --system build wheel setuptools
182189
183-
# Windows currently disabled, no symlink needed
190+
- name: Set UTF-8 encoding (Windows only)
191+
if: matrix.platform == 'windows/amd64' || matrix.platform == 'windows/arm64'
192+
shell: bash
193+
run: |
194+
echo "PYTHONIOENCODING=utf-8" >> $GITHUB_ENV
195+
echo "PYTHONUTF8=1" >> $GITHUB_ENV
196+
197+
# Windows currently enabled, no symlink needed
184198
# - name: Create python3 symlink (Windows only)
185199
# if: matrix.platform == 'windows/amd64' || matrix.platform == 'windows/arm64'
186200
# shell: bash
@@ -419,8 +433,9 @@ jobs:
419433
echo "**Package**: arcadedb-embedded" >> $GITHUB_STEP_SUMMARY
420434
echo "" >> $GITHUB_STEP_SUMMARY
421435
echo "ℹ️ **Note**: Some platform/Python combinations are excluded from testing:" >> $GITHUB_STEP_SUMMARY
422-
echo "- Windows ARM64 + Python 3.10, 3.14 (no GitHub-hosted runners available)" >> $GITHUB_STEP_SUMMARY
423-
echo "- macOS x86_64 + Python 3.13, 3.14 (no suitable dependencies available)" >> $GITHUB_STEP_SUMMARY
436+
echo "- Python 3.14 is temporarily disabled to reduce wheel storage usage" >> $GITHUB_STEP_SUMMARY
437+
echo "- Windows ARM64 (no GitHub-hosted runners available)" >> $GITHUB_STEP_SUMMARY
438+
echo "- macOS x86_64 (temporarily disabled)" >> $GITHUB_STEP_SUMMARY
424439
else
425440
echo "❌ **Some platforms failed testing**" >> $GITHUB_STEP_SUMMARY
426441
echo "" >> $GITHUB_STEP_SUMMARY

.github/workflows/test-python-examples.yml

Lines changed: 34 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -130,24 +130,29 @@ jobs:
130130
strategy:
131131
fail-fast: false
132132
matrix:
133-
python-version: ['3.10', '3.11', '3.12', '3.13', '3.14']
134-
# Temporarily limit to three platforms (skip macOS x86_64, all Windows)
133+
python-version:
134+
- '3.10'
135+
- '3.11'
136+
- '3.12'
137+
- '3.13'
138+
# - '3.14' # Temporarily disabled to reduce wheel storage usage
139+
# Temporarily limit to four platforms (skip macOS x86_64, Windows ARM64)
135140
# platform: ['linux/amd64', 'linux/arm64', 'darwin/amd64', 'darwin/arm64', 'windows/amd64', 'windows/arm64']
136-
platform: ['linux/amd64', 'linux/arm64', 'darwin/arm64']
141+
platform: ['linux/amd64', 'linux/arm64', 'darwin/arm64', 'windows/amd64']
137142
include:
138143
- platform: linux/amd64
139144
runs-on: ubuntu-24.04
140145
- platform: linux/arm64
141146
runs-on: ubuntu-24.04-arm
142147
- platform: darwin/arm64
143148
runs-on: macos-15
149+
- platform: windows/amd64
150+
runs-on: windows-2025
144151
# - platform: darwin/amd64
145152
# runs-on: macos-15-intel
146-
# - platform: windows/amd64
147-
# runs-on: windows-2025
148153
# - platform: windows/arm64
149154
# runs-on: windows-11-arm
150-
# macOS x86_64 and all Windows temporarily disabled
155+
# macOS x86_64 and Windows ARM64 temporarily disabled
151156

152157
steps:
153158
- name: Checkout code
@@ -163,7 +168,7 @@ jobs:
163168
if: matrix.platform != 'linux/amd64' && matrix.platform != 'linux/arm64'
164169
uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0
165170
with:
166-
distribution: 'temurin'
171+
distribution: 'corretto'
167172
java-version: '25'
168173

169174
- name: Set up Docker Buildx (Linux only)
@@ -179,14 +184,16 @@ jobs:
179184
shell: bash
180185
run: |
181186
curl -LsSf https://astral.sh/uv/install.sh | sh
187+
echo "$HOME/.local/bin" >> "$GITHUB_PATH"
188+
export PATH="$HOME/.local/bin:$PATH"
182189
uv --version
183190
184191
- name: Install Python build dependencies
185192
shell: bash
186193
run: |
187194
uv pip install --system build wheel setuptools
188195
189-
# Windows currently disabled, no symlink needed
196+
# Windows currently enabled, no symlink needed
190197
# - name: Create python3 symlink (Windows only)
191198
# if: matrix.platform == 'windows/amd64' || matrix.platform == 'windows/arm64'
192199
# shell: bash
@@ -240,24 +247,34 @@ jobs:
240247
python3 download_data.py movielens-small
241248
fi
242249
243-
if echo "$examples" | grep -qE '(^|[[:space:]])07_stackoverflow_multimodel\.py([[:space:]]|$)'; then
250+
if echo "$examples" | grep -qE '(^|[[:space:]])(08_stackoverflow_tables_oltp|09_stackoverflow_tables_olap|10_stackoverflow_graph_oltp|11_stackoverflow_graph_olap|14_stackoverflow_hybrid_queries)\.py([[:space:]]|$)'; then
244251
echo "📥 Downloading Stack Overflow Small dataset..."
245-
python3 download_data.py stackoverflow-small
252+
python3 download_data.py stackoverflow-small --no-vectors
246253
fi
247254
255+
- name: Download datasets
256+
shell: bash
257+
run: |
258+
uv pip install --system tqdm py7zr lxml
259+
cd bindings/python/examples
260+
echo "📥 Downloading MovieLens Small dataset..."
261+
python3 download_data.py movielens-small
262+
echo "📥 Downloading Stack Overflow Small dataset..."
263+
python3 download_data.py stackoverflow-small --no-vectors
264+
248265
- name: Install timeout command (macOS only)
249266
if: matrix.platform == 'darwin/amd64' || matrix.platform == 'darwin/arm64'
250267
shell: bash
251268
run: |
252269
# macOS doesn't have timeout command by default, use coreutils
253270
brew install coreutils
254271
255-
# Windows currently disabled, no env override needed
256-
# - name: Set UTF-8 encoding (Windows only)
257-
# if: matrix.platform == 'windows/amd64' || matrix.platform == 'windows/arm64'
258-
# shell: bash
259-
# run: |
260-
# echo "PYTHONIOENCODING=utf-8" >> $GITHUB_ENV
272+
- name: Set UTF-8 encoding (Windows only)
273+
if: matrix.platform == 'windows/amd64' || matrix.platform == 'windows/arm64'
274+
shell: bash
275+
run: |
276+
echo "PYTHONIOENCODING=utf-8" >> $GITHUB_ENV
277+
echo "PYTHONUTF8=1" >> $GITHUB_ENV
261278
262279
- name: Run all examples
263280
id: run_examples
@@ -317,11 +334,6 @@ jobs:
317334
example_name="$example (vector search, import from JSONL)"
318335
timeout_duration=900 # 15 minutes
319336
;;
320-
"07_stackoverflow_multimodel.py")
321-
example_args="--dataset stackoverflow-small"
322-
example_name="$example (stackoverflow-small dataset)"
323-
timeout_duration=1800 # 30 minutes
324-
;;
325337
*)
326338
example_args=""
327339
example_name="$example"
@@ -476,7 +488,7 @@ jobs:
476488
echo "" >> $GITHUB_STEP_SUMMARY
477489
echo "All examples ran successfully across all enabled platforms." >> $GITHUB_STEP_SUMMARY
478490
echo "" >> $GITHUB_STEP_SUMMARY
479-
echo "**Platforms tested**: linux/amd64, linux/arm64, darwin/arm64" >> $GITHUB_STEP_SUMMARY
491+
echo "**Platforms tested**: linux/amd64, linux/arm64, darwin/arm64, windows/amd64" >> $GITHUB_STEP_SUMMARY
480492
else
481493
echo "❌ **Some platforms failed example testing**" >> $GITHUB_STEP_SUMMARY
482494
echo "" >> $GITHUB_STEP_SUMMARY

bindings/python/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,6 @@ README.old.md
4242

4343
# Jupyter Notebooks for internal testing
4444
notebooks/
45+
46+
# local built jars
47+
local-jars/

bindings/python/Dockerfile.build

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,10 @@ FROM arcadedata/arcadedb:${ARCADEDB_TAG} AS java-builder
2424
# nothing to do here; jars will be copied from /home/arcadedb/lib in the python-builder stage
2525

2626
# Stage 2: Build minimal JRE with jlink
27-
FROM eclipse-temurin:25-jdk-jammy AS jre-builder
27+
FROM amazoncorretto:25 AS jre-builder
28+
29+
# Install required tooling (findutils for jar exclusion, binutils for jlink)
30+
RUN yum -y install findutils binutils && yum clean all
2831

2932
ARG TARGET_PLATFORM
3033
ARG USE_LOCAL_JARS

bindings/python/README.md

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
Native Python bindings for ArcadeDB - the multi-model database that supports Graph, Document, Key/Value, Search Engine, Time Series, and Vector models.
44

5-
**Status**: ✅ Production Ready | **Tests**: 258 Passing | **Platforms**: 3 Supported
5+
**Status**: ✅ Production Ready | **Tests**: 260 Passing | **Platforms**: 4 Supported
66

77
---
88

@@ -23,9 +23,10 @@ uv pip install arcadedb-embedded
2323
**Requirements:**
2424

2525
- **Python 3.10–3.14** (packaged/tested on CPython 3.12) - No Java installation required!
26-
- **Supported Platforms**: Prebuilt wheels for **3 platforms**
26+
- **Supported Platforms**: Prebuilt wheels for **4 platforms**
2727
- Linux: x86_64, ARM64
2828
- macOS: Apple Silicon (ARM64)
29+
- Windows: x86_64
2930

3031
### 5-Minute Example
3132

@@ -58,11 +59,11 @@ with arcadedb.create_database("./mydb") as db:
5859

5960
## ✨ Features
6061

61-
- **No Java Installation Required**: Bundled JRE (~249MB uncompressed)
62-
- 🌍 **3 Platforms Supported**: Linux (x86_64, ARM64), macOS (ARM64)
62+
- **No Java Installation Required**: Bundled JRE (~60MB uncompressed)
63+
- 🌍 **4 Platforms Supported**: Linux (x86_64, ARM64), macOS (ARM64), Windows (x86_64)
6364
- 🚀 **Embedded Mode**: Direct database access in Python process (no network)
6465
- 🌐 **Server Mode**: Optional HTTP server with Studio web interface
65-
- 📦 **Self-contained**: All dependencies bundled (~116MB wheel)
66+
- 📦 **Self-contained**: All dependencies bundled (~68MB wheel)
6667
- 🔄 **Multi-model**: Graph, Document, Key/Value, Vector, Time Series
6768
- 🔍 **Multiple query languages**: SQL, OpenCypher, MongoDB
6869
- **High performance**: Direct JVM integration via JPype
@@ -78,10 +79,10 @@ The `arcadedb-embedded` package is platform-specific and self-contained:
7879

7980
**Package Contents (all platforms):**
8081

81-
- **Wheel size (compressed)**: ~116MB
82+
- **Wheel size (compressed)**: ~68MB
8283
- **ArcadeDB JARs (uncompressed)**: ~32MB
83-
- **Bundled JRE (uncompressed)**: ~249MB (platform-specific Java 25 runtime via jlink)
84-
- **Total uncompressed size**: ~281MB
84+
- **Bundled JRE (uncompressed)**: ~60MB (platform-specific Java 25 runtime via jlink)
85+
- **Total uncompressed size**: ~95MB
8586

8687
**Note**: Some JARs are excluded to optimize package size (e.g., gRPC wire protocol). See [`jar_exclusions.txt`](https://github.com/humemai/arcadedb-embedded-python/blob/main/bindings/python/jar_exclusions.txt) for details.
8788

@@ -91,7 +92,7 @@ Import: `import arcadedb_embedded as arcadedb`
9192

9293
## 🧪 Testing
9394

94-
**Status**: 258 tests + example scripts passing on all 3 platforms
95+
**Status**: 260 tests + example scripts passing on all 4 platforms
9596

9697
```bash
9798
# Run all tests
@@ -107,7 +108,7 @@ See [testing documentation](https://docs.humem.ai/arcadedb/latest/development/te
107108

108109
## 🔧 Building from Source (Advanced)
109110

110-
Linux uses Docker. macOS uses a native Java 25+ JDK with jlink.
111+
Linux uses Docker. macOS and Windows use a native Java 25+ JDK with jlink.
111112

112113
```bash
113114
cd bindings/python/

bindings/python/build.sh

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
#
77
# Quick local-jar workflow (no host Java install required):
88
# 1) Build ArcadeDB JARs in Docker:
9-
# docker run --rm -v "$PWD":/src -w /src maven:3.9-eclipse-temurin-25 \
9+
# docker run --rm -v "$PWD":/src -w /src maven:3.9-amazoncorretto-25 \
1010
# sh -c "git config --global --add safe.directory /src && ./mvnw -DskipTests -pl package -am package"
1111
# 2) Point the build at your JAR directory:
1212
# cd bindings/python && ./build.sh linux/amd64 3.12 package/target/arcadedb-*/lib
@@ -44,6 +44,15 @@ print_usage() {
4444
echo " linux/amd64 Linux x86_64 (Docker build)"
4545
echo " linux/arm64 Linux ARM64 (Docker build, native ARM64 runner)"
4646
echo " darwin/arm64 macOS ARM64 Apple Silicon (native build on macOS)"
47+
echo " windows/amd64 Windows x86_64 (native build on Windows)"
48+
echo ""
49+
echo "PYTHON_VERSION:"
50+
echo " Python version for wheel (default: 3.12)"
51+
echo " Examples: 3.10, 3.11, 3.12, 3.13, 3.14"
52+
echo ""
53+
echo "JAR_LIB_DIR (optional):"
54+
echo " Directory containing ArcadeDB JARs to embed"
55+
echo " If omitted, JARs are pulled from arcadedata/arcadedb:<version>"
4756
echo ""
4857
echo "PYTHON_VERSION:"
4958
echo " Python version for wheel (default: 3.12)"
@@ -67,7 +76,7 @@ print_usage() {
6776
echo "Package features:"
6877
echo " ✅ Bundled platform-specific JRE (no Java required)"
6978
echo " ✅ Optimized JAR selection (see jar_exclusions.txt)"
70-
echo " ✅ Multi-platform support (3 platforms)"
79+
echo " ✅ Multi-platform support (4 platforms)"
7180
echo " 📦 Size: ~215MB (compressed), ~289MB (installed)"
7281
echo ""
7382
}

0 commit comments

Comments
 (0)