Skip to content

Commit 3c160e0

Browse files
authored
Merge branch 'main' into jbrooks-bounded-unique-count
2 parents 3c368e8 + d3cd673 commit 3c160e0

270 files changed

Lines changed: 17082 additions & 3376 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.bazeliskrc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
USE_BAZEL_VERSION=6.4.0

.bazelrc

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
## Disable remote cache completely when --config=local is passed
2+
build:local --remote_cache=
3+
4+
# Scala version config flags:
5+
# To build with Scala 2.12, pass "--config scala_2.12" to "bazel build"
6+
# To set a different default Scala version, add the following to
7+
# user.bazelrc:
8+
# common --config scala_2.12
9+
common:scala_2.11 --repo_env=SCALA_VERSION=2.11.12 --define scala_version=2.11
10+
common:scala_2.12 --repo_env=SCALA_VERSION=2.12.18 --define scala_version=2.12
11+
common:scala_2.13 --repo_env=SCALA_VERSION=2.13.12 --define scala_version=2.13
12+
13+
# Default scala version to 2.12
14+
common --repo_env=SCALA_VERSION=2.12.18 --define scala_version=2.12
15+
16+
# Spark versions
17+
common:spark_2.4 --define spark_version=2.4
18+
common:spark_3.1 --define spark_version=3.1
19+
common:spark_3.2 --define spark_version=3.2
20+
common:spark_3.5 --define spark_version=3.5
21+
# Default Spark version
22+
common --define spark_version=3.1
23+
24+
# Flink versions
25+
common:flink_1.16 --define flink_version=1.16
26+
# Default Flink version
27+
common --define flink_version=1.16
28+
29+
# Default version to pass the builds
30+
common --define version=0.0.0
31+
32+
build --javacopt=-Xep:DoubleBraceInitialization:OFF
33+
34+
# Don't implicitly create __init__.py files
35+
build --incompatible_default_to_explicit_init_py
36+
37+
# https://github.com/bazelbuild/bazel/issues/2377
38+
test --spawn_strategy=standalone
39+
40+
# Don't depend on a JAVA_HOME pointing at a system JDK
41+
# see https://github.com/bazelbuild/rules_jvm_external/issues/445
42+
build --repo_env=JAVA_HOME=../bazel_tools/jdk
43+
44+
build:java_8 --java_language_version=8 --java_runtime_version=remotejdk_8
45+
build:java_11 --java_language_version=11 --java_runtime_version=remotejdk_11
46+
build:java_21 --java_language_version=21 --java_runtime_version=remotejdk_21
47+
48+
# Use Java 8 as default for Bazel builds
49+
build --java_language_version=8 --java_runtime_version=remotejdk_8

.circleci/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ RUN apt-get update && apt-get -y -q install \
3434
openjdk-8-jdk \
3535
pkg-config \
3636
sbt \
37+
bazelisk \
3738
&& apt-get clean
3839

3940
# Install thrift

.circleci/config.yml

Lines changed: 78 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ executors:
1010
working_directory: /chronon
1111
docker:
1212
- image: houpy0829/chronon-ci:base--f87f50dc520f7a73894ae024eb78bd305d5b08e2
13+
docker_baseimg_executor_xxlarge:
14+
resource_class: 2xlarge
15+
working_directory: /chronon
16+
docker:
17+
- image: houpy0829/chronon-ci:base--f87f50dc520f7a73894ae024eb78bd305d5b08e2
1318

1419
jobs:
1520
"Pull Docker Image":
@@ -27,7 +32,7 @@ jobs:
2732
docker pull houpy0829/chronon-ci:base--f87f50dc520f7a73894ae024eb78bd305d5b08e2 || true
2833
2934
"Scala 12 -- Spark 3 Tests":
30-
executor: docker_baseimg_executor
35+
executor: docker_baseimg_executor_xxlarge
3136
steps:
3237
- checkout
3338
- run:
@@ -62,7 +67,7 @@ jobs:
6267
command: |
6368
conda activate chronon_py
6469
# Increase if we see OOM.
65-
export SBT_OPTS="-XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xmx4G -Xms2G"
70+
export SBT_OPTS="-XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=8G -Xmx8G -Xms4G"
6671
sbt "++ 2.13.6 test"
6772
- store_test_results:
6873
path: /chronon/spark/target/test-reports
@@ -102,7 +107,7 @@ jobs:
102107
conda activate chronon_py
103108
cd /chronon/api/py/ai/chronon
104109
pip install importlib-metadata==4.11.4 #Install importlib-metadata < 5
105-
flake8 --extend-ignore=W605,Q000,F631
110+
flake8 --extend-ignore=W605,Q000,F631,E203
106111
107112
"Chronon Python Tests":
108113
executor: docker_baseimg_executor
@@ -112,14 +117,14 @@ jobs:
112117
name: Run Chronon Python tests
113118
shell: /bin/bash -leuxo pipefail
114119
command: |
115-
conda activate chronon_py
116-
pushd /chronon/api/
117-
thrift --gen py -out /chronon/api/py/ai/chronon\
118-
/chronon/api/thrift/api.thrift # Generate thrift files
119-
cd /chronon/api/py # Go to Python module
120-
pip install -r requirements/dev.txt # Install latest requirements
121-
tox # Run tests
122-
popd
120+
conda activate chronon_py
121+
pushd /chronon/api/
122+
thrift --gen py -out /chronon/api/py/ai/chronon\
123+
/chronon/api/thrift/api.thrift # Generate thrift files
124+
cd /chronon/ # Go to Python module
125+
pip install -r api/py/requirements/dev.txt # Install latest requirements
126+
tox # Run tests
127+
popd
123128
- store_artifacts:
124129
path: /chronon/api/py/htmlcov
125130

@@ -133,6 +138,62 @@ jobs:
133138
command: |
134139
conda activate chronon_py
135140
sbt +scalafmtCheck
141+
# run these separately as we need an isolated JVM so that Spark session settings do not interfere with other runs
142+
# long-term goal is to refactor the current testing Spark session builder and avoid adding new individual tests to CI
143+
"Scala 13 -- Iceberg Format Tests":
144+
executor: docker_baseimg_executor
145+
steps:
146+
- checkout
147+
- run:
148+
name: Run Scala 13 tests for Iceberg format
149+
environment:
150+
format_test: iceberg
151+
shell: /bin/bash -leuxo pipefail
152+
command: |
153+
conda activate chronon_py
154+
# Increase if we see OOM.
155+
export SBT_OPTS="-XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xmx4G -Xms2G"
156+
sbt ';project spark_embedded; ++ 2.13.6; testOnly ai.chronon.spark.test.TableUtilsFormatTest'
157+
- store_test_results:
158+
path: /chronon/spark/target/test-reports
159+
- store_test_results:
160+
path: /chronon/aggregator/target/test-reports
161+
- run:
162+
name: Compress spark-warehouse
163+
command: |
164+
cd /tmp/ && tar -czvf spark-warehouse.tar.gz chronon/spark-warehouse
165+
when: on_fail
166+
- store_artifacts:
167+
path: /tmp/spark-warehouse.tar.gz
168+
destination: spark_warehouse.tar.gz
169+
when: on_fail
170+
"Scala 13 -- Iceberg Table Utils Tests":
171+
executor: docker_baseimg_executor
172+
steps:
173+
- checkout
174+
- run:
175+
name: Run Scala 13 tests for Iceberg Table Utils
176+
environment:
177+
format_test: iceberg
178+
shell: /bin/bash -leuxo pipefail
179+
command: |
180+
conda activate chronon_py
181+
# Increase if we see OOM.
182+
export SBT_OPTS="-XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xmx4G -Xms2G"
183+
sbt ';project spark_embedded; ++ 2.13.6; testOnly ai.chronon.spark.test.TableUtilsTest'
184+
- store_test_results:
185+
path: /chronon/spark/target/test-reports
186+
- store_test_results:
187+
path: /chronon/aggregator/target/test-reports
188+
- run:
189+
name: Compress spark-warehouse
190+
command: |
191+
cd /tmp/ && tar -czvf spark-warehouse.tar.gz chronon/spark-warehouse
192+
when: on_fail
193+
- store_artifacts:
194+
path: /tmp/spark-warehouse.tar.gz
195+
destination: spark_warehouse.tar.gz
196+
when: on_fail
136197

137198
workflows:
138199
build_test_deploy:
@@ -156,3 +217,9 @@ workflows:
156217
- "Chronon Python Lint":
157218
requires:
158219
- "Pull Docker Image"
220+
- "Scala 13 -- Iceberg Format Tests":
221+
requires:
222+
- "Pull Docker Image"
223+
- "Scala 13 -- Iceberg Table Utils Tests":
224+
requires:
225+
- "Pull Docker Image"

.gitignore

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@
1010
*.logs
1111
*.iml
1212
.idea/
13+
*.jvmopts
14+
.bloop*
15+
.metals*
16+
.venv*
17+
*metals.sbt*
1318
.eclipse
1419
**/.vscode/
1520
**/__pycache__/
@@ -21,7 +26,9 @@ api/py/.coverage
2126
api/py/htmlcov/
2227
**/derby.log
2328
cs
24-
29+
*.bloop
30+
*.metals
31+
*.venv
2532
# Documentation builds
2633
docs/build/
2734

@@ -33,6 +40,7 @@ api/py/build/
3340
api/py/ai/chronon/thrift/
3441
api/py/*.egg-info/
3542
*.egg
43+
.coverage
3644

3745
# spark test scratch area
3846
**/spark-warehouse/
@@ -43,3 +51,5 @@ mvn_settings.xml
4351
# Release folder
4452
releases
4553

54+
# bazel
55+
bazel-*

.ijwb/.bazelproject

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
directories:
2+
# Add the directories you want added as source here
3+
# By default, we've added your entire workspace ('.')
4+
.
5+
6+
# Automatically includes all relevant targets under the 'directories' above
7+
derive_targets_from_directories: true
8+
9+
targets:
10+
# If source code isn't resolving, add additional targets that compile it here
11+
12+
additional_languages:
13+
# Uncomment any additional languages you want supported
14+
python
15+
scala
16+
java

AUTHORS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ Vamsee Yarlagadda (Airbnb)
88
Hao Cen (Airbnb)
99
Donghan Zhang (Airbnb)
1010
Yuli Han (Airbnb)
11+
Praveen Kundurthy (Airbnb)
1112
Ben Mears (Stripe)
1213
Andrew Lee (Stripe)
1314
Cam Weston (Stripe)
@@ -16,3 +17,4 @@ Daniel Kristjansson (Stripe)
1617
Piyush Narang (Stripe)
1718
Caio Camatta (Stripe)
1819
Divya Manohar (Stripe)
20+
Krish Narukulla (Roku)

BUILD.bazel

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package(default_visibility = ["//visibility:public"])
2+
3+
load("@rules_python//python:pip.bzl", "compile_pip_requirements")
4+
5+
# To update py3_requirements_lock.txt, run:
6+
compile_pip_requirements(
7+
name = "pip",
8+
extra_args = [
9+
"--allow-unsafe",
10+
"--resolver=backtracking",
11+
],
12+
requirements_in = "//:requirements.txt",
13+
requirements_txt = "//:requirements_lock.txt",
14+
# force this to mac-only, since that's where we expect it to run.
15+
# remove + adopt requirements_[platform] arguments when we're on rules_python>=0.10.0
16+
tags = ["manual"],
17+
)
18+
19+
exports_files(
20+
["pom.xml.tpl"],
21+
)

CONTRIBUTING.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,18 @@ The process for reporting bugs and requesting smaller features is also outlined
3838

3939
Pull Requests (PRs) should follow these guidelines as much as possible:
4040

41+
### Creating a pull request from a fork
42+
Below is a general workflow to create a PR from a forked repository. By following these steps, we can avoid granting write permissions to the main repository.
43+
- [Fork the repository](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo)
44+
- Clone the forked repository
45+
- [Configuring a remote repository for a fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/configuring-a-remote-repository-for-a-fork)
46+
- Sync the forked repository with the main repository
47+
- [Syncing a fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork)
48+
- Create a new branch for your changes
49+
- Make your changes
50+
- Push the branch to your fork
51+
- [Create a PR from your branch to the main repository](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork)
52+
4153
### Code Guidelines
4254

4355
- Follow our [code style guidelines](docs/source/Code_Guidelines.md)

PROJECT_MANAGEMENT_COMMITTEE

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Airbnb (8 seats)
2+
Haozhen Ding
3+
Pallavi Adusumilli
4+
Pengyu Hou
5+
Praveen Kundurthy
6+
Rohit Girme
7+
Sophie Wang
8+
Sherry Li
9+
Yuli Han
10+
11+
# Stripe (6 seats)
12+
Ben Mears
13+
Caio Camatta
14+
Cam Weston
15+
Jeff Brooks
16+
Jeremy Robin
17+
Spencer McNamara

0 commit comments

Comments
 (0)