Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packaging/bundle-validation/ci_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ fi
# Copy bundle jars to temp dir for mounting.
# The directory is named after the current epoch second (date +%s).
# GITHUB_WORKSPACE and HUDI_VERSION are not assigned in this excerpt and must
# already be set when these lines run.
TMP_JARS_DIR="/tmp/jars/$(date +%s)"
mkdir -p "$TMP_JARS_DIR"
# Every expansion is quoted so that a workspace path or version string holding
# whitespace or glob characters is passed to cp intact. Only the `hudi-*-`
# wildcard stays unquoted, so the shell still expands it to the built jar name.
cp "${GITHUB_WORKSPACE}"/packaging/hudi-hadoop-mr-bundle/target/hudi-*-"${HUDI_VERSION}".jar "$TMP_JARS_DIR"/
cp "${GITHUB_WORKSPACE}"/packaging/hudi-spark-bundle/target/hudi-*-"${HUDI_VERSION}".jar "$TMP_JARS_DIR"/
cp "${GITHUB_WORKSPACE}"/packaging/hudi-utilities-bundle/target/hudi-*-"${HUDI_VERSION}".jar "$TMP_JARS_DIR"/
cp "${GITHUB_WORKSPACE}"/packaging/hudi-utilities-slim-bundle/target/hudi-*-"${HUDI_VERSION}".jar "$TMP_JARS_DIR"/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,5 @@ df.write.format("hudi").
save(basePath)

// NOTE(review): this span looks like a rendered diff hunk with removed and
// added lines shown together (the hunk header shrinks 11 lines to 5).
// Presumably the count check and if/else below are the removed lines and the
// final System.exit(0) is their replacement -- confirm against the real file.

// Print the table description returned by `desc <tableName>`.
spark.sql("desc " + tableName).show
// Count the rows currently readable from the table.
val actual = spark.sql("select * from " + tableName).count
// `expected` and `database` are defined outside this excerpt.
if (expected == actual) {
// NOTE(review): `$"..."` is not the standard `s"..."` string interpolator;
// confirm that plain string interpolation was what was intended here.
System.out.println($"bundle combination passed sanity run.")
System.exit(0)
} else {
// Mismatch: report on stderr and exit with status 1.
System.err.println($"bundle combination failed sanity run:\n\tshould have written $expected records in $database.$tableName")
System.exit(1)
}

// As written, both branches above already exit, so this line is only reached
// if the if/else is absent from the real file.
System.exit(0)
39 changes: 29 additions & 10 deletions packaging/bundle-validation/validate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,31 +28,50 @@
# Base directory of the validation run; the functions below read their
# spark-shell scripts from under it.
WORKDIR=/opt/bundle-validation
# Directory that holds the bundle jars under test.
JARS_DIR=${WORKDIR}/jars
# Symlink each versioned bundle jar to a short, version-independent name so
# the rest of the script can refer to a fixed path. `-f` replaces a link left
# over from an earlier run.
# NOTE(review): assumes each glob matches exactly one jar -- TODO confirm; with
# several matches `ln` would be handed more than one source operand.
ln -sf $JARS_DIR/hudi-hadoop-mr*.jar $JARS_DIR/hadoop-mr.jar
ln -sf $JARS_DIR/hudi-spark*.jar $JARS_DIR/spark.jar
ln -sf $JARS_DIR/hudi-utilities-bundle*.jar $JARS_DIR/utilities.jar
ln -sf $JARS_DIR/hudi-utilities-slim*.jar $JARS_DIR/utilities-slim.jar


##
# Function to test the spark bundle with hive sync.
# Function to test the spark & hadoop-mr bundles with hive sync.
#
# NOTE(review): two function headers (test_spark_bundle and
# test_spark_hadoop_mr_bundles) open below but only one closing brace follows,
# and two description lines appear above. This looks like a rendered diff with
# removed and added lines shown together without +/- markers -- confirm against
# the real validate.sh before running or editing this span.
#
# Visible steps:
#   1. start the Derby network server and hiveserver2 in the background
#   2. pipe a Scala script into spark-shell with the spark bundle on --jars
#   3. query table `trips` through Spark SQL, write the rows as CSV, count lines
#   4. query table `trips` through beeline, write the rows as CSV, count lines
# Each count must equal 10; otherwise the function calls `exit 1`, which ends
# the whole script rather than just returning from the function.
#
# env vars (defined in container):
# HIVE_HOME: path to the hive directory
# DERBY_HOME: path to the derby directory
# SPARK_HOME: path to the spark directory
#
# Script variables read: JARS_DIR, WORKDIR
##
test_spark_bundle () {
echo "::warning::validate.sh setting up hive metastore for spark bundle validation"
test_spark_hadoop_mr_bundles () {
echo "::warning::validate.sh setting up hive metastore for spark & hadoop-mr bundles validation"

# Both servers are launched with `&`; nothing visible here waits for them to
# become ready before the spark-shell / beeline commands run.
$DERBY_HOME/bin/startNetworkServer -h 0.0.0.0 &
$HIVE_HOME/bin/hiveserver2 &
echo "::warning::validate.sh hive metastore setup complete. Testing"
$SPARK_HOME/bin/spark-shell --jars $JARS_DIR/spark.jar < $WORKDIR/spark/validate.scala
if [ "$?" -ne 0 ]; then
echo "::error::validate.sh failed hive testing"
# This hiveserver2 invocation additionally passes the hadoop-mr bundle through
# hive.aux.jars.path.
$HIVE_HOME/bin/hiveserver2 --hiveconf hive.aux.jars.path=$JARS_DIR/hadoop-mr.jar &
echo "::warning::validate.sh Writing sample data via Spark DataSource and run Hive Sync..."
# The script is fed to spark-shell on stdin; the exit status of this run is
# not inspected afterwards.
$SPARK_HOME/bin/spark-shell --jars $JARS_DIR/spark.jar < $WORKDIR/spark_hadoop_mr/write.scala

echo "::warning::validate.sh Query and validate the results using Spark SQL"
# save Spark SQL query results
# The one-line Scala snippet is handed to spark-shell via process substitution
# (-i <(...)): it writes `select * from trips` as CSV, coalesced to a single
# partition, under /tmp/sparksql/trips/results and then exits the shell.
$SPARK_HOME/bin/spark-shell --jars $JARS_DIR/spark.jar \
-i <(echo 'spark.sql("select * from trips").coalesce(1).write.csv("/tmp/sparksql/trips/results"); System.exit(0)')
# One CSV line per row, so the line count is the record count.
numRecordsSparkSQL=$(cat /tmp/sparksql/trips/results/*.csv | wc -l)
# 10 is the expected record count -- presumably the number of rows written by
# write.scala; confirm against that script.
if [ "$numRecordsSparkSQL" -ne 10 ]; then
echo "::error::validate.sh Spark SQL validation failed."
exit 1
fi
echo "::warning::validate.sh Query and validate the results using HiveQL"
# save HiveQL query results
hiveqlresultsdir=/tmp/hiveql/trips/results
mkdir -p $hiveqlresultsdir
# beeline connects to jdbc:hive2://localhost:10000/default with
# HoodieParquetInputFormat set as hive.input.format. The header row is
# suppressed and csv2 output is used so every output line is one record.
# The output is appended (>>), so a results.csv left in this directory by an
# earlier run would add to the count below.
$HIVE_HOME/bin/beeline --hiveconf hive.input.format=org.apache.hudi.hadoop.HoodieParquetInputFormat \
-u jdbc:hive2://localhost:10000/default --showHeader=false --outputformat=csv2 \
-e 'select * from trips' >> $hiveqlresultsdir/results.csv
numRecordsHiveQL=$(cat $hiveqlresultsdir/*.csv | wc -l)
if [ "$numRecordsHiveQL" -ne 10 ]; then
echo "::error::validate.sh HiveQL validation failed."
exit 1
fi
echo "::warning::validate.sh spark bundle validation successful"
echo "::warning::validate.sh spark & hadoop-mr bundles validation was successful."
}


Expand Down Expand Up @@ -112,7 +131,7 @@ test_utilities_bundle () {
}


# Run the spark bundle validation and abort the script if it reports failure.
# NOTE(review): two call lines appear back to back; this looks like the removed
# and added line of a diff shown together -- confirm which one the real file
# contains.
test_spark_bundle
test_spark_hadoop_mr_bundles
# `$?` holds the status of the command immediately above this test.
if [ "$?" -ne 0 ]; then
exit 1
fi
Expand Down