Skip to content

Commit 1809a87

Browse files
authored
Merge pull request apache#107 from mesosphere/SPARK-389-hdfs-tests
[WIP] [SPARK-389] teragen hdfs integration test
2 parents 60afd8f + 25d822a commit 1809a87

2 files changed

Lines changed: 55 additions & 26 deletions

File tree

bin/test.sh

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -60,35 +60,11 @@ configure_cli() {
6060
fi
6161
}
6262

63-
install_spark() {
64-
notify_github pending "Installing Spark"
65-
63+
setup_permissions() {
6664
if [ "$SECURITY" = "strict" ]; then
6765
# custom configuration to enable auth stuff:
6866
${COMMONS_TOOLS_DIR}/setup_permissions.sh nobody "*" # spark's default service.role
69-
echo '{ "service": { "user": "nobody", "principal": "service-acct", "secret_name": "secret" } }' > /tmp/spark.json
70-
dcos --log-level=INFO package install spark --options=/tmp/spark.json --yes
71-
else
72-
dcos --log-level=INFO package install spark --yes
73-
fi
74-
75-
if [ $? -ne 0 ]; then
76-
notify_github failure "Spark install failed"
77-
exit 1
7867
fi
79-
80-
SECONDS=0
81-
while [[ $(dcos marathon app list --json | jq '.[] | select(.id=="/spark") | .tasksHealthy') -ne "1" ]]
82-
do
83-
sleep 5
84-
if [ $SECONDS -gt 600 ]; then # 10 mins
85-
notify_github failure "Spark install timed out"
86-
exit 1
87-
fi
88-
done
89-
90-
# sleep 30s due to mesos-dns propagation delays to /service/sparkcli/
91-
sleep 30
9268
}
9369

9470
run_tests() {
@@ -113,7 +89,7 @@ fetch_commons_tools
11389
start_cluster
11490
# TODO: Migrate the following three commands to dcos-commons-tools/run-tests.py
11591
configure_cli
116-
install_spark
92+
setup_permissions
11793
run_tests
11894

11995
notify_github success "Tests Passed"

tests/test.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,64 @@
77

88
from boto.s3.connection import S3Connection
99
from boto.s3.key import Key
10+
import dcos.config
11+
import dcos.http
12+
import dcos.package
1013
import os
1114
import pytest
1215
import re
1316
import shakedown
1417
import subprocess
18+
import urllib
19+
20+
21+
def setup_module(module):
    """Prepare the cluster before any test in this module runs.

    Makes sure the ``hdfs`` package is present first, then installs Spark.
    The ``module`` argument is accepted but not used. Presumably this is
    picked up by pytest as the module-level setup hook -- confirm against
    the test runner configuration.
    """
    _require_package('hdfs')
    _install_spark()
24+
25+
26+
def _install_spark():
    """Install the ``spark`` package and wait until its endpoint returns 200.

    The package is always pointed at the HDFS connection endpoint. When the
    ``SECURITY`` environment variable equals ``strict``, service account
    settings (user, principal, secret name) are added to the install options.

    After installation, ``<core.dcos_url>/service/spark`` is polled until a
    GET request returns HTTP 200.
    """
    # A bare ``import urllib`` does not guarantee that the ``parse`` submodule
    # is loaded; import the function explicitly so this does not depend on
    # some other module having imported ``urllib.parse`` first.
    from urllib.parse import urljoin

    options = {
        "hdfs": {
            "config-url": "http://hdfs.marathon.mesos:9000/v1/connection",
        },
    }

    if os.environ.get('SECURITY') == 'strict':
        options['service'] = {
            "user": "nobody",
            "principal": "service-acct",
            "secret_name": "secret",
        }

    shakedown.install_package('spark', options_json=options, wait_for_completion=True)

    def pred():
        # True once the Spark service route answers with HTTP 200.
        dcos_url = dcos.config.get_config_val("core.dcos_url")
        spark_url = urljoin(dcos_url, "/service/spark")
        status_code = dcos.http.get(spark_url).status_code
        return status_code == 200

    shakedown.spinner.wait_for(pred)
45+
46+
47+
def _require_package(pkg_name):
    """Install ``pkg_name`` unless it already appears among installed packages.

    Args:
        pkg_name: Name of the package to look for and, if absent, install.

    For the ``hdfs`` package the install is followed by a wait (up to 600
    seconds) for ``_is_hdfs_ready`` to report success. Other packages only
    wait for the install itself to complete, since the HDFS task check says
    nothing about their readiness.
    """
    pkg_manager = dcos.package.get_package_manager()
    installed_pkgs = dcos.package.installed_packages(pkg_manager, None, None, False)
    if any(pkg['name'] == pkg_name for pkg in installed_pkgs):
        return

    shakedown.install_package(pkg_name, wait_for_completion=True)

    # NOTE(review): the readiness wait is kept on the fresh-install path only;
    # confirm that an already-installed HDFS does not also need this wait.
    if pkg_name == 'hdfs':
        shakedown.wait_for(_is_hdfs_ready, ignore_exceptions=False, timeout_seconds=600)
53+
54+
55+
# Number of running HDFS tasks required before the service counts as ready.
DEFAULT_HDFS_TASK_COUNT = 8


def _is_hdfs_ready(expected_tasks=DEFAULT_HDFS_TASK_COUNT):
    """Return True when enough ``hdfs`` service tasks are in ``TASK_RUNNING``.

    Args:
        expected_tasks: Minimum number of running tasks required.
    """
    running_count = sum(
        1
        for task in shakedown.get_service_tasks('hdfs')
        if task['state'] == 'TASK_RUNNING'
    )
    return running_count >= expected_tasks
60+
61+
62+
def test_teragen():
    """Run the TeraGen example jar against HDFS and check its output marker.

    Passes the jar URL, the application arguments, the expected output
    text and the submit arguments to ``_run_tests``.
    """
    jar_url = "https://downloads.mesosphere.io/spark/examples/spark-terasort-1.0-jar-with-dependencies_2.11.jar"
    app_args = "1g hdfs:///terasort_in"
    expected_output = "Number of records written"
    submit_args = {"--class": "com.github.ehiggs.spark.terasort.TeraGen"}
    _run_tests(jar_url, app_args, expected_output, submit_args)
1568

1669

1770
def test_jar():

0 commit comments

Comments
 (0)