Commit 9149027

Merge branch 'master' of git://git.apache.org/spark into add-triple-slash-testcase

2 parents: c1c80ca + c6e0c2a
18 files changed: 487 additions & 854 deletions

core/src/main/java/org/apache/spark/api/java/function/package.scala
Lines changed: 1 addition & 1 deletion

@@ -22,4 +22,4 @@ package org.apache.spark.api.java
  * these interfaces to pass functions to various Java API methods for Spark. Please visit Spark's
  * Java programming guide for more details.
  */
-package object function
+package object function

dev/create-release/create-release.sh
Lines changed: 105 additions & 33 deletions

@@ -28,13 +28,19 @@
 # - Send output to stderr and have useful logging in stdout
 
 # Note: The following variables must be set before use!
-GIT_USERNAME=${GIT_USERNAME:-pwendell}
-GIT_PASSWORD=${GIT_PASSWORD:-XXX}
+ASF_USERNAME=${ASF_USERNAME:-pwendell}
+ASF_PASSWORD=${ASF_PASSWORD:-XXX}
 GPG_PASSPHRASE=${GPG_PASSPHRASE:-XXX}
 GIT_BRANCH=${GIT_BRANCH:-branch-1.0}
-RELEASE_VERSION=${RELEASE_VERSION:-1.0.0}
+RELEASE_VERSION=${RELEASE_VERSION:-1.2.0}
+NEXT_VERSION=${NEXT_VERSION:-1.2.1}
 RC_NAME=${RC_NAME:-rc2}
-USER_NAME=${USER_NAME:-pwendell}
+
+M2_REPO=~/.m2/repository
+SPARK_REPO=$M2_REPO/org/apache/spark
+NEXUS_ROOT=https://repository.apache.org/service/local/staging
+NEXUS_UPLOAD=$NEXUS_ROOT/deploy/maven2
+NEXUS_PROFILE=d63f592e7eac0 # Profile for Spark staging uploads
 
 if [ -z "$JAVA_HOME" ]; then
   echo "Error: JAVA_HOME is not set, cannot proceed."
@@ -47,31 +53,90 @@ set -e
 GIT_TAG=v$RELEASE_VERSION-$RC_NAME
 
 if [[ ! "$@" =~ --package-only ]]; then
-  echo "Creating and publishing release"
+  echo "Creating release commit and publishing to Apache repository"
   # Artifact publishing
-  git clone https://git-wip-us.apache.org/repos/asf/spark.git -b $GIT_BRANCH
-  cd spark
+  git clone https://$ASF_USERNAME:$ASF_PASSWORD@git-wip-us.apache.org/repos/asf/spark.git \
+    -b $GIT_BRANCH
+  pushd spark
   export MAVEN_OPTS="-Xmx3g -XX:MaxPermSize=1g -XX:ReservedCodeCacheSize=1g"
 
-  mvn -Pyarn release:clean
-
-  mvn -DskipTests \
-    -Darguments="-DskipTests=true -Dmaven.javadoc.skip=true -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 -Dgpg.passphrase=${GPG_PASSPHRASE}" \
-    -Dusername=$GIT_USERNAME -Dpassword=$GIT_PASSWORD \
-    -Dmaven.javadoc.skip=true \
-    -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
-    -Dtag=$GIT_TAG -DautoVersionSubmodules=true \
-    -Pyarn -Phive -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
-    --batch-mode release:prepare
-
-  mvn -DskipTests \
-    -Darguments="-DskipTests=true -Dmaven.javadoc.skip=true -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 -Dgpg.passphrase=${GPG_PASSPHRASE}" \
-    -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
-    -Dmaven.javadoc.skip=true \
+  # Create release commits and push them to github
+  # NOTE: This is done "eagerly" i.e. we don't check if we can succesfully build
+  # or before we coin the release commit. This helps avoid races where
+  # other people add commits to this branch while we are in the middle of building.
+  old=" <version>${RELEASE_VERSION}-SNAPSHOT<\/version>"
+  new=" <version>${RELEASE_VERSION}<\/version>"
+  find . -name pom.xml -o -name package.scala | grep -v dev | xargs -I {} sed -i \
+    -e "s/$old/$new/" {}
+  git commit -a -m "Preparing Spark release $GIT_TAG"
+  echo "Creating tag $GIT_TAG at the head of $GIT_BRANCH"
+  git tag $GIT_TAG
+
+  old=" <version>${RELEASE_VERSION}<\/version>"
+  new=" <version>${NEXT_VERSION}-SNAPSHOT<\/version>"
+  find . -name pom.xml -o -name package.scala | grep -v dev | xargs -I {} sed -i \
+    -e "s/$old/$new/" {}
+  git commit -a -m "Preparing development version ${NEXT_VERSION}-SNAPSHOT"
+  git push origin $GIT_TAG
+  git push origin HEAD:$GIT_BRANCH
+  git checkout -f $GIT_TAG
+
+  # Using Nexus API documented here:
+  # https://support.sonatype.com/entries/39720203-Uploading-to-a-Staging-Repository-via-REST-API
+  echo "Creating Nexus staging repository"
+  repo_request="<promoteRequest><data><description>Apache Spark $GIT_TAG</description></data></promoteRequest>"
+  out=$(curl -X POST -d "$repo_request" -u $ASF_USERNAME:$ASF_PASSWORD \
+    -H "Content-Type:application/xml" -v \
+    $NEXUS_ROOT/profiles/$NEXUS_PROFILE/start)
+  staged_repo_id=$(echo $out | sed -e "s/.*\(orgapachespark-[0-9]\{4\}\).*/\1/")
+  echo "Created Nexus staging repository: $staged_repo_id"
+
+  rm -rf $SPARK_REPO
+
+  mvn -DskipTests -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
     -Pyarn -Phive -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
-    release:perform
+    clean install
 
-  cd ..
+  ./dev/change-version-to-2.11.sh
+
+  mvn -DskipTests -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
+    -Dscala-2.11 -Pyarn -Phive -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
+    clean install
+
+  ./dev/change-version-to-2.10.sh
+
+  pushd $SPARK_REPO
+
+  # Remove any extra files generated during install
+  find . -type f |grep -v \.jar |grep -v \.pom | xargs rm
+
+  echo "Creating hash and signature files"
+  for file in $(find . -type f)
+  do
+    echo $GPG_PASSPHRASE | gpg --passphrase-fd 0 --output $file.asc --detach-sig --armour $file;
+    gpg --print-md MD5 $file > $file.md5;
+    gpg --print-md SHA1 $file > $file.sha1
+  done
+
+  echo "Uplading files to $NEXUS_UPLOAD"
+  for file in $(find . -type f)
+  do
+    # strip leading ./
+    file_short=$(echo $file | sed -e "s/\.\///")
+    dest_url="$NEXUS_UPLOAD/org/apache/spark/$file_short"
+    echo "  Uploading $file_short"
+    curl -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url
+  done
+
+  echo "Closing nexus staging repository"
+  repo_request="<promoteRequest><data><stagedRepositoryId>$staged_repo_id</stagedRepositoryId><description>Apache Spark $GIT_TAG</description></data></promoteRequest>"
+  out=$(curl -X POST -d "$repo_request" -u $ASF_USERNAME:$ASF_PASSWORD \
+    -H "Content-Type:application/xml" -v \
+    $NEXUS_ROOT/profiles/$NEXUS_PROFILE/finish)
+  echo "Closed Nexus staging repository: $staged_repo_id"
+
+  popd
+  popd
   rm -rf spark
 fi
 
@@ -102,6 +167,12 @@ make_binary_release() {
   cp -r spark spark-$RELEASE_VERSION-bin-$NAME
 
   cd spark-$RELEASE_VERSION-bin-$NAME
+
+  # TODO There should probably be a flag to make-distribution to allow 2.11 support
+  if [[ $FLAGS == *scala-2.11* ]]; then
+    ./dev/change-version-to-2.11.sh
+  fi
+
   ./make-distribution.sh --name $NAME --tgz $FLAGS 2>&1 | tee ../binary-release-$NAME.log
   cd ..
   cp spark-$RELEASE_VERSION-bin-$NAME/spark-$RELEASE_VERSION-bin-$NAME.tgz .
@@ -118,22 +189,23 @@ make_binary_release() {
   spark-$RELEASE_VERSION-bin-$NAME.tgz.sha
 }
 
+
 make_binary_release "hadoop1" "-Phive -Phive-thriftserver -Dhadoop.version=1.0.4" &
+make_binary_release "hadoop1-scala2.11" "-Phive -Dscala-2.11" &
 make_binary_release "cdh4" "-Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" &
 make_binary_release "hadoop2.3" "-Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" &
 make_binary_release "hadoop2.4" "-Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" &
-make_binary_release "hadoop2.4-without-hive" "-Phadoop-2.4 -Pyarn" &
 make_binary_release "mapr3" "-Pmapr3 -Phive -Phive-thriftserver" &
 make_binary_release "mapr4" "-Pmapr4 -Pyarn -Phive -Phive-thriftserver" &
 wait
 
 # Copy data
 echo "Copying release tarballs"
 rc_folder=spark-$RELEASE_VERSION-$RC_NAME
-ssh $USER_NAME@people.apache.org \
-  mkdir /home/$USER_NAME/public_html/$rc_folder
+ssh $ASF_USERNAME@people.apache.org \
+  mkdir /home/$ASF_USERNAME/public_html/$rc_folder
 scp spark-* \
-  $USER_NAME@people.apache.org:/home/$USER_NAME/public_html/$rc_folder/
+  $ASF_USERNAME@people.apache.org:/home/$ASF_USERNAME/public_html/$rc_folder/
 
 # Docs
 cd spark
@@ -143,12 +215,12 @@ cd docs
 JAVA_HOME=$JAVA_7_HOME PRODUCTION=1 jekyll build
 echo "Copying release documentation"
 rc_docs_folder=${rc_folder}-docs
-ssh $USER_NAME@people.apache.org \
-  mkdir /home/$USER_NAME/public_html/$rc_docs_folder
-rsync -r _site/* $USER_NAME@people.apache.org:/home/$USER_NAME/public_html/$rc_docs_folder
+ssh $ASF_USERNAME@people.apache.org \
+  mkdir /home/$ASF_USERNAME/public_html/$rc_docs_folder
+rsync -r _site/* $ASF_USERNAME@people.apache.org:/home/$ASF_USERNAME/public_html/$rc_docs_folder
 
 echo "Release $RELEASE_VERSION completed:"
 echo "Git tag:\t $GIT_TAG"
 echo "Release commit:\t $release_hash"
-echo "Binary location:\t http://people.apache.org/~$USER_NAME/$rc_folder"
-echo "Doc location:\t http://people.apache.org/~$USER_NAME/$rc_docs_folder"
+echo "Binary location:\t http://people.apache.org/~$ASF_USERNAME/$rc_folder"
+echo "Doc location:\t http://people.apache.org/~$ASF_USERNAME/$rc_docs_folder"

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
Lines changed: 14 additions & 2 deletions

@@ -32,11 +32,15 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
   override def nullable = (child.dataType, dataType) match {
     case (StringType, _: NumericType) => true
     case (StringType, TimestampType) => true
+    case (DoubleType, TimestampType) => true
+    case (FloatType, TimestampType) => true
     case (StringType, DateType) => true
     case (_: NumericType, DateType) => true
     case (BooleanType, DateType) => true
     case (DateType, _: NumericType) => true
     case (DateType, BooleanType) => true
+    case (DoubleType, _: DecimalType) => true
+    case (FloatType, _: DecimalType) => true
     case (_, DecimalType.Fixed(_, _)) => true // TODO: not all upcasts here can really give null
     case _ => child.nullable
   }
@@ -115,10 +119,18 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
       buildCast[Decimal](_, d => decimalToTimestamp(d))
     // TimestampWritable.doubleToTimestamp
     case DoubleType =>
-      buildCast[Double](_, d => decimalToTimestamp(Decimal(d)))
+      buildCast[Double](_, d => try {
+        decimalToTimestamp(Decimal(d))
+      } catch {
+        case _: NumberFormatException => null
+      })
     // TimestampWritable.floatToTimestamp
     case FloatType =>
-      buildCast[Float](_, f => decimalToTimestamp(Decimal(f)))
+      buildCast[Float](_, f => try {
+        decimalToTimestamp(Decimal(f))
+      } catch {
+        case _: NumberFormatException => null
+      })
   }
 
   private[this] def decimalToTimestamp(d: Decimal) = {
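For context on the new try/catch: Catalyst's Decimal is backed by java.math.BigDecimal, whose construction from a Double rejects NaN and the infinities with NumberFormatException. Catching that exception turns a cast of a non-finite Double or Float into a SQL NULL instead of a runtime failure, which is also why these casts are now marked nullable above. A minimal illustrative sketch (not Spark code):

    import java.math.BigDecimal

    // BigDecimal's Double constructor throws NumberFormatException for
    // NaN and +/-Infinity; the new catch clauses convert that to null.
    def toDecimalOrNull(d: Double): BigDecimal =
      try new BigDecimal(d)
      catch { case _: NumberFormatException => null }

    toDecimalOrNull(10.03)       // a BigDecimal close to 10.03
    toDecimalOrNull(Double.NaN)  // null
    toDecimalOrNull(1.0 / 0.0)   // null (positive infinity)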

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala
Lines changed: 4 additions & 2 deletions

@@ -215,9 +215,11 @@ class GenericRow(protected[sql] val values: Array[Any]) extends Row {
   def copy() = this
 }
 
-class GenericMutableRow(size: Int) extends GenericRow(size) with MutableRow {
+class GenericMutableRow(v: Array[Any]) extends GenericRow(v) with MutableRow {
   /** No-arg constructor for serialization. */
-  def this() = this(0)
+  def this() = this(null)
+
+  def this(size: Int) = this(new Array[Any](size))
 
   override def setBoolean(ordinal: Int, value: Boolean): Unit = { values(ordinal) = value }
   override def setByte(ordinal: Int, value: Byte): Unit = { values(ordinal) = value }
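With this change a GenericMutableRow can wrap an existing Array[Any] directly instead of always allocating a fresh one, while the old size-based form survives as an auxiliary constructor. A hypothetical usage sketch, assuming GenericMutableRow is in scope:

    val backing = Array[Any](1, "a", null)
    val wrapped = new GenericMutableRow(backing) // shares `backing`, no copy
    val fresh   = new GenericMutableRow(3)       // allocates a new Array[Any](3), as before
    wrapped.setInt(0, 42)                        // mutation is visible through `backing`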

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
Lines changed: 1 addition & 0 deletions

@@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.util.Metadata
 object NamedExpression {
   private val curId = new java.util.concurrent.atomic.AtomicLong()
   def newExprId = ExprId(curId.getAndIncrement())
+  def unapply(expr: NamedExpression): Option[(String, DataType)] = Some(expr.name, expr.dataType)
 }
 
 /**
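The added unapply turns the NamedExpression companion object into an extractor, so rules can match any named expression and bind its name and data type in a single pattern. A hypothetical match, where `expr` stands for some Expression under analysis:

    expr match {
      case NamedExpression(name, dataType) => println(s"$name: $dataType")
      case _                               => println("not a named expression")
    }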

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
Lines changed: 17 additions & 2 deletions

@@ -347,8 +347,8 @@ class ExpressionEvaluationSuite extends FunSuite {
     // - Because of this, casts to fixed-precision decimals should be nullable
 
     assert(Cast(Literal(123), DecimalType.Unlimited).nullable === false)
-    assert(Cast(Literal(10.03f), DecimalType.Unlimited).nullable === false)
-    assert(Cast(Literal(10.03), DecimalType.Unlimited).nullable === false)
+    assert(Cast(Literal(10.03f), DecimalType.Unlimited).nullable === true)
+    assert(Cast(Literal(10.03), DecimalType.Unlimited).nullable === true)
     assert(Cast(Literal(Decimal(10.03)), DecimalType.Unlimited).nullable === false)
 
     assert(Cast(Literal(123), DecimalType(2, 1)).nullable === true)
@@ -396,6 +396,16 @@ class ExpressionEvaluationSuite extends FunSuite {
     checkEvaluation(Cast(Literal(-9.95), DecimalType(1, 0)), null)
     checkEvaluation(Cast(Literal(Decimal(-9.95)), DecimalType(3, 1)), Decimal(-10.0))
     checkEvaluation(Cast(Literal(Decimal(-9.95)), DecimalType(1, 0)), null)
+
+    checkEvaluation(Cast(Literal(Double.NaN), DecimalType.Unlimited), null)
+    checkEvaluation(Cast(Literal(1.0 / 0.0), DecimalType.Unlimited), null)
+    checkEvaluation(Cast(Literal(Float.NaN), DecimalType.Unlimited), null)
+    checkEvaluation(Cast(Literal(1.0f / 0.0f), DecimalType.Unlimited), null)
+
+    checkEvaluation(Cast(Literal(Double.NaN), DecimalType(2, 1)), null)
+    checkEvaluation(Cast(Literal(1.0 / 0.0), DecimalType(2, 1)), null)
+    checkEvaluation(Cast(Literal(Float.NaN), DecimalType(2, 1)), null)
+    checkEvaluation(Cast(Literal(1.0f / 0.0f), DecimalType(2, 1)), null)
   }
 
   test("timestamp") {
@@ -440,6 +450,11 @@ class ExpressionEvaluationSuite extends FunSuite {
 
     // A test for higher precision than millis
    checkEvaluation(Cast(Cast(0.00000001, TimestampType), DoubleType), 0.00000001)
+
+    checkEvaluation(Cast(Literal(Double.NaN), TimestampType), null)
+    checkEvaluation(Cast(Literal(1.0 / 0.0), TimestampType), null)
+    checkEvaluation(Cast(Literal(Float.NaN), TimestampType), null)
+    checkEvaluation(Cast(Literal(1.0f / 0.0f), TimestampType), null)
   }
 
   test("null checking") {

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
Lines changed: 9 additions & 16 deletions

@@ -209,22 +209,15 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
       case PhysicalOperation(projectList, filters: Seq[Expression], relation: ParquetRelation) =>
         val prunePushedDownFilters =
           if (sqlContext.parquetFilterPushDown) {
-            (filters: Seq[Expression]) => {
-              filters.filter { filter =>
-                // Note: filters cannot be pushed down to Parquet if they contain more complex
-                // expressions than simple "Attribute cmp Literal" comparisons. Here we remove
-                // all filters that have been pushed down. Note that a predicate such as
-                // "(A AND B) OR C" can result in "A OR C" being pushed down.
-                val recordFilter = ParquetFilters.createFilter(filter)
-                if (!recordFilter.isDefined) {
-                  // First case: the pushdown did not result in any record filter.
-                  true
-                } else {
-                  // Second case: a record filter was created; here we are conservative in
-                  // the sense that even if "A" was pushed and we check for "A AND B" we
-                  // still want to keep "A AND B" in the higher-level filter, not just "B".
-                  !ParquetFilters.findExpression(recordFilter.get, filter).isDefined
-                }
+            (predicates: Seq[Expression]) => {
+              // Note: filters cannot be pushed down to Parquet if they contain more complex
+              // expressions than simple "Attribute cmp Literal" comparisons. Here we remove all
+              // filters that have been pushed down. Note that a predicate such as "(A AND B) OR C"
+              // can result in "A OR C" being pushed down. Here we are conservative in the sense
+              // that even if "A" was pushed and we check for "A AND B" we still want to keep
+              // "A AND B" in the higher-level filter, not just "B".
+              predicates.map(p => p -> ParquetFilters.createFilter(p)).collect {
+                case (predicate, None) => predicate
               }
             }
           } else {