-
Notifications
You must be signed in to change notification settings - Fork 6
[SPARK-25299] Implement default version of the API for shuffle writes #6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
7160ce3
460f0ea
96d1774
64fb327
996e903
3b9d33c
1f1c159
0737515
1ded83d
3353155
7a79bd9
9e3f05c
9f6230b
14df750
8cf80f7
46a0174
1325903
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.shuffle.sort.io; | ||
|
|
||
| import org.apache.spark.SparkConf; | ||
| import org.apache.spark.api.shuffle.ShuffleExecutorComponents; | ||
| import org.apache.spark.api.shuffle.ShuffleDataIO; | ||
|
|
||
| /** | ||
| * {@link ShuffleDataIO} implementation that serves the default executor-side shuffle components. | ||
| * | ||
| * <p>The {@link SparkConf} given at construction is kept and forwarded unchanged to every | ||
| * {@link DefaultShuffleExecutorComponents} created by {@link #executor()}. | ||
| */ | ||
| public class DefaultShuffleDataIO implements ShuffleDataIO { | ||
|
|
||
| /** Configuration handed to each executor components instance. */ | ||
| private final SparkConf sparkConf; | ||
|
|
||
| /** | ||
| * @param sparkConf configuration stored as-is and passed on by {@link #executor()} | ||
| */ | ||
| public DefaultShuffleDataIO(SparkConf sparkConf) { | ||
| this.sparkConf = sparkConf; | ||
| } | ||
|
|
||
|
|
||
| /** | ||
| * Creates the executor-side components. | ||
| * | ||
| * @return a new {@link DefaultShuffleExecutorComponents} on every call; nothing is cached here | ||
| */ | ||
| @Override | ||
| public ShuffleExecutorComponents executor() { | ||
| return new DefaultShuffleExecutorComponents(sparkConf); | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.shuffle.sort.io; | ||
|
|
||
| import org.apache.spark.SparkConf; | ||
| import org.apache.spark.SparkEnv; | ||
| import org.apache.spark.TaskContext; | ||
| import org.apache.spark.api.shuffle.ShuffleExecutorComponents; | ||
| import org.apache.spark.api.shuffle.ShuffleWriteSupport; | ||
| import org.apache.spark.executor.TaskMetrics; | ||
| import org.apache.spark.shuffle.IndexShuffleBlockResolver; | ||
| import org.apache.spark.storage.BlockManager; | ||
|
|
||
| public class DefaultShuffleExecutorComponents implements ShuffleExecutorComponents { | ||
|
|
||
| private final SparkConf sparkConf; | ||
| private BlockManager blockManager; | ||
| private IndexShuffleBlockResolver blockResolver; | ||
| private TaskMetrics metrics; | ||
|
|
||
| public DefaultShuffleExecutorComponents(SparkConf sparkConf) { | ||
| this.sparkConf = sparkConf; | ||
| } | ||
|
|
||
| @Override | ||
| public void intitializeExecutor(String appId, String execId) { | ||
| blockManager = SparkEnv.get().blockManager(); | ||
| blockResolver = new IndexShuffleBlockResolver(sparkConf, blockManager); | ||
| metrics = TaskContext.get().taskMetrics(); | ||
|
||
| } | ||
|
|
||
| @Override | ||
| public ShuffleWriteSupport writes() { | ||
| if (blockResolver == null || metrics == null) { | ||
| throw new IllegalStateException( | ||
| "Executor components must be initialized before getting writers."); | ||
| } | ||
|
|
||
| return new DefaultShuffleWriteSupport( | ||
| sparkConf, blockResolver, metrics.shuffleWriteMetrics()); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
quick question: can the `appId` actually just be passed to the `ShuffleMapOutputWriter` through the `ShuffleDataIO`? It should be part of the sparkConf and shouldn't change in the executors, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It can, but it doesn't add much computation besides the above call to `getAppId()`, so it seems pretty unintrusive. However, the API was built so that you call:
[code snippet missing from this capture]
so I am a bit bound by that :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yea I'm just wondering whether we need that in the API or not, since some implementations, like this refactor one that we're doing, don't necessarily need it, although all remote implementations might. @mccheah thoughts?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, we can change the API; it should be passed through `ShuffleDataIO` - maybe `ShuffleExecutorComponents#initialize`?