We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent d14f2f1 commit c85e1e5Copy full SHA for c85e1e5
1 file changed
python/pyspark/join.py
@@ -33,10 +33,11 @@
33
34
from pyspark.resultiterable import ResultIterable
35
36
+
37
def _do_python_join(rdd, other, numPartitions, dispatch):
38
vs = rdd.map(lambda (k, v): (k, (1, v)))
39
ws = other.map(lambda (k, v): (k, (2, v)))
- return vs.union(ws).groupByKey(numPartitions).flatMapValues(lambda x : dispatch(x.__iter__()))
40
+ return vs.union(ws).groupByKey(numPartitions).flatMapValues(lambda x: dispatch(x.__iter__()))
41
42
43
def python_join(rdd, other, numPartitions):
@@ -85,6 +86,7 @@ def make_mapper(i):
85
86
vrdds = [rdd.map(make_mapper(i)) for i, rdd in enumerate(rdds)]
87
union_vrdds = reduce(lambda acc, other: acc.union(other), vrdds)
88
rdd_len = len(vrdds)
89
90
def dispatch(seq):
91
bufs = [[] for i in range(rdd_len)]
92
for (n, v) in seq:
0 commit comments