Skip to content

Commit 926fa56

Browse files
committed
Fix python test.
1 parent d2e6bc2 commit 926fa56

1 file changed

Lines changed: 19 additions & 6 deletions

File tree

python/pyspark/mllib/clustering.py

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -571,12 +571,25 @@ class PowerIterationClusteringModel(JavaModelWrapper, JavaSaveable, JavaLoader):
571571
572572
Model produced by :py:class:`PowerIterationClustering`.
573573
574-
>>> data = [(0, 1, 1.0), (0, 2, 1.0), (0, 3, 1.0), (1, 2, 1.0), (1, 3, 1.0),
575-
... (2, 3, 1.0), (3, 4, 0.1), (4, 5, 1.0), (4, 15, 1.0), (5, 6, 1.0),
576-
... (6, 7, 1.0), (7, 8, 1.0), (8, 9, 1.0), (9, 10, 1.0), (10, 11, 1.0),
577-
... (11, 12, 1.0), (12, 13, 1.0), (13, 14, 1.0), (14, 15, 1.0)]
578-
>>> rdd = sc.parallelize(data, 2)
579-
>>> model = PowerIterationClustering.train(rdd, 2, 100)
574+
>>> import math
575+
>>> def genCircle(r, n):
576+
... points = []
577+
... for i in range(0, n):
578+
... theta = 2.0 * math.pi * i / n
579+
... points.append((r * math.cos(theta), r * math.sin(theta)))
580+
... return points
581+
>>> def sim(x, y):
582+
... dist2 = (x[0] - y[0]) * (x[0] - y[0]) + (x[1] - y[1]) * (x[1] - y[1])
583+
... return math.exp(-dist2 / 2.0)
584+
>>> r1 = 1.0
585+
>>> n1 = 10
586+
>>> r2 = 4.0
587+
>>> n2 = 40
588+
>>> n = n1 + n2
589+
>>> points = genCircle(r1, n1) + genCircle(r2, n2)
590+
>>> similarities = [(i, j, sim(points[i], points[j])) for i in range(1, n) for j in range(0, i)]
591+
>>> rdd = sc.parallelize(similarities, 2)
592+
>>> model = PowerIterationClustering.train(rdd, 2, 40)
580593
>>> model.k
581594
2
582595
>>> result = sorted(model.assignments().collect(), key=lambda x: x.id)

0 commit comments

Comments
 (0)