@@ -91,11 +91,11 @@ private[spark] object SamplingUtils {
9191 */
def computeFractionForSampleSize(sampleSizeLowerBound: Int, total: Long,
    withReplacement: Boolean): Double = {
  if (withReplacement) {
    // The value from PoissonBounds.getUpperBound is divided by `total` so that the
    // result of this branch is a rate, like the result of the other branch.
    PoissonBounds.getUpperBound(sampleSizeLowerBound) / total
  } else {
    // The target fraction is only needed on this branch, so it is computed here.
    val fraction = sampleSizeLowerBound.toDouble / total
    // 1e-4 is passed as the `delta` argument of BinomialBounds.getUpperBound.
    BinomialBounds.getUpperBound(1e-4, total, fraction)
  }
}
101101}
@@ -138,25 +138,25 @@ private[spark] object PoissonBounds {
138138 * Utility functions that help us determine bounds on adjusted sampling rate to guarantee exact
139139 * sample size with high confidence when sampling without replacement.
140140 */
141- private [spark] object BernoulliBounds {
141+ private [spark] object BinomialBounds {
142142
143143 val minSamplingRate = 1e-10
144144
/**
 * Returns a threshold `p` such that if we conduct n Bernoulli trials with success rate = `p`,
 * it is very unlikely to have more than `fraction * n` successes.
 *
 * @param delta enters the formula only through `-log(delta)`; presumably the tolerated
 *              failure probability (TODO confirm against callers)
 * @param n number of Bernoulli trials
 * @param fraction target fraction of successes
 * @return the threshold, never smaller than `minSamplingRate`
 */
def getLowerBound(delta: Double, n: Long, fraction: Double): Double = {
  val gamma = -math.log(delta) / n * (2.0 / 3.0)
  // The closed-form expression can drop to zero or below when `fraction` is small
  // relative to `gamma`, hence the floor at `minSamplingRate`.
  math.max(minSamplingRate,
    fraction + gamma - math.sqrt(gamma * gamma + 3 * gamma * fraction))
}
154154
/**
 * Returns a threshold `p` such that if we conduct n Bernoulli trials with success rate = `p`,
 * it is very unlikely to have less than `fraction * n` successes.
 *
 * @param delta enters the formula only through `-log(delta)`; presumably the tolerated
 *              failure probability (TODO confirm against callers)
 * @param n number of Bernoulli trials
 * @param fraction target fraction of successes
 * @return the threshold, capped at 1
 */
def getUpperBound(delta: Double, n: Long, fraction: Double): Double = {
  val gamma = -math.log(delta) / n
  // The sum can exceed 1 for small `n` or large `fraction`, so cap it at 1.
  math.min(1, fraction + gamma + math.sqrt(gamma * gamma + 2 * gamma * fraction))
}
0 commit comments