
Commit 081e6fe

Davies Liu committed

fix style
1 parent 6676e74 commit 081e6fe

File tree

9 files changed (+22 -15 lines)


sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala

Lines changed: 1 addition & 1 deletion
@@ -156,7 +156,7 @@ object DecimalType extends AbstractDataType {
   def is32BitDecimalType(dt: DataType): Boolean = {
     dt match {
       case t: DecimalType =>
-        t.precision <= Decimal.MAX_LONG_DIGITS
+        t.precision <= Decimal.MAX_INT_DIGITS
       case _ => false
     }
   }
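The renamed constant is the substantive fix in this hunk: Spark's Decimal keeps unscaled values of up to Decimal.MAX_INT_DIGITS (9) digits in an Int and up to Decimal.MAX_LONG_DIGITS (18) digits in a Long, so the old check classified 10-18 digit decimals as 32-bit. A minimal sketch of the corrected behavior, assuming the constant values from Spark's Decimal object and that the check is callable from test code:

import org.apache.spark.sql.types.DecimalType

// MAX_INT_DIGITS = 9: the widest precision whose unscaled value always fits in an Int.
// The old code compared against MAX_LONG_DIGITS = 18, the 64-bit threshold.
assert(DecimalType.is32BitDecimalType(DecimalType(9, 2)))   // stored as an Int
assert(!DecimalType.is32BitDecimalType(DecimalType(10, 2))) // needs a Long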

sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/UnsafeRowParquetRecordReader.java

Lines changed: 1 addition & 1 deletion
@@ -699,7 +699,7 @@ private void decodeDictionaryIds(int rowId, int num, ColumnVector column) {
         break;

       case FIXED_LEN_BYTE_ARRAY:
-        // This is the legacy mode to write DecimalType
+        // DecimalType written in the legacy mode
         if (DecimalType.is32BitDecimalType(column.dataType())) {
           for (int i = rowId; i < rowId + num; ++i) {
             Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));

sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java

Lines changed: 11 additions & 2 deletions
@@ -159,7 +159,7 @@ public Object[] array() {
     } else if (dt instanceof StringType) {
       for (int i = 0; i < length; i++) {
         if (!data.getIsNull(offset + i)) {
-          list[i] = ColumnVectorUtils.toString(data.getByteArray(offset + i));
+          list[i] = getUTF8String(i).toString();
         }
       }
     } else if (dt instanceof CalendarIntervalType) {

@@ -525,12 +525,15 @@ public final int putByteArray(int rowId, byte[] value) {
   /**
    * Returns the value for rowId.
    */
-  public final Array getByteArray(int rowId) {
+  private Array getByteArray(int rowId) {
     Array array = getArray(rowId);
     array.data.loadBytes(array);
     return array;
   }

+  /**
+   * Returns the decimal for rowId.
+   */
   public final Decimal getDecimal(int rowId, int precision, int scale) {
     if (precision <= Decimal.MAX_INT_DIGITS()) {
       return Decimal.apply(getInt(rowId), precision, scale);

@@ -545,6 +548,9 @@ public final Decimal getDecimal(int rowId, int precision, int scale) {
     }
   }

+  /**
+   * Returns the UTF8String for rowId.
+   */
   public final UTF8String getUTF8String(int rowId) {
     if (dictionary == null) {
       ColumnVector.Array a = getByteArray(rowId);

@@ -555,6 +561,9 @@ public final UTF8String getUTF8String(int rowId) {
     }
   }

+  /**
+   * Returns the byte array for rowId.
+   */
   public final byte[] getBinary(int rowId) {
     if (dictionary == null) {
       ColumnVector.Array array = getByteArray(rowId);
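With getByteArray now private, callers read string columns through the typed accessor, which also covers the dictionary-encoded path. A hedged sketch of the new call pattern; the helper name and the populated column are assumptions for illustration:

import org.apache.spark.sql.execution.vectorized.ColumnVector

// Hypothetical helper: `column` is assumed to hold StringType data.
// getUTF8String works for both plain and dictionary-encoded columns and
// replaces the removed ColumnVectorUtils.toString(getByteArray(rowId)) idiom.
def readString(column: ColumnVector, rowId: Int): String =
  column.getUTF8String(rowId).toString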

sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java

Lines changed: 2 additions & 6 deletions
@@ -22,24 +22,20 @@
 import java.util.Iterator;
 import java.util.List;

+import org.apache.commons.lang.NotImplementedException;
+
 import org.apache.spark.memory.MemoryMode;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.catalyst.util.DateTimeUtils;
 import org.apache.spark.sql.types.*;
 import org.apache.spark.unsafe.types.CalendarInterval;

-import org.apache.commons.lang.NotImplementedException;
-
 /**
  * Utilities to help manipulate data associate with ColumnVectors. These should be used mostly
  * for debugging or other non-performance critical paths.
  * These utilities are mostly used to convert ColumnVectors into other formats.
  */
 public class ColumnVectorUtils {
-  public static String toString(ColumnVector.Array a) {
-    return new String(a.byteArray, a.byteArrayOffset, a.length);
-  }
-
   /**
    * Returns the array data as the java primitive array.
    * For example, an array of IntegerType will return an int[].

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala

Lines changed: 1 addition & 0 deletions
@@ -24,6 +24,7 @@ import org.apache.parquet.schema._
 import org.apache.parquet.schema.OriginalType._
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._
 import org.apache.parquet.schema.Type.Repetition._
+
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.execution.datasources.parquet.CatalystSchemaConverter.maxPrecisionForBytes
 import org.apache.spark.sql.internal.SQLConf

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystWriteSupport.scala

Lines changed: 1 addition & 0 deletions
@@ -28,6 +28,7 @@ import org.apache.parquet.hadoop.ParquetOutputFormat
 import org.apache.parquet.hadoop.api.WriteSupport
 import org.apache.parquet.hadoop.api.WriteSupport.WriteContext
 import org.apache.parquet.io.api.{Binary, RecordConsumer}
+
 import org.apache.spark.Logging
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.SpecializedGetters
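This and the CatalystSchemaConverter hunk are the literal "style" fixes: Spark's Scala import ordering separates third-party groups from org.apache.spark imports with a blank line. A sketch of the convention as applied in these two files; the import names below are illustrative only:

// Assumed Spark import grouping: java/scala core, then third-party
// (e.g. org.apache.parquet), then org.apache.spark, with a blank line
// between groups.
import java.util.Locale

import org.apache.parquet.io.api.Binary

import org.apache.spark.sql.types.DecimalType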

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ class ParquetEncodingSuite extends ParquetCompatibilityTest with SharedSQLContex
       assert(batch.column(0).getByte(i) == 1)
       assert(batch.column(1).getInt(i) == 2)
       assert(batch.column(2).getLong(i) == 3)
-      assert(ColumnVectorUtils.toString(batch.column(3).getByteArray(i)) == "abc")
+      assert(batch.column(3).getUTF8String(i).toString == "abc")
       i += 1
     }
     reader.close()

sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala

Lines changed: 1 addition & 1 deletion
@@ -370,7 +370,7 @@ object ColumnarBatchBenchmark {
       }
       i = 0
       while (i < count) {
-        sum += column.getByteArray(i).length
+        sum += column.getUTF8String(i).numBytes()
         i += 1
       }
       column.reset()
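The benchmark swap is behavior-preserving because UTF8String.numBytes() returns the UTF-8 byte length, which is exactly what the old byte-array .length summed. A small sketch of the distinction; the string value is illustrative:

import org.apache.spark.unsafe.types.UTF8String

val s = UTF8String.fromString("héllo") // 'é' encodes as two bytes in UTF-8
assert(s.numBytes() == 6)              // byte length, matching the old .length
assert(s.toString.length == 5)         // character count differs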

sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala

Lines changed: 3 additions & 3 deletions
@@ -360,7 +360,7 @@ class ColumnarBatchSuite extends SparkFunSuite {

     reference.zipWithIndex.foreach { v =>
       assert(v._1.length == column.getArrayLength(v._2), "MemoryMode=" + memMode)
-      assert(v._1 == ColumnVectorUtils.toString(column.getByteArray(v._2)),
+      assert(v._1 == column.getUTF8String(v._2).toString,
         "MemoryMode" + memMode)
     }

@@ -488,7 +488,7 @@ class ColumnarBatchSuite extends SparkFunSuite {
     assert(batch.column(1).getDouble(0) == 1.1)
     assert(batch.column(1).getIsNull(0) == false)
     assert(batch.column(2).getIsNull(0) == true)
-    assert(ColumnVectorUtils.toString(batch.column(3).getByteArray(0)) == "Hello")
+    assert(batch.column(3).getUTF8String(0).toString == "Hello")

     // Verify the iterator works correctly.
     val it = batch.rowIterator()

@@ -499,7 +499,7 @@ class ColumnarBatchSuite extends SparkFunSuite {
     assert(row.getDouble(1) == 1.1)
     assert(row.isNullAt(1) == false)
     assert(row.isNullAt(2) == true)
-    assert(ColumnVectorUtils.toString(batch.column(3).getByteArray(0)) == "Hello")
+    assert(batch.column(3).getUTF8String(0).toString == "Hello")
     assert(it.hasNext == false)
     assert(it.hasNext == false)