
Commit 081e6fe

Davies Liu committed

fix style
1 parent 6676e74 commit 081e6fe

File tree

9 files changed (+22 -15 lines)


sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala

Lines changed: 1 addition & 1 deletion
@@ -156,7 +156,7 @@ object DecimalType extends AbstractDataType {
   def is32BitDecimalType(dt: DataType): Boolean = {
     dt match {
       case t: DecimalType =>
-        t.precision <= Decimal.MAX_LONG_DIGITS
+        t.precision <= Decimal.MAX_INT_DIGITS
       case _ => false
     }
   }
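The renamed constant is the substantive fix in this hunk: Spark's Decimal keeps unscaled values of up to Decimal.MAX_INT_DIGITS (9) digits in an Int and up to Decimal.MAX_LONG_DIGITS (18) digits in a Long, so the old check classified 10-18 digit decimals as 32-bit. A minimal sketch of the corrected behavior, assuming the constant values from Spark's Decimal object and that the check is callable from test code:

import org.apache.spark.sql.types.DecimalType

// MAX_INT_DIGITS = 9: the widest precision whose unscaled value always fits in an Int.
// The old code compared against MAX_LONG_DIGITS = 18, the 64-bit threshold.
assert(DecimalType.is32BitDecimalType(DecimalType(9, 2)))   // stored as an Int
assert(!DecimalType.is32BitDecimalType(DecimalType(10, 2))) // needs a Long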

sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/UnsafeRowParquetRecordReader.java

Lines changed: 1 addition & 1 deletion
@@ -699,7 +699,7 @@ private void decodeDictionaryIds(int rowId, int num, ColumnVector column) {
         break;

       case FIXED_LEN_BYTE_ARRAY:
-        // This is the legacy mode to write DecimalType
+        // DecimalType written in the legacy mode
         if (DecimalType.is32BitDecimalType(column.dataType())) {
           for (int i = rowId; i < rowId + num; ++i) {
             Binary v = dictionary.decodeToBinary(dictionaryIds.getInt(i));

sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java

Lines changed: 11 additions & 2 deletions
@@ -159,7 +159,7 @@ public Object[] array() {
     } else if (dt instanceof StringType) {
       for (int i = 0; i < length; i++) {
         if (!data.getIsNull(offset + i)) {
-          list[i] = ColumnVectorUtils.toString(data.getByteArray(offset + i));
+          list[i] = getUTF8String(i).toString();
         }
       }
     } else if (dt instanceof CalendarIntervalType) {

@@ -525,12 +525,15 @@ public final int putByteArray(int rowId, byte[] value) {
   /**
    * Returns the value for rowId.
    */
-  public final Array getByteArray(int rowId) {
+  private Array getByteArray(int rowId) {
     Array array = getArray(rowId);
     array.data.loadBytes(array);
     return array;
   }

+  /**
+   * Returns the decimal for rowId.
+   */
   public final Decimal getDecimal(int rowId, int precision, int scale) {
     if (precision <= Decimal.MAX_INT_DIGITS()) {
       return Decimal.apply(getInt(rowId), precision, scale);

@@ -545,6 +548,9 @@ public final Decimal getDecimal(int rowId, int precision, int scale) {
     }
   }

+  /**
+   * Returns the UTF8String for rowId.
+   */
   public final UTF8String getUTF8String(int rowId) {
     if (dictionary == null) {
       ColumnVector.Array a = getByteArray(rowId);

@@ -555,6 +561,9 @@ public final UTF8String getUTF8String(int rowId) {
     }
   }

+  /**
+   * Returns the byte array for rowId.
+   */
   public final byte[] getBinary(int rowId) {
     if (dictionary == null) {
       ColumnVector.Array array = getByteArray(rowId);
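With getByteArray now private, callers read string columns through the typed accessor, which also covers the dictionary-encoded path. A hedged sketch of the new call pattern; the helper name and the populated column are assumptions for illustration:

import org.apache.spark.sql.execution.vectorized.ColumnVector

// Hypothetical helper: `column` is assumed to hold StringType data.
// getUTF8String works for both plain and dictionary-encoded columns and
// replaces the removed ColumnVectorUtils.toString(getByteArray(rowId)) idiom.
def readString(column: ColumnVector, rowId: Int): String =
  column.getUTF8String(rowId).toString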

sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java

Lines changed: 2 additions & 6 deletions
@@ -22,24 +22,20 @@
 import java.util.Iterator;
 import java.util.List;

+import org.apache.commons.lang.NotImplementedException;
+
 import org.apache.spark.memory.MemoryMode;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.catalyst.util.DateTimeUtils;
 import org.apache.spark.sql.types.*;
 import org.apache.spark.unsafe.types.CalendarInterval;

-import org.apache.commons.lang.NotImplementedException;
-
 /**
  * Utilities to help manipulate data associate with ColumnVectors. These should be used mostly
  * for debugging or other non-performance critical paths.
  * These utilities are mostly used to convert ColumnVectors into other formats.
  */
 public class ColumnVectorUtils {
-  public static String toString(ColumnVector.Array a) {
-    return new String(a.byteArray, a.byteArrayOffset, a.length);
-  }
-
   /**
    * Returns the array data as the java primitive array.
    * For example, an array of IntegerType will return an int[].

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala

Lines changed: 1 addition & 0 deletions
@@ -24,6 +24,7 @@ import org.apache.parquet.schema._
 import org.apache.parquet.schema.OriginalType._
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._
 import org.apache.parquet.schema.Type.Repetition._
+
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.execution.datasources.parquet.CatalystSchemaConverter.maxPrecisionForBytes
 import org.apache.spark.sql.internal.SQLConf

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystWriteSupport.scala

Lines changed: 1 addition & 0 deletions
@@ -28,6 +28,7 @@ import org.apache.parquet.hadoop.ParquetOutputFormat
 import org.apache.parquet.hadoop.api.WriteSupport
 import org.apache.parquet.hadoop.api.WriteSupport.WriteContext
 import org.apache.parquet.io.api.{Binary, RecordConsumer}
+
 import org.apache.spark.Logging
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.SpecializedGetters
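This and the CatalystSchemaConverter hunk are the literal "style" fixes: Spark's Scala import ordering separates third-party groups from org.apache.spark imports with a blank line. A sketch of the convention as applied in these two files; the import names below are illustrative only:

// Assumed Spark import grouping: java/scala core, then third-party
// (e.g. org.apache.parquet), then org.apache.spark, with a blank line
// between groups.
import java.util.Locale

import org.apache.parquet.io.api.Binary

import org.apache.spark.sql.types.DecimalType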

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ class ParquetEncodingSuite extends ParquetCompatibilityTest with SharedSQLContex
       assert(batch.column(0).getByte(i) == 1)
       assert(batch.column(1).getInt(i) == 2)
       assert(batch.column(2).getLong(i) == 3)
-      assert(ColumnVectorUtils.toString(batch.column(3).getByteArray(i)) == "abc")
+      assert(batch.column(3).getUTF8String(i).toString == "abc")
       i += 1
     }
     reader.close()

sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala

Lines changed: 1 addition & 1 deletion
@@ -370,7 +370,7 @@ object ColumnarBatchBenchmark {
       }
       i = 0
       while (i < count) {
-        sum += column.getByteArray(i).length
+        sum += column.getUTF8String(i).numBytes()
         i += 1
       }
       column.reset()
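The benchmark swap is behavior-preserving because UTF8String.numBytes() returns the UTF-8 byte length, which is exactly what the old byte-array .length summed. A small sketch of the distinction; the string value is illustrative:

import org.apache.spark.unsafe.types.UTF8String

val s = UTF8String.fromString("héllo") // 'é' encodes as two bytes in UTF-8
assert(s.numBytes() == 6)              // byte length, matching the old .length
assert(s.toString.length == 5)         // character count differs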

sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala

Lines changed: 3 additions & 3 deletions
@@ -360,7 +360,7 @@ class ColumnarBatchSuite extends SparkFunSuite {

     reference.zipWithIndex.foreach { v =>
       assert(v._1.length == column.getArrayLength(v._2), "MemoryMode=" + memMode)
-      assert(v._1 == ColumnVectorUtils.toString(column.getByteArray(v._2)),
+      assert(v._1 == column.getUTF8String(v._2).toString,
         "MemoryMode" + memMode)
     }

@@ -488,7 +488,7 @@ class ColumnarBatchSuite extends SparkFunSuite {
     assert(batch.column(1).getDouble(0) == 1.1)
     assert(batch.column(1).getIsNull(0) == false)
     assert(batch.column(2).getIsNull(0) == true)
-    assert(ColumnVectorUtils.toString(batch.column(3).getByteArray(0)) == "Hello")
+    assert(batch.column(3).getUTF8String(0).toString == "Hello")

     // Verify the iterator works correctly.
     val it = batch.rowIterator()

@@ -499,7 +499,7 @@ class ColumnarBatchSuite extends SparkFunSuite {
     assert(row.getDouble(1) == 1.1)
     assert(row.isNullAt(1) == false)
     assert(row.isNullAt(2) == true)
-    assert(ColumnVectorUtils.toString(batch.column(3).getByteArray(0)) == "Hello")
+    assert(batch.column(3).getUTF8String(0).toString == "Hello")
     assert(it.hasNext == false)
     assert(it.hasNext == false)