Status: Closed

Commits (22):
c567dcc  Add a test (MaxGekk, Nov 8, 2018)
2b41eba  Fix decimal parsing (MaxGekk, Nov 8, 2018)
cf438ae  Add locale option (MaxGekk, Nov 8, 2018)
f9438c4  Updating the migration guide (MaxGekk, Nov 8, 2018)
3125c23  Fix imports (MaxGekk, Nov 8, 2018)
64a97a2  Merge remote-tracking branch 'origin/master' into decimal-parsing-locale (MaxGekk, Nov 9, 2018)
2f76352  Renaming decimalParser to decimalFormat (MaxGekk, Nov 11, 2018)
3dfce18  Moving the test to UnivocityParserSuite (MaxGekk, Nov 11, 2018)
bdca7c4  Support the SQL config spark.sql.legacy.decimalParsing.enabled (MaxGekk, Nov 12, 2018)
8c5593e  Updating the migration guide. (MaxGekk, Nov 12, 2018)
18470b0  Refactoring (MaxGekk, Nov 12, 2018)
c28b79f  Removing internal (MaxGekk, Nov 12, 2018)
1723da2  Test refactoring (MaxGekk, Nov 12, 2018)
6cdafa5  Added a test for inferring the decimal type (MaxGekk, Nov 13, 2018)
14b5109  Inferring decimals from CSV (MaxGekk, Nov 14, 2018)
bab8fb2  Renaming df to decimalFormat (MaxGekk, Nov 22, 2018)
5236336  Merge remote-tracking branch 'origin/master' into decimal-parsing-locale (MaxGekk, Nov 23, 2018)
0d1a4f0  Merge branch 'master' into decimal-parsing-locale (MaxGekk, Nov 27, 2018)
8b1456c  Merge remote-tracking branch 'origin/master' into decimal-parsing-locale (MaxGekk, Nov 28, 2018)
0859624  Removing SQL config and special handling of Locale.US (MaxGekk, Nov 28, 2018)
e989b77  Merge remote-tracking branch 'fork/decimal-parsing-locale' into decim… (MaxGekk, Nov 28, 2018)
521bd45  Merge remote-tracking branch 'origin/master' into decimal-parsing-locale (MaxGekk, Nov 29, 2018)
docs/sql-migration-guide-upgrade.md (2 additions, 0 deletions)

@@ -9,6 +9,8 @@ displayTitle: Spark SQL Upgrading Guide

## Upgrading From Spark SQL 2.4 to 3.0

- Since Spark 3.0, to parse decimals in a locale-specific format from CSV, set the `locale` option to the proper value.

- In PySpark, when creating a `SparkSession` with `SparkSession.builder.getOrCreate()`, if there is an existing `SparkContext`, the builder was trying to update the `SparkConf` of the existing `SparkContext` with configurations specified to the builder; but the `SparkContext` is shared by all `SparkSession`s, so we should not update them. Since 3.0, the builder no longer updates the configurations, which matches the Java/Scala API behavior in 2.3 and above. If you want to update them, you need to do so prior to creating a `SparkSession`.

- In Spark version 2.4 and earlier, the parser of the JSON data source treats empty strings as null for some data types such as `IntegerType`. For `FloatType` and `DoubleType`, it fails on empty strings and throws exceptions. Since Spark 3.0, we disallow empty strings and will throw exceptions for all data types except `StringType` and `BinaryType`.
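For illustration, a minimal usage sketch of the new `locale` option (not part of the patch; `spark` is assumed to be a SparkSession, and the path and schema are invented for this example):

// Hypothetical usage: read CSV decimals written with German conventions
// (comma as the decimal separator), e.g. "1000,001".
val prices = spark.read
  .schema("price DECIMAL(10,5)")
  .option("locale", "de-DE")
  .option("sep", "|")
  .csv("/tmp/prices.csv")

Note the non-default separator: the default `,` would collide with the locale's decimal separator.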
UnivocityParser.scala

@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.csv

import java.io.InputStream
import java.math.BigDecimal
import java.text.{DecimalFormat, DecimalFormatSymbols}

import scala.util.Try
import scala.util.control.NonFatal
@@ -104,6 +105,12 @@ class UnivocityParser(
    requiredSchema.map(f => makeConverter(f.name, f.dataType, f.nullable, options)).toArray
  }

  // Locale-aware decimal parser; setParseBigDecimal(true) makes parse()
  // return java.math.BigDecimal instances for ordinary numeric input.
  private val decimalParser = {
    val df = new DecimalFormat("", new DecimalFormatSymbols(options.locale))
    df.setParseBigDecimal(true)
    df
  }

  /**
   * Create a converter which converts the string value to a value according to a desired type.
   * Currently, we do not support complex types (`ArrayType`, `MapType`, `StructType`).

@@ -149,8 +156,8 @@

      case dt: DecimalType => (d: String) =>
        nullSafeDatum(d, name, nullable, options) { datum =>
-         val value = new BigDecimal(datum.replaceAll(",", ""))
-         Decimal(value, dt.precision, dt.scale)
+         val bigDecimal = decimalParser.parse(datum).asInstanceOf[BigDecimal]
Review thread on this line:

Member:

@MaxGekk, is it safe to assume that this Number is a BigDecimal? It looks like there are some possibilities that it can return other types.

Member Author (MaxGekk):

> is it safe to assume that this Number is a BigDecimal?

I am not absolutely sure that it always returns BigDecimal. I found this at https://docs.oracle.com/javase/8/docs/api/java/text/DecimalFormat.html#parse(java.lang.String,java.text.ParsePosition):

> If isParseBigDecimal() is true, values are returned as BigDecimal objects. The values are the ones constructed by BigDecimal.BigDecimal(String) for corresponding strings in locale-independent format. The special cases negative and positive infinity and NaN are returned as Double instances holding the values of the corresponding Double constants.

So isParseBigDecimal() returns true when setParseBigDecimal(true) has been called, as in this PR.

> It looks like there are some possibilities that it can return other types.

In that case we just fail with a cast exception and the record will be handled as a bad record. Or do you want to see a clearer message in the exception?
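A minimal sketch (not from the PR) of the return-type behaviour the quoted javadoc describes, assuming Locale.US:

import java.text.{DecimalFormat, DecimalFormatSymbols}
import java.util.Locale

// With setParseBigDecimal(true), parse() returns java.math.BigDecimal for
// ordinary numbers, but java.lang.Double for the special infinity/NaN
// cases, which is why the asInstanceOf above can throw.
object ParseReturnTypes extends App {
  val symbols = new DecimalFormatSymbols(Locale.US)
  val df = new DecimalFormat("", symbols)
  df.setParseBigDecimal(true)

  println(df.parse("1000.001").getClass)          // class java.math.BigDecimal
  println(df.parse(symbols.getInfinity).getClass) // class java.lang.Double
}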

Member:

Ah, right. The previous code would throw an exception anyway, I see. One thing I am a little unsure about is how different the behaviour is. For instance, it looks like the previous code handled the sign characters (+ and -) as well.

Let me take a closer look. I think I need to.
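
A minimal sketch (not from the PR) of the sign-handling difference being discussed, using a hypothetical input and Locale.US:

import java.math.BigDecimal
import java.text.{DecimalFormat, DecimalFormatSymbols}
import java.util.Locale

// The old code stripped grouping commas and used the BigDecimal(String)
// constructor, which accepts an explicit leading '+'. DecimalFormat with
// the default (empty) positive prefix does not consume the '+', so the
// same input now fails to parse.
object SignHandling extends App {
  val datum = "+1000.5"

  val oldWay = new BigDecimal(datum.replaceAll(",", ""))
  println(oldWay) // 1000.5

  val df = new DecimalFormat("", new DecimalFormatSymbols(Locale.US))
  df.setParseBigDecimal(true)
  df.parse(datum) // throws java.text.ParseException
}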

HyukjinKwon (Member), Nov 11, 2018:

For instance, there was a similar attempt to change the date parsing library (#21363). I already know the difference is quite breaking and the workaround is difficult as far as I know, so I suggested adding a configuration or a fallback for now. Probably we should similarly just document the behaviour change in the migration guide, but I am actually less sure yet even about this. Anyway, I will take another look shortly.

Member Author (MaxGekk):

> so I suggested to add a configuration or fallback for now ...

What about a SQL config `spark.sql.legacy.decimalParsing.enabled` with default value `false`?

Member:

Sounds good if that's not difficult.

+         Decimal(bigDecimal, dt.precision, dt.scale)
        }

      case _: TimestampType => (d: String) =>
CsvExpressionsSuite.scala

@@ -17,6 +17,7 @@

package org.apache.spark.sql.catalyst.expressions

import java.text.{DecimalFormat, DecimalFormatSymbols}
import java.text.SimpleDateFormat
import java.util.{Calendar, Locale}

@@ -226,4 +227,17 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P
      InternalRow(17836)) // number of days from 1970-01-01
    }
  }

  test("parse decimals using locale") {
    Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach { langTag =>
      val schema = new StructType().add("d", DecimalType(10, 5))
      val options = Map("locale" -> langTag, "sep" -> "|")
      val expected = Decimal(1000.001, 10, 5)
      val df = new DecimalFormat("", new DecimalFormatSymbols(Locale.forLanguageTag(langTag)))
      val input = df.format(expected.toBigDecimal)
      checkEvaluation(
        CsvToStructs(schema, options, Literal.create(input), gmtId),
        InternalRow(expected))
    }
  }
}
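
The round trip this test relies on can be sketched outside Spark as follows (illustration only, not part of the patch; the exact formatted strings depend on the JDK's locale data, e.g. a comma decimal separator for de-DE):

import java.text.{DecimalFormat, DecimalFormatSymbols}
import java.util.Locale

// Mirrors the test's logic: format a decimal using the locale's symbols,
// then parse it back with the same symbols.
object LocaleRoundTrip extends App {
  for (tag <- Seq("en-US", "ko-KR", "ru-RU", "de-DE")) {
    val df = new DecimalFormat("", new DecimalFormatSymbols(Locale.forLanguageTag(tag)))
    df.setParseBigDecimal(true)
    val text = df.format(new java.math.BigDecimal("1000.001"))
    println(s"$tag: $text -> ${df.parse(text)}")
  }
}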