-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-30292][SQL] Throw Exception when invalid string is cast to numeric type in ANSI mode #26933
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from 14 commits
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
3ed7795
Cast to Decimal
iRakson 1686c6a
Fix Review Comments
iRakson 69ee231
Fix
iRakson 74809d0
Fix
iRakson a336084
Fix
iRakson c0f8baf
Test Cases Fix.
iRakson f46181d
Fix
iRakson d3ffa3c
Fix
iRakson c7dbeef
Fix
iRakson 7d0faa6
Fix
iRakson d454452
Fix
iRakson 4b0149c
Fix
iRakson 40afc54
Fix
iRakson 2f845c3
Fix
iRakson 0cb4edc
Fix
iRakson File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -482,6 +482,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | |
|
|
||
| // LongConverter | ||
| private[this] def castToLong(from: DataType): Any => Any = from match { | ||
| case StringType if ansiEnabled => | ||
cloud-fan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| buildCast[UTF8String](_, _.toLongExact()) | ||
| case StringType => | ||
| val result = new LongWrapper() | ||
| buildCast[UTF8String](_, s => if (s.toLong(result)) result.value else null) | ||
|
|
@@ -499,6 +501,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | |
|
|
||
| // IntConverter | ||
| private[this] def castToInt(from: DataType): Any => Any = from match { | ||
| case StringType if ansiEnabled => | ||
| buildCast[UTF8String](_, _.toIntExact()) | ||
| case StringType => | ||
| val result = new IntWrapper() | ||
| buildCast[UTF8String](_, s => if (s.toInt(result)) result.value else null) | ||
|
|
@@ -518,6 +522,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | |
|
|
||
| // ShortConverter | ||
| private[this] def castToShort(from: DataType): Any => Any = from match { | ||
| case StringType if ansiEnabled => | ||
| buildCast[UTF8String](_, _.toShortExact()) | ||
| case StringType => | ||
| val result = new IntWrapper() | ||
| buildCast[UTF8String](_, s => if (s.toShort(result)) { | ||
|
|
@@ -559,6 +565,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | |
|
|
||
| // ByteConverter | ||
| private[this] def castToByte(from: DataType): Any => Any = from match { | ||
| case StringType if ansiEnabled => | ||
| buildCast[UTF8String](_, _.toByteExact()) | ||
| case StringType => | ||
| val result = new IntWrapper() | ||
| buildCast[UTF8String](_, s => if (s.toByte(result)) { | ||
|
|
@@ -636,7 +644,12 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | |
| // Please refer to https://github.com/apache/spark/pull/26640 | ||
| changePrecision(Decimal(new JavaBigDecimal(s.toString.trim)), target) | ||
| } catch { | ||
| case _: NumberFormatException => null | ||
| case _: NumberFormatException => | ||
| if (ansiEnabled) { | ||
| throw new NumberFormatException(s"invalid input syntax for type numeric: $s") | ||
cloud-fan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } else { | ||
| null | ||
| } | ||
| }) | ||
| case BooleanType => | ||
| buildCast[Boolean](_, b => toPrecision(if (b) Decimal.ONE else Decimal.ZERO, target)) | ||
|
|
@@ -664,7 +677,12 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | |
| val doubleStr = s.toString | ||
| try doubleStr.toDouble catch { | ||
| case _: NumberFormatException => | ||
| Cast.processFloatingPointSpecialLiterals(doubleStr, false) | ||
| val d = Cast.processFloatingPointSpecialLiterals(doubleStr, false) | ||
| if(ansiEnabled && d == null) { | ||
| throw new NumberFormatException(s"invalid input syntax for type numeric: $s") | ||
| } else { | ||
| d | ||
| } | ||
| } | ||
| }) | ||
| case BooleanType => | ||
|
|
@@ -684,7 +702,12 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | |
| val floatStr = s.toString | ||
| try floatStr.toFloat catch { | ||
| case _: NumberFormatException => | ||
| Cast.processFloatingPointSpecialLiterals(floatStr, true) | ||
| val f = Cast.processFloatingPointSpecialLiterals(floatStr, true) | ||
| if (ansiEnabled && f == null) { | ||
| throw new NumberFormatException(s"invalid input syntax for type numeric: $s") | ||
| } else { | ||
| f | ||
| } | ||
| } | ||
| }) | ||
| case BooleanType => | ||
|
|
@@ -1133,7 +1156,11 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | |
| Decimal $tmp = Decimal.apply(new java.math.BigDecimal($c.toString().trim())); | ||
| ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast)} | ||
| } catch (java.lang.NumberFormatException e) { | ||
| $evNull = true; | ||
| if ($ansiEnabled) { | ||
cloud-fan marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| throw new NumberFormatException("invalid input syntax for type numeric: $c"); | ||
| } else { | ||
| $evNull =true; | ||
| } | ||
| } | ||
| """ | ||
| case BooleanType => | ||
|
|
@@ -1355,6 +1382,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | |
| } | ||
|
|
||
| private[this] def castToByteCode(from: DataType, ctx: CodegenContext): CastFunction = from match { | ||
| case StringType if ansiEnabled => | ||
| (c, evPrim, evNull) => code"$evPrim = $c.toByteExact();" | ||
| case StringType => | ||
| val wrapper = ctx.freshVariable("intWrapper", classOf[UTF8String.IntWrapper]) | ||
| (c, evPrim, evNull) => | ||
|
|
@@ -1386,6 +1415,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | |
| private[this] def castToShortCode( | ||
| from: DataType, | ||
| ctx: CodegenContext): CastFunction = from match { | ||
| case StringType if ansiEnabled => | ||
| (c, evPrim, evNull) => code"$evPrim = $c.toShortExact();" | ||
| case StringType => | ||
| val wrapper = ctx.freshVariable("intWrapper", classOf[UTF8String.IntWrapper]) | ||
| (c, evPrim, evNull) => | ||
|
|
@@ -1415,6 +1446,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | |
| } | ||
|
|
||
| private[this] def castToIntCode(from: DataType, ctx: CodegenContext): CastFunction = from match { | ||
| case StringType if ansiEnabled => | ||
| (c, evPrim, evNull) => code"$evPrim = $c.toIntExact();" | ||
| case StringType => | ||
| val wrapper = ctx.freshVariable("intWrapper", classOf[UTF8String.IntWrapper]) | ||
| (c, evPrim, evNull) => | ||
|
|
@@ -1443,9 +1476,10 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | |
| } | ||
|
|
||
| private[this] def castToLongCode(from: DataType, ctx: CodegenContext): CastFunction = from match { | ||
| case StringType if ansiEnabled => | ||
| (c, evPrim, evNull) => code"$evPrim = $c.toLongExact();" | ||
| case StringType => | ||
| val wrapper = ctx.freshVariable("longWrapper", classOf[UTF8String.LongWrapper]) | ||
|
|
||
| (c, evPrim, evNull) => | ||
| code""" | ||
| UTF8String.LongWrapper $wrapper = new UTF8String.LongWrapper(); | ||
|
|
@@ -1476,14 +1510,19 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | |
| case StringType => | ||
| val floatStr = ctx.freshVariable("floatStr", StringType) | ||
| (c, evPrim, evNull) => | ||
| val handleNull = if (ansiEnabled) { | ||
| s"""throw new NumberFormatException("invalid input syntax for type numeric: $c");""" | ||
| } else { | ||
| s"$evNull = true;" | ||
| } | ||
| code""" | ||
| final String $floatStr = $c.toString(); | ||
| try { | ||
| $evPrim = Float.valueOf($floatStr); | ||
| } catch (java.lang.NumberFormatException e) { | ||
| final Float f = (Float) Cast.processFloatingPointSpecialLiterals($floatStr, true); | ||
| if (f == null) { | ||
| $evNull = true; | ||
| $handleNull | ||
| } else { | ||
| $evPrim = f.floatValue(); | ||
| } | ||
|
|
@@ -1507,14 +1546,19 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | |
| case StringType => | ||
| val doubleStr = ctx.freshVariable("doubleStr", StringType) | ||
| (c, evPrim, evNull) => | ||
| val handleNull = if (ansiEnabled) { | ||
| s"""throw new NumberFormatException("invalid input syntax for type numeric: $c");""" | ||
| } else { | ||
| s"$evNull = true;" | ||
| } | ||
| code""" | ||
| final String $doubleStr = $c.toString(); | ||
| try { | ||
| $evPrim = Double.valueOf($doubleStr); | ||
| } catch (java.lang.NumberFormatException e) { | ||
| final Double d = (Double) Cast.processFloatingPointSpecialLiterals($doubleStr, false); | ||
| if (d == null) { | ||
| $evNull = true; | ||
| $handleNull | ||
| } else { | ||
| $evPrim = d.doubleValue(); | ||
| } | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.