Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,15 @@ object StringUtils extends Logging {
"(?s)" + out.result() // (?s) enables dotall mode, causing "." to match new lines
}

private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString)
private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString)
private[this] val trueStrings =
Set("true", "tru", "tr", "t", "yes", "ye", "y", "on", "1").map(UTF8String.fromString)

private[this] val falseStrings =
Set("false", "fals", "fal", "fa", "f", "no", "n", "off", "of", "0").map(UTF8String.fromString)

// scalastyle:off caselocale
def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase)
def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase)
def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase.trim())
def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase.trim())
// scalastyle:on caselocale

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -819,19 +819,32 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper {
}

test("cast string to boolean") {
checkCast("t", true)

checkCast("true", true)
checkCast("tru", true)
checkCast("tr", true)
checkCast("t", true)
checkCast("tRUe", true)
checkCast("y", true)
checkCast(" tRue ", true)
checkCast(" tRu ", true)
checkCast("yes", true)
checkCast("ye", true)
checkCast("y", true)
checkCast("1", true)
checkCast("on", true)

checkCast("f", false)
checkCast("false", false)
checkCast("FAlsE", false)
checkCast("n", false)
checkCast("fals", false)
checkCast("fal", false)
checkCast("fa", false)
checkCast("f", false)
checkCast(" fAlse ", false)
checkCast(" fAls ", false)
checkCast("no", false)
checkCast("n", false)
checkCast("0", false)
checkCast("off", false)
checkCast("of", false)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@younggyuchun, just for doubly sure, did you double check the behaviours against PostgreSQL?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@HyukjinKwon Here it is:
image

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not documented: https://www.postgresql.org/docs/devel/datatype-boolean.html

Postgres may support of for history reasons, I don't think we have to follow it.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cloud-fan @dongjoon-hyun @HyukjinKwon
This build accepts several unique prefixes for the boolean data type. For example, tru, tr, ye, fals, fal, fa and of, which are not documented. Do we want to not to accept these prefixes?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cloud-fan . It's a documented feature in that document. We had better support it.

Unique prefixes of these strings are also accepted, for example t or n. Leading or trailing whitespace is ignored, and case does not matter.

cc @gatorsmile

Copy link
Member

@dongjoon-hyun dongjoon-hyun Aug 30, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, @younggyuchun . Please add a negative test case.
o is not supported because it's not unique. It's a common prefix for on and off.
It should be null.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems okay to me

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SGTM


checkEvaluation(cast("abc", BooleanType), null)
checkEvaluation(cast("", BooleanType), null)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ SELECT false AS `false`;
SELECT boolean('t') AS true;

-- [SPARK-27931] Trim the string when cast string type to boolean type
SELECT boolean(' f ') AS `false`;
SELECT boolean(' f ') AS `true`;
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@wangyum I think it should be 'true'. Is it correct?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's false:
image

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me dig into this.


SELECT boolean('true') AS true;

Expand Down