@@ -298,6 +298,21 @@ case class Bin(child: Expression)
298298 }
299299}
300300
/**
 * Shared lookup tables for converting between raw values and ASCII hexadecimal digits.
 */
object Hex {
  /**
   * ASCII codes of the upper-case hexadecimal digits, indexed by digit value (0 to 15).
   */
  val hexDigits: Array[Byte] = "0123456789ABCDEF".toCharArray.map(_.toByte)

  /**
   * Reverse table indexed by ASCII code: '0'..'9' map to 0..9, while 'A'..'F' and 'a'..'f'
   * both map to 10..15. Every other slot holds -1. The table has 128 entries, so only
   * indices 0 to 127 are valid.
   */
  val unhexDigits: Array[Byte] = {
    val table = Array.fill[Byte](128)(-1)
    // decimal digits
    for (digit <- 0 until 10) {
      table('0' + digit) = digit.toByte
    }
    // letters: upper and lower case share the same numeric value
    for (offset <- 0 until 6) {
      val digitValue = (offset + 10).toByte
      table('A' + offset) = digitValue
      table('a' + offset) = digitValue
    }
    table
  }
}
315+
301316/**
302317 * If the argument is an INT or binary, hex returns the number as a STRING in hexadecimal format.
303318 * Otherwise if the number is a STRING, it converts each character into its hex representation
@@ -307,7 +322,7 @@ case class Hex(child: Expression) extends UnaryExpression with ExpectsInputTypes
307322 // TODO: Create code-gen version.
308323
309324 override def inputTypes : Seq [AbstractDataType ] =
310- Seq (TypeCollection (LongType , StringType , BinaryType ))
325+ Seq (TypeCollection (LongType , BinaryType , StringType ))
311326
312327 override def dataType : DataType = StringType
313328
@@ -319,30 +334,18 @@ case class Hex(child: Expression) extends UnaryExpression with ExpectsInputTypes
319334 child.dataType match {
320335 case LongType => hex(num.asInstanceOf [Long ])
321336 case BinaryType => hex(num.asInstanceOf [Array [Byte ]])
322- case StringType => hex(num.asInstanceOf [UTF8String ])
337+ case StringType => hex(num.asInstanceOf [UTF8String ].getBytes )
323338 }
324339 }
325340 }
326341
327- /**
328- * Converts every character in s to two hex digits.
329- */
330- private def hex (str : UTF8String ): UTF8String = {
331- hex(str.getBytes)
332- }
333-
334- private def hex (bytes : Array [Byte ]): UTF8String = {
335- doHex(bytes, bytes.length)
336- }
337-
338- private def doHex (bytes : Array [Byte ], length : Int ): UTF8String = {
342+ private [this ] def hex (bytes : Array [Byte ]): UTF8String = {
343+ val length = bytes.length
339344 val value = new Array [Byte ](length * 2 )
340345 var i = 0
341346 while (i < length) {
342- value(i * 2 ) = Character .toUpperCase(Character .forDigit(
343- (bytes(i) & 0xF0 ) >>> 4 , 16 )).toByte
344- value(i * 2 + 1 ) = Character .toUpperCase(Character .forDigit(
345- bytes(i) & 0x0F , 16 )).toByte
347+ value(i * 2 ) = Hex .hexDigits((bytes(i) & 0xF0 ) >> 4 )
348+ value(i * 2 + 1 ) = Hex .hexDigits(bytes(i) & 0x0F )
346349 i += 1
347350 }
348351 UTF8String .fromBytes(value)
@@ -355,24 +358,23 @@ case class Hex(child: Expression) extends UnaryExpression with ExpectsInputTypes
355358 var len = 0
356359 do {
357360 len += 1
358- value(value.length - len) =
359- Character .toUpperCase(Character .forDigit((numBuf & 0xF ).toInt, 16 )).toByte
361+ value(value.length - len) = Hex .hexDigits((numBuf & 0xF ).toInt)
360362 numBuf >>>= 4
361363 } while (numBuf != 0 )
362364 UTF8String .fromBytes(java.util.Arrays .copyOfRange(value, value.length - len, value.length))
363365 }
364366}
365367
366-
367368/**
368369 * Performs the inverse operation of HEX.
369370 * Resulting characters are returned as a byte array.
370371 */
371- case class UnHex (child : Expression ) extends UnaryExpression with ExpectsInputTypes {
372+ case class Unhex (child : Expression ) extends UnaryExpression with ExpectsInputTypes {
372373 // TODO: Create code-gen version.
373374
374375 override def inputTypes : Seq [AbstractDataType ] = Seq (StringType )
375376
377+ override def nullable : Boolean = true
376378 override def dataType : DataType = BinaryType
377379
378380 override def eval (input : InternalRow ): Any = {
@@ -384,26 +386,31 @@ case class UnHex(child: Expression) extends UnaryExpression with ExpectsInputTyp
384386 }
385387 }
386388
387- private val unhexDigits = {
388- val array = Array .fill[Byte ](128 )(- 1 )
389- (0 to 9 ).foreach(i => array('0' + i) = i.toByte)
390- (0 to 5 ).foreach(i => array('A' + i) = (i + 10 ).toByte)
391- (0 to 5 ).foreach(i => array('a' + i) = (i + 10 ).toByte)
392- array
393- }
394-
395- private def unhex (inputBytes : Array [Byte ]): Array [Byte ] = {
396- var bytes = inputBytes
389+ private [this ] def unhex (bytes : Array [Byte ]): Array [Byte ] = {
390+ val out = new Array [Byte ]((bytes.length + 1 ) >> 1 )
391+ var i = 0
397392 if ((bytes.length & 0x01 ) != 0 ) {
398- bytes = '0' .toByte +: bytes
393+ // padding with '0'
394+ if (bytes(0 ) < 0 ) {
395+ return null
396+ }
397+ val v = Hex .unhexDigits(bytes(0 ))
398+ if (v == - 1 ) {
399+ return null
400+ }
401+ out(0 ) = v
402+ i += 1
399403 }
400- val out = new Array [Byte ](bytes.length >> 1 )
401404 // two characters form the hex value.
402- var i = 0
403405 while (i < bytes.length) {
404- val first = unhexDigits(bytes(i))
405- val second = unhexDigits(bytes(i + 1 ))
406- if (first == - 1 || second == - 1 ) { return null }
406+ if (bytes(i) < 0 || bytes(i + 1 ) < 0 ) {
407+ return null
408+ }
409+ val first = Hex .unhexDigits(bytes(i))
410+ val second = Hex .unhexDigits(bytes(i + 1 ))
411+ if (first == - 1 || second == - 1 ) {
412+ return null
413+ }
407414 out(i / 2 ) = (((first << 4 ) | second) & 0xFF ).toByte
408415 i += 2
409416 }
0 commit comments