[SPARK-45070][SQL][DOCS] Describe the binary and datetime formats of to_char/to_varchar

MaxGekk · MaxGekk · commit 637f16e7ff88 · 2023-09-06T08:25:41.000+03:00
### What changes were proposed in this pull request? In the PR, I propose to document the recent changes related to the `format` of the `to_char`/`to_varchar` functions: 1. binary formats added by #42632 2. datetime formats introduced by #42534 ### Why are the changes needed? To inform users about recent changes. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By CI. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #42801 from MaxGekk/doc-to_char-api. Authored-by: Max Gekk <max.gekk@gmail.com> Signed-off-by: Max Gekk <max.gekk@gmail.com>
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
@@ -4280,6 +4280,7 @@ object functions {
    */
   def to_binary(e: Column): Column = Column.fn("to_binary", e)
 
+  // scalastyle:off line.size.limit
   /**
    * Convert `e` to a string based on the `format`. Throws an exception if the conversion fails.
    *
@@ -4300,13 +4301,20 @@ object functions {
    *   (optional, only allowed once at the beginning or end of the format string). Note that 'S'
    *   prints '+' for positive values but 'MI' prints a space.</li> <li>'PR': Only allowed at the
    *   end of the format string; specifies that the result string will be wrapped by angle
-   *   brackets if the input value is negative.</li> </ul>
+   *   brackets if the input value is negative.</li> </ul> If `e` is a datetime, `format` shall be
+   *   a valid datetime pattern, see <a
+   *   href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Datetime
+   *   Patterns</a>. If `e` is a binary, it is converted to a string in one of the formats: <ul>
+   *   <li>'base64': a base 64 string.</li> <li>'hex': a string in the hexadecimal format.</li>
+   *   <li>'utf-8': the input binary is decoded to UTF-8 string.</li> </ul>
    *
    * @group string_funcs
    * @since 3.5.0
    */
+  // scalastyle:on line.size.limit
   def to_char(e: Column, format: Column): Column = Column.fn("to_char", e, format)
 
+  // scalastyle:off line.size.limit
   /**
    * Convert `e` to a string based on the `format`. Throws an exception if the conversion fails.
    *
@@ -4327,11 +4335,17 @@ object functions {
    *   (optional, only allowed once at the beginning or end of the format string). Note that 'S'
    *   prints '+' for positive values but 'MI' prints a space.</li> <li>'PR': Only allowed at the
    *   end of the format string; specifies that the result string will be wrapped by angle
-   *   brackets if the input value is negative.</li> </ul>
+   *   brackets if the input value is negative.</li> </ul> If `e` is a datetime, `format` shall be
+   *   a valid datetime pattern, see <a
+   *   href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Datetime
+   *   Patterns</a>. If `e` is a binary, it is converted to a string in one of the formats: <ul>
+   *   <li>'base64': a base 64 string.</li> <li>'hex': a string in the hexadecimal format.</li>
+   *   <li>'utf-8': the input binary is decoded to UTF-8 string.</li> </ul>
    *
    * @group string_funcs
    * @since 3.5.0
    */
+  // scalastyle:on line.size.limit
   def to_varchar(e: Column, format: Column): Column = Column.fn("to_varchar", e, format)
 
   /**
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
@@ -10902,6 +10902,12 @@ def to_char(col: "ColumnOrName", format: "ColumnOrName") -> Column:
     values but 'MI' prints a space.
     'PR': Only allowed at the end of the format string; specifies that the result string
     will be wrapped by angle brackets if the input value is negative.
+    If `col` is a datetime, `format` shall be a valid datetime pattern, see
+    <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Patterns</a>.
+    If `col` is a binary, it is converted to a string in one of the formats:
+    'base64': a base 64 string.
+    'hex': a string in the hexadecimal format.
+    'utf-8': the input binary is decoded to UTF-8 string.
 
     .. versionadded:: 3.5.0
 
@@ -10942,6 +10948,12 @@ def to_varchar(col: "ColumnOrName", format: "ColumnOrName") -> Column:
     values but 'MI' prints a space.
     'PR': Only allowed at the end of the format string; specifies that the result string
     will be wrapped by angle brackets if the input value is negative.
+    If `col` is a datetime, `format` shall be a valid datetime pattern, see
+    <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Patterns</a>.
+    If `col` is a binary, it is converted to a string in one of the formats:
+    'base64': a base 64 string.
+    'hex': a string in the hexadecimal format.
+    'utf-8': the input binary is decoded to UTF-8 string.
 
     .. versionadded:: 3.5.0
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -4400,6 +4400,7 @@ object functions {
     new ToBinary(e.expr)
   }
 
+  // scalastyle:off line.size.limit
   /**
    * Convert `e` to a string based on the `format`.
    * Throws an exception if the conversion fails. The format can consist of the following
@@ -4421,11 +4422,20 @@ object functions {
    *   'PR': Only allowed at the end of the format string; specifies that the result string will be
    *     wrapped by angle brackets if the input value is negative.
    *
+   *  If `e` is a datetime, `format` shall be a valid datetime pattern, see
+   *  <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Datetime Patterns</a>.
+   *  If `e` is a binary, it is converted to a string in one of the formats:
+   *     'base64': a base 64 string.
+   *     'hex': a string in the hexadecimal format.
+   *     'utf-8': the input binary is decoded to UTF-8 string.
+   *
    * @group string_funcs
    * @since 3.5.0
    */
+  // scalastyle:on line.size.limit
   def to_char(e: Column, format: Column): Column = call_function("to_char", e, format)
 
+  // scalastyle:off line.size.limit
   /**
    * Convert `e` to a string based on the `format`.
    * Throws an exception if the conversion fails. The format can consist of the following
@@ -4447,9 +4457,17 @@ object functions {
    *   'PR': Only allowed at the end of the format string; specifies that the result string will be
    *     wrapped by angle brackets if the input value is negative.
    *
+   *  If `e` is a datetime, `format` shall be a valid datetime pattern, see
+   *  <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Datetime Patterns</a>.
+   *  If `e` is a binary, it is converted to a string in one of the formats:
+   *     'base64': a base 64 string.
+   *     'hex': a string in the hexadecimal format.
+   *     'utf-8': the input binary is decoded to UTF-8 string.
+   *
    * @group string_funcs
    * @since 3.5.0
    */
+  // scalastyle:on line.size.limit
   def to_varchar(e: Column, format: Column): Column = call_function("to_varchar", e, format)
 
   /**