@@ -46,14 +46,16 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) {

/**
* Specifies how data of a streaming DataFrame/Dataset is written to a streaming sink.
* - `OutputMode.Append()`: only the new rows in the streaming DataFrame/Dataset will be
* written to the sink
* - `OutputMode.Complete()`: all the rows in the streaming DataFrame/Dataset will be written
* to the sink every time these is some updates
* - `OutputMode.Update()`: only the rows that were updated in the streaming DataFrame/Dataset
* <ul>
* <li> `OutputMode.Append()`: only the new rows in the streaming DataFrame/Dataset will be
* written to the sink.</li>

I would just format this similarly with

* <ul>
* <li>`primitivesAsString` (default `false`): infers all primitive values as a string type</li>
* <li>`prefersDecimal` (default `false`): infers all floating-point values as a decimal
* type. If the values do not fit in decimal, then it infers them as doubles.</li>
* <li>`allowComments` (default `false`): ignores Java/C++ style comment in JSON records</li>
* <li>`allowUnquotedFieldNames` (default `false`): allows unquoted JSON field names</li>
* <li>`allowSingleQuotes` (default `true`): allows single quotes in addition to double quotes
* </li>
* <li>`allowNumericLeadingZeros` (default `false`): allows leading zeros in numbers
* (e.g. 00012)</li>
* <li>`allowBackslashEscapingAnyCharacter` (default `false`): allows accepting quoting of all
* character using backslash quoting mechanism</li>
* <li>`allowUnquotedControlChars` (default `false`): allows JSON Strings to contain unquoted
* control characters (ASCII characters with value less than 32, including tab and line feed
* characters) or not.</li>
* <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
* during parsing.
* <ul>
* <li>`PERMISSIVE` : when it meets a corrupted record, puts the malformed string into a
* field configured by `columnNameOfCorruptRecord`, and sets other fields to `null`. To
* keep corrupt records, an user can set a string type field named
* `columnNameOfCorruptRecord` in an user-defined schema. If a schema does not have the
* field, it drops corrupt records during parsing. When inferring a schema, it implicitly
* adds a `columnNameOfCorruptRecord` field in an output schema.</li>
* <li>`DROPMALFORMED` : ignores the whole corrupted records.</li>
* <li>`FAILFAST` : throws an exception when it meets corrupted records.</li>
* </ul>
* </li>
* <li>`columnNameOfCorruptRecord` (default is the value specified in

* <li> `OutputMode.Complete()`: all the rows in the streaming DataFrame/Dataset will be written
* to the sink every time there are some updates.</li>
* <li> `OutputMode.Update()`: only the rows that were updated in the streaming DataFrame/Dataset
* will be written to the sink every time there are some updates. If
* the query doesn't contain aggregations, it will be equivalent to
* `OutputMode.Append()` mode.
* `OutputMode.Append()` mode.</li>
* </ul>
*
* @since 2.0.0
*/
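
For context, here is a minimal usage sketch of the `OutputMode` variant documented above, assuming an existing `SparkSession` named `spark`; the `rate` source, `console` sink, and grouping column are illustrative only, not part of this change:

```scala
import org.apache.spark.sql.streaming.OutputMode

// Streaming aggregation over the built-in rate source (emits timestamp/value rows).
val counts = spark.readStream
  .format("rate")
  .load()
  .groupBy("value")
  .count()

// Complete mode: the entire result table is rewritten to the sink on every update.
val query = counts.writeStream
  .outputMode(OutputMode.Complete())
  .format("console")
  .start()
```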
@@ -64,13 +66,16 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) {

/**
* Specifies how data of a streaming DataFrame/Dataset is written to a streaming sink.
* - `append`: only the new rows in the streaming DataFrame/Dataset will be written to
* the sink
* - `complete`: all the rows in the streaming DataFrame/Dataset will be written to the sink
* every time these is some updates
* - `update`: only the rows that were updated in the streaming DataFrame/Dataset will
* <ul>
* <li> `append`: only the new rows in the streaming DataFrame/Dataset will be written to
* the sink. </li>
* <li> `complete`: all the rows in the streaming DataFrame/Dataset will be written to the sink
* every time there are some updates. </li>
* <li> `update`: only the rows that were updated in the streaming DataFrame/Dataset will
* be written to the sink every time there are some updates. If the query doesn't
* contain aggregations, it will be equivalent to `append` mode.
* contain aggregations, it will be equivalent to `append` mode. </li>
* </ul>
*
* @since 2.0.0
*/
def outputMode(outputMode: String): DataStreamWriter[T] = {
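
And a corresponding sketch of the string-based variant, again assuming a `spark` session; since this query has no aggregation, `append` behaves the same as `OutputMode.Append()` and writes only newly arrived rows each trigger:

```scala
// No aggregation in this query, so "append" writes only the new rows per trigger.
val query = spark.readStream
  .format("rate")
  .load()
  .writeStream
  .outputMode("append")
  .format("console")
  .start()
```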
@@ -131,8 +136,10 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) {
* laid out on the file system similar to Hive's partitioning scheme. As an example, when we
* partition a dataset by year and then month, the directory layout would look like:
*
* - year=2016/month=01/
* - year=2016/month=02/
* <ul>
* <li> year=2016/month=01/ </li>
* <li> year=2016/month=02/ </li>
* </ul>
*
* Partitioning is one of the most widely used techniques to optimize physical data layout.
* It provides a coarse-grained index for skipping unnecessary data reads when queries have
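
A minimal sketch of `partitionBy` on a streaming write, assuming a streaming DataFrame `events` with `year` and `month` columns and placeholder paths (all names are illustrative); each micro-batch then lands under the `year=.../month=...` subdirectories shown in the layout above:

```scala
// File sinks require a checkpoint location; both paths here are placeholders.
val query = events.writeStream
  .format("parquet")
  .option("checkpointLocation", "/tmp/checkpoints/events")
  .partitionBy("year", "month")
  .outputMode("append")
  .start("/tmp/output/events")
```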