Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions phoenix5-spark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -331,10 +331,9 @@ the deprecated `zkUrl` parameter for backwards compatibility purposes. If neither
it falls back to using connection defined by hbase-site.xml.
- `"jdbcUrl"` expects a full Phoenix JDBC URL, i.e. "jdbc:phoenix" or "jdbc:phoenix:zkHost:zkport",
while `"zkUrl"` expects the ZK quorum only, i.e. "zkHost:zkPort"
- If you want to use DataSourceV1, you can use source type `"org.apache.phoenix.spark"`
instead of `"phoenix"`, however this is deprecated.
The `"org.apache.phoenix.spark"` datasource does not accept the `"jdbcUrl"` parameter,
only `"zkUrl"`
- The DataSourceV1 implementation was removed;
the `"org.apache.phoenix.spark"` source type
uses the DataSourceV2 implementation since the connector 6.0.0 release.
- The (deprecated) functions `phoenixTableAsDataFrame`, `phoenixTableAsRDD` and
`saveToPhoenix` use the deprecated `"org.apache.phoenix.spark"` datasource, and allow
optionally specifying a `conf` Hadoop configuration parameter with custom Phoenix client settings,
Expand Down
44 changes: 0 additions & 44 deletions phoenix5-spark/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,6 @@
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-tags_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
Expand Down Expand Up @@ -177,39 +171,6 @@
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet.jsp</groupId>
<artifactId>jsp-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.jruby</groupId>
<artifactId>jruby-complete</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>


<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
Expand Down Expand Up @@ -371,11 +332,6 @@
<artifactId>slf4j-api</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
<version>${jodatime.version}</version>
</dependency>

<!-- Test dependencies -->
<dependency>
Expand Down
201 changes: 90 additions & 111 deletions phoenix5-spark/src/it/java/org/apache/phoenix/spark/DataSourceApiIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,8 @@ public Configuration getConfiguration(Configuration confToClone) {

@Test
public void basicWriteAndReadBackTest() throws SQLException {
SparkConf sparkConf = new SparkConf().setMaster("local").setAppName("phoenix-test")
.set("spark.hadoopRDD.ignoreEmptySplits", "false");
JavaSparkContext jsc = new JavaSparkContext(sparkConf);
SQLContext sqlContext = new SQLContext(jsc);

SparkSession spark = SparkUtil.getSparkSession();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like spark.hadoopRDD.ignoreEmptySplits is default since 3.2.0, so removing it should be OK.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just replaced old code with a call to existing SparkUtil.getSparkSession() to avoid having duplicate code.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That one doesn't have spark.hadoopRDD.ignoreEmptySplits for Spark3 (which is fine)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since 3.2.0 / SPARK-34809 spark.hadoopRDD.ignoreEmptySplits is enabled by default.

However, this uses Spark 2. Shouldn't we keep that property for Spark 2 ?

Copy link
Copy Markdown
Contributor Author

@rejeb rejeb May 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just replaced old code with a call to existing SparkUtil.getSparkSession() to avoid having duplicate code and the property is there.

String tableName = generateUniqueName();

try (Connection conn = DriverManager.getConnection(getUrl());
Expand All @@ -85,141 +83,122 @@ public void basicWriteAndReadBackTest() throws SQLException {
"CREATE TABLE " + tableName + " (id INTEGER PRIMARY KEY, v1 VARCHAR)");
}

try (SparkSession spark = sqlContext.sparkSession()) {
StructType schema =
new StructType(new StructField[] {
new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
new StructField("v1", DataTypes.StringType, false, Metadata.empty()) });

StructType schema =
new StructType(new StructField[] {
new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
new StructField("v1", DataTypes.StringType, false, Metadata.empty()) });
// Use old zkUrl
Dataset<Row> df1 =
spark.createDataFrame(
Arrays.asList(RowFactory.create(1, "x")),
schema);

// Use old zkUrl
Dataset<Row> df1 =
spark.createDataFrame(
Arrays.asList(RowFactory.create(1, "x")),
schema);
df1.write().format("phoenix").mode(SaveMode.Overwrite)
.option("table", tableName)
.option(ZOOKEEPER_URL, getUrl())
.save();

// Use jdbcUrl
            // In Phoenix 5.2+ getUrl() returns a JDBC URL, in earlier versions it returns a ZK
// quorum
String jdbcUrl = getUrl();
if (!jdbcUrl.startsWith(JDBC_PROTOCOL)) {
jdbcUrl = JDBC_PROTOCOL_ZK + JDBC_PROTOCOL_SEPARATOR + jdbcUrl;
}
Dataset<Row> df2 =
spark.createDataFrame(
Arrays.asList(RowFactory.create(2, "x")),
schema);

df1.write().format("phoenix").mode(SaveMode.Overwrite)
df2.write().format("phoenix").mode(SaveMode.Overwrite)
.option("table", tableName)
.option(ZOOKEEPER_URL, getUrl())
.option(JDBC_URL, jdbcUrl)
.save();

// Use jdbcUrl
            // In Phoenix 5.2+ getUrl() returns a JDBC URL, in earlier versions it returns a ZK
// quorum
String jdbcUrl = getUrl();
if (!jdbcUrl.startsWith(JDBC_PROTOCOL)) {
jdbcUrl = JDBC_PROTOCOL_ZK + JDBC_PROTOCOL_SEPARATOR + jdbcUrl;
}
Dataset<Row> df2 =
spark.createDataFrame(
Arrays.asList(RowFactory.create(2, "x")),
schema);
// Use default from hbase-site.xml
Dataset<Row> df3 =
spark.createDataFrame(
Arrays.asList(RowFactory.create(3, "x")),
schema);

df2.write().format("phoenix").mode(SaveMode.Overwrite)
.option("table", tableName)
.option(JDBC_URL, jdbcUrl)
.save();
df3.write().format("phoenix").mode(SaveMode.Overwrite)
.option("table", tableName)
.save();

// Use default from hbase-site.xml
Dataset<Row> df3 =
spark.createDataFrame(
Arrays.asList(RowFactory.create(3, "x")),
schema);
try (Connection conn = DriverManager.getConnection(getUrl());
Statement stmt = conn.createStatement()) {
ResultSet rs = stmt.executeQuery("SELECT * FROM " + tableName);
assertTrue(rs.next());
assertEquals(1, rs.getInt(1));
assertEquals("x", rs.getString(2));
assertTrue(rs.next());
assertEquals(2, rs.getInt(1));
assertEquals("x", rs.getString(2));
assertTrue(rs.next());
assertEquals(3, rs.getInt(1));
assertEquals("x", rs.getString(2));
assertFalse(rs.next());
}

df3.write().format("phoenix").mode(SaveMode.Overwrite)
Dataset df1Read = spark.read().format("phoenix")
.option("table", tableName)
.save();

try (Connection conn = DriverManager.getConnection(getUrl());
Statement stmt = conn.createStatement()) {
ResultSet rs = stmt.executeQuery("SELECT * FROM " + tableName);
assertTrue(rs.next());
assertEquals(1, rs.getInt(1));
assertEquals("x", rs.getString(2));
assertTrue(rs.next());
assertEquals(2, rs.getInt(1));
assertEquals("x", rs.getString(2));
assertTrue(rs.next());
assertEquals(3, rs.getInt(1));
assertEquals("x", rs.getString(2));
assertFalse(rs.next());
}

Dataset df1Read = spark.read().format("phoenix")
.option("table", tableName)
.option(PhoenixDataSource.JDBC_URL, getUrl()).load();
.option(PhoenixDataSource.JDBC_URL, getUrl()).load();

assertEquals(3l, df1Read.count());
assertEquals(3l, df1Read.count());

// Use jdbcUrl
Dataset df2Read = spark.read().format("phoenix")
.option("table", tableName)
.option(PhoenixDataSource.JDBC_URL, jdbcUrl)
.load();

assertEquals(3l, df2Read.count());
// Use jdbcUrl
Dataset df2Read = spark.read().format("phoenix")
.option("table", tableName)
.option(PhoenixDataSource.JDBC_URL, jdbcUrl)
.load();

// Use default
Dataset df3Read = spark.read().format("phoenix")
.option("table", tableName)
.load();
assertEquals(3l, df2Read.count());

assertEquals(3l, df3Read.count());
// Use default
Dataset df3Read = spark.read().format("phoenix")
.option("table", tableName)
.load();

} finally {
jsc.stop();
}
assertEquals(3l, df3Read.count());
}

@Test
public void lowerCaseWriteTest() throws SQLException {
SparkConf sparkConf = new SparkConf().setMaster("local").setAppName("phoenix-test")
.set("spark.hadoopRDD.ignoreEmptySplits", "false");
JavaSparkContext jsc = new JavaSparkContext(sparkConf);
SQLContext sqlContext = new SQLContext(jsc);
SparkSession spark = SparkUtil.getSparkSession();
String tableName = generateUniqueName();

try (Connection conn = DriverManager.getConnection(getUrl());
Statement stmt = conn.createStatement()){
stmt.executeUpdate("CREATE TABLE " + tableName + " (id INTEGER PRIMARY KEY, v1 VARCHAR, \"v1\" VARCHAR)");
}
StructType schema = new StructType(new StructField[]{
new StructField("ID", DataTypes.IntegerType, false, Metadata.empty()),
new StructField("V1", DataTypes.StringType, false, Metadata.empty()),
new StructField("\"v1\"", DataTypes.StringType, false, Metadata.empty())
});

try(SparkSession spark = sqlContext.sparkSession()) {
//Doesn't help
spark.conf().set("spark.sql.caseSensitive", true);

StructType schema = new StructType(new StructField[]{
new StructField("ID", DataTypes.IntegerType, false, Metadata.empty()),
new StructField("V1", DataTypes.StringType, false, Metadata.empty()),
new StructField("\"v1\"", DataTypes.StringType, false, Metadata.empty())
});

Dataset<Row> df = spark.createDataFrame(
Arrays.asList(
RowFactory.create(1, "x", "y")),
schema);

df.write()
.format("phoenix")
.mode(SaveMode.Overwrite)
.option("table", tableName)
.option(PhoenixDataSource.SKIP_NORMALIZING_IDENTIFIER,"true")
.option(JDBC_URL, getUrl())
.save();

try (Connection conn = DriverManager.getConnection(getUrl());
Statement stmt = conn.createStatement()) {
ResultSet rs = stmt.executeQuery("SELECT * FROM " + tableName);
assertTrue(rs.next());
assertEquals(1, rs.getInt(1));
assertEquals("x", rs.getString(2));
assertEquals("y", rs.getString(3));
assertFalse(rs.next());
}
Dataset<Row> df = spark.createDataFrame(
Arrays.asList(
RowFactory.create(1, "x", "y")),
schema);

df.write()
.format("phoenix")
.mode(SaveMode.Overwrite)
.option("table", tableName)
.option(PhoenixDataSource.SKIP_NORMALIZING_IDENTIFIER,"true")
.option(JDBC_URL, getUrl())
.save();

} finally {
jsc.stop();
try (Connection conn = DriverManager.getConnection(getUrl());
Statement stmt = conn.createStatement()) {
ResultSet rs = stmt.executeQuery("SELECT * FROM " + tableName);
assertTrue(rs.next());
assertEquals(1, rs.getInt(1));
assertEquals("x", rs.getString(2));
assertEquals("y", rs.getString(3));
assertFalse(rs.next());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class AbstractPhoenixSparkIT extends FunSuite with Matchers with BeforeAndAfter

lazy val jdbcUrl = PhoenixSparkITHelper.getUrl

lazy val quorumAddress = ConfigurationUtil.getZookeeperURL(hbaseConfiguration).get
lazy val quorumAddress = PhoenixSparkITHelper.getUrl

// Runs SQL commands located in the file defined in the sqlSource argument
// Optional argument tenantId used for running tenant-specific SQL
Expand Down
Loading