Commits (34)
9fdd5b3
set access credentials on the Configuration, so reads can succeed
koeninger Jul 30, 2015
9d280ec
Update build to test using official Amazon JDBC driver.
JoshRosen Aug 18, 2015
b479bb1
Move from .sbt to .scala SBT build definition.
JoshRosen Aug 18, 2015
47bb64b
Import Kai's Redshift tests.
JoshRosen Aug 18, 2015
6cba042
WIP towards documenting how to run tests.
JoshRosen Aug 18, 2015
9b257e1
Nicer error message for missing configurations
JoshRosen Aug 18, 2015
695183f
Merge remote-tracking branch 'koeninger/set-credentials' into integra…
JoshRosen Aug 18, 2015
4b203c7
Move creation of temporary tables into beforeEach() method.
JoshRosen Aug 18, 2015
ffa00d3
Fix deletion of S3 files in afterAll()
JoshRosen Aug 18, 2015
8371d6a
Ignore tests that are currently failing
JoshRosen Aug 18, 2015
3652056
Add encrypted credentials to Travis build.
JoshRosen Aug 18, 2015
e08b513
Change Redshift password
JoshRosen Aug 18, 2015
4e517ae
Require S3N URLs to be used
JoshRosen Aug 19, 2015
55c7724
Use randomized table names + dir name to avoid conflicts between Trav…
JoshRosen Aug 19, 2015
cf1d516
Use multi-row insert statement
JoshRosen Aug 19, 2015
14e3cc2
Only use positive numbers in table names.
JoshRosen Aug 19, 2015
b62646f
Merge remote-tracking branch 'db/master' into integration-tests
JoshRosen Aug 19, 2015
66c166d
Update assertions and unignore tests
JoshRosen Aug 19, 2015
bffe6eb
Use HiveContext in integration tests
JoshRosen Aug 19, 2015
da4800a
Use TestHiveContext to work around temp. metastore issues
JoshRosen Aug 19, 2015
7c4f64b
Pass options to save commands.
JoshRosen Aug 19, 2015
2959a9c
Copy Spark code to minimize reliance on private APIs.
JoshRosen Aug 19, 2015
78a6ccb
Merge branch 'remove-private-sql-usage' into integration-tests
JoshRosen Aug 19, 2015
b86d74e
Fix test compile
JoshRosen Aug 19, 2015
24f9a9e
Two minor fixes
JoshRosen Aug 19, 2015
a74a950
Fix problem in reflection code
JoshRosen Aug 19, 2015
39edc80
Merge remote-tracking branch 'origin/master' into integration-tests
JoshRosen Aug 19, 2015
40581fe
Add Scalastyle for integration tests; code cleanup.
JoshRosen Aug 19, 2015
681b5f2
Remove failing test
JoshRosen Aug 19, 2015
7fabdd7
Merge remote-tracking branch 'origin/master' into integration-tests
JoshRosen Aug 20, 2015
a1f0146
Revert "set access credentials on the Configuration, so reads can suc…
JoshRosen Aug 20, 2015
93faac2
Address review comments.
JoshRosen Aug 20, 2015
b82e5fd
Style fix.
JoshRosen Aug 20, 2015
2e6e274
Change credential variable names
JoshRosen Aug 20, 2015
21 changes: 18 additions & 3 deletions .travis.yml
@@ -8,10 +8,25 @@ cache:
directories:
- $HOME/.ivy2
env:
- HADOOP_VERSION="1.0.4"
- HADOOP_VERSION="1.2.1"
- HADOOP_VERSION="2.2.0"
matrix:
- HADOOP_VERSION="1.0.4"
- HADOOP_VERSION="1.2.1"
- HADOOP_VERSION="2.2.0"
global:
# AWS_REDSHIFT_JDBC_URL
- secure: "RNkxdKcaKEYuJqxli8naazp42qO5/pgueIzs+J5rHwl39jcBvJMgW3DX8kT7duzdoBb/qrolj/ttbQ3l/30P45+djn0BEwcJMX7G/FGpZYD23yd03qeq7sOKPQl2Ni/OBttYHJMah5rI6aPmAysBZMQO7Wijdenb/RUiU2YcZp0="
# AWS_REDSHIFT_PASSWORD
- secure: "Bzre/ohanBt6wrj5asn8+iaIU5qm2QBZ+P/PiAeg55R5sqfyI/pwCYZKdtKSG7SuKzsoiAOtnjvcXMD2hickTLIDz3GmrvFcpx7yn3PEKoLQfT4Ry1/RMOsqa1+sj6zJ7J2dl4w0AURJ7Jb9/7GRylNnL0jkUvqUnWet8PBb7R8="
# AWS_REDSHIFT_USER
- secure: "LIkY/ZpBXK3vSFsdpBSRXEsgfD2wDF52X8OZOlyBJOiZpS4y1/obj8b3VQABDPyPH95bGX/LOpM0vVM137rYgF0pskgVEzLMyZOPpwYqNGPf/d4BtQhBRc8f7+jmr6D4Hrox4jCl0cCKaeiTazun2+Y9E+zgCUDvQ8y9qGctR2k="
# AWS_ACCESS_KEY_ID
- secure: "CDlql+nrgdi7sUr7bYyXF4CFoOUCiJG9WEYNRV4k/lC37eS/al3iVYicnXqF+6UrPv5a4kHulG4d3g78J4hzn4ZVJuEhn6v8beoOBUoJJ7W/J05hVwGiQFxUq86wT3tIaBrAuDmOXaAnPEvDmPfJGNZL9ZG1CaQJo70R/HkbbVA="
# AWS_SECRET_ACCESS_KEY
Contributor Author:
I wonder if I should purposely give these different names than the ones expected by the Amazon SDKs; this might be necessary in order to be able to test credential mechanisms (e.g. for writing a regression test for #32)
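A minimal sketch of what such a regression test could set up, assuming hypothetical `TEST_AWS_*` variable names that the Amazon SDK's default credential chain would not find, so the test passes only if the credentials are forwarded explicitly:

```scala
import org.apache.spark.{SparkConf, SparkContext}

// Hypothetical names, deliberately different from AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY,
// so the Amazon SDK cannot pick them up on its own.
val accessKey = sys.env("TEST_AWS_ACCESS_KEY_ID")
val secretKey = sys.env("TEST_AWS_SECRET_ACCESS_KEY")

val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("credential-forwarding-test"))
// Forward the credentials to the Hadoop configuration; s3n:// reads should then succeed
// only through this explicit configuration, which is the behavior under test.
sc.hadoopConfiguration.set("fs.s3n.awsAccessKeyId", accessKey)
sc.hadoopConfiguration.set("fs.s3n.awsSecretAccessKey", secretKey)
```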

- secure: "V/Ac0ZkTslNpNc8wszalFqZYWnl910PgSORlA2tyTUCC/xfqX+CdtN9RNuVb3LBrvrkYiOBKF7ANMGOxnc/yazLNFBUmByf+rwEfR7NDCCz+SKXSNwIOPpDraOpNVd1KLyrJ9uKivFojW/IweN9bsJAEji8ql/Lpeb7qKfDbVWY="
# AWS_S3_SCRATCH_SPACE
- secure: "LvndQIW6dHs6nyaMHtblGI/oL+s460lOezFs2BoD0Isenb/O/IM+nY5K9HepTXjJIcq8qvUYnojZX1FCrxxOXX2/+/Iihiq7GzJYdmdMC6hLg9bJYeAFk0dWYT88/AwadrJCBOa3ockRLhiO3dkai7Ki5+M1erfaFiAHHMpJxYQ="
script:
- sbt -Dhadoop.version=$HADOOP_VERSION coverage test
Contributor:
Nit: is there a reason to not test style and other fast stuff first?

Contributor Author:
I wanted the tests to still be able to run even if style checks failed. I figured this wasn't a huge deal here compared to what we do in Spark because the test time is really fast.

- if [ "$TRAVIS_SECURE_ENV_VARS" ]; then sbt -Dhadoop.version=$HADOOP_VERSION coverage it:test; fi
after_success:
- bash <(curl -s https://codecov.io/bash)
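The integration tests read these settings from the environment. A minimal sketch, in the spirit of the "Nicer error message for missing configurations" commit, of validating them up front with a readable failure (the helper name is hypothetical):

```scala
// Hypothetical helper: fail fast with a descriptive message instead of a bare
// NoSuchElementException somewhere inside the test suite.
def requiredEnv(name: String): String =
  sys.env.getOrElse(name, throw new IllegalArgumentException(
    s"Integration tests require the $name environment variable to be set"))

val jdbcUrl   = requiredEnv("AWS_REDSHIFT_JDBC_URL")
val jdbcUser  = requiredEnv("AWS_REDSHIFT_USER")
val password  = requiredEnv("AWS_REDSHIFT_PASSWORD")
val tempDir   = requiredEnv("AWS_S3_SCRATCH_SPACE")
val accessKey = requiredEnv("AWS_ACCESS_KEY_ID")
val secretKey = requiredEnv("AWS_SECRET_ACCESS_KEY")
```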
17 changes: 9 additions & 8 deletions README.md
@@ -28,8 +28,9 @@ Further, as Redshift is an AWS product, some AWS libraries will be required. This assumes that
your deployment environment will include `hadoop-aws`, or other things necessary to access S3, credentials,
etc. Check the dependencies with "provided" scope in <tt>build.sbt</tt> if you're at all unclear.

You're also going to need a JDBC driver that is compatible with Redshift. The one used for testing can be
found in <tt>build.sbt</tt>, however Amazon recommend that you use [their driver](http://docs.aws.amazon.com/redshift/latest/mgmt/configure-jdbc-connection.html).
You're also going to need a JDBC driver that is compatible with Redshift. Amazon recommend that you
use [their driver](http://docs.aws.amazon.com/redshift/latest/mgmt/configure-jdbc-connection.html),
although this library has also been successfully tested using the Postgres JDBC driver.
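For example, a minimal sbt sketch of adding the Amazon driver, using the same artifact coordinates as the test dependency declared in project/SparkRedshiftBuild.scala later in this diff:

```scala
// Assumption: the Amazon driver is fetched directly from the URL Amazon publishes;
// any other Redshift-compatible JDBC driver on the classpath works as well.
libraryDependencies += "com.amazon.redshift" % "jdbc4" % "1.1.7.1007" from
  "https://s3.amazonaws.com/redshift-downloads/drivers/RedshiftJDBC4-1.1.7.1007.jar"
```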

## Usage

@@ -49,7 +50,7 @@ val sqlContext = new SQLContext(sc)
// Get some data from a Redshift table
val df: DataFrame = sqlContext.read
  .format("com.databricks.spark.redshift")
  .option("url", "jdbc:postgresql://redshifthost:5439/database?user=username&password=pass")
  .option("url", "jdbc:redshift://redshifthost:5439/database?user=username&password=pass")
  .option("dbtable", "my_table")
  .option("tempdir", "s3://path/for/temp/data")
  .load()
@@ -59,7 +60,7 @@ val df: DataFrame = sqlContext.read

df.write
  .format("com.databricks.spark.redshift")
  .option("url", "jdbc:postgresql://redshifthost:5439/database?user=username&password=pass")
  .option("url", "jdbc:redshift://redshifthost:5439/database?user=username&password=pass")
  .option("dbtable", "my_table_copy")
  .option("tempdir", "s3://path/for/temp/data")
  .mode("error")
@@ -77,15 +78,15 @@ sql_context = SQLContext(sc)
# Read data from a table
df = sql_context.read \
    .format("com.databricks.spark.redshift") \
    .option("url", "jdbc:postgresql://redshifthost:5439/database?user=username&password=pass") \
    .option("url", "jdbc:redshift://redshifthost:5439/database?user=username&password=pass") \
    .option("dbtable", "my_table") \
    .option("tempdir", "s3://path/for/temp/data") \
    .load()

# Write back to a table
df.write \
    .format("com.databricks.spark.redshift") \
    .option("url", "jdbc:postgresql://redshifthost:5439/database?user=username&password=pass") \
    .option("url", "jdbc:redshift://redshifthost:5439/database?user=username&password=pass") \
    .option("dbtable", "my_table_copy") \
    .option("tempdir", "s3://path/for/temp/data") \
    .mode("error")
@@ -99,7 +100,7 @@ CREATE TABLE my_table
USING com.databricks.spark.redshift
OPTIONS (dbtable 'my_table',
tempdir 's3://my_bucket/tmp',
url 'jdbc:postgresql://host:port/db?user=username&password=pass');
url 'jdbc:redshift://host:port/db?user=username&password=pass');
```

### Scala helper functions
@@ -204,7 +205,7 @@ and use that as a temp location for this data.
<tr>
<td><tt>jdbcdriver</tt></td>
<td>No</td>
<td><tt>org.postgresql.Driver</tt></td>
<td><tt>com.amazon.redshift.jdbc4.Driver</tt></td>
<td>The class name of the JDBC driver to load before JDBC operations. Must be on classpath.</td>
</tr>
<tr>
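A hedged usage sketch of the `jdbcdriver` option documented above, for keeping the Postgres driver now that the Amazon class is the default (the connection details are the same placeholders used in the README examples, and `sc` is assumed to exist):

```scala
import org.apache.spark.sql.{DataFrame, SQLContext}

val sqlContext = new SQLContext(sc)  // assumes an existing SparkContext, as in the examples above

// Explicitly select the Postgres driver class instead of the default
// com.amazon.redshift.jdbc4.Driver; the chosen driver must be on the classpath.
val df: DataFrame = sqlContext.read
  .format("com.databricks.spark.redshift")
  .option("url", "jdbc:postgresql://redshifthost:5439/database?user=username&password=pass")
  .option("jdbcdriver", "org.postgresql.Driver")
  .option("dbtable", "my_table")
  .option("tempdir", "s3://path/for/temp/data")
  .load()
```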
49 changes: 0 additions & 49 deletions build.sbt

This file was deleted.

66 changes: 66 additions & 0 deletions project/SparkRedshiftBuild.scala
@@ -0,0 +1,66 @@
/*
* Copyright 2015 Databricks
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import sbt._
import sbt.Keys._
import sbtsparkpackage.SparkPackagePlugin.autoImport._
import scoverage.ScoverageSbtPlugin

object SparkRedshiftBuild extends Build {
  val hadoopVersion = settingKey[String]("Hadoop version")

  // Define a custom test configuration so that unit test helper classes can be re-used under
  // the integration tests configuration; see http://stackoverflow.com/a/20635808.
  lazy val IntegrationTest = config("it") extend Test

  lazy val root = Project("spark-redshift", file("."))
    .configs(IntegrationTest)
    .settings(net.virtualvoid.sbt.graph.Plugin.graphSettings: _*)
    .settings(Defaults.itSettings: _*)
    .settings(Seq(
      name := "spark-redshift",
      organization := "com.databricks",
      version := "0.4.1-SNAPSHOT",
      scalaVersion := "2.10.4",
Contributor:
Doesn't have to be here, but in a followup can we make sure to cross publish 2.11?

Contributor Author:
Yep, I plan to add Scala versions to the Matrix build configuration.
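A minimal sketch of what that follow-up could look like in this settings block (not part of this PR; the 2.11 patch version is only an example):

```scala
// Hypothetical follow-up: cross-build once spark-avro is also published for 2.11.
scalaVersion := "2.10.4",
crossScalaVersions := Seq("2.10.4", "2.11.7"),
```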

      sparkVersion := sys.props.get("spark.version").getOrElse("1.4.1"),
      hadoopVersion := sys.props.get("hadoop.version").getOrElse("2.2.0"),
      spName := "databricks/spark-redshift",
      sparkComponents += "sql",
      licenses += "Apache-2.0" -> url("http://opensource.org/licenses/Apache-2.0"),
      credentials += Credentials(Path.userHome / ".ivy2" / ".credentials"),
      resolvers +=
        "Sonatype OSS Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots",
      libraryDependencies ++= Seq(
        "com.amazonaws" % "aws-java-sdk-core" % "1.9.40" % "provided",
        // We require spark-avro, but avro-mapred must be provided to match Hadoop version:
        "com.databricks" %% "spark-avro" % "1.0.0",
Contributor:
Another comment so I don't forget, we should update this when we publish a new version of avro (and I guess this creates a cross publishing ordering dependency).

Contributor Author:
Yes. The fact that spark-avro is not currently cross-published for 2.11 is one of the main blockers to 2.11 support here.

"org.apache.avro" % "avro-mapred" % "1.7.6" % "provided" exclude("org.mortbay.jetty", "servlet-api"),
// A Redshift-compatible JDBC driver must be present on the classpath for spark-redshift to work.
// For testing, we use an Amazon driver, which is available from
// http://docs.aws.amazon.com/redshift/latest/mgmt/configure-jdbc-connection.html
"com.amazon.redshift" % "jdbc4" % "1.1.7.1007" % "test" from "https://s3.amazonaws.com/redshift-downloads/drivers/RedshiftJDBC4-1.1.7.1007.jar",
"com.google.guava" % "guava" % "14.0.1" % "test",
"org.scalatest" %% "scalatest" % "2.1.5" % "test",
"org.scalamock" %% "scalamock-scalatest-support" % "3.2" % "test"
),
ScoverageSbtPlugin.ScoverageKeys.coverageHighlighting := {
if (scalaBinaryVersion.value == "2.10") false
else false
},
// Display full-length stacktraces from ScalaTest:
testOptions in Test += Tests.Argument("-oF")
): _*)
}