Skip to content

Commit fab56b6

Browse files
committed
Add spatial-index subproject
1 parent 978febc commit fab56b6

8 files changed

Lines changed: 141 additions & 18 deletions

File tree

build.sbt

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,13 @@ import java.time.Year
33

44
val scalaVersions = Seq("2.12.15")
55

6+
val sparkVersion = "3.1.3"
7+
val catsVersion = "2.6.1"
8+
val shapelessVersion = "2.3.3" // to be compatible with Spark 3.1.x
9+
val scalaTestVersion = "3.2.11"
10+
val geomesaVersion = "3.3.0"
11+
val geotrellisVersion = "3.6.1+0-6b5868af+20220321-1909-SNAPSHOT" //"3.6.1"
12+
613
lazy val commonSettings = Seq(
714
scalaVersion := scalaVersions.head,
815
crossScalaVersions := scalaVersions,
@@ -52,18 +59,18 @@ lazy val root = (project in file("."))
5259
publish := {},
5360
publishLocal := {}
5461
)
55-
.aggregate(core, spatial)
62+
.aggregate(core, spatial, `spatial-index`)
5663

5764
lazy val core = project
5865
.settings(commonSettings)
5966
.settings(name := "hiveless-core")
6067
.settings(
6168
addCompilerPlugin("org.typelevel" % "kind-projector" % "0.13.2" cross CrossVersion.full),
6269
libraryDependencies ++= Seq(
63-
"org.typelevel" %% "cats-core" % "2.6.1",
64-
"com.chuusai" %% "shapeless" % "2.3.3", // to be compatible with Spark 3.1.x
65-
"org.apache.spark" %% "spark-hive" % "3.1.2" % Provided,
66-
"org.scalatest" %% "scalatest" % "3.2.11" % Test
70+
"org.typelevel" %% "cats-core" % catsVersion,
71+
"com.chuusai" %% "shapeless" % shapelessVersion,
72+
"org.apache.spark" %% "spark-hive" % sparkVersion % Provided,
73+
"org.scalatest" %% "scalatest" % scalaTestVersion % Test
6774
)
6875
)
6976

@@ -73,8 +80,19 @@ lazy val spatial = project
7380
.settings(name := "hiveless-spatial")
7481
.settings(
7582
libraryDependencies ++= Seq(
76-
"org.locationtech.geomesa" %% "geomesa-spark-jts" % "3.3.0",
77-
"org.scalatest" %% "scalatest" % "3.2.10" % Test
83+
"org.locationtech.geomesa" %% "geomesa-spark-jts" % geomesaVersion,
84+
"org.scalatest" %% "scalatest" % scalaTestVersion % Test
85+
)
86+
)
87+
88+
lazy val `spatial-index` = project
89+
.dependsOn(spatial % "compile->compile;provided->provided")
90+
.settings(commonSettings)
91+
.settings(name := "hiveless-spatial-index")
92+
.settings(
93+
libraryDependencies ++= Seq(
94+
"org.locationtech.geotrellis" %% "geotrellis-store" % geotrellisVersion,
95+
"org.scalatest" %% "scalatest" % scalaTestVersion % Test
7896
),
7997
assembly / test := {},
8098
assembly / assemblyShadeRules := {

core/src/main/scala/com/azavea/hiveless/implicits/syntax.scala

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@
1616

1717
package com.azavea.hiveless.implicits
1818

19-
import com.azavea.hiveless.serializers.HDeserialier
19+
import com.azavea.hiveless.serializers.{HConverter, HDeserialier, HSerializer, UnaryDeserializer}
2020
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF
21+
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector
2122

22-
object syntax {
23+
object syntax extends Serializable {
2324
implicit class DeferredObjectOps(val self: GenericUDF.DeferredObject) extends AnyVal {
2425

2526
/** Behaves like a regular get, but throws when the result is null. */
@@ -28,4 +29,17 @@ object syntax {
2829
case _ => throw HDeserialier.Errors.NullArgument
2930
}
3031
}
32+
33+
implicit class ArrayDeferredObjectOps(val self: Array[GenericUDF.DeferredObject]) extends AnyVal {
34+
def deserialize[F[_], T: UnaryDeserializer[F, *]](inspectors: Array[ObjectInspector]): F[T] =
35+
UnaryDeserializer[F, T].deserialize(self, inspectors)
36+
}
37+
38+
implicit class ConverterOps(val self: Any) extends AnyVal {
39+
def convert[T: HConverter]: T = HConverter[T].convert(self)
40+
}
41+
42+
implicit class SerializerOps[T](val self: T) extends AnyVal {
43+
def serialize(implicit ev: HSerializer[T]): Any = HSerializer[T].serialize(self)
44+
}
3145
}

core/src/main/scala/com/azavea/hiveless/serializers/GenericDeserializer.scala

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,10 @@ object GenericDeserializer extends Serializable {
7070
dh: UnaryDeserializer[F, H],
7171
dt: GenericDeserializer[F, T]
7272
): GenericDeserializer[F, H :: T] = new GenericDeserializer[F, H :: T] {
73-
def deserialize(arguments: Array[GenericUDF.DeferredObject], inspectors: Array[ObjectInspector]): F[H :: T] =
74-
(dh.deserialize(arguments.head, inspectors.head), dt.deserialize(arguments.tail, inspectors.tail)).mapN(_ :: _)
73+
def deserialize(arguments: Array[GenericUDF.DeferredObject], inspectors: Array[ObjectInspector]): F[H :: T] = {
74+
// take and drop allow us to handle options safely
75+
// take is left for semantic reasons only
76+
(dh.deserialize(arguments.take(1), inspectors.take(1)), dt.deserialize(arguments.drop(1), inspectors.drop(1))).mapN(_ :: _)
77+
}
7578
}
7679
}

core/src/main/scala/com/azavea/hiveless/serializers/UnaryDeserializer.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import org.apache.spark.sql.hive.HivelessInternals.unwrap
2222
import org.apache.spark.sql.types.Decimal
2323
import org.apache.spark.unsafe.types.UTF8String
2424
import cats.Id
25+
import cats.syntax.apply._
2526
import org.apache.spark.sql.catalyst.util.ArrayData
2627
import shapeless.HNil
2728

@@ -45,6 +46,10 @@ object UnaryDeserializer extends Serializable {
4546
implicit def tryUnaryDeserializer[T: UnaryDeserializer[Id, *]]: UnaryDeserializer[Try, T] =
4647
(arguments, inspectors) => Try(UnaryDeserializer[Id, T].deserialize(arguments, inspectors))
4748

49+
/** Derive Optional UnaryDeserializers. */
50+
implicit def optionalUnaryDeserializer[T: UnaryDeserializer[Id, *]]: UnaryDeserializer[Id, Option[T]] =
51+
(arguments, inspectors) => (arguments.headOption, inspectors.headOption).mapN(UnaryDeserializer[Id, T].deserialize)
52+
4853
/** Derivation helper deserializer. */
4954
implicit val hnilUnaryDeserializer: UnaryDeserializer[Id, HNil] = (_, _) => HNil
5055

spatial-index/sql/createUDFs.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
CREATE OR REPLACE FUNCTION ST_partitionCentroid as 'com.azavea.hiveless.spatial.index.ST_PartitionCentroid';
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* Copyright 2022 Azavea
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.azavea.hiveless.spatial.index
18+
19+
import com.azavea.hiveless.HUDF
20+
import com.azavea.hiveless.spatial._
21+
import geotrellis.layer.{SpatialKey, ZoomedLayoutScheme}
22+
import geotrellis.vector._
23+
import geotrellis.proj4.{CRS, LatLng}
24+
import geotrellis.store.index.zcurve.Z2
25+
import org.locationtech.jts.geom.Geometry
26+
27+
class ST_PartitionCentroid extends HUDF[(Geometry, Int, Option[CRS], Option[Int], Option[Double], Option[Int]), Long] {
  val name: String = "st_partitionCentroid"

  /**
   * Derives a coarse spatial partition id for a geometry:
   * the centroid of the geometry's extent is keyed into a [[ZoomedLayoutScheme]]
   * grid at the requested zoom level, the resulting (col, row) [[SpatialKey]] is
   * Z2-encoded, and the low `bits` bits are shifted away to coarsen the partitioning.
   *
   * Optional arguments fall back to defaults: CRS -> LatLng,
   * tile size / resolution threshold -> ZoomedLayoutScheme defaults, bits -> 8.
   */
  def function = {
    case (geom: Geometry, zoom: Int, maybeCrs: Option[CRS], maybeTileSize: Option[Int], maybeThreshold: Option[Double], maybeBits: Option[Int]) =>
      // resolve defaults for absent optional arguments
      val crs                 = maybeCrs.getOrElse(LatLng)
      val tileSize            = maybeTileSize.getOrElse(ZoomedLayoutScheme.DEFAULT_TILE_SIZE)
      val resolutionThreshold = maybeThreshold.getOrElse(ZoomedLayoutScheme.DEFAULT_RESOLUTION_THRESHOLD)
      val shift               = maybeBits.getOrElse(8)

      // key the extent centroid into the layout grid for this zoom level
      val layout               = new ZoomedLayoutScheme(crs, tileSize, resolutionThreshold).levelForZoom(zoom).layout
      val SpatialKey(col, row) = layout.mapTransform(geom.extent.center)

      // Z2 space-filling-curve index, coarsened by dropping the low bits
      Z2(col, row).z >> shift
  }
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Copyright 2022 Azavea
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.azavea.hiveless.spatial
18+
19+
import com.azavea.hiveless.serializers.{HConverter, HSerializer, UnaryDeserializer}
20+
import com.azavea.hiveless.implicits.syntax._
21+
import cats.Id
22+
import geotrellis.proj4.CRS
23+
import org.apache.spark.sql.types.{DataType, StringType}
24+
25+
package object index {

  /** Parse a CRS from its Hive string argument (e.g. a proj4 or EPSG string). */
  implicit def crsConverter: HConverter[CRS] = new HConverter[CRS] {
    def convert(argument: Any): CRS = CRS.fromString(argument.convert[String])
  }

  /** Deserialize a Hive UDF argument as a String, then convert it into a CRS. */
  implicit def crsUnaryDeserializer: UnaryDeserializer[Id, CRS] = { (arguments, inspectors) =>
    val repr = arguments.deserialize[Id, String](inspectors)
    repr.convert[CRS]
  }

  /** CRS values are written back to Hive as proj4 strings. */
  implicit def crsSerializer: HSerializer[CRS] = new HSerializer[CRS] {
    def dataType: DataType    = StringType
    def serialize: CRS => Any = crs => crs.toProj4String.serialize
  }
}

spatial/src/main/scala/com/azavea/hiveless/spatial/package.scala

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,22 @@
1717
package com.azavea.hiveless
1818

1919
import com.azavea.hiveless.serializers.{HConverter, HSerializer, UnaryDeserializer}
20+
import com.azavea.hiveless.implicits.syntax._
2021
import cats.Id
2122
import org.locationtech.jts.geom.Geometry
22-
import org.apache.spark.sql.jts.GeometryUDT
23-
import org.apache.spark.sql.catalyst.InternalRow
24-
import org.apache.spark.sql.types.DataType
23+
import org.apache.spark.sql.types.{BinaryType, DataType}
24+
import org.locationtech.geomesa.spark.jts.util.WKBUtils
2525

2626
package object spatial extends Serializable {
2727
implicit def geometryConverter[T <: Geometry]: HConverter[T] = new HConverter[T] {
28-
def convert(argument: Any): T = GeometryUDT.deserialize(argument).asInstanceOf[T]
28+
def convert(argument: Any): T = WKBUtils.read(argument.asInstanceOf[Array[Byte]]).asInstanceOf[T]
2929
}
3030

3131
implicit def geometryUnaryDeserializer[T <: Geometry: HConverter]: UnaryDeserializer[Id, T] =
32-
(arguments, inspectors) => HConverter[T].convert(UnaryDeserializer[Id, InternalRow].deserialize(arguments, inspectors))
32+
(arguments, inspectors) => arguments.deserialize[Id, Array[Byte]](inspectors).convert[T]
3333

3434
implicit def geometrySerializer[T <: Geometry]: HSerializer[T] = new HSerializer[T] {
35-
def dataType: DataType = GeometryUDT
36-
def serialize: Geometry => InternalRow = GeometryUDT.serialize
35+
def dataType: DataType = BinaryType
36+
def serialize: Geometry => Array[Byte] = WKBUtils.write
3737
}
3838
}

0 commit comments

Comments
 (0)