diff --git a/hive/src/main/java/com/esri/hadoop/hive/BinUtils.java b/hive/src/main/java/com/esri/hadoop/hive/BinUtils.java
new file mode 100755
index 0000000..e7cd80f
--- /dev/null
+++ b/hive/src/main/java/com/esri/hadoop/hive/BinUtils.java
@@ -0,0 +1,96 @@
+package com.esri.hadoop.hive;
+
+import com.esri.core.geometry.Envelope;
+
+/**
+ * Addresses a square grid of equally sized bins with a single long ID.
+ * IDs are assigned row by row: the row index counts down from extentMax on
+ * the y axis and the column index counts over from extentMin on the x axis.
+ */
+public class BinUtils {
+    final long numCols;
+    final double extentMin;
+    final double extentMax;
+    final double binSize;
+
+    /**
+     * Creates a grid whose bins are binSize wide and binSize high.
+     *
+     * @param binSize edge length of a single bin; must be positive and finite
+     * @throws IllegalArgumentException if binSize is zero, negative, NaN or infinite
+     */
+    public BinUtils(double binSize) {
+        // a non-positive or non-finite size yields no usable column count, which
+        // makes every bin ID meaningless and can divide by zero in queryEnvelope
+        if (!(binSize > 0) || Double.isInfinite(binSize)) {
+            throw new IllegalArgumentException("Bin size must be a positive, finite number: " + binSize);
+        }
+
+        this.binSize = binSize;
+
+        // absolute max number of rows/columns we can have
+        long maxBinsPerAxis = (long) Math.sqrt(Long.MAX_VALUE);
+
+        // a smaller binSize gives us a smaller extent width and height that
+        // can be addressed by a single 64 bit long
+        double size = (binSize < 1) ? maxBinsPerAxis * binSize : maxBinsPerAxis;
+
+        extentMax = size/2;
+        extentMin = extentMax - size;
+        numCols = (long)(Math.ceil(size / binSize));
+    }
+
+    /**
+     * Gets bin ID from a point.
+     *
+     * @param x x coordinate of the point
+     * @param y y coordinate of the point
+     * @return ID of the bin that contains the point
+     */
+    public long getId(double x, double y) {
+        double down = (extentMax - y) / binSize;
+        double over = (x - extentMin) / binSize;
+
+        return ((long)down * numCols) + (long)over;
+    }
+
+    /**
+     * Gets the envelope for the bin ID.
+     *
+     * @param binId ID of the bin, as returned by getId
+     * @param envelope envelope that receives the bin's coordinates
+     */
+    public void queryEnvelope(long binId, Envelope envelope) {
+        long down = binId / numCols;
+        long over = binId % numCols;
+
+        setBinCoords(down, over, envelope);
+    }
+
+    /**
+     * Gets the envelope for the bin that contains the x,y coords.
+     *
+     * @param x x coordinate of the point
+     * @param y y coordinate of the point
+     * @param envelope envelope that receives the bin's coordinates
+     */
+    public void queryEnvelope(double x, double y, Envelope envelope) {
+        // truncate to whole rows/columns exactly as getId does; using the
+        // fractional offsets would anchor the envelope at the point itself
+        // rather than at the corner of the bin that contains it
+        long down = (long)((extentMax - y) / binSize);
+        long over = (long)((x - extentMin) / binSize);
+
+        setBinCoords(down, over, envelope);
+    }
+
+    /** Writes the extent of the bin at the given row and column into envelope. */
+    private void setBinCoords(long down, long over, Envelope envelope) {
+        double xmin = extentMin + (over * binSize);
+        double xmax = xmin + binSize;
+        double ymax = extentMax - (down * binSize);
+        double ymin = ymax - binSize;
+
+        envelope.setCoords(xmin, ymin, xmax, ymax);
+    }
+}
diff --git a/hive/src/main/java/com/esri/hadoop/hive/ST_Bin.java b/hive/src/main/java/com/esri/hadoop/hive/ST_Bin.java
new file mode 100755
index 0000000..2456f1d
--- /dev/null
+++ b/hive/src/main/java/com/esri/hadoop/hive/ST_Bin.java
@@ -0,0 +1,99 @@
+package com.esri.hadoop.hive;
+
+import java.util.EnumSet;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+import com.esri.core.geometry.ogc.OGCPoint;
+
+/**
+ * Hive UDF that returns the ID of the bin containing a point, for a grid
+ * of bins of the given size.
+ */
+@Description(
+    name = "ST_Bin",
+    value = "_FUNC_(binsize, point) - return bin ID for given point\n")
+public class ST_Bin extends GenericUDF {
+
+    private transient HiveGeometryOIHelper geomHelper;
+    private transient boolean binSizeIsConstant;
+    private transient PrimitiveObjectInspector oiBinSize;
+    private transient BinUtils bins;
+
+    /**
+     * Checks that the function received a numeric bin size and a geometry.
+     *
+     * @param OIs inspectors for the bin size (0) and the point (1)
+     * @return inspector for the long bin ID produced by evaluate
+     * @throws UDFArgumentException if the argument count or the bin size type is wrong
+     */
+    @Override
+    public ObjectInspector initialize(ObjectInspector[] OIs)
+            throws UDFArgumentException {
+
+        if (OIs.length != 2) {
+            throw new UDFArgumentException("Function takes exactly 2 arguments");
+        }
+
+        if (OIs[0].getCategory() != Category.PRIMITIVE) {
+            throw new UDFArgumentException("Argument 0 must be a number");
+        }
+
+        oiBinSize = (PrimitiveObjectInspector)OIs[0];
+        if (!EnumSet.of(PrimitiveCategory.DOUBLE,PrimitiveCategory.INT,PrimitiveCategory.LONG,PrimitiveCategory.SHORT, PrimitiveCategory.FLOAT).contains(oiBinSize.getPrimitiveCategory())) {
+            throw new UDFArgumentException("Argument 0 must be a number");
+        }
+
+        geomHelper = HiveGeometryOIHelper.create(OIs[1], 1);
+        binSizeIsConstant = ObjectInspectorUtils.isConstantObjectInspector(OIs[0]);
+
+        return PrimitiveObjectInspectorFactory.javaLongObjectInspector;
+    }
+
+    /**
+     * Computes the bin ID for one row.
+     *
+     * @param args the bin size (0) and the point (1)
+     * @return the bin ID as a Long, or null if the bin size or the point is null
+     */
+    @Override
+    public Object evaluate(DeferredObject[] args) throws HiveException {
+        Object binSizeArg = args[0].get();
+
+        // a SQL NULL bin size does not describe a grid, so the result is NULL
+        if (binSizeArg == null) {
+            return null;
+        }
+
+        double binSize = PrimitiveObjectInspectorUtils.getDouble(binSizeArg, oiBinSize);
+
+        // the grid only has to be rebuilt when the bin size can change between rows
+        if (!binSizeIsConstant || bins == null) {
+            bins = new BinUtils(binSize);
+        }
+
+        OGCPoint point = geomHelper.getPoint(args);
+
+        if (point == null) {
+            return null;
+        }
+
+        return bins.getId(point.X(), point.Y());
+    }
+
+    @Override
+    public String getDisplayString(String[] args) {
+        assert(args.length == 2);
+        return String.format("st_bin(%s,%s)", args[0], args[1]);
+    }
+}
diff --git a/hive/src/main/java/com/esri/hadoop/hive/ST_BinEnvelope.java b/hive/src/main/java/com/esri/hadoop/hive/ST_BinEnvelope.java
new file mode 100755
index 0000000..053ee88
--- /dev/null
+++ b/hive/src/main/java/com/esri/hadoop/hive/ST_BinEnvelope.java
@@ -0,0 +1,134 @@
+package com.esri.hadoop.hive;
+
+import java.util.EnumSet;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+import com.esri.core.geometry.Envelope;
+import com.esri.core.geometry.ogc.OGCPoint;
+import com.esri.hadoop.hive.GeometryUtils.OGCType;
+
+/**
+ * Hive UDF that returns the envelope of a bin, identified either by a
+ * point inside it or by its bin ID.
+ */
+@Description(
+    name = "ST_BinEnvelope",
+    value = "_FUNC_(binsize, point) - return bin envelope for given point\n" +
+            "_FUNC_(binsize, binid) - return bin envelope for given bin ID\n")
+public class ST_BinEnvelope extends GenericUDF {
+    private transient boolean binSizeIsConstant;
+    private transient PrimitiveObjectInspector oiBinSize;
+    private transient BinUtils bins;
+
+    private transient PrimitiveObjectInspector oiBinId;
+    private transient HiveGeometryOIHelper binPoint;
+
+    /**
+     * Checks the argument types and records whether argument 1 is a bin ID
+     * or a geometry.
+     *
+     * @param OIs inspectors for the bin size (0) and the bin ID or point (1)
+     * @return inspector for the geometry value produced by evaluate
+     * @throws UDFArgumentException if the argument count or an argument type is wrong
+     */
+    @Override
+    public ObjectInspector initialize(ObjectInspector[] OIs)
+            throws UDFArgumentException {
+
+        if (OIs.length != 2) {
+            throw new UDFArgumentException("Function takes exactly 2 arguments");
+        }
+
+        if (!isPrimitiveNumber(OIs[0])) {
+            throw new UDFArgumentException("Argument 0 must be a number");
+        }
+
+        oiBinSize = (PrimitiveObjectInspector)OIs[0];
+        // lets evaluate reuse the grid instead of rebuilding it for every row
+        binSizeIsConstant = ObjectInspectorUtils.isConstantObjectInspector(OIs[0]);
+
+        if (isPrimitiveNumber(OIs[1])) {
+            oiBinId = (PrimitiveObjectInspector)OIs[1];
+        } else if (HiveGeometryOIHelper.canCreate(OIs[1])) {
+            binPoint = HiveGeometryOIHelper.create(OIs, 1);
+        } else {
+            throw new UDFArgumentException("Argument 1 must be a number or valid geometry type");
+        }
+
+        return GeometryUtils.geometryTransportObjectInspector;
+    }
+
+    /** Returns true if oi inspects a double, float, long, int or short. */
+    private boolean isPrimitiveNumber(ObjectInspector oi) {
+        if (oi.getCategory() != Category.PRIMITIVE) {
+            return false;
+        }
+
+        return EnumSet.of(PrimitiveCategory.DOUBLE,PrimitiveCategory.INT,PrimitiveCategory.LONG,PrimitiveCategory.SHORT, PrimitiveCategory.FLOAT)
+                .contains(((PrimitiveObjectInspector)oi).getPrimitiveCategory());
+    }
+
+    /**
+     * Computes the bin envelope for one row.
+     *
+     * @param args the bin size (0) and the bin ID or point (1)
+     * @return the value built by GeometryUtils.geometryToEsriShapeBytesWritable
+     *         for the bin envelope, or null if either argument is null
+     */
+    @Override
+    public Object evaluate(DeferredObject[] args) throws HiveException {
+        Object binSizeArg = args[0].get();
+
+        // a SQL NULL bin size does not describe a grid, so the result is NULL
+        if (binSizeArg == null) {
+            return null;
+        }
+
+        double binSize = PrimitiveObjectInspectorUtils.getDouble(binSizeArg, oiBinSize);
+
+        if (!binSizeIsConstant || bins == null) {
+            bins = new BinUtils(binSize);
+        }
+
+        Envelope env = new Envelope();
+
+        if (oiBinId != null) {
+            // argument 1 is a number, attempt to get the envelope with bin ID
+            Object binIdArg = args[1].get();
+
+            if (binIdArg == null) {
+                return null;
+            }
+
+            long binId = PrimitiveObjectInspectorUtils.getLong(binIdArg, oiBinId);
+            bins.queryEnvelope(binId, env);
+        } else {
+            // argument 1 is a geometry, attempt to get the envelope with a point
+            OGCPoint point = binPoint.getPoint(args);
+
+            if (point == null) {
+                return null;
+            }
+
+            bins.queryEnvelope(point.X(), point.Y(), env);
+        }
+
+        return GeometryUtils.geometryToEsriShapeBytesWritable(env, 0, OGCType.ST_POLYGON);
+    }
+
+    @Override
+    public String getDisplayString(String[] args) {
+        assert(args.length == 2);
+        return String.format("st_binenvelope(%s,%s)", args[0], args[1]);
+    }
+}