Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions java/bench/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ These big data file format benchmarks, compare:
* ORC
* Parquet

There are three sub-modules to try to mitigate dependency hell:

* core - the shared part of the benchmarks
* hive - the Hive benchmarks
* spark - the Spark benchmarks

To build this library:

```% mvn clean package```
Expand All @@ -17,17 +23,25 @@ To fetch the source data:

To generate the derived data:

```% java -jar target/orc-benchmarks-*-uber.jar generate data```
```% java -jar core/target/orc-benchmarks-core-*-uber.jar generate data```

To run a scan of all of the data:

```% java -jar target/orc-benchmarks-*-uber.jar scan data```
```% java -jar core/target/orc-benchmarks-core-*-uber.jar scan data```

To run the full read benchmark:

```% java -jar target/orc-benchmarks-*-uber.jar read-all data```
```% java -jar hive/target/orc-benchmarks-hive-*-uber.jar read-all data```

To run the column projection benchmark:

```% java -jar target/orc-benchmarks-*-uber.jar read-some data```
```% java -jar hive/target/orc-benchmarks-hive-*-uber.jar read-some data```

To run the decimal/decimal64 benchmark:

```% java -jar hive/target/orc-benchmarks-hive-*-uber.jar decimal data```

To run the Spark benchmark:

```% java -jar spark/target/orc-benchmarks-spark-*.jar spark data```

141 changes: 141 additions & 0 deletions java/bench/core/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.orc</groupId>
<artifactId>orc-benchmarks</artifactId>
<version>1.6.0-SNAPSHOT</version>
<relativePath>..</relativePath>
</parent>

<groupId>org.apache.orc</groupId>
<artifactId>orc-benchmarks-core</artifactId>
<version>1.6.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>ORC Benchmarks Core</name>
<description>
The core parts of the benchmarks for comparing performance across formats.
</description>

<!--
Libraries used by the shared (core) benchmark code.
No versions are declared in this module; presumably they are managed by
the parent orc-benchmarks POM (TODO confirm against the parent).
-->
<dependencies>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</dependency>
<dependency>
<groupId>com.google.auto.service</groupId>
<artifactId>auto-service</artifactId>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
</dependency>
<dependency>
<groupId>io.airlift</groupId>
<artifactId>aircompressor</artifactId>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro-mapred</artifactId>
<classifier>hadoop2</classifier>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-storage-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.orc</groupId>
<artifactId>orc-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop</artifactId>
</dependency>
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-core</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
</dependencies>

<!--
Build settings for the core module.
Sources, tests and resources live directly under src/java, src/test and
src/resources instead of Maven's default src/main/... layout, so the
directories are set explicitly.
-->
<build>
<sourceDirectory>${basedir}/src/java</sourceDirectory>
<testSourceDirectory>${basedir}/src/test</testSourceDirectory>
<resources>
<resource>
<directory>src/resources</directory>
</resource>
</resources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
</plugin>
<!--
Records org.apache.orc.bench.core.Driver as the main class in the
assembled jar's manifest so it can be started with "java -jar".
The assembly descriptor and executions are not configured here;
presumably they are inherited from the parent POM (TODO confirm).
-->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>org.apache.orc.bench.core.Driver</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>

<!--
The "cmake" profile redirects this module's build output directory to
${build.dir}/bench/core. The build.dir property is not defined in this
file; presumably it is supplied by the CMake-driven build that activates
the profile (TODO confirm).
-->
<profiles>
<profile>
<id>cmake</id>
<build>
<directory>${build.dir}/bench/core</directory>
</build>
</profile>
</profiles>
</project>
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
Expand All @@ -19,10 +19,13 @@

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;

public class TrackingLocalFileSystem extends RawLocalFileSystem {
static final URI NAME = URI.create("track:///");

class TrackingFileInputStream extends RawLocalFileSystem.LocalFSFileInputStream {

/**
 * Create a tracking input stream for the given path by delegating to the
 * {@code RawLocalFileSystem.LocalFSFileInputStream} constructor.
 * @param f the path of the local file to read
 * @throws IOException if the superclass constructor fails
 */
public TrackingFileInputStream(Path f) throws IOException {
super(f);
}
Expand Down Expand Up @@ -51,6 +54,11 @@ public FSDataInputStream open(Path f, int bufferSize) throws IOException {
new TrackingFileInputStream(f), bufferSize));
}

/**
 * Get the URI that identifies this file system.
 * @return the {@link #NAME} constant, which is {@code track:///}
 */
@Override
public URI getUri() {
return NAME;
}

/**
 * Expose the statistics object held by this file system instance.
 * NOTE(review): {@code statistics} is not declared in the visible part of
 * this class; presumably it is the field inherited from FileSystem - confirm.
 * @return the {@code statistics} object of this file system
 */
public FileSystem.Statistics getLocalStatistics() {
return statistics;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.orc.bench.core;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

/**
 * Command line option names and parsing shared by the benchmark commands.
 */
public class BenchmarkOptions {

  public static final String HELP = "help";
  public static final String ITERATIONS = "iterations";
  public static final String WARMUP_ITERATIONS = "warmup-iterations";
  public static final String FORK = "fork";
  public static final String TIME = "time";
  public static final String MIN_MEMORY = "min-memory";
  public static final String MAX_MEMORY = "max-memory";
  public static final String GC = "gc";

  /**
   * Parse the arguments of a benchmark command.
   *
   * If the arguments cannot be parsed, help was requested, or no
   * positional arguments remain, the usage is printed and the JVM exits
   * with status 1.
   *
   * @param args the arguments that follow the command name
   * @return the parsed command line with at least one positional argument
   */
  public static CommandLine parseCommandLine(String[] args) {
    final Options options = createOptions();
    CommandLine parsed = null;
    try {
      // the final 'true' is the parser's stopAtNonOption flag
      parsed = new DefaultParser().parse(options, args, true);
    } catch (ParseException pe) {
      System.err.println("Argument exception - " + pe.getMessage());
    }
    final boolean usable = parsed != null
        && !parsed.hasOption(HELP)
        && parsed.getArgs().length != 0;
    if (!usable) {
      new HelpFormatter().printHelp("java -jar <jar> <command> <options> <data>",
          options);
      System.err.println();
      System.exit(1);
    }
    return parsed;
  }

  /**
   * Build the set of options understood by the benchmark commands.
   * @return a new Options object holding every supported option
   */
  private static Options createOptions() {
    Options options = new Options();
    options.addOption("h", HELP, false, "Provide help");
    options.addOption("i", ITERATIONS, true, "Number of iterations");
    options.addOption("I", WARMUP_ITERATIONS, true, "Number of warmup iterations");
    options.addOption("f", FORK, true, "How many forks to use");
    options.addOption("t", TIME, true, "How long each iteration is in seconds");
    options.addOption("m", MIN_MEMORY, true, "The minimum size of each JVM");
    options.addOption("M", MAX_MEMORY, true, "The maximum size of each JVM");
    options.addOption("g", GC, false, "Should GC be profiled");
    return options;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* limitations under the License.
*/

package org.apache.orc.bench;
package org.apache.orc.bench.core;

import io.airlift.compress.snappy.SnappyCodec;
import org.apache.hadoop.fs.Path;
Expand All @@ -31,9 +31,9 @@
* Enum for handling the compression codecs for the benchmark
*/
public enum CompressionKind {
NONE(".none"),
ZLIB(".gz"),
SNAPPY(".snappy");
NONE("none"),
ZLIB("gz"),
SNAPPY("snappy");

CompressionKind(String extendsion) {
this.extension = extendsion;
Expand Down Expand Up @@ -77,11 +77,20 @@ public static CompressionKind fromPath(Path path) {
if (lastDot >= 0) {
String ext = name.substring(lastDot);
for (CompressionKind value : values()) {
if (ext.equals(value.getExtension())) {
if (ext.equals("." + value.getExtension())) {
return value;
}
}
}
return NONE;
}

/**
 * Find the compression kind that uses the given extension.
 * @param extension the extension to look up, compared against each
 *                  kind's extension with {@code equals}
 * @return the first kind whose extension matches
 * @throws IllegalArgumentException if no kind has that extension
 */
public static CompressionKind fromExtension(String extension) {
  final CompressionKind[] kinds = values();
  for (int i = 0; i < kinds.length; ++i) {
    final CompressionKind candidate = kinds[i];
    if (candidate.extension.equals(extension)) {
      return candidate;
    }
  }
  throw new IllegalArgumentException("Unknown compression " + extension);
}
}
65 changes: 65 additions & 0 deletions java/bench/core/src/java/org/apache/orc/bench/core/Driver.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.orc.bench.core;

import java.util.Arrays;
import java.util.Map;
import java.util.ServiceLoader;
import java.util.TreeMap;

/**
 * A driver tool to call the various benchmark classes.
 *
 * The available benchmarks are discovered through {@link ServiceLoader}
 * and selected by the first command line argument, which is matched
 * against each benchmark's name. The remaining arguments are handed to
 * the selected benchmark.
 */
public class Driver {
  private static final ServiceLoader<OrcBenchmark> loader =
      ServiceLoader.load(OrcBenchmark.class);

  private static final String PATTERN = " %10s - %s";

  /**
   * Collect the discovered benchmarks, keyed and sorted by name.
   * @return a map from benchmark name to benchmark
   */
  private static Map<String, OrcBenchmark> getBenchmarks() {
    Map<String, OrcBenchmark> result = new TreeMap<>();
    for (OrcBenchmark bench : loader) {
      result.put(bench.getName(), bench);
    }
    return result;
  }

  /**
   * Print the list of commands to stderr and exit with status 1.
   * @param benchmarks the known benchmarks
   * @param problem a message describing what was wrong with the
   *                invocation, or null if there is nothing to report
   */
  private static void printUsageAndExit(Map<String, OrcBenchmark> benchmarks,
                                        String problem) {
    if (problem != null) {
      System.err.println(problem);
    }
    System.err.println("Commands:");
    for (OrcBenchmark bench : benchmarks.values()) {
      System.err.println(String.format(PATTERN, bench.getName(),
          bench.getDescription()));
    }
    System.exit(1);
  }

  /**
   * Run the benchmark named by the first argument.
   * @param args the command name followed by the benchmark's own arguments
   * @throws Exception whatever the selected benchmark throws
   */
  public static void main(String[] args) throws Exception {
    Map<String, OrcBenchmark> benchmarks = getBenchmarks();
    if (args.length == 0) {
      printUsageAndExit(benchmarks, null);
      return;
    }
    String command = args[0];
    OrcBenchmark bench = benchmarks.get(command);
    if (bench == null) {
      // tell the user which command was not recognised before listing
      // the valid ones
      printUsageAndExit(benchmarks, "Unknown command: " + command);
      return;
    }
    // strip the command name; the benchmark only sees its own arguments
    bench.run(Arrays.copyOfRange(args, 1, args.length));
  }
}
Loading