From ffc0a6d03d6f4574c5a5f16b2d00ceba51af66fd Mon Sep 17 00:00:00 2001 From: Arunkumar Chacko Date: Mon, 28 Apr 2025 15:21:05 +0000 Subject: [PATCH 1/8] HADOOP-19343. Add Google Cloud Storage Connector Add implementation for create() API --- hadoop-project/pom.xml | 9 +- hadoop-tools/hadoop-gcp/pom.xml | 616 ++++++++++++++++++ .../org/apache/hadoop/fs/gs/Constants.java | 29 + .../apache/hadoop/fs/gs/CreateOptions.java | 185 ++++++ .../hadoop/fs/gs/ErrorTypeExtractor.java | 96 +++ .../org/apache/hadoop/fs/gs/FileInfo.java | 203 ++++++ .../hadoop/fs/gs/GoogleCloudStorage.java | 260 ++++++++ .../GoogleCloudStorageClientWriteChannel.java | 118 ++++ .../fs/gs/GoogleCloudStorageFileSystem.java | 89 +++ .../fs/gs/GoogleCloudStorageItemInfo.java | 416 ++++++++++++ .../hadoop/fs/gs/GoogleHadoopFileSystem.java | 505 ++++++++++++++ .../GoogleHadoopFileSystemConfiguration.java | 75 +++ .../fs/gs/GoogleHadoopOutputStream.java | 165 +++++ .../fs/gs/HadoopConfigurationProperty.java | 90 +++ .../hadoop/fs/gs/StorageResourceId.java | 325 +++++++++ .../org/apache/hadoop/fs/gs/StringPaths.java | 168 +++++ .../org/apache/hadoop/fs/gs/UriPaths.java | 113 ++++ .../hadoop/fs/gs/VerificationAttributes.java | 68 ++ hadoop-tools/pom.xml | 1 + 19 files changed, 3529 insertions(+), 2 deletions(-) create mode 100644 hadoop-tools/hadoop-gcp/pom.xml create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/Constants.java create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/CreateOptions.java create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/ErrorTypeExtractor.java create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/FileInfo.java create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorage.java create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageClientWriteChannel.java create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageFileSystem.java create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageItemInfo.java create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystem.java create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystemConfiguration.java create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopOutputStream.java create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/HadoopConfigurationProperty.java create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StorageResourceId.java create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StringPaths.java create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/UriPaths.java create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/VerificationAttributes.java diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index b957a6a148c5e..3c90a4a05b671 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -86,7 +86,7 @@ - 2.5.0 + 3.25.3 provided @@ -108,7 +108,7 @@ 3.0.5 3.6.1 - 27.0-jre + 33.1.0-jre 5.1.0 1.78.1 @@ -2141,6 +2141,11 @@ failsafe 2.4.4 + + com.google.cloud + google-cloud-storage + 2.44.1 + diff --git a/hadoop-tools/hadoop-gcp/pom.xml b/hadoop-tools/hadoop-gcp/pom.xml new file mode 100644 index 
0000000000000..c177ba5a9200d --- /dev/null +++ b/hadoop-tools/hadoop-gcp/pom.xml @@ -0,0 +1,616 @@ + + + + + 4.0.0 + + org.apache.hadoop + hadoop-project + 3.5.0-SNAPSHOT + ../../hadoop-project + + hadoop-gcp + 3.5.0-SNAPSHOT + Apache Hadoop Google Cloud Platform support + + This module contains code to support integration with Google Cloud Platform. + It also declares the dependencies needed to work with Google Cloud Storage. + + jar + + + UTF-8 + true + ${project.build.directory}/test + + + unset + + unset + + unset + + 3600 + + + 200000 + + + + 00 + + unset + + + + + tests-off + + + src/test/resources/auth-keys.xml + + + + true + + + + tests-on + + + src/test/resources/auth-keys.xml + + + + false + + + + parallel-tests + + + parallel-tests + + + + + + org.apache.hadoop + hadoop-maven-plugins + + + parallel-tests-createdir + + parallel-tests-createdir + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + ${testsThreadCount} + false + false + ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true + + ${testsThreadCount} + ${test.build.data}/${surefire.forkNumber} + ${test.build.dir}/${surefire.forkNumber} + ${hadoop.tmp.dir}/${surefire.forkNumber} + job-${job.id}-fork-000${surefire.forkNumber} + + + ${fs.gs.scale.test.enabled} + ${fs.gs.scale.test.huge.filesize} + ${fs.gs.scale.test.huge.partitionsize} + ${fs.gs.scale.test.timeout} + + + + + org.apache.maven.plugins + maven-failsafe-plugin + + + default-integration-test + + integration-test + verify + + + ${testsThreadCount} + false + ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true + ${fs.gs.scale.test.timeout} + false + + + true + ${test.build.data}/${surefire.forkNumber} + ${test.build.dir}/${surefire.forkNumber} + ${hadoop.tmp.dir}/${surefire.forkNumber} + + + + + + job-${job.id}-fork-000${surefire.forkNumber} + + ${fs.gs.scale.test.enabled} + ${fs.gs.scale.test.huge.filesize} + ${fs.gs.scale.test.huge.partitionsize} + ${fs.gs.scale.test.timeout} + ${test.integration.timeout} + + + + ${root.tests.enabled} + + + + + + + + + + + + **/ITest*.java + + + + + sequential-integration-tests + + integration-test + verify + + + ${fs.gs.scale.test.timeout} + false + + + false + + ${fs.gs.scale.test.enabled} + ${fs.gs.scale.test.huge.filesize} + ${fs.gs.scale.test.huge.partitionsize} + ${fs.gs.scale.test.timeout} + + + + + job-${job.id} + + + + + **/ITest*.java + + + + + + + + + + sequential-tests + + + !parallel-tests + + + + + + org.apache.maven.plugins + maven-failsafe-plugin + + + + integration-test + verify + + + false + + + ${fs.gs.scale.test.enabled} + ${fs.gs.scale.test.huge.filesize} + ${fs.gs.scale.test.timeout} + + job-${job.id} + + ${fs.gs.scale.test.timeout} + + + + + + + + + + + scale + + + scale + + + + true + + + + + + prefetch + + + prefetch + + + + prefetch + + + + + + analytics + + + analytics + + + + analytics + + + + + + + + + + maven-shade-plugin + + + package + + shade + + + + + + + + + + + com.google.auth:* + + ** + + + + io.grpc:* + + ** + + + + io.opencensus:* + + ** + + + + *:* + + *.json + google/** + grpc/** + + + + + + com.google.api + com.google.api-client + com.google.api.grpc + com.google.apis + com.google.auth + com.google.cloud + com.google.cloud.bigdataoss + com.google.cloud.grpc + com.google.cloud.http + com.google.flogger + com.google.code.gson + com.google.guava + com.google.http-client + com.google.oauth-client + com.google.protobuf + com.google.re2j + com.google.storage.v2 + com.lmax + io.grpc + io.opencensus + io.perfmark + org.apache.httpcomponents + 
org.threeten:threetenbp + + + true + + + com + com.google.cloud.hadoop.repackaged.ossgcs.com + + com.google.api.** + com.google.api.gax.** + com.google.auth.** + com.google.cloud.* + com.google.cloud.audit.** + com.google.cloud.grpc.** + com.google.cloud.hadoop.gcsio.** + com.google.cloud.hadoop.util.** + com.google.cloud.http.** + com.google.cloud.monitoring.** + com.google.cloud.spi.** + com.google.cloud.storage.** + com.google.common.** + com.google.geo.** + com.google.gson.** + com.google.google.storage.** + com.google.iam.** + com.google.logging.** + com.google.longrunning.** + com.google.monitoring.** + com.google.protobuf.** + com.google.re2j.** + com.google.rpc.** + com.google.storage.** + com.google.thirdparty.** + com.google.type.** + com.lmax.disruptor.** + + + com.google.cloud.hadoop.util.AccessTokenProvider + com.google.cloud.hadoop.util.AccessTokenProvider$AccessToken + com.google.cloud.hadoop.util.AccessTokenProvider$AccessTokenType + com.google.cloud.hadoop.util.AccessBoundary + com.google.cloud.hadoop.util.AccessBoundary$Action + com.google.cloud.hadoop.util.AutoValue_AccessBoundary + + + + org + com.google.cloud.hadoop.repackaged.ossgcs.org + + org.apache.http.** + org.threeten.** + + + + + io.grpc.netty.shaded + + com.google.cloud.hadoop.repackaged.ossgcs.io.grpc.netty.shaded + + + + io + com.google.cloud.hadoop.repackaged.ossgcs.io + + io.grpc.** + io.opencensus.** + io.perfmark.** + + + + META-INF/native/io_grpc_netty_shaded_ + + META-INF/native/com_google_cloud_hadoop_repackaged_gcs_io_grpc_netty_shaded_ + + + + META-INF/native/libio_grpc_netty_shaded_ + + META-INF/native/libcom_google_cloud_hadoop_repackaged_gcs_io_grpc_netty_shaded_ + + + + true + + + + + + + com.github.spotbugs + spotbugs-maven-plugin + + true + ${basedir}/dev-support/findbugs-exclude.xml + + Max + + + + org.apache.maven.plugins + maven-surefire-plugin + + 3600 + + ${test.integration.timeout} + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + deplist1 + compile + + list + + + + ${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-optional.txt + + + + copy + test-compile + + copy-dependencies + + + test + so,dll,dylib + ${project.build.directory}/native-libs + + + + package + + copy-dependencies + + + ${project.build.directory}/lib + + + + deplist2 + compile + + list + + + ${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-builtin.txt + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + banned-illegal-imports + process-sources + + enforce + + + + + false + Restrict mapreduce imports to committer code + + + + + org.apache.hadoop.mapreduce.** + org.apache.hadoop.mapred.** + + + + false + Restrict encryption client imports to encryption client factory + + + + + + + + + + + + + + + + + + org.apache.hadoop + hadoop-common + provided + + + javax.servlet + servlet-api + + + javax.enterprise + cdi-api + + + + + org.apache.hadoop + hadoop-common + test + test-jar + + + org.assertj + assertj-core + test + + + junit + junit + test + + + org.junit.platform + junit-platform-launcher + test + + + org.junit.vintage + junit-vintage-engine + test + + + com.google.cloud + google-cloud-storage + + + com.google.protobuf + protobuf-java + + + + + diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/Constants.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/Constants.java new file mode 100644 index 0000000000000..67371ec85abca --- /dev/null +++ 
b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/Constants.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.gs; + +class Constants { + // URI scheme for GCS. + static final String SCHEME = "gs"; + static final String PATH_DELIMITER = "/"; + + static final String GCS_CONFIG_PREFIX = "fs.gs"; + + static final String BASE_KEY_PREFIX = "google.cloud"; +} diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/CreateOptions.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/CreateOptions.java new file mode 100644 index 0000000000000..c4d5e05be664b --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/CreateOptions.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.gs; + +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkArgument; + +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; + +import java.time.Duration; +import java.util.Map; +import javax.annotation.Nullable; + +/** + * Options that can be specified when creating a file in the {@link GoogleCloudStorageFileSystem}. 
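+ * <p>A minimal, illustrative sketch of building options with this class (only methods
+ * declared below are used; the content type shown is an arbitrary example):
+ * <pre>{@code
+ * CreateOptions options = CreateOptions.builder()
+ *     .setContentType("text/plain")
+ *     .setWriteMode(CreateOptions.WriteMode.OVERWRITE)
+ *     .build();
+ * }</pre>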
+ */ +class CreateOptions { + private final ImmutableMap attributes; + private final String contentType; + private final boolean ensureNoDirectoryConflict; + private final Duration interval; + private final long overwriteGenerationId; + private final WriteMode mode; + + public static final CreateOptions DEFAULT = builder().build(); + + public String getContentEncoding() { + return contentEncoding; + } + + private final String contentEncoding; + + private CreateOptions(CreateOperationOptionsBuilder builder) { + this.attributes = ImmutableMap.copyOf(builder.attributes); + this.contentType = builder.contentType; + this.ensureNoDirectoryConflict = builder.ensureNoDirectoryConflict; + this.interval = builder.interval; + this.overwriteGenerationId = builder.overwriteGenerationId; + this.mode = builder.mode; + this.contentEncoding = builder.contentEncoding; + } + + public boolean isOverwriteExisting() { + return this.mode == WriteMode.OVERWRITE; + } + + enum WriteMode { + /** + * Write new bytes to the end of the existing file rather than the beginning. + */ + APPEND, + /** + * Creates a new file for write and fails if file already exists. + */ + CREATE_NEW, + /** + * Creates a new file for write or overwrites an existing file if it already exists. + */ + OVERWRITE + } + + public static CreateOperationOptionsBuilder builder() { + return new CreateOperationOptionsBuilder(); + } + + /** + * Extended attributes to set when creating a file. + */ + public ImmutableMap getAttributes() { + return attributes; + } + + /** + * Content-type to set when creating a file. + */ + @Nullable + public String getContentType() { + return contentType; + } + + /** + * Configures the minimum time interval (milliseconds) between consecutive sync/flush calls + */ + public Duration getMinSyncInterval() { + return interval; + } + + /** + * If true, makes sure there isn't already a directory object of the same name. If false, you run + * the risk of creating hard-to-cleanup/access files whose names collide with directory names. If + * already sure no such directory exists, then this is safe to set for improved performance. + */ + public boolean isEnsureNoDirectoryConflict() { + return ensureNoDirectoryConflict; + } + + /** + * Whether to overwrite an existing file with the same name. + */ + public WriteMode getWriteMode() { + return mode; + } + + /** + * Generation of existing object to overwrite. Ignored if set to {@link + * StorageResourceId#UNKNOWN_GENERATION_ID}, but otherwise this is used instead of {@code + * overwriteExisting}, where 0 indicates no existing object, and otherwise an existing object will + * only be overwritten by the newly created file if its generation matches this provided + * generationId. 
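+ * <p>For example (sketch): a caller that previously read generation {@code g} of an object
+ * and wants the write to succeed only if the object is still unchanged could use
+ * {@code builder().setWriteMode(WriteMode.OVERWRITE).setOverwriteGenerationId(g).build()};
+ * passing 0 instead requires that no object exists yet.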
+ */ + public long getOverwriteGenerationId() { + return overwriteGenerationId; + } + + static class CreateOperationOptionsBuilder { + private Map attributes = ImmutableMap.of(); + private String contentType = "application/octet-stream"; + private boolean ensureNoDirectoryConflict = true; + private Duration interval = Duration.ZERO; + private long overwriteGenerationId = StorageResourceId.UNKNOWN_GENERATION_ID; + private WriteMode mode = WriteMode.CREATE_NEW; + + private String contentEncoding = null; + + public CreateOperationOptionsBuilder setAttributes(Map attributes) { + this.attributes = attributes; + return this; + } + + public CreateOperationOptionsBuilder setContentType(String contentType) { + this.contentType = contentType; + return this; + } + + public CreateOperationOptionsBuilder setEnsureNoDirectoryConflict( + boolean ensureNoDirectoryConflict) { + this.ensureNoDirectoryConflict = ensureNoDirectoryConflict; + return this; + } + + public CreateOperationOptionsBuilder setMinSyncInterval(Duration interval) { + this.interval = interval; + return this; + } + + public CreateOperationOptionsBuilder setOverwriteGenerationId(long overwriteGenerationId) { + this.overwriteGenerationId = overwriteGenerationId; + return this; + } + + public CreateOperationOptionsBuilder setWriteMode(WriteMode mode) { + this.mode = mode; + return this; + } + + CreateOptions build() { + CreateOptions options = new CreateOptions(this); + + checkArgument(!options.getAttributes().containsKey("Content-Type"), + "The Content-Type attribute must be set via the contentType option"); + if (options.getWriteMode() != WriteMode.OVERWRITE) { + checkArgument(options.getOverwriteGenerationId() == StorageResourceId.UNKNOWN_GENERATION_ID, + "overwriteGenerationId is set to %s but it can be set only in OVERWRITE mode", + options.getOverwriteGenerationId()); + } + + return options; + } + } +} diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/ErrorTypeExtractor.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/ErrorTypeExtractor.java new file mode 100644 index 0000000000000..2dfb1a6fc68fb --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/ErrorTypeExtractor.java @@ -0,0 +1,96 @@ +/* + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.gs; + +import io.grpc.Status; +import io.grpc.StatusRuntimeException; + +import javax.annotation.Nullable; + +/** + * Implementation for {@link ErrorTypeExtractor} for exception specifically thrown from gRPC path. 
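+ * <p>Typical call site (sketch; mirrors the bucket lookup in {@link GoogleCloudStorage}):
+ * <pre>{@code
+ * try {
+ *   return storage.get(bucketName);
+ * } catch (StorageException e) {
+ *   if (ErrorTypeExtractor.getErrorType(e) == ErrorTypeExtractor.ErrorType.NOT_FOUND) {
+ *     return null; // a missing bucket is reported as absent, not as a failure
+ *   }
+ *   throw new IOException("Error accessing bucket " + bucketName, e);
+ * }
+ * }</pre>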
+ */ +class ErrorTypeExtractor { + + enum ErrorType { + NOT_FOUND, OUT_OF_RANGE, ALREADY_EXISTS, FAILED_PRECONDITION, INTERNAL, RESOURCE_EXHAUSTED, UNAVAILABLE, UNKNOWN + } + + // public static final ErrorTypeExtractor INSTANCE = new ErrorTypeExtractor(); + + private static final String BUCKET_ALREADY_EXISTS_MESSAGE = + "FAILED_PRECONDITION: Your previous request to create the named bucket succeeded and you already own it."; + + private ErrorTypeExtractor() { + } + + static ErrorType getErrorType(Exception error) { + switch (Status.fromThrowable(error).getCode()) { + case NOT_FOUND: + return ErrorType.NOT_FOUND; + case OUT_OF_RANGE: + return ErrorType.OUT_OF_RANGE; + case ALREADY_EXISTS: + return ErrorType.ALREADY_EXISTS; + case FAILED_PRECONDITION: + return ErrorType.FAILED_PRECONDITION; + case RESOURCE_EXHAUSTED: + return ErrorType.RESOURCE_EXHAUSTED; + case INTERNAL: + return ErrorType.INTERNAL; + case UNAVAILABLE: + return ErrorType.UNAVAILABLE; + default: + return ErrorType.UNKNOWN; + } + } + + static boolean bucketAlreadyExists(Exception e) { + ErrorType errorType = getErrorType(e); + if (errorType == ErrorType.ALREADY_EXISTS) { + return true; + } + // The gRPC API currently throws a FAILED_PRECONDITION status code instead of ALREADY_EXISTS, + // so we handle both these conditions in the interim. + // TODO: remove once the status codes are fixed. + else if (errorType == ErrorType.FAILED_PRECONDITION) { + StatusRuntimeException statusRuntimeException = getStatusRuntimeException(e); + return statusRuntimeException != null && BUCKET_ALREADY_EXISTS_MESSAGE.equals( + statusRuntimeException.getMessage()); + } + return false; + } + + /** + * Extracts StatusRuntimeException from the Exception, if it exists. + */ + @Nullable + static private StatusRuntimeException getStatusRuntimeException(Exception e) { + Throwable cause = e; + // Keeping a counter to break early from the loop to avoid infinite loop condition due to + // cyclic exception chains. + int currentExceptionDepth = 0, maxChainDepth = 1000; + while (cause != null && currentExceptionDepth < maxChainDepth) { + if (cause instanceof StatusRuntimeException) { + return (StatusRuntimeException) cause; + } + cause = cause.getCause(); + currentExceptionDepth++; + } + return null; + } +} diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/FileInfo.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/FileInfo.java new file mode 100644 index 0000000000000..dd4a73f3aa055 --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/FileInfo.java @@ -0,0 +1,203 @@ +/* + * Copyright 2013 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.gs; + +import java.net.URI; +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +/** + * Contains information about a file or a directory. + * + *
<p>
Note: This class wraps GoogleCloudStorageItemInfo, adds file system specific information and + * hides bucket/object specific information. + */ +class FileInfo { + + // Info about the root path. + public static final FileInfo ROOT_INFO = + new FileInfo(GoogleCloudStorageFileSystem.GCS_ROOT, GoogleCloudStorageItemInfo.ROOT_INFO); + + // Path of this file or directory. + private final URI path; + + // Information about the underlying GCS item. + private final GoogleCloudStorageItemInfo itemInfo; + + /** + * Constructs an instance of FileInfo. + * + * @param itemInfo Information about the underlying item. + */ + private FileInfo(URI path, GoogleCloudStorageItemInfo itemInfo) { + this.itemInfo = itemInfo; + + // Construct the path once. + this.path = path; + } + + /** + * Gets the path of this file or directory. + */ + public URI getPath() { + return path; + } + + /** + * Indicates whether this item is a directory. + */ + public boolean isDirectory() { + return itemInfo.isDirectory(); + } + + /** + * Indicates whether this item is an inferred directory. + */ + public boolean isInferredDirectory() { + return itemInfo.isInferredDirectory(); + } + + /** + * Indicates whether this instance has information about the unique, shared root of the underlying + * storage system. + */ + public boolean isGlobalRoot() { + return itemInfo.isGlobalRoot(); + } + + /** + * Gets creation time of this item. + * + *
<p>
Time is expressed as milliseconds since January 1, 1970 UTC. + */ + public long getCreationTime() { + return itemInfo.getCreationTime(); + } + + /** + * Gets the size of this file or directory. + * + *
<p>
For files, size is in number of bytes. For directories size is 0. For items that do not + * exist, size is -1. + */ + public long getSize() { + return itemInfo.getSize(); + } + + /** + * Gets the modification time of this file if one is set, otherwise the value of {@link + * #getCreationTime()} is returned. + * + *
<p>
Time is expressed as milliseconds since January 1, 1970 UTC. + */ + public long getModificationTime() { + return itemInfo.getModificationTime(); + } + + /** + * Retrieve file attributes for this file. + * + * @return A map of file attributes + */ + public Map getAttributes() { + return itemInfo.getMetadata(); + } + + /** + * Indicates whether this file or directory exists. + */ + public boolean exists() { + return itemInfo.exists(); + } + + /** + * Returns CRC32C checksum of the file or {@code null}. + */ + public byte[] getCrc32cChecksum() { + VerificationAttributes verificationAttributes = itemInfo.getVerificationAttributes(); + return verificationAttributes == null ? null : verificationAttributes.getCrc32c(); + } + + /** + * Returns MD5 checksum of the file or {@code null}. + */ + public byte[] getMd5Checksum() { + VerificationAttributes verificationAttributes = itemInfo.getVerificationAttributes(); + return verificationAttributes == null ? null : verificationAttributes.getMd5hash(); + } + + /** + * Gets information about the underlying item. + */ + GoogleCloudStorageItemInfo getItemInfo() { + return itemInfo; + } + + /** + * Gets string representation of this instance. + */ + @Override + public String toString() { + return getPath() + (exists() ? + ": created on: " + Instant.ofEpochMilli(getCreationTime()) : + ": exists: no"); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof FileInfo)) { + return false; + } + FileInfo fileInfo = (FileInfo) o; + return Objects.equals(path, fileInfo.path) && Objects.equals(itemInfo, fileInfo.itemInfo); + } + + @Override + public int hashCode() { + return Objects.hash(path, itemInfo); + } + + /** + * Handy factory method for constructing a FileInfo from a GoogleCloudStorageItemInfo while + * potentially returning a singleton instead of really constructing an object for cases like ROOT. + */ + public static FileInfo fromItemInfo(GoogleCloudStorageItemInfo itemInfo) { + if (itemInfo.isRoot()) { + return ROOT_INFO; + } + URI path = UriPaths.fromResourceId(itemInfo.getResourceId(), /* allowEmptyObjectName= */ true); + return new FileInfo(path, itemInfo); + } + + /** + * Handy factory method for constructing a list of FileInfo from a list of + * GoogleCloudStorageItemInfo. + */ + public static List fromItemInfos(List itemInfos) { + List fileInfos = new ArrayList<>(itemInfos.size()); + for (GoogleCloudStorageItemInfo itemInfo : itemInfos) { + fileInfos.add(fromItemInfo(itemInfo)); + } + return fileInfos; + } +} diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorage.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorage.java new file mode 100644 index 0000000000000..b86383504f8d4 --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorage.java @@ -0,0 +1,260 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.gs; + +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.*; +import static org.apache.hadoop.thirdparty.com.google.common.base.Strings.isNullOrEmpty; + +import com.google.cloud.storage.*; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.io.BaseEncoding; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.channels.WritableByteChannel; +import java.nio.file.FileAlreadyExistsException; +import java.util.List; +import java.util.Map; + +/** + * A wrapper around Google cloud storage client + */ +class GoogleCloudStorage { + public static final Logger LOG = LoggerFactory.getLogger(GoogleHadoopFileSystem.class); + static final List BLOB_FIELDS = + ImmutableList.of(Storage.BlobField.BUCKET, Storage.BlobField.CONTENT_ENCODING, + Storage.BlobField.CONTENT_TYPE, Storage.BlobField.CRC32C, Storage.BlobField.GENERATION, + Storage.BlobField.METADATA, Storage.BlobField.MD5HASH, Storage.BlobField.METAGENERATION, + Storage.BlobField.NAME, Storage.BlobField.SIZE, Storage.BlobField.TIME_CREATED, + Storage.BlobField.UPDATED); + private final Storage storage; + private final GoogleHadoopFileSystemConfiguration configuration; + + /** + * Having an instance of gscImpl to redirect calls to Json client while new client implementation + * is in WIP. + */ + GoogleCloudStorage(GoogleHadoopFileSystemConfiguration configuration) throws IOException { + // TODO: Set projectId + // TODO: Set credentials + this.storage = StorageOptions.newBuilder().build().getService(); + this.configuration = configuration; + } + + public WritableByteChannel create(final StorageResourceId resourceId, final CreateOptions options) + throws IOException { + LOG.trace("create({})", resourceId); + + checkArgument(resourceId.isStorageObject(), "Expected full StorageObject id, got %s", + resourceId); + // Update resourceId if generationId is missing + StorageResourceId resourceIdWithGeneration = resourceId; + if (!resourceId.hasGenerationId()) { + resourceIdWithGeneration = + new StorageResourceId(resourceId.getBucketName(), resourceId.getObjectName(), + getWriteGeneration(resourceId, options.isOverwriteExisting())); + } + + return new GoogleCloudStorageClientWriteChannel(storage, resourceIdWithGeneration, options); + } + + /** + * Gets the object generation for a write operation + * + *
<p>
making getItemInfo call even if overwrite is disabled to fail fast in case file is existing. + * + * @param resourceId object for which generation info is requested + * @param overwrite whether existing object should be overwritten + * @return the generation of the object + * @throws IOException if the object already exists and cannot be overwritten + */ + private long getWriteGeneration(StorageResourceId resourceId, boolean overwrite) + throws IOException { + LOG.trace("getWriteGeneration({}, {})", resourceId, overwrite); + GoogleCloudStorageItemInfo info = getItemInfo(resourceId); + if (!info.exists()) { + return 0L; + } + if (info.exists() && overwrite) { + long generation = info.getContentGeneration(); + checkState(generation != 0, "Generation should not be 0 for an existing item"); + return generation; + } + + throw new FileAlreadyExistsException(String.format("Object %s already exists.", resourceId)); + } + + public void close() { + try { + storage.close(); + } catch (Exception e) { + LOG.warn("Error occurred while closing the storage client", e); + } + } + + public GoogleCloudStorageItemInfo getItemInfo(StorageResourceId resourceId) throws IOException { + LOG.trace("getItemInfo({})", resourceId); + + // Handle ROOT case first. + if (resourceId.isRoot()) { + return GoogleCloudStorageItemInfo.ROOT_INFO; + } + GoogleCloudStorageItemInfo itemInfo = null; + + if (resourceId.isBucket()) { + Bucket bucket = getBucket(resourceId.getBucketName()); + if (bucket != null) { + itemInfo = createItemInfoForBucket(resourceId, bucket); + } else { + LOG.debug("getBucket({}): not found", resourceId.getBucketName()); + } + } else { + Blob blob = getBlob(resourceId); + if (blob != null) { + itemInfo = createItemInfoForBlob(resourceId, blob); + } else { + LOG.debug("getObject({}): not found", resourceId); + } + } + + if (itemInfo == null) { + itemInfo = GoogleCloudStorageItemInfo.createNotFound(resourceId); + } + LOG.debug("getItemInfo: {}", itemInfo); + return itemInfo; + } + + /** + * Gets the bucket with the given name. + * + * @param bucketName name of the bucket to get + * @return the bucket with the given name or null if bucket not found + * @throws IOException if the bucket exists but cannot be accessed + */ + @Nullable + private Bucket getBucket(String bucketName) throws IOException { + LOG.debug("getBucket({})", bucketName); + checkArgument(!isNullOrEmpty(bucketName), "bucketName must not be null or empty"); + try { + return storage.get(bucketName); + } catch (StorageException e) { + if (ErrorTypeExtractor.getErrorType(e) == ErrorTypeExtractor.ErrorType.NOT_FOUND) { + return null; + } + throw new IOException("Error accessing Bucket " + bucketName, e); + } + } + + private static GoogleCloudStorageItemInfo createItemInfoForBlob(StorageResourceId resourceId, + Blob blob) { + checkArgument(resourceId != null, "resourceId must not be null"); + checkArgument(blob != null, "object must not be null"); + checkArgument(resourceId.isStorageObject(), + "resourceId must be a StorageObject. resourceId: %s", resourceId); + checkArgument(resourceId.getBucketName().equals(blob.getBucket()), + "resourceId.getBucketName() must equal object.getBucket(): '%s' vs '%s'", + resourceId.getBucketName(), blob.getBucket()); + checkArgument(resourceId.getObjectName().equals(blob.getName()), + "resourceId.getObjectName() must equal object.getName(): '%s' vs '%s'", + resourceId.getObjectName(), blob.getName()); + + Map decodedMetadata = + blob.getMetadata() == null ? 
null : decodeMetadata(blob.getMetadata()); + + byte[] md5Hash = null; + byte[] crc32c = null; + + if (!isNullOrEmpty(blob.getCrc32c())) { + crc32c = BaseEncoding.base64().decode(blob.getCrc32c()); + } + + if (!isNullOrEmpty(blob.getMd5())) { + md5Hash = BaseEncoding.base64().decode(blob.getMd5()); + } + + return GoogleCloudStorageItemInfo.createObject(resourceId, + blob.getCreateTimeOffsetDateTime() == null ? + 0 : + blob.getCreateTimeOffsetDateTime().toInstant().toEpochMilli(), + blob.getUpdateTimeOffsetDateTime() == null ? + 0 : + blob.getUpdateTimeOffsetDateTime().toInstant().toEpochMilli(), + blob.getSize() == null ? 0 : blob.getSize(), blob.getContentType(), + blob.getContentEncoding(), decodedMetadata, + blob.getGeneration() == null ? 0 : blob.getGeneration(), + blob.getMetageneration() == null ? 0 : blob.getMetageneration(), + new VerificationAttributes(md5Hash, crc32c)); + } + + static Map decodeMetadata(Map metadata) { + return Maps.transformValues(metadata, GoogleCloudStorage::decodeMetadataValues); + } + + @Nullable + private static byte[] decodeMetadataValues(String value) { + try { + return BaseEncoding.base64().decode(value); + } catch (IllegalArgumentException iae) { + LOG.error("Failed to parse base64 encoded attribute value {}", value, iae); + return null; + } + } + + /** + * Gets the object with the given resourceId. + * + * @param resourceId identifies a StorageObject + * @return the object with the given name or null if object not found + * @throws IOException if the object exists but cannot be accessed + */ + @Nullable + Blob getBlob(StorageResourceId resourceId) throws IOException { + checkArgument(resourceId.isStorageObject(), "Expected full StorageObject id, got %s", + resourceId); + String bucketName = resourceId.getBucketName(); + String objectName = resourceId.getObjectName(); + Blob blob; + try { + blob = storage.get(BlobId.of(bucketName, objectName), + Storage.BlobGetOption.fields(BLOB_FIELDS.toArray(new Storage.BlobField[0]))); + } catch (StorageException e) { + throw new IOException("Error accessing " + resourceId, e); + } + return blob; + } + + private static GoogleCloudStorageItemInfo createItemInfoForBucket(StorageResourceId resourceId, + Bucket bucket) { + checkArgument(resourceId != null, "resourceId must not be null"); + checkArgument(bucket != null, "bucket must not be null"); + checkArgument(resourceId.isBucket(), "resourceId must be a Bucket. resourceId: %s", resourceId); + checkArgument(resourceId.getBucketName().equals(bucket.getName()), + "resourceId.getBucketName() must equal bucket.getName(): '%s' vs '%s'", + resourceId.getBucketName(), bucket.getName()); + + return GoogleCloudStorageItemInfo.createBucket(resourceId, + bucket.asBucketInfo().getCreateTimeOffsetDateTime().toInstant().toEpochMilli(), + bucket.asBucketInfo().getUpdateTimeOffsetDateTime().toInstant().toEpochMilli(), + bucket.getLocation(), + bucket.getStorageClass() == null ? null : bucket.getStorageClass().name()); + } +} diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageClientWriteChannel.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageClientWriteChannel.java new file mode 100644 index 0000000000000..caee2a5321228 --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageClientWriteChannel.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.gs; + +import com.google.cloud.storage.BlobId; +import com.google.cloud.storage.BlobInfo; +import com.google.cloud.storage.BlobWriteSession; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.Storage.BlobWriteOption; +import com.google.cloud.storage.StorageException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; +import java.util.ArrayList; +import java.util.List; + +/** + * Implements WritableByteChannel to provide write access to GCS via java-storage client + */ +class GoogleCloudStorageClientWriteChannel implements WritableByteChannel { + + public static final Logger LOG = + LoggerFactory.getLogger(GoogleCloudStorageClientWriteChannel.class); + + private final StorageResourceId resourceId; + private WritableByteChannel writableByteChannel; + + public GoogleCloudStorageClientWriteChannel(final Storage storage, + final StorageResourceId resourceId, final CreateOptions createOptions) throws IOException { + this.resourceId = resourceId; + BlobWriteSession blobWriteSession = getBlobWriteSession(storage, resourceId, createOptions); + try { + this.writableByteChannel = blobWriteSession.open(); + } catch (StorageException e) { + throw new IOException(e); + } + } + + private static BlobInfo getBlobInfo(final StorageResourceId resourceId, + final CreateOptions createOptions) { + BlobInfo blobInfo = BlobInfo.newBuilder( + BlobId.of(resourceId.getBucketName(), resourceId.getObjectName(), + resourceId.getGenerationId())).setContentType(createOptions.getContentType()) + .setContentEncoding(createOptions.getContentEncoding()) + // .setMetadata(encodeMetadata(createOptions.getMetadata())) // TODO: + .build(); + return blobInfo; + } + + private static BlobWriteSession getBlobWriteSession(final Storage storage, + final StorageResourceId resourceId, final CreateOptions createOptions) { + return storage.blobWriteSession(getBlobInfo(resourceId, createOptions), + generateWriteOptions(createOptions)); + } + + private static BlobWriteOption[] generateWriteOptions(final CreateOptions createOptions) { + List blobWriteOptions = new ArrayList<>(); + + blobWriteOptions.add(BlobWriteOption.disableGzipContent()); + blobWriteOptions.add(BlobWriteOption.generationMatch()); + + //TODO: Enable KMS and checksum + return blobWriteOptions.toArray(new BlobWriteOption[blobWriteOptions.size()]); + } + + @Override + public boolean isOpen() { + return writableByteChannel != null && writableByteChannel.isOpen(); + } + + @Override + public void close() throws IOException { + try { + if (!isOpen()) { + return; + } + + writableByteChannel.close(); + } catch (Exception e) { + throw new IOException( + String.format("Upload failed for '%s'. 
reason=%s", resourceId, e.getMessage()), e); + } finally { + writableByteChannel = null; + } + } + + private int writeInternal(final ByteBuffer byteBuffer) throws IOException { + int bytesWritten = writableByteChannel.write(byteBuffer); + LOG.trace("{} bytes were written out of provided buffer of capacity {}", bytesWritten, + byteBuffer.limit()); + return bytesWritten; + } + + @Override + public int write(final ByteBuffer src) throws IOException { + return writeInternal(src); + } +} diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageFileSystem.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageFileSystem.java new file mode 100644 index 0000000000000..d290309c0b1e4 --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageFileSystem.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.gs; + +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.*; +import static org.apache.hadoop.fs.gs.Constants.SCHEME; + +import com.google.auth.Credentials; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URI; +import java.nio.channels.WritableByteChannel; + +/** + * Provides FS semantics over GCS based on Objects API + */ +class GoogleCloudStorageFileSystem { + private static final Logger LOG = LoggerFactory.getLogger(StorageResourceId.class); + + // URI of the root path. + static URI GCS_ROOT = URI.create(SCHEME + ":/"); + + // GCS access instance. 
+ private GoogleCloudStorage gcs; + + private static GoogleCloudStorage createCloudStorage( + final GoogleHadoopFileSystemConfiguration configuration, final Credentials credentials) + throws IOException { + checkNotNull(configuration, "configuration must not be null"); + + return new GoogleCloudStorage(configuration); + } + + public GoogleCloudStorageFileSystem(final GoogleHadoopFileSystemConfiguration configuration, + final Credentials credentials) throws IOException { + gcs = createCloudStorage(configuration, credentials); + } + + public WritableByteChannel create(final URI path, final CreateOptions createOptions) + throws IOException { + LOG.trace("create(path: {}, createOptions: {})", path, createOptions); + checkNotNull(path, "path could not be null"); + StorageResourceId resourceId = + StorageResourceId.fromUriPath(path, /* allowEmptyObjectName=*/ true); + + if (resourceId.isDirectory()) { + throw new IOException( + String.format("Cannot create a file whose name looks like a directory: '%s'", + resourceId)); + } + + if (createOptions.getOverwriteGenerationId() != StorageResourceId.UNKNOWN_GENERATION_ID) { + resourceId = new StorageResourceId(resourceId.getBucketName(), resourceId.getObjectName(), + createOptions.getOverwriteGenerationId()); + } + + return gcs.create(resourceId, createOptions); + } + + public void close() { + if (gcs == null) { + return; + } + LOG.trace("close()"); + try { + gcs.close(); + } finally { + gcs = null; + } + } +} diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageItemInfo.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageItemInfo.java new file mode 100644 index 0000000000000..285be5405c36a --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageItemInfo.java @@ -0,0 +1,416 @@ +/* + * Copyright 2013 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.gs; + +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkArgument; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; + +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; + +import java.time.Instant; +import java.util.Arrays; +import java.util.Map; +import java.util.Objects; + +/** + * Contains information about an item in Google Cloud Storage. + */ +class GoogleCloudStorageItemInfo { + // Info about the root of GCS namespace. 
+ public static final GoogleCloudStorageItemInfo ROOT_INFO = + new GoogleCloudStorageItemInfo(StorageResourceId.ROOT, + /* creationTime= */ 0, + /* modificationTime= */ 0, + /* size= */ 0, + /* location= */ null, + /* storageClass= */ null, + /* contentType= */ null, + /* contentEncoding= */ null, + /* metadata= */ null, + /* contentGeneration= */ 0, + /* metaGeneration= */ 0, + /* verificationAttributes= */ null); + + /** + * Factory method for creating a GoogleCloudStorageItemInfo for a bucket. + * + * @param resourceId Resource ID that identifies a bucket + * @param creationTime Time when a bucket was created (milliseconds since January 1, 1970 UTC). + * @param modificationTime Time when a bucket was last modified (milliseconds since January 1, + * 1970 UTC). + * @param location Location of a bucket. + * @param storageClass Storage class of a bucket. + */ + public static GoogleCloudStorageItemInfo createBucket(StorageResourceId resourceId, + long creationTime, long modificationTime, String location, String storageClass) { + checkNotNull(resourceId, "resourceId must not be null"); + checkArgument(resourceId.isBucket(), "expected bucket but got '%s'", resourceId); + return new GoogleCloudStorageItemInfo(resourceId, creationTime, modificationTime, + /* size= */ 0, location, storageClass, + /* contentType= */ null, + /* contentEncoding= */ null, + /* metadata= */ null, + /* contentGeneration= */ 0, + /* metaGeneration= */ 0, + /* verificationAttributes= */ null); + } + + /** + * Factory method for creating a GoogleCloudStorageItemInfo for an object. + * + * @param resourceId identifies either root, a Bucket, or a StorageObject + * @param creationTime Time when object was created (milliseconds since January 1, 1970 UTC). + * @param size Size of the given object (number of bytes) or -1 if the object does not exist. + * @param metadata User-supplied object metadata for this object. + */ + public static GoogleCloudStorageItemInfo createObject(StorageResourceId resourceId, + long creationTime, long modificationTime, long size, String contentType, + String contentEncoding, Map metadata, long contentGeneration, + long metaGeneration, VerificationAttributes verificationAttributes) { + checkNotNull(resourceId, "resourceId must not be null"); + checkArgument(!resourceId.isRoot(), "expected object or directory but got '%s'", resourceId); + checkArgument(!resourceId.isBucket(), "expected object or directory but got '%s'", resourceId); + return new GoogleCloudStorageItemInfo(resourceId, creationTime, modificationTime, size, + /* location= */ null, + /* storageClass= */ null, contentType, contentEncoding, metadata, contentGeneration, + metaGeneration, verificationAttributes); + } + + /** + * Factory method for creating a "found" GoogleCloudStorageItemInfo for an inferred directory. + * + * @param resourceId Resource ID that identifies an inferred directory + */ + public static GoogleCloudStorageItemInfo createInferredDirectory(StorageResourceId resourceId) { + return new GoogleCloudStorageItemInfo(resourceId, + /* creationTime= */ 0, + /* modificationTime= */ 0, + /* size= */ 0, + /* location= */ null, + /* storageClass= */ null, + /* contentType= */ null, + /* contentEncoding= */ null, + /* metadata= */ null, + /* contentGeneration= */ 0, + /* metaGeneration= */ 0, + /* verificationAttributes= */ null); + } + + /** + * Factory method for creating a "not found" GoogleCloudStorageItemInfo for a bucket or an object. 
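+ * <p>The returned info has {@code size == -1}, so {@code exists()} reports {@code false}:
+ * <pre>{@code
+ * GoogleCloudStorageItemInfo notFound = GoogleCloudStorageItemInfo.createNotFound(resourceId);
+ * assert !notFound.exists(); // size is -1 for items that do not exist
+ * }</pre>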
+ * + * @param resourceId Resource ID that identifies an inferred directory + */ + public static GoogleCloudStorageItemInfo createNotFound(StorageResourceId resourceId) { + return new GoogleCloudStorageItemInfo(resourceId, + /* creationTime= */ 0, + /* modificationTime= */ 0, + /* size= */ -1, + /* location= */ null, + /* storageClass= */ null, + /* contentType= */ null, + /* contentEncoding= */ null, + /* metadata= */ null, + /* contentGeneration= */ 0, + /* metaGeneration= */ 0, + /* verificationAttributes= */ null); + } + + // The Bucket and maybe StorageObject names of the GCS "item" referenced by this object. Not + // null. + private final StorageResourceId resourceId; + + // Creation time of this item. + // Time is expressed as milliseconds since January 1, 1970 UTC. + private final long creationTime; + + // Modification time of this item. + // Time is expressed as milliseconds since January 1, 1970 UTC. + private final long modificationTime; + + // Size of an object (number of bytes). + // Size is -1 for items that do not exist. + private final long size; + + // Location of this item. + private final String location; + + // Storage class of this item. + private final String storageClass; + + // Content-Type of this item + private final String contentType; + + private final String contentEncoding; + + // User-supplied metadata. + private final Map metadata; + + private final long contentGeneration; + + private final long metaGeneration; + + private final VerificationAttributes verificationAttributes; + + private GoogleCloudStorageItemInfo(StorageResourceId resourceId, long creationTime, + long modificationTime, long size, String location, String storageClass, String contentType, + String contentEncoding, Map metadata, long contentGeneration, + long metaGeneration, VerificationAttributes verificationAttributes) { + this.resourceId = checkNotNull(resourceId, "resourceId must not be null"); + this.creationTime = creationTime; + this.modificationTime = modificationTime; + this.size = size; + this.location = location; + this.storageClass = storageClass; + this.contentType = contentType; + this.contentEncoding = contentEncoding; + this.metadata = (metadata == null) ? ImmutableMap.of() : metadata; + this.contentGeneration = contentGeneration; + this.metaGeneration = metaGeneration; + this.verificationAttributes = verificationAttributes; + } + + /** + * Gets bucket name of this item. + */ + public String getBucketName() { + return resourceId.getBucketName(); + } + + /** + * Gets object name of this item. + */ + public String getObjectName() { + return resourceId.getObjectName(); + } + + /** + * Gets the resourceId that holds the (possibly null) bucketName and objectName of this object. + */ + public StorageResourceId getResourceId() { + return resourceId; + } + + /** + * Gets creation time of this item. + * + *
<p>
Time is expressed as milliseconds since January 1, 1970 UTC. + */ + public long getCreationTime() { + return creationTime; + } + + /** + * Gets modification time of this item. + * + *
<p>
Time is expressed as milliseconds since January 1, 1970 UTC. + */ + public long getModificationTime() { + return modificationTime; + } + + /** + * Gets size of this item (number of bytes). Returns -1 if the object does not exist. + */ + public long getSize() { + return size; + } + + /** + * Gets location of this item. + * + *
<p>
Note: Location is only supported for buckets. The value is always null for objects. + */ + public String getLocation() { + return location; + } + + /** + * Gets storage class of this item. + * + *
<p>
Note: Storage-class is only supported for buckets. The value is always null for objects. + */ + public String getStorageClass() { + return storageClass; + } + + /** + * Gets the content-type of this item, or null if unknown or inapplicable. + * + *
<p>
Note: content-type is only supported for objects, and will always be null for buckets. + */ + public String getContentType() { + return contentType; + } + + /** + * Gets the content-encoding of this item, or null if unknown or inapplicable. + * + *
<p>
Note: content-encoding is only supported for objects, and will always be null for buckets. + */ + public String getContentEncoding() { + return contentEncoding; + } + + /** + * Gets user-supplied metadata for this item. + * + *
<p>
Note: metadata is only supported for objects. This value is always an empty map for buckets. + */ + public Map getMetadata() { + return metadata; + } + + /** + * Indicates whether this item is a bucket. Root is not considered to be a bucket. + */ + public boolean isBucket() { + return resourceId.isBucket(); + } + + /** + * Indicates whether this item refers to the GCS root (gs://). + */ + public boolean isRoot() { + return resourceId.isRoot(); + } + + /** + * Indicates whether this instance has information about the unique, shared root of the underlying + * storage system. + */ + public boolean isGlobalRoot() { + return isRoot() && exists(); + } + + /** + * Indicates whether {@code itemInfo} is a directory. + */ + public boolean isDirectory() { + return isGlobalRoot() || isBucket() || resourceId.isDirectory(); + } + + /** + * Indicates whether {@code itemInfo} is an inferred directory + */ + public boolean isInferredDirectory() { + return creationTime == 0 && modificationTime == 0 && size == 0 && contentGeneration == 0 + && metaGeneration == 0; + } + + /** + * Get the content generation of the object. + */ + public long getContentGeneration() { + return contentGeneration; + } + + /** + * Get the meta generation of the object. + */ + public long getMetaGeneration() { + return metaGeneration; + } + + /** + * Get object validation attributes. + */ + public VerificationAttributes getVerificationAttributes() { + return verificationAttributes; + } + + /** + * Indicates whether this item exists. + */ + public boolean exists() { + return size >= 0; + } + + /** + * Helper for checking logical equality of metadata maps, checking equality of keySet() between + * this.metadata and otherMetadata, and then using Arrays.equals to compare contents of + * corresponding byte arrays. + */ + @VisibleForTesting + public boolean metadataEquals(Map otherMetadata) { + if (metadata == otherMetadata) { + // Fast-path for common cases where the same actual default metadata instance may be + // used in + // multiple different item infos. + return true; + } + // No need to check if other `metadata` is not null, + // because previous `if` checks if both of them are null. + if (metadata == null || otherMetadata == null) { + return false; + } + if (!metadata.keySet().equals(otherMetadata.keySet())) { + return false; + } + + // Compare each byte[] with Arrays.equals. + for (Map.Entry metadataEntry : metadata.entrySet()) { + if (!Arrays.equals(metadataEntry.getValue(), otherMetadata.get(metadataEntry.getKey()))) { + return false; + } + } + return true; + } + + /** + * Gets string representation of this instance. + */ + @Override + public String toString() { + return exists() ? 
+ String.format("%s: created on: %s", resourceId, Instant.ofEpochMilli(creationTime)) : + String.format("%s: exists: no", resourceId); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof GoogleCloudStorageItemInfo) { + GoogleCloudStorageItemInfo other = (GoogleCloudStorageItemInfo) obj; + return resourceId.equals(other.resourceId) && creationTime == other.creationTime + && modificationTime == other.modificationTime && size == other.size && Objects.equals( + location, other.location) && Objects.equals(storageClass, other.storageClass) + && Objects.equals(verificationAttributes, other.verificationAttributes) + && metaGeneration == other.metaGeneration && contentGeneration == other.contentGeneration + && metadataEquals(other.getMetadata()); + } + return false; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + resourceId.hashCode(); + result = prime * result + (int) creationTime; + result = prime * result + (int) modificationTime; + result = prime * result + (int) size; + result = prime * result + Objects.hashCode(location); + result = prime * result + Objects.hashCode(storageClass); + result = prime * result + Objects.hashCode(verificationAttributes); + result = prime * result + (int) metaGeneration; + result = prime * result + (int) contentGeneration; + result = prime * result + metadata.entrySet().stream() + .mapToInt(e -> Objects.hash(e.getKey()) + Arrays.hashCode(e.getValue())).sum(); + return result; + } +} diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystem.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystem.java new file mode 100644 index 0000000000000..69b329b554f95 --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystem.java @@ -0,0 +1,505 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.gs; + +import static org.apache.hadoop.fs.gs.Constants.GCS_CONFIG_PREFIX; +import static org.apache.hadoop.fs.gs.GoogleHadoopFileSystemConfiguration.GCS_WORKING_DIRECTORY; + +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkArgument; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkState; +import static org.apache.hadoop.thirdparty.com.google.common.base.Strings.isNullOrEmpty; + +import com.google.auth.oauth2.GoogleCredentials; +import org.apache.hadoop.thirdparty.com.google.common.base.Ascii; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.util.EnumSet; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.ProviderUtils; +import org.apache.hadoop.util.Progressable; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * GoogleHadoopFileSystem is rooted in a single bucket at initialization time; in this case, Hadoop + * paths no longer correspond directly to general GCS paths, and all Hadoop operations going through + * this FileSystem will never touch any GCS bucket other than the bucket on which this FileSystem is + * rooted. + * + *
This implementation sacrifices a small amount of cross-bucket interoperability in favor of + * more straightforward FileSystem semantics and compatibility with existing Hadoop applications. In + * particular, it is not subject to bucket-naming constraints, and files are allowed to be placed in + * root. + */ +public class GoogleHadoopFileSystem extends FileSystem { + + public static final Logger LOG = LoggerFactory.getLogger(GoogleHadoopFileSystem.class); + + /** + * URI scheme for GoogleHadoopFileSystem. + */ + private static final String SCHEME = Constants.SCHEME; + + /** + * Default value of replication factor. + */ + static final short REPLICATION_FACTOR_DEFAULT = 3; + + // TODO: Take this from config. Octal 0700 reports rwx------ for the owner. + private static final int PERMISSIONS_TO_REPORT = 0700; + + /** + * The URI the File System is passed in initialize. + */ + URI initUri; + + /** + * Default block size. Note that this is the size that is reported to Hadoop FS clients. It does + * not modify the actual block size of an underlying GCS object, because GCS JSON API does not + * allow modifying or querying the value. Modifying this value allows one to control how many + * mappers are used to process a given file. + */ + private long defaultBlockSize = GoogleHadoopFileSystemConfiguration.BLOCK_SIZE.getDefault(); + + // The bucket the file system is rooted in, used for default values of: + // -- working directory + // -- user home directories (only for Hadoop purposes). + private Path fsRoot; + + /** + * Current working directory; overridden in initialize() if {@link + * GoogleHadoopFileSystemConfiguration#GCS_WORKING_DIRECTORY} is set. + */ + private Path workingDirectory; + private GoogleCloudStorageFileSystem gcsFs; + private boolean isClosed; + private FsPermission reportedPermissions; + + public GoogleHadoopFileSystemConfiguration getFileSystemConfiguration() { + return fileSystemConfiguration; + } + + private GoogleHadoopFileSystemConfiguration fileSystemConfiguration; + + @Override + public void initialize(final URI path, Configuration config) throws IOException { + LOG.trace("initialize(path: {}, config: {})", path, config); + + checkArgument(path != null, "path must not be null"); + checkArgument(config != null, "config must not be null"); + checkArgument(path.getScheme() != null, "scheme of path must not be null"); + checkArgument(path.getScheme().equals(getScheme()), "URI scheme not supported: %s", path); + + config = + ProviderUtils.excludeIncompatibleCredentialProviders(config, GoogleHadoopFileSystem.class); + super.initialize(path, config); + + initUri = path; + + // Set this configuration as the default config for this instance; configure() + // will perform some file-system-specific adjustments, but the original should + // be sufficient (and is required) for the delegation token binding initialization. + setConf(config); + + this.reportedPermissions = new FsPermission(PERMISSIONS_TO_REPORT); + + initializeFsRoot(); + + this.fileSystemConfiguration = new GoogleHadoopFileSystemConfiguration(config); + initializeWorkingDirectory(fileSystemConfiguration); + initializeGcsFs(fileSystemConfiguration); + }
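+ + // A minimal usage sketch of the initialize() path above (illustrative only: the bucket name,
+ // key values and data are assumed example values, and application-default credentials must be
+ // available):
+ //   Configuration conf = new Configuration();
+ //   conf.set("fs.gs.impl", "org.apache.hadoop.fs.gs.GoogleHadoopFileSystem"); // if the scheme is not service-loaded
+ //   conf.set("fs.gs.working.dir", "/jobs");
+ //   FileSystem fs = FileSystem.get(URI.create("gs://example-bucket/"), conf);
+ //   try (FSDataOutputStream out = fs.create(new Path("/jobs/part-00000"))) {
+ //     out.write("hello".getBytes(java.nio.charset.StandardCharsets.UTF_8));
+ //   }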
+ + private void initializeFsRoot() { + String rootBucket = initUri.getAuthority(); + checkArgument(rootBucket != null, "No bucket specified in GCS URI: %s", initUri); + // Validate root bucket name + URI rootUri = UriPaths.fromStringPathComponents(rootBucket, /* objectName= */ + null, /* allowEmptyObjectName= */ true); + fsRoot = new Path(rootUri); + LOG.trace("Configured FS root: '{}'", fsRoot); + } + + private void initializeWorkingDirectory(final GoogleHadoopFileSystemConfiguration config) { + String configWorkingDirectory = config.getWorkingDirectory(); + if (isNullOrEmpty(configWorkingDirectory)) { + LOG.warn("No working directory configured, using default: '{}'", fsRoot); + } + // Use the public method to ensure proper behavior of normalizing and resolving the new + // working directory relative to the initial filesystem-root directory. + setWorkingDirectory( + isNullOrEmpty(configWorkingDirectory) ? fsRoot : new Path(configWorkingDirectory)); + LOG.trace("Configured working directory: {} = {}", GCS_WORKING_DIRECTORY.getKey(), + getWorkingDirectory()); + } + + private synchronized void initializeGcsFs(final GoogleHadoopFileSystemConfiguration config) + throws IOException { + this.gcsFs = createGcsFs(config); + } + + private GoogleCloudStorageFileSystem createGcsFs(final GoogleHadoopFileSystemConfiguration config) + throws IOException { + GoogleCredentials credentials = getCredentials(config); + return new GoogleCloudStorageFileSystem(config, credentials); + } + + private GoogleCredentials getCredentials(GoogleHadoopFileSystemConfiguration config) + throws IOException { + return getCredentials(config, GCS_CONFIG_PREFIX); + } + + public static GoogleCredentials getCredentials(GoogleHadoopFileSystemConfiguration config, + String... keyPrefixesVararg) throws IOException { + return GoogleCredentials.getApplicationDefault(); // TODO: Add other Auth mechanisms + } + + @Override + protected void checkPath(final Path path) { + LOG.trace("checkPath(path: {})", path); + // Validate scheme + URI uri = path.toUri(); + + String scheme = uri.getScheme(); + if (scheme != null && !scheme.equalsIgnoreCase(getScheme())) { + throw new IllegalArgumentException( + String.format("Wrong scheme: %s, in path: %s, expected scheme: %s", scheme, path, + getScheme())); + } + + String bucket = uri.getAuthority(); + String rootBucket = fsRoot.toUri().getAuthority(); + + // Bucket-less URIs will be qualified later + if (bucket == null || bucket.equals(rootBucket)) { + return; + } + + throw new IllegalArgumentException( + String.format("Wrong bucket: %s, in path: %s, expected bucket: %s", bucket, path, + rootBucket)); + }
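+ + // Behavior sketch for checkPath() above (bucket names illustrative): for an instance rooted at
+ // gs://example-bucket, checkPath(new Path("/dir/file")) and
+ // checkPath(new Path("gs://example-bucket/dir/file")) both pass, while
+ // checkPath(new Path("gs://other-bucket/dir/file")) throws IllegalArgumentException.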
+ + /** + * Validates that GCS path belongs to this file system. The bucket must match the root bucket + * provided at initialization time. + */ + Path getHadoopPath(final URI gcsPath) { + LOG.trace("getHadoopPath(gcsPath: {})", gcsPath); + + // Handle root. Delegate to getGcsPath on "gs:/" to resolve the appropriate gs:// URI. + if (gcsPath.equals(getGcsPath(fsRoot))) { + return fsRoot; + } + + StorageResourceId resourceId = StorageResourceId.fromUriPath(gcsPath, true); + + checkArgument(!resourceId.isRoot(), "Missing authority in gcsPath '%s'", gcsPath); + String rootBucket = fsRoot.toUri().getAuthority(); + checkArgument(resourceId.getBucketName().equals(rootBucket), + "Authority of URI '%s' doesn't match root bucket '%s'", resourceId.getBucketName(), + rootBucket); + + Path hadoopPath = new Path(fsRoot, + new Path(/* schema= */ null, /* authority= */ null, resourceId.getObjectName())); + LOG.trace("getHadoopPath(gcsPath: {}): {}", gcsPath, hadoopPath); + return hadoopPath; + } + + /** + * Translates a "gs:/" style hadoopPath (or relative path which is not fully-qualified) into the + * appropriate GCS path which is compatible with the underlying GcsFs. + */ + URI getGcsPath(final Path hadoopPath) { + LOG.trace("getGcsPath(hadoopPath: {})", hadoopPath); + + // Convert to fully qualified absolute path; the Path object will call back to get our current + // workingDirectory as part of fully resolving the path. + Path resolvedPath = makeQualified(hadoopPath); + + String objectName = resolvedPath.toUri().getPath(); + if (objectName != null && resolvedPath.isAbsolute()) { + // Strip off leading '/' because GoogleCloudStorageFileSystem.getPath appends it explicitly + // between bucket and objectName. + objectName = objectName.substring(1); + } + + // Construct GCS path URI + String rootBucket = fsRoot.toUri().getAuthority(); + URI gcsPath = + UriPaths.fromStringPathComponents(rootBucket, objectName, /* allowEmptyObjectName= */ true); + LOG.trace("getGcsPath(hadoopPath: {}): {}", hadoopPath, gcsPath); + return gcsPath; + } + + @Override + public String getScheme() { + return SCHEME; + } + + @Override + public FSDataInputStream open(final Path path, final int bufferSize) throws IOException { + LOG.trace("open({})", path); + throw new UnsupportedOperationException(path.toString()); + }
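+ + // Round-trip sketch for getGcsPath()/getHadoopPath() above (illustrative, assuming an instance
+ // rooted at gs://example-bucket with working directory gs://example-bucket/):
+ //   URI gcsPath = getGcsPath(new Path("/dir/file"));  // -> gs://example-bucket/dir/file
+ //   Path hadoopPath = getHadoopPath(gcsPath);         // -> gs://example-bucket/dir/file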
+ + @Override + public FSDataOutputStream create(Path hadoopPath, FsPermission permission, boolean overwrite, + int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { + checkArgument(hadoopPath != null, "hadoopPath must not be null"); + checkArgument(replication > 0, "replication must be a positive integer: %s", replication); + checkArgument(blockSize > 0, "blockSize must be a positive integer: %s", blockSize); + + checkOpen(); + + LOG.trace("create(hadoopPath: {}, overwrite: {}, bufferSize: {} [ignored])", hadoopPath, + overwrite, bufferSize); + + CreateOptions.WriteMode writeMode = + overwrite ? CreateOptions.WriteMode.OVERWRITE : CreateOptions.WriteMode.CREATE_NEW; + FSDataOutputStream response = new FSDataOutputStream( + new GoogleHadoopOutputStream(this, getGcsPath(hadoopPath), + CreateOptions.builder().setWriteMode(writeMode).build(), statistics), statistics); + + return response; + } + + @Override + public FSDataOutputStream createNonRecursive(Path hadoopPath, FsPermission permission, + EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + throw new UnsupportedOperationException(hadoopPath.toString()); + } + + @Override + public FSDataOutputStream append(final Path path, final int i, final Progressable progressable) + throws IOException { + throw new UnsupportedOperationException(path.toString()); + } + + @Override + public boolean rename(final Path path, final Path path1) throws IOException { + LOG.trace("rename({}, {})", path, path1); + throw new UnsupportedOperationException(path.toString()); + } + + @Override + public boolean delete(final Path path, final boolean recursive) throws IOException { + LOG.trace("delete({}, {})", path, recursive); + throw new UnsupportedOperationException(path.toString()); + } + + @Override + public FileStatus[] listStatus(final Path path) throws FileNotFoundException, IOException { + checkArgument(path != null, "path must not be null"); + + checkOpen(); + + LOG.trace("listStatus(hadoopPath: {})", path); + throw new UnsupportedOperationException(path.toString()); + } + + /** + * Overridden to make root its own parent. This is POSIX compliant, but more importantly guards + * against poor directory accounting in the PathData class of Hadoop 2's FsShell. + */ + @Override + public Path makeQualified(final Path path) { + Path qualifiedPath = super.makeQualified(path); + + URI uri = qualifiedPath.toUri(); + + checkState("".equals(uri.getPath()) || qualifiedPath.isAbsolute(), + "Path '%s' must be fully qualified.", qualifiedPath); + + Path result; + String upath = uri.getPath(); + + // Strip initial '..'s so that root is its own parent. + int i = 0; + while (upath.startsWith("/../", i)) { + // Leave a preceding slash, so path is still absolute. + i += 3; + } + if (i == upath.length() || upath.substring(i).equals("/..")) { + // Allow a Path of gs://someBucket to map to gs://someBucket/ + result = new Path(uri.getScheme(), uri.getAuthority(), "/"); + } else if (i == 0) { + result = qualifiedPath; + } else { + result = new Path(uri.getScheme(), uri.getAuthority(), upath.substring(i)); + } + + LOG.trace("makeQualified(path: {}): {}", path, result); + return result; + } + + /** + * Returns a URI of the root of this FileSystem. + */ + @Override + public URI getUri() { + return fsRoot.toUri(); + } + + /** + * The default port is listed as -1 as an indication that ports are not used. + */ + @Override + protected int getDefaultPort() { + int result = -1; + LOG.trace("getDefaultPort(): {}", result); + return result; + } + + @Override + public boolean hasPathCapability(final Path path, final String capability) { + checkNotNull(path, "path must not be null"); + checkArgument(!isNullOrEmpty(capability), "capability must not be null or empty string for %s", + path); + switch (Ascii.toLowerCase(capability)) { + case CommonPathCapabilities.FS_APPEND: + case CommonPathCapabilities.FS_CONCAT: + return false; + default: + return false; + } + } + + /** + * Gets the current working directory. + * + * @return The current working directory.
+ */ + @Override + public Path getWorkingDirectory() { + LOG.trace("getWorkingDirectory(): {}", workingDirectory); + return workingDirectory; + } + + @Override + public boolean mkdirs(final Path path, final FsPermission fsPermission) throws IOException { + LOG.trace("mkdirs({})", path); + throw new UnsupportedOperationException(path.toString()); + } + + /** + * Gets the default replication factor. + */ + @Override + public short getDefaultReplication() { + return REPLICATION_FACTOR_DEFAULT; + } + + @Override + public FileStatus getFileStatus(final Path path) throws IOException { + checkArgument(path != null, "path must not be null"); + + checkOpen(); + + URI gcsPath = getGcsPath(path); + + LOG.trace("getFileStatus(): {}", gcsPath); + + throw new UnsupportedOperationException(path.toString()); + } + + /** + * Returns home directory of the current user. + * + *
Note: This directory is only used for Hadoop purposes. It is not the same as a user's OS + * home directory. + */ + @Override + public Path getHomeDirectory() { + Path result = new Path(fsRoot, "user/" + System.getProperty("user.name")); + LOG.trace("getHomeDirectory(): {}", result); + return result; + } + + /** + * {@inheritDoc} + * + *
Returns the service if delegation tokens are configured, otherwise, null. + */ + @Override + public String getCanonicalServiceName() { + // TODO: Add delegation token support + return null; + } + + /** + * Gets GCS FS instance. + */ + public GoogleCloudStorageFileSystem getGcsFs() { + return gcsFs; + } + + /** + * Assert that the FileSystem has been initialized and not close()d. + */ + private void checkOpen() throws IOException { + if (isClosed) { + throw new IOException("GoogleHadoopFileSystem has been closed or not initialized."); + } + } + + @Override + public void close() throws IOException { + LOG.trace("close()"); + if (isClosed) { + return; + } + + super.close(); + + getGcsFs().close(); + + this.isClosed = true; + } + + @Override + public long getUsed() throws IOException { + long result = super.getUsed(); + LOG.trace("getUsed(): {}", result); + return result; + } + + @Override + public long getDefaultBlockSize() { + LOG.trace("getDefaultBlockSize(): {}", defaultBlockSize); + return defaultBlockSize; + } + + @Override + public void setWorkingDirectory(final Path hadoopPath) { + checkArgument(hadoopPath != null, "hadoopPath must not be null"); + URI gcsPath = UriPaths.toDirectory(getGcsPath(hadoopPath)); + workingDirectory = getHadoopPath(gcsPath); + LOG.trace("setWorkingDirectory(hadoopPath: {}): {}", hadoopPath, workingDirectory); + } +} diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystemConfiguration.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystemConfiguration.java new file mode 100644 index 0000000000000..16d940b16f49c --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystemConfiguration.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.gs; + +import static java.lang.Math.toIntExact; + +import org.apache.hadoop.conf.Configuration; + +/** + * This class provides a configuration for the {@link GoogleHadoopFileSystem} implementations. + */ +class GoogleHadoopFileSystemConfiguration { + /** + * Configuration key for default block size of a file. + * + *
Note that this is the size that is reported to Hadoop FS clients. It does not modify the + * actual block size of an underlying GCS object, because GCS JSON API does not allow modifying or + * querying the value. Modifying this value allows one to control how many mappers are used to + * process a given file. + */ + public static final HadoopConfigurationProperty<Long> BLOCK_SIZE = + new HadoopConfigurationProperty<>("fs.gs.block.size", 64 * 1024 * 1024L); + + /** + * Configuration key for GCS project ID. Default value: none + */ + public static final HadoopConfigurationProperty<String> GCS_PROJECT_ID = + new HadoopConfigurationProperty<>("fs.gs.project.id"); + + /** + * Configuration key for initial working directory of a GHFS instance. Default value: '/' + */ + public static final HadoopConfigurationProperty<String> GCS_WORKING_DIRECTORY = + new HadoopConfigurationProperty<>("fs.gs.working.dir", "/"); + + /** + * Configuration key for setting write buffer size. + */ + public static final HadoopConfigurationProperty<Long> GCS_OUTPUT_STREAM_BUFFER_SIZE = + new HadoopConfigurationProperty<>("fs.gs.outputstream.buffer.size", 8L * 1024 * 1024); + + private final String workingDirectory; + + public int getOutStreamBufferSize() { + return outStreamBufferSize; + } + + private final int outStreamBufferSize; + + GoogleHadoopFileSystemConfiguration(Configuration config) { + this.workingDirectory = GCS_WORKING_DIRECTORY.get(config, config::get); + this.outStreamBufferSize = + toIntExact(GCS_OUTPUT_STREAM_BUFFER_SIZE.get(config, config::getLongBytes)); + } + + public String getWorkingDirectory() { + return this.workingDirectory; + } +}
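A usage sketch for the configuration class above (the key names are from this patch, the values are illustrative):
    Configuration conf = new Configuration();
    conf.set("fs.gs.working.dir", "/jobs");
    conf.set("fs.gs.outputstream.buffer.size", "4m");  // Configuration.getLongBytes accepts size suffixes
    GoogleHadoopFileSystemConfiguration cfg = new GoogleHadoopFileSystemConfiguration(conf);
    // cfg.getWorkingDirectory() -> "/jobs"; cfg.getOutStreamBufferSize() -> 4 * 1024 * 1024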
diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopOutputStream.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopOutputStream.java new file mode 100644 index 0000000000000..78cf23e2af94c --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopOutputStream.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.gs; + +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkArgument; +import static org.apache.hadoop.thirdparty.com.google.common.base.Strings.isNullOrEmpty; + +import org.apache.hadoop.thirdparty.com.google.common.base.Ascii; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.net.URI; +import java.nio.channels.Channels; +import java.nio.channels.ClosedChannelException; +import java.nio.channels.WritableByteChannel; +import javax.annotation.Nonnull; + +import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.StreamCapabilities; +import org.apache.hadoop.fs.Syncable; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class GoogleHadoopOutputStream extends OutputStream + implements IOStatisticsSource, StreamCapabilities, Syncable { + public static final Logger LOG = LoggerFactory.getLogger(GoogleHadoopOutputStream.class); + + private final GoogleHadoopFileSystem ghfs; + + // Path of the file to write to. + private final URI dstGcsPath; + + private OutputStream outputStream; + + // Statistics tracker provided by the parent GoogleHadoopFileSystem for recording + // numbers of bytes written. + private final FileSystem.Statistics statistics; + + /** + * Constructs an instance of GoogleHadoopOutputStream object. + * + * @param ghfs Instance of {@link GoogleHadoopFileSystem}. + * @param dstGcsPath Path of the file to write to. + * @param statistics File system statistics object. + * @param createFileOptions options for file creation + * @throws IOException if an IO error occurs. + */ + public GoogleHadoopOutputStream(GoogleHadoopFileSystem ghfs, URI dstGcsPath, + CreateOptions createFileOptions, FileSystem.Statistics statistics) throws IOException { + LOG.trace("GoogleHadoopOutputStream(gcsPath: {}, createFileOptions: {})", dstGcsPath, + createFileOptions); + this.ghfs = ghfs; + this.dstGcsPath = dstGcsPath; + this.statistics = statistics; + + this.outputStream = createOutputStream(ghfs.getGcsFs(), dstGcsPath, createFileOptions, + ghfs.getFileSystemConfiguration()); + } + + private static OutputStream createOutputStream(GoogleCloudStorageFileSystem gcsfs, URI gcsPath, + CreateOptions options, GoogleHadoopFileSystemConfiguration fileSystemConfiguration) + throws IOException { + WritableByteChannel channel; + try { + channel = gcsfs.create(gcsPath, options); + } catch (java.nio.file.FileAlreadyExistsException e) { + throw (FileAlreadyExistsException) new FileAlreadyExistsException( + String.format("'%s' already exists", gcsPath)).initCause(e); + } + OutputStream outputStream = Channels.newOutputStream(channel); + int bufferSize = fileSystemConfiguration.getOutStreamBufferSize(); + return bufferSize > 0 ? new BufferedOutputStream(outputStream, bufferSize) : outputStream; + }
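+ + // Data-path sketch for createOutputStream() above: with the default 8 MiB buffer
+ // (fs.gs.outputstream.buffer.size), bytes from the write() calls below are staged in the
+ // BufferedOutputStream and reach the GCS WritableByteChannel only once the buffer fills
+ // or the stream is closed.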
+ + @Override + public void write(int b) throws IOException { + throwIfNotOpen(); + outputStream.write(b); + statistics.incrementBytesWritten(1); + statistics.incrementWriteOps(1); + } + + @Override + public void write(@Nonnull byte[] b, int offset, int len) throws IOException { + throwIfNotOpen(); + outputStream.write(b, offset, len); + statistics.incrementBytesWritten(len); + statistics.incrementWriteOps(1); + }
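+ + // Durability sketch (state of this patch): hflush() and hsync() below are stubs, and
+ // hasCapability(StreamCapabilities.HFLUSH/HSYNC) reports false accordingly, so a caller
+ // that needs the data persisted must close() the stream.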
+ + /** + * There is no way to flush data to become available for readers without a full-fledged hsync(). + * If the output stream is only syncable, this method is a no-op. If the output stream is also + * flushable, this method will simply use the same implementation of hsync(). + * + * If it is rate limited, unlike hsync(), which will try to acquire the permits and block, it + * will do nothing. + */ + @Override + public void hflush() throws IOException { + // TODO: + } + + @Override + public void hsync() throws IOException { + // TODO: + } + + @Override + public void close() throws IOException { + LOG.trace("close(): final destination: {}", dstGcsPath); + + if (outputStream == null) { + LOG.trace("close(): Ignoring; stream already closed."); + return; + } + + try { + outputStream.close(); + } finally { + outputStream = null; + } + } + + private void throwIfNotOpen() throws IOException { + if (outputStream == null) { + throw new ClosedChannelException(); + } + } + + @Override + public boolean hasCapability(String capability) { + checkArgument(!isNullOrEmpty(capability), "capability must not be null or empty string"); + switch (Ascii.toLowerCase(capability)) { + case StreamCapabilities.HFLUSH: + case StreamCapabilities.HSYNC: + case StreamCapabilities.IOSTATISTICS: + return false; + default: + return false; + } + } +} diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/HadoopConfigurationProperty.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/HadoopConfigurationProperty.java new file mode 100644 index 0000000000000..584813480092a --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/HadoopConfigurationProperty.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.gs; + +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; + +import java.util.List; +import java.util.function.BiFunction; + +import org.apache.hadoop.conf.Configuration; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Hadoop configuration property. + */ +class HadoopConfigurationProperty<T> { + public static final Logger LOG = LoggerFactory.getLogger(HadoopConfigurationProperty.class); + + private final String key; + private final List<String> deprecatedKeys; + private final T defaultValue; + + private List<String> keyPrefixes = ImmutableList.of(""); + + public HadoopConfigurationProperty(String key) { + this(key, null); + } + + public HadoopConfigurationProperty(String key, T defaultValue, String... deprecatedKeys) { + this.key = key; + this.deprecatedKeys = + deprecatedKeys == null ?
ImmutableList.of() : ImmutableList.copyOf(deprecatedKeys); + this.defaultValue = defaultValue; + } + + public String getKey() { + return key; + } + + public T getDefault() { + return defaultValue; + } + + public T get(Configuration config, BiFunction<String, T, T> getterFn) { + String lookupKey = getLookupKey(config, key, deprecatedKeys, (c, k) -> c.get(k) != null); + return logProperty(lookupKey, getterFn.apply(lookupKey, defaultValue)); + } + + private String getLookupKey(Configuration config, String key, List<String> deprecatedKeys, + BiFunction<Configuration, String, Boolean> checkFn) { + for (String prefix : keyPrefixes) { + String prefixedKey = prefix + key; + if (checkFn.apply(config, prefixedKey)) { + return prefixedKey; + } + for (String deprecatedKey : deprecatedKeys) { + String prefixedDeprecatedKey = prefix + deprecatedKey; + if (checkFn.apply(config, prefixedDeprecatedKey)) { + LOG.warn("Using deprecated key '{}', use '{}' key instead.", prefixedDeprecatedKey, + prefixedKey); + return prefixedDeprecatedKey; + } + } + } + return keyPrefixes.get(0) + key; + } + + private static <S> S logProperty(String key, S value) { + LOG.trace("{} = {}", key, value); + return value; + } +} diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StorageResourceId.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StorageResourceId.java new file mode 100644 index 0000000000000..8c56122fd5770 --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StorageResourceId.java @@ -0,0 +1,325 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.gs; + +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkArgument; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; +import static org.apache.hadoop.thirdparty.com.google.common.base.Strings.isNullOrEmpty; +import static org.apache.hadoop.fs.gs.Constants.SCHEME; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URI; +import java.util.Objects; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Data struct representing either a GCS StorageObject, a GCS Bucket or the GCS root (gs://). If + * both bucketName and objectName are null, the StorageResourceId refers to GCS root (gs://). If + * bucketName is non-null, and objectName is null, then this refers to a GCS Bucket. Otherwise, if + * bucketName and objectName are both non-null, this refers to a GCS StorageObject.
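+ * + * A sketch of the three cases (bucket and object names illustrative): + * StorageResourceId.ROOT is the GCS root gs://; new StorageResourceId("example-bucket") is the + * bucket gs://example-bucket; new StorageResourceId("example-bucket", "a/b") is the object + * gs://example-bucket/a/b.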
+ */ +class StorageResourceId { + + public static final Logger LOG = LoggerFactory.getLogger(StorageResourceId.class); + + // The generationId used to denote "unknown"; if given to a method expecting generationId + // constraints, the method may perform extra low-level GETs to determine an existing + // generationId + // if idempotency constraints require doing so. + public static final long UNKNOWN_GENERATION_ID = -1L; + + // Pattern that parses out bucket and object names. + // Given 'gs://foo-bucket/foo/bar/baz', matcher.group(x) will return: + // 0 = gs://foo-bucket/foo/bar/baz + // 1 = foo-bucket/foo/bar/baz + // 2 = foo-bucket + // 3 = /foo/bar/baz + // 4 = foo/bar/baz + // Groups 2 and 4 can be used to create an instance. + private static final Pattern GCS_PATH_PATTERN = Pattern.compile("gs://(([^/]+)(/(.+)?)?)?"); + + // The singleton instance identifying the GCS root (gs://). Both getObjectName() and + // getBucketName() will return null. + public static final StorageResourceId ROOT = new StorageResourceId(); + + // Bucket name of this storage resource to be used with the Google Cloud Storage API. + private final String bucketName; + + // Object name of this storage resource to be used with the Google Cloud Storage API. + private final String objectName; + + // Human-readable String to be returned by toString(); kept as 'final' member for efficiency. + private final String stringPath; + + // The generationId to be used with precondition checks when using this StorageResourceId + // as an identifier for mutation requests. + private final long generationId; + + /** + * Constructor for a StorageResourceId that refers to the GCS root (gs://). Private because all + * external users should just use the singleton StorageResourceId.ROOT. + */ + private StorageResourceId() { + this.bucketName = null; + this.objectName = null; + this.stringPath = StringPaths.fromComponents(bucketName, objectName); + this.generationId = UNKNOWN_GENERATION_ID; + } + + /** + * Constructor for a StorageResourceId representing a Bucket; {@code getObjectName()} will return + * null for a StorageResourceId that represents a Bucket. + * + * @param bucketName The bucket name of the resource. Must be non-empty and non-null. + */ + public StorageResourceId(String bucketName) { + checkArgument(!isNullOrEmpty(bucketName), "bucketName must not be null or empty"); + + this.bucketName = bucketName; + this.objectName = null; + this.stringPath = StringPaths.fromComponents(bucketName, objectName); + this.generationId = UNKNOWN_GENERATION_ID; + } + + /** + * Constructor for a StorageResourceId representing a full StorageObject, including bucketName and + * objectName. + * + * @param bucketName The bucket name of the resource. Must be non-empty and non-null. + * @param objectName The object name of the resource. Must be non-empty and non-null. + */ + public StorageResourceId(String bucketName, String objectName) { + checkArgument(!isNullOrEmpty(bucketName), "bucketName must not be null or empty"); + checkArgument(!isNullOrEmpty(objectName), "objectName must not be null or empty"); + + this.bucketName = bucketName; + this.objectName = objectName; + this.stringPath = StringPaths.fromComponents(bucketName, objectName); + this.generationId = UNKNOWN_GENERATION_ID; + } + + /** + * Constructor for a StorageResourceId representing a full StorageObject, including bucketName and + * objectName. + * + * @param bucketName The bucket name of the resource. Must be non-empty and non-null. 
+ * @param objectName The object name of the resource. Must be non-empty and non-null. + * @param generationId The generationId to be used with precondition checks when using this + * StorageResourceId as an identifier for mutation requests. + */ + public StorageResourceId(String bucketName, String objectName, long generationId) { + checkArgument(!isNullOrEmpty(bucketName), "bucketName must not be null or empty"); + checkArgument(!isNullOrEmpty(objectName), "objectName must not be null or empty"); + + this.bucketName = bucketName; + this.objectName = objectName; + this.stringPath = StringPaths.fromComponents(bucketName, objectName); + this.generationId = generationId; + } + + /** + * Constructor for a StorageResourceId representing a Bucket; {@code getObjectName()} will return + * null for a StorageResourceId created by this constructor. + * + * @param bucketName The bucket name of the resource. Must be non-empty and non-null. + * @param generationId The generationId to be used with precondition checks when using this + * StorageResourceId as an identifier for mutation requests. + */ + public StorageResourceId(String bucketName, long generationId) { + checkArgument(!isNullOrEmpty(bucketName), "bucketName must not be null or empty"); + this.bucketName = bucketName; + this.objectName = null; + this.stringPath = StringPaths.fromComponents(bucketName, objectName); + this.generationId = generationId; + } + + /** + * Returns true if this StorageResourceId represents a GCS StorageObject; if true, both {@code + * getBucketName} and {@code getObjectName} will be non-empty and non-null. + */ + public boolean isStorageObject() { + return bucketName != null && objectName != null; + } + + /** + * Returns true if this StorageResourceId represents a GCS Bucket; if true, then {@code + * getObjectName} will return null. + */ + public boolean isBucket() { + return bucketName != null && objectName == null; + } + + /** + * Returns true if this StorageResourceId represents the GCS root (gs://); if true, then both + * {@code getBucketName} and {@code getObjectName} will be null. + */ + public boolean isRoot() { + return bucketName == null && objectName == null; + } + + /** + * Indicates if this StorageResourceId corresponds to a 'directory'; similar to {@link + * FileInfo#isDirectory} except deals entirely with pathnames instead of also checking for + * exists() to be true on a corresponding GoogleCloudStorageItemInfo. + */ + public boolean isDirectory() { + return isRoot() || isBucket() || StringPaths.isDirectoryPath(objectName); + } + + /** + * Gets the bucket name component of this resource identifier. + */ + public String getBucketName() { + return bucketName; + } + + /** + * Gets the object name component of this resource identifier. + */ + public String getObjectName() { + return objectName; + } + + /** + * The generationId to be used with precondition checks when using this StorageResourceId as an + * identifier for mutation requests. The generationId is *not* used when determining equals() or + * hashCode(). + */ + public long getGenerationId() { + return generationId; + } + + /** + * Returns true if generationId is not UNKNOWN_GENERATION_ID. + */ + public boolean hasGenerationId() { + return generationId != UNKNOWN_GENERATION_ID; + } + + /** + * Returns a string of the form {@code gs://<bucketName>/<objectName>}.
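+ * + * For example (illustrative), {@code new StorageResourceId("example-bucket", "a/b").toString()} + * returns {@code "gs://example-bucket/a/b"}, and {@code StorageResourceId.ROOT.toString()} + * returns {@code "gs://"}.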
+ */ + @Override + public String toString() { + return stringPath; + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof StorageResourceId) { + StorageResourceId other = (StorageResourceId) obj; + return Objects.equals(bucketName, other.bucketName) && Objects.equals(objectName, + other.objectName); + } + return false; + } + + @Override + public int hashCode() { + return stringPath.hashCode(); + } + + /** + * Converts StorageResourceId instance to look like a directory path. If the path already looks + * like a directory path then this call is a no-op. + * + * @return A resourceId with a directory path corresponding to the given resourceId. + */ + public StorageResourceId toDirectoryId() { + if (isStorageObject() && !StringPaths.isDirectoryPath(getObjectName())) { + return new StorageResourceId(getBucketName(), StringPaths.toDirectoryPath(getObjectName())); + } + return this; + } + + /** + * Parses {@link StorageResourceId} from specified string. + */ + public static StorageResourceId fromStringPath(String path) { + return fromStringPath(path, UNKNOWN_GENERATION_ID); + } + + /** + * Parses {@link StorageResourceId} from specified string and generationId. + */ + public static StorageResourceId fromStringPath(String path, long generationId) { + Matcher matcher = GCS_PATH_PATTERN.matcher(path); + checkArgument(matcher.matches(), "'%s' is not a valid GCS object name.", path); + + String bucketName = matcher.group(2); + String relativePath = matcher.group(4); + if (bucketName == null) { + checkArgument(generationId == UNKNOWN_GENERATION_ID, + "Cannot specify generationId '%s' for root object '%s'", generationId, path); + return ROOT; + } else if (relativePath != null) { + return new StorageResourceId(bucketName, relativePath, generationId); + } + checkArgument(generationId == UNKNOWN_GENERATION_ID, + "Cannot specify generationId '%s' for bucket '%s'", generationId, path); + return new StorageResourceId(bucketName); + } + + /** + * Validates the given URI and if valid, returns the associated StorageResourceId. + * + * @param path The GCS URI to validate. + * @param allowEmptyObjectName If true, a missing object name is not considered invalid. + * @return a StorageResourceId that may be the GCS root, a Bucket, or a StorageObject. + */ + public static StorageResourceId fromUriPath(URI path, boolean allowEmptyObjectName) { + return fromUriPath(path, allowEmptyObjectName, UNKNOWN_GENERATION_ID); + } + + /** + * Validates the given URI and if valid, returns the associated StorageResourceId. + * + * @param path The GCS URI to validate. + * @param allowEmptyObjectName If true, a missing object name is not considered invalid. + * @param generationId The generationId to be used with precondition checks when using this + * @return a StorageResourceId that may be the GCS root, a Bucket, or a StorageObject. + */ + public static StorageResourceId fromUriPath(URI path, boolean allowEmptyObjectName, + long generationId) { + LOG.trace("fromUriPath('{}', {})", path, allowEmptyObjectName); + checkNotNull(path); + + if (!SCHEME.equals(path.getScheme())) { + throw new IllegalArgumentException( + String.format("GCS path supports only '%s' scheme, instead got '%s' from '%s'.", SCHEME, + path.getScheme(), path)); + } + + if (path.equals(GoogleCloudStorageFileSystem.GCS_ROOT)) { + return ROOT; + } + + String bucketName = StringPaths.validateBucketName(path.getAuthority()); + // Note that we're using getPath here instead of rawPath, etc. 
This is because it is assumed + // that the path was properly encoded in getPath (or another similar method): + String objectName = StringPaths.validateObjectName(path.getPath(), allowEmptyObjectName); + + return isNullOrEmpty(objectName) ? + new StorageResourceId(bucketName, generationId) : + new StorageResourceId(bucketName, objectName, generationId); + } +} diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StringPaths.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StringPaths.java new file mode 100644 index 0000000000000..723a564e72e63 --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StringPaths.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.gs; + +import static org.apache.hadoop.thirdparty.com.google.common.base.Strings.isNullOrEmpty; +import static org.apache.hadoop.fs.gs.Constants.PATH_DELIMITER; + +import org.apache.hadoop.thirdparty.com.google.common.base.CharMatcher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility methods for String GCS paths. + */ +final class StringPaths { + + public static final Logger LOG = LoggerFactory.getLogger(StringPaths.class); + + private StringPaths() { + } + + // 14x faster (20ns vs 280ns) than "^[a-z0-9_.-]+$" regex + private static final CharMatcher BUCKET_NAME_CHAR_MATCHER = CharMatcher.ascii().and( + CharMatcher.inRange('0', '9').or(CharMatcher.inRange('a', 'z')).or(CharMatcher.anyOf("_.-"))) + .precomputed(); + + /** + * Validate the given bucket name to make sure that it can be used as a part of a file system + * path. + * + *
Note: this is not designed to duplicate the exact checks that GCS would perform on the + * server side. We make some checks that are relevant to using GCS as a file system. + * + * @param bucketName Bucket name to check. + */ + static String validateBucketName(String bucketName) { + // If the name ends with '/', remove it. + bucketName = toFilePath(bucketName); + + if (isNullOrEmpty(bucketName)) { + throw new IllegalArgumentException("GCS bucket name cannot be empty."); + } + + if (!BUCKET_NAME_CHAR_MATCHER.matchesAllOf(bucketName)) { + throw new IllegalArgumentException(String.format( + "Invalid GCS bucket name '%s': bucket name must contain only 'a-z0-9_.-' characters.", + bucketName)); + } + + return bucketName; + } + + /** + * Validate the given object name to make sure that it can be used as a part of a file system + * path. + * + *
Note: this is not designed to duplicate the exact checks that GCS would perform on the + * server side. We make some checks that are relevant to using GCS as a file system. + * + * @param objectName Object name to check. + * @param allowEmptyObjectName If true, a missing object name is not considered invalid. + */ + static String validateObjectName(String objectName, boolean allowEmptyObjectName) { + LOG.trace("validateObjectName('{}', {})", objectName, allowEmptyObjectName); + + if (isNullOrEmpty(objectName) || objectName.equals(PATH_DELIMITER)) { + if (allowEmptyObjectName) { + objectName = ""; + } else { + throw new IllegalArgumentException(String.format( + "GCS path must include non-empty object name [objectName='%s'," + + " allowEmptyObjectName=%s]", objectName, allowEmptyObjectName)); + } + } + + // We want objectName to look like a traditional file system path, + // therefore, disallow objectName with consecutive '/' chars. + for (int i = 0; i < (objectName.length() - 1); i++) { + if (objectName.charAt(i) == '/' && objectName.charAt(i + 1) == '/') { + throw new IllegalArgumentException( + String.format("GCS path must not have consecutive '/' characters: '%s'", objectName)); + } + } + + // Remove leading '/' if it exists. + if (objectName.startsWith(PATH_DELIMITER)) { + objectName = objectName.substring(1); + } + + LOG.trace("validateObjectName -> '{}'", objectName); + return objectName; + } + + /** + * Helper for standardizing the way various human-readable messages in logs/exceptions that refer + * to a bucket/object pair. + */ + public static String fromComponents(String bucketName, String objectName) { + if (bucketName == null && objectName != null) { + throw new IllegalArgumentException( + String.format("Invalid bucketName/objectName pair: gs://%s/%s", bucketName, objectName)); + } + // TODO(user): Unify this method with other methods that convert bucketName/objectName + // to a URI; maybe use the single slash for compatibility. + StringBuilder result = new StringBuilder("gs://"); + if (bucketName != null) { + result.append(bucketName); + } + if (objectName != null) { + result.append('/').append(objectName); + } + return result.toString(); + } + + /** + * Indicates whether the given object name looks like a directory path. + * + * @param path Name of the object to inspect. + * @return Whether the given object name looks like a directory path. + */ + public static boolean isDirectoryPath(String path) { + return !isNullOrEmpty(path) && path.endsWith(PATH_DELIMITER); + } + + /** + * Converts the given object name to look like a file path. If the object name already looks like + * a file path then this call is a no-op. + * + *
If the object name is null or empty, it is returned as-is. + * + * @param path Name of the object to inspect. + * @return File path for the given path. + */ + public static String toFilePath(String path) { + return !isNullOrEmpty(path) && isDirectoryPath(path) ? + path.substring(0, path.length() - 1) : + path; + } + + /** + * Converts the given object name to look like a directory path. If the object name already looks + * like a directory path then this call is a no-op. + * + *
If the object name is null or empty, it is returned as-is. + * + * @param path Name of the object to inspect. + * @return Directory path for the given path. + */ + static String toDirectoryPath(String path) { + return isNullOrEmpty(path) || isDirectoryPath(path) ? path : path + PATH_DELIMITER; + } +} diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/UriPaths.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/UriPaths.java new file mode 100644 index 0000000000000..7fe8169e0bfbd --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/UriPaths.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.gs; + +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; +import static org.apache.hadoop.fs.gs.Constants.PATH_DELIMITER; +import static org.apache.hadoop.fs.gs.Constants.SCHEME; + +import java.net.URI; +import java.net.URISyntaxException; + +/** + * Utility methods for URI GCS paths + */ +final class UriPaths { + + private UriPaths() { + } + + /** + * Converts the given path to look like a directory path. If the path already looks like a + * directory path then this call is a no-op. + * + * @param path Path to convert. + * @return Directory path for the given path. + */ + public static URI toDirectory(URI path) { + StorageResourceId resourceId = StorageResourceId.fromUriPath(path, true); + + if (resourceId.isStorageObject() && !resourceId.isDirectory()) { + resourceId = resourceId.toDirectoryId(); + path = fromResourceId(resourceId, /* allowEmptyObjectName= */ false); + } + return path; + } + + /** + * Gets the parent directory of the given path. + * + * @param path Path to convert. + * @return Path of parent directory of the given item or null for root path. + */ + public static URI getParentPath(URI path) { + checkNotNull(path); + + // Root path has no parent. + if (path.equals(GoogleCloudStorageFileSystem.GCS_ROOT)) { + return null; + } + + StorageResourceId resourceId = StorageResourceId.fromUriPath(path, true); + + if (resourceId.isBucket()) { + return GoogleCloudStorageFileSystem.GCS_ROOT; + } + + String objectName = resourceId.getObjectName(); + int index = StringPaths.isDirectoryPath(objectName) ? + objectName.lastIndexOf(PATH_DELIMITER, objectName.length() - 2) : + objectName.lastIndexOf(PATH_DELIMITER); + return index < 0 ? + fromStringPathComponents(resourceId.getBucketName(), /* objectName= */ + null, /* allowEmptyObjectName= */ true) : + fromStringPathComponents(resourceId.getBucketName(), objectName.substring(0, index + 1), + /* allowEmptyObjectName= */ false); + } + + /** + * Constructs and returns full path for the given bucket and object names. 
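+ * + * For example (illustrative), {@code fromResourceId(new StorageResourceId("example-bucket", + * "dir/file"), false)} returns the URI {@code gs://example-bucket/dir/file}.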
+ */ + public static URI fromResourceId(StorageResourceId resourceId, boolean allowEmptyObjectName) { + return fromStringPathComponents(resourceId.getBucketName(), resourceId.getObjectName(), + allowEmptyObjectName); + } + + /** + * Constructs and returns full path for the given bucket and object names. + */ + public static URI fromStringPathComponents(String bucketName, String objectName, + boolean allowEmptyObjectName) { + if (allowEmptyObjectName && bucketName == null && objectName == null) { + return GoogleCloudStorageFileSystem.GCS_ROOT; + } + + String authority = StringPaths.validateBucketName(bucketName); + String path = PATH_DELIMITER + StringPaths.validateObjectName(objectName, allowEmptyObjectName); + + try { + return new URI(SCHEME, authority, path, + /* query= */ null, + /* fragment= */ null); + } catch (URISyntaxException e) { + throw new IllegalArgumentException( + String.format("Invalid bucket name (%s) or object name (%s)", bucketName, objectName), e); + } + } +} diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/VerificationAttributes.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/VerificationAttributes.java new file mode 100644 index 0000000000000..3f040af3264d4 --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/VerificationAttributes.java @@ -0,0 +1,68 @@ +/* + * Copyright 2016 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.gs; + +import java.util.Arrays; +import javax.annotation.Nullable; + +/** + * GCS provided validation attributes for a single object. + */ +public class VerificationAttributes { + private final byte[] md5hash; + private final byte[] crc32c; + + public VerificationAttributes(@Nullable byte[] md5hash, @Nullable byte[] crc32c) { + this.md5hash = md5hash; + this.crc32c = crc32c; + } + + /** + * MD5 hash of an object, if available. + */ + @Nullable + public byte[] getMd5hash() { + return md5hash; + } + + /** + * CRC32c checksum of an object, if available. 
+ */ + @Nullable + public byte[] getCrc32c() { + return crc32c; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof VerificationAttributes)) { + return false; + } + VerificationAttributes that = (VerificationAttributes) o; + return Arrays.equals(md5hash, that.md5hash) && Arrays.equals(crc32c, that.crc32c); + } + + @Override + public int hashCode() { + int result = Arrays.hashCode(md5hash); + result = 31 * result + Arrays.hashCode(crc32c); + return result; + } +} diff --git a/hadoop-tools/pom.xml b/hadoop-tools/pom.xml index 8c1256a177cc4..70df9b02d0fa9 100644 --- a/hadoop-tools/pom.xml +++ b/hadoop-tools/pom.xml @@ -53,6 +53,7 @@ hadoop-fs2img hadoop-benchmark hadoop-compat-bench + hadoop-gcp From 14d8d9682af8aa9f78413855f6cd6a2cb6ab4b4c Mon Sep 17 00:00:00 2001 From: Arunkumar Chacko Date: Wed, 7 May 2025 06:08:36 +0000 Subject: [PATCH 2/8] Fix a javadoc build error --- .../java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystem.java | 2 +- .../java/org/apache/hadoop/fs/gs/VerificationAttributes.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystem.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystem.java index 69b329b554f95..ee7c5cb51cf20 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystem.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystem.java @@ -455,7 +455,7 @@ public String getCanonicalServiceName() { /** * Gets GCS FS instance. */ - public GoogleCloudStorageFileSystem getGcsFs() { + GoogleCloudStorageFileSystem getGcsFs() { return gcsFs; } diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/VerificationAttributes.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/VerificationAttributes.java index 3f040af3264d4..814661a4b2cd2 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/VerificationAttributes.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/VerificationAttributes.java @@ -22,7 +22,7 @@ /** * GCS provided validation attributes for a single object. 
*/ -public class VerificationAttributes { +class VerificationAttributes { private final byte[] md5hash; private final byte[] crc32c; From 7e203c61181d022dc14e5a06c987afadc348e3de Mon Sep 17 00:00:00 2001 From: Arunkumar Chacko Date: Wed, 7 May 2025 15:35:08 +0000 Subject: [PATCH 3/8] Fix style errors --- .../dev-support/findbugs-exclude.xml | 89 +++++++++++++++++++ .../org/apache/hadoop/fs/gs/Constants.java | 4 +- .../apache/hadoop/fs/gs/CreateOptions.java | 72 +++------------ .../hadoop/fs/gs/ErrorTypeExtractor.java | 46 ++-------- .../org/apache/hadoop/fs/gs/FileInfo.java | 32 +++---- .../hadoop/fs/gs/GoogleCloudStorage.java | 3 +- .../GoogleCloudStorageClientWriteChannel.java | 7 +- .../fs/gs/GoogleCloudStorageFileSystem.java | 10 +-- .../fs/gs/GoogleCloudStorageItemInfo.java | 69 +++++++------- .../hadoop/fs/gs/GoogleHadoopFileSystem.java | 28 +++--- .../fs/gs/GoogleHadoopOutputStream.java | 2 +- .../fs/gs/HadoopConfigurationProperty.java | 22 ++--- .../hadoop/fs/gs/StorageResourceId.java | 45 +++++----- .../org/apache/hadoop/fs/gs/StringPaths.java | 7 +- .../org/apache/hadoop/fs/gs/UriPaths.java | 8 +- .../hadoop/fs/gs/VerificationAttributes.java | 6 +- .../org/apache/hadoop/fs/gs/package-info.java | 23 +++++ 17 files changed, 256 insertions(+), 217 deletions(-) create mode 100644 hadoop-tools/hadoop-gcp/dev-support/findbugs-exclude.xml create mode 100644 hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/package-info.java diff --git a/hadoop-tools/hadoop-gcp/dev-support/findbugs-exclude.xml b/hadoop-tools/hadoop-gcp/dev-support/findbugs-exclude.xml new file mode 100644 index 0000000000000..11410a24c1ad2 --- /dev/null +++ b/hadoop-tools/hadoop-gcp/dev-support/findbugs-exclude.xml @@ -0,0 +1,89 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/Constants.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/Constants.java index 67371ec85abca..34434b2859a06 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/Constants.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/Constants.java @@ -18,7 +18,9 @@ package org.apache.hadoop.fs.gs; -class Constants { +final class Constants { + private Constants() {} + // URI scheme for GCS. static final String SCHEME = "gs"; static final String PATH_DELIMITER = "/"; diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/CreateOptions.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/CreateOptions.java index c4d5e05be664b..03603137c4ea9 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/CreateOptions.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/CreateOptions.java @@ -29,7 +29,7 @@ /** * Options that can be specified when creating a file in the {@link GoogleCloudStorageFileSystem}. 
*/ -class CreateOptions { +final class CreateOptions { private final ImmutableMap attributes; private final String contentType; private final boolean ensureNoDirectoryConflict; @@ -37,9 +37,7 @@ class CreateOptions { private final long overwriteGenerationId; private final WriteMode mode; - public static final CreateOptions DEFAULT = builder().build(); - - public String getContentEncoding() { + String getContentEncoding() { return contentEncoding; } @@ -51,19 +49,15 @@ private CreateOptions(CreateOperationOptionsBuilder builder) { this.ensureNoDirectoryConflict = builder.ensureNoDirectoryConflict; this.interval = builder.interval; this.overwriteGenerationId = builder.overwriteGenerationId; - this.mode = builder.mode; + this.mode = builder.writeMode; this.contentEncoding = builder.contentEncoding; } - public boolean isOverwriteExisting() { + boolean isOverwriteExisting() { return this.mode == WriteMode.OVERWRITE; } enum WriteMode { - /** - * Write new bytes to the end of the existing file rather than the beginning. - */ - APPEND, /** * Creates a new file for write and fails if file already exists. */ @@ -74,14 +68,14 @@ enum WriteMode { OVERWRITE } - public static CreateOperationOptionsBuilder builder() { + static CreateOperationOptionsBuilder builder() { return new CreateOperationOptionsBuilder(); } /** * Extended attributes to set when creating a file. */ - public ImmutableMap getAttributes() { + ImmutableMap getAttributes() { return attributes; } @@ -89,30 +83,14 @@ public ImmutableMap getAttributes() { * Content-type to set when creating a file. */ @Nullable - public String getContentType() { + String getContentType() { return contentType; } - /** - * Configures the minimum time interval (milliseconds) between consecutive sync/flush calls - */ - public Duration getMinSyncInterval() { - return interval; - } - - /** - * If true, makes sure there isn't already a directory object of the same name. If false, you run - * the risk of creating hard-to-cleanup/access files whose names collide with directory names. If - * already sure no such directory exists, then this is safe to set for improved performance. - */ - public boolean isEnsureNoDirectoryConflict() { - return ensureNoDirectoryConflict; - } - /** * Whether to overwrite an existing file with the same name. */ - public WriteMode getWriteMode() { + WriteMode getWriteMode() { return mode; } @@ -123,7 +101,7 @@ public WriteMode getWriteMode() { * only be overwritten by the newly created file if its generation matches this provided * generationId. 
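For orientation, how a caller is expected to assemble these options (a sketch; the builder's build() method is assumed to be retained from the original class, as the removed DEFAULT constant above was built with it):

  CreateOptions overwrite = CreateOptions.builder()
      .setWriteMode(CreateOptions.WriteMode.OVERWRITE)
      .build();
  // overwrite.isOverwriteExisting() -> true
  // The default mode is CREATE_NEW, which fails if the object already exists.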
*/ - public long getOverwriteGenerationId() { + long getOverwriteGenerationId() { return overwriteGenerationId; } @@ -133,38 +111,12 @@ static class CreateOperationOptionsBuilder { private boolean ensureNoDirectoryConflict = true; private Duration interval = Duration.ZERO; private long overwriteGenerationId = StorageResourceId.UNKNOWN_GENERATION_ID; - private WriteMode mode = WriteMode.CREATE_NEW; + private WriteMode writeMode = WriteMode.CREATE_NEW; private String contentEncoding = null; - public CreateOperationOptionsBuilder setAttributes(Map attributes) { - this.attributes = attributes; - return this; - } - - public CreateOperationOptionsBuilder setContentType(String contentType) { - this.contentType = contentType; - return this; - } - - public CreateOperationOptionsBuilder setEnsureNoDirectoryConflict( - boolean ensureNoDirectoryConflict) { - this.ensureNoDirectoryConflict = ensureNoDirectoryConflict; - return this; - } - - public CreateOperationOptionsBuilder setMinSyncInterval(Duration interval) { - this.interval = interval; - return this; - } - - public CreateOperationOptionsBuilder setOverwriteGenerationId(long overwriteGenerationId) { - this.overwriteGenerationId = overwriteGenerationId; - return this; - } - - public CreateOperationOptionsBuilder setWriteMode(WriteMode mode) { - this.mode = mode; + CreateOperationOptionsBuilder setWriteMode(WriteMode mode) { + this.writeMode = mode; return this; } diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/ErrorTypeExtractor.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/ErrorTypeExtractor.java index 2dfb1a6fc68fb..a4497734524e7 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/ErrorTypeExtractor.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/ErrorTypeExtractor.java @@ -17,23 +17,22 @@ package org.apache.hadoop.fs.gs; import io.grpc.Status; -import io.grpc.StatusRuntimeException; - -import javax.annotation.Nullable; /** * Implementation for {@link ErrorTypeExtractor} for exception specifically thrown from gRPC path. */ -class ErrorTypeExtractor { +final class ErrorTypeExtractor { enum ErrorType { - NOT_FOUND, OUT_OF_RANGE, ALREADY_EXISTS, FAILED_PRECONDITION, INTERNAL, RESOURCE_EXHAUSTED, UNAVAILABLE, UNKNOWN + NOT_FOUND, OUT_OF_RANGE, ALREADY_EXISTS, FAILED_PRECONDITION, INTERNAL, RESOURCE_EXHAUSTED, + UNAVAILABLE, UNKNOWN } // public static final ErrorTypeExtractor INSTANCE = new ErrorTypeExtractor(); private static final String BUCKET_ALREADY_EXISTS_MESSAGE = - "FAILED_PRECONDITION: Your previous request to create the named bucket succeeded and you already own it."; + "FAILED_PRECONDITION: Your previous request to create the named bucket succeeded and you " + + "already own it."; private ErrorTypeExtractor() { } @@ -58,39 +57,4 @@ static ErrorType getErrorType(Exception error) { return ErrorType.UNKNOWN; } } - - static boolean bucketAlreadyExists(Exception e) { - ErrorType errorType = getErrorType(e); - if (errorType == ErrorType.ALREADY_EXISTS) { - return true; - } - // The gRPC API currently throws a FAILED_PRECONDITION status code instead of ALREADY_EXISTS, - // so we handle both these conditions in the interim. - // TODO: remove once the status codes are fixed. 
- else if (errorType == ErrorType.FAILED_PRECONDITION) { - StatusRuntimeException statusRuntimeException = getStatusRuntimeException(e); - return statusRuntimeException != null && BUCKET_ALREADY_EXISTS_MESSAGE.equals( - statusRuntimeException.getMessage()); - } - return false; - } - - /** - * Extracts StatusRuntimeException from the Exception, if it exists. - */ - @Nullable - static private StatusRuntimeException getStatusRuntimeException(Exception e) { - Throwable cause = e; - // Keeping a counter to break early from the loop to avoid infinite loop condition due to - // cyclic exception chains. - int currentExceptionDepth = 0, maxChainDepth = 1000; - while (cause != null && currentExceptionDepth < maxChainDepth) { - if (cause instanceof StatusRuntimeException) { - return (StatusRuntimeException) cause; - } - cause = cause.getCause(); - currentExceptionDepth++; - } - return null; - } } diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/FileInfo.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/FileInfo.java index dd4a73f3aa055..df8d63f5eecf2 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/FileInfo.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/FileInfo.java @@ -29,11 +29,11 @@ *
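With bucketAlreadyExists and the StatusRuntimeException unwrapping removed, error classification now goes through getErrorType alone. A sketch of the intended call pattern (illustrative only):

  static boolean isNotFound(Exception e) {
    // Branch on the extracted ErrorType instead of inspecting
    // gRPC Status codes or exception chains directly.
    return ErrorTypeExtractor.getErrorType(e)
        == ErrorTypeExtractor.ErrorType.NOT_FOUND;
  }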

Note: This class wraps GoogleCloudStorageItemInfo, adds file system specific information and * hides bucket/object specific information. */ -class FileInfo { +final class FileInfo { // Info about the root path. - public static final FileInfo ROOT_INFO = - new FileInfo(GoogleCloudStorageFileSystem.GCS_ROOT, GoogleCloudStorageItemInfo.ROOT_INFO); + static final FileInfo ROOT_INFO = + new FileInfo(GoogleCloudStorageFileSystem.GCSROOT, GoogleCloudStorageItemInfo.ROOT_INFO); // Path of this file or directory. private final URI path; @@ -56,21 +56,21 @@ private FileInfo(URI path, GoogleCloudStorageItemInfo itemInfo) { /** * Gets the path of this file or directory. */ - public URI getPath() { + URI getPath() { return path; } /** * Indicates whether this item is a directory. */ - public boolean isDirectory() { + boolean isDirectory() { return itemInfo.isDirectory(); } /** * Indicates whether this item is an inferred directory. */ - public boolean isInferredDirectory() { + boolean isInferredDirectory() { return itemInfo.isInferredDirectory(); } @@ -78,7 +78,7 @@ public boolean isInferredDirectory() { * Indicates whether this instance has information about the unique, shared root of the underlying * storage system. */ - public boolean isGlobalRoot() { + boolean isGlobalRoot() { return itemInfo.isGlobalRoot(); } @@ -87,7 +87,7 @@ public boolean isGlobalRoot() { * *

Time is expressed as milliseconds since January 1, 1970 UTC. */ - public long getCreationTime() { + long getCreationTime() { return itemInfo.getCreationTime(); } @@ -97,7 +97,7 @@ public long getCreationTime() { *

For files, size is in number of bytes. For directories, size is 0. For items that do not * exist, size is -1. */ - public long getSize() { + long getSize() { return itemInfo.getSize(); } @@ -107,7 +107,7 @@ public long getSize() { * * Gets the modification time of this file if one is set, otherwise the value of {@link * #getCreationTime()} is returned. * *

Time is expressed as milliseconds since January 1, 1970 UTC. */ - public long getModificationTime() { + long getModificationTime() { return itemInfo.getModificationTime(); } @@ -116,21 +116,21 @@ public long getModificationTime() { * * @return A map of file attributes */ - public Map getAttributes() { + Map getAttributes() { return itemInfo.getMetadata(); } /** * Indicates whether this file or directory exists. */ - public boolean exists() { + boolean exists() { return itemInfo.exists(); } /** * Returns CRC32C checksum of the file or {@code null}. */ - public byte[] getCrc32cChecksum() { + byte[] getCrc32cChecksum() { VerificationAttributes verificationAttributes = itemInfo.getVerificationAttributes(); return verificationAttributes == null ? null : verificationAttributes.getCrc32c(); } @@ -138,7 +138,7 @@ public byte[] getCrc32cChecksum() { /** * Returns MD5 checksum of the file or {@code null}. */ - public byte[] getMd5Checksum() { + byte[] getMd5Checksum() { VerificationAttributes verificationAttributes = itemInfo.getVerificationAttributes(); return verificationAttributes == null ? null : verificationAttributes.getMd5hash(); } @@ -181,7 +181,7 @@ public int hashCode() { * Handy factory method for constructing a FileInfo from a GoogleCloudStorageItemInfo while * potentially returning a singleton instead of really constructing an object for cases like ROOT. */ - public static FileInfo fromItemInfo(GoogleCloudStorageItemInfo itemInfo) { + static FileInfo fromItemInfo(GoogleCloudStorageItemInfo itemInfo) { if (itemInfo.isRoot()) { return ROOT_INFO; } @@ -193,7 +193,7 @@ public static FileInfo fromItemInfo(GoogleCloudStorageItemInfo itemInfo) { * Handy factory method for constructing a list of FileInfo from a list of * GoogleCloudStorageItemInfo. */ - public static List fromItemInfos(List itemInfos) { + static List fromItemInfos(List itemInfos) { List fileInfos = new ArrayList<>(itemInfos.size()); for (GoogleCloudStorageItemInfo itemInfo : itemInfos) { fileInfos.add(fromItemInfo(itemInfo)); diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorage.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorage.java index b86383504f8d4..9c15962b7ef36 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorage.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorage.java @@ -36,7 +36,8 @@ import java.util.Map; /** - * A wrapper around Google cloud storage client + * A wrapper around Google cloud storage + * client. */ class GoogleCloudStorage { public static final Logger LOG = LoggerFactory.getLogger(GoogleHadoopFileSystem.class); diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageClientWriteChannel.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageClientWriteChannel.java index caee2a5321228..e21ca0c06bc0b 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageClientWriteChannel.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageClientWriteChannel.java @@ -35,17 +35,16 @@ import java.util.List; /** - * Implements WritableByteChannel to provide write access to GCS via java-storage client + * Implements WritableByteChannel to provide write access to GCS via java-storage client. 
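For orientation, how the FileInfo pieces above fit together (a sketch, not part of the diff; the path is made up):

  GoogleCloudStorageItemInfo item = GoogleCloudStorageItemInfo.createInferredDirectory(
      StorageResourceId.fromStringPath("gs://my-bucket/dir/"));
  FileInfo info = FileInfo.fromItemInfo(item);
  // info.isDirectory() -> true; info.getSize() -> 0 for directories
  // For the root item, fromItemInfo returns the shared FileInfo.ROOT_INFO
  // singleton instead of constructing a new instance.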
*/ class GoogleCloudStorageClientWriteChannel implements WritableByteChannel { - - public static final Logger LOG = + private static final Logger LOG = LoggerFactory.getLogger(GoogleCloudStorageClientWriteChannel.class); private final StorageResourceId resourceId; private WritableByteChannel writableByteChannel; - public GoogleCloudStorageClientWriteChannel(final Storage storage, + GoogleCloudStorageClientWriteChannel(final Storage storage, final StorageResourceId resourceId, final CreateOptions createOptions) throws IOException { this.resourceId = resourceId; BlobWriteSession blobWriteSession = getBlobWriteSession(storage, resourceId, createOptions); diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageFileSystem.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageFileSystem.java index d290309c0b1e4..e411f22eb3994 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageFileSystem.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageFileSystem.java @@ -30,13 +30,13 @@ import java.nio.channels.WritableByteChannel; /** - * Provides FS semantics over GCS based on Objects API + * Provides FS semantics over GCS based on Objects API. */ class GoogleCloudStorageFileSystem { private static final Logger LOG = LoggerFactory.getLogger(StorageResourceId.class); // URI of the root path. - static URI GCS_ROOT = URI.create(SCHEME + ":/"); + static final URI GCSROOT = URI.create(SCHEME + ":/"); // GCS access instance. private GoogleCloudStorage gcs; @@ -49,12 +49,12 @@ private static GoogleCloudStorage createCloudStorage( return new GoogleCloudStorage(configuration); } - public GoogleCloudStorageFileSystem(final GoogleHadoopFileSystemConfiguration configuration, + GoogleCloudStorageFileSystem(final GoogleHadoopFileSystemConfiguration configuration, final Credentials credentials) throws IOException { gcs = createCloudStorage(configuration, credentials); } - public WritableByteChannel create(final URI path, final CreateOptions createOptions) + WritableByteChannel create(final URI path, final CreateOptions createOptions) throws IOException { LOG.trace("create(path: {}, createOptions: {})", path, createOptions); checkNotNull(path, "path could not be null"); @@ -75,7 +75,7 @@ public WritableByteChannel create(final URI path, final CreateOptions createOpti return gcs.create(resourceId, createOptions); } - public void close() { + void close() { if (gcs == null) { return; } diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageItemInfo.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageItemInfo.java index 285be5405c36a..887e68b05f98c 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageItemInfo.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageItemInfo.java @@ -30,7 +30,7 @@ /** * Contains information about an item in Google Cloud Storage. */ -class GoogleCloudStorageItemInfo { +final class GoogleCloudStorageItemInfo { // Info about the root of GCS namespace. public static final GoogleCloudStorageItemInfo ROOT_INFO = new GoogleCloudStorageItemInfo(StorageResourceId.ROOT, @@ -50,13 +50,14 @@ class GoogleCloudStorageItemInfo { * Factory method for creating a GoogleCloudStorageItemInfo for a bucket. 
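Putting create() and the write channel together, an end-to-end sketch (illustrative only; the bucket name is a placeholder and the usual java.net/java.nio imports are assumed):

  static void writeObject(GoogleCloudStorageFileSystem gcsFs, byte[] data) throws IOException {
    CreateOptions options = CreateOptions.builder()
        .setWriteMode(CreateOptions.WriteMode.OVERWRITE)
        .build();
    // create() validates the path, then hands back a channel backed by a
    // java-storage BlobWriteSession; closing the channel completes the upload.
    try (WritableByteChannel channel =
        gcsFs.create(URI.create("gs://my-bucket/out.bin"), options)) {
      channel.write(ByteBuffer.wrap(data));
    }
  }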
* * @param resourceId Resource ID that identifies a bucket - * @param creationTime Time when a bucket was created (milliseconds since January 1, 1970 UTC). + * @param creationTime Time when a bucket was created (milliseconds since January 1, 1970 + * UTC). * @param modificationTime Time when a bucket was last modified (milliseconds since January 1, * 1970 UTC). * @param location Location of a bucket. * @param storageClass Storage class of a bucket. */ - public static GoogleCloudStorageItemInfo createBucket(StorageResourceId resourceId, + static GoogleCloudStorageItemInfo createBucket(StorageResourceId resourceId, long creationTime, long modificationTime, String location, String storageClass) { checkNotNull(resourceId, "resourceId must not be null"); checkArgument(resourceId.isBucket(), "expected bucket but got '%s'", resourceId); @@ -74,17 +75,23 @@ public static GoogleCloudStorageItemInfo createBucket(StorageResourceId resource * Factory method for creating a GoogleCloudStorageItemInfo for an object. * * @param resourceId identifies either root, a Bucket, or a StorageObject - * @param creationTime Time when object was created (milliseconds since January 1, 1970 UTC). - * @param size Size of the given object (number of bytes) or -1 if the object does not exist. + * @param creationTime Time when object was created (milliseconds since January 1, 1970 + * UTC). + * @param size Size of the given object (number of bytes) or -1 if the object + * does not exist. * @param metadata User-supplied object metadata for this object. */ - public static GoogleCloudStorageItemInfo createObject(StorageResourceId resourceId, + static GoogleCloudStorageItemInfo createObject(StorageResourceId resourceId, long creationTime, long modificationTime, long size, String contentType, String contentEncoding, Map metadata, long contentGeneration, long metaGeneration, VerificationAttributes verificationAttributes) { checkNotNull(resourceId, "resourceId must not be null"); - checkArgument(!resourceId.isRoot(), "expected object or directory but got '%s'", resourceId); - checkArgument(!resourceId.isBucket(), "expected object or directory but got '%s'", resourceId); + checkArgument( + !resourceId.isRoot(), + "expected object or directory but got '%s'", resourceId); + checkArgument( + !resourceId.isBucket(), + "expected object or directory but got '%s'", resourceId); return new GoogleCloudStorageItemInfo(resourceId, creationTime, modificationTime, size, /* location= */ null, /* storageClass= */ null, contentType, contentEncoding, metadata, contentGeneration, @@ -96,7 +103,7 @@ public static GoogleCloudStorageItemInfo createObject(StorageResourceId resource * * @param resourceId Resource ID that identifies an inferred directory */ - public static GoogleCloudStorageItemInfo createInferredDirectory(StorageResourceId resourceId) { + static GoogleCloudStorageItemInfo createInferredDirectory(StorageResourceId resourceId) { return new GoogleCloudStorageItemInfo(resourceId, /* creationTime= */ 0, /* modificationTime= */ 0, @@ -116,7 +123,7 @@ public static GoogleCloudStorageItemInfo createInferredDirectory(StorageResource * * @param resourceId Resource ID that identifies an inferred directory */ - public static GoogleCloudStorageItemInfo createNotFound(StorageResourceId resourceId) { + static GoogleCloudStorageItemInfo createNotFound(StorageResourceId resourceId) { return new GoogleCloudStorageItemInfo(resourceId, /* creationTime= */ 0, /* modificationTime= */ 0, @@ -188,21 +195,21 @@ private 
GoogleCloudStorageItemInfo(StorageResourceId resourceId, long creationTi /** * Gets bucket name of this item. */ - public String getBucketName() { + String getBucketName() { return resourceId.getBucketName(); } /** * Gets object name of this item. */ - public String getObjectName() { + String getObjectName() { return resourceId.getObjectName(); } /** * Gets the resourceId that holds the (possibly null) bucketName and objectName of this object. */ - public StorageResourceId getResourceId() { + StorageResourceId getResourceId() { return resourceId; } @@ -211,7 +218,7 @@ public StorageResourceId getResourceId() { * *
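One convention worth calling out explicitly: a negative size is how non-existent items are represented, and that is what exists() checks. A sketch (illustrative only):

  GoogleCloudStorageItemInfo missing = GoogleCloudStorageItemInfo.createNotFound(
      new StorageResourceId("my-bucket", "no/such/object"));
  // missing.exists() -> false, since exists() is defined as size >= 0
  // and createNotFound builds the item with size -1.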

Time is expressed as milliseconds since January 1, 1970 UTC. */ - public long getCreationTime() { + long getCreationTime() { return creationTime; } @@ -220,14 +227,14 @@ public long getCreationTime() { * *

Time is expressed as milliseconds since January 1, 1970 UTC. */ - public long getModificationTime() { + long getModificationTime() { return modificationTime; } /** * Gets size of this item (number of bytes). Returns -1 if the object does not exist. */ - public long getSize() { + long getSize() { return size; } @@ -236,7 +243,7 @@ public long getSize() { * *

Note: Location is only supported for buckets. The value is always null for objects. */ - public String getLocation() { + String getLocation() { return location; } @@ -245,7 +252,7 @@ public String getLocation() { * *

Note: Storage-class is only supported for buckets. The value is always null for objects. */ - public String getStorageClass() { + String getStorageClass() { return storageClass; } @@ -254,7 +261,7 @@ public String getStorageClass() { * *

Note: content-type is only supported for objects, and will always be null for buckets. */ - public String getContentType() { + String getContentType() { return contentType; } @@ -263,7 +270,7 @@ public String getContentType() { * *

Note: content-encoding is only supported for objects, and will always be null for buckets. */ - public String getContentEncoding() { + String getContentEncoding() { return contentEncoding; } @@ -272,21 +279,21 @@ public String getContentEncoding() { * *

Note: metadata is only supported for objects. This value is always an empty map for buckets. */ - public Map getMetadata() { + Map getMetadata() { return metadata; } /** * Indicates whether this item is a bucket. Root is not considered to be a bucket. */ - public boolean isBucket() { + boolean isBucket() { return resourceId.isBucket(); } /** * Indicates whether this item refers to the GCS root (gs://). */ - public boolean isRoot() { + boolean isRoot() { return resourceId.isRoot(); } @@ -294,21 +301,21 @@ public boolean isRoot() { * Indicates whether this instance has information about the unique, shared root of the underlying * storage system. */ - public boolean isGlobalRoot() { + boolean isGlobalRoot() { return isRoot() && exists(); } /** * Indicates whether {@code itemInfo} is a directory. */ - public boolean isDirectory() { + boolean isDirectory() { return isGlobalRoot() || isBucket() || resourceId.isDirectory(); } /** - * Indicates whether {@code itemInfo} is an inferred directory + * Indicates whether {@code itemInfo} is an inferred directory. */ - public boolean isInferredDirectory() { + boolean isInferredDirectory() { return creationTime == 0 && modificationTime == 0 && size == 0 && contentGeneration == 0 && metaGeneration == 0; } @@ -316,28 +323,28 @@ public boolean isInferredDirectory() { /** * Get the content generation of the object. */ - public long getContentGeneration() { + long getContentGeneration() { return contentGeneration; } /** * Get the meta generation of the object. */ - public long getMetaGeneration() { + long getMetaGeneration() { return metaGeneration; } /** * Get object validation attributes. */ - public VerificationAttributes getVerificationAttributes() { + VerificationAttributes getVerificationAttributes() { return verificationAttributes; } /** * Indicates whether this item exists. */ - public boolean exists() { + boolean exists() { return size >= 0; } diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystem.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystem.java index ee7c5cb51cf20..1c2fc19d2b5b1 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystem.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystem.java @@ -59,7 +59,7 @@ public class GoogleHadoopFileSystem extends FileSystem { public static final Logger LOG = LoggerFactory.getLogger(GoogleHadoopFileSystem.class); /** - * URI scheme for GoogleHadoopFileSystem + * URI scheme for GoogleHadoopFileSystem. */ private static final String SCHEME = Constants.SCHEME; @@ -74,7 +74,7 @@ public class GoogleHadoopFileSystem extends FileSystem { /** * The URI the File System is passed in initialize. */ - URI initUri; + private URI initUri; /** * Default block size. Note that this is the size that is reported to Hadoop FS clients. It does @@ -407,13 +407,13 @@ public boolean mkdirs(final Path path, final FsPermission fsPermission) throws I throw new UnsupportedOperationException(path.toString()); } - /** - * Gets the default replication factor. - */ - @Override - public short getDefaultReplication() { - return REPLICATION_FACTOR_DEFAULT; - } +// /** +// * Gets the default replication factor. 
+// */ +// @Override +// public short getDefaultReplication() { +// return REPLICATION_FACTOR_DEFAULT; +// } @Override public FileStatus getFileStatus(final Path path) throws IOException { @@ -489,11 +489,11 @@ public long getUsed() throws IOException { return result; } - @Override - public long getDefaultBlockSize() { - LOG.trace("getDefaultBlockSize(): {}", defaultBlockSize); - return defaultBlockSize; - } +// @Override +// public long getDefaultBlockSize() { +// LOG.trace("getDefaultBlockSize(): {}", defaultBlockSize); +// return defaultBlockSize; +// } @Override public void setWorkingDirectory(final Path hadoopPath) { diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopOutputStream.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopOutputStream.java index 78cf23e2af94c..3172373a36bc4 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopOutputStream.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopOutputStream.java @@ -65,7 +65,7 @@ class GoogleHadoopOutputStream extends OutputStream * @param createFileOptions options for file creation * @throws IOException if an IO error occurs. */ - public GoogleHadoopOutputStream(GoogleHadoopFileSystem ghfs, URI dstGcsPath, + GoogleHadoopOutputStream(GoogleHadoopFileSystem ghfs, URI dstGcsPath, CreateOptions createFileOptions, FileSystem.Statistics statistics) throws IOException { LOG.trace("GoogleHadoopOutputStream(gcsPath: {}, createFileOptions: {})", dstGcsPath, createFileOptions); diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/HadoopConfigurationProperty.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/HadoopConfigurationProperty.java index 584813480092a..9360290a09c5b 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/HadoopConfigurationProperty.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/HadoopConfigurationProperty.java @@ -29,10 +29,10 @@ import org.slf4j.LoggerFactory; /** - * Hadoop configuration property + * Hadoop configuration property. */ class HadoopConfigurationProperty { - public static final Logger LOG = LoggerFactory.getLogger(HadoopConfigurationProperty.class); + private static final Logger LOG = LoggerFactory.getLogger(HadoopConfigurationProperty.class); private final String key; private final List deprecatedKeys; @@ -40,34 +40,34 @@ class HadoopConfigurationProperty { private List keyPrefixes = ImmutableList.of(""); - public HadoopConfigurationProperty(String key) { + HadoopConfigurationProperty(String key) { this(key, null); } - public HadoopConfigurationProperty(String key, T defaultValue, String... deprecatedKeys) { + HadoopConfigurationProperty(String key, T defaultValue, String... deprecatedKeys) { this.key = key; this.deprecatedKeys = deprecatedKeys == null ? 
ImmutableList.of() : ImmutableList.copyOf(deprecatedKeys); this.defaultValue = defaultValue; } - public String getKey() { + String getKey() { return key; } - public T getDefault() { + T getDefault() { return defaultValue; } - public T get(Configuration config, BiFunction getterFn) { - String lookupKey = getLookupKey(config, key, deprecatedKeys, (c, k) -> c.get(k) != null); + T get(Configuration config, BiFunction getterFn) { + String lookupKey = getLookupKey(config, key, (c, k) -> c.get(k) != null); return logProperty(lookupKey, getterFn.apply(lookupKey, defaultValue)); } - private String getLookupKey(Configuration config, String key, List deprecatedKeys, + private String getLookupKey(Configuration config, String lookupKey, BiFunction checkFn) { for (String prefix : keyPrefixes) { - String prefixedKey = prefix + key; + String prefixedKey = prefix + lookupKey; if (checkFn.apply(config, prefixedKey)) { return prefixedKey; } @@ -80,7 +80,7 @@ private String getLookupKey(Configuration config, String key, List depre } } } - return keyPrefixes.get(0) + key; + return keyPrefixes.get(0) + lookupKey; } private static S logProperty(String key, S value) { diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StorageResourceId.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StorageResourceId.java index 8c56122fd5770..c889ee054c6bd 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StorageResourceId.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StorageResourceId.java @@ -39,13 +39,13 @@ */ class StorageResourceId { - public static final Logger LOG = LoggerFactory.getLogger(StorageResourceId.class); + private static final Logger LOG = LoggerFactory.getLogger(StorageResourceId.class); // The generationId used to denote "unknown"; if given to a method expecting generationId // constraints, the method may perform extra low-level GETs to determine an existing // generationId // if idempotency constraints require doing so. - public static final long UNKNOWN_GENERATION_ID = -1L; + static final long UNKNOWN_GENERATION_ID = -1L; // Pattern that parses out bucket and object names. // Given 'gs://foo-bucket/foo/bar/baz', matcher.group(x) will return: @@ -59,7 +59,7 @@ class StorageResourceId { // The singleton instance identifying the GCS root (gs://). Both getObjectName() and // getBucketName() will return null. - public static final StorageResourceId ROOT = new StorageResourceId(); + static final StorageResourceId ROOT = new StorageResourceId(); // Bucket name of this storage resource to be used with the Google Cloud Storage API. private final String bucketName; @@ -91,7 +91,7 @@ private StorageResourceId() { * * @param bucketName The bucket name of the resource. Must be non-empty and non-null. */ - public StorageResourceId(String bucketName) { + StorageResourceId(String bucketName) { checkArgument(!isNullOrEmpty(bucketName), "bucketName must not be null or empty"); this.bucketName = bucketName; @@ -107,7 +107,7 @@ public StorageResourceId(String bucketName) { * @param bucketName The bucket name of the resource. Must be non-empty and non-null. * @param objectName The object name of the resource. Must be non-empty and non-null. 
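For orientation, how these properties are consumed (a sketch; the key name is invented for illustration, and the class is assumed to be generic over its value type T as in the upstream connector):

  static long resolveBlockSize(Configuration conf) {
    HadoopConfigurationProperty<Long> blockSize =
        new HadoopConfigurationProperty<>("fs.gs.block.size", 64L * 1024 * 1024);
    // get() resolves the effective key (checking prefixed and deprecated
    // variants), applies the supplied getter, and logs the resolved value.
    return blockSize.get(conf, conf::getLong);
  }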
*/ - public StorageResourceId(String bucketName, String objectName) { + StorageResourceId(String bucketName, String objectName) { checkArgument(!isNullOrEmpty(bucketName), "bucketName must not be null or empty"); checkArgument(!isNullOrEmpty(objectName), "objectName must not be null or empty"); @@ -126,7 +126,7 @@ public StorageResourceId(String bucketName, String objectName) { * @param generationId The generationId to be used with precondition checks when using this * StorageResourceId as an identifier for mutation requests. */ - public StorageResourceId(String bucketName, String objectName, long generationId) { + StorageResourceId(String bucketName, String objectName, long generationId) { checkArgument(!isNullOrEmpty(bucketName), "bucketName must not be null or empty"); checkArgument(!isNullOrEmpty(objectName), "objectName must not be null or empty"); @@ -144,7 +144,7 @@ public StorageResourceId(String bucketName, String objectName, long generationId * @param generationId The generationId to be used with precondition checks when using this * StorageResourceId as an identifier for mutation requests. */ - public StorageResourceId(String bucketName, long generationId) { + StorageResourceId(String bucketName, long generationId) { checkArgument(!isNullOrEmpty(bucketName), "bucketName must not be null or empty"); this.bucketName = bucketName; this.objectName = null; @@ -156,7 +156,7 @@ public StorageResourceId(String bucketName, long generationId) { * Returns true if this StorageResourceId represents a GCS StorageObject; if true, both {@code * getBucketName} and {@code getObjectName} will be non-empty and non-null. */ - public boolean isStorageObject() { + boolean isStorageObject() { return bucketName != null && objectName != null; } @@ -164,7 +164,7 @@ public boolean isStorageObject() { * Returns true if this StorageResourceId represents a GCS Bucket; if true, then {@code * getObjectName} will return null. */ - public boolean isBucket() { + boolean isBucket() { return bucketName != null && objectName == null; } @@ -172,7 +172,7 @@ public boolean isBucket() { * Returns true if this StorageResourceId represents the GCS root (gs://); if true, then both * {@code getBucketName} and {@code getObjectName} will be null. */ - public boolean isRoot() { + boolean isRoot() { return bucketName == null && objectName == null; } @@ -181,21 +181,21 @@ public boolean isRoot() { * FileInfo#isDirectory} except deals entirely with pathnames instead of also checking for * exists() to be true on a corresponding GoogleCloudStorageItemInfo. */ - public boolean isDirectory() { + boolean isDirectory() { return isRoot() || isBucket() || StringPaths.isDirectoryPath(objectName); } /** * Gets the bucket name component of this resource identifier. */ - public String getBucketName() { + String getBucketName() { return bucketName; } /** * Gets the object name component of this resource identifier. */ - public String getObjectName() { + String getObjectName() { return objectName; } @@ -204,14 +204,14 @@ public String getObjectName() { * identifier for mutation requests. The generationId is *not* used when determining equals() or * hashCode(). */ - public long getGenerationId() { + long getGenerationId() { return generationId; } /** * Returns true if generationId is not UNKNOWN_GENERATION_ID. 
*/ - public boolean hasGenerationId() { + boolean hasGenerationId() { return generationId != UNKNOWN_GENERATION_ID; } @@ -244,7 +244,7 @@ public int hashCode() { * * @return A resourceId with a directory path corresponding to the given resourceId. */ - public StorageResourceId toDirectoryId() { + StorageResourceId toDirectoryId() { if (isStorageObject() && !StringPaths.isDirectoryPath(getObjectName())) { return new StorageResourceId(getBucketName(), StringPaths.toDirectoryPath(getObjectName())); } @@ -254,14 +254,14 @@ public StorageResourceId toDirectoryId() { /** * Parses {@link StorageResourceId} from specified string. */ - public static StorageResourceId fromStringPath(String path) { + static StorageResourceId fromStringPath(String path) { return fromStringPath(path, UNKNOWN_GENERATION_ID); } /** * Parses {@link StorageResourceId} from specified string and generationId. */ - public static StorageResourceId fromStringPath(String path, long generationId) { + static StorageResourceId fromStringPath(String path, long generationId) { Matcher matcher = GCS_PATH_PATTERN.matcher(path); checkArgument(matcher.matches(), "'%s' is not a valid GCS object name.", path); @@ -286,7 +286,7 @@ public static StorageResourceId fromStringPath(String path, long generationId) { * @param allowEmptyObjectName If true, a missing object name is not considered invalid. * @return a StorageResourceId that may be the GCS root, a Bucket, or a StorageObject. */ - public static StorageResourceId fromUriPath(URI path, boolean allowEmptyObjectName) { + static StorageResourceId fromUriPath(URI path, boolean allowEmptyObjectName) { return fromUriPath(path, allowEmptyObjectName, UNKNOWN_GENERATION_ID); } @@ -295,10 +295,11 @@ public static StorageResourceId fromUriPath(URI path, boolean allowEmptyObjectNa * * @param path The GCS URI to validate. * @param allowEmptyObjectName If true, a missing object name is not considered invalid. - * @param generationId The generationId to be used with precondition checks when using this + * @param generationId The generationId to be used with precondition checks when + * using this * @return a StorageResourceId that may be the GCS root, a Bucket, or a StorageObject. */ - public static StorageResourceId fromUriPath(URI path, boolean allowEmptyObjectName, + static StorageResourceId fromUriPath(URI path, boolean allowEmptyObjectName, long generationId) { LOG.trace("fromUriPath('{}', {})", path, allowEmptyObjectName); checkNotNull(path); @@ -309,7 +310,7 @@ public static StorageResourceId fromUriPath(URI path, boolean allowEmptyObjectNa path.getScheme(), path)); } - if (path.equals(GoogleCloudStorageFileSystem.GCS_ROOT)) { + if (path.equals(GoogleCloudStorageFileSystem.GCSROOT)) { return ROOT; } diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StringPaths.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StringPaths.java index 723a564e72e63..877226bb3533d 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StringPaths.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StringPaths.java @@ -26,7 +26,7 @@ import org.slf4j.LoggerFactory; /** - * Utility methods for String GCS paths + * Utility methods for String GCS paths. 
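The parsing and directory-conversion behavior above, made concrete (a sketch; the expected values match the unit tests added later in this series):

  StorageResourceId file =
      StorageResourceId.fromStringPath("gs://my-bucket/path/to/file.txt");
  // file.getBucketName() -> "my-bucket"; file.getObjectName() -> "path/to/file.txt"

  StorageResourceId dir = file.toDirectoryId();
  // dir.getObjectName() -> "path/to/file.txt/" (a new instance; directory
  // object names always end with '/')

  // "gs://my-bucket" parses as a bucket, and "gs://" parses as the GCS root.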
*/ final class StringPaths { @@ -36,8 +36,9 @@ private StringPaths() { } // 14x faster (20ns vs 280ns) than "^[a-z0-9_.-]+$" regex - private static final CharMatcher BUCKET_NAME_CHAR_MATCHER = CharMatcher.ascii().and( - CharMatcher.inRange('0', '9').or(CharMatcher.inRange('a', 'z')).or(CharMatcher.anyOf("_.-"))) + private static final CharMatcher BUCKET_NAME_CHAR_MATCHER = CharMatcher.ascii() + .and(CharMatcher.inRange('0', '9').or(CharMatcher.inRange('a', 'z')) + .or(CharMatcher.anyOf("_.-"))) .precomputed(); /** diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/UriPaths.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/UriPaths.java index 7fe8169e0bfbd..30e13cb33cbf1 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/UriPaths.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/UriPaths.java @@ -26,7 +26,7 @@ import java.net.URISyntaxException; /** - * Utility methods for URI GCS paths + * Utility methods for URI GCS paths. */ final class UriPaths { @@ -60,14 +60,14 @@ public static URI getParentPath(URI path) { checkNotNull(path); // Root path has no parent. - if (path.equals(GoogleCloudStorageFileSystem.GCS_ROOT)) { + if (path.equals(GoogleCloudStorageFileSystem.GCSROOT)) { return null; } StorageResourceId resourceId = StorageResourceId.fromUriPath(path, true); if (resourceId.isBucket()) { - return GoogleCloudStorageFileSystem.GCS_ROOT; + return GoogleCloudStorageFileSystem.GCSROOT; } String objectName = resourceId.getObjectName(); @@ -95,7 +95,7 @@ public static URI fromResourceId(StorageResourceId resourceId, boolean allowEmpt public static URI fromStringPathComponents(String bucketName, String objectName, boolean allowEmptyObjectName) { if (allowEmptyObjectName && bucketName == null && objectName == null) { - return GoogleCloudStorageFileSystem.GCS_ROOT; + return GoogleCloudStorageFileSystem.GCSROOT; } String authority = StringPaths.validateBucketName(bucketName); diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/VerificationAttributes.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/VerificationAttributes.java index 814661a4b2cd2..4155482fc7d33 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/VerificationAttributes.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/VerificationAttributes.java @@ -26,7 +26,7 @@ class VerificationAttributes { private final byte[] md5hash; private final byte[] crc32c; - public VerificationAttributes(@Nullable byte[] md5hash, @Nullable byte[] crc32c) { + VerificationAttributes(@Nullable byte[] md5hash, @Nullable byte[] crc32c) { this.md5hash = md5hash; this.crc32c = crc32c; } @@ -35,7 +35,7 @@ public VerificationAttributes(@Nullable byte[] md5hash, @Nullable byte[] crc32c) * MD5 hash of an object, if available. */ @Nullable - public byte[] getMd5hash() { + byte[] getMd5hash() { return md5hash; } @@ -43,7 +43,7 @@ public byte[] getMd5hash() { * CRC32c checksum of an object, if available. 
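The precomputed CharMatcher above encodes the same rule as the "^[a-z0-9_.-]+$" regex it replaces. What the validation accepts and rejects, in brief (a sketch; behavior matches the tests added later in this series):

  String bucket = StringPaths.validateBucketName("my-bucket/");
  // -> "my-bucket" (a single trailing '/' is dropped)

  String object = StringPaths.validateObjectName("/path/to/object", false);
  // -> "path/to/object" (a leading '/' is dropped)

  // StringPaths.validateBucketName("MyBucket") throws IllegalArgumentException:
  // uppercase characters are outside the matcher's accepted set.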
*/ @Nullable - public byte[] getCrc32c() { + byte[] getCrc32c() { return crc32c; } diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/package-info.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/package-info.java new file mode 100644 index 0000000000000..eedfb7822acd7 --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Google Cloud Storage Filesystem. + */ + +package org.apache.hadoop.fs.gs; \ No newline at end of file From aa6a14959e6e280084925c4a720e1674cca2b61e Mon Sep 17 00:00:00 2001 From: Arunkumar Chacko Date: Thu, 8 May 2025 03:28:06 +0000 Subject: [PATCH 4/8] Fix spotbugs and style issues --- hadoop-tools/hadoop-gcp/pom.xml | 8 ++-- .../apache/hadoop/fs/gs/CreateOptions.java | 13 ------ .../GoogleCloudStorageClientWriteChannel.java | 1 - .../fs/gs/GoogleHadoopOutputStream.java | 43 +------------------ .../org/apache/hadoop/fs/gs/StringPaths.java | 2 +- 5 files changed, 5 insertions(+), 62 deletions(-) diff --git a/hadoop-tools/hadoop-gcp/pom.xml b/hadoop-tools/hadoop-gcp/pom.xml index c177ba5a9200d..36abfa0b1fadc 100644 --- a/hadoop-tools/hadoop-gcp/pom.xml +++ b/hadoop-tools/hadoop-gcp/pom.xml @@ -1,4 +1,3 @@ - - unset - - unset - - unset - - 3600 - - - 200000 - - - - 00 - - unset @@ -113,12 +95,6 @@ ${test.build.dir}/${surefire.forkNumber} ${hadoop.tmp.dir}/${surefire.forkNumber} job-${job.id}-fork-000${surefire.forkNumber} - - - ${fs.gs.scale.test.enabled} - ${fs.gs.scale.test.huge.filesize} - ${fs.gs.scale.test.huge.partitionsize} - ${fs.gs.scale.test.timeout} @@ -150,26 +126,8 @@ job-${job.id}-fork-000${surefire.forkNumber} - - ${fs.gs.scale.test.enabled} - ${fs.gs.scale.test.huge.filesize} - ${fs.gs.scale.test.huge.partitionsize} - ${fs.gs.scale.test.timeout} ${test.integration.timeout} - - - - ${root.tests.enabled} - - - - - - - - - **/ITest*.java @@ -187,15 +145,6 @@ false - - ${fs.gs.scale.test.enabled} - ${fs.gs.scale.test.huge.filesize} - ${fs.gs.scale.test.huge.partitionsize} - ${fs.gs.scale.test.timeout} - - - - job-${job.id} @@ -231,14 +180,8 @@ false - - ${fs.gs.scale.test.enabled} - ${fs.gs.scale.test.huge.filesize} - ${fs.gs.scale.test.timeout} - job-${job.id} - ${fs.gs.scale.test.timeout} @@ -246,46 +189,6 @@ - - - - scale - - - scale - - - - true - - - - - - prefetch - - - prefetch - - - - prefetch - - - - - - analytics - - - analytics - - - - analytics - - - @@ -469,54 +372,6 @@ - - org.apache.maven.plugins - maven-dependency-plugin - - - deplist1 - compile - - list - - - - ${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-optional.txt - - - - copy - test-compile - - copy-dependencies - - - test - so,dll,dylib - 
${project.build.directory}/native-libs - - - - package - - copy-dependencies - - - ${project.build.directory}/lib - - - - deplist2 - compile - - list - - - ${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-builtin.txt - - - - org.apache.maven.plugins maven-enforcer-plugin diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/CreateOptions.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/CreateOptions.java index 1a03b6b83164b..c9b44a1a481b1 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/CreateOptions.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/CreateOptions.java @@ -22,7 +22,6 @@ import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; -import java.time.Duration; import java.util.Map; import javax.annotation.Nullable; @@ -97,8 +96,6 @@ long getOverwriteGenerationId() { static class CreateOperationOptionsBuilder { private Map attributes = ImmutableMap.of(); private String contentType = "application/octet-stream"; - private boolean ensureNoDirectoryConflict = true; - private Duration interval = Duration.ZERO; private long overwriteGenerationId = StorageResourceId.UNKNOWN_GENERATION_ID; private WriteMode writeMode = WriteMode.CREATE_NEW; From 87ec68aa221a74b40755fe01baf48ce8bc25bc32 Mon Sep 17 00:00:00 2001 From: Arunkumar Chacko Date: Thu, 15 May 2025 03:41:12 +0000 Subject: [PATCH 6/8] Add specific protobuf version for GCS connector --- hadoop-project/pom.xml | 2 +- .../dev-support/findbugs-exclude.xml | 68 +------------------ hadoop-tools/hadoop-gcp/pom.xml | 9 +++ 3 files changed, 12 insertions(+), 67 deletions(-) diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 3c90a4a05b671..36da80aba37b7 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -86,7 +86,7 @@ - 3.25.3 + 2.5.0 provided diff --git a/hadoop-tools/hadoop-gcp/dev-support/findbugs-exclude.xml b/hadoop-tools/hadoop-gcp/dev-support/findbugs-exclude.xml index 11410a24c1ad2..ee97ad7a9b07c 100644 --- a/hadoop-tools/hadoop-gcp/dev-support/findbugs-exclude.xml +++ b/hadoop-tools/hadoop-gcp/dev-support/findbugs-exclude.xml @@ -15,75 +15,11 @@ limitations under the License. 
--> - - - - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/hadoop-tools/hadoop-gcp/pom.xml b/hadoop-tools/hadoop-gcp/pom.xml index 1a92e22440e5f..d9671acd12dfa 100644 --- a/hadoop-tools/hadoop-gcp/pom.xml +++ b/hadoop-tools/hadoop-gcp/pom.xml @@ -462,6 +462,15 @@ com.google.protobuf protobuf-java + + 3.25.3 From 6901f5d4c53cbe3f2be7e4cd8bb4052892deea54 Mon Sep 17 00:00:00 2001 From: Arunkumar Chacko Date: Wed, 21 May 2025 06:45:06 +0000 Subject: [PATCH 7/8] Add tests Supress a style issue --- hadoop-tools/hadoop-gcp/pom.xml | 61 ++-- .../src/config/checkstyle-suppressions.xml | 26 ++ .../hadoop/fs/gs/StorageResourceId.java | 2 + .../hadoop/fs/gs/TestStorageResourceId.java | 285 ++++++++++++++++++ .../apache/hadoop/fs/gs/TestStringPaths.java | 164 ++++++++++ .../org/apache/hadoop/fs/gs/TestUriPaths.java | 150 +++++++++ .../org/apache/hadoop/fs/gs/package-info.java | 22 ++ 7 files changed, 692 insertions(+), 18 deletions(-) create mode 100644 hadoop-tools/hadoop-gcp/src/config/checkstyle-suppressions.xml create mode 100644 hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/TestStorageResourceId.java create mode 100644 hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/TestStringPaths.java create mode 100644 hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/TestUriPaths.java create mode 100644 hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/package-info.java diff --git a/hadoop-tools/hadoop-gcp/pom.xml b/hadoop-tools/hadoop-gcp/pom.xml index d9671acd12dfa..d5744f1f97c44 100644 --- a/hadoop-tools/hadoop-gcp/pom.xml +++ b/hadoop-tools/hadoop-gcp/pom.xml @@ -361,6 +361,13 @@ Max + + + org.apache.maven.plugins + maven-checkstyle-plugin + + src/config/checkstyle-suppressions.xml + org.apache.maven.plugins @@ -413,6 +420,24 @@ + + + + + com.google.protobuf + protobuf-java + 3.25.5 + + + + org.apache.hadoop @@ -427,14 +452,13 @@ javax.enterprise cdi-api + + + com.google.protobuf + protobuf-java + - - org.apache.hadoop - hadoop-common - test - test-jar - org.assertj assertj-core @@ -458,21 +482,22 @@ com.google.cloud google-cloud-storage + + + org.junit.jupiter + junit-jupiter-api + test - com.google.protobuf - protobuf-java - - 3.25.3 + org.junit.jupiter + junit-jupiter-engine + test + + + org.junit.jupiter + junit-jupiter-params + test - diff --git a/hadoop-tools/hadoop-gcp/src/config/checkstyle-suppressions.xml b/hadoop-tools/hadoop-gcp/src/config/checkstyle-suppressions.xml new file mode 100644 index 0000000000000..8c765bc97f92a --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/config/checkstyle-suppressions.xml @@ -0,0 +1,26 @@ + + + + + + + + + + diff --git a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StorageResourceId.java b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StorageResourceId.java index c889ee054c6bd..5935564feedfa 100644 --- a/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StorageResourceId.java +++ b/hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/StorageResourceId.java @@ -262,6 +262,8 @@ static StorageResourceId fromStringPath(String path) { * Parses {@link StorageResourceId} from specified string and generationId. 
*/ static StorageResourceId fromStringPath(String path, long generationId) { + checkArgument(path != null, "path must not be null"); + Matcher matcher = GCS_PATH_PATTERN.matcher(path); checkArgument(matcher.matches(), "'%s' is not a valid GCS object name.", path); diff --git a/hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/TestStorageResourceId.java b/hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/TestStorageResourceId.java new file mode 100644 index 0000000000000..e0a39b2d7e403 --- /dev/null +++ b/hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/TestStorageResourceId.java @@ -0,0 +1,285 @@ +/* + * Copyright 2013 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.gs; + +import java.net.URI; + +import org.junit.Test; + +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertSame; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public class TestStorageResourceId { + @Test + public void testConstructorInvalid() { + assertThrows(IllegalArgumentException.class, () -> { + new StorageResourceId(null); + }); + + assertThrows(IllegalArgumentException.class, () -> { + new StorageResourceId(""); + }); + + assertThrows(IllegalArgumentException.class, () -> { + new StorageResourceId(null, null); + }); + + assertThrows(IllegalArgumentException.class, () -> { + new StorageResourceId("foo", null); + }); + + assertThrows(IllegalArgumentException.class, () -> { + new StorageResourceId("", null); + }); + + assertThrows(IllegalArgumentException.class, () -> { + new StorageResourceId(null, null, 0L); + }); + + assertThrows(IllegalArgumentException.class, () -> { + new StorageResourceId("foo", null, 0L); + }); + + assertThrows(IllegalArgumentException.class, () -> { + new StorageResourceId("", null, 0L); + }); + + assertThrows(IllegalArgumentException.class, () -> { + new StorageResourceId(null, 0L); + }); + + assertThrows(IllegalArgumentException.class, () -> { + new StorageResourceId("", 0L); + }); + } + + @Test + public void testFromStringPathInvalid() { + assertThrows(IllegalArgumentException.class, () -> { + StorageResourceId.fromStringPath(null); + }); + + assertThrows(IllegalArgumentException.class, () -> { + StorageResourceId.fromStringPath(""); + }); + + assertThrows(IllegalArgumentException.class, () -> { + StorageResourceId.fromStringPath("foo"); + }); + + assertThrows(IllegalArgumentException.class, () -> { + StorageResourceId.fromStringPath("/foo/bar"); + }); + + assertThrows(IllegalArgumentException.class, () -> { + StorageResourceId.fromStringPath("gs:///foo/bar"); + }); + } + + 
@Test + public void testConstructor() { + String bucketName = "testbucketname"; + String objectName = "a/b/c.txt"; + + verify(new StorageResourceId(bucketName), bucketName, + StorageResourceId.UNKNOWN_GENERATION_ID, null, false, + true, true, false, false); + + verify(new StorageResourceId(bucketName, objectName), bucketName, + StorageResourceId.UNKNOWN_GENERATION_ID, objectName, false, + false, false, true, false); + + long genId = System.currentTimeMillis(); + verify(new StorageResourceId(bucketName, objectName, genId), bucketName, + genId, objectName, true, + false, false, true, false); + + verify(new StorageResourceId(bucketName, genId), bucketName, + genId, null, true, + true, true, false, false); + } + + @Test + public void testEqualsBucket() { + StorageResourceId bucket1 = new StorageResourceId("test-bucket"); + StorageResourceId bucket2 = new StorageResourceId("test-bucket"); + assertTrue(bucket1.equals(bucket2)); + assertEquals(bucket1.hashCode(), bucket2.hashCode()); + } + + @Test + public void testEqualsObject() { + StorageResourceId obj1 = new StorageResourceId("test-bucket", "test-object"); + StorageResourceId obj2 = new StorageResourceId("test-bucket", "test-object"); + assertTrue(obj1.equals(obj2)); + assertEquals(obj1.hashCode(), obj2.hashCode()); + } + + @Test + public void testEqualsDifferentBucket() { + StorageResourceId bucket1 = new StorageResourceId("test-bucket"); + StorageResourceId bucket2 = new StorageResourceId("other-bucket"); + assertFalse(bucket1.equals(bucket2)); + } + + @Test + public void testEqualsDifferentObject() { + StorageResourceId obj1 = new StorageResourceId("test-bucket", "test-object"); + StorageResourceId obj2 = new StorageResourceId("test-bucket", "other-object"); + assertFalse(obj1.equals(obj2)); + } + + @Test + public void testToDirectoryIdFromFile() { + StorageResourceId fileId = new StorageResourceId("my-bucket", "path/to/file.txt"); + StorageResourceId dirId = fileId.toDirectoryId(); + + assertNotSame(fileId, dirId); // Should return a new instance + assertTrue(dirId.isDirectory()); + assertEquals("my-bucket", dirId.getBucketName()); + assertEquals("path/to/file.txt/", dirId.getObjectName()); + assertEquals(fileId.getGenerationId(), dirId.getGenerationId()); + } + + @Test + public void testToDirectoryIdFromDirectoryObject() { + StorageResourceId dirIdOriginal = new StorageResourceId("my-bucket", "path/to/dir/"); + StorageResourceId dirIdConverted = dirIdOriginal.toDirectoryId(); + + assertSame(dirIdOriginal, dirIdConverted); // Should return the same instance + assertTrue(dirIdConverted.isDirectory()); + assertEquals("path/to/dir/", dirIdConverted.getObjectName()); + } + + @Test + public void testToDirectoryIdFromBucket() { + StorageResourceId bucketId = new StorageResourceId("my-bucket"); + StorageResourceId convertedId = bucketId.toDirectoryId(); + assertSame(bucketId, convertedId); + assertTrue(convertedId.isBucket()); + } + + @Test + public void testFromStringPathRoot() { + StorageResourceId id = StorageResourceId.fromStringPath("gs://"); + assertTrue(id.isRoot()); + } + + @Test + public void testFromStringPathBucket() { + StorageResourceId id = StorageResourceId.fromStringPath("gs://my-bucket"); + assertTrue(id.isBucket()); + assertEquals("my-bucket", id.getBucketName()); + assertNull(id.getObjectName()); + assertEquals(StorageResourceId.UNKNOWN_GENERATION_ID, id.getGenerationId()); + } + + @ParameterizedTest + @ValueSource(strings = { + "gs://my-bucket/object", + "gs://my-bucket/folder/file.txt", + "gs://my-bucket/folder/" + }) + 
+  public void testFromStringPathObject(String path) {
+    String expectedBucket = path.split("/")[2];
+    String expectedObject =
+        path.substring(path.indexOf(expectedBucket) + expectedBucket.length() + 1);
+
+    StorageResourceId id = StorageResourceId.fromStringPath(path);
+    assertTrue(id.isStorageObject());
+    assertEquals(expectedBucket, id.getBucketName());
+    assertEquals(expectedObject, id.getObjectName());
+    assertEquals(StorageResourceId.UNKNOWN_GENERATION_ID, id.getGenerationId());
+  }
+
+  @Test
+  public void testFromStringPathObjectWithGenerationId() {
+    long genId = 12345L;
+    StorageResourceId id = StorageResourceId.fromStringPath("gs://my-bucket/object.txt", genId);
+    assertTrue(id.isStorageObject());
+    assertEquals("my-bucket", id.getBucketName());
+    assertEquals("object.txt", id.getObjectName());
+    assertEquals(genId, id.getGenerationId());
+    assertTrue(id.hasGenerationId());
+  }
+
+  @Test
+  public void testFromUriPathBucket() throws Exception {
+    URI uri = new URI("gs://my-bucket");
+    StorageResourceId id = StorageResourceId.fromUriPath(uri, true);
+    assertTrue(id.isBucket());
+    assertEquals("my-bucket", id.getBucketName());
+    assertNull(id.getObjectName());
+  }
+
+  @Test
+  public void testFromUriPathObject() throws Exception {
+    URI uri = new URI("gs://my-bucket/path/to/file.txt");
+    StorageResourceId id = StorageResourceId.fromUriPath(uri, false);
+    assertTrue(id.isStorageObject());
+    assertEquals("my-bucket", id.getBucketName());
+    assertEquals("path/to/file.txt", id.getObjectName());
+  }
+
+  @Test
+  public void testFromUriPathObjectWithGenerationId() throws Exception {
+    URI uri = new URI("gs://my-bucket/object.txt");
+    long genId = 54321L;
+    StorageResourceId id = StorageResourceId.fromUriPath(uri, false, genId);
+    assertTrue(id.isStorageObject());
+    assertEquals("my-bucket", id.getBucketName());
+    assertEquals("object.txt", id.getObjectName());
+    assertEquals(genId, id.getGenerationId());
+    assertTrue(id.hasGenerationId());
+  }
+
+  @Test
+  public void testFromUriPathBucketWithGenerationId() throws Exception {
+    assertThrows(IllegalArgumentException.class, () -> {
+      URI uri = new URI("gs://my-bucket");
+      long genId = 54321L;
+      StorageResourceId.fromUriPath(uri, false, genId);
+    });
+  }
+
+  private static void verify(
+      StorageResourceId id,
+      String bucketName,
+      long generationId,
+      String objectName,
+      boolean hasGenerationId,
+      boolean isBucket,
+      boolean isDirectory,
+      boolean isStorageObject,
+      boolean isRoot) {
+    assertEquals(bucketName, id.getBucketName());
+    assertEquals(generationId, id.getGenerationId());
+    assertEquals(objectName, id.getObjectName());
+    assertEquals(hasGenerationId, id.hasGenerationId());
+    assertEquals(isBucket, id.isBucket());
+    assertEquals(isDirectory, id.isDirectory());
+    assertEquals(isStorageObject, id.isStorageObject());
+    assertEquals(isRoot, id.isRoot());
+  }
+}
diff --git a/hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/TestStringPaths.java b/hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/TestStringPaths.java
new file mode 100644
index 0000000000000..16234e0ce1d57
--- /dev/null
+++ b/hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/TestStringPaths.java
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.gs;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class TestStringPaths {
+  @Test
+  public void testValidateBucketNameValid() {
+    assertEquals("my-bucket", StringPaths.validateBucketName("my-bucket"));
+    assertEquals("my.bucket", StringPaths.validateBucketName("my.bucket"));
+    assertEquals("my_bucket", StringPaths.validateBucketName("my_bucket"));
+    assertEquals("bucket123", StringPaths.validateBucketName("bucket123"));
+    assertEquals("a", StringPaths.validateBucketName("a"));
+    assertEquals("long-bucket-name-with-numbers-123",
+        StringPaths.validateBucketName("long-bucket-name-with-numbers-123"));
+  }
+
+  @Test
+  public void testValidateBucketNameEndsWithSlash() {
+    assertEquals("my-bucket", StringPaths.validateBucketName("my-bucket/"));
+    assertEquals("another-bucket", StringPaths.validateBucketName("another-bucket/"));
+  }
+
+  @Test
+  public void testValidateBucketNameEmpty() {
+    assertThrows(IllegalArgumentException.class, () -> StringPaths.validateBucketName(""));
+  }
+
+  @Test
+  public void testValidateBucketNameNull() {
+    assertThrows(IllegalArgumentException.class, () -> StringPaths.validateBucketName(null));
+  }
+
+  @Test
+  public void testValidateBucketNameInvalidChars() {
+    // Space
+    assertThrows(IllegalArgumentException.class, () -> StringPaths.validateBucketName("my bucket"));
+  }
+
+  @Test
+  public void testValidateBucketNameInvalidChars2() {
+    // @ symbol
+    assertThrows(IllegalArgumentException.class, () -> StringPaths.validateBucketName("my@bucket"));
+  }
+
+  @Test
+  public void testValidateBucketNameUpperCase() {
+    // Uppercase
+    assertThrows(IllegalArgumentException.class, () -> StringPaths.validateBucketName("MyBucket"));
+  }
+
+  @Test
+  public void testValidateObjectNameValid() {
+    assertEquals("path/to/object",
+        StringPaths.validateObjectName("path/to/object", false));
+    assertEquals("object", StringPaths.validateObjectName("object", false));
+    assertEquals("dir/",
+        StringPaths.validateObjectName("dir/", false)); // Still valid after validation
+    assertEquals("", StringPaths.validateObjectName("/", true)); // Slash becomes empty if allowed
+    assertEquals("", StringPaths.validateObjectName("", true));
+  }
+
+  @Test
+  public void testValidateObjectNameLeadingSlash() {
+    assertEquals("path/to/object", StringPaths.validateObjectName("/path/to/object", false));
+    assertEquals("object", StringPaths.validateObjectName("/object", false));
+  }
+
+  @Test
+  public void testValidateObjectNameEmptyNotAllowed() {
+    assertThrows(IllegalArgumentException.class, () -> StringPaths.validateObjectName("", false));
+  }
+
+  @Test
+  public void testValidateObjectNameNullNotAllowed() {
+    assertThrows(IllegalArgumentException.class, () -> StringPaths.validateObjectName(null, false));
+  }
+
+  @Test
+  public void testValidateObjectNameEmptyAllowed() {
+    assertEquals("", StringPaths.validateObjectName("", true));
+    assertEquals("", StringPaths.validateObjectName(null, true));
+    assertEquals("", StringPaths.validateObjectName("/", true)); // Single slash becomes empty
+  }
+
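+  // Descriptive note: taken together, the cases around this point show that
+  // validateObjectName strips one leading slash, rejects consecutive slashes
+  // (below), and maps null, "" and "/" to the empty name only when the caller
+  // explicitly allows empty object names.
+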
+  @Test
+  public void testValidateObjectNameConsecutiveSlashes() {
+    assertThrows(IllegalArgumentException.class,
+        () -> StringPaths.validateObjectName("path//to/object", false));
+  }
+
+  @Test
+  public void testValidateObjectNameConsecutiveSlashesAtStart() {
+    assertThrows(IllegalArgumentException.class,
+        () -> StringPaths.validateObjectName("//path/to/object", false));
+  }
+
+  @Test
+  public void testValidateObjectNameConsecutiveSlashesAtEnd() {
+    assertThrows(IllegalArgumentException.class,
+        () -> StringPaths.validateObjectName("path/to/object//", false));
+  }
+
+  @Test
+  public void testFromComponentsValid() {
+    assertEquals("gs://my-bucket/path/to/object",
+        StringPaths.fromComponents("my-bucket", "path/to/object"));
+    assertEquals("gs://my-bucket/dir/", StringPaths.fromComponents("my-bucket", "dir/"));
+    assertEquals("gs://my-bucket/", StringPaths.fromComponents("my-bucket", ""));
+  }
+
+  @Test
+  public void testFromComponentsNullBucketNonNullObject() {
+    assertThrows(IllegalArgumentException.class,
+        () -> StringPaths.fromComponents(null, "path/to/object"));
+  }
+
+  @Test
+  public void testFromComponentsNullBucketAndObject() {
+    assertEquals("gs://", StringPaths.fromComponents(null, null));
+  }
+
+  @Test
+  public void testIsDirectoryPath() {
+    assertTrue(StringPaths.isDirectoryPath("dir/"));
+    assertTrue(StringPaths.isDirectoryPath("path/to/dir/"));
+    assertFalse(StringPaths.isDirectoryPath("file.txt"));
+    assertFalse(StringPaths.isDirectoryPath("path/to/file.txt"));
+    assertFalse(StringPaths.isDirectoryPath(""));
+    assertFalse(StringPaths.isDirectoryPath(null));
+  }
+
+  @Test
+  public void testToFilePath() {
+    assertEquals("path/to/file", StringPaths.toFilePath("path/to/file/"));
+    assertEquals("file.txt", StringPaths.toFilePath("file.txt"));
+    assertEquals("dir", StringPaths.toFilePath("dir/"));
+    assertEquals("", StringPaths.toFilePath(""));
+    assertNull(StringPaths.toFilePath(null));
+  }
+
+  @Test
+  public void testToDirectoryPath() {
+    assertEquals("path/to/dir/", StringPaths.toDirectoryPath("path/to/dir"));
+    assertEquals("dir/", StringPaths.toDirectoryPath("dir/"));
+    assertEquals("file/", StringPaths.toDirectoryPath("file"));
+    assertEquals("", StringPaths.toDirectoryPath(""));
+    assertNull(StringPaths.toDirectoryPath(null));
+  }
+}
\ No newline at end of file
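Reviewer note (not part of the patch): a minimal sketch of how the helpers exercised above are expected to compose, relying only on behaviour asserted in these tests:

    String dir = StringPaths.toDirectoryPath("logs");            // "logs/"
    String path = StringPaths.fromComponents("my-bucket", dir);  // "gs://my-bucket/logs/"
    StorageResourceId id = StorageResourceId.fromStringPath(path);
    // id.isStorageObject() holds, and the trailing slash marks it as a directory.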
diff --git a/hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/TestUriPaths.java b/hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/TestUriPaths.java
new file mode 100644
index 0000000000000..fe93a28dc435c
--- /dev/null
+++ b/hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/TestUriPaths.java
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.gs;
+
+import java.net.URI;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+public class TestUriPaths {
+  @Test
+  public void testToDirectoryFile() throws Exception {
+    URI fileUri = new URI("gs://my-bucket/path/to/file.txt");
+    URI expectedDirUri = new URI("gs://my-bucket/path/to/file.txt/");
+    URI result = UriPaths.toDirectory(fileUri);
+    assertEquals(expectedDirUri, result);
+  }
+
+  @Test
+  public void testToDirectoryAlreadyDirectory() throws Exception {
+    URI dirUri = new URI("gs://my-bucket/path/to/dir/");
+    URI result = UriPaths.toDirectory(dirUri);
+    assertEquals(dirUri, result);
+  }
+
+  @Test
+  public void testToDirectoryRootBucket() throws Exception {
+    URI bucketUri = new URI("gs://my-bucket");
+    URI result = UriPaths.toDirectory(bucketUri);
+    assertEquals(bucketUri, result); // Buckets are implicitly directories
+  }
+
+  @Test
+  public void testGetParentPathFile() throws Exception {
+    URI uri = new URI("gs://my-bucket/path/to/file.txt");
+    URI expectedParent = new URI("gs://my-bucket/path/to/");
+    assertEquals(expectedParent, UriPaths.getParentPath(uri));
+  }
+
+  @Test
+  public void testGetParentPathDirectory() throws Exception {
+    URI uri = new URI("gs://my-bucket/path/to/dir/");
+    URI expectedParent = new URI("gs://my-bucket/path/to/");
+    assertEquals(expectedParent, UriPaths.getParentPath(uri));
+  }
+
+  @Test
+  public void testGetParentPathObjectAtBucketRoot() throws Exception {
+    URI uri = new URI("gs://my-bucket/file.txt");
+    URI expectedParent = new URI("gs://my-bucket/");
+    assertEquals(expectedParent, UriPaths.getParentPath(uri));
+  }
+
+  @Test
+  public void testGetParentPathDirectoryAtBucketRoot() throws Exception {
+    URI uri = new URI("gs://my-bucket/dir/");
+    URI expectedParent = new URI("gs://my-bucket/");
+    assertEquals(expectedParent, UriPaths.getParentPath(uri));
+  }
+
+  @Test
+  public void testGetParentPathBucket() throws Exception {
+    URI uri = new URI("gs://my-bucket");
+    assertEquals(GoogleCloudStorageFileSystem.GCSROOT, UriPaths.getParentPath(uri));
+  }
+
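+  // Descriptive note: the getParentPath cases above pin down its contract: it
+  // walks up one "/"-delimited level, yields "gs://my-bucket/" for top-level
+  // entries, and returns GoogleCloudStorageFileSystem.GCSROOT for the bucket
+  // itself.
+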
+  @Test
+  public void testFromResourceIdObject() throws Exception {
+    StorageResourceId resourceId = new StorageResourceId("my-bucket", "path/to/object");
+    URI expectedUri = new URI("gs://my-bucket/path/to/object");
+    assertEquals(expectedUri, UriPaths.fromResourceId(resourceId, false));
+  }
+
+  @Test
+  public void testFromResourceIdDirectory() throws Exception {
+    StorageResourceId resourceId = new StorageResourceId("my-bucket", "path/to/dir/");
+    URI expectedUri = new URI("gs://my-bucket/path/to/dir/");
+    assertEquals(expectedUri, UriPaths.fromResourceId(resourceId, false));
+  }
+
+  @Test
+  public void testFromResourceIdBucket() throws Exception {
+    StorageResourceId resourceId = new StorageResourceId("my-bucket");
+    URI expectedUri = new URI("gs://my-bucket/");
+    assertEquals(expectedUri, UriPaths.fromResourceId(resourceId, true));
+  }
+
+  @Test
+  public void testFromStringPathComponentsValid() throws Exception {
+    assertEquals(new URI("gs://my-bucket/path/to/object"),
+        UriPaths.fromStringPathComponents("my-bucket", "path/to/object", false));
+    assertEquals(new URI("gs://my-bucket/path/to/dir/"),
+        UriPaths.fromStringPathComponents("my-bucket", "path/to/dir/", false));
+    assertEquals(new URI("gs://my-bucket/"),
+        UriPaths.fromStringPathComponents("my-bucket", null, true));
+    assertEquals(new URI("gs://my-bucket/"),
+        UriPaths.fromStringPathComponents("my-bucket", "", true));
+  }
+
+  @Test
+  public void testFromStringPathComponentsNullBucketNameNotAllowed() {
+    assertThrows(IllegalArgumentException.class,
+        () -> UriPaths.fromStringPathComponents(null, "object", false));
+  }
+
+  @Test
+  public void testFromStringPathComponentsEmptyObjectNameNotAllowed() {
+    assertThrows(IllegalArgumentException.class,
+        () -> UriPaths.fromStringPathComponents("my-bucket", "", false));
+  }
+
+  @Test
+  public void testFromStringPathComponentsConsecutiveSlashes() {
+    assertThrows(IllegalArgumentException.class,
+        () -> UriPaths.fromStringPathComponents("my-bucket", "path//to/object", false));
+  }
+
+  @Test
+  public void testFromStringPathComponentsInvalidBucketName() {
+    // Uppercase
+    assertThrows(IllegalArgumentException.class,
+        () -> UriPaths.fromStringPathComponents("MyBucket", "object", false));
+  }
+}
\ No newline at end of file
diff --git a/hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/package-info.java b/hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/package-info.java
new file mode 100644
index 0000000000000..fe289cc6d3dc2
--- /dev/null
+++ b/hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Google Cloud Storage Filesystem tests.
+ */
+package org.apache.hadoop.fs.gs;
\ No newline at end of file

From 58f1de488041bacbfba103965f4f84bbd5bc68dc Mon Sep 17 00:00:00 2001
From: Arunkumar Chacko
Date: Wed, 21 May 2025 15:07:53 +0000
Subject: [PATCH 8/8] Exclude a spotbugs error

---
 hadoop-tools/hadoop-gcp/dev-support/findbugs-exclude.xml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/hadoop-tools/hadoop-gcp/dev-support/findbugs-exclude.xml b/hadoop-tools/hadoop-gcp/dev-support/findbugs-exclude.xml
index ee97ad7a9b07c..80be329bd6d16 100644
--- a/hadoop-tools/hadoop-gcp/dev-support/findbugs-exclude.xml
+++ b/hadoop-tools/hadoop-gcp/dev-support/findbugs-exclude.xml
@@ -21,5 +21,9 @@
-
+
+
+
+
+
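Reviewer note: the body of the new findbugs-exclude.xml entry is not recoverable from this copy of the patch (the XML elements were stripped in formatting), so the hunk above is left as a gap. For orientation only, a SpotBugs exclusion generally has this shape; the class and bug pattern below are hypothetical stand-ins, not the ones this commit actually excludes:

    <Match>
      <Class name="org.apache.hadoop.fs.gs.GoogleHadoopOutputStream"/>
      <Bug pattern="EI_EXPOSE_REP"/>
    </Match>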