Introduce published segment cache in broker #6901
DataSegmentInterner.java (new file):

```java
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.client;

import com.google.common.collect.Interner;
import com.google.common.collect.Interners;
import org.apache.druid.timeline.DataSegment;

public class DataSegmentInterner
{
  public static final Interner<DataSegment> REALTIME_INTERNER = Interners.newWeakInterner();
  public static final Interner<DataSegment> HISTORICAL_INTERNER = Interners.newWeakInterner();

  private DataSegmentInterner()
  {
  }
}
```
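For context on the pattern: a Guava interner maps all equal instances to one canonical object, so each poll can deserialize fresh `DataSegment` copies without multiplying them on the broker's heap. A minimal sketch of the behavior (illustrative only — it uses `String` in place of `DataSegment`, and `InternerDemo` is a made-up name):

```java
import com.google.common.collect.Interner;
import com.google.common.collect.Interners;

public class InternerDemo
{
  public static void main(String[] args)
  {
    final Interner<String> interner = Interners.newWeakInterner();

    // Two equal but distinct instances, as two consecutive polls would produce.
    final String first = new String("wikipedia_2019-01-01_v1");
    final String second = new String("wikipedia_2019-01-01_v1");
    System.out.println(first == second);                                   // false

    // Interning maps both to the same canonical instance, so only one is retained.
    System.out.println(interner.intern(first) == interner.intern(second)); // true
  }
}
```

The split into `REALTIME_INTERNER` and `HISTORICAL_INTERNER` is presumably because a segment's metadata changes once it is published (for instance, its size becomes non-zero), so the realtime and historical variants would never intern to the same instance anyway; the javadoc requested in the review thread at the end explains the actual rationale.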
MetadataSegmentView.java (new file):

```java
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.client;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JavaType;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.inject.Inject;
import org.apache.druid.client.coordinator.Coordinator;
import org.apache.druid.discovery.DruidLeaderClient;
import org.apache.druid.guice.ManageLifecycle;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.java.util.common.lifecycle.LifecycleStart;
import org.apache.druid.java.util.common.lifecycle.LifecycleStop;
import org.apache.druid.java.util.emitter.EmittingLogger;
import org.apache.druid.java.util.http.client.Request;
import org.apache.druid.server.coordinator.BytesAccumulatingResponseHandler;
import org.apache.druid.timeline.DataSegment;
import org.jboss.netty.handler.codec.http.HttpMethod;
import org.joda.time.DateTime;

import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

@ManageLifecycle
public class MetadataSegmentView
{
  private static final int DEFAULT_POLL_PERIOD_IN_MS = 60000;

  private static final EmittingLogger log = new EmittingLogger(MetadataSegmentView.class);

  private final DruidLeaderClient coordinatorDruidLeaderClient;
  private final ObjectMapper jsonMapper;
  private final BytesAccumulatingResponseHandler responseHandler;
  private final BrokerSegmentWatcherConfig segmentWatcherConfig;

  private final Map<DataSegment, DateTime> publishedSegments = new ConcurrentHashMap<>();

  private ScheduledExecutorService scheduledExec;

  @Inject
  public MetadataSegmentView(
      final @Coordinator DruidLeaderClient druidLeaderClient,
      ObjectMapper jsonMapper,
      BytesAccumulatingResponseHandler responseHandler,
      final BrokerSegmentWatcherConfig segmentWatcherConfig
  )
  {
    this.coordinatorDruidLeaderClient = druidLeaderClient;
    this.jsonMapper = jsonMapper;
    this.responseHandler = responseHandler;
    this.segmentWatcherConfig = segmentWatcherConfig;
  }

  @LifecycleStart
  public void start()
  {
    scheduledExec = Execs.scheduledSingleThreaded("MetadataSegmentView-Cache--%d");
    scheduledExec.scheduleWithFixedDelay(
        () -> poll(),
        0,
        DEFAULT_POLL_PERIOD_IN_MS,
        TimeUnit.MILLISECONDS
    );
  }

  @LifecycleStop
  public void stop()
  {
    scheduledExec.shutdownNow();
    scheduledExec = null;
  }

  private void poll()
  {
    log.info("polling published segments from coordinator");
    // get published segments from the coordinator
    final JsonParserIterator<DataSegment> metadataSegments = getMetadataSegments(
        coordinatorDruidLeaderClient,
        jsonMapper,
        responseHandler
    );

    final DateTime ts = DateTimes.nowUtc();
    while (metadataSegments.hasNext()) {
      final DataSegment currentSegment = metadataSegments.next();
      final DataSegment interned;
      if (currentSegment.getSize() > 0) {
        interned = DataSegmentInterner.HISTORICAL_INTERNER.intern(currentSegment);
      } else {
        interned = DataSegmentInterner.REALTIME_INTERNER.intern(currentSegment);
      }
      publishedSegments.put(interned, ts);
    }
    // remove segments from the cache that were not seen in this poll
    publishedSegments.values().removeIf(v -> v != ts);

    if (segmentWatcherConfig.getWatchedDataSources() != null) {
      log.debug(
          "filtering datasources[%s] in published segments based on broker's watchedDataSources",
          segmentWatcherConfig.getWatchedDataSources()
      );
      publishedSegments.keySet()
                       .removeIf(key -> !segmentWatcherConfig.getWatchedDataSources().contains(key.getDataSource()));
    }
  }

  public Iterator<DataSegment> getPublishedSegments()
  {
    return publishedSegments.keySet().iterator();
  }

  // Note that the coordinator must be up for this call to return segments.
  private static JsonParserIterator<DataSegment> getMetadataSegments(
      DruidLeaderClient coordinatorClient,
      ObjectMapper jsonMapper,
      BytesAccumulatingResponseHandler responseHandler
  )
  {
    Request request;
    try {
      request = coordinatorClient.makeRequest(
          HttpMethod.GET,
          StringUtils.format("/druid/coordinator/v1/metadata/segments"),
          false
      );
    }
    catch (IOException e) {
      throw new RuntimeException(e);
    }
    ListenableFuture<InputStream> future = coordinatorClient.goAsync(
        request,
        responseHandler
    );

    final JavaType typeRef = jsonMapper.getTypeFactory().constructType(new TypeReference<DataSegment>()
    {
    });
    return new JsonParserIterator<>(
        typeRef,
        future,
        request.getUrl().toString(),
        null,
        request.getUrl().getHost(),
        jsonMapper,
        responseHandler
    );
  }
}
```

Review thread on the class declaration:

Contributor: It would be great if you could add a simple description of what this class does and how it's being used.

Author: Okay, added a description.
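The eviction step in `poll()` is a small mark-and-sweep: every segment seen in the current poll is stamped with the same `DateTime` instance, and the reference-inequality check `v -> v != ts` then removes exactly the entries the coordinator stopped reporting. A self-contained sketch of the pattern (illustrative; `MarkAndSweepCache` and its names are made up):

```java
import org.joda.time.DateTime;

import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public class MarkAndSweepCache
{
  private final Map<String, DateTime> cache = new ConcurrentHashMap<>();

  // 'polled' stands in for the segment stream returned by the coordinator.
  public void poll(List<String> polled)
  {
    // One DateTime instance per round, so reference equality identifies the round.
    final DateTime ts = DateTime.now();
    for (String segment : polled) {
      cache.put(segment, ts); // mark: stamp everything seen this round
    }
    // sweep: entries still carrying an older stamp were not seen and are dropped
    cache.values().removeIf(v -> v != ts);
  }
}
```

Because the sweep compares references rather than timestamps, it is immune to clock adjustments; the flip side is that the stamp must not be re-created mid-poll.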
SystemSchema.java:

```diff
@@ -45,6 +45,7 @@
 import org.apache.calcite.schema.impl.AbstractTable;
 import org.apache.druid.client.ImmutableDruidServer;
 import org.apache.druid.client.JsonParserIterator;
+import org.apache.druid.client.MetadataSegmentView;
 import org.apache.druid.client.TimelineServerView;
 import org.apache.druid.client.coordinator.Coordinator;
 import org.apache.druid.client.indexing.IndexingService;
@@ -84,6 +85,7 @@
 import java.util.Objects;
 import java.util.Set;

 public class SystemSchema extends AbstractSchema
 {
   public static final String NAME = "sys";
@@ -149,6 +151,7 @@ public class SystemSchema extends AbstractSchema
   @Inject
   public SystemSchema(
       final DruidSchema druidSchema,
+      final MetadataSegmentView metadataView,
       final TimelineServerView serverView,
       final AuthorizerMapper authorizerMapper,
       final @Coordinator DruidLeaderClient coordinatorDruidLeaderClient,
@@ -158,11 +161,10 @@ public SystemSchema(
   {
     Preconditions.checkNotNull(serverView, "serverView");
     BytesAccumulatingResponseHandler responseHandler = new BytesAccumulatingResponseHandler();
-    SegmentsTable segmentsTable = new SegmentsTable(
+    final SegmentsTable segmentsTable = new SegmentsTable(
         druidSchema,
-        coordinatorDruidLeaderClient,
+        metadataView,
         jsonMapper,
-        responseHandler,
         authorizerMapper
     );
     this.tableMap = ImmutableMap.of(
@@ -182,23 +184,20 @@ public Map<String, Table> getTableMap()
   static class SegmentsTable extends AbstractTable implements ScannableTable
   {
     private final DruidSchema druidSchema;
-    private final DruidLeaderClient druidLeaderClient;
     private final ObjectMapper jsonMapper;
-    private final BytesAccumulatingResponseHandler responseHandler;
     private final AuthorizerMapper authorizerMapper;
+    private final MetadataSegmentView metadataView;

     public SegmentsTable(
         DruidSchema druidSchemna,
-        DruidLeaderClient druidLeaderClient,
+        MetadataSegmentView metadataView,
         ObjectMapper jsonMapper,
-        BytesAccumulatingResponseHandler responseHandler,
         AuthorizerMapper authorizerMapper
     )
     {
       this.druidSchema = druidSchemna;
-      this.druidLeaderClient = druidLeaderClient;
+      this.metadataView = metadataView;
       this.jsonMapper = jsonMapper;
-      this.responseHandler = responseHandler;
       this.authorizerMapper = authorizerMapper;
     }
```
```diff
@@ -231,21 +230,14 @@ public Enumerable<Object[]> scan(DataContext root)
         partialSegmentDataMap.put(h.getSegmentId(), partialSegmentData);
       }

-      //get published segments from coordinator
-      final JsonParserIterator<DataSegment> metadataSegments = getMetadataSegments(
-          druidLeaderClient,
-          jsonMapper,
-          responseHandler
-      );
+      //get published segments from metadata segment cache
+      final Iterator<DataSegment> pubSegments = metadataView.getPublishedSegments();

       final Set<SegmentId> segmentsAlreadySeen = new HashSet<>();

       final FluentIterable<Object[]> publishedSegments = FluentIterable
-          .from(() -> getAuthorizedPublishedSegments(
-              metadataSegments,
-              root
-          ))
-          .transform((DataSegment val) -> {
+          .from(() -> pubSegments)
+          .transform(val -> {
             try {
               segmentsAlreadySeen.add(val.getId());
               final PartialSegmentData partialSegmentData = partialSegmentDataMap.get(val.getId());
```
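The new `.from(() -> pubSegments)` works because `Iterable` is a functional interface: the lambda re-exposes the one-shot cache iterator, producing a lazy, strictly single-pass sequence. A compact illustration of the idiom (hypothetical `toRows` helper and column set, not the PR's actual row layout):

```java
import com.google.common.collect.FluentIterable;
import org.apache.druid.timeline.DataSegment;

import java.util.Iterator;

class SegmentRows
{
  // The lambda satisfies Iterable<DataSegment>; every iterator() call returns the
  // same underlying iterator, so the resulting FluentIterable is consumable once.
  static FluentIterable<Object[]> toRows(Iterator<DataSegment> pubSegments)
  {
    return FluentIterable
        .from(() -> pubSegments)
        .transform(segment -> new Object[]{segment.getId().toString(), segment.getSize()});
  }
}
```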
```diff
@@ -340,27 +332,6 @@ private Iterator<Entry<DataSegment, SegmentMetadataHolder>> getAuthorizedAvailableSegments(
       return authorizedSegments.iterator();
     }

-    private CloseableIterator<DataSegment> getAuthorizedPublishedSegments(
-        JsonParserIterator<DataSegment> it,
-        DataContext root
-    )
-    {
-      final AuthenticationResult authenticationResult =
-          (AuthenticationResult) root.get(PlannerContext.DATA_CTX_AUTHENTICATION_RESULT);
-
-      Function<DataSegment, Iterable<ResourceAction>> raGenerator = segment -> Collections.singletonList(
-          AuthorizationUtils.DATASOURCE_READ_RA_GENERATOR.apply(segment.getDataSource()));
-
-      final Iterable<DataSegment> authorizedSegments = AuthorizationUtils.filterAuthorizedResources(
-          authenticationResult,
-          () -> it,
-          raGenerator,
-          authorizerMapper
-      );
-
-      return wrap(authorizedSegments.iterator(), it);
-    }
-
     private static class PartialSegmentData
     {
       private final long isAvailable;
```

Review thread on the removed `getAuthorizedPublishedSegments` method:

Contributor: Is this authorization not needed anymore?

Author: I don't think it's needed because, […]

Author: Sorry, this authorization check is still required, because the broker->coordinator call uses an escalated client, so […]
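Given the author's last comment, the check would have to come back on top of the cache. A sketch of how the removed filter could be reapplied to the cached iterator as a `SegmentsTable` method, reusing the helpers from the deleted code above verbatim (the `CloseableIterator` wrapping is dropped on the assumption that the in-memory iterator holds no connection to clean up):

```java
private Iterator<DataSegment> getAuthorizedPublishedSegments(
    Iterator<DataSegment> it,
    DataContext root
)
{
  final AuthenticationResult authenticationResult =
      (AuthenticationResult) root.get(PlannerContext.DATA_CTX_AUTHENTICATION_RESULT);

  // Same resource-action generator as the removed method: read access per datasource.
  final Function<DataSegment, Iterable<ResourceAction>> raGenerator = segment -> Collections.singletonList(
      AuthorizationUtils.DATASOURCE_READ_RA_GENERATOR.apply(segment.getDataSource()));

  final Iterable<DataSegment> authorizedSegments = AuthorizationUtils.filterAuthorizedResources(
      authenticationResult,
      () -> it,
      raGenerator,
      authorizerMapper
  );

  return authorizedSegments.iterator();
}
```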
```diff
@@ -404,44 +375,6 @@ public long getNumRows()
       }
     }

-    // Note that coordinator must be up to get segments
-    private static JsonParserIterator<DataSegment> getMetadataSegments(
-        DruidLeaderClient coordinatorClient,
-        ObjectMapper jsonMapper,
-        BytesAccumulatingResponseHandler responseHandler
-    )
-    {
-      Request request;
-      try {
-        request = coordinatorClient.makeRequest(
-            HttpMethod.GET,
-            StringUtils.format("/druid/coordinator/v1/metadata/segments"),
-            false
-        );
-      }
-      catch (IOException e) {
-        throw new RuntimeException(e);
-      }
-      ListenableFuture<InputStream> future = coordinatorClient.goAsync(
-          request,
-          responseHandler
-      );
-
-      final JavaType typeRef = jsonMapper.getTypeFactory().constructType(new TypeReference<DataSegment>()
-      {
-      });
-      return new JsonParserIterator<>(
-          typeRef,
-          future,
-          request.getUrl().toString(),
-          null,
-          request.getUrl().getHost(),
-          jsonMapper,
-          responseHandler
-      );
-    }
-
     static class ServersTable extends AbstractTable implements ScannableTable
     {
       private final TimelineServerView serverView;
```
Review thread on DataSegmentInterner:

Contributor: Would you please add a javadoc about what this is doing and why we need two interners?

Author: Added docs.

Contributor: Interners with weak references shouldn't be used: #7395
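For reference on that last point: `Interners.newWeakInterner()` holds its canonical values weakly, so unreferenced entries can be garbage collected at the cost of per-entry `WeakReference` bookkeeping; Guava's strong variant skips that overhead but never releases entries. A one-line sketch of the alternative the linked issue weighs (illustrative, not what this PR ships):

```java
// Strongly held canonical instances: no WeakReference overhead, but entries
// are retained for the lifetime of the interner itself.
public static final Interner<DataSegment> HISTORICAL_INTERNER = Interners.newStrongInterner();
```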