Introduce published segment cache in broker #6901
New file: org/apache/druid/client/DataSegmentInterner.java

@@ -0,0 +1,41 @@
/* Apache License, Version 2.0 header */
package org.apache.druid.client;

import com.google.common.collect.Interner;
import com.google.common.collect.Interners;
import org.apache.druid.timeline.DataSegment;

public class DataSegmentInterner
{
  private static final Interner<DataSegment> REALTIME_INTERNER = Interners.newWeakInterner();
  private static final Interner<DataSegment> HISTORICAL_INTERNER = Interners.newWeakInterner();

  private DataSegmentInterner()
  {
  }

  public static DataSegment intern(DataSegment segment)
  {
    return segment.getSize() > 0 ? HISTORICAL_INTERNER.intern(segment) : REALTIME_INTERNER.intern(segment);
Contributor: It's worth commenting why …
Author: Added a comment.
  }
}
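The review exchange above asks why there are two interners. Reading the code (the author's added comment is not shown in this excerpt), intern() routes on getSize() > 0, which suggests realtime segments report a size of 0 while published segments carry a positive size, so each kind gets its own pool. The sketch below only illustrates the reference sharing a Guava Interner provides, using String as a stand-in for DataSegment:

```java
import com.google.common.collect.Interner;
import com.google.common.collect.Interners;

public class InternerSketch
{
  public static void main(String[] args)
  {
    // An interner hands back one canonical instance per distinct value, so
    // equal copies arriving from repeated coordinator polls can share one object.
    Interner<String> interner = Interners.newWeakInterner();

    String first = interner.intern(new String("wikipedia_2019-01-01_v1_0"));
    String second = interner.intern(new String("wikipedia_2019-01-01_v1_0"));

    // Both calls return the same reference, not merely an equal one.
    System.out.println(first == second); // prints: true
  }
}
```

On a broker that sees the same segment through many polls and servers, sharing one canonical instance this way keeps heap usage roughly proportional to the number of distinct segments rather than the number of copies received.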
New file: org/apache/druid/client/MetadataSegmentView.java

@@ -0,0 +1,200 @@
/* Apache License, Version 2.0 header */
package org.apache.druid.client;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JavaType;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.inject.Inject;
import org.apache.druid.client.coordinator.Coordinator;
import org.apache.druid.discovery.DruidLeaderClient;
import org.apache.druid.guice.ManageLifecycle;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.java.util.common.lifecycle.LifecycleStart;
import org.apache.druid.java.util.common.lifecycle.LifecycleStop;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.http.client.Request;
import org.apache.druid.server.coordinator.BytesAccumulatingResponseHandler;
import org.apache.druid.timeline.DataSegment;
import org.jboss.netty.handler.codec.http.HttpMethod;
import org.joda.time.DateTime;

import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

/**
 * This class polls the coordinator in the background to keep the latest published segments.
 * Provides {@link #getPublishedSegments()} for others to get segments in the metadata store.
 */
@ManageLifecycle
public class MetadataSegmentView
Contributor: It would be great if you could add a simple description of what this class does and how it's being used.
Author: Okay, added a description.
{
  private static final int DEFAULT_POLL_PERIOD_IN_MS = 60000;
  private static final Logger log = new Logger(MetadataSegmentView.class);

  private final DruidLeaderClient coordinatorDruidLeaderClient;
  private final ObjectMapper jsonMapper;
  private final BytesAccumulatingResponseHandler responseHandler;
  private final BrokerSegmentWatcherConfig segmentWatcherConfig;

  private final ConcurrentMap<DataSegment, DateTime> publishedSegments = new ConcurrentHashMap<>();
  private ScheduledExecutorService scheduledExec;

  @Inject
  public MetadataSegmentView(
      final @Coordinator DruidLeaderClient druidLeaderClient,
      final ObjectMapper jsonMapper,
      final BytesAccumulatingResponseHandler responseHandler,
      final BrokerSegmentWatcherConfig segmentWatcherConfig
  )
  {
    this.coordinatorDruidLeaderClient = druidLeaderClient;
    this.jsonMapper = jsonMapper;
    this.responseHandler = responseHandler;
    this.segmentWatcherConfig = segmentWatcherConfig;
  }

  @LifecycleStart
  public void start()
  {
    scheduledExec = Execs.scheduledSingleThreaded("MetadataSegmentView-Cache--%d");
    scheduledExec.schedule(new PollTask(), 0, TimeUnit.MILLISECONDS);
  }

  @LifecycleStop
  public void stop()
  {
    scheduledExec.shutdownNow();
    scheduledExec = null;
  }
  private void poll()
  {
    log.info("polling published segments from coordinator");
    // get authorized published segments from the coordinator
    final JsonParserIterator<DataSegment> metadataSegments = getMetadataSegments(
        coordinatorDruidLeaderClient,
        jsonMapper,
        responseHandler,
        segmentWatcherConfig.getWatchedDataSources()
    );

    final DateTime timestamp = DateTimes.nowUtc();
    while (metadataSegments.hasNext()) {
      final DataSegment interned = DataSegmentInterner.intern(metadataSegments.next());
      // timestamp is used to filter deleted segments
      publishedSegments.put(interned, timestamp);
    }
    // Remove cache entries whose timestamp differs from this poll's timestamp:
    // a segment still carrying an earlier timestamp was not returned by the
    // coordinator in the latest poll, so it has likely been deleted and is
    // dropped from publishedSegments.
    final Set<DateTime> toBeRemovedSegments = publishedSegments.values()
                                                               .stream()
                                                               .filter(v -> v != timestamp)
                                                               .collect(Collectors.toSet());
    publishedSegments.values().removeAll(toBeRemovedSegments);

    if (segmentWatcherConfig.getWatchedDataSources() != null) {
      log.debug(
          "filtering datasources[%s] in published segments based on broker's watchedDataSources",
          segmentWatcherConfig.getWatchedDataSources()
      );
      publishedSegments.keySet()
                       .removeIf(key -> !segmentWatcherConfig.getWatchedDataSources().contains(key.getDataSource()));
    }
  }
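The removal step above is essentially a mark-and-sweep over the cache, keyed by a shared timestamp token. A stand-alone sketch (simplified types, not part of the PR) of why the reference comparison v != timestamp is sufficient:

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public class MarkAndSweepSketch
{
  public static void main(String[] args)
  {
    // Entries carry the token object of the poll that last saw them.
    Map<String, Object> cache = new ConcurrentHashMap<>();
    Object previousPoll = new Object();
    cache.put("deleted-segment", previousPoll);

    // The current poll stamps every segment it receives with one shared token.
    Object currentPoll = new Object();
    cache.put("live-segment", currentPoll);

    // Reference comparison works because the exact same token instance is
    // stored for every entry touched in this poll.
    cache.values().removeIf(value -> value != currentPoll);

    System.out.println(cache.keySet()); // prints: [live-segment]
  }
}
```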
  public Iterator<DataSegment> getPublishedSegments()
  {
    return publishedSegments.keySet().iterator();
  }

  // Note that coordinator must be up to get segments
  private static JsonParserIterator<DataSegment> getMetadataSegments(
      DruidLeaderClient coordinatorClient,
      ObjectMapper jsonMapper,
      BytesAccumulatingResponseHandler responseHandler,
      Set<String> watchedDataSources
  )
  {
    String query = "/druid/coordinator/v1/metadata/segments";
    if (watchedDataSources != null && !watchedDataSources.isEmpty()) {
      final StringBuilder sb = new StringBuilder();
      for (String ds : watchedDataSources) {
        sb.append("datasources=" + ds + "&");
      }
      sb.setLength(Math.max(sb.length() - 1, 0));
      query = "/druid/coordinator/v1/metadata/segments?" + sb;
    }
    Request request;
    try {
      request = coordinatorClient.makeRequest(
          HttpMethod.GET,
          StringUtils.format(query),
          false
      );
    }
    catch (IOException e) {
      throw new RuntimeException(e);
    }
    ListenableFuture<InputStream> future = coordinatorClient.goAsync(
        request,
        responseHandler
    );

    final JavaType typeRef = jsonMapper.getTypeFactory().constructType(new TypeReference<DataSegment>()
    {
    });
    return new JsonParserIterator<>(
        typeRef,
        future,
        request.getUrl().toString(),
        null,
        request.getUrl().getHost(),
        jsonMapper,
        responseHandler
    );
  }
  private class PollTask implements Callable<Void>
  {
    @Override
    public Void call()
    {
      poll();
      scheduledExec.schedule(new PollTask(), DEFAULT_POLL_PERIOD_IN_MS, TimeUnit.MILLISECONDS);
      return null;
    }
  }
}
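As a usage sketch, a broker-side consumer would read this cached view instead of issuing its own call to the coordinator's /druid/coordinator/v1/metadata/segments endpoint. The consumer class below is hypothetical and not part of this PR:

```java
import java.util.Iterator;
import org.apache.druid.timeline.DataSegment;

// Hypothetical consumer of the broker-side cache. It never talks to the
// coordinator directly; MetadataSegmentView's poll loop keeps the view fresh.
public class PublishedSegmentsReader
{
  private final MetadataSegmentView metadataSegmentView;

  public PublishedSegmentsReader(MetadataSegmentView metadataSegmentView)
  {
    this.metadataSegmentView = metadataSegmentView;
  }

  public int countPublishedSegments()
  {
    int count = 0;
    final Iterator<DataSegment> segments = metadataSegmentView.getPublishedSegments();
    while (segments.hasNext()) {
      segments.next();
      count++;
    }
    return count;
  }
}
```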
File: org/apache/druid/client/selector/ServerSelector.java

@@ -20,6 +20,7 @@
 package org.apache.druid.client.selector;

 import it.unimi.dsi.fastutil.ints.Int2ObjectRBTreeMap;
+import org.apache.druid.client.DataSegmentInterner;
 import org.apache.druid.server.coordination.DruidServerMetadata;
 import org.apache.druid.server.coordination.ServerType;
 import org.apache.druid.timeline.DataSegment;

@@ -50,7 +51,7 @@ public ServerSelector(
       TierSelectorStrategy strategy
   )
   {
-    this.segment = new AtomicReference<>(segment);
+    this.segment = new AtomicReference<>(DataSegmentInterner.intern(segment));
Contributor: I'm not sure …
Author: Okay.
     this.strategy = strategy;
     this.historicalServers = new Int2ObjectRBTreeMap<>(strategy.getComparator());
     this.realtimeServers = new Int2ObjectRBTreeMap<>(strategy.getComparator());
Changes to the coordinator resource that serves /druid/coordinator/v1/metadata/segments:

@@ -57,6 +57,7 @@
 import java.util.List;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.stream.Collectors;
 import java.util.stream.Stream;

 /**

@@ -148,14 +149,22 @@ public Response getDatabaseSegmentDataSource(@PathParam("dataSourceName") final
   @GET
   @Path("/segments")
   @Produces(MediaType.APPLICATION_JSON)
-  public Response getDatabaseSegments(@Context final HttpServletRequest req)
+  public Response getDatabaseSegments(
+      @Context final HttpServletRequest req,
+      @QueryParam("datasources") final Set<String> datasources
+  )
   {
-    final Collection<ImmutableDruidDataSource> druidDataSources = metadataSegmentManager.getDataSources();
+    Collection<ImmutableDruidDataSource> druidDataSources = metadataSegmentManager.getDataSources();
+    if (datasources != null && !datasources.isEmpty()) {
+      druidDataSources = druidDataSources.stream()
+                                         .filter(src -> datasources.contains(src.getName()))
+                                         .collect(Collectors.toSet());
Member: @surekhasaharan we don't want to collect ImmutableDruidDataSource objects, which are already known to be unique here, into a set. It induces expensive O(n) hash code computations (which is also totally unnecessary; …) (see the sketch after this diff)
Author: Thanks @leventov for catching this, will change to a list in a follow-up PR.
+    }
     final Stream<DataSegment> metadataSegments = druidDataSources
         .stream()
         .flatMap(t -> t.getSegments().stream());

-    Function<DataSegment, Iterable<ResourceAction>> raGenerator = segment -> Collections.singletonList(
+    final Function<DataSegment, Iterable<ResourceAction>> raGenerator = segment -> Collections.singletonList(
         AuthorizationUtils.DATASOURCE_READ_RA_GENERATOR.apply(segment.getDataSource()));

     final Iterable<DataSegment> authorizedSegments =
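Picking up the review comment above about collecting into a Set, here is a minimal, self-contained sketch of the follow-up the author mentions: collecting to a List instead, since the elements are already unique. Names and values are illustrative only:

```java
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class DatasourceFilterSketch
{
  public static void main(String[] args)
  {
    // Stand-in for metadataSegmentManager.getDataSources(); entries are already unique.
    Collection<String> allDataSources = Arrays.asList("wikipedia", "koalas", "flights");
    Set<String> requested = new HashSet<>(Arrays.asList("wikipedia", "flights"));

    // Collecting to a List keeps the filtering behavior while avoiding the
    // per-element hashCode() work that a HashSet would perform.
    List<String> filtered = allDataSources.stream()
                                          .filter(requested::contains)
                                          .collect(Collectors.toList());

    System.out.println(filtered); // prints: [wikipedia, flights]
  }
}
```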
Reviewer: Would you please add a javadoc about what this is doing and why we need two interners?
Author: Added docs.
Reviewer: Interners with weak references shouldn't be used: #7395
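For context on that last comment, Guava ships both weak- and strong-reference interners. A minimal sketch of the strong variant follows; whether the code was later switched to it is not shown in this excerpt:

```java
import com.google.common.collect.Interner;
import com.google.common.collect.Interners;

public class StrongInternerSketch
{
  public static void main(String[] args)
  {
    // Unlike newWeakInterner(), a strong interner holds its canonical
    // instances with strong references, so they stay cached until the
    // interner itself is discarded.
    Interner<String> interner = Interners.newStrongInterner();

    System.out.println(interner.intern("segment-key") == interner.intern("segment-key")); // prints: true
  }
}
```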