
Commit d2bab39

Merge branch 'master' into scaling-visualization

2 parents: 0164105 + 580ec16

25 files changed (+843 additions, -295 deletions)

README.md

Lines changed: 22 additions & 4 deletions
@@ -209,22 +209,40 @@ Usage: (Options preceded by an asterisk are required) [options]
 The web based user interface is available on port 31100 of the scheduler by default. It displays real time information about the tasks running in the cluster and a basic configuration overview of the cluster.

-The user interface uses REST API of the Elasticsearch Mesos Framework. You can find the API documentation here: [docs.elasticsearchmesosui.apiary.io](http://docs.elasticsearchmesosui.apiary.io/).
+The user interface uses REST API of the Elasticsearch Mesos Framework. You can find the API documentation here: [docs.elasticsearchmesos.apiary.io](http://docs.elasticsearchmesos.apiary.io/).

 #### Cluster Overview

-![Tasks List](docs/screenshot-cluster.png)
+![Cluster Overview](docs/screenshot-cluster.png)

-Cluster overview page shows on the top the number of Elasticsearch nodes in the cluster, the overall amount of RAM and disk space allocated by the cluster. State of individual nodes is displayed in a bar, one color representing each state and the percentage of nodes being in this state.
+Cluster page shows on the top the number of Elasticsearch nodes in the cluster, the overall amount of RAM and disk space allocated by the cluster. State of individual nodes is displayed in a bar, one color representing each state and the percentage of nodes being in this state.

-Below you can find Configuration Overview section and Query Browser, that allows you to examine data stored on individual Elasticsearch nodes.
+Below you can see Performance Overview with the following metrics over time: number of indices, number of shards, number of documents in the cluster and the cluster data size.
+
+#### Scaling
+
+![Scaling](docs/screenshot-scaling.png)
+
+This simple interface allows you to specify a number of nodes to scale to.

 #### Tasks List

 ![Tasks List](docs/screenshot-tasks.png)

 Tasks list displays detailed information about all tasks in the cluster, not only those currently running, but also tasks being staged, finished or failed. Click through individual tasks to get access to Elasticsearch REST API.

+#### Configuration
+
+![Configuration](docs/screenshot-configuration.png)
+
+This is a read-only interface displaying an overview of the framework configuration.
+
+#### Query Browser
+
+![Query Browser](docs/screenshot-query-browser.png)
+
+Query Browser allows you to examine data stored on individual Elasticsearch nodes. In this example we searched for the word "Love" on `slave1` node. You can toggle between tabular view and raw results view mode, which displays the raw data returned from Elasticsearch `/_search` API endpoint.
+
 ### Known issues

 - Issue [#188](https://github.com/mesos/elasticsearch/issues/188): Database data IS NOT persisted to disk. Data storage is wholly reliant on cluster redundancy. This means that the framework is not yet recommended for production use.
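
The Query Browser described above reads its raw results from the Elasticsearch /_search endpoint. As a minimal sketch, assuming the node is reachable as slave1 on the default Elasticsearch HTTP port 9200 (in practice the framework assigns the HTTP port from the Mesos offer), the same "Love" query could be issued directly like this:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

// Minimal sketch: fetch the same raw JSON the Query Browser's raw view shows,
// straight from an Elasticsearch node's /_search endpoint.
// "slave1" and port 9200 are illustrative assumptions, not values from this commit.
public class SearchSketch {
    public static void main(String[] args) throws Exception {
        URL url = new URL("http://slave1:9200/_search?q=Love");
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        connection.setRequestMethod("GET");
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line); // raw Elasticsearch search response
            }
        }
    }
}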

docs/screenshot-cluster.png (-224 KB)
docs/screenshot-configuration.png (140 KB)
docs/screenshot-query-browser.png (114 KB)
docs/screenshot-scaling.png (54.4 KB)
docs/screenshot-tasks.png (-118 KB)

scheduler/src/main/java/org/apache/mesos/elasticsearch/scheduler/ElasticsearchScheduler.java

Lines changed: 24 additions & 44 deletions
@@ -7,7 +7,9 @@
 import org.apache.mesos.SchedulerDriver;
 import org.apache.mesos.elasticsearch.scheduler.cluster.ClusterMonitor;
 import org.apache.mesos.elasticsearch.scheduler.state.ClusterState;
+import org.apache.mesos.elasticsearch.scheduler.state.ESTaskStatus;
 import org.apache.mesos.elasticsearch.scheduler.state.FrameworkState;
+import org.apache.mesos.elasticsearch.scheduler.state.StatePath;

 import java.util.*;

@@ -28,6 +30,7 @@ public class ElasticsearchScheduler implements Scheduler {
     private Observable statusUpdateWatchers = new StatusUpdateObservable();
     private Boolean registered = false;
     private ClusterState clusterState;
+    OfferStrategy offerStrategy;

     public ElasticsearchScheduler(Configuration configuration, TaskInfoFactory taskInfoFactory) {
         this.configuration = configuration;
@@ -65,7 +68,10 @@ public void registered(SchedulerDriver driver, Protos.FrameworkID frameworkId, P
         LOGGER.info("Framework registered as " + frameworkId.getValue());

         clusterState = new ClusterState(configuration.getState(), frameworkState); // Must use new framework state. This is when we are allocated our FrameworkID.
-        clusterMonitor = new ClusterMonitor(configuration, this, driver, clusterState);
+        offerStrategy = new OfferStrategy(configuration, clusterState);
+        clusterMonitor = new ClusterMonitor(configuration, this, driver, new StatePath(configuration.getState()));
+        clusterState.getTaskList().forEach(clusterMonitor::startMonitoringTask); // Get all previous executors and start monitoring them.
+        statusUpdateWatchers.addObserver(clusterState);
         statusUpdateWatchers.addObserver(clusterMonitor);

         List<Protos.Resource> resources = Resources.buildFrameworkResources(configuration);
@@ -84,62 +90,29 @@ public void reregistered(SchedulerDriver driver, Protos.MasterInfo masterInfo) {
         LOGGER.info("Framework re-registered");
     }

-    // Todo, this massive if statement needs to be performed better.
     @Override
     public void resourceOffers(SchedulerDriver driver, List<Protos.Offer> offers) {
         if (!registered) {
             LOGGER.debug("Not registered, can't accept resource offers.");
             return;
         }
         for (Protos.Offer offer : offers) {
-            if (isHostAlreadyRunningTask(offer)) {
-                driver.declineOffer(offer.getId()); // DCOS certification 05
-                LOGGER.info("Declined offer: Host " + offer.getHostname() + " is already running an Elastisearch task");
-            } else if (clusterMonitor.getClusterState().getTaskList().size() == configuration.getElasticsearchNodes()) {
-                driver.declineOffer(offer.getId()); // DCOS certification 05
-                LOGGER.info("Declined offer: Mesos runs already runs " + configuration.getElasticsearchNodes() + " Elasticsearch tasks");
-            } else if (!containsTwoPorts(offer.getResourcesList())) {
-                LOGGER.info("Declined offer: Offer did not contain 2 ports for Elasticsearch client and transport connection");
-                driver.declineOffer(offer.getId());
-            } else if (!isEnoughCPU(offer.getResourcesList())) {
-                LOGGER.info("Declined offer: Not enough CPU resources");
-                driver.declineOffer(offer.getId());
-            } else if (!isEnoughRAM(offer.getResourcesList())) {
-                LOGGER.info("Declined offer: Not enough RAM resources");
-                driver.declineOffer(offer.getId());
-            } else if (!isEnoughDisk(offer.getResourcesList())) {
-                LOGGER.info("Not enough Disk resources");
+            final OfferStrategy.OfferResult result = offerStrategy.evaluate(offer);
+
+            if (!result.acceptable) {
+                LOGGER.debug("Declined offer: " + result.reason.orElse("Unknown"));
                 driver.declineOffer(offer.getId());
             } else {
-                LOGGER.info("Accepted offer: " + offer.getHostname());
                 Protos.TaskInfo taskInfo = taskInfoFactory.createTask(configuration, offer);
                 LOGGER.debug(taskInfo.toString());
                 driver.launchTasks(Collections.singleton(offer.getId()), Collections.singleton(taskInfo));
-                clusterMonitor.monitorTask(taskInfo); // Add task to cluster monitor
+                ESTaskStatus esTask = new ESTaskStatus(configuration.getState(), configuration.getFrameworkId(), taskInfo, new StatePath(configuration.getState())); // Write staging state to zk
+                clusterState.addTask(esTask); // Add tasks to cluster state and write to zk
+                clusterMonitor.startMonitoringTask(esTask); // Add task to cluster monitor
             }
         }
     }

-    private boolean isEnoughDisk(List<Protos.Resource> resourcesList) {
-        ResourceCheck resourceCheck = new ResourceCheck(Resources.RESOURCE_DISK);
-        return resourceCheck.isEnough(resourcesList, configuration.getDisk());
-    }
-
-    private boolean isEnoughCPU(List<Protos.Resource> resourcesList) {
-        ResourceCheck resourceCheck = new ResourceCheck(Resources.RESOURCE_CPUS);
-        return resourceCheck.isEnough(resourcesList, configuration.getCpus());
-    }
-
-    private boolean isEnoughRAM(List<Protos.Resource> resourcesList) {
-        ResourceCheck resourceCheck = new ResourceCheck(Resources.RESOURCE_MEM);
-        return resourceCheck.isEnough(resourcesList, configuration.getMem());
-    }
-
-    private boolean containsTwoPorts(List<Protos.Resource> resources) {
-        int count = Resources.selectTwoPortsFromRange(resources).size();
-        return count == 2;
-    }
-
     @Override
     public void offerRescinded(SchedulerDriver driver, Protos.OfferID offerId) {
         LOGGER.info("Offer " + offerId.getValue() + " rescinded");
@@ -171,8 +144,15 @@ public void executorLost(SchedulerDriver driver, Protos.ExecutorID executorId, P
         // This is never called by Mesos, so we have to call it ourselves via a healthcheck
         // https://issues.apache.org/jira/browse/MESOS-313
         LOGGER.info("Executor lost: " + executorId.getValue() +
-                "on slave " + slaveId.getValue() +
-                "with status " + status);
+                " on slave " + slaveId.getValue() +
+                " with status " + status);
+        try {
+            Protos.TaskInfo taskInfo = clusterState.getTask(executorId);
+            statusUpdate(driver, Protos.TaskStatus.newBuilder().setExecutorId(executorId).setSlaveId(slaveId).setTaskId(taskInfo.getTaskId()).setState(Protos.TaskState.TASK_LOST).build());
+            driver.killTask(taskInfo.getTaskId()); // It may not actually be lost, it may just have hanged. So Kill, just in case.
+        } catch (IllegalArgumentException e) {
+            LOGGER.warn("Unable to find TaskInfo with the given Executor ID", e);
+        }
     }

     @Override
@@ -182,7 +162,7 @@ public void error(SchedulerDriver driver, String message) {

     private boolean isHostAlreadyRunningTask(Protos.Offer offer) {
         Boolean result = false;
-        List<Protos.TaskInfo> stateList = clusterMonitor.getClusterState().getTaskList();
+        List<Protos.TaskInfo> stateList = clusterState.getTaskList();
         for (Protos.TaskInfo t : stateList) {
             if (t.getSlaveId().equals(offer.getSlaveId())) {
                 result = true;
scheduler/src/main/java/org/apache/mesos/elasticsearch/scheduler/OfferStrategy.java

Lines changed: 111 additions & 0 deletions
@@ -0,0 +1,111 @@
+package org.apache.mesos.elasticsearch.scheduler;
+
+import org.apache.log4j.Logger;
+import org.apache.mesos.Protos;
+import org.apache.mesos.elasticsearch.scheduler.state.ClusterState;
+
+import java.util.List;
+import java.util.Optional;
+
+import static java.util.Arrays.asList;
+
+/**
+ * Offer strategy
+ */
+public class OfferStrategy {
+    private static final Logger LOGGER = Logger.getLogger(ElasticsearchScheduler.class.toString());
+    private ClusterState clusterState;
+    private Configuration configuration;
+
+    private List<OfferRule> acceptanceRules = asList(
+            new OfferRule("Host already running task", this::isHostAlreadyRunningTask),
+            new OfferRule("Cluster size already fulfilled", offer -> clusterState.getTaskList().size() == configuration.getElasticsearchNodes()),
+            new OfferRule("Offer did not have 2 ports", offer -> !containsTwoPorts(offer.getResourcesList())),
+            new OfferRule("Offer did not have enough CPU resources", offer -> !isEnoughCPU(configuration, offer.getResourcesList())),
+            new OfferRule("Offer did not have enough RAM resources", offer -> !isEnoughRAM(configuration, offer.getResourcesList())),
+            new OfferRule("Offer did not have enough disk resources", offer -> !isEnoughDisk(configuration, offer.getResourcesList()))
+    );
+
+    public OfferStrategy(Configuration configuration, ClusterState clusterState) {
+        this.clusterState = clusterState;
+        this.configuration = configuration;
+    }
+
+    public OfferResult evaluate(Protos.Offer offer) {
+        final Optional<OfferRule> decline = acceptanceRules.stream().filter(offerRule -> offerRule.rule.accepts(offer)).limit(1).findFirst();
+        if (decline.isPresent()) {
+            return OfferResult.decline(decline.get().declineReason);
+        }
+
+        LOGGER.info("Accepted offer: " + offer.getHostname());
+        return OfferResult.accept();
+    }
+
+    /**
+     * Offer result
+     */
+    public static class OfferResult {
+        final boolean acceptable;
+        final Optional<String> reason;
+
+        private OfferResult(boolean acceptable, Optional<String> reason) {
+            this.acceptable = acceptable;
+            this.reason = reason;
+        }
+
+        public static OfferResult accept() {
+            return new OfferResult(true, Optional.<String>empty());
+        }
+
+        public static OfferResult decline(String reason) {
+            return new OfferResult(false, Optional.of(reason));
+        }
+    }
+
+    private boolean isHostAlreadyRunningTask(Protos.Offer offer) {
+        Boolean result = false;
+        List<Protos.TaskInfo> stateList = clusterState.getTaskList();
+        for (Protos.TaskInfo t : stateList) {
+            if (t.getSlaveId().equals(offer.getSlaveId())) {
+                result = true;
+            }
+        }
+        return result;
+    }
+    private boolean isEnoughDisk(Configuration configuration, List<Protos.Resource> resourcesList) {
+        return new ResourceCheck(Resources.RESOURCE_DISK).isEnough(resourcesList, configuration.getDisk());
+    }
+
+    private boolean isEnoughCPU(Configuration configuration, List<Protos.Resource> resourcesList) {
+        return new ResourceCheck(Resources.RESOURCE_CPUS).isEnough(resourcesList, configuration.getCpus());
+    }
+
+    private boolean isEnoughRAM(Configuration configuration, List<Protos.Resource> resourcesList) {
+        return new ResourceCheck(Resources.RESOURCE_MEM).isEnough(resourcesList, configuration.getMem());
+    }
+
+    private boolean containsTwoPorts(List<Protos.Resource> resources) {
+        return Resources.selectTwoPortsFromRange(resources).size() == 2;
+    }
+
+    /**
+     * Rule and reason container object
+     */
+    private static class OfferRule {
+        String declineReason;
+        Rule rule;
+
+        public OfferRule(String declineReason, Rule rule) {
+            this.declineReason = declineReason;
+            this.rule = rule;
+        }
+    }
+
+    /**
+     * Interface for checking offers
+     */
+    @FunctionalInterface
+    private interface Rule {
+        boolean accepts(Protos.Offer offer);
+    }
+}
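
OfferStrategy replaces the scheduler's nested if/else chain with a declarative list of decline rules evaluated in order: the first rule that matches an offer supplies the decline reason, otherwise the offer is accepted. Below is a self-contained sketch of that pattern, assuming simplified stand-in types in place of Protos.Offer and Configuration; FakeOffer and the thresholds are hypothetical and for illustration only.

import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.function.Predicate;

// Sketch of the rule-list pattern used by OfferStrategy: each rule pairs a
// decline reason with a predicate, and the first matching rule decides the outcome.
// FakeOffer is a hypothetical stand-in for Protos.Offer.
public class RuleListSketch {
    static final class FakeOffer {
        final double cpus;
        final double memMb;
        FakeOffer(double cpus, double memMb) { this.cpus = cpus; this.memMb = memMb; }
    }

    static final class Rule {
        final String declineReason;
        final Predicate<FakeOffer> declines;
        Rule(String declineReason, Predicate<FakeOffer> declines) {
            this.declineReason = declineReason;
            this.declines = declines;
        }
    }

    // Example thresholds; the real class delegates to ResourceCheck and Configuration.
    static final List<Rule> RULES = Arrays.asList(
            new Rule("Offer did not have enough CPU resources", offer -> offer.cpus < 1.0),
            new Rule("Offer did not have enough RAM resources", offer -> offer.memMb < 256)
    );

    // Returns the decline reason of the first matching rule, or empty if acceptable.
    static Optional<String> evaluate(FakeOffer offer) {
        return RULES.stream()
                .filter(rule -> rule.declines.test(offer))
                .findFirst()
                .map(rule -> rule.declineReason);
    }

    public static void main(String[] args) {
        System.out.println(evaluate(new FakeOffer(0.5, 512))); // CPU rule matches -> declined
        System.out.println(evaluate(new FakeOffer(2.0, 512))); // no rule matches -> accepted
    }
}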
