From 3f9ce19963643152b5af484eeb7d6928c34fdd6b Mon Sep 17 00:00:00 2001 From: Aman Poonia Date: Thu, 12 Dec 2019 14:37:10 +0530 Subject: [PATCH 1/8] HBASE-22285 A normalizer which merges small size regions with adjacent regions --- .../master/normalizer/MergeNormalizer.java | 154 +++++++++++++ .../normalizer/TestMergeNormalizer.java | 218 ++++++++++++++++++ 2 files changed, 372 insertions(+) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestMergeNormalizer.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java new file mode 100644 index 000000000000..c66355002480 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java @@ -0,0 +1,154 @@ +package org.apache.hadoop.hbase.master.normalizer; + +import com.google.protobuf.ServiceException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HBaseIOException; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.RegionLoad; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.master.MasterRpcServices; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.protobuf.RequestConverter; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; + + + +/** + * Implementation of MergeNormalizer + * + * Logic in use: + * + *
    + *
  1. get all regions of a given table + *
  2. get avg size S of each region (by total size of store files reported in RegionLoad) + *
  3. Otherwise, two region R1 and its smallest neighbor R2 are merged, + * if R1 + R1 < S, and all such regions are returned to be merged + *
  4. Otherwise, no action is performed + *
+ *

+ * Region sizes are coarse and approximate on the order of megabytes. Also, + * empty regions (less than 1MB) are also merged if the age of region is > MIN_DURATION_FOR_MERGE (default 2) + */ + +@InterfaceAudience.Private +public class MergeNormalizer implements RegionNormalizer { + private static final Log LOG = LogFactory.getLog(MergeNormalizer.class); + private static final int MIN_REGION_COUNT = 3; + private static final int MIN_DURATION_FOR_MERGE=2; + private MasterServices masterServices; + private MasterRpcServices masterRpcServices; + + @Override public void setMasterServices(MasterServices masterServices) { + this.masterServices = masterServices; + } + + @Override public void setMasterRpcServices(MasterRpcServices masterRpcServices) { + this.masterRpcServices = masterRpcServices; + } + + @Override public List computePlanForTable(TableName table) + throws HBaseIOException { + if (table == null || table.isSystemTable()) { + LOG.debug("Normalization of system table " + table + " isn't allowed"); + return null; + } + boolean mergeEnabled = true; + try { + mergeEnabled = masterRpcServices.isSplitOrMergeEnabled(null, + RequestConverter.buildIsSplitOrMergeEnabledRequest(Admin.MasterSwitchType.MERGE)).getEnabled(); + } catch (ServiceException se) { + LOG.debug("Unable to determine whether merge is enabled", se); + } + if (!mergeEnabled) { + LOG.debug("Merge disabled for table: " + table); + return null; + } + List plans = new ArrayList(); + List tableRegions = masterServices.getAssignmentManager().getRegionStates(). + getRegionsOfTable(table); + if (tableRegions == null || tableRegions.size() < MIN_REGION_COUNT) { + int nrRegions = tableRegions == null ? 0 : tableRegions.size(); + LOG.debug("Table " + table + " has " + nrRegions + " regions, required min number" + + " of regions for normalizer to run is " + MIN_REGION_COUNT + ", not running normalizer"); + return null; + } + + LOG.debug("Computing normalization plan for table: " + table + + ", number of regions: " + tableRegions.size()); + + long totalSizeMb = 0; + int acutalRegionCnt = 0; + + for (int i = 0; i < tableRegions.size(); i++) { + HRegionInfo hri = tableRegions.get(i); + long regionSize = getRegionSize(hri); + //don't consider regions that are in bytes for averaging the size. + if (regionSize > 0) { + acutalRegionCnt++; + totalSizeMb += regionSize; + } + } + + double avgRegionSize = acutalRegionCnt == 0 ? 0 : totalSizeMb / (double) acutalRegionCnt; + + LOG.debug("Table " + table + ", total aggregated regions size: " + totalSizeMb); + LOG.debug("Table " + table + ", average region size: " + avgRegionSize); + + int candidateIdx = 0; + while (candidateIdx < tableRegions.size()) { + HRegionInfo hri = tableRegions.get(candidateIdx); + long regionSize = getRegionSize(hri); + if (candidateIdx == tableRegions.size() - 1) { + break; + } + HRegionInfo hri2 = tableRegions.get(candidateIdx + 1); + long regionSize2 = getRegionSize(hri2); + if (regionSize >= 0 && regionSize2 >= 0 && regionSize + regionSize2 < avgRegionSize) { + Timestamp hriTime = new Timestamp(hri.getRegionId()); + Timestamp hri2Time = new Timestamp(hri2.getRegionId()); + Timestamp currentTime = new Timestamp(System.currentTimeMillis()); + try { + // atleast one of the two regions should be older than MIN_REGION_DURATION days + if (!(new Timestamp(hriTime.getTime() + TimeUnit.DAYS.toMillis(MIN_DURATION_FOR_MERGE))) + .after(currentTime) + || !(new Timestamp( + hri2Time.getTime() + TimeUnit.DAYS.toMillis(MIN_DURATION_FOR_MERGE))) + .after(currentTime)) { + LOG.info( + "Table " + table + ", small region size: " + regionSize + " plus its neighbor size: " + + regionSize2 + ", less than the avg size " + avgRegionSize + ", merging them"); + plans.add(new MergeNormalizationPlan(hri, hri2)); + candidateIdx++; + } + } catch (Exception e) { + e.printStackTrace(); + } + } + candidateIdx++; + } + if (plans.isEmpty()) { + LOG.debug("No normalization needed, regions look good for table: " + table); + return null; + } + return plans; + } + + private long getRegionSize(HRegionInfo hri) { + ServerName sn = masterServices.getAssignmentManager().getRegionStates(). + getRegionServerOfRegion(hri); + RegionLoad regionLoad = masterServices.getServerManager().getLoad(sn). + getRegionsLoad().get(hri.getRegionName()); + if (regionLoad == null) { + LOG.debug(hri.getRegionNameAsString() + " was not found in RegionsLoad"); + return -1; + } + return regionLoad.getStorefileSizeMB(); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestMergeNormalizer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestMergeNormalizer.java new file mode 100644 index 000000000000..d8687403aca6 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestMergeNormalizer.java @@ -0,0 +1,218 @@ +package org.apache.hadoop.hbase.master.normalizer; + +import com.google.protobuf.RpcController; +import com.google.protobuf.ServiceException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HBaseIOException; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.RegionLoad; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.master.MasterRpcServices; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.protobuf.generated.MasterProtos; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.RETURNS_DEEP_STUBS; +import static org.mockito.Mockito.when; + +@Category(SmallTests.class) +public class TestMergeNormalizer { + private static final Log LOG = LogFactory.getLog(MergeNormalizer.class); + + private static RegionNormalizer normalizer; + + // mocks + private static MasterServices masterServices; + private static MasterRpcServices masterRpcServices; + + @BeforeClass + public static void beforeAllTests() throws Exception { + normalizer = new MergeNormalizer(); + } + + @Test + public void testNoNormalizationForMetaTable() throws HBaseIOException { + TableName testTable = TableName.META_TABLE_NAME; + List hris = new ArrayList<>(); + Map regionSizes = new HashMap<>(); + + setupMocksForNormalizer(regionSizes, hris); + List plans = normalizer.computePlanForTable(testTable); + assertTrue(plans == null); + } + + @Test + public void testNoNormalizationIfTooFewRegions() throws HBaseIOException { + TableName testTable = TableName.valueOf("testSplitOfSmallRegion"); + List hris = new ArrayList<>(); + Map regionSizes = new HashMap<>(); + + HRegionInfo hri1 = new HRegionInfo(testTable, Bytes.toBytes("aaa"), Bytes.toBytes("bbb")); + hris.add(hri1); + regionSizes.put(hri1.getRegionName(), 10); + + HRegionInfo hri2 = new HRegionInfo(testTable, Bytes.toBytes("bbb"), Bytes.toBytes("ccc")); + hris.add(hri2); + regionSizes.put(hri2.getRegionName(), 15); + + setupMocksForNormalizer(regionSizes, hris); + List plans = normalizer.computePlanForTable(testTable); + assertTrue(plans == null); + } + + @Test + public void testNoNormalizationOnNormalizedCluster() throws HBaseIOException { + TableName testTable = TableName.valueOf("testSplitOfSmallRegion"); + List hris = new ArrayList<>(); + Map regionSizes = new HashMap<>(); + + HRegionInfo hri1 = new HRegionInfo(testTable, Bytes.toBytes("aaa"), Bytes.toBytes("bbb")); + hris.add(hri1); + regionSizes.put(hri1.getRegionName(), 10); + + HRegionInfo hri2 = new HRegionInfo(testTable, Bytes.toBytes("bbb"), Bytes.toBytes("ccc")); + hris.add(hri2); + regionSizes.put(hri2.getRegionName(), 15); + + HRegionInfo hri3 = new HRegionInfo(testTable, Bytes.toBytes("ccc"), Bytes.toBytes("ddd")); + hris.add(hri3); + regionSizes.put(hri3.getRegionName(), 8); + + HRegionInfo hri4 = new HRegionInfo(testTable, Bytes.toBytes("ddd"), Bytes.toBytes("eee")); + hris.add(hri4); + regionSizes.put(hri4.getRegionName(), 10); + + setupMocksForNormalizer(regionSizes, hris); + List plans = normalizer.computePlanForTable(testTable); + assertTrue(plans == null); + } + + @Test + public void testMergeOfSmallRegions() throws HBaseIOException { + TableName testTable = TableName.valueOf("testMergeOfSmallRegions"); + List hris = new ArrayList<>(); + Map regionSizes = new HashMap<>(); + + Timestamp currentTime = new Timestamp(System.currentTimeMillis()); + Timestamp threedaysBefore = new Timestamp(currentTime.getTime() - TimeUnit.DAYS.toMillis(3)); + + HRegionInfo hri1 = new HRegionInfo(testTable, Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),false,threedaysBefore.getTime()); + hris.add(hri1); + regionSizes.put(hri1.getRegionName(), 15); + + HRegionInfo hri2 = new HRegionInfo(testTable, Bytes.toBytes("bbb"), Bytes.toBytes("ccc"),false, threedaysBefore.getTime()); + hris.add(hri2); + regionSizes.put(hri2.getRegionName(), 5); + + HRegionInfo hri3 = new HRegionInfo(testTable, Bytes.toBytes("ccc"), Bytes.toBytes("ddd"),false, threedaysBefore.getTime()); + hris.add(hri3); + regionSizes.put(hri3.getRegionName(), 5); + + HRegionInfo hri4 = new HRegionInfo(testTable, Bytes.toBytes("ddd"), Bytes.toBytes("eee"),false,threedaysBefore.getTime()); + hris.add(hri4); + regionSizes.put(hri4.getRegionName(), 15); + + HRegionInfo hri5 = new HRegionInfo(testTable, Bytes.toBytes("eee"), Bytes.toBytes("fff")); + hris.add(hri5); + regionSizes.put(hri5.getRegionName(), 16); + + HRegionInfo hri6 = new HRegionInfo(testTable, Bytes.toBytes("fff"), Bytes.toBytes("ggg"),false,threedaysBefore.getTime()); + hris.add(hri6); + regionSizes.put(hri6.getRegionName(), 0); + + HRegionInfo hri7 = new HRegionInfo(testTable, Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),false); + hris.add(hri7); + regionSizes.put(hri7.getRegionName(), 0); + + setupMocksForNormalizer(regionSizes, hris); + List plans = normalizer.computePlanForTable(testTable); + + NormalizationPlan plan = plans.get(0); + assertTrue(plan instanceof MergeNormalizationPlan); + assertEquals(hri2, ((MergeNormalizationPlan) plan).getFirstRegion()); + assertEquals(hri3, ((MergeNormalizationPlan) plan).getSecondRegion()); + } + + @Test + public void testMergeOfNewSmallRegions() throws HBaseIOException { + TableName testTable = TableName.valueOf("testMergeOfNewSmallRegions"); + List hris = new ArrayList<>(); + Map regionSizes = new HashMap<>(); + + HRegionInfo hri1 = new HRegionInfo(testTable, Bytes.toBytes("aaa"), Bytes.toBytes("bbb")); + hris.add(hri1); + regionSizes.put(hri1.getRegionName(), 15); + + HRegionInfo hri2 = new HRegionInfo(testTable, Bytes.toBytes("bbb"), Bytes.toBytes("ccc")); + hris.add(hri2); + regionSizes.put(hri2.getRegionName(), 5); + + HRegionInfo hri3 = new HRegionInfo(testTable, Bytes.toBytes("ccc"), Bytes.toBytes("ddd")); + hris.add(hri3); + regionSizes.put(hri3.getRegionName(), 16); + + HRegionInfo hri4 = new HRegionInfo(testTable, Bytes.toBytes("ddd"), Bytes.toBytes("eee")); + hris.add(hri4); + regionSizes.put(hri4.getRegionName(), 15); + + HRegionInfo hri5 = new HRegionInfo(testTable, Bytes.toBytes("ddd"), Bytes.toBytes("eee")); + hris.add(hri4); + regionSizes.put(hri5.getRegionName(), 5); + + setupMocksForNormalizer(regionSizes, hris); + List plans = normalizer.computePlanForTable(testTable); + + assertTrue(plans == null); + } + + @SuppressWarnings("MockitoCast") + protected void setupMocksForNormalizer(Map regionSizes, + List hris) { + masterServices = Mockito.mock(MasterServices.class, RETURNS_DEEP_STUBS); + masterRpcServices = Mockito.mock(MasterRpcServices.class, RETURNS_DEEP_STUBS); + + // for simplicity all regions are assumed to be on one server; doesn't matter to us + ServerName sn = ServerName.valueOf("localhost", 0, 1L); + when(masterServices.getAssignmentManager().getRegionStates(). + getRegionsOfTable(any(TableName.class))).thenReturn(hris); + when(masterServices.getAssignmentManager().getRegionStates(). + getRegionServerOfRegion(any(HRegionInfo.class))).thenReturn(sn); + + for (Map.Entry region : regionSizes.entrySet()) { + RegionLoad regionLoad = Mockito.mock(RegionLoad.class); + when(regionLoad.getName()).thenReturn(region.getKey()); + when(regionLoad.getStorefileSizeMB()).thenReturn(region.getValue()); + + // this is possibly broken with jdk9, unclear if false positive or not + // suppress it for now, fix it when we get to running tests on 9 + // see: http://errorprone.info/bugpattern/MockitoCast + when((Object) masterServices.getServerManager().getLoad(sn). + getRegionsLoad().get(region.getKey())).thenReturn(regionLoad); + } + try { + when(masterRpcServices.isSplitOrMergeEnabled(any(RpcController.class), + any(MasterProtos.IsSplitOrMergeEnabledRequest.class))).thenReturn( + MasterProtos.IsSplitOrMergeEnabledResponse.newBuilder().setEnabled(true).build()); + } catch (ServiceException se) { + LOG.debug("error setting isSplitOrMergeEnabled switch", se); + } + + normalizer.setMasterServices(masterServices); + normalizer.setMasterRpcServices(masterRpcServices); + } +} From 3aadfa432c734b3a634cd151a69537cb38da2cb5 Mon Sep 17 00:00:00 2001 From: Aman Poonia Date: Tue, 17 Dec 2019 10:40:52 +0530 Subject: [PATCH 2/8] Fix checkstyle and accomodate review comments --- .../master/normalizer/MergeNormalizer.java | 156 ++++++++++-------- .../normalizer/TestMergeNormalizer.java | 71 +++++--- 2 files changed, 137 insertions(+), 90 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java index c66355002480..e63919efae74 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java @@ -1,8 +1,28 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.hbase.master.normalizer; import com.google.protobuf.ServiceException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.RegionLoad; @@ -13,83 +33,81 @@ import org.apache.hadoop.hbase.master.MasterRpcServices; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.protobuf.RequestConverter; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.TimeUnit; - - +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** - * Implementation of MergeNormalizer - * - * Logic in use: - * - *

    - *
  1. get all regions of a given table - *
  2. get avg size S of each region (by total size of store files reported in RegionLoad) - *
  3. Otherwise, two region R1 and its smallest neighbor R2 are merged, - * if R1 + R1 < S, and all such regions are returned to be merged - *
  4. Otherwise, no action is performed + * Implementation of MergeNormalizer Logic in use: + *
      + *
    1. get all regions of a given table + *
    2. get avg size S of each region (by total size of store files reported in RegionLoad) + *
    3. two regions R1 and its neighbour R2 are merged, if R1 + R2 < S, and all + * such regions are returned to be merged + *
    4. Otherwise, no action is performed *
    *

    - * Region sizes are coarse and approximate on the order of megabytes. Also, - * empty regions (less than 1MB) are also merged if the age of region is > MIN_DURATION_FOR_MERGE (default 2) + * Region sizes are coarse and approximate on the order of megabytes. Also, empty regions (less than + * 1MB) are also merged if the age of region is > MIN_DURATION_FOR_MERGE (default 2) + *

    */ @InterfaceAudience.Private public class MergeNormalizer implements RegionNormalizer { - private static final Log LOG = LogFactory.getLog(MergeNormalizer.class); + private static final Logger LOG = LoggerFactory.getLogger(MergeNormalizer.class); private static final int MIN_REGION_COUNT = 3; - private static final int MIN_DURATION_FOR_MERGE=2; + private static final int MIN_DURATION_FOR_MERGE = 2; private MasterServices masterServices; private MasterRpcServices masterRpcServices; - @Override public void setMasterServices(MasterServices masterServices) { + @Override + public void setMasterServices(MasterServices masterServices) { this.masterServices = masterServices; } - @Override public void setMasterRpcServices(MasterRpcServices masterRpcServices) { + @Override + public void setMasterRpcServices(MasterRpcServices masterRpcServices) { this.masterRpcServices = masterRpcServices; } - @Override public List computePlanForTable(TableName table) - throws HBaseIOException { + @Override + public List computePlanForTable(TableName table) throws HBaseIOException { if (table == null || table.isSystemTable()) { - LOG.debug("Normalization of system table " + table + " isn't allowed"); + LOG.debug("Normalization of system table {} isn't allowed", table); return null; } boolean mergeEnabled = true; try { - mergeEnabled = masterRpcServices.isSplitOrMergeEnabled(null, - RequestConverter.buildIsSplitOrMergeEnabledRequest(Admin.MasterSwitchType.MERGE)).getEnabled(); + mergeEnabled = masterRpcServices + .isSplitOrMergeEnabled(null, + RequestConverter.buildIsSplitOrMergeEnabledRequest(Admin.MasterSwitchType.MERGE)) + .getEnabled(); } catch (ServiceException se) { LOG.debug("Unable to determine whether merge is enabled", se); } if (!mergeEnabled) { - LOG.debug("Merge disabled for table: " + table); + LOG.debug("Merge disabled for table: {}", table); return null; } - List plans = new ArrayList(); - List tableRegions = masterServices.getAssignmentManager().getRegionStates(). - getRegionsOfTable(table); + List plans = new ArrayList<>(); + List tableRegions = + masterServices.getAssignmentManager().getRegionStates().getRegionsOfTable(table); if (tableRegions == null || tableRegions.size() < MIN_REGION_COUNT) { int nrRegions = tableRegions == null ? 0 : tableRegions.size(); - LOG.debug("Table " + table + " has " + nrRegions + " regions, required min number" - + " of regions for normalizer to run is " + MIN_REGION_COUNT + ", not running normalizer"); + LOG.debug( + "Table {} has {} regions, required min number of regions for normalizer to run is {} , not " + + "running normalizer", table, nrRegions, MIN_REGION_COUNT); return null; } - LOG.debug("Computing normalization plan for table: " + table + - ", number of regions: " + tableRegions.size()); + LOG.debug("Computing normalization plan for table: {}, number of regions: {}", + table,tableRegions.size()); long totalSizeMb = 0; int acutalRegionCnt = 0; - for (int i = 0; i < tableRegions.size(); i++) { - HRegionInfo hri = tableRegions.get(i); + for (HRegionInfo hri : tableRegions) { long regionSize = getRegionSize(hri); - //don't consider regions that are in bytes for averaging the size. + // don't consider regions that are in bytes for averaging the size. if (regionSize > 0) { acutalRegionCnt++; totalSizeMb += regionSize; @@ -98,57 +116,59 @@ public class MergeNormalizer implements RegionNormalizer { double avgRegionSize = acutalRegionCnt == 0 ? 0 : totalSizeMb / (double) acutalRegionCnt; - LOG.debug("Table " + table + ", total aggregated regions size: " + totalSizeMb); - LOG.debug("Table " + table + ", average region size: " + avgRegionSize); + LOG.debug("Table {}, total aggregated regions size: {}", table, totalSizeMb); + LOG.debug("Table {}, average region size: {}", table, avgRegionSize); int candidateIdx = 0; while (candidateIdx < tableRegions.size()) { - HRegionInfo hri = tableRegions.get(candidateIdx); - long regionSize = getRegionSize(hri); if (candidateIdx == tableRegions.size() - 1) { break; } + HRegionInfo hri = tableRegions.get(candidateIdx); + long regionSize = getRegionSize(hri); HRegionInfo hri2 = tableRegions.get(candidateIdx + 1); long regionSize2 = getRegionSize(hri2); if (regionSize >= 0 && regionSize2 >= 0 && regionSize + regionSize2 < avgRegionSize) { - Timestamp hriTime = new Timestamp(hri.getRegionId()); - Timestamp hri2Time = new Timestamp(hri2.getRegionId()); - Timestamp currentTime = new Timestamp(System.currentTimeMillis()); - try { - // atleast one of the two regions should be older than MIN_REGION_DURATION days - if (!(new Timestamp(hriTime.getTime() + TimeUnit.DAYS.toMillis(MIN_DURATION_FOR_MERGE))) - .after(currentTime) - || !(new Timestamp( - hri2Time.getTime() + TimeUnit.DAYS.toMillis(MIN_DURATION_FOR_MERGE))) - .after(currentTime)) { - LOG.info( - "Table " + table + ", small region size: " + regionSize + " plus its neighbor size: " - + regionSize2 + ", less than the avg size " + avgRegionSize + ", merging them"); - plans.add(new MergeNormalizationPlan(hri, hri2)); - candidateIdx++; - } - } catch (Exception e) { - e.printStackTrace(); + // atleast one of the two regions should be older than MIN_REGION_DURATION days + if (isOldEnoughToMerge(hri) || isOldEnoughToMerge(hri2)) { + LOG.info( + "Table {}, small region size: {} plus its neighbor size: {}, less than the avg size " + + "{}, merging them", + table, regionSize, regionSize2, avgRegionSize); + plans.add(new MergeNormalizationPlan(hri, hri2)); + candidateIdx++; } + } else { + LOG.debug("Skipping region {} of table {}", hri.getRegionId(), table); } candidateIdx++; } if (plans.isEmpty()) { - LOG.debug("No normalization needed, regions look good for table: " + table); + LOG.debug("No normalization needed, regions look good for table: {}", table); return null; } return plans; } private long getRegionSize(HRegionInfo hri) { - ServerName sn = masterServices.getAssignmentManager().getRegionStates(). - getRegionServerOfRegion(hri); - RegionLoad regionLoad = masterServices.getServerManager().getLoad(sn). - getRegionsLoad().get(hri.getRegionName()); + ServerName sn = + masterServices.getAssignmentManager().getRegionStates().getRegionServerOfRegion(hri); + RegionLoad regionLoad = + masterServices.getServerManager().getLoad(sn).getRegionsLoad().get(hri.getRegionName()); if (regionLoad == null) { - LOG.debug(hri.getRegionNameAsString() + " was not found in RegionsLoad"); + LOG.debug("{} was not found in RegionsLoad", hri.getRegionNameAsString() ); return -1; } return regionLoad.getStorefileSizeMB(); } + + private boolean isOldEnoughToMerge(HRegionInfo hri) { + Timestamp currentTime = new Timestamp(System.currentTimeMillis()); + Timestamp hriTime = new Timestamp(hri.getRegionId()); + boolean isOld = + new Timestamp(hriTime.getTime() + TimeUnit.DAYS.toMillis(MIN_DURATION_FOR_MERGE)) + .before(currentTime); + return isOld; + } } + diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestMergeNormalizer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestMergeNormalizer.java index d8687403aca6..1764d020f37e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestMergeNormalizer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestMergeNormalizer.java @@ -1,3 +1,21 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.hbase.master.normalizer; import com.google.protobuf.RpcController; @@ -24,8 +42,7 @@ import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; import static org.mockito.Matchers.any; import static org.mockito.Mockito.RETURNS_DEEP_STUBS; import static org.mockito.Mockito.when; @@ -53,7 +70,7 @@ public void testNoNormalizationForMetaTable() throws HBaseIOException { setupMocksForNormalizer(regionSizes, hris); List plans = normalizer.computePlanForTable(testTable); - assertTrue(plans == null); + assertNull(plans); } @Test @@ -72,7 +89,7 @@ public void testNoNormalizationIfTooFewRegions() throws HBaseIOException { setupMocksForNormalizer(regionSizes, hris); List plans = normalizer.computePlanForTable(testTable); - assertTrue(plans == null); + assertNull(plans); } @Test @@ -99,7 +116,7 @@ public void testNoNormalizationOnNormalizedCluster() throws HBaseIOException { setupMocksForNormalizer(regionSizes, hris); List plans = normalizer.computePlanForTable(testTable); - assertTrue(plans == null); + assertNull(plans); } @Test @@ -111,19 +128,23 @@ public void testMergeOfSmallRegions() throws HBaseIOException { Timestamp currentTime = new Timestamp(System.currentTimeMillis()); Timestamp threedaysBefore = new Timestamp(currentTime.getTime() - TimeUnit.DAYS.toMillis(3)); - HRegionInfo hri1 = new HRegionInfo(testTable, Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),false,threedaysBefore.getTime()); + HRegionInfo hri1 = new HRegionInfo(testTable, Bytes.toBytes("aaa"), Bytes.toBytes("bbb"), false, + threedaysBefore.getTime()); hris.add(hri1); regionSizes.put(hri1.getRegionName(), 15); - HRegionInfo hri2 = new HRegionInfo(testTable, Bytes.toBytes("bbb"), Bytes.toBytes("ccc"),false, threedaysBefore.getTime()); + HRegionInfo hri2 = new HRegionInfo(testTable, Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), false, + threedaysBefore.getTime()); hris.add(hri2); regionSizes.put(hri2.getRegionName(), 5); - HRegionInfo hri3 = new HRegionInfo(testTable, Bytes.toBytes("ccc"), Bytes.toBytes("ddd"),false, threedaysBefore.getTime()); + HRegionInfo hri3 = new HRegionInfo(testTable, Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), false, + threedaysBefore.getTime()); hris.add(hri3); regionSizes.put(hri3.getRegionName(), 5); - HRegionInfo hri4 = new HRegionInfo(testTable, Bytes.toBytes("ddd"), Bytes.toBytes("eee"),false,threedaysBefore.getTime()); + HRegionInfo hri4 = new HRegionInfo(testTable, Bytes.toBytes("ddd"), Bytes.toBytes("eee"), false, + threedaysBefore.getTime()); hris.add(hri4); regionSizes.put(hri4.getRegionName(), 15); @@ -131,11 +152,13 @@ public void testMergeOfSmallRegions() throws HBaseIOException { hris.add(hri5); regionSizes.put(hri5.getRegionName(), 16); - HRegionInfo hri6 = new HRegionInfo(testTable, Bytes.toBytes("fff"), Bytes.toBytes("ggg"),false,threedaysBefore.getTime()); + HRegionInfo hri6 = new HRegionInfo(testTable, Bytes.toBytes("fff"), Bytes.toBytes("ggg"), false, + threedaysBefore.getTime()); hris.add(hri6); regionSizes.put(hri6.getRegionName(), 0); - HRegionInfo hri7 = new HRegionInfo(testTable, Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),false); + HRegionInfo hri7 = + new HRegionInfo(testTable, Bytes.toBytes("ggg"), Bytes.toBytes("hhh"), false); hris.add(hri7); regionSizes.put(hri7.getRegionName(), 0); @@ -146,6 +169,11 @@ public void testMergeOfSmallRegions() throws HBaseIOException { assertTrue(plan instanceof MergeNormalizationPlan); assertEquals(hri2, ((MergeNormalizationPlan) plan).getFirstRegion()); assertEquals(hri3, ((MergeNormalizationPlan) plan).getSecondRegion()); + + // to check last 0 sized regions are merged + plan = plans.get(1); + assertEquals(hri6, ((MergeNormalizationPlan) plan).getFirstRegion()); + assertEquals(hri7, ((MergeNormalizationPlan) plan).getSecondRegion()); } @Test @@ -170,28 +198,27 @@ public void testMergeOfNewSmallRegions() throws HBaseIOException { hris.add(hri4); regionSizes.put(hri4.getRegionName(), 15); - HRegionInfo hri5 = new HRegionInfo(testTable, Bytes.toBytes("ddd"), Bytes.toBytes("eee")); + HRegionInfo hri5 = new HRegionInfo(testTable, Bytes.toBytes("eee"), Bytes.toBytes("fff")); hris.add(hri4); regionSizes.put(hri5.getRegionName(), 5); setupMocksForNormalizer(regionSizes, hris); List plans = normalizer.computePlanForTable(testTable); - assertTrue(plans == null); + assertNull(plans); } @SuppressWarnings("MockitoCast") - protected void setupMocksForNormalizer(Map regionSizes, - List hris) { + protected void setupMocksForNormalizer(Map regionSizes, List hris) { masterServices = Mockito.mock(MasterServices.class, RETURNS_DEEP_STUBS); masterRpcServices = Mockito.mock(MasterRpcServices.class, RETURNS_DEEP_STUBS); // for simplicity all regions are assumed to be on one server; doesn't matter to us ServerName sn = ServerName.valueOf("localhost", 0, 1L); - when(masterServices.getAssignmentManager().getRegionStates(). - getRegionsOfTable(any(TableName.class))).thenReturn(hris); - when(masterServices.getAssignmentManager().getRegionStates(). - getRegionServerOfRegion(any(HRegionInfo.class))).thenReturn(sn); + when(masterServices.getAssignmentManager().getRegionStates() + .getRegionsOfTable(any(TableName.class))).thenReturn(hris); + when(masterServices.getAssignmentManager().getRegionStates() + .getRegionServerOfRegion(any(HRegionInfo.class))).thenReturn(sn); for (Map.Entry region : regionSizes.entrySet()) { RegionLoad regionLoad = Mockito.mock(RegionLoad.class); @@ -201,13 +228,13 @@ protected void setupMocksForNormalizer(Map regionSizes, // this is possibly broken with jdk9, unclear if false positive or not // suppress it for now, fix it when we get to running tests on 9 // see: http://errorprone.info/bugpattern/MockitoCast - when((Object) masterServices.getServerManager().getLoad(sn). - getRegionsLoad().get(region.getKey())).thenReturn(regionLoad); + when((Object) masterServices.getServerManager().getLoad(sn).getRegionsLoad() + .get(region.getKey())).thenReturn(regionLoad); } try { when(masterRpcServices.isSplitOrMergeEnabled(any(RpcController.class), any(MasterProtos.IsSplitOrMergeEnabledRequest.class))).thenReturn( - MasterProtos.IsSplitOrMergeEnabledResponse.newBuilder().setEnabled(true).build()); + MasterProtos.IsSplitOrMergeEnabledResponse.newBuilder().setEnabled(true).build()); } catch (ServiceException se) { LOG.debug("error setting isSplitOrMergeEnabled switch", se); } From 06b4899014835295acbe87812ee809f23c6b67fd Mon Sep 17 00:00:00 2001 From: mnpoonia Date: Thu, 19 Dec 2019 21:03:40 +0530 Subject: [PATCH 3/8] create abstract normalizer to add common functionality --- .../master/normalizer/BaseNormalizer.java | 101 ++++++++++++++++ .../master/normalizer/MergeNormalizer.java | 111 +++++------------- .../normalizer/TestMergeNormalizer.java | 23 ++-- 3 files changed, 146 insertions(+), 89 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/BaseNormalizer.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/BaseNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/BaseNormalizer.java new file mode 100644 index 000000000000..520fac2d543d --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/BaseNormalizer.java @@ -0,0 +1,101 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.normalizer; + +import com.google.protobuf.ServiceException; +import java.util.List; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.RegionLoad; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.master.MasterRpcServices; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.protobuf.RequestConverter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class BaseNormalizer implements RegionNormalizer { + private static final Logger LOG = LoggerFactory.getLogger(BaseNormalizer.class); + protected MasterServices masterServices; + protected MasterRpcServices masterRpcServices; + + @Override + public void setMasterServices(MasterServices masterServices) { + this.masterServices = masterServices; + } + + @Override + public void setMasterRpcServices(MasterRpcServices masterRpcServices) { + this.masterRpcServices = masterRpcServices; + } + + protected long getRegionSize(HRegionInfo hri) { + ServerName sn = + masterServices.getAssignmentManager().getRegionStates().getRegionServerOfRegion(hri); + RegionLoad regionLoad = + masterServices.getServerManager().getLoad(sn).getRegionsLoad().get(hri.getRegionName()); + if (regionLoad == null) { + LOG.debug("Region {} was not found in RegionsLoad", hri.getRegionNameAsString()); + return -1; + } + return regionLoad.getStorefileSizeMB(); + } + + protected boolean isMergeEnabled() { + boolean mergeEnabled = true; + try { + mergeEnabled = masterRpcServices + .isSplitOrMergeEnabled(null, + RequestConverter.buildIsSplitOrMergeEnabledRequest(Admin.MasterSwitchType.MERGE)) + .getEnabled(); + } catch (ServiceException se) { + LOG.debug("Unable to determine whether merge is enabled", se); + } + return mergeEnabled; + } + + protected boolean isSplitEnabled() { + boolean splitEnabled = true; + try { + splitEnabled = masterRpcServices + .isSplitOrMergeEnabled(null, + RequestConverter.buildIsSplitOrMergeEnabledRequest(Admin.MasterSwitchType.SPLIT)) + .getEnabled(); + } catch (ServiceException se) { + LOG.debug("Unable to determine whether merge is enabled", se); + } + return splitEnabled; + } + + protected double getAvgRegionSize(List tableRegions) { + long totalSizeMb = 0; + int acutalRegionCnt = 0; + for (HRegionInfo hri : tableRegions) { + long regionSize = getRegionSize(hri); + // don't consider regions that are in bytes for averaging the size. + if (regionSize > 0) { + acutalRegionCnt++; + totalSizeMb += regionSize; + } + } + + double avgRegionSize = acutalRegionCnt == 0 ? 0 : totalSizeMb / (double) acutalRegionCnt; + return avgRegionSize; + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java index e63919efae74..15304a4dd8a0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java @@ -18,21 +18,14 @@ */ package org.apache.hadoop.hbase.master.normalizer; -import com.google.protobuf.ServiceException; import java.sql.Timestamp; import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.RegionLoad; -import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.client.Admin; -import org.apache.hadoop.hbase.master.MasterRpcServices; -import org.apache.hadoop.hbase.master.MasterServices; -import org.apache.hadoop.hbase.protobuf.RequestConverter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,8 +34,8 @@ *
      *
    1. get all regions of a given table *
    2. get avg size S of each region (by total size of store files reported in RegionLoad) - *
    3. two regions R1 and its neighbour R2 are merged, if R1 + R2 < S, and all - * such regions are returned to be merged + *
    4. two regions R1 and its neighbour R2 are merged, if R1 + R2 < S, and all such regions are + * returned to be merged *
    5. Otherwise, no action is performed *
    *

    @@ -52,72 +45,23 @@ */ @InterfaceAudience.Private -public class MergeNormalizer implements RegionNormalizer { +public class MergeNormalizer extends BaseNormalizer implements RegionNormalizer { private static final Logger LOG = LoggerFactory.getLogger(MergeNormalizer.class); private static final int MIN_REGION_COUNT = 3; private static final int MIN_DURATION_FOR_MERGE = 2; - private MasterServices masterServices; - private MasterRpcServices masterRpcServices; - - @Override - public void setMasterServices(MasterServices masterServices) { - this.masterServices = masterServices; - } - - @Override - public void setMasterRpcServices(MasterRpcServices masterRpcServices) { - this.masterRpcServices = masterRpcServices; - } @Override public List computePlanForTable(TableName table) throws HBaseIOException { - if (table == null || table.isSystemTable()) { - LOG.debug("Normalization of system table {} isn't allowed", table); - return null; - } - boolean mergeEnabled = true; - try { - mergeEnabled = masterRpcServices - .isSplitOrMergeEnabled(null, - RequestConverter.buildIsSplitOrMergeEnabledRequest(Admin.MasterSwitchType.MERGE)) - .getEnabled(); - } catch (ServiceException se) { - LOG.debug("Unable to determine whether merge is enabled", se); - } - if (!mergeEnabled) { - LOG.debug("Merge disabled for table: {}", table); + if (!shouldNormalize(table)) { return null; } List plans = new ArrayList<>(); List tableRegions = masterServices.getAssignmentManager().getRegionStates().getRegionsOfTable(table); - if (tableRegions == null || tableRegions.size() < MIN_REGION_COUNT) { - int nrRegions = tableRegions == null ? 0 : tableRegions.size(); - LOG.debug( - "Table {} has {} regions, required min number of regions for normalizer to run is {} , not " - + "running normalizer", table, nrRegions, MIN_REGION_COUNT); - return null; - } - - LOG.debug("Computing normalization plan for table: {}, number of regions: {}", - table,tableRegions.size()); - - long totalSizeMb = 0; - int acutalRegionCnt = 0; - - for (HRegionInfo hri : tableRegions) { - long regionSize = getRegionSize(hri); - // don't consider regions that are in bytes for averaging the size. - if (regionSize > 0) { - acutalRegionCnt++; - totalSizeMb += regionSize; - } - } - - double avgRegionSize = acutalRegionCnt == 0 ? 0 : totalSizeMb / (double) acutalRegionCnt; - - LOG.debug("Table {}, total aggregated regions size: {}", table, totalSizeMb); + double avgRegionSize = getAvgRegionSize(tableRegions); LOG.debug("Table {}, average region size: {}", table, avgRegionSize); + LOG.debug("Computing normalization plan for table: {}, number of regions: {}", table, + tableRegions.size()); int candidateIdx = 0; while (candidateIdx < tableRegions.size()) { @@ -150,25 +94,34 @@ public List computePlanForTable(TableName table) throws HBase return plans; } - private long getRegionSize(HRegionInfo hri) { - ServerName sn = - masterServices.getAssignmentManager().getRegionStates().getRegionServerOfRegion(hri); - RegionLoad regionLoad = - masterServices.getServerManager().getLoad(sn).getRegionsLoad().get(hri.getRegionName()); - if (regionLoad == null) { - LOG.debug("{} was not found in RegionsLoad", hri.getRegionNameAsString() ); - return -1; - } - return regionLoad.getStorefileSizeMB(); - } - private boolean isOldEnoughToMerge(HRegionInfo hri) { Timestamp currentTime = new Timestamp(System.currentTimeMillis()); Timestamp hriTime = new Timestamp(hri.getRegionId()); boolean isOld = - new Timestamp(hriTime.getTime() + TimeUnit.DAYS.toMillis(MIN_DURATION_FOR_MERGE)) - .before(currentTime); + new Timestamp(hriTime.getTime() + TimeUnit.DAYS.toMillis(MIN_DURATION_FOR_MERGE)) + .before(currentTime); return isOld; } -} + private boolean shouldNormalize(TableName table) { + boolean normalize = false; + if (table == null || table.isSystemTable()) { + LOG.debug("Normalization of system table {} isn't allowed", table); + } else if (!isMergeEnabled()) { + LOG.debug("Merge disabled for table: {}", table); + } else { + List tableRegions = + masterServices.getAssignmentManager().getRegionStates().getRegionsOfTable(table); + if (tableRegions == null || tableRegions.size() < MIN_REGION_COUNT) { + int nrRegions = tableRegions == null ? 0 : tableRegions.size(); + LOG.debug( + "Table {} has {} regions, required min number of regions for normalizer to run is {} , " + + "not running normalizer", + table, nrRegions, MIN_REGION_COUNT); + } else { + normalize = true; + } + } + return normalize; + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestMergeNormalizer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestMergeNormalizer.java index 1764d020f37e..e0a4fd1de9b3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestMergeNormalizer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestMergeNormalizer.java @@ -18,8 +18,21 @@ */ package org.apache.hadoop.hbase.master.normalizer; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.RETURNS_DEEP_STUBS; +import static org.mockito.Mockito.when; + import com.google.protobuf.RpcController; import com.google.protobuf.ServiceException; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HBaseIOException; @@ -36,16 +49,6 @@ import org.junit.Test; import org.junit.experimental.categories.Category; import org.mockito.Mockito; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.TimeUnit; -import static org.junit.Assert.*; -import static org.mockito.Matchers.any; -import static org.mockito.Mockito.RETURNS_DEEP_STUBS; -import static org.mockito.Mockito.when; @Category(SmallTests.class) public class TestMergeNormalizer { From 79258f2c988edd451bd7dfb31edcd9268c23e041 Mon Sep 17 00:00:00 2001 From: mnpoonia Date: Thu, 19 Dec 2019 21:07:03 +0530 Subject: [PATCH 4/8] removing mention of interface --- .../apache/hadoop/hbase/master/normalizer/MergeNormalizer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java index 15304a4dd8a0..c35a07593204 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java @@ -45,7 +45,7 @@ */ @InterfaceAudience.Private -public class MergeNormalizer extends BaseNormalizer implements RegionNormalizer { +public class MergeNormalizer extends BaseNormalizer { private static final Logger LOG = LoggerFactory.getLogger(MergeNormalizer.class); private static final int MIN_REGION_COUNT = 3; private static final int MIN_DURATION_FOR_MERGE = 2; From c549fe475017ff6f722d1c152ba21f08acd8b77a Mon Sep 17 00:00:00 2001 From: mnpoonia Date: Sat, 28 Dec 2019 09:34:28 +0530 Subject: [PATCH 5/8] Change Simple region normalizer to use base class and merge normalizer for common functinality --- .../org/apache/hadoop/hbase/HConstants.java | 6 ++ .../master/normalizer/BaseNormalizer.java | 4 + .../master/normalizer/MergeNormalizer.java | 13 ++- .../normalizer/SimpleRegionNormalizer.java | 85 ++++--------------- .../TestSimpleRegionNormalizer.java | 4 +- 5 files changed, 39 insertions(+), 73 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java index 4607de986a06..6d0b80be57eb 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -151,6 +151,12 @@ public enum OperationStatusCode { public static final String HBASE_MASTER_NORMALIZER_CLASS = "hbase.master.normalizer.class"; + /** Config for min age of region before being considerded for merge in mormalizer */ + public static final long DEFAULT_MIN_DAYS_BEFORE_MERGE = 3; + + public static final String HBASE_MASTER_DAYS_BEFORE_MERGE = + "hbase.master.normalize.daysBeforeMerge"; + /** Cluster is standalone or pseudo-distributed */ public static final boolean CLUSTER_IS_LOCAL = false; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/BaseNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/BaseNormalizer.java index 520fac2d543d..8522ce985c98 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/BaseNormalizer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/BaseNormalizer.java @@ -35,6 +35,10 @@ public abstract class BaseNormalizer implements RegionNormalizer { protected MasterServices masterServices; protected MasterRpcServices masterRpcServices; + /** + * Set the master service. + * @param masterServices inject instance of MasterServices + */ @Override public void setMasterServices(MasterServices masterServices) { this.masterServices = masterServices; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java index c35a07593204..572dbe6ff8f7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java @@ -23,7 +23,10 @@ import java.util.List; import java.util.concurrent.TimeUnit; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseIOException; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.TableName; import org.slf4j.Logger; @@ -48,7 +51,6 @@ public class MergeNormalizer extends BaseNormalizer { private static final Logger LOG = LoggerFactory.getLogger(MergeNormalizer.class); private static final int MIN_REGION_COUNT = 3; - private static final int MIN_DURATION_FOR_MERGE = 2; @Override public List computePlanForTable(TableName table) throws HBaseIOException { @@ -98,7 +100,7 @@ private boolean isOldEnoughToMerge(HRegionInfo hri) { Timestamp currentTime = new Timestamp(System.currentTimeMillis()); Timestamp hriTime = new Timestamp(hri.getRegionId()); boolean isOld = - new Timestamp(hriTime.getTime() + TimeUnit.DAYS.toMillis(MIN_DURATION_FOR_MERGE)) + new Timestamp(hriTime.getTime() + TimeUnit.DAYS.toMillis(getMinimumDurationBeforeMerge())) .before(currentTime); return isOld; } @@ -124,4 +126,11 @@ private boolean shouldNormalize(TableName table) { } return normalize; } + + private long getMinimumDurationBeforeMerge() { + Configuration entries = HBaseConfiguration.create(); + long minDuration = masterServices.getConfiguration() + .getLong(HConstants.HBASE_MASTER_DAYS_BEFORE_MERGE, HConstants.DEFAULT_MIN_DAYS_BEFORE_MERGE); + return minDuration; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java index 9b91767ea5fa..3bea9d41aa24 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java @@ -23,20 +23,12 @@ import java.util.Comparator; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.RegionLoad; -import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.client.Admin.MasterSwitchType; -import org.apache.hadoop.hbase.master.MasterRpcServices; -import org.apache.hadoop.hbase.master.MasterServices; -import org.apache.hadoop.hbase.protobuf.RequestConverter; - -import com.google.protobuf.ServiceException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Simple implementation of region normalizer. @@ -58,21 +50,10 @@ * is by design to prevent normalization from undoing the pre-splitting of a table. */ @InterfaceAudience.Private -public class SimpleRegionNormalizer implements RegionNormalizer { - - private static final Log LOG = LogFactory.getLog(SimpleRegionNormalizer.class); +public class SimpleRegionNormalizer extends BaseNormalizer { + private static final Logger LOG = LoggerFactory.getLogger(SimpleRegionNormalizer.class); private static final int MIN_REGION_COUNT = 3; - private MasterServices masterServices; - private MasterRpcServices masterRpcServices; - /** - * Set the master service. - * @param masterServices inject instance of MasterServices - */ - @Override - public void setMasterServices(MasterServices masterServices) { - this.masterServices = masterServices; - } // Comparator that gives higher priority to region Split plan private Comparator planComparator = @@ -89,11 +70,6 @@ public int compare(NormalizationPlan plan, NormalizationPlan plan2) { } }; - @Override - public void setMasterRpcServices(MasterRpcServices masterRpcServices) { - this.masterRpcServices = masterRpcServices; - } - /** * Computes next most "urgent" normalization action on the table. * Action may be either a split, or a merge, or no action. @@ -107,25 +83,13 @@ public List computePlanForTable(TableName table) throws HBase LOG.debug("Normalization of system table " + table + " isn't allowed"); return null; } - boolean splitEnabled = true, mergeEnabled = true; - try { - splitEnabled = masterRpcServices.isSplitOrMergeEnabled(null, - RequestConverter.buildIsSplitOrMergeEnabledRequest(MasterSwitchType.SPLIT)).getEnabled(); - } catch (ServiceException se) { - LOG.debug("Unable to determine whether split is enabled", se); - } - try { - mergeEnabled = masterRpcServices.isSplitOrMergeEnabled(null, - RequestConverter.buildIsSplitOrMergeEnabledRequest(MasterSwitchType.MERGE)).getEnabled(); - } catch (ServiceException se) { - LOG.debug("Unable to determine whether merge is enabled", se); - } + boolean splitEnabled = isSplitEnabled(); + boolean mergeEnabled = isMergeEnabled(); if (!splitEnabled && !mergeEnabled) { LOG.debug("Both split and merge are disabled for table: " + table); return null; } - - List plans = new ArrayList(); + List plans = new ArrayList<>(); List tableRegions = masterServices.getAssignmentManager().getRegionStates(). getRegionsOfTable(table); @@ -169,41 +133,22 @@ public List computePlanForTable(TableName table) throws HBase + regionSize + ", more than twice avg size, splitting"); plans.add(new SplitNormalizationPlan(hri, null)); } - } else { - if (candidateIdx == tableRegions.size()-1) { - break; - } - if (mergeEnabled) { - HRegionInfo hri2 = tableRegions.get(candidateIdx+1); - long regionSize2 = getRegionSize(hri2); - if (regionSize >= 0 && regionSize2 >= 0 && regionSize + regionSize2 < avgRegionSize) { - LOG.info("Table " + table + ", small region size: " + regionSize - + " plus its neighbor size: " + regionSize2 - + ", less than the avg size " + avgRegionSize + ", merging them"); - plans.add(new MergeNormalizationPlan(hri, hri2)); - candidateIdx++; - } - } } candidateIdx++; } + MergeNormalizer mergeNormalizer = new MergeNormalizer(); + mergeNormalizer.setMasterRpcServices(masterRpcServices); + mergeNormalizer.setMasterServices(masterServices); + List normalizationPlans = mergeNormalizer.computePlanForTable(table); + if(normalizationPlans != null) { + plans.addAll(normalizationPlans); + } if (plans.isEmpty()) { LOG.debug("No normalization needed, regions look good for table: " + table); return null; } + Collections.sort(plans, planComparator); return plans; } - - private long getRegionSize(HRegionInfo hri) { - ServerName sn = masterServices.getAssignmentManager().getRegionStates(). - getRegionServerOfRegion(hri); - RegionLoad regionLoad = masterServices.getServerManager().getLoad(sn). - getRegionsLoad().get(hri.getRegionName()); - if (regionLoad == null) { - LOG.debug(hri.getRegionNameAsString() + " was not found in RegionsLoad"); - return -1; - } - return regionLoad.getStorefileSizeMB(); - } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java index 1e29514a8dac..b6e3cc976028 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java @@ -21,13 +21,13 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HBaseIOException; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.RegionLoad; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.master.MasterRpcServices; import org.apache.hadoop.hbase.master.MasterServices; -import org.apache.hadoop.hbase.protobuf.RequestConverter; import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsSplitOrMergeEnabledRequest; import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsSplitOrMergeEnabledResponse; import org.apache.hadoop.hbase.testclassification.SmallTests; @@ -276,6 +276,8 @@ protected void setupMocksForNormalizer(Map regionSizes, when(masterServices.getAssignmentManager().getRegionStates(). getRegionServerOfRegion(any(HRegionInfo.class))).thenReturn(sn); + when(masterServices.getConfiguration().getLong(HConstants.HBASE_MASTER_DAYS_BEFORE_MERGE, + HConstants.DEFAULT_MIN_DAYS_BEFORE_MERGE)).thenReturn(0L); for (Map.Entry region : regionSizes.entrySet()) { RegionLoad regionLoad = Mockito.mock(RegionLoad.class); when(regionLoad.getName()).thenReturn(region.getKey()); From 56bee021cb607a9771c13b52a092a10d58f8c3c5 Mon Sep 17 00:00:00 2001 From: mnpoonia Date: Sat, 28 Dec 2019 19:23:46 +0530 Subject: [PATCH 6/8] Change num of days to int --- .../src/main/java/org/apache/hadoop/hbase/HConstants.java | 2 +- .../hadoop/hbase/master/normalizer/MergeNormalizer.java | 6 +++--- .../hbase/master/normalizer/TestSimpleRegionNormalizer.java | 4 ++-- .../normalizer/TestSimpleRegionNormalizerOnCluster.java | 1 + 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java index 6d0b80be57eb..6cb407a7f888 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -152,7 +152,7 @@ public enum OperationStatusCode { "hbase.master.normalizer.class"; /** Config for min age of region before being considerded for merge in mormalizer */ - public static final long DEFAULT_MIN_DAYS_BEFORE_MERGE = 3; + public static final int DEFAULT_MIN_DAYS_BEFORE_MERGE = 3; public static final String HBASE_MASTER_DAYS_BEFORE_MERGE = "hbase.master.normalize.daysBeforeMerge"; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java index 572dbe6ff8f7..65ce10e964b8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java @@ -127,10 +127,10 @@ private boolean shouldNormalize(TableName table) { return normalize; } - private long getMinimumDurationBeforeMerge() { + private int getMinimumDurationBeforeMerge() { Configuration entries = HBaseConfiguration.create(); - long minDuration = masterServices.getConfiguration() - .getLong(HConstants.HBASE_MASTER_DAYS_BEFORE_MERGE, HConstants.DEFAULT_MIN_DAYS_BEFORE_MERGE); + int minDuration = masterServices.getConfiguration() + .getInt(HConstants.HBASE_MASTER_DAYS_BEFORE_MERGE, HConstants.DEFAULT_MIN_DAYS_BEFORE_MERGE); return minDuration; } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java index b6e3cc976028..d62082da4bce 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java @@ -276,8 +276,8 @@ protected void setupMocksForNormalizer(Map regionSizes, when(masterServices.getAssignmentManager().getRegionStates(). getRegionServerOfRegion(any(HRegionInfo.class))).thenReturn(sn); - when(masterServices.getConfiguration().getLong(HConstants.HBASE_MASTER_DAYS_BEFORE_MERGE, - HConstants.DEFAULT_MIN_DAYS_BEFORE_MERGE)).thenReturn(0L); + when(masterServices.getConfiguration().getInt(HConstants.HBASE_MASTER_DAYS_BEFORE_MERGE, + HConstants.DEFAULT_MIN_DAYS_BEFORE_MERGE)).thenReturn(0); for (Map.Entry region : regionSizes.entrySet()) { RegionLoad regionLoad = Mockito.mock(RegionLoad.class); when(regionLoad.getName()).thenReturn(region.getKey()); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizerOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizerOnCluster.java index a73e186d6b5d..687fee354418 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizerOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizerOnCluster.java @@ -61,6 +61,7 @@ public class TestSimpleRegionNormalizerOnCluster { public static void beforeAllTests() throws Exception { // we will retry operations when PleaseHoldException is thrown TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 3); + TEST_UTIL.getConfiguration().setLong(HConstants.HBASE_MASTER_DAYS_BEFORE_MERGE, 0L); // Start a cluster of two regionservers. TEST_UTIL.startMiniCluster(1); From dbdccc4abae061d2339ae3cbf98d0f07f8ef1a58 Mon Sep 17 00:00:00 2001 From: mnpoonia Date: Sat, 28 Dec 2019 20:23:01 +0530 Subject: [PATCH 7/8] Move common logic to baseNormalizer class --- .../master/normalizer/BaseNormalizer.java | 56 +++++++++++ .../master/normalizer/MergeNormalizer.java | 40 ++------ .../normalizer/SimpleRegionNormalizer.java | 97 ++++++------------- .../TestSimpleRegionNormalizer.java | 48 ++++----- .../TestSimpleRegionNormalizerOnCluster.java | 21 ++-- 5 files changed, 125 insertions(+), 137 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/BaseNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/BaseNormalizer.java index 8522ce985c98..0cdf2f2c586e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/BaseNormalizer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/BaseNormalizer.java @@ -19,10 +19,12 @@ package org.apache.hadoop.hbase.master.normalizer; import com.google.protobuf.ServiceException; +import java.util.ArrayList; import java.util.List; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.RegionLoad; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.master.MasterRpcServices; import org.apache.hadoop.hbase.master.MasterServices; @@ -102,4 +104,58 @@ protected double getAvgRegionSize(List tableRegions) { double avgRegionSize = acutalRegionCnt == 0 ? 0 : totalSizeMb / (double) acutalRegionCnt; return avgRegionSize; } + + protected List getMergeNormalizationPlan(TableName table) { + List plans = new ArrayList<>(); + List tableRegions = + masterServices.getAssignmentManager().getRegionStates().getRegionsOfTable(table); + double avgRegionSize = getAvgRegionSize(tableRegions); + LOG.debug("Table {}, average region size: {}", table, avgRegionSize); + LOG.debug("Computing normalization plan for table: {}, number of regions: {}", table, + tableRegions.size()); + + int candidateIdx = 0; + while (candidateIdx < tableRegions.size()) { + if (candidateIdx == tableRegions.size() - 1) { + break; + } + HRegionInfo hri = tableRegions.get(candidateIdx); + long regionSize = getRegionSize(hri); + HRegionInfo hri2 = tableRegions.get(candidateIdx + 1); + long regionSize2 = getRegionSize(hri2); + if (regionSize >= 0 && regionSize2 >= 0 && regionSize + regionSize2 < avgRegionSize) { + // atleast one of the two regions should be older than MIN_REGION_DURATION days + plans.add(new MergeNormalizationPlan(hri, hri2)); + candidateIdx++; + } else { + LOG.debug("Skipping region {} of table {} with size {}", hri.getRegionId(), table, + regionSize); + } + candidateIdx++; + } + return plans; + } + + protected List getSplitNormalizationPlan(TableName table) { + List plans = new ArrayList<>(); + List tableRegions = + masterServices.getAssignmentManager().getRegionStates().getRegionsOfTable(table); + double avgRegionSize = getAvgRegionSize(tableRegions); + LOG.debug("Table{}, average region size: ", table, avgRegionSize); + + int candidateIdx = 0; + while (candidateIdx < tableRegions.size()) { + HRegionInfo hri = tableRegions.get(candidateIdx); + long regionSize = getRegionSize(hri); + // if the region is > 2 times larger than average, we split it, split + // is more high priority normalization action than merge. + if (regionSize > 2 * avgRegionSize) { + LOG.info("Table " + table + ", large region " + hri.getRegionNameAsString() + " has size " + + regionSize + ", more than twice avg size, splitting"); + plans.add(new SplitNormalizationPlan(hri, null)); + } + candidateIdx++; + } + return plans; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java index 65ce10e964b8..3c29cae1e740 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java @@ -54,40 +54,20 @@ public class MergeNormalizer extends BaseNormalizer { @Override public List computePlanForTable(TableName table) throws HBaseIOException { + List plans = new ArrayList<>(); if (!shouldNormalize(table)) { return null; } - List plans = new ArrayList<>(); - List tableRegions = - masterServices.getAssignmentManager().getRegionStates().getRegionsOfTable(table); - double avgRegionSize = getAvgRegionSize(tableRegions); - LOG.debug("Table {}, average region size: {}", table, avgRegionSize); - LOG.debug("Computing normalization plan for table: {}, number of regions: {}", table, - tableRegions.size()); - - int candidateIdx = 0; - while (candidateIdx < tableRegions.size()) { - if (candidateIdx == tableRegions.size() - 1) { - break; - } - HRegionInfo hri = tableRegions.get(candidateIdx); - long regionSize = getRegionSize(hri); - HRegionInfo hri2 = tableRegions.get(candidateIdx + 1); - long regionSize2 = getRegionSize(hri2); - if (regionSize >= 0 && regionSize2 >= 0 && regionSize + regionSize2 < avgRegionSize) { - // atleast one of the two regions should be older than MIN_REGION_DURATION days + // atleast one of the two regions should be older than MIN_REGION_DURATION days + List normalizationPlans = getMergeNormalizationPlan(table); + for (NormalizationPlan plan : normalizationPlans) { + if (plan instanceof MergeNormalizationPlan) { + HRegionInfo hri = ((MergeNormalizationPlan) plan).getFirstRegion(); + HRegionInfo hri2 = ((MergeNormalizationPlan) plan).getSecondRegion(); if (isOldEnoughToMerge(hri) || isOldEnoughToMerge(hri2)) { - LOG.info( - "Table {}, small region size: {} plus its neighbor size: {}, less than the avg size " - + "{}, merging them", - table, regionSize, regionSize2, avgRegionSize); - plans.add(new MergeNormalizationPlan(hri, hri2)); - candidateIdx++; + plans.add(plan); } - } else { - LOG.debug("Skipping region {} of table {}", hri.getRegionId(), table); } - candidateIdx++; } if (plans.isEmpty()) { LOG.debug("No normalization needed, regions look good for table: {}", table); @@ -129,8 +109,8 @@ private boolean shouldNormalize(TableName table) { private int getMinimumDurationBeforeMerge() { Configuration entries = HBaseConfiguration.create(); - int minDuration = masterServices.getConfiguration() - .getInt(HConstants.HBASE_MASTER_DAYS_BEFORE_MERGE, HConstants.DEFAULT_MIN_DAYS_BEFORE_MERGE); + int minDuration = masterServices.getConfiguration().getInt( + HConstants.HBASE_MASTER_DAYS_BEFORE_MERGE, HConstants.DEFAULT_MIN_DAYS_BEFORE_MERGE); return minDuration; } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java index 3bea9d41aa24..17d4d50d1945 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java @@ -22,42 +22,36 @@ import java.util.Collections; import java.util.Comparator; import java.util.List; - +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * Simple implementation of region normalizer. - * - * Logic in use: - * - *

      - *
    1. get all regions of a given table - *
    2. get avg size S of each region (by total size of store files reported in RegionLoad) - *
    3. If biggest region is bigger than S * 2, it is kindly requested to split, - * and normalization stops - *
    4. Otherwise, two smallest region R1 and its smallest neighbor R2 are kindly requested - * to merge, if R1 + R1 < S, and normalization stops - *
    5. Otherwise, no action is performed + * Simple implementation of region normalizer. Logic in use: + *
        + *
      1. get all regions of a given table + *
      2. get avg size S of each region (by total size of store files reported in RegionLoad) + *
      3. If biggest region is bigger than S * 2, it is kindly requested to split, and normalization + * stops + *
      4. Otherwise, two smallest region R1 and its smallest neighbor R2 are kindly requested to merge, + * if R1 + R1 < S, and normalization stops + *
      5. Otherwise, no action is performed *
      *

      - * Region sizes are coarse and approximate on the order of megabytes. Additionally, - * "empty" regions (less than 1MB, with the previous note) are not merged away. This - * is by design to prevent normalization from undoing the pre-splitting of a table. + * Region sizes are coarse and approximate on the order of megabytes. Additionally, "empty" regions + * (less than 1MB, with the previous note) are not merged away. This is by design to prevent + * normalization from undoing the pre-splitting of a table. */ @InterfaceAudience.Private public class SimpleRegionNormalizer extends BaseNormalizer { private static final Logger LOG = LoggerFactory.getLogger(SimpleRegionNormalizer.class); private static final int MIN_REGION_COUNT = 3; - // Comparator that gives higher priority to region Split plan - private Comparator planComparator = - new Comparator() { + private Comparator planComparator = new Comparator() { @Override public int compare(NormalizationPlan plan, NormalizationPlan plan2) { if (plan instanceof SplitNormalizationPlan) { @@ -71,9 +65,8 @@ public int compare(NormalizationPlan plan, NormalizationPlan plan2) { }; /** - * Computes next most "urgent" normalization action on the table. - * Action may be either a split, or a merge, or no action. - * + * Computes next most "urgent" normalization action on the table. Action may be either a split, or + * a merge, or no action. * @param table table to normalize * @return normalization plan to execute */ @@ -89,59 +82,29 @@ public List computePlanForTable(TableName table) throws HBase LOG.debug("Both split and merge are disabled for table: " + table); return null; } - List plans = new ArrayList<>(); - List tableRegions = masterServices.getAssignmentManager().getRegionStates(). - getRegionsOfTable(table); + List tableRegions = + masterServices.getAssignmentManager().getRegionStates().getRegionsOfTable(table); - //TODO: should we make min number of regions a config param? + // TODO: should we make min number of regions a config param? if (tableRegions == null || tableRegions.size() < MIN_REGION_COUNT) { int nrRegions = tableRegions == null ? 0 : tableRegions.size(); LOG.debug("Table " + table + " has " + nrRegions + " regions, required min number" - + " of regions for normalizer to run is " + MIN_REGION_COUNT + ", not running normalizer"); + + " of regions for normalizer to run is " + MIN_REGION_COUNT + + ", not running normalizer"); return null; } - - LOG.debug("Computing normalization plan for table: " + table + - ", number of regions: " + tableRegions.size()); - - long totalSizeMb = 0; - int acutalRegionCnt = 0; - - for (int i = 0; i < tableRegions.size(); i++) { - HRegionInfo hri = tableRegions.get(i); - long regionSize = getRegionSize(hri); - if (regionSize > 0) { - acutalRegionCnt++; - totalSizeMb += regionSize; + List plans = new ArrayList<>(); + if (splitEnabled) { + List splitNormalizationPlan = getSplitNormalizationPlan(table); + if (splitNormalizationPlan != null) { + plans = splitNormalizationPlan; } } - - double avgRegionSize = acutalRegionCnt == 0 ? 0 : totalSizeMb / (double) acutalRegionCnt; - - LOG.debug("Table " + table + ", total aggregated regions size: " + totalSizeMb); - LOG.debug("Table " + table + ", average region size: " + avgRegionSize); - - int candidateIdx = 0; - while (candidateIdx < tableRegions.size()) { - HRegionInfo hri = tableRegions.get(candidateIdx); - long regionSize = getRegionSize(hri); - // if the region is > 2 times larger than average, we split it, split - // is more high priority normalization action than merge. - if (regionSize > 2 * avgRegionSize) { - if (splitEnabled) { - LOG.info("Table " + table + ", large region " + hri.getRegionNameAsString() + " has size " - + regionSize + ", more than twice avg size, splitting"); - plans.add(new SplitNormalizationPlan(hri, null)); - } + if (mergeEnabled) { + List normalizationPlans = getMergeNormalizationPlan(table); + if (normalizationPlans != null) { + plans.addAll(normalizationPlans); } - candidateIdx++; - } - MergeNormalizer mergeNormalizer = new MergeNormalizer(); - mergeNormalizer.setMasterRpcServices(masterRpcServices); - mergeNormalizer.setMasterServices(masterServices); - List normalizationPlans = mergeNormalizer.computePlanForTable(table); - if(normalizationPlans != null) { - plans.addAll(normalizationPlans); } if (plans.isEmpty()) { LOG.debug("No normalization needed, regions look good for table: " + table); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java index d62082da4bce..5f1669226e40 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java @@ -18,10 +18,21 @@ */ package org.apache.hadoop.hbase.master.normalizer; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.RETURNS_DEEP_STUBS; +import static org.mockito.Mockito.when; + +import com.google.protobuf.RpcController; +import com.google.protobuf.ServiceException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HBaseIOException; -import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.RegionLoad; import org.apache.hadoop.hbase.ServerName; @@ -37,20 +48,6 @@ import org.junit.experimental.categories.Category; import org.mockito.Mockito; -import com.google.protobuf.RpcController; -import com.google.protobuf.ServiceException; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.any; -import static org.mockito.Mockito.RETURNS_DEEP_STUBS; -import static org.mockito.Mockito.when; - /** * Tests logic of {@link SimpleRegionNormalizer}. */ @@ -264,20 +261,17 @@ public void testSplitOfLargeRegion() throws HBaseIOException { } @SuppressWarnings("MockitoCast") - protected void setupMocksForNormalizer(Map regionSizes, - List hris) { + protected void setupMocksForNormalizer(Map regionSizes, List hris) { masterServices = Mockito.mock(MasterServices.class, RETURNS_DEEP_STUBS); masterRpcServices = Mockito.mock(MasterRpcServices.class, RETURNS_DEEP_STUBS); // for simplicity all regions are assumed to be on one server; doesn't matter to us ServerName sn = ServerName.valueOf("localhost", 0, 1L); - when(masterServices.getAssignmentManager().getRegionStates(). - getRegionsOfTable(any(TableName.class))).thenReturn(hris); - when(masterServices.getAssignmentManager().getRegionStates(). - getRegionServerOfRegion(any(HRegionInfo.class))).thenReturn(sn); + when(masterServices.getAssignmentManager().getRegionStates() + .getRegionsOfTable(any(TableName.class))).thenReturn(hris); + when(masterServices.getAssignmentManager().getRegionStates() + .getRegionServerOfRegion(any(HRegionInfo.class))).thenReturn(sn); - when(masterServices.getConfiguration().getInt(HConstants.HBASE_MASTER_DAYS_BEFORE_MERGE, - HConstants.DEFAULT_MIN_DAYS_BEFORE_MERGE)).thenReturn(0); for (Map.Entry region : regionSizes.entrySet()) { RegionLoad regionLoad = Mockito.mock(RegionLoad.class); when(regionLoad.getName()).thenReturn(region.getKey()); @@ -286,13 +280,13 @@ protected void setupMocksForNormalizer(Map regionSizes, // this is possibly broken with jdk9, unclear if false positive or not // suppress it for now, fix it when we get to running tests on 9 // see: http://errorprone.info/bugpattern/MockitoCast - when((Object) masterServices.getServerManager().getLoad(sn). - getRegionsLoad().get(region.getKey())).thenReturn(regionLoad); + when((Object) masterServices.getServerManager().getLoad(sn).getRegionsLoad() + .get(region.getKey())).thenReturn(regionLoad); } try { when(masterRpcServices.isSplitOrMergeEnabled(any(RpcController.class), - any(IsSplitOrMergeEnabledRequest.class))).thenReturn( - IsSplitOrMergeEnabledResponse.newBuilder().setEnabled(true).build()); + any(IsSplitOrMergeEnabledRequest.class))) + .thenReturn(IsSplitOrMergeEnabledResponse.newBuilder().setEnabled(true).build()); } catch (ServiceException se) { LOG.debug("error setting isSplitOrMergeEnabled switch", se); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizerOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizerOnCluster.java index 687fee354418..c83a10f1b3a1 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizerOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizerOnCluster.java @@ -18,6 +18,12 @@ */ package org.apache.hadoop.hbase.master.normalizer; +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HBaseTestingUtility; @@ -40,13 +46,6 @@ import org.junit.Test; import org.junit.experimental.categories.Category; -import java.io.IOException; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; - -import static org.junit.Assert.assertEquals; - /** * Testing {@link SimpleRegionNormalizer} on minicluster. */ @@ -61,8 +60,6 @@ public class TestSimpleRegionNormalizerOnCluster { public static void beforeAllTests() throws Exception { // we will retry operations when PleaseHoldException is thrown TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 3); - TEST_UTIL.getConfiguration().setLong(HConstants.HBASE_MASTER_DAYS_BEFORE_MERGE, 0L); - // Start a cluster of two regionservers. TEST_UTIL.startMiniCluster(1); admin = TEST_UTIL.getHBaseAdmin(); @@ -76,8 +73,7 @@ public static void afterAllTests() throws Exception { @Test(timeout = 60000) @SuppressWarnings("deprecation") public void testRegionNormalizationSplitOnCluster() throws Exception { - final TableName TABLENAME = - TableName.valueOf("testRegionNormalizationSplitOnCluster"); + final TableName TABLENAME = TableName.valueOf("testRegionNormalizationSplitOnCluster"); MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); HMaster m = cluster.getMaster(); @@ -146,8 +142,7 @@ public int compare(HRegion o1, HRegion o2) { @Test(timeout = 60000) @SuppressWarnings("deprecation") public void testRegionNormalizationMergeOnCluster() throws Exception { - final TableName TABLENAME = - TableName.valueOf("testRegionNormalizationMergeOnCluster"); + final TableName TABLENAME = TableName.valueOf("testRegionNormalizationMergeOnCluster"); MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); HMaster m = cluster.getMaster(); From a1718d4a4694a920ecde3ee29a14b2fc2e76665f Mon Sep 17 00:00:00 2001 From: mnpoonia Date: Sat, 28 Dec 2019 23:22:40 +0530 Subject: [PATCH 8/8] Fix checkstyle and findbus --- .../hadoop/hbase/master/normalizer/BaseNormalizer.java | 6 +++--- .../hadoop/hbase/master/normalizer/MergeNormalizer.java | 5 +---- .../hbase/master/normalizer/SimpleRegionNormalizer.java | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/BaseNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/BaseNormalizer.java index 0cdf2f2c586e..3df2c3349054 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/BaseNormalizer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/BaseNormalizer.java @@ -71,7 +71,7 @@ protected boolean isMergeEnabled() { RequestConverter.buildIsSplitOrMergeEnabledRequest(Admin.MasterSwitchType.MERGE)) .getEnabled(); } catch (ServiceException se) { - LOG.debug("Unable to determine whether merge is enabled", se); + LOG.warn("Unable to determine whether merge is enabled", se); } return mergeEnabled; } @@ -84,7 +84,7 @@ protected boolean isSplitEnabled() { RequestConverter.buildIsSplitOrMergeEnabledRequest(Admin.MasterSwitchType.SPLIT)) .getEnabled(); } catch (ServiceException se) { - LOG.debug("Unable to determine whether merge is enabled", se); + LOG.warn("Unable to determine whether merge is enabled", se); } return splitEnabled; } @@ -141,7 +141,7 @@ protected List getSplitNormalizationPlan(TableName table) { List tableRegions = masterServices.getAssignmentManager().getRegionStates().getRegionsOfTable(table); double avgRegionSize = getAvgRegionSize(tableRegions); - LOG.debug("Table{}, average region size: ", table, avgRegionSize); + LOG.debug("Table {}, average region size: {}", table, avgRegionSize); int candidateIdx = 0; while (candidateIdx < tableRegions.size()) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java index 3c29cae1e740..38acaa579a9f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java @@ -22,13 +22,11 @@ import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -108,7 +106,6 @@ private boolean shouldNormalize(TableName table) { } private int getMinimumDurationBeforeMerge() { - Configuration entries = HBaseConfiguration.create(); int minDuration = masterServices.getConfiguration().getInt( HConstants.HBASE_MASTER_DAYS_BEFORE_MERGE, HConstants.DEFAULT_MIN_DAYS_BEFORE_MERGE); return minDuration; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java index 17d4d50d1945..4ae44a470bfa 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java @@ -22,10 +22,10 @@ import java.util.Collections; import java.util.Comparator; import java.util.List; -import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory;