/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import static org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_BACKOFF_MILLIS_DEFAULT;
import static org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_BACKOFF_MILLIS_KEY;
import static org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_SIZE_MAX_KEY;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState;

/**
 * Confirm that we will batch region reopens when reopening all table regions. This can avoid the
 * pain associated with reopening too many regions at once.
 */
@Category({ MasterTests.class, MediumTests.class })
public class TestReopenTableRegionsProcedureBatching {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestReopenTableRegionsProcedureBatching.class);

  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
  private static final int BACKOFF_MILLIS_PER_RS = 0;
  private static final int REOPEN_BATCH_SIZE_MAX = 1;

  private static TableName TABLE_NAME = TableName.valueOf("Batching");

  private static byte[] CF = Bytes.toBytes("cf");

  @BeforeClass
  public static void setUp() throws Exception {
    Configuration conf = UTIL.getConfiguration();
    conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
    UTIL.startMiniCluster(1);
    UTIL.createMultiRegionTable(TABLE_NAME, CF);
  }

  @AfterClass
  public static void tearDown() throws Exception {
    UTIL.shutdownMiniCluster();
  }

  @Test
  public void testSmallMaxBatchSize() throws IOException {
    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
    ProcedureExecutor<MasterProcedureEnv> procExec =
      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
    List<RegionInfo> regions = UTIL.getAdmin().getRegions(TABLE_NAME);
    assertTrue(2 <= regions.size());
    Set<StuckRegion> stuckRegions =
      regions.stream().map(r -> stickRegion(am, procExec, r)).collect(Collectors.toSet());
    ReopenTableRegionsProcedure proc =
      new ReopenTableRegionsProcedure(TABLE_NAME, BACKOFF_MILLIS_PER_RS, REOPEN_BATCH_SIZE_MAX);
    procExec.submitProcedure(proc);
    UTIL.waitFor(10000, () -> proc.getState() == ProcedureState.WAITING_TIMEOUT);

    // the first batch should be small
    confirmBatchSize(1, stuckRegions, proc);
    ProcedureSyncWait.waitForProcedureToComplete(procExec, proc, 60_000);

    // other batches should also be small
    assertTrue(proc.getBatchesProcessed() >= regions.size());

    // all regions should only be opened once
    assertEquals(proc.getRegionsReopened(), regions.size());
  }

  @Test
  public void testDefaultMaxBatchSize() throws IOException {
    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
    ProcedureExecutor<MasterProcedureEnv> procExec =
      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
    List<RegionInfo> regions = UTIL.getAdmin().getRegions(TABLE_NAME);
    assertTrue(2 <= regions.size());
    Set<StuckRegion> stuckRegions =
      regions.stream().map(r -> stickRegion(am, procExec, r)).collect(Collectors.toSet());
    ReopenTableRegionsProcedure proc = new ReopenTableRegionsProcedure(TABLE_NAME);
    procExec.submitProcedure(proc);
    UTIL.waitFor(10000, () -> proc.getState() == ProcedureState.WAITING_TIMEOUT);

    // the first batch should be large
    confirmBatchSize(regions.size(), stuckRegions, proc);
    ProcedureSyncWait.waitForProcedureToComplete(procExec, proc, 60_000);

    // all regions should only be opened once
    assertEquals(proc.getRegionsReopened(), regions.size());
  }

  @Test
  public void testNegativeBatchSizeDoesNotBreak() throws IOException {
    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
    ProcedureExecutor<MasterProcedureEnv> procExec =
      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
    List<RegionInfo> regions = UTIL.getAdmin().getRegions(TABLE_NAME);
    assertTrue(2 <= regions.size());
    Set<StuckRegion> stuckRegions =
      regions.stream().map(r -> stickRegion(am, procExec, r)).collect(Collectors.toSet());
    ReopenTableRegionsProcedure proc =
      new ReopenTableRegionsProcedure(TABLE_NAME, BACKOFF_MILLIS_PER_RS, -100);
    procExec.submitProcedure(proc);
    UTIL.waitFor(10000, () -> proc.getState() == ProcedureState.WAITING_TIMEOUT);

    // the first batch should be small
    confirmBatchSize(1, stuckRegions, proc);
    ProcedureSyncWait.waitForProcedureToComplete(procExec, proc, 60_000);

    // other batches should also be small
    assertTrue(proc.getBatchesProcessed() >= regions.size());

    // all regions should only be opened once
    assertEquals(proc.getRegionsReopened(), regions.size());
  }

  @Test
  public void testBatchSizeDoesNotOverflow() {
    ReopenTableRegionsProcedure proc =
      new ReopenTableRegionsProcedure(TABLE_NAME, BACKOFF_MILLIS_PER_RS, Integer.MAX_VALUE);
    int currentBatchSize = 1;
    while (currentBatchSize < Integer.MAX_VALUE) {
      currentBatchSize = proc.progressBatchSize();
      assertTrue(currentBatchSize > 0);
    }
  }

  @Test
  public void testBackoffConfigurationFromTableDescriptor() {
    Configuration conf = HBaseConfiguration.create();
    TableDescriptorBuilder tbd = TableDescriptorBuilder.newBuilder(TABLE_NAME);

    // Default (no batching, no backoff)
    ReopenTableRegionsProcedure proc = ReopenTableRegionsProcedure.throttled(conf, tbd.build());
    assertEquals(PROGRESSIVE_BATCH_BACKOFF_MILLIS_DEFAULT, proc.getReopenBatchBackoffMillis());
    assertEquals(Integer.MAX_VALUE, proc.progressBatchSize());

    // From Configuration (backoff: 100ms, max: 6)
    conf.setLong(PROGRESSIVE_BATCH_BACKOFF_MILLIS_KEY, 100);
    conf.setInt(PROGRESSIVE_BATCH_SIZE_MAX_KEY, 6);
    proc = ReopenTableRegionsProcedure.throttled(conf, tbd.build());
    assertEquals(100, proc.getReopenBatchBackoffMillis());
    assertEquals(2, proc.progressBatchSize());
    assertEquals(4, proc.progressBatchSize());
    assertEquals(6, proc.progressBatchSize());
    assertEquals(6, proc.progressBatchSize());

    // From TableDescriptor (backoff: 200ms, max: 7)
    tbd.setValue(PROGRESSIVE_BATCH_BACKOFF_MILLIS_KEY, "200");
    tbd.setValue(PROGRESSIVE_BATCH_SIZE_MAX_KEY, "7");
    proc = ReopenTableRegionsProcedure.throttled(conf, tbd.build());
    assertEquals(200, proc.getReopenBatchBackoffMillis());
    assertEquals(2, proc.progressBatchSize());
    assertEquals(4, proc.progressBatchSize());
    assertEquals(7, proc.progressBatchSize());
    assertEquals(7, proc.progressBatchSize());
  }

  // Spin until the procedure has dispatched its first batch, then release the stuck regions and
  // wait for the expected number of regions to be reported as reopened.
  private void confirmBatchSize(int expectedBatchSize, Set<StuckRegion> stuckRegions,
    ReopenTableRegionsProcedure proc) {
    while (true) {
      if (proc.getBatchesProcessed() == 0) {
        continue;
      }
      stuckRegions.forEach(this::unstickRegion);
      UTIL.waitFor(5000, () -> expectedBatchSize == proc.getRegionsReopened());
      break;
    }
  }

  // Holds the state needed to restore a region that was artificially stuck in transition.
  static class StuckRegion {
    final TransitRegionStateProcedure trsp;
    final RegionStateNode regionNode;
    final long openSeqNum;

    public StuckRegion(TransitRegionStateProcedure trsp, RegionStateNode regionNode,
      long openSeqNum) {
      this.trsp = trsp;
      this.regionNode = regionNode;
      this.openSeqNum = openSeqNum;
    }
  }

  // Simulate a region stuck in transition: mark it OPENING, clear its open sequence number, and
  // attach an unassign procedure so the reopen procedure cannot immediately make progress on it.
  private StuckRegion stickRegion(AssignmentManager am,
    ProcedureExecutor<MasterProcedureEnv> procExec, RegionInfo regionInfo) {
    RegionStateNode regionNode = am.getRegionStates().getRegionStateNode(regionInfo);
    TransitRegionStateProcedure trsp =
      TransitRegionStateProcedure.unassign(procExec.getEnvironment(), regionInfo);
    regionNode.lock();
    long openSeqNum;
    try {
      openSeqNum = regionNode.getOpenSeqNum();
      regionNode.setState(State.OPENING);
      regionNode.setOpenSeqNum(-1L);
      regionNode.setProcedure(trsp);
    } finally {
      regionNode.unlock();
    }
    return new StuckRegion(trsp, regionNode, openSeqNum);
  }

  // Restore the region to OPEN with its original open sequence number and detach the procedure,
  // allowing the reopen procedure to finish with it.
  private void unstickRegion(StuckRegion stuckRegion) {
    stuckRegion.regionNode.lock();
    try {
      stuckRegion.regionNode.setState(State.OPEN);
      stuckRegion.regionNode.setOpenSeqNum(stuckRegion.openSeqNum);
      stuckRegion.regionNode.unsetProcedure(stuckRegion.trsp);
    } finally {
      stuckRegion.regionNode.unlock();
    }
  }
}