001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import static org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_BACKOFF_MILLIS_DEFAULT;
021import static org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_BACKOFF_MILLIS_KEY;
022import static org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure.PROGRESSIVE_BATCH_SIZE_MAX_KEY;
023import static org.junit.Assert.assertEquals;
024import static org.junit.Assert.assertTrue;
025
026import java.io.IOException;
027import java.util.List;
028import java.util.Set;
029import java.util.stream.Collectors;
030import org.apache.hadoop.conf.Configuration;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.HBaseConfiguration;
033import org.apache.hadoop.hbase.HBaseTestingUtil;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.client.RegionInfo;
036import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
037import org.apache.hadoop.hbase.master.RegionState.State;
038import org.apache.hadoop.hbase.master.ServerManager;
039import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
040import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
041import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
042import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
043import org.apache.hadoop.hbase.testclassification.MasterTests;
044import org.apache.hadoop.hbase.testclassification.MediumTests;
045import org.apache.hadoop.hbase.util.Bytes;
046import org.junit.AfterClass;
047import org.junit.BeforeClass;
048import org.junit.ClassRule;
049import org.junit.Test;
050import org.junit.experimental.categories.Category;
051
052import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState;
053
054/**
055 * Confirm that we will batch region reopens when reopening all table regions. This can avoid the
056 * pain associated with reopening too many regions at once.
057 */
058@Category({ MasterTests.class, MediumTests.class })
059public class TestReopenTableRegionsProcedureBatching {
060
061  @ClassRule
062  public static final HBaseClassTestRule CLASS_RULE =
063    HBaseClassTestRule.forClass(TestReopenTableRegionsProcedureBatching.class);
064
065  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
066  private static final int BACKOFF_MILLIS_PER_RS = 0;
067  private static final int REOPEN_BATCH_SIZE_MAX = 1;
068
069  private static TableName TABLE_NAME = TableName.valueOf("Batching");
070
071  private static byte[] CF = Bytes.toBytes("cf");
072
073  @BeforeClass
074  public static void setUp() throws Exception {
075    Configuration conf = UTIL.getConfiguration();
076    conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
077    UTIL.startMiniCluster(1);
078    UTIL.createMultiRegionTable(TABLE_NAME, CF);
079  }
080
081  @AfterClass
082  public static void tearDown() throws Exception {
083    UTIL.shutdownMiniCluster();
084  }
085
086  @Test
087  public void testSmallMaxBatchSize() throws IOException {
088    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
089    ProcedureExecutor<MasterProcedureEnv> procExec =
090      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
091    List<RegionInfo> regions = UTIL.getAdmin().getRegions(TABLE_NAME);
092    assertTrue(2 <= regions.size());
093    Set<StuckRegion> stuckRegions =
094      regions.stream().map(r -> stickRegion(am, procExec, r)).collect(Collectors.toSet());
095    ReopenTableRegionsProcedure proc =
096      new ReopenTableRegionsProcedure(TABLE_NAME, BACKOFF_MILLIS_PER_RS, REOPEN_BATCH_SIZE_MAX);
097    procExec.submitProcedure(proc);
098    UTIL.waitFor(10000, () -> proc.getState() == ProcedureState.WAITING_TIMEOUT);
099
100    // the first batch should be small
101    confirmBatchSize(1, stuckRegions, proc);
102    ProcedureSyncWait.waitForProcedureToComplete(procExec, proc, 60_000);
103
104    // other batches should also be small
105    assertTrue(proc.getBatchesProcessed() >= regions.size());
106
107    // all regions should only be opened once
108    assertEquals(proc.getRegionsReopened(), regions.size());
109  }
110
111  @Test
112  public void testDefaultMaxBatchSize() throws IOException {
113    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
114    ProcedureExecutor<MasterProcedureEnv> procExec =
115      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
116    List<RegionInfo> regions = UTIL.getAdmin().getRegions(TABLE_NAME);
117    assertTrue(2 <= regions.size());
118    Set<StuckRegion> stuckRegions =
119      regions.stream().map(r -> stickRegion(am, procExec, r)).collect(Collectors.toSet());
120    ReopenTableRegionsProcedure proc = new ReopenTableRegionsProcedure(TABLE_NAME);
121    procExec.submitProcedure(proc);
122    UTIL.waitFor(10000, () -> proc.getState() == ProcedureState.WAITING_TIMEOUT);
123
124    // the first batch should be large
125    confirmBatchSize(regions.size(), stuckRegions, proc);
126    ProcedureSyncWait.waitForProcedureToComplete(procExec, proc, 60_000);
127
128    // all regions should only be opened once
129    assertEquals(proc.getRegionsReopened(), regions.size());
130  }
131
132  @Test
133  public void testNegativeBatchSizeDoesNotBreak() throws IOException {
134    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
135    ProcedureExecutor<MasterProcedureEnv> procExec =
136      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
137    List<RegionInfo> regions = UTIL.getAdmin().getRegions(TABLE_NAME);
138    assertTrue(2 <= regions.size());
139    Set<StuckRegion> stuckRegions =
140      regions.stream().map(r -> stickRegion(am, procExec, r)).collect(Collectors.toSet());
141    ReopenTableRegionsProcedure proc =
142      new ReopenTableRegionsProcedure(TABLE_NAME, BACKOFF_MILLIS_PER_RS, -100);
143    procExec.submitProcedure(proc);
144    UTIL.waitFor(10000, () -> proc.getState() == ProcedureState.WAITING_TIMEOUT);
145
146    // the first batch should be small
147    confirmBatchSize(1, stuckRegions, proc);
148    ProcedureSyncWait.waitForProcedureToComplete(procExec, proc, 60_000);
149
150    // other batches should also be small
151    assertTrue(proc.getBatchesProcessed() >= regions.size());
152
153    // all regions should only be opened once
154    assertEquals(proc.getRegionsReopened(), regions.size());
155  }
156
157  @Test
158  public void testBatchSizeDoesNotOverflow() {
159    ReopenTableRegionsProcedure proc =
160      new ReopenTableRegionsProcedure(TABLE_NAME, BACKOFF_MILLIS_PER_RS, Integer.MAX_VALUE);
161    int currentBatchSize = 1;
162    while (currentBatchSize < Integer.MAX_VALUE) {
163      currentBatchSize = proc.progressBatchSize();
164      assertTrue(currentBatchSize > 0);
165    }
166  }
167
168  @Test
169  public void testBackoffConfigurationFromTableDescriptor() {
170    Configuration conf = HBaseConfiguration.create();
171    TableDescriptorBuilder tbd = TableDescriptorBuilder.newBuilder(TABLE_NAME);
172
173    // Default (no batching, no backoff)
174    ReopenTableRegionsProcedure proc = ReopenTableRegionsProcedure.throttled(conf, tbd.build());
175    assertEquals(PROGRESSIVE_BATCH_BACKOFF_MILLIS_DEFAULT, proc.getReopenBatchBackoffMillis());
176    assertEquals(Integer.MAX_VALUE, proc.progressBatchSize());
177
178    // From Configuration (backoff: 100ms, max: 6)
179    conf.setLong(PROGRESSIVE_BATCH_BACKOFF_MILLIS_KEY, 100);
180    conf.setInt(PROGRESSIVE_BATCH_SIZE_MAX_KEY, 6);
181    proc = ReopenTableRegionsProcedure.throttled(conf, tbd.build());
182    assertEquals(100, proc.getReopenBatchBackoffMillis());
183    assertEquals(2, proc.progressBatchSize());
184    assertEquals(4, proc.progressBatchSize());
185    assertEquals(6, proc.progressBatchSize());
186    assertEquals(6, proc.progressBatchSize());
187
188    // From TableDescriptor (backoff: 200ms, max: 7)
189    tbd.setValue(PROGRESSIVE_BATCH_BACKOFF_MILLIS_KEY, "200");
190    tbd.setValue(PROGRESSIVE_BATCH_SIZE_MAX_KEY, "7");
191    proc = ReopenTableRegionsProcedure.throttled(conf, tbd.build());
192    assertEquals(200, proc.getReopenBatchBackoffMillis());
193    assertEquals(2, proc.progressBatchSize());
194    assertEquals(4, proc.progressBatchSize());
195    assertEquals(7, proc.progressBatchSize());
196    assertEquals(7, proc.progressBatchSize());
197  }
198
199  private void confirmBatchSize(int expectedBatchSize, Set<StuckRegion> stuckRegions,
200    ReopenTableRegionsProcedure proc) {
201    while (true) {
202      if (proc.getBatchesProcessed() == 0) {
203        continue;
204      }
205      stuckRegions.forEach(this::unstickRegion);
206      UTIL.waitFor(5000, () -> expectedBatchSize == proc.getRegionsReopened());
207      break;
208    }
209  }
210
211  static class StuckRegion {
212    final TransitRegionStateProcedure trsp;
213    final RegionStateNode regionNode;
214    final long openSeqNum;
215
216    public StuckRegion(TransitRegionStateProcedure trsp, RegionStateNode regionNode,
217      long openSeqNum) {
218      this.trsp = trsp;
219      this.regionNode = regionNode;
220      this.openSeqNum = openSeqNum;
221    }
222  }
223
224  private StuckRegion stickRegion(AssignmentManager am,
225    ProcedureExecutor<MasterProcedureEnv> procExec, RegionInfo regionInfo) {
226    RegionStateNode regionNode = am.getRegionStates().getRegionStateNode(regionInfo);
227    TransitRegionStateProcedure trsp =
228      TransitRegionStateProcedure.unassign(procExec.getEnvironment(), regionInfo);
229    regionNode.lock();
230    long openSeqNum;
231    try {
232      openSeqNum = regionNode.getOpenSeqNum();
233      regionNode.setState(State.OPENING);
234      regionNode.setOpenSeqNum(-1L);
235      regionNode.setProcedure(trsp);
236    } finally {
237      regionNode.unlock();
238    }
239    return new StuckRegion(trsp, regionNode, openSeqNum);
240  }
241
242  private void unstickRegion(StuckRegion stuckRegion) {
243    stuckRegion.regionNode.lock();
244    try {
245      stuckRegion.regionNode.setState(State.OPEN);
246      stuckRegion.regionNode.setOpenSeqNum(stuckRegion.openSeqNum);
247      stuckRegion.regionNode.unsetProcedure(stuckRegion.trsp);
248    } finally {
249      stuckRegion.regionNode.unlock();
250    }
251  }
252}