Skip to content

Commit 5f5375e

Browse files
author
zhanghaobo
committed
HDFS-16368. DFSadmin supports refresh topology info without restarting namenode
1 parent 6dddbd4 commit 5f5375e

13 files changed

Lines changed: 221 additions & 0 deletions

File tree

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2426,6 +2426,13 @@ public void refreshNodes() throws IOException {
24262426
}
24272427
}
24282428

2429+
public void refreshTopology() throws IOException{
2430+
checkOpen();
2431+
try (TraceScope ignored = tracer.newScope("refreshTopology")) {
2432+
namenode.refreshTopology();
2433+
}
2434+
}
2435+
24292436
/**
24302437
* Dumps DFS data structures into specified file.
24312438
*

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1691,6 +1691,10 @@ public void refreshNodes() throws IOException {
16911691
dfs.refreshNodes();
16921692
}
16931693

1694+
public void refreshTopology() throws IOException{
1695+
dfs.refreshTopology();
1696+
}
1697+
16941698
/**
16951699
* Finalize previously upgraded files system state.
16961700
* @throws IOException

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -978,6 +978,14 @@ boolean setSafeMode(HdfsConstants.SafeModeAction action, boolean isChecked)
978978
@Idempotent
979979
void refreshNodes() throws IOException;
980980

981+
/**
982+
* Tells the namenode to refresh the network topology info
983+
*
984+
* @throws IOException
985+
*/
986+
@Idempotent
987+
void refreshTopology() throws IOException;
988+
981989
/**
982990
* Finalize previous upgrade.
983991
* Remove file system state saved during the upgrade.

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@
171171
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.OpenFilesBatchResponseProto;
172172
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.RecoverLeaseRequestProto;
173173
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.RefreshNodesRequestProto;
174+
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.RefreshTopologyRequestProto;
174175
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.RemoveCacheDirectiveRequestProto;
175176
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.RemoveCachePoolRequestProto;
176177
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.Rename2RequestProto;
@@ -288,6 +289,9 @@ public class ClientNamenodeProtocolTranslatorPB implements
288289
private final static RefreshNodesRequestProto VOID_REFRESH_NODES_REQUEST =
289290
RefreshNodesRequestProto.newBuilder().build();
290291

292+
/** Empty request message passed to the refreshTopology RPC, which takes no parameters. */
private static final RefreshTopologyRequestProto VOID_REFRESH_TOPOLOGY_REQUEST =
    RefreshTopologyRequestProto.newBuilder().build();
294+
291295
private final static FinalizeUpgradeRequestProto
292296
VOID_FINALIZE_UPGRADE_REQUEST =
293297
FinalizeUpgradeRequestProto.newBuilder().build();
@@ -894,6 +898,15 @@ public void refreshNodes() throws IOException {
894898
}
895899
}
896900

901+
@Override
902+
public void refreshTopology() throws IOException {
903+
try {
904+
rpcProxy.refreshTopology(null, VOID_REFRESH_TOPOLOGY_REQUEST);
905+
} catch (ServiceException e) {
906+
throw ProtobufHelper.getRemoteException(e);
907+
}
908+
}
909+
897910
@Override
898911
public void finalizeUpgrade() throws IOException {
899912
try {

hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,12 @@ message RefreshNodesRequestProto { // no parameters
470470
message RefreshNodesResponseProto { // void response
471471
}
472472

473+
// Request of the refreshTopology RPC; it carries no parameters.
message RefreshTopologyRequestProto {
}

// Response of the refreshTopology RPC; it is a void response with no fields.
message RefreshTopologyResponseProto {
}
478+
473479
message FinalizeUpgradeRequestProto { // no parameters
474480
}
475481

@@ -931,6 +937,7 @@ service ClientNamenodeProtocol {
931937
rpc restoreFailedStorage(RestoreFailedStorageRequestProto)
932938
returns(RestoreFailedStorageResponseProto);
933939
rpc refreshNodes(RefreshNodesRequestProto) returns(RefreshNodesResponseProto);
940+
rpc refreshTopology(RefreshTopologyRequestProto) returns(RefreshTopologyResponseProto);
934941
rpc finalizeUpgrade(FinalizeUpgradeRequestProto)
935942
returns(FinalizeUpgradeResponseProto);
936943
rpc upgradeStatus(UpgradeStatusRequestProto)

hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterClientProtocol.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,6 +1129,14 @@ public void refreshNodes() throws IOException {
11291129
rpcClient.invokeConcurrent(nss, method, true, true);
11301130
}
11311131

1132+
@Override
1133+
public void refreshTopology() throws IOException {
1134+
// Router not support this operation, because this maybe refresh multi namespaces
1135+
String methodName = RouterRpcServer.getMethodName();
1136+
throw new UnsupportedOperationException(
1137+
"Operation \"" + methodName + "\" is not supported");
1138+
}
1139+
11321140
@Override
11331141
public void finalizeUpgrade() throws IOException {
11341142
rpcServer.checkOperation(NameNode.OperationCategory.UNCHECKED);

hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1194,6 +1194,11 @@ public void refreshNodes() throws IOException {
11941194
clientProto.refreshNodes();
11951195
}
11961196

1197+
@Override // ClientProtocol
1198+
public void refreshTopology() throws IOException {
1199+
clientProto.refreshTopology();
1200+
}
1201+
11971202
@Override // ClientProtocol
11981203
public void finalizeUpgrade() throws IOException {
11991204
clientProto.finalizeUpgrade();

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,8 @@
172172
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.HAServiceStateResponseProto;
173173
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.IsFileClosedRequestProto;
174174
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.IsFileClosedResponseProto;
175+
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.RefreshTopologyRequestProto;
176+
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.RefreshTopologyResponseProto;
175177
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.UpgradeStatusRequestProto;
176178
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.UpgradeStatusResponseProto;
177179
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ListCacheDirectivesRequestProto;
@@ -374,6 +376,9 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
374376
private static final RefreshNodesResponseProto VOID_REFRESHNODES_RESPONSE =
375377
RefreshNodesResponseProto.newBuilder().build();
376378

379+
/** Empty response message returned by the refreshTopology RPC handler. */
private static final RefreshTopologyResponseProto VOID_REFRESHTOPOLOGY_RESPONSE =
    RefreshTopologyResponseProto.newBuilder().build();
381+
377382
private static final FinalizeUpgradeResponseProto VOID_FINALIZEUPGRADE_RESPONSE =
378383
FinalizeUpgradeResponseProto.newBuilder().build();
379384

@@ -972,6 +977,16 @@ public RefreshNodesResponseProto refreshNodes(RpcController controller,
972977

973978
}
974979

980+
@Override
981+
public RefreshTopologyResponseProto refreshTopology(RpcController controller, RefreshTopologyRequestProto request) throws ServiceException {
982+
try {
983+
server.refreshTopology();
984+
return VOID_REFRESHTOPOLOGY_RESPONSE;
985+
} catch (IOException e) {
986+
throw new ServiceException(e);
987+
}
988+
}
989+
975990
@Override
976991
public FinalizeUpgradeResponseProto finalizeUpgrade(RpcController controller,
977992
FinalizeUpgradeRequestProto req) throws ServiceException {

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
import java.net.InetSocketAddress;
6868
import java.net.UnknownHostException;
6969
import java.util.*;
70+
import java.util.Map.Entry;
7071
import java.util.concurrent.ThreadLocalRandom;
7172
import java.util.concurrent.TimeUnit;
7273
import java.util.function.Consumer;
@@ -1281,6 +1282,57 @@ nodes with its data cleared (or user can just remove the StorageID
12811282
}
12821283
}
12831284

1285+
/**
1286+
* refresh the network topology of this cluster based on the mapping_topology.data file.
1287+
*/
1288+
public void refreshTopology() throws IOException {
1289+
long start = System.currentTimeMillis();
1290+
int datanodeNums = 0;
1291+
Set<String> storageIds = datanodeMap.keySet();
1292+
Set<String> forIterations = new HashSet<>();
1293+
List<String> datanodeIpAddrs = new ArrayList<>();
1294+
List<DatanodeDescriptor> datanodeDescriptors = new ArrayList<>();
1295+
// To avoid ConcurrentModificationException
1296+
forIterations.addAll(storageIds);
1297+
1298+
for (String storageId : forIterations) {
1299+
DatanodeDescriptor dnDescriptor = datanodeMap.get(storageId);
1300+
String ipAddr = dnDescriptor.getIpAddr();
1301+
datanodeIpAddrs.add(ipAddr);
1302+
datanodeDescriptors.add(dnDescriptor);
1303+
datanodeNums++;
1304+
}
1305+
dnsToSwitchMapping.reloadCachedMappings(datanodeIpAddrs);
1306+
List<String> rNameList = dnsToSwitchMapping.resolve(datanodeIpAddrs);
1307+
1308+
for (int i = 0; i < datanodeNums; i++) {
1309+
DatanodeDescriptor dnDescriptor = datanodeDescriptors.get(i);
1310+
String originNetwork = dnDescriptor.getNetworkLocation();
1311+
String resolvedNetwork = rNameList.get(i);
1312+
1313+
if (dnDescriptor.getNetworkLocation().equals(resolvedNetwork)) {
1314+
continue;
1315+
}
1316+
try {
1317+
synchronized (this) {
1318+
dnDescriptor.setNetworkLocation(resolvedNetwork);
1319+
networktopology.add(dnDescriptor); // may throw InvalidTopologyException
1320+
}
1321+
} catch (Throwable e) {
1322+
LOG.error(getClass().getSimpleName() + ".refreshTopology: update datanode " + dnDescriptor +
1323+
" failed. reset from Rack: " + resolvedNetwork + " to Rack: " + originNetwork);
1324+
dnDescriptor.setNetworkLocation(originNetwork);
1325+
throw new IOException(getClass().getSimpleName() + ".refreshTopology: update datanode " + dnDescriptor +
1326+
" failed. reset from Rack: " + resolvedNetwork + " to Rack: " + originNetwork);
1327+
}
1328+
LOG.info(getClass().getSimpleName() + ".refreshTopology: update datanode :" + dnDescriptor +
1329+
" from Rack: " + dnDescriptor.getNetworkLocation() + " to Rack: " + resolvedNetwork);
1330+
checkIfClusterIsNowMultiRack(dnDescriptor);
1331+
}
1332+
long end = System.currentTimeMillis() - start;
1333+
LOG.info(getClass().getSimpleName() + ".refreshTopology: " + "costs " + end + "ms");
1334+
}
1335+
12841336
/**
12851337
* Rereads conf to get hosts and exclude list file names.
12861338
* Rereads the files to update the hosts and exclude lists. It

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5052,6 +5052,20 @@ void refreshNodes() throws IOException {
50525052
logAuditEvent(true, operationName, null);
50535053
}
50545054

5055+
public void refreshTopology() throws IOException {
5056+
String operationName = "refreshTopology";
5057+
checkOperation(OperationCategory.UNCHECKED);
5058+
checkSuperuserPrivilege(operationName);
5059+
writeLock();
5060+
try {
5061+
checkOperation(OperationCategory.UNCHECKED);
5062+
getBlockManager().getDatanodeManager().refreshTopology();
5063+
} finally {
5064+
writeUnlock(operationName);
5065+
}
5066+
logAuditEvent(true, operationName, null);
5067+
}
5068+
50555069
void setBalancerBandwidth(long bandwidth) throws IOException {
50565070
String operationName = "setBalancerBandwidth";
50575071
checkOperation(OperationCategory.WRITE);

0 commit comments

Comments
 (0)