Skip to content

Commit fb3b7a4

Browse files
author
liuminjian
committed
Feature: When the cluster capacity is almost full, make the cluster read only
Signed-off-by: liuminjian <[email protected]>
1 parent b184f47 commit fb3b7a4

31 files changed

+366
-102
lines changed

conf/chunkserver.conf

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ copyset.sync_chunk_limits=2097152
128128
copyset.sync_threshold=65536
129129
# check syncing interval
130130
copyset.check_syncing_interval_ms=500
131+
# interval (in ms) to wait before retrying when disk space is insufficient
132+
copyset.wait_for_disk_freed_interval_ms=60000
131133

132134
#
133135
# Clone settings
@@ -215,6 +217,11 @@ chunkfilepool.allocate_percent=80
215217
chunkfilepool.chunk_file_pool_size=1GB
216218
# The thread num for format chunks
217219
chunkfilepool.thread_num=1
220+
# When the chunkserver disk usage exceeds this percentage, the heartbeat reports the disk state as full
221+
chunkfilepool.disk_usage_percent_limit=95
222+
# Number of chunks to keep in reserve. When the number of available chunks falls too low,
223+
# write operations return readonly to the client, so that chunkfilepool and walfilepool can still obtain chunks.
224+
chunkfilepool.chunk_reserved=100
218225

219226
#
220227
# WAL file pool

proto/chunk.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ enum CHUNK_OP_STATUS {
8585
CHUNK_OP_STATUS_BACKWARD = 10; // the requested version is behind the chunk's current version
8686
CHUNK_OP_STATUS_CHUNK_EXIST = 11; // chunk already exists
8787
CHUNK_OP_STATUS_EPOCH_TOO_OLD = 12; // request epoch too old
88+
CHUNK_OP_STATUS_READONLY = 13; // If there is insufficient disk space, set the chunkserver to read-only
8889
};
8990

9091
message ChunkResponse {

proto/heartbeat.proto

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,13 @@ message CopysetStatistics {
7171
required uint32 writeIOPS = 4;
7272
}
7373

74+
enum ErrorType {
75+
NORMAL = 0;
76+
DISKFULL = 1;
77+
}
78+
7479
message DiskState {
75-
required uint32 errType = 1;
80+
required ErrorType errType = 1;
7681
required string errMsg = 2;
7782
}
7883

proto/topology.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ enum ChunkServerStatus {
4848
enum DiskState {
4949
DISKNORMAL = 0;
5050
DISKERROR = 1;
51+
DISKFULL = 2;
5152
}
5253

5354
enum OnlineState {

src/chunkserver/chunkserver.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -710,6 +710,12 @@ void ChunkServer::InitCopysetNodeOptions(
710710
LOG_IF(FATAL, !conf->GetUInt32Value("copyset.sync_trigger_seconds",
711711
&copysetNodeOptions->syncTriggerSeconds));
712712
}
713+
LOG_IF(FATAL, !conf->GetUInt32Value(
714+
"copyset.wait_for_disk_freed_interval_ms",
715+
&copysetNodeOptions->waitForDiskFreedIntervalMs));
716+
LOG_IF(FATAL, !conf->GetUInt32Value(
717+
"chunkfilepool.chunk_reserved",  // must match the key defined in conf/chunkserver.conf; a missing key is FATAL
718+
&copysetNodeOptions->chunkReserved));
713719
}
714720

715721
void ChunkServer::InitCopyerOptions(
@@ -781,6 +787,9 @@ void ChunkServer::InitHeartbeatOptions(
781787
&heartbeatOptions->intervalSec));
782788
LOG_IF(FATAL, !conf->GetUInt32Value("mds.heartbeat_timeout",
783789
&heartbeatOptions->timeout));
790+
LOG_IF(FATAL, !conf->GetUInt32Value(
791+
"chunkfilepool.disk_usage_percent_limit",
792+
&heartbeatOptions->chunkserverDiskLimit));
784793
}
785794

786795
void ChunkServer::InitRegisterOptions(

src/chunkserver/config_info.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ struct CopysetNodeOptions {
140140
uint64_t syncThreshold = 64 * 1024;
141141
// check syncing interval
142142
uint32_t checkSyncingIntervalMs = 500u;
143+
// interval (in ms) to wait before retrying when disk space is insufficient
144+
uint32_t waitForDiskFreedIntervalMs = 60000;
145+
// number of chunks to keep in reserve
146+
uint32_t chunkReserved = 100;
143147

144148
CopysetNodeOptions();
145149
};

src/chunkserver/copyset_node.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ int CopysetNode::Init(const CopysetNodeOptions &options) {
135135
dsOptions.locationLimit = options.locationLimit;
136136
dsOptions.enableOdsyncWhenOpenChunkFile =
137137
options.enableOdsyncWhenOpenChunkFile;
138+
dsOptions.waitForDiskFreedIntervalMs =
139+
options.waitForDiskFreedIntervalMs;
140+
dsOptions.chunkReserved = options.chunkReserved;
138141
dataStore_ = std::make_shared<CSDataStore>(options.localFileSystem,
139142
options.chunkFilePool,
140143
dsOptions);
@@ -345,6 +348,10 @@ void CopysetNode::WaitSnapshotDone() {
345348
}
346349
}
347350

351+
// Reports whether this copyset node should be treated as read-only.
// Returns true exactly when dataStore_->EnoughChunk() returns false.
// NOTE(review): presumably EnoughChunk() compares the pool's free chunks
// against the configured chunkReserved value — confirm in CSDataStore.
bool CopysetNode::ReadOnly() const {
352+
return !dataStore_->EnoughChunk();
353+
}
354+
348355
void CopysetNode::save_snapshot_background(::braft::SnapshotWriter *writer,
349356
::braft::Closure *done) {
350357
brpc::ClosureGuard doneGuard(done);

src/chunkserver/copyset_node.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,8 @@ class CopysetNode : public braft::StateMachine,
469469

470470
void WaitSnapshotDone();
471471

472+
bool ReadOnly() const;
473+
472474
private:
473475
inline std::string GroupId() {
474476
return ToGroupId(logicPoolId_, copysetId_);

src/chunkserver/datastore/chunkserver_chunkfile.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
* File Created: Thursday, 6th September 2018 10:49:53 am
2020
* Author: yangyaokai
2121
*/
22+
#include <errno.h>
2223
#include <fcntl.h>
2324
#include <algorithm>
2425
#include <memory>
@@ -207,7 +208,8 @@ CSErrorCode CSChunkFile::Open(bool createFile) {
207208
if (rc != 0 && rc != -EEXIST) {
208209
LOG(ERROR) << "Error occured when create file."
209210
<< " filepath = " << chunkFilePath;
210-
return CSErrorCode::InternalError;
211+
return rc == -ENOSPC ? CSErrorCode::NoSpaceError :
212+
CSErrorCode::InternalError;
211213
}
212214
}
213215
int rc = -1;
@@ -400,7 +402,8 @@ CSErrorCode CSChunkFile::Write(SequenceNum sn,
400402
<< "ChunkID: " << chunkId_
401403
<< ",request sn: " << sn
402404
<< ",chunk sn: " << metaPage_.sn;
403-
return CSErrorCode::InternalError;
405+
return rc == -ENOSPC ? CSErrorCode::NoSpaceError :
406+
CSErrorCode::InternalError;
404407
}
405408
// If it is a clone chunk, the bitmap will be updated
406409
CSErrorCode errorCode = flush();
@@ -478,7 +481,8 @@ CSErrorCode CSChunkFile::Paste(const char * buf, off_t offset, size_t length) {
478481
<< "ChunkID: " << chunkId_
479482
<< ", offset: " << offset
480483
<< ", length: " << length;
481-
return CSErrorCode::InternalError;
484+
return rc == -ENOSPC ? CSErrorCode::NoSpaceError :
485+
CSErrorCode::InternalError;
482486
}
483487
}
484488

src/chunkserver/datastore/chunkserver_datastore.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ CSDataStore::CSDataStore(std::shared_ptr<LocalFileSystem> lfs,
4444
baseDir_(options.baseDir),
4545
chunkFilePool_(chunkFilePool),
4646
lfs_(lfs),
47-
enableOdsyncWhenOpenChunkFile_(options.enableOdsyncWhenOpenChunkFile) {
47+
enableOdsyncWhenOpenChunkFile_(options.enableOdsyncWhenOpenChunkFile),
48+
waitForDiskFreedIntervalMs_(options.waitForDiskFreedIntervalMs),
49+
chunkReserved_(options.chunkReserved) {
4850
CHECK(!baseDir_.empty()) << "Create datastore failed";
4951
CHECK(lfs_ != nullptr) << "Create datastore failed";
5052
CHECK(chunkFilePool_ != nullptr) << "Create datastore failed";

0 commit comments

Comments
 (0)