1-
21/**
32 * Licensed to the Apache Software Foundation (ASF) under one
43 * or more contributor license agreements. See the NOTICE file
4342/**
4443 * Mimics a ReentrantReadWriteLock but does not directly implement the interface
4544 * so more sophisticated locking capabilities and logging/metrics are possible.
46- * If {@link org.apache.hadoop.hdfs.DFSConfigKeys#DFS_NAMENODE_LOCK_DETAILED_METRICS_KEY}
47- * is set to be true, metrics will be emitted into the FSNamesystem metrics
48- * registry for each operation which acquires this lock indicating how long
49- * the operation held the lock for. Note that if a thread dies, metrics produced
50- * after the most recent snapshot will be lost due to the use of
45+ * {@link org.apache.hadoop.hdfs.DFSConfigKeys#DFS_NAMENODE_LOCK_DETAILED_METRICS_KEY}
46+ * to be true, metrics will be emitted into the FSNamesystem metrics registry
47+ * for each operation which acquires this lock indicating how long the operation
48+ * held the lock for. These metrics have names of the form
49+ * FSN(Read|Write)LockNanosOperationName, where OperationName denotes the name
50+ * of the operation that initiated the lock hold (this will be OTHER for certain
51+ * uncategorized operations) and they export the hold time values in
52+ * nanoseconds. Note that if a thread dies, metrics produced after the
53+ * most recent snapshot will be lost due to the use of
5154 * {@link MutableRatesWithAggregation}. However since threads are re-used
5255 * between operations this should not generally be an issue.
5356 */
@@ -63,24 +66,26 @@ class FSNamesystemLock {
6366 * Log statements about long lock hold times will not be produced more
6467 * frequently than this interval.
6568 */
66- private final long lockSuppressWarningInterval ;
69+ private final long lockSuppressWarningIntervalMs ;
6770
6871 /** Threshold (ms) for long holding write lock report. */
69- private final long writeLockReportingThreshold ;
72+ private final long writeLockReportingThresholdMs ;
7073 /** Last time stamp for write lock. Keep the longest one for multi-entrance.*/
71- private long writeLockHeldTimeStamp ;
74+ private long writeLockHeldTimeStampNanos ;
7275 private int numWriteLockWarningsSuppressed = 0 ;
73- private long timeStampOfLastWriteLockReport = 0 ;
74- private long longestWriteLockHeldInterval = 0 ;
76+ /** Time stamp (ms) of the last time a write lock report was written. */
77+ private long timeStampOfLastWriteLockReportMs = 0 ;
78+ /** Longest time (ms) a write lock was held since the last report. */
79+ private long longestWriteLockHeldIntervalMs = 0 ;
7580
7681 /** Threshold (ms) for long holding read lock report. */
77- private final long readLockReportingThreshold ;
82+ private final long readLockReportingThresholdMs ;
7883 /**
7984 * Last time stamp for read lock. Keep the longest one for
8085 * multi-entrance. This is ThreadLocal since there could be
8186 * many read locks held simultaneously.
8287 */
83- private final ThreadLocal <Long > readLockHeldTimeStamp =
88+ private final ThreadLocal <Long > readLockHeldTimeStampNanos =
8489 new ThreadLocal <Long >() {
8590 @ Override
8691 public Long initialValue () {
@@ -89,13 +94,16 @@ public Long initialValue() {
8994 };
9095 private final AtomicInteger numReadLockWarningsSuppressed =
9196 new AtomicInteger (0 );
92- private final AtomicLong timeStampOfLastReadLockReport = new AtomicLong (0 );
93- private final AtomicLong longestReadLockHeldInterval = new AtomicLong (0 );
97+ /** Time stamp (ms) of the last time a read lock report was written. */
98+ private final AtomicLong timeStampOfLastReadLockReportMs = new AtomicLong (0 );
99+ /** Longest time (ms) a read lock was held since the last report. */
100+ private final AtomicLong longestReadLockHeldIntervalMs = new AtomicLong (0 );
94101
95102 @ VisibleForTesting
96103 static final String OP_NAME_OTHER = "OTHER" ;
97104 private static final String READ_LOCK_METRIC_PREFIX = "FSNReadLock" ;
98105 private static final String WRITE_LOCK_METRIC_PREFIX = "FSNWriteLock" ;
106+ private static final String LOCK_METRIC_SUFFIX = "Nanos" ;
99107
100108 FSNamesystemLock (Configuration conf ,
101109 MutableRatesWithAggregation detailedHoldTimeMetrics ) {
@@ -110,13 +118,13 @@ public Long initialValue() {
110118 this .coarseLock = new ReentrantReadWriteLock (fair );
111119 this .timer = timer ;
112120
113- this .writeLockReportingThreshold = conf .getLong (
121+ this .writeLockReportingThresholdMs = conf .getLong (
114122 DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY ,
115123 DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT );
116- this .readLockReportingThreshold = conf .getLong (
124+ this .readLockReportingThresholdMs = conf .getLong (
117125 DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_KEY ,
118126 DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_DEFAULT );
119- this .lockSuppressWarningInterval = conf .getTimeDuration (
127+ this .lockSuppressWarningIntervalMs = conf .getTimeDuration (
120128 DFS_LOCK_SUPPRESS_WARNING_INTERVAL_KEY ,
121129 DFS_LOCK_SUPPRESS_WARNING_INTERVAL_DEFAULT , TimeUnit .MILLISECONDS );
122130 this .metricsEnabled = conf .getBoolean (
@@ -130,7 +138,7 @@ public Long initialValue() {
130138 public void readLock () {
131139 coarseLock .readLock ().lock ();
132140 if (coarseLock .getReadHoldCount () == 1 ) {
133- readLockHeldTimeStamp .set (timer .monotonicNow ());
141+ readLockHeldTimeStampNanos .set (timer .monotonicNowNanos ());
134142 }
135143 }
136144
@@ -140,56 +148,59 @@ public void readUnlock() {
140148
141149 public void readUnlock (String opName ) {
142150 final boolean needReport = coarseLock .getReadHoldCount () == 1 ;
143- final long readLockInterval =
144- timer .monotonicNow () - readLockHeldTimeStamp .get ();
151+ final long readLockIntervalNanos =
152+ timer .monotonicNowNanos () - readLockHeldTimeStampNanos .get ();
145153 coarseLock .readLock ().unlock ();
146154
147155 if (needReport ) {
148- addMetric (opName , readLockInterval , false );
149- readLockHeldTimeStamp .remove ();
156+ addMetric (opName , readLockIntervalNanos , false );
157+ readLockHeldTimeStampNanos .remove ();
150158 }
151- if (needReport && readLockInterval >= this .readLockReportingThreshold ) {
159+ final long readLockIntervalMs =
160+ TimeUnit .NANOSECONDS .toMillis (readLockIntervalNanos );
161+ if (needReport && readLockIntervalMs >= this .readLockReportingThresholdMs ) {
152162 long localLongestReadLock ;
153163 do {
154- localLongestReadLock = longestReadLockHeldInterval .get ();
155- } while (localLongestReadLock - readLockInterval < 0 &&
156- !longestReadLockHeldInterval .compareAndSet (localLongestReadLock ,
157- readLockInterval ));
164+ localLongestReadLock = longestReadLockHeldIntervalMs .get ();
165+ } while (localLongestReadLock - readLockIntervalMs < 0 &&
166+ !longestReadLockHeldIntervalMs .compareAndSet (localLongestReadLock ,
167+ readLockIntervalMs ));
158168
159169 long localTimeStampOfLastReadLockReport ;
160- long now ;
170+ long nowMs ;
161171 do {
162- now = timer .monotonicNow ();
172+ nowMs = timer .monotonicNow ();
163173 localTimeStampOfLastReadLockReport =
164- timeStampOfLastReadLockReport .get ();
165- if (now - localTimeStampOfLastReadLockReport <
166- lockSuppressWarningInterval ) {
174+ timeStampOfLastReadLockReportMs .get ();
175+ if (nowMs - localTimeStampOfLastReadLockReport <
176+ lockSuppressWarningIntervalMs ) {
167177 numReadLockWarningsSuppressed .incrementAndGet ();
168178 return ;
169179 }
170- } while (!timeStampOfLastReadLockReport .compareAndSet (
171- localTimeStampOfLastReadLockReport , now ));
180+ } while (!timeStampOfLastReadLockReportMs .compareAndSet (
181+ localTimeStampOfLastReadLockReport , nowMs ));
172182 int numSuppressedWarnings = numReadLockWarningsSuppressed .getAndSet (0 );
173- long longestLockHeldInterval = longestReadLockHeldInterval .getAndSet (0 );
183+ long longestLockHeldIntervalMs =
184+ longestReadLockHeldIntervalMs .getAndSet (0 );
174185 FSNamesystem .LOG .info ("FSNamesystem read lock held for " +
175- readLockInterval + " ms via\n " +
186+ readLockIntervalMs + " ms via\n " +
176187 StringUtils .getStackTrace (Thread .currentThread ()) +
177188 "\t Number of suppressed read-lock reports: " + numSuppressedWarnings +
178- "\n \t Longest read-lock held interval: " + longestLockHeldInterval );
189+ "\n \t Longest read-lock held interval: " + longestLockHeldIntervalMs );
179190 }
180191 }
181192
182193 public void writeLock () {
183194 coarseLock .writeLock ().lock ();
184195 if (coarseLock .getWriteHoldCount () == 1 ) {
185- writeLockHeldTimeStamp = timer .monotonicNow ();
196+ writeLockHeldTimeStampNanos = timer .monotonicNowNanos ();
186197 }
187198 }
188199
189200 public void writeLockInterruptibly () throws InterruptedException {
190201 coarseLock .writeLock ().lockInterruptibly ();
191202 if (coarseLock .getWriteHoldCount () == 1 ) {
192- writeLockHeldTimeStamp = timer .monotonicNow ();
203+ writeLockHeldTimeStampNanos = timer .monotonicNowNanos ();
193204 }
194205 }
195206
@@ -200,24 +211,29 @@ public void writeUnlock() {
200211 public void writeUnlock (String opName ) {
201212 final boolean needReport = coarseLock .getWriteHoldCount () == 1 &&
202213 coarseLock .isWriteLockedByCurrentThread ();
203- final long currentTime = timer .monotonicNow ();
204- final long writeLockInterval = currentTime - writeLockHeldTimeStamp ;
214+ final long currentTimeNanos = timer .monotonicNowNanos ();
215+ final long writeLockIntervalNanos =
216+ currentTimeNanos - writeLockHeldTimeStampNanos ;
217+ final long currentTimeMs = TimeUnit .NANOSECONDS .toMillis (currentTimeNanos );
218+ final long writeLockIntervalMs =
219+ TimeUnit .NANOSECONDS .toMillis (writeLockIntervalNanos );
205220
206221 boolean logReport = false ;
207222 int numSuppressedWarnings = 0 ;
208- long longestLockHeldInterval = 0 ;
209- if (needReport && writeLockInterval >= this .writeLockReportingThreshold ) {
210- if (writeLockInterval > longestWriteLockHeldInterval ) {
211- longestWriteLockHeldInterval = writeLockInterval ;
223+ long longestLockHeldIntervalMs = 0 ;
224+ if (needReport &&
225+ writeLockIntervalMs >= this .writeLockReportingThresholdMs ) {
226+ if (writeLockIntervalMs > longestWriteLockHeldIntervalMs ) {
227+ longestWriteLockHeldIntervalMs = writeLockIntervalMs ;
212228 }
213- if (currentTime - timeStampOfLastWriteLockReport >
214- this .lockSuppressWarningInterval ) {
229+ if (currentTimeMs - timeStampOfLastWriteLockReportMs >
230+ this .lockSuppressWarningIntervalMs ) {
215231 logReport = true ;
216232 numSuppressedWarnings = numWriteLockWarningsSuppressed ;
217233 numWriteLockWarningsSuppressed = 0 ;
218- longestLockHeldInterval = longestWriteLockHeldInterval ;
219- longestWriteLockHeldInterval = 0 ;
220- timeStampOfLastWriteLockReport = currentTime ;
234+ longestLockHeldIntervalMs = longestWriteLockHeldIntervalMs ;
235+ longestWriteLockHeldIntervalMs = 0 ;
236+ timeStampOfLastWriteLockReportMs = currentTimeMs ;
221237 } else {
222238 numWriteLockWarningsSuppressed ++;
223239 }
@@ -226,16 +242,16 @@ public void writeUnlock(String opName) {
226242 coarseLock .writeLock ().unlock ();
227243
228244 if (needReport ) {
229- addMetric (opName , writeLockInterval , true );
245+ addMetric (opName , writeLockIntervalNanos , true );
230246 }
231247
232248 if (logReport ) {
233249 FSNamesystem .LOG .info ("FSNamesystem write lock held for " +
234- writeLockInterval + " ms via\n " +
250+ writeLockIntervalMs + " ms via\n " +
235251 StringUtils .getStackTrace (Thread .currentThread ()) +
236252 "\t Number of suppressed write-lock reports: " +
237253 numSuppressedWarnings + "\n \t Longest write-lock held interval: " +
238- longestLockHeldInterval );
254+ longestLockHeldIntervalMs );
239255 }
240256 }
241257
@@ -269,13 +285,14 @@ public int getQueueLength() {
269285 /**
270286 * Add the lock hold time for a recent operation to the metrics.
271287 * @param operationName Name of the operation for which to record the time
272- * @param value Length of time the lock was held
288+ * @param value Length of time the lock was held (nanoseconds)
273289 */
274290 private void addMetric (String operationName , long value , boolean isWrite ) {
275291 if (metricsEnabled ) {
276292 String metricName =
277293 (isWrite ? WRITE_LOCK_METRIC_PREFIX : READ_LOCK_METRIC_PREFIX ) +
278- org .apache .commons .lang .StringUtils .capitalize (operationName );
294+ org .apache .commons .lang .StringUtils .capitalize (operationName ) +
295+ LOCK_METRIC_SUFFIX ;
279296 detailedHoldTimeMetrics .add (metricName , value );
280297 }
281298 }
0 commit comments