From 8e5fbebd2fbac960f618a0143adac26cf99e105a Mon Sep 17 00:00:00 2001 From: BrandonBLin Date: Tue, 19 Jul 2016 17:40:45 +0800 Subject: [PATCH 1/7] Update README.txt --- README.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.txt b/README.txt index 148cd31c86b72..ca182dbd5d77f 100644 --- a/README.txt +++ b/README.txt @@ -1,3 +1,7 @@ + +本项目为本人近期阅读Hadoop源码时fork出来的,主要用于注释源码。 + + For the latest information about Hadoop, please visit our website at: http://hadoop.apache.org/core/ From e5105bb814ba117154b2f70efa017946de9f4083 Mon Sep 17 00:00:00 2001 From: BrandonBLin <819422484@qq.com> Date: Tue, 19 Jul 2016 19:27:05 +0800 Subject: [PATCH 2/7] IndexRecord comment --- .../src/main/java/org/apache/hadoop/mapred/IndexRecord.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java index 3996534bd54ec..7672fbeffbc71 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java @@ -22,6 +22,12 @@ @InterfaceAudience.LimitedPrivate({"MapReduce"}) @InterfaceStability.Unstable + +/** + * Spill文件索引 在相应Reducer的数据请求时快速定位到相应的partition。 + * 一个Spill文件对应一个索引,索引存储专门分配的缓冲中(对应map输出的 + * 环形Buffer) + */ public class IndexRecord { public long startOffset; public long rawLength; From 54ea891a7b0cec9b100026ef08497c336d440133 Mon Sep 17 00:00:00 2001 From: BrandonBLin <819422484@qq.com> Date: Tue, 19 Jul 2016 20:07:33 +0800 Subject: [PATCH 3/7] more comment --- .../java/org/apache/hadoop/mapred/IndexRecord.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java index 7672fbeffbc71..bbba64e748afa 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java @@ -29,8 +29,20 @@ * 环形Buffer) */ public class IndexRecord { + + /** + * 起始偏移量(字节数) + */ public long startOffset; + + /** + * Partition数据原始长度(字节数) + */ public long rawLength; + + /** + * partition数据长度,如果压缩则算压缩后的长度(字节数) + */ public long partLength; public IndexRecord() { } From 98fc59541a53e6844b33abc9e8cdf11795c3f399 Mon Sep 17 00:00:00 2001 From: LinBrandon Date: Wed, 20 Jul 2016 18:44:32 +0800 Subject: [PATCH 4/7] update --- hadoop-common-project/hadoop-common/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 398bb840cf750..e8d607dfa44ee 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -616,7 +616,7 @@ javah - ${env.JAVA_HOME}/bin/javah + ${java.home}/bin/javah org.apache.hadoop.io.compress.zlib.ZlibCompressor org.apache.hadoop.io.compress.zlib.ZlibDecompressor From 5ba9ace8218c78f2c4ebd7f0a84e8b7c25c98a41 Mon Sep 17 00:00:00 2001 From: LinBrandon Date: Wed, 20 Jul 2016 19:05:31 +0800 Subject: [PATCH 5/7] Comment --- .../org/apache/hadoop/mapreduce/Partitioner.java | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Partitioner.java 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Partitioner.java index 7fdb83dc3e726..c7c3d13be2826 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Partitioner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Partitioner.java @@ -37,6 +37,17 @@ * *

Note: If you require your Partitioner class to obtain the Job's * configuration object, implement the {@link Configurable} interface.

+ * + * Partitioner对map输出的key进行划分,决定key及其记录应该发往哪一个reducer. + * + * 即指定每一个key应该由哪个reducer来处理. + * + * 发往同一个reducer的所有key组成一个Partition. + * + * 如果作业只有一个reducer,则框架不会为该作业创建Partitioner. + * + * 作业使用哪一个Partitioner由用户配置决定,Partitioner的逻辑中需要使用作业的配置信息, + * 可以通过实现Configurable接口访问配置信息. * * @see Reducer */ @@ -50,6 +61,9 @@ public abstract class Partitioner { * *

Typically a hash function on a all or a subset of the key.

* + * 每个Partition(Reducer)对应一个整数编号,该方法返回代表key所属的Partition + * 的编号. 传入的Partition总数即为作业的reducer总数. + * * @param key the key to be partioned. * @param value the entry value. * @param numPartitions the total number of partitions. From 12124561b2897883c8f522a01f795a8ca6b16ab1 Mon Sep 17 00:00:00 2001 From: LinBrandon Date: Wed, 20 Jul 2016 19:06:02 +0800 Subject: [PATCH 6/7] comment --- .../main/java/org/apache/hadoop/mapreduce/Reducer.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Reducer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Reducer.java index ab67ab05734e6..15edd9e5d42be 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Reducer.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Reducer.java @@ -30,7 +30,7 @@ /** * Reduces a set of intermediate values which share a key to a smaller set of * values. - * + * *

Reducer implementations * can access the {@link Configuration} for the job via the * {@link JobContext#getConfiguration()} method.

@@ -114,7 +114,12 @@ * } * } * - * + * + * + * Reducer逻辑实现的模板方法. 执行入口为run(). setup, reduce, cleanup 是3个模板方法,可以实现任意一个方法改变逻辑. + * + * 作业的配置信息可以通过Context的getConfiguration方法获取. + * * @see Mapper * @see Partitioner */ From b36b46ad3e029504d104dd835c95fc26ed415702 Mon Sep 17 00:00:00 2001 From: LinBrandon Date: Wed, 20 Jul 2016 19:06:26 +0800 Subject: [PATCH 7/7] comment --- .../src/main/java/org/apache/hadoop/mapreduce/TaskType.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/TaskType.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/TaskType.java index 9708b6ef1a435..3c1643b0f870e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/TaskType.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/TaskType.java @@ -23,6 +23,8 @@ /** * Enum for map, reduce, job-setup, job-cleanup, task-cleanup task types. + * + * 任务类型 */ @InterfaceAudience.Public @InterfaceStability.Stable