3939import org .apache .hadoop .hbase .filter .RowFilter ;
4040import org .apache .hadoop .hbase .io .ImmutableBytesWritable ;
4141import org .apache .hadoop .hbase .util .Bytes ;
42- import org .apache .hadoop .io .IntWritable ;
42+ import org .apache .hadoop .io .LongWritable ;
4343import org .apache .hadoop .io .Text ;
4444import org .apache .hadoop .mapreduce .Job ;
4545import org .apache .hadoop .mapreduce .Reducer ;
5050
5151import org .apache .hbase .thirdparty .com .google .common .base .Preconditions ;
5252
53-
5453/**
5554 * A job with a map and reduce phase to count cells in a table.
5655 * The counter lists the following stats for a given table:
5958 * 2. Total number of CFs across all rows
6059 * 3. Total qualifiers across all rows
6160 * 4. Total occurrence of each CF
62- * 5. Total occurrence of each qualifier
61+ * 5. Total occurrence of each qualifier
6362 * 6. Total number of versions of each qualifier.
63+ * 7. Total size of serialized cells of each CF.
64+ * 8. Total size of serialized cells of each qualifier.
65+ * 9. Total size of serialized cells across all rows.
6466 * </pre>
6567 *
6668 * The cellcounter can take optional parameters to use a user
@@ -86,13 +88,14 @@ public class CellCounter extends Configured implements Tool {
8688 * Mapper that runs the count.
8789 */
8890 static class CellCounterMapper
89- extends TableMapper <Text , IntWritable > {
91+ extends TableMapper <Text , LongWritable > {
9092 /**
9193 * Counter enumeration to count the actual rows.
9294 */
9395 public static enum Counters {
9496 ROWS ,
95- CELLS
97+ CELLS ,
98+ SIZE
9699 }
97100
98101 private Configuration conf ;
@@ -143,34 +146,41 @@ public void map(ImmutableBytesWritable row, Result values,
143146 currentFamily = null ;
144147 currentQualifier = null ;
145148 context .getCounter (Counters .ROWS ).increment (1 );
146- context .write (new Text ("Total ROWS" ), new IntWritable (1 ));
149+ context .write (new Text ("Total ROWS" ), new LongWritable (1 ));
147150 }
148151 if (!values .isEmpty ()) {
149152 int cellCount = 0 ;
150153 for (Cell value : values .listCells ()) {
151154 cellCount ++;
155+ long size = value .getSerializedSize ();
152156 if (currentFamily == null || !CellUtil .matchingFamily (value , currentFamily )) {
153157 currentFamily = CellUtil .cloneFamily (value );
154158 currentFamilyName = Bytes .toStringBinary (currentFamily );
155159 currentQualifier = null ;
156160 context .getCounter ("CF" , currentFamilyName ).increment (1 );
157161 if (1 == context .getCounter ("CF" , currentFamilyName ).getValue ()) {
158- context .write (new Text ("Total Families Across all Rows" ), new IntWritable (1 ));
159- context .write (new Text (currentFamily ), new IntWritable (1 ));
162+ context .write (new Text ("Total Families Across all Rows" ), new LongWritable (1 ));
163+ context .write (new Text (currentFamily ), new LongWritable (1 ));
160164 }
165+ context .getCounter (Counters .SIZE ).increment (size );
166+ context .write (new Text ("Total SIZE" ), new LongWritable (size ));
167+ context .getCounter ("CF" , currentFamilyName + "_Size" ).increment (size );
168+ context .write (new Text (currentFamilyName + "_Size" ), new LongWritable (size ));
161169 }
162- if (currentQualifier == null || !CellUtil .matchingQualifier (value , currentQualifier )) {
170+ if (currentQualifier == null || !CellUtil .matchingQualifier (value , currentQualifier )){
163171 currentQualifier = CellUtil .cloneQualifier (value );
164172 currentQualifierName = currentFamilyName + separator +
165173 Bytes .toStringBinary (currentQualifier );
166174 currentRowQualifierName = currentRowKey + separator + currentQualifierName ;
167175
168176 context .write (new Text ("Total Qualifiers across all Rows" ),
169- new IntWritable (1 ));
170- context .write (new Text (currentQualifierName ), new IntWritable (1 ));
177+ new LongWritable (1 ));
178+ context .write (new Text (currentQualifierName ), new LongWritable (1 ));
179+ context .getCounter ("Q" , currentQualifierName + "_Size" ).increment (size );
180+ context .write (new Text (currentQualifierName + "_Size" ), new LongWritable (size ));
171181 }
172182 // Increment versions
173- context .write (new Text (currentRowQualifierName + "_Versions" ), new IntWritable (1 ));
183+ context .write (new Text (currentRowQualifierName + "_Versions" ), new LongWritable (1 ));
174184 }
175185 context .getCounter (Counters .CELLS ).increment (cellCount );
176186 }
@@ -180,20 +190,20 @@ public void map(ImmutableBytesWritable row, Result values,
180190 }
181191 }
182192
183- static class IntSumReducer <Key > extends Reducer <Key , IntWritable ,
184- Key , IntWritable > {
193+ static class LongSumReducer <Key > extends Reducer <Key , LongWritable , Key , LongWritable > {
194+
195+ private LongWritable result = new LongWritable ();
185196
186- private IntWritable result = new IntWritable ();
187- public void reduce (Key key , Iterable <IntWritable > values ,
188- Context context )
189- throws IOException , InterruptedException {
190- int sum = 0 ;
191- for (IntWritable val : values ) {
197+ public void reduce (Key key , Iterable <LongWritable > values , Context context )
198+ throws IOException , InterruptedException {
199+ long sum = 0 ;
200+ for (LongWritable val : values ) {
192201 sum += val .get ();
193202 }
194203 result .set (sum );
195204 context .write (key , result );
196205 }
206+
197207 }
198208
199209 /**
@@ -216,13 +226,13 @@ public static Job createSubmittableJob(Configuration conf, String[] args)
216226 TableMapReduceUtil .initTableMapperJob (tableName , scan ,
217227 CellCounterMapper .class , ImmutableBytesWritable .class , Result .class , job );
218228 job .setMapOutputKeyClass (Text .class );
219- job .setMapOutputValueClass (IntWritable .class );
229+ job .setMapOutputValueClass (LongWritable .class );
220230 job .setOutputFormatClass (TextOutputFormat .class );
221231 job .setOutputKeyClass (Text .class );
222- job .setOutputValueClass (IntWritable .class );
232+ job .setOutputValueClass (LongWritable .class );
223233 FileOutputFormat .setOutputPath (job , outputDir );
224- job .setReducerClass (IntSumReducer .class );
225- job .setCombinerClass (IntSumReducer .class );
234+ job .setReducerClass (LongSumReducer .class );
235+ job .setCombinerClass (LongSumReducer .class );
226236 return job ;
227237 }
228238
0 commit comments