1717# under the License.
1818#
1919
20- import imp , json , os , shutil , sys , tempfile , zipfile
20+ import json
21+ import os
22+ import shutil
23+ import sys
24+ import tempfile
25+ import zipfile
26+
2127try :
22- imp .find_module ('texttable' )
2328 from texttable import Texttable
2429except ImportError :
25- sys . stderr . write ("Could not import Texttable\n Retry after 'pip install texttable'\n " )
26- exit ()
30+ print ("Could not import Texttable. Retry after 'pip install texttable'" , file = sys . stderr )
31+ sys . exit (1 )
2732
2833tmpdir = tempfile .mkdtemp ()
2934
@@ -47,28 +52,33 @@ def diff(file1, file2):
4752 # also in dag.json data is inside "dag" root node
4853 file1_using_dag_json = True
4954 dag_json_file1 = os .path .join (file1_dir , "dag.json" )
50- if os .path .isfile (dag_json_file1 ) == False :
55+ if not os .path .isfile (dag_json_file1 ):
5156 file1_using_dag_json = False
5257 dag_json_file1 = os .path .join (file1_dir , "TEZ_DAG" )
53- if os .path .isfile (dag_json_file1 ) == False :
54- print "Unable to find dag.json/TEZ_DAG file inside the archive " + file1
55- exit ()
58+ if not os .path .isfile (dag_json_file1 ):
59+ print ( "Unable to find dag.json/TEZ_DAG file inside the archive " + file1 )
60+ sys . exit ()
5661
5762 file2_using_dag_json = True
5863 dag_json_file2 = os .path .join (file2_dir , "dag.json" )
59- if os .path .isfile (dag_json_file2 ) == False :
64+ if not os .path .isfile (dag_json_file2 ):
6065 file2_using_dag_json = False
6166 dag_json_file2 = os .path .join (file2_dir , "TEZ_DAG" )
62- if os .path .isfile (dag_json_file2 ) == False :
63- print "Unable to find dag.json/TEZ_DAG file inside the archive " + file1
64- exit ()
67+ if not os .path .isfile (dag_json_file2 ):
68+ print ( "Unable to find dag.json/TEZ_DAG file inside the archive " + file1 )
69+ sys . exit ()
6570
6671 # populate diff table
6772 difftable = {}
6873 with open (dag_json_file1 ) as data_file :
6974 file1_dag_json = json .load (data_file )["dag" ] if file1_using_dag_json else json .load (data_file )
70- counters = file1_dag_json ['otherinfo' ]['counters' ]
71- for group in counters ['counterGroups' ]:
75+
76+ # Safe access to otherinfo and counters
77+ otherinfo = file1_dag_json .get ('otherinfo' , {})
78+ counters = otherinfo .get ('counters' , {})
79+
80+ # Iterate only if counterGroups exists
81+ for group in counters .get ('counterGroups' , []):
7282 countertable = {}
7383 for counter in group ['counters' ]:
7484 counterName = counter ['counterName' ]
@@ -78,22 +88,24 @@ def diff(file1, file2):
7888 groupName = group ['counterGroupName' ]
7989 difftable [groupName ] = countertable
8090
81- # add other info
82- otherinfo = file1_dag_json ['otherinfo' ]
91+ # add other info safely
8392 countertable = {}
84- countertable ["TIME_TAKEN" ] = [otherinfo [ 'timeTaken' ] ]
85- countertable ["COMPLETED_TASKS" ] = [otherinfo [ 'numCompletedTasks' ] ]
86- countertable ["SUCCEEDED_TASKS" ] = [otherinfo [ 'numSucceededTasks' ] ]
87- countertable ["FAILED_TASKS" ] = [otherinfo [ 'numFailedTasks' ] ]
88- countertable ["KILLED_TASKS" ] = [otherinfo [ 'numKilledTasks' ] ]
89- countertable ["FAILED_TASK_ATTEMPTS" ] = [otherinfo [ 'numFailedTaskAttempts' ] ]
90- countertable ["KILLED_TASK_ATTEMPTS" ] = [otherinfo [ 'numKilledTaskAttempts' ] ]
93+ countertable ["TIME_TAKEN" ] = [otherinfo . get ( 'timeTaken' , 0 ) ]
94+ countertable ["COMPLETED_TASKS" ] = [otherinfo . get ( 'numCompletedTasks' , 0 ) ]
95+ countertable ["SUCCEEDED_TASKS" ] = [otherinfo . get ( 'numSucceededTasks' , 0 ) ]
96+ countertable ["FAILED_TASKS" ] = [otherinfo . get ( 'numFailedTasks' , 0 ) ]
97+ countertable ["KILLED_TASKS" ] = [otherinfo . get ( 'numKilledTasks' , 0 ) ]
98+ countertable ["FAILED_TASK_ATTEMPTS" ] = [otherinfo . get ( 'numFailedTaskAttempts' , 0 ) ]
99+ countertable ["KILLED_TASK_ATTEMPTS" ] = [otherinfo . get ( 'numKilledTaskAttempts' , 0 ) ]
91100 difftable ['otherinfo' ] = countertable
92101
93102 with open (dag_json_file2 ) as data_file :
94103 file2_dag_json = json .load (data_file )["dag" ] if file2_using_dag_json else json .load (data_file )
95- counters = file2_dag_json ['otherinfo' ]['counters' ]
96- for group in counters ['counterGroups' ]:
104+
105+ otherinfo = file2_dag_json .get ('otherinfo' , {})
106+ counters = otherinfo .get ('counters' , {})
107+
108+ for group in counters .get ('counterGroups' , []):
97109 groupName = group ['counterGroupName' ]
98110 if groupName not in difftable :
99111 difftable [groupName ] = {}
@@ -105,16 +117,15 @@ def diff(file1, file2):
105117 countertable [counterName ] = [0 ]
106118 countertable [counterName ].append (counter ['counterValue' ])
107119
108- # append other info
109- otherinfo = file2_dag_json ['otherinfo' ]
120+ # append other info safely
110121 countertable = difftable ['otherinfo' ]
111- countertable ["TIME_TAKEN" ].append (otherinfo [ 'timeTaken' ] )
112- countertable ["COMPLETED_TASKS" ].append (otherinfo [ 'numCompletedTasks' ] )
113- countertable ["SUCCEEDED_TASKS" ].append (otherinfo [ 'numSucceededTasks' ] )
114- countertable ["FAILED_TASKS" ].append (otherinfo [ 'numFailedTasks' ] )
115- countertable ["KILLED_TASKS" ].append (otherinfo [ 'numKilledTasks' ] )
116- countertable ["FAILED_TASK_ATTEMPTS" ].append (otherinfo [ 'numFailedTaskAttempts' ] )
117- countertable ["KILLED_TASK_ATTEMPTS" ].append (otherinfo [ 'numKilledTaskAttempts' ] )
122+ countertable ["TIME_TAKEN" ].append (otherinfo . get ( 'timeTaken' , 0 ) )
123+ countertable ["COMPLETED_TASKS" ].append (otherinfo . get ( 'numCompletedTasks' , 0 ) )
124+ countertable ["SUCCEEDED_TASKS" ].append (otherinfo . get ( 'numSucceededTasks' , 0 ) )
125+ countertable ["FAILED_TASKS" ].append (otherinfo . get ( 'numFailedTasks' , 0 ) )
126+ countertable ["KILLED_TASKS" ].append (otherinfo . get ( 'numKilledTasks' , 0 ) )
127+ countertable ["FAILED_TASK_ATTEMPTS" ].append (otherinfo . get ( 'numFailedTaskAttempts' , 0 ) )
128+ countertable ["KILLED_TASK_ATTEMPTS" ].append (otherinfo . get ( 'numKilledTaskAttempts' , 0 ) )
118129 difftable ['otherinfo' ] = countertable
119130
120131 # if some counters are missing, consider it as 0 and compute delta difference
@@ -134,7 +145,7 @@ def print_table(difftable, name1, name2, detailed=False):
134145 table = Texttable (max_width = 0 )
135146 table .set_cols_align (["l" , "l" , "l" , "l" , "l" ])
136147 table .set_cols_valign (["m" , "m" , "m" , "m" , "m" ])
137- table .add_row (["Counter Group" , "Counter Name" , name1 , name2 , "delta" ]);
148+ table .add_row (["Counter Group" , "Counter Name" , name1 , name2 , "delta" ])
138149 for k in sorted (difftable ):
139150 # ignore task specific counters in default output
140151 if not detailed and ("_INPUT_" in k or "_OUTPUT_" in k ):
@@ -177,13 +188,13 @@ def print_table(difftable, name1, name2, detailed=False):
177188
178189 table .add_row (row )
179190
180- print table .draw () + "\n "
191+ print ( table .draw () + "\n " )
181192
182193
183194def main (argv ):
184195 sysargs = len (argv )
185196 if sysargs < 2 :
186- print "Usage: python counter-diff.py dag_file1.zip dag_file2.zip [--detail]"
197+ print ( "Usage: python3 counter-diff.py dag_file1.zip dag_file2.zip [--detail]" )
187198 return - 1
188199
189200 file1 = argv [0 ]
@@ -200,4 +211,4 @@ def main(argv):
200211 try :
201212 sys .exit (main (sys .argv [1 :]))
202213 finally :
203- shutil .rmtree (tmpdir )
214+ shutil .rmtree (tmpdir )
0 commit comments