11from rest_framework import serializers
22from pandas import DataFrame
3+ from pandas .api .types import is_numeric_dtype
34from django .core .exceptions import ImproperlyConfigured
45import datetime
6+ from collections import OrderedDict
57
68
79class PandasSerializer (serializers .ListSerializer ):
@@ -222,12 +224,15 @@ def get_index(self, dataframe):
222224 group_field = self .get_group_field ()
223225 date_field = self .get_date_field ()
224226 header_fields = self .get_header_fields ()
227+ extra_index_fields = self .get_extra_index_fields ()
225228
229+ index = []
226230 if date_field :
227- group_fields = [date_field , group_field ]
228- else :
229- group_fields = [group_field ]
230- return group_fields + header_fields
231+ index .append (date_field )
232+ index += extra_index_fields
233+ index .append (group_field )
234+ index += header_fields
235+ return index
231236
232237 def transform_dataframe (self , dataframe ):
233238 """
@@ -255,35 +260,30 @@ def transform_dataframe(self, dataframe):
255260 interval = None
256261
257262 # Compute stats for each column, potentially grouped by year
258- all_stats = []
263+ series_infos = OrderedDict ()
259264 for header , series in groups .items ():
260265 if interval :
261266 series_stats = self .boxplots_for_interval (series , interval )
262267 else :
263- interval = None
264268 series_stats = [self .compute_boxplot (series )]
265269
266- series_infos = []
267270 for series_stat in series_stats :
268- series_info = {}
269271 if isinstance (header , tuple ):
270272 value_name = header [0 ]
271273 col_values = header [1 :]
272274 else :
273275 value_name = header
274276 col_values = []
275- col_names = zip (dataframe .columns .names [1 :], col_values )
276- for col_name , value in col_names :
277- series_info [col_name ] = value
277+ col_names = tuple (zip (dataframe .columns .names [1 :], col_values ))
278+ if interval in series_stat :
279+ col_names += ((interval , series_stat [interval ]),)
280+ series_infos .setdefault (col_names , dict (col_names ))
281+ series_info = series_infos [col_names ]
278282 for stat_name , val in series_stat .items ():
279- if stat_name == interval :
280- series_info [stat_name ] = val
281- else :
283+ if stat_name != interval :
282284 series_info [value_name + '-' + stat_name ] = val
283- series_infos .append (series_info )
284- all_stats += series_infos
285285
286- dataframe = DataFrame (all_stats )
286+ dataframe = DataFrame (list ( series_infos . values ()) )
287287 if 'series' in grouping :
288288 index = header_fields + [group_field ]
289289 unstack = len (header_fields )
@@ -336,11 +336,19 @@ def compute_boxplot(self, series):
336336 series = series [series .notnull ()]
337337 if len (series .values ) == 0 :
338338 return {}
339+ elif not is_numeric_dtype (series ):
340+ return self .non_numeric_stats (series )
339341 stats = boxplot_stats (list (series .values ))[0 ]
340342 stats ['count' ] = len (series .values )
341343 stats ['fliers' ] = "|" .join (map (str , stats ['fliers' ]))
342344 return stats
343345
def non_numeric_stats(self, series):
    """
    Summarize a non-numeric series, for which boxplot statistics
    are not applicable.

    Returns a dict with the number of values ('count') and the most
    common value ('mode'); when several values tie, the first one
    reported by ``Series.mode()`` is used.  An empty dict is returned
    when the series has no mode (i.e. it is empty or all-null),
    matching what compute_boxplot() returns for empty input.
    """
    modes = series.mode()
    if modes.empty:
        # Series.mode() drops nulls, so nothing is left to report;
        # indexing the empty result would raise.
        return {}
    return {
        'count': len(series),
        'mode': modes.iloc[0],
    }
351+
344352 def get_group_field (self ):
345353 """
346354 Categorical field to group datasets by.
@@ -359,6 +367,12 @@ def get_header_fields(self):
359367 """
360368 return self .get_meta_option ('boxplot_header' , [])
361369
def get_extra_index_fields(self):
    """
    Fields that identify each row but don't need to be considered
    for the plot.

    Looked up via the 'boxplot_extra_index' meta option, with an empty
    list passed as the fallback value.
    """
    return self.get_meta_option('boxplot_extra_index', [])
375+
362376
363377class SimpleSerializer (serializers .Serializer ):
364378 """
0 commit comments