From f9776ce809ec5be1425f490aa0f7346a88dd313a Mon Sep 17 00:00:00 2001
From: Yi Lin <qinsoon@gmail.com>
Date: Mon, 23 Sep 2024 04:19:03 +0000
Subject: [PATCH 1/9] Implement epoch

---
 scripts/history_report.py |   5 +-
 scripts/plot.py           | 389 +++++++++++++++++++++++++++-----------
 2 files changed, 279 insertions(+), 115 deletions(-)

diff --git a/scripts/history_report.py b/scripts/history_report.py
index fadb1f7..9df8287 100644
--- a/scripts/history_report.py
+++ b/scripts/history_report.py
@@ -49,6 +49,9 @@
 excluded_runs = plot.get_excluded_runs_from_env_var('HISTORY_EXCLUDE_RUNS')
 
 for plan in plans:
+    if plan != "semispace":
+        continue
+
     # The path for all logs for the plan, such as /home/yilin/Code/ci-perf-kit/result_repo/openjdk/immix
     plan_path = os.path.join(result_repo_vm_root, plan)
     # Get all the runs for the plan, such as ['rat-2021-08-24-Tue-163625']
@@ -73,7 +76,7 @@
     print("Plan: %s" % plan)
     print("Last run: %s" % last_run)
     print("Benchmarks: %s" % benchmarks)
-    print(logs)
+    # print(logs)
 
     # figure out the baseline and get the result for the baseline
     plan_config = parse.get_config_for_plan(config, plan)
diff --git a/scripts/plot.py b/scripts/plot.py
index 389a0d7..e37f59b 100644
--- a/scripts/plot.py
+++ b/scripts/plot.py
@@ -67,31 +67,37 @@ def plot_history(runs, plan, benchmarks, start_date, end_date, data_key, baselin
         x_labels.sort(key = lambda x: parse.parse_run_date(x))
 
         # Only add notes for the first plot
-        if row == 1:
-            import datetime
-
-            # Sort notes
-            notes.sort(key = lambda x: parse.parse_note_date(x['date']))
-
-            # Align notes to logs/run_ids. Each note has a date, find the next log on or after the date.
-            def peek_next_note_date():
-                return parse.parse_note_date(notes[0]['date']) if len(notes) > 0 else datetime.datetime(9999, 1, 1) # end of the world. We will never find a log after this date.
-            next_note_date = peek_next_note_date()
-
-            for idx, run_id in enumerate(x_labels):
-                log_date = parse.parse_run_date(run_id)
-                if log_date >= next_note_date:
-                    # We may have multiple notes on this date. We have to combine them.
-                    combined_note = None
-
-                    while log_date >= next_note_date:
-                        note = notes.pop(0)
-                        if combined_note is None:
-                            combined_note = { 'run_id': run_id, 'x': x[idx], 'note': f"{note['date']}: {note['note']}" }
-                        else:
-                            combined_note['note'] += f",{note['date']}: {note['note']}"
-                        next_note_date = peek_next_note_date()
-                    aligned_notes.append(combined_note)
+        # if row == 1:
+        #     notes_copy = notes.copy()
+        #     import datetime
+
+        #     # Sort notes
+        #     notes.sort(key = lambda x: parse.parse_note_date(x['date']))
+
+        #     # Align notes to logs/run_ids. Each note has a date, find the next log on or after the date.
+        #     def peek_next_note_date():
+        #         return parse.parse_note_date(notes[0]['date']) if len(notes) > 0 else datetime.datetime(9999, 1, 1) # end of the world. We will never find a log after this date.
+        #     next_note_date = peek_next_note_date()
+
+        #     for idx, run_id in enumerate(x_labels):
+        #         log_date = parse.parse_run_date(run_id)
+        #         if log_date >= next_note_date:
+        #             # We may have multiple notes on this date. We have to combine them.
+        #             combined_note = None
+
+        #             while log_date >= next_note_date:
+        #                 note = notes.pop(0)
+        #                 if combined_note is None:
+        #                     combined_note = { 'run_id': run_id, 'x': x[idx], 'note': f"{note['date']}: {note['note']}" }
+        #                 else:
+        #                     combined_note['note'] += f",{note['date']}: {note['note']}"
+        #                 next_note_date = peek_next_note_date()
+        #             aligned_notes.append(combined_note)
+            
+        #     notes = notes_copy
+
+        attributes = split_epochs(x, x_labels, y, notes.copy())
+        print(attributes)
 
         y_cur_aboslute = y[-1]
 
@@ -119,12 +125,17 @@ def peek_next_note_date():
                 baseline_perf = 1
             nonzero_y = [baseline_perf]
 
-        y_baseline = min(nonzero_y)
+        # normalize to the min value in the latest epoch
+        current_epoch = sorted(attributes.keys())[-1]
+        y_baseline = attributes[current_epoch]['min']
+        # No min value. There is no value in the plot at all. We just need a reasonable baseline.
+        if y_baseline == 0:
+            y_baseline = min(nonzero_y)
         y_max = max(nonzero_y) / y_baseline
         y_min = min(nonzero_y) / y_baseline
 
-        this_y_upper = max(y) / y_baseline
-        this_y_lower = min(y) / y_baseline
+        this_y_upper = attributes[current_epoch]['max'] / y_baseline
+        this_y_lower = attributes[current_epoch]['min'] / y_baseline
 
         # update range
         if this_y_upper > y_range_upper:
@@ -139,7 +150,6 @@ def peek_next_note_date():
         x_axis = "x"
         y_axis = "y%d" % row
 
-        # history
         history_trace = {
             "name": bm,
             "hoverinfo": "text",
@@ -151,6 +161,8 @@ def peek_next_note_date():
             "yaxis": y_axis,
             "showlegend": False,
         }
+
+        # history
         traces.append({**history_trace, **{
             "line": {"width": 3, "color": "black"},
             "y": make_zero_as_none(y),
@@ -203,31 +215,101 @@ def keep_last(arr, f):
             res = keep_first(r, f)
             res.reverse()
             return res
+        def keep_first_in_index_range(arr, f, start, end):
+            ret = []
+            first = True
+            for idx, x in enumerate(arr):
+                if idx < start or idx >= end:
+                    ret.append(None)
+                elif f(x) and first:
+                    ret.append(x)
+                    first = False
+                else:
+                    ret.append(None)
+            return ret
 
-        y_max_array = keep_first(y, lambda x: x == y_max) # keep max, leave others as None
-        traces.append({**history_trace, **{
-            "hoverinfo": "none",
-            "mode": "markers+text",
-            "textposition": "top center",
-            "y": y_max_array,
-            "text": ["%s: %.2f" % (x, y) if y != 0 else "" for (x, y) in zip(x_labels, y)],
-            "textfont_color": "red",
-            "cliponaxis": False,
-            "marker": { "size": 20, "color": "red", "symbol": "triangle-up" },
-            "showlegend": False,
-        }})
-        y_min_array = keep_first(y, lambda x: x == y_min) # keep min, leave others as None
-        traces.append({**history_trace, **{
-            "hoverinfo": "none",
-            "mode": "markers+text",
-            "textposition": "bottom center",
-            "y": y_min_array,
-            "text": ["%s: %.2f" % (x, y) if y != 0 else "" for (x, y) in zip(x_labels, y)],
-            "textfont_color": "green",
-            "cliponaxis": False,
-            "marker": { "size": 20, "color": "green", "symbol": "triangle-down" },
-            "showlegend": False,
-        }})
+        # Overall min/max -- this is replaced by min/max in each epoch
+        # y_max_array = keep_first(y, lambda x: x == y_max) # keep max, leave others as None
+        # traces.append({**history_trace, **{
+        #     "hoverinfo": "none",
+        #     "mode": "markers+text",
+        #     "textposition": "top center",
+        #     "y": y_max_array,
+        #     "text": ["%s: %.2f" % (x, y) if y != 0 else "" for (x, y) in zip(x_labels, y)],
+        #     "textfont_color": "red",
+        #     "cliponaxis": False,
+        #     "marker": { "size": 20, "color": "red", "symbol": "triangle-up" },
+        #     "showlegend": False,
+        # }})
+        # y_min_array = keep_first(y, lambda x: x == y_min) # keep min, leave others as None
+        # traces.append({**history_trace, **{
+        #     "hoverinfo": "none",
+        #     "mode": "markers+text",
+        #     "textposition": "bottom center",
+        #     "y": y_min_array,
+        #     "text": ["%s: %.2f" % (x, y) if y != 0 else "" for (x, y) in zip(x_labels, y)],
+        #     "textfont_color": "green",
+        #     "cliponaxis": False,
+        #     "marker": { "size": 20, "color": "green", "symbol": "triangle-down" },
+        #     "showlegend": False,
+        # }})
+
+        # Mark epoch
+        for epoch_name, v in attributes.items():
+            print(v)
+            # Epoch start
+            epoch_start_x = keep_first(x, lambda x: x == v['start_x'])
+            epoch_start_y = keep_first(y, lambda y: y == v['start_y'] / y_baseline)
+
+            # Normalized y
+            epoch_normalized_start_y = v['start_y'] / y_baseline
+            epoch_normalized_end_y = v['end_y'] / y_baseline
+
+            # Epoch min/max
+            epoch_normalized_min_y = v['min'] / y_baseline
+            epoch_normalized_max_y = v['max'] / y_baseline
+
+            regress = epoch_normalized_end_y > epoch_normalized_start_y
+            color = "red" if regress else "green"
+
+            traces.append({**history_trace, **{
+                "hoverinfo": 'text',
+                "mode": "markers",
+                "textposition": "top center",
+                "x": epoch_start_x,
+                "y": epoch_start_y,
+                "text": "Epoch: %s, start: %.2f, end: %.2f, min: %.2f, max: %.2f" % (v['note'], epoch_normalized_start_y, epoch_normalized_end_y, epoch_normalized_min_y, epoch_normalized_max_y),
+                "textfont_color": color,
+                "cliponaxis": False,
+                "marker": { "size": 10, "color": color, "symbol": "star-diamond"},
+                "showlegend": False,
+            }})
+
+            if epoch_name == current_epoch:
+                # Epoch min
+                traces.append({**history_trace, **{
+                    "hoverinfo": "text",
+                    "mode": "markers",
+                    "textposition": "top center",
+                    "y": keep_first_in_index_range(y, lambda y: y == epoch_normalized_min_y, v['start_x'], v['end_x'] + 1),
+                    "text": ["%s: %.2f" % (x, y) if y != 0 else "" for (x, y) in zip(x_labels, y)],
+                    "textfont_color": "green",
+                    "cliponaxis": False,
+                    "marker": { "size": 10, "color": "green", "symbol": "triangle-down" },
+                    "showlegend": False,
+                }})
+                # Epoch max
+                traces.append({**history_trace, **{
+                    "hoverinfo": "text",
+                    "mode": "markers",
+                    "textposition": "top center",
+                    "y": keep_first_in_index_range(y, lambda y: y == epoch_normalized_max_y, v['start_x'], v['end_x'] + 1),
+                    "text": ["%s: %.2f" % (x, y) if y != 0 else "" for (x, y) in zip(x_labels, y)],
+                    "textfont_color": "red",
+                    "cliponaxis": False,
+                    "marker": { "size": 10, "color": "red", "symbol": "triangle-up" },
+                    "showlegend": False,
+                }})
 
         # labeling
         annotation = {
@@ -291,51 +373,53 @@ def keep_last(arr, f):
             "yshift": -50
         }})
 
-        # moving average
-        y_moving_average = moving_average(y, 10)
-        traces.append({
-            "name": bm,
-            "hoverinfo": "text",
-            # "fill": "tozeroy",
-            "mode": TRACE_MODE,
-            "line": {"width": 1, "color": "gray"},
-            "type": "scatter",
-            "x": x,
-            "y": y_moving_average,
-            "text": ["10-p moving avg: %s: %s" % (x, "{:.2f}".format(y) if y is not None else "na") for (x, y) in zip(x_labels, y_moving_average)],
-            "xaxis": x_axis,
-            "yaxis": y_axis,
-            "showlegend": False,
-        })
+        PLOT_MORE_STATISTICS = False
+        if PLOT_MORE_STATISTICS:
+            # moving average
+            y_moving_average = moving_average(y, 10)
+            traces.append({
+                "name": bm,
+                "hoverinfo": "text",
+                # "fill": "tozeroy",
+                "mode": TRACE_MODE,
+                "line": {"width": 1, "color": "gray"},
+                "type": "scatter",
+                "x": x,
+                "y": y_moving_average,
+                "text": ["10-p moving avg: %s: %s" % (x, "{:.2f}".format(y) if y is not None else "na") for (x, y) in zip(x_labels, y_moving_average)],
+                "xaxis": x_axis,
+                "yaxis": y_axis,
+                "showlegend": False,
+            })
 
-        # variance (10p moving average of std dev)
-        std_dev_moving_average = moving_average(std, 10)
-        variance_trace = {
-            "name": bm,
-            "hoverinfo": "text",
-            "mode": "lines",
-            "line_color": "#cacccf",
-            "line": {"width": 0},
-            "x": x,
-            "xaxis": x_axis,
-            "yaxis": y_axis,
-            "showlegend": False,
-        }
-        variance_down = list(map(lambda a, b: a - b if a is not None and b is not None else None, y_moving_average, std_dev_moving_average))
-        traces.append({**variance_trace, **{
-            # a hack: fill everything under this line the same as the background color
-            "fill": "tozeroy",
-            "line_color": "#e5ecf6",
-            "y": variance_down,
-            "text": ["moving avg - std dev: %s: %s" % (x, "{:.2f}".format(y) if y is not None else "na") for (x, y) in zip(x_labels, variance_down)],
-        }})
-        variance_up = list(map(lambda a, b: a + b if a is not None and b is not None else None, y_moving_average, std_dev_moving_average))
-        traces.append({**variance_trace, **{
-            # fill things in grey between this trace and the trace above
-            "fill": "tonexty",
-            "y": variance_up,
-            "text": ["moving avg + std dev: %s: %s" % (x, "{:.2f}".format(y) if y is not None else "na") for (x, y) in zip(x_labels, variance_up)],
-        }})
+            # variance (10p moving average of std dev)
+            std_dev_moving_average = moving_average(std, 10)
+            variance_trace = {
+                "name": bm,
+                "hoverinfo": "text",
+                "mode": "lines",
+                "line_color": "#cacccf",
+                "line": {"width": 0},
+                "x": x,
+                "xaxis": x_axis,
+                "yaxis": y_axis,
+                "showlegend": False,
+            }
+            variance_down = list(map(lambda a, b: a - b if a is not None and b is not None else None, y_moving_average, std_dev_moving_average))
+            traces.append({**variance_trace, **{
+                # a hack: fill everything under this line the same as the background color
+                "fill": "tozeroy",
+                "line_color": "#e5ecf6",
+                "y": variance_down,
+                "text": ["moving avg - std dev: %s: %s" % (x, "{:.2f}".format(y) if y is not None else "na") for (x, y) in zip(x_labels, variance_down)],
+            }})
+            variance_up = list(map(lambda a, b: a + b if a is not None and b is not None else None, y_moving_average, std_dev_moving_average))
+            traces.append({**variance_trace, **{
+                # fill things in grey between this trace and the trace above
+                "fill": "tonexty",
+                "y": variance_up,
+                "text": ["moving avg + std dev: %s: %s" % (x, "{:.2f}".format(y) if y is not None else "na") for (x, y) in zip(x_labels, variance_up)],
+            }})
 
         # baseline - we will draw one horizontal line per each baseline
         baseline_opacity = 0.6
@@ -383,26 +467,27 @@ def keep_last(arr, f):
 
         # Notes
         # Somehow this line does not show. But it adds a hover text for all the plots.
-        for note in aligned_notes:
-            note_trace = {
-                "hoverinfo": "text",
-                "mode": "lines",
-                "line": {"width": 10, "color": "blue"},
-                "x": [note['x']],
-                "y": [0, 999],
-                "xaxis": x_axis,
-                "yaxis": y_axis,
-                "showlegend": False,
-                "opacity": 0,
-                "text": note['note']
-            }
-            traces.append(note_trace)
+        # for note in aligned_notes:
+        #     note_trace = {
+        #         "hoverinfo": "text",
+        #         "mode": "lines",
+        #         "line": {"width": 10, "color": "blue"},
+        #         "x": [note['x']],
+        #         "y": [0, 999],
+        #         "xaxis": x_axis,
+        #         "yaxis": y_axis,
+        #         "showlegend": False,
+        #         "opacity": 0,
+        #         "text": note['note']
+        #     }
+        #     traces.append(note_trace)
 
         row += 1
 
     # fix range for all the traces
     if SAME_Y_RANGE_IN_ALL_TRACES:
-        y_range = [y_range_lower - 0.02, y_range_upper + 0.02]
+        RANGE_EXTRA = 0.20
+        y_range = [y_range_lower - RANGE_EXTRA, y_range_upper + RANGE_EXTRA]
         for i in range(1, row):
             layout["yaxis%d" % i]["range"] = y_range
 
@@ -412,14 +497,90 @@ def keep_last(arr, f):
     for line in baseline_hlines:
         fig.add_shape(line)
     # This plots a vertical line for each note in the first subgraph.
-    for note in aligned_notes:
-        fig.add_vline(x = int(note['x']), line_color = 'blue', annotation = { "text": "📓", "hovertext": note['note'] })
+    # for note in aligned_notes:
+    #     fig.add_vline(x = int(note['x']), line_color = 'blue', annotation = { "text": "📓", "hovertext": note['note'] })
 
     fig.update_layout(hovermode='x')
 
     return fig
 
 
+def split_epochs(x, x_labels, y, notes):
+    import datetime
+
+    FIRST_EPOCH = "19700101"
+
+    attrs = {}
+    epoch = None
+
+    def new_epoch(idx, epoch_name, note = None):
+        nonlocal epoch
+
+        # End previous epoch
+        if epoch is not None:
+            if idx > 1:
+                prev_epoch_end = idx - 1
+            else:
+                prev_epoch_end = 0
+            attrs[epoch]['end_x'] = x[prev_epoch_end]
+            attrs[epoch]['end_y'] = y[prev_epoch_end]
+
+        epoch = epoch_name
+
+        attrs[epoch_name] = {}
+        attrs[epoch_name]['epoch'] = epoch_name
+        attrs[epoch_name]['start_x'] = x[idx]
+        attrs[epoch_name]['start_y'] = y[idx]
+        if note is not None:
+            attrs[epoch_name]['note'] = note
+        else:
+            attrs[epoch_name]['note'] = epoch_name
+
+    # Sort notes
+    notes.sort(key = lambda x: parse.parse_note_date(x['date']))
+
+    # Align notes to logs/run_ids. Each note has a date, find the next log on or after the date.
+    def peek_next_note_date():
+        return parse.parse_note_date(notes[0]['date']) if len(notes) > 0 else datetime.datetime(9999, 1, 1) # end of the world. We will never find a log after this date.
+    next_note_date = peek_next_note_date()
+
+
+    for idx, run_id in enumerate(x_labels):
+        log_date = parse.parse_run_date(run_id)
+        if log_date >= next_note_date:
+            # We may have multiple notes on this date. We have to combine them.
+            combined_note = None
+
+            while log_date >= next_note_date:
+                note = notes.pop(0)
+                if combined_note is None:
+                    combined_note = { 'run_id': run_id, 'x': x[idx], 'note': f"{note['date']}: {note['note']}" }
+                else:
+                    combined_note['note'] += f",{note['date']}: {note['note']}"
+                next_note_date = peek_next_note_date()
+            new_epoch(idx, note['date'], combined_note['note'])
+
+        if epoch is None:
+            new_epoch(idx, FIRST_EPOCH)
+
+    # End the last epoch
+    attrs[epoch]['end_x'] = x[-1]
+    attrs[epoch]['end_y'] = y[-1]
+
+    # For each epoch, find min/max
+    for name, epoch in attrs.items():
+        epoch_y = y[epoch['start_x']:(epoch['end_x'] + 1)]
+        epoch_non_zero_y = [y for y in epoch_y if y > 0]
+        if len(epoch_non_zero_y) != 0:
+            epoch['min'] = min(epoch_non_zero_y)
+            epoch['max'] = max(epoch_y)
+        else:
+            epoch['min'] = epoch['start_y']
+            epoch['max'] = epoch['start_y']
+
+    return attrs
+
+
 def plot_multi_plans_history(runs, plans, benchmarks, start_date, end_date, data_key):
     # whether we should show legend - only show legend for a plan when it is the first time we add a trace for this plan
     show_legend = {}

From fd63cf01a78084951f5a2ef2c5ffbebbd023e158 Mon Sep 17 00:00:00 2001
From: Yi Lin <qinsoon@gmail.com>
Date: Mon, 23 Sep 2024 05:06:46 +0000
Subject: [PATCH 2/9] Fix the color of the large label

---
 scripts/plot.py | 101 ++++++++++++++++++++----------------------------
 1 file changed, 42 insertions(+), 59 deletions(-)

diff --git a/scripts/plot.py b/scripts/plot.py
index e37f59b..0bfe369 100644
--- a/scripts/plot.py
+++ b/scripts/plot.py
@@ -97,7 +97,7 @@ def plot_history(runs, plan, benchmarks, start_date, end_date, data_key, baselin
         #     notes = notes_copy
 
         attributes = split_epochs(x, x_labels, y, notes.copy())
-        print(attributes)
+        # print(attributes)
 
         y_cur_aboslute = y[-1]
 
@@ -131,11 +131,15 @@ def plot_history(runs, plan, benchmarks, start_date, end_date, data_key, baselin
         # No min value. There is no value in the plot at all. We just need a reasonable baseline.
         if y_baseline == 0:
             y_baseline = min(nonzero_y)
-        y_max = max(nonzero_y) / y_baseline
-        y_min = min(nonzero_y) / y_baseline
+        # y_max = max(nonzero_y) / y_baseline
+        # y_min = min(nonzero_y) / y_baseline
 
         this_y_upper = attributes[current_epoch]['max'] / y_baseline
         this_y_lower = attributes[current_epoch]['min'] / y_baseline
+        if this_y_lower == 0:
+            this_y_lower = 1
+
+        y_best = this_y_lower
 
         # update range
         if this_y_upper > y_range_upper:
@@ -228,35 +232,8 @@ def keep_first_in_index_range(arr, f, start, end):
                     ret.append(None)
             return ret
 
-        # Overall min/max -- this is replaced by min/max in each epoch
-        # y_max_array = keep_first(y, lambda x: x == y_max) # keep max, leave others as None
-        # traces.append({**history_trace, **{
-        #     "hoverinfo": "none",
-        #     "mode": "markers+text",
-        #     "textposition": "top center",
-        #     "y": y_max_array,
-        #     "text": ["%s: %.2f" % (x, y) if y != 0 else "" for (x, y) in zip(x_labels, y)],
-        #     "textfont_color": "red",
-        #     "cliponaxis": False,
-        #     "marker": { "size": 20, "color": "red", "symbol": "triangle-up" },
-        #     "showlegend": False,
-        # }})
-        # y_min_array = keep_first(y, lambda x: x == y_min) # keep min, leave others as None
-        # traces.append({**history_trace, **{
-        #     "hoverinfo": "none",
-        #     "mode": "markers+text",
-        #     "textposition": "bottom center",
-        #     "y": y_min_array,
-        #     "text": ["%s: %.2f" % (x, y) if y != 0 else "" for (x, y) in zip(x_labels, y)],
-        #     "textfont_color": "green",
-        #     "cliponaxis": False,
-        #     "marker": { "size": 20, "color": "green", "symbol": "triangle-down" },
-        #     "showlegend": False,
-        # }})
-
         # Mark epoch
         for epoch_name, v in attributes.items():
-            print(v)
             # Epoch start
             epoch_start_x = keep_first(x, lambda x: x == v['start_x'])
             epoch_start_y = keep_first(y, lambda y: y == v['start_y'] / y_baseline)
@@ -285,31 +262,31 @@ def keep_first_in_index_range(arr, f, start, end):
                 "showlegend": False,
             }})
 
-            if epoch_name == current_epoch:
-                # Epoch min
-                traces.append({**history_trace, **{
-                    "hoverinfo": "text",
-                    "mode": "markers",
-                    "textposition": "top center",
-                    "y": keep_first_in_index_range(y, lambda y: y == epoch_normalized_min_y, v['start_x'], v['end_x'] + 1),
-                    "text": ["%s: %.2f" % (x, y) if y != 0 else "" for (x, y) in zip(x_labels, y)],
-                    "textfont_color": "green",
-                    "cliponaxis": False,
-                    "marker": { "size": 10, "color": "green", "symbol": "triangle-down" },
-                    "showlegend": False,
-                }})
-                # Epoch max
-                traces.append({**history_trace, **{
-                    "hoverinfo": "text",
-                    "mode": "markers",
-                    "textposition": "top center",
-                    "y": keep_first_in_index_range(y, lambda y: y == epoch_normalized_max_y, v['start_x'], v['end_x'] + 1),
-                    "text": ["%s: %.2f" % (x, y) if y != 0 else "" for (x, y) in zip(x_labels, y)],
-                    "textfont_color": "red",
-                    "cliponaxis": False,
-                    "marker": { "size": 10, "color": "red", "symbol": "triangle-up" },
-                    "showlegend": False,
-                }})
+            # if epoch_name == current_epoch:
+            #     # Epoch min
+            #     traces.append({**history_trace, **{
+            #         "hoverinfo": "text",
+            #         "mode": "markers",
+            #         "textposition": "top center",
+            #         "y": keep_first_in_index_range(y, lambda y: y == epoch_normalized_min_y, v['start_x'], v['end_x'] + 1),
+            #         "text": ["%s: %.2f" % (x, y) if y != 0 else "" for (x, y) in zip(x_labels, y)],
+            #         "textfont_color": "green",
+            #         "cliponaxis": False,
+            #         "marker": { "size": 10, "color": "green", "symbol": "triangle-down" },
+            #         "showlegend": False,
+            #     }})
+            #     # Epoch max
+            #     traces.append({**history_trace, **{
+            #         "hoverinfo": "text",
+            #         "mode": "markers",
+            #         "textposition": "top center",
+            #         "y": keep_first_in_index_range(y, lambda y: y == epoch_normalized_max_y, v['start_x'], v['end_x'] + 1),
+            #         "text": ["%s: %.2f" % (x, y) if y != 0 else "" for (x, y) in zip(x_labels, y)],
+            #         "textfont_color": "red",
+            #         "cliponaxis": False,
+            #         "marker": { "size": 10, "color": "red", "symbol": "triangle-up" },
+            #         "showlegend": False,
+            #     }})
 
         # labeling
         annotation = {
@@ -327,16 +304,21 @@ def keep_first_in_index_range(arr, f, start, end):
         current = y[-1]
         current_std = std[-1]
         # determine if current is improvement or degradation
-        if current + current_std < y_min:
+        # print("cur: %.2f, std: %.2f, best: %.2f" % (current, current_std, y_best))
+        if current == 0:
+            # No data. Show neutral
+            current_color = "black"
+            current_symbol = "~"
+        elif current + current_std < y_best:
             # improvement
             current_color = "green"
             current_symbol = "▽"
-        elif current - current_std > y_min:
+        elif current - current_std > y_best:
             # degradation
             current_color = "red"
             current_symbol = "△"
         else:
-            # none of the above
+            # neutral
             current_color = "black"
             current_symbol = "~"
 
@@ -486,7 +468,7 @@ def keep_first_in_index_range(arr, f, start, end):
 
     # fix range for all the traces
     if SAME_Y_RANGE_IN_ALL_TRACES:
-        RANGE_EXTRA = 0.20
+        RANGE_EXTRA = 0.2
         y_range = [y_range_lower - RANGE_EXTRA, y_range_upper + RANGE_EXTRA]
         for i in range(1, row):
             layout["yaxis%d" % i]["range"] = y_range
@@ -501,6 +483,7 @@ def keep_first_in_index_range(arr, f, start, end):
     #     fig.add_vline(x = int(note['x']), line_color = 'blue', annotation = { "text": "📓", "hovertext": note['note'] })
 
     fig.update_layout(hovermode='x')
+    fig.update_layout(margin=dict(l=5, r=5, t=50, b=5))
 
     return fig
 

From 4d25cfa5ffd3a2478dff8376de8523762ca0e9e2 Mon Sep 17 00:00:00 2001
From: Yi Lin <qinsoon@gmail.com>
Date: Mon, 23 Sep 2024 06:13:52 +0000
Subject: [PATCH 3/9] Add epoch/note time. Check regression with std

---
 configs/jikesrvm-plot.yml |   9 ++-
 configs/openjdk-plot.yml  |   9 ++-
 scripts/parse.py          |  11 ++-
 scripts/plot.py           | 153 ++++++++++++++++++++++++--------------
 4 files changed, 118 insertions(+), 64 deletions(-)

diff --git a/configs/jikesrvm-plot.yml b/configs/jikesrvm-plot.yml
index 313e32b..4181edd 100644
--- a/configs/jikesrvm-plot.yml
+++ b/configs/jikesrvm-plot.yml
@@ -8,12 +8,17 @@ plans:
     baseline: []
 notes:
   - date: "20230116"
+    time: "0000"
     note: "Move to Ubuntu 22.04"
-  - date: "20231015"
+  - date: "20231017"
+    time: "0000"
     note: "Speculative RAS Overflow mitigation on Zen1/Zen2"
-  - date: "20231102"
+  - date: "20231103"
+    time: "0800"
     note: "Move to running-ng"
   - date: "20240403"
+    time: "0000"
     note: "Move to Rust 1.77.0"
   - date: "20240903"
+    time: "0000"
     note: "Move to Linux Kernel 6.8.0"
diff --git a/configs/openjdk-plot.yml b/configs/openjdk-plot.yml
index 1324c6d..e5ba411 100644
--- a/configs/openjdk-plot.yml
+++ b/configs/openjdk-plot.yml
@@ -16,12 +16,17 @@ plans:
     baseline: ['jdk-g1', 'jdk-zgc']
 notes:
   - date: "20230116"
+    time: "0000"
     note: "Move to Ubuntu 22.04"
-  - date: "20231015"
+  - date: "20231017"
+    time: "0000"
     note: "Speculative RAS Overflow mitigation on Zen1/Zen2"
-  - date: "20231102"
+  - date: "20231103"
+    time: "0800"
     note: "Move to running-ng. Use -Xcomp. Use image build."
   - date: "20240403"
+    time: "0000"
     note: "Move to Rust 1.77.0"
   - date: "20240903"
+    time: "0000"
     note: "Move to Linux Kernel 6.8.0"
diff --git a/scripts/parse.py b/scripts/parse.py
index 03da1e6..95ffbd8 100644
--- a/scripts/parse.py
+++ b/scripts/parse.py
@@ -94,11 +94,14 @@ def parse_run_date(run_id):
         return datetime(int(matcher['year']), int(matcher['month']), int(matcher['day']), int(matcher['hour']), int(matcher['minute']), int(matcher['second']))
 
 # Given a note date, return the date object
-def parse_note_date(note_date):
+def parse_note_date(note_date, note_time = None):
     from datetime import datetime
-    matcher = re.match("(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})", note_date)
-    if matcher:
-        return datetime(int(matcher['year']), int(matcher['month']), int(matcher['day']))
+    date_matcher = re.match(r"(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})", note_date)
+    if note_time is None:
+        note_time = "0000"
+    time_matcher = re.match(r"(?P<hour>\d{2})(?P<minute>\d{2})", note_time)
+    if date_matcher and time_matcher:
+        return datetime(int(date_matcher['year']), int(date_matcher['month']), int(date_matcher['day']), int(time_matcher['hour']), int(time_matcher['minute']))
 
 # Given a yaml file path, return the file
 def parse_yaml(path):
diff --git a/scripts/plot.py b/scripts/plot.py
index 0bfe369..e14d3bb 100644
--- a/scripts/plot.py
+++ b/scripts/plot.py
@@ -66,37 +66,7 @@ def plot_history(runs, plan, benchmarks, start_date, end_date, data_key, baselin
         # We have to sort by date, run_id includes the machine name, we cannot sort by alphabet
         x_labels.sort(key = lambda x: parse.parse_run_date(x))
 
-        # Only add notes for the first plot
-        # if row == 1:
-        #     notes_copy = notes.copy()
-        #     import datetime
-
-        #     # Sort notes
-        #     notes.sort(key = lambda x: parse.parse_note_date(x['date']))
-
-        #     # Align notes to logs/run_ids. Each note has a date, find the next log on or after the date.
-        #     def peek_next_note_date():
-        #         return parse.parse_note_date(notes[0]['date']) if len(notes) > 0 else datetime.datetime(9999, 1, 1) # end of the world. We will never find a log after this date.
-        #     next_note_date = peek_next_note_date()
-
-        #     for idx, run_id in enumerate(x_labels):
-        #         log_date = parse.parse_run_date(run_id)
-        #         if log_date >= next_note_date:
-        #             # We may have multiple notes on this date. We have to combine them.
-        #             combined_note = None
-
-        #             while log_date >= next_note_date:
-        #                 note = notes.pop(0)
-        #                 if combined_note is None:
-        #                     combined_note = { 'run_id': run_id, 'x': x[idx], 'note': f"{note['date']}: {note['note']}" }
-        #                 else:
-        #                     combined_note['note'] += f",{note['date']}: {note['note']}"
-        #                 next_note_date = peek_next_note_date()
-        #             aligned_notes.append(combined_note)
-            
-        #     notes = notes_copy
-
-        attributes = split_epochs(x, x_labels, y, notes.copy())
+        attributes = split_epochs(x, x_labels, y, std, notes.copy())
         # print(attributes)
 
         y_cur_aboslute = y[-1]
@@ -240,14 +210,19 @@ def keep_first_in_index_range(arr, f, start, end):
 
             # Normalized y
             epoch_normalized_start_y = v['start_y'] / y_baseline
+            epoch_normalized_start_y_std = v['start_y_std'] / y_baseline
             epoch_normalized_end_y = v['end_y'] / y_baseline
+            epoch_normalized_end_y_std = v['end_y_std'] / y_baseline
 
             # Epoch min/max
             epoch_normalized_min_y = v['min'] / y_baseline
             epoch_normalized_max_y = v['max'] / y_baseline
 
-            regress = epoch_normalized_end_y > epoch_normalized_start_y
-            color = "red" if regress else "green"
+            regress = check_regression(epoch_normalized_start_y, epoch_normalized_start_y_std, epoch_normalized_end_y, epoch_normalized_end_y_std)
+            if regress == "regression":
+                epoch_color = "red"
+            else:
+                epoch_color = "green"
 
             traces.append({**history_trace, **{
                 "hoverinfo": 'text',
@@ -255,10 +230,10 @@ def keep_first_in_index_range(arr, f, start, end):
                 "textposition": "top center",
                 "x": epoch_start_x,
                 "y": epoch_start_y,
-                "text": "Epoch: %s, start: %.2f, end: %.2f, min: %.2f, max: %.2f" % (v['note'], epoch_normalized_start_y, epoch_normalized_end_y, epoch_normalized_min_y, epoch_normalized_max_y),
-                "textfont_color": color,
+                "text": "Epoch: %s, start: %.2f +- %.2f, end: %.2f +- %.2f, min: %.2f, max: %.2f" % (v['note'], epoch_normalized_start_y, epoch_normalized_start_y_std, epoch_normalized_end_y, epoch_normalized_end_y_std, epoch_normalized_min_y, epoch_normalized_max_y),
+                "textfont_color": epoch_color,
                 "cliponaxis": False,
-                "marker": { "size": 10, "color": color, "symbol": "star-diamond"},
+                "marker": { "size": 10, "color": epoch_color, "symbol": "star-diamond"},
                 "showlegend": False,
             }})
 
@@ -309,18 +284,19 @@ def keep_first_in_index_range(arr, f, start, end):
             # No data. Show neutral
             current_color = "black"
             current_symbol = "~"
-        elif current + current_std < y_best:
-            # improvement
-            current_color = "green"
-            current_symbol = "▽"
-        elif current - current_std > y_best:
-            # degradation
-            current_color = "red"
-            current_symbol = "△"
         else:
-            # neutral
-            current_color = "black"
-            current_symbol = "~"
+            trend = check_regression(y_best, 0, current, current_std)
+            if trend == "improvment":
+                current_color = "green"
+                current_symbol = "▽"
+            elif trend == "regression":
+                # degradation
+                current_color = "red"
+                current_symbol = "△"
+            else:
+                # neutral
+                current_color = "black"
+                current_symbol = "~"
 
         y_last_array = keep_last(y, lambda x: x == current)
         traces.append({**history_trace, **{
@@ -355,7 +331,7 @@ def keep_first_in_index_range(arr, f, start, end):
             "yshift": -50
         }})
 
-        PLOT_MORE_STATISTICS = False
+        PLOT_MORE_STATISTICS = True
         if PLOT_MORE_STATISTICS:
             # moving average
             y_moving_average = moving_average(y, 10)
@@ -488,7 +464,7 @@ def keep_first_in_index_range(arr, f, start, end):
     return fig
 
 
-def split_epochs(x, x_labels, y, notes):
+def split_epochs(x, x_labels, y, y_std, notes):
     import datetime
 
     FIRST_EPOCH = "19700101"
@@ -505,26 +481,30 @@ def new_epoch(idx, epoch_name, note = None):
                 prev_epoch_end = idx - 1
             else:
                 prev_epoch_end = 0
+            attrs[epoch]['end_idx'] = prev_epoch_end
             attrs[epoch]['end_x'] = x[prev_epoch_end]
             attrs[epoch]['end_y'] = y[prev_epoch_end]
+            attrs[epoch]['end_y_std'] = y_std[prev_epoch_end]
 
         epoch = epoch_name
 
         attrs[epoch_name] = {}
         attrs[epoch_name]['epoch'] = epoch_name
+        attrs[epoch_name]['start_idx'] = idx
         attrs[epoch_name]['start_x'] = x[idx]
         attrs[epoch_name]['start_y'] = y[idx]
+        attrs[epoch_name]['start_y_std'] = y_std[idx]
         if note is not None:
             attrs[epoch_name]['note'] = note
         else:
             attrs[epoch_name]['note'] = epoch_name
 
     # Sort notes
-    notes.sort(key = lambda x: parse.parse_note_date(x['date']))
+    notes.sort(key = lambda x: parse.parse_note_date(x['date'], x.get('time')))  # 'time' is optional: parse_note_date falls back to "0000" when it is None
 
     # Align notes to logs/run_ids. Each note has a date, find the next log on or after the date.
     def peek_next_note_date():
-        return parse.parse_note_date(notes[0]['date']) if len(notes) > 0 else datetime.datetime(9999, 1, 1) # end of the world. We will never find a log after this date.
+        return parse.parse_note_date(notes[0]['date'], notes[0].get('time')) if len(notes) > 0 else datetime.datetime(9999, 1, 1) # end of the world. We will never find a log after this date.
     next_note_date = peek_next_note_date()
 
 
@@ -547,22 +527,83 @@ def peek_next_note_date():
             new_epoch(idx, FIRST_EPOCH)
 
     # End the last epoch
+    attrs[epoch]['end_idx'] = len(x) - 1
     attrs[epoch]['end_x'] = x[-1]
     attrs[epoch]['end_y'] = y[-1]
+    attrs[epoch]['end_y_std'] = y_std[-1]
+    
+    print(attrs)
 
     # For each epoch, find min/max
     for name, epoch in attrs.items():
-        epoch_y = y[epoch['start_x']:(epoch['end_x'] + 1)]
-        epoch_non_zero_y = [y for y in epoch_y if y > 0]
-        if len(epoch_non_zero_y) != 0:
-            epoch['min'] = min(epoch_non_zero_y)
-            epoch['max'] = max(epoch_y)
+        def find_min_with_index(lst, start, end):  # returns (smallest value in lst[start..end], inclusive; index of its first occurrence)
+            if not lst:
+                raise ValueError("The list is empty")
+
+            if start < 0 or end >= len(lst) or start > end:
+                print("start %d, end %d, len %d" % (start, end, len(lst)))  # '%' is required: print() does not interpolate extra arguments
+                raise IndexError("Invalid start or end index")
+
+            min_value = lst[start]
+            min_index = start
+
+            for i in range(start + 1, end + 1):
+                if lst[i] < min_value:  # strict '<' keeps the earliest index on ties
+                    min_value = lst[i]
+                    min_index = i
+
+            return min_value, min_index
+        def find_max_with_index(lst, start, end):  # returns (largest value in lst[start..end], inclusive; index of its first occurrence)
+            if not lst:
+                raise ValueError("The list is empty")
+
+            if start < 0 or end >= len(lst) or start > end:
+                print("start %d, end %d, len %d" % (start, end, len(lst)))  # '%' is required: print() does not interpolate extra arguments
+                raise IndexError("Invalid start or end index")
+
+            max_value = lst[start]
+            max_index = start
+
+            for i in range(start + 1, end + 1):
+                if lst[i] > max_value:  # strict '>' keeps the earliest index on ties
+                    max_value = lst[i]
+                    max_index = i
+
+            return max_value, max_index
+
+        min, min_idx = find_min_with_index(y, epoch['start_idx'], epoch['end_idx'])
+        if min != 0:
+            epoch['min'] = min
+            epoch['min_std'] = y_std[min_idx]
         else:
             epoch['min'] = epoch['start_y']
+            epoch['min_std'] = 0
+
+        max, max_idx = find_max_with_index(y, epoch['start_idx'], epoch['end_idx'])
+        if max != 0:
+            epoch['max'] = max
+            epoch['max_std'] = y_std[max_idx]
+        else:
             epoch['max'] = epoch['start_y']
+            epoch['max_std'] = 0
 
     return attrs
 
+# Return improvement, or regression, or neutral
+def check_regression(r1, std1, r2, std2):
+    # Determine the lower and upper bounds for r1 and r2
+    lower_bound_r1 = r1 - std1
+    upper_bound_r1 = r1 + std1
+    lower_bound_r2 = r2 - std2
+    upper_bound_r2 = r2 + std2
+
+    if upper_bound_r2 < lower_bound_r1:
+        return "improvement"
+    elif lower_bound_r2 > upper_bound_r1:
+        return "regression"
+    # Otherwise, it's neutral
+    else:
+        return "neutral"
 
 def plot_multi_plans_history(runs, plans, benchmarks, start_date, end_date, data_key):
     # whether we should show legend - only show legend for a plan when it is the first time we add a trace for this plan

From e4c6748ffc4bd83f1cbb3cf0c46f76712e11fc07 Mon Sep 17 00:00:00 2001
From: Yi Lin <qinsoon@gmail.com>
Date: Tue, 24 Sep 2024 01:04:38 +0000
Subject: [PATCH 4/9] Make the graph smaller

---
 scripts/plot.py | 54 +++++++++++++++++++++++++++++++------------------
 1 file changed, 34 insertions(+), 20 deletions(-)

diff --git a/scripts/plot.py b/scripts/plot.py
index e14d3bb..50637aa 100644
--- a/scripts/plot.py
+++ b/scripts/plot.py
@@ -6,8 +6,8 @@
 import math
 
 
-GRAPH_WIDTH = 1000
-GRAPH_HEIGHT_PER_BENCHMARK = 200
+GRAPH_WIDTH = 500
+GRAPH_HEIGHT_PER_BENCHMARK = 100
 
 SHOW_DATA_POINT = False
 TRACE_MODE = "lines+markers" if SHOW_DATA_POINT else "lines"
@@ -16,9 +16,21 @@
 X_INTERVAL_1 = 1
 X_INTERVAL_2 = 3
 X_INTERVAL_3 = 5
-# We place the labels (big number/benchmark name/absolute number) on the position of (last point + this offset)
-LABEL_OFFSET = X_INTERVAL_3 * 3
 
+# We place the labels (big number/benchmark name/absolute number) on the position of (last point + this offset)
+LABEL_OFFSET = X_INTERVAL_3 * 15
+BIG_NUMBER_FONT_SIZE = 30
+BM_NAME_FONT_SIZE = 15
+BM_NAME_Y_SHIFT = 15
+SHOW_ABS_NUMBER = False
+ABS_NUMBER_FONT_SIZE = 10
+ABS_NUMBER_FONT_Y_SHIFT = -30
+
+# Plot statistics
+PLOT_MOVING_AVERAGE = True
+PLOT_STD_DEV = True
+
+# Use the same Y range for all the traces
 SAME_Y_RANGE_IN_ALL_TRACES = True
 
 # runs: all the runs for a certain build (as a dictionary from run_id -> run results)
@@ -66,6 +78,11 @@ def plot_history(runs, plan, benchmarks, start_date, end_date, data_key, baselin
         # We have to sort by date, run_id includes the machine name, we cannot sort by alphabet
         x_labels.sort(key = lambda x: parse.parse_run_date(x))
 
+        n_points = len(x)
+        assert len(y) == n_points
+        assert len(std) == n_points
+        assert len(x_labels) == n_points
+
         attributes = split_epochs(x, x_labels, y, std, notes.copy())
         # print(attributes)
 
@@ -205,7 +222,6 @@ def keep_first_in_index_range(arr, f, start, end):
         # Mark epoch
         for epoch_name, v in attributes.items():
             # Epoch start
-            epoch_start_x = keep_first(x, lambda x: x == v['start_x'])
             epoch_start_y = keep_first(y, lambda y: y == v['start_y'] / y_baseline)
 
             # Normalized y
@@ -228,7 +244,6 @@ def keep_first_in_index_range(arr, f, start, end):
                 "hoverinfo": 'text',
                 "mode": "markers",
                 "textposition": "top center",
-                "x": epoch_start_x,
                 "y": epoch_start_y,
                 "text": "Epoch: %s, start: %.2f +- %.2f, end: %.2f +- %.2f, min: %.2f, max: %.2f" % (v['note'], epoch_normalized_start_y, epoch_normalized_start_y_std, epoch_normalized_end_y, epoch_normalized_end_y_std, epoch_normalized_min_y, epoch_normalized_max_y),
                 "textfont_color": epoch_color,
@@ -310,29 +325,29 @@ def keep_first_in_index_range(arr, f, start, end):
         # big number
         annotations.append({**annotation, **{
             "text": "%.2f" % current,
-            "font": {"color": current_color, "size": 60},
+            "font": {"color": current_color, "size": BIG_NUMBER_FONT_SIZE},
             "xanchor": "center",
             "yanchor": "middle",
         }})
         # benchmark name
         annotations.append({**annotation, **{
             "text": "<b>%s" % bm,
-            "font": {"color": "black", "size": 20},
+            "font": {"color": "black", "size": BM_NAME_FONT_SIZE},
             "xanchor": "center",
             "yanchor": "bottom",
-            "yshift": 40
+            "yshift": BM_NAME_Y_SHIFT
         }})
         # aboslute number
-        annotations.append({**annotation, **{
-            "text": "%.2f ms %s" % (y_cur_aboslute, current_symbol),
-            "font": {"color": "black"},
-            "xanchor": "center",
-            "yanchor": "bottom",
-            "yshift": -50
-        }})
+        if SHOW_ABS_NUMBER:
+            annotations.append({**annotation, **{
+                "text": "%.2f ms %s" % (y_cur_aboslute, current_symbol),
+                "font": {"color": "black", "size": ABS_NUMBER_FONT_SIZE},
+                "xanchor": "center",
+                "yanchor": "bottom",
+                "yshift": ABS_NUMBER_FONT_Y_SHIFT
+            }})
 
-        PLOT_MORE_STATISTICS = True
-        if PLOT_MORE_STATISTICS:
+        if PLOT_MOVING_AVERAGE:
             # moving average
             y_moving_average = moving_average(y, 10)
             traces.append({
@@ -350,6 +365,7 @@ def keep_first_in_index_range(arr, f, start, end):
                 "showlegend": False,
             })
 
+        if PLOT_STD_DEV:
             # variance (10p moving average of std dev)
             std_dev_moving_average = moving_average(std, 10)
             variance_trace = {
@@ -531,8 +547,6 @@ def peek_next_note_date():
     attrs[epoch]['end_x'] = x[-1]
     attrs[epoch]['end_y'] = y[-1]
     attrs[epoch]['end_y_std'] = y_std[-1]
-    
-    print(attrs)
 
     # For each epoch, find min/max
     for name, epoch in attrs.items():

From f001855de07820f0387615259fe05442c8e47be4 Mon Sep 17 00:00:00 2001
From: Yi Lin <qinsoon@gmail.com>
Date: Tue, 24 Sep 2024 01:26:17 +0000
Subject: [PATCH 5/9] Fix wrong epoch start

---
 scripts/history_report.py |  6 ++++--
 scripts/plot.py           | 32 +++++++++++++++++---------------
 2 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/scripts/history_report.py b/scripts/history_report.py
index 9df8287..7dc786f 100644
--- a/scripts/history_report.py
+++ b/scripts/history_report.py
@@ -86,8 +86,10 @@
 
     baseline = plot.calculate_baseline(baseline_results, baseline_builds, "execution_times")
     pp.pprint(baseline)
-    
+
+    build_info = prefix
+
     # plot
-    fig = plot.plot_history(runs, plan, benchmarks, from_date, to_date, "execution_times", baseline, config['notes'].copy())
+    fig = plot.plot_history(build_info, runs, plan, benchmarks, from_date, to_date, "execution_times", baseline, config['notes'].copy())
     path = os.path.join(output_dir, "%s_%s_history.html" % (prefix, plan))
     fig.write_html(path)
diff --git a/scripts/plot.py b/scripts/plot.py
index 50637aa..87e2631 100644
--- a/scripts/plot.py
+++ b/scripts/plot.py
@@ -40,9 +40,9 @@
 # data_key: the data to render
 # baseline: the baseline to plot as a dict {baseline: {benchmark: avg}}. None means no baseline, or no data for a certain benchmark.
 # notes: a list of [date, note]. date is YYYYMMDD
-def plot_history(runs, plan, benchmarks, start_date, end_date, data_key, baseline, notes=[]):
+def plot_history(build_info, runs, plan, benchmarks, start_date, end_date, data_key, baseline, notes=[]):
     layout = {
-        "title": plan,
+        "title": "%s - %s" % (build_info, plan),
         # "margin": {"t": 80},
         "width": GRAPH_WIDTH,
         "height": GRAPH_HEIGHT_PER_BENCHMARK * len(benchmarks),
@@ -221,8 +221,13 @@ def keep_first_in_index_range(arr, f, start, end):
 
         # Mark epoch
         for epoch_name, v in attributes.items():
+            print(v)
+
             # Epoch start
-            epoch_start_y = keep_first(y, lambda y: y == v['start_y'] / y_baseline)
+            epoch_start_y = keep_first_in_index_range(y, lambda y: y == v['start_y'] / y_baseline, v['start'], v['end'])
+
+            assert v['start'] <= n_points
+            assert v['end'] <= n_points
 
             # Normalized y
             epoch_normalized_start_y = v['start_y'] / y_baseline
@@ -245,7 +250,7 @@ def keep_first_in_index_range(arr, f, start, end):
                 "mode": "markers",
                 "textposition": "top center",
                 "y": epoch_start_y,
-                "text": "Epoch: %s, start: %.2f +- %.2f, end: %.2f +- %.2f, min: %.2f, max: %.2f" % (v['note'], epoch_normalized_start_y, epoch_normalized_start_y_std, epoch_normalized_end_y, epoch_normalized_end_y_std, epoch_normalized_min_y, epoch_normalized_max_y),
+                "text": "Epoch: %s<br />  start: %.2f +- %.2f, end: %.2f +- %.2f<br />  min: %.2f, max: %.2f" % (v['note'], epoch_normalized_start_y, epoch_normalized_start_y_std, epoch_normalized_end_y, epoch_normalized_end_y_std, epoch_normalized_min_y, epoch_normalized_max_y),
                 "textfont_color": epoch_color,
                 "cliponaxis": False,
                 "marker": { "size": 10, "color": epoch_color, "symbol": "star-diamond"},
@@ -352,7 +357,7 @@ def keep_first_in_index_range(arr, f, start, end):
             y_moving_average = moving_average(y, 10)
             traces.append({
                 "name": bm,
-                "hoverinfo": "text",
+                "hoverinfo": "none",
                 # "fill": "tozeroy",
                 "mode": TRACE_MODE,
                 "line": {"width": 1, "color": "gray"},
@@ -370,7 +375,7 @@ def keep_first_in_index_range(arr, f, start, end):
             std_dev_moving_average = moving_average(std, 10)
             variance_trace = {
                 "name": bm,
-                "hoverinfo": "text",
+                "hoverinfo": "none",
                 "mode": "lines",
                 "line_color": "#cacccf",
                 "line": {"width": 0},
@@ -497,8 +502,7 @@ def new_epoch(idx, epoch_name, note = None):
                 prev_epoch_end = idx - 1
             else:
                 prev_epoch_end = 0
-            attrs[epoch]['end_idx'] = prev_epoch_end
-            attrs[epoch]['end_x'] = x[prev_epoch_end]
+            attrs[epoch]['end'] = prev_epoch_end
             attrs[epoch]['end_y'] = y[prev_epoch_end]
             attrs[epoch]['end_y_std'] = y_std[prev_epoch_end]
 
@@ -506,8 +510,7 @@ def new_epoch(idx, epoch_name, note = None):
 
         attrs[epoch_name] = {}
         attrs[epoch_name]['epoch'] = epoch_name
-        attrs[epoch_name]['start_idx'] = idx
-        attrs[epoch_name]['start_x'] = x[idx]
+        attrs[epoch_name]['start'] = idx
         attrs[epoch_name]['start_y'] = y[idx]
         attrs[epoch_name]['start_y_std'] = y_std[idx]
         if note is not None:
@@ -533,7 +536,7 @@ def peek_next_note_date():
             while log_date >= next_note_date:
                 note = notes.pop(0)
                 if combined_note is None:
-                    combined_note = { 'run_id': run_id, 'x': x[idx], 'note': f"{note['date']}: {note['note']}" }
+                    combined_note = { 'run_id': run_id, 'note': f"{note['date']}: {note['note']}" }
                 else:
                     combined_note['note'] += f",{note['date']}: {note['note']}"
                 next_note_date = peek_next_note_date()
@@ -543,8 +546,7 @@ def peek_next_note_date():
             new_epoch(idx, FIRST_EPOCH)
 
     # End the last epoch
-    attrs[epoch]['end_idx'] = len(x) - 1
-    attrs[epoch]['end_x'] = x[-1]
+    attrs[epoch]['end'] = len(x) - 1
     attrs[epoch]['end_y'] = y[-1]
     attrs[epoch]['end_y_std'] = y_std[-1]
 
@@ -585,7 +587,7 @@ def find_max_with_index(lst, start, end):
 
             return max_value, max_index
 
-        min, min_idx = find_min_with_index(y, epoch['start_idx'], epoch['end_idx'])
+        min, min_idx = find_min_with_index(y, epoch['start'], epoch['end'])
         if min != 0:
             epoch['min'] = min
             epoch['min_std'] = y_std[min_idx]
@@ -593,7 +595,7 @@ def find_max_with_index(lst, start, end):
             epoch['min'] = epoch['start_y']
             epoch['min_std'] = 0
 
-        max, max_idx = find_max_with_index(y, epoch['start_idx'], epoch['end_idx'])
+        max, max_idx = find_max_with_index(y, epoch['start'], epoch['end'])
         if max != 0:
             epoch['max'] = max
             epoch['max_std'] = y_std[max_idx]

From 0a8b6e97ec6a99aebb9d83c29e6f813f3db563b8 Mon Sep 17 00:00:00 2001
From: Yi Lin <qinsoon@gmail.com>
Date: Tue, 24 Sep 2024 01:49:48 +0000
Subject: [PATCH 6/9] Minor fix

---
 scripts/history_report.py |  3 ---
 scripts/plot.py           | 10 +++++-----
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/scripts/history_report.py b/scripts/history_report.py
index 7dc786f..20844d4 100644
--- a/scripts/history_report.py
+++ b/scripts/history_report.py
@@ -49,9 +49,6 @@
 excluded_runs = plot.get_excluded_runs_from_env_var('HISTORY_EXCLUDE_RUNS')
 
 for plan in plans:
-    if plan != "semispace":
-        continue
-
     # The path for all logs for the plan, such as /home/yilin/Code/ci-perf-kit/result_repo/openjdk/immix
     plan_path = os.path.join(result_repo_vm_root, plan)
     # Get all the runs for the plan, such as ['rat-2021-08-24-Tue-163625']
diff --git a/scripts/plot.py b/scripts/plot.py
index 87e2631..4fb26db 100644
--- a/scripts/plot.py
+++ b/scripts/plot.py
@@ -123,10 +123,10 @@ def plot_history(build_info, runs, plan, benchmarks, start_date, end_date, data_
 
         this_y_upper = attributes[current_epoch]['max'] / y_baseline
         this_y_lower = attributes[current_epoch]['min'] / y_baseline
+        this_y_lower_std = attributes[current_epoch]['min_std'] / y_baseline
         if this_y_lower == 0:
             this_y_lower = 1
-
-        y_best = this_y_lower
+            this_y_lower_std = 0
 
         # update range
         if this_y_upper > y_range_upper:
@@ -250,7 +250,7 @@ def keep_first_in_index_range(arr, f, start, end):
                 "mode": "markers",
                 "textposition": "top center",
                 "y": epoch_start_y,
-                "text": "Epoch: %s<br />  start: %.2f +- %.2f, end: %.2f +- %.2f<br />  min: %.2f, max: %.2f" % (v['note'], epoch_normalized_start_y, epoch_normalized_start_y_std, epoch_normalized_end_y, epoch_normalized_end_y_std, epoch_normalized_min_y, epoch_normalized_max_y),
+                "text": "Epoch: %s<br />  start: %.2f ± %.2f, end: %.2f ± %.2f<br />  min: %.2f, max: %.2f" % (v['note'], epoch_normalized_start_y, epoch_normalized_start_y_std, epoch_normalized_end_y, epoch_normalized_end_y_std, epoch_normalized_min_y, epoch_normalized_max_y),
                 "textfont_color": epoch_color,
                 "cliponaxis": False,
                 "marker": { "size": 10, "color": epoch_color, "symbol": "star-diamond"},
@@ -305,7 +305,7 @@ def keep_first_in_index_range(arr, f, start, end):
             current_color = "black"
             current_symbol = "~"
         else:
-            trend = check_regression(y_best, 0, current, current_std)
-            if trend == "improvment":
+            trend = check_regression(this_y_lower, this_y_lower_std, current, current_std)
+            if trend == "improvement":  # must equal the label check_regression returns; the misspelt literal never matched
                 current_color = "green"
                 current_symbol = "▽"
@@ -765,7 +765,7 @@ def history_per_day(runs, plan, benchmark, start_date, end_date, data_key):
             if result is None:
                 result = 0, 0
 
-        print("Run for %s: %s (%s +- %s)" % (single_date, last_run, result[0], result[1]))
+        print("Run for %s: %s (%s ± %s)" % (single_date, last_run, result[0], result[1]))
         avg.append(result[0])
         std.append(result[1])
     

From a80c4fcb37d2d0fe3c32765297f6c723b8f873a7 Mon Sep 17 00:00:00 2001
From: Yi Lin <qinsoon@gmail.com>
Date: Wed, 25 Sep 2024 04:00:07 +0000
Subject: [PATCH 7/9] Add a different way to check regression. Add a new epoch
 on 20240802

---
 configs/jikesrvm-plot.yml |  3 +++
 configs/openjdk-plot.yml  |  3 +++
 scripts/plot.py           | 46 ++++++++++++++++++++++++++++-----------
 3 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/configs/jikesrvm-plot.yml b/configs/jikesrvm-plot.yml
index 4181edd..e3a6439 100644
--- a/configs/jikesrvm-plot.yml
+++ b/configs/jikesrvm-plot.yml
@@ -19,6 +19,9 @@ notes:
   - date: "20240403"
     time: "0000"
     note: "Move to Rust 1.77.0"
+  - date: "20240802"
+    time: "0000"
+    note: "Microcode update for Zenbleed"
   - date: "20240903"
     time: "0000"
     note: "Move to Linux Kernel 6.8.0"
diff --git a/configs/openjdk-plot.yml b/configs/openjdk-plot.yml
index e5ba411..77accde 100644
--- a/configs/openjdk-plot.yml
+++ b/configs/openjdk-plot.yml
@@ -27,6 +27,9 @@ notes:
   - date: "20240403"
     time: "0000"
     note: "Move to Rust 1.77.0"
+  - date: "20240802"
+    time: "0000"
+    note: "Microcode update for Zenbleed"
   - date: "20240903"
     time: "0000"
     note: "Move to Linux Kernel 6.8.0"
diff --git a/scripts/plot.py b/scripts/plot.py
index 4fb26db..44c160f 100644
--- a/scripts/plot.py
+++ b/scripts/plot.py
@@ -242,6 +242,8 @@ def keep_first_in_index_range(arr, f, start, end):
             regress = check_regression(epoch_normalized_start_y, epoch_normalized_start_y_std, epoch_normalized_end_y, epoch_normalized_end_y_std)
             if regress == "regression":
                 epoch_color = "red"
+            elif regress == "neutral":
+                epoch_color = "black"
             else:
                 epoch_color = "green"
 
@@ -607,19 +609,37 @@ def find_max_with_index(lst, start, end):
 
 # Return improvement, or regression, or neutral
 def check_regression(r1, std1, r2, std2):
-    # Determine the lower and upper bounds for r1 and r2
-    lower_bound_r1 = r1 - std1
-    upper_bound_r1 = r1 + std1
-    lower_bound_r2 = r2 - std2
-    upper_bound_r2 = r2 + std2
-
-    if upper_bound_r2 < lower_bound_r1:
-        return "improvement"
-    elif lower_bound_r2 > upper_bound_r1:
-        return "regression"
-    # Otherwise, it's neutral
-    else:
-        return "neutral"
+    def z_score_regression(r1, std1, r2, std2):  # r1/std1: reference result, r2/std2: result under test; same labels as boundary_regression
+        import math
+        pooled_std = math.sqrt(std1**2 + std2**2)
+        # No spread on either side: any difference is decisive and equal values are neutral (this also avoids dividing by zero)
+        z_score = (r2 - r1) / pooled_std if pooled_std > 0 else (math.copysign(math.inf, r2 - r1) if r2 != r1 else 0.0)
+        # A z-score less than -1.96 means r2 is significantly lower than r1 at 95% confidence: an improvement, as in boundary_regression
+        if z_score < -1.96:
+            return "improvement"
+        # A z-score greater than 1.96 means r2 is significantly higher than r1: a regression
+        elif z_score > 1.96:
+            return "regression"
+        else:
+            return "neutral"
+
+    def boundary_regression(r1, std1, r2, std2):
+        # Determine the lower and upper bounds for r1 and r2
+        lower_bound_r1 = r1 - std1
+        upper_bound_r1 = r1 + std1
+        lower_bound_r2 = r2 - std2
+        upper_bound_r2 = r2 + std2
+
+        if upper_bound_r2 < lower_bound_r1:
+            return "improvement"
+        elif lower_bound_r2 > upper_bound_r1:
+            return "regression"
+        # Otherwise, it's neutral
+        else:
+            return "neutral"
+
+    # Use boundary regression. It is less statistically sound, but more intuitive for people to check the result.
+    return boundary_regression(r1, std1, r2, std2)
 
 def plot_multi_plans_history(runs, plans, benchmarks, start_date, end_date, data_key):
     # whether we should show legend - only show legend for a plan when it is the first time we add a trace for this plan

From aecbce2e5f6b084edd88101d4229e8692e897125 Mon Sep 17 00:00:00 2001
From: Yi Lin <qinsoon@gmail.com>
Date: Thu, 26 Sep 2024 00:58:44 +0000
Subject: [PATCH 8/9] Use vertical lines for epochs, mark best/worst for the
 current epoch

---
 scripts/history_report.py |   3 +
 scripts/plot.py           | 182 +++++++++++++++++++++-----------------
 2 files changed, 105 insertions(+), 80 deletions(-)

diff --git a/scripts/history_report.py b/scripts/history_report.py
index 20844d4..7dc786f 100644
--- a/scripts/history_report.py
+++ b/scripts/history_report.py
@@ -49,6 +49,9 @@
 excluded_runs = plot.get_excluded_runs_from_env_var('HISTORY_EXCLUDE_RUNS')
 
 for plan in plans:
+    if plan != "semispace":
+        continue
+
     # The path for all logs for the plan, such as /home/yilin/Code/ci-perf-kit/result_repo/openjdk/immix
     plan_path = os.path.join(result_repo_vm_root, plan)
     # Get all the runs for the plan, such as ['rat-2021-08-24-Tue-163625']
diff --git a/scripts/plot.py b/scripts/plot.py
index 44c160f..c51377f 100644
--- a/scripts/plot.py
+++ b/scripts/plot.py
@@ -33,6 +33,9 @@
 # Use the same Y range for all the traces
 SAME_Y_RANGE_IN_ALL_TRACES = True
 
+MIN_MAX_MARKER_SIZE = 5
+CURRENT_POINT_MARKER_SIZE = 10
+
 # runs: all the runs for a certain build (as a dictionary from run_id -> run results)
 # plan: the plan to plot
 # benchmarks: benchmarks to plot
@@ -65,7 +68,6 @@ def plot_history(build_info, runs, plan, benchmarks, start_date, end_date, data_
 
     benchmarks.sort()
 
-    aligned_notes = []
     for bm in benchmarks:
         # extract results
         print("Plotting %s %s..." % (plan, bm))
@@ -84,7 +86,6 @@ def plot_history(build_info, runs, plan, benchmarks, start_date, end_date, data_
         assert len(x_labels) == n_points
 
         attributes = split_epochs(x, x_labels, y, std, notes.copy())
-        # print(attributes)
 
         y_cur_aboslute = y[-1]
 
@@ -218,72 +219,15 @@ def keep_first_in_index_range(arr, f, start, end):
                 else:
                     ret.append(None)
             return ret
-
-        # Mark epoch
-        for epoch_name, v in attributes.items():
-            print(v)
-
-            # Epoch start
-            epoch_start_y = keep_first_in_index_range(y, lambda y: y == v['start_y'] / y_baseline, v['start'], v['end'])
-
-            assert v['start'] <= n_points
-            assert v['end'] <= n_points
-
-            # Normalized y
-            epoch_normalized_start_y = v['start_y'] / y_baseline
-            epoch_normalized_start_y_std = v['start_y_std'] / y_baseline
-            epoch_normalized_end_y = v['end_y'] / y_baseline
-            epoch_normalized_end_y_std = v['end_y_std'] / y_baseline
-
-            # Epoch min/max
-            epoch_normalized_min_y = v['min'] / y_baseline
-            epoch_normalized_max_y = v['max'] / y_baseline
-
-            regress = check_regression(epoch_normalized_start_y, epoch_normalized_start_y_std, epoch_normalized_end_y, epoch_normalized_end_y_std)
-            if regress == "regression":
-                epoch_color = "red"
-            elif regress == "neutral":
-                epoch_color = "black"
-            else:
-                epoch_color = "green"
-
-            traces.append({**history_trace, **{
-                "hoverinfo": 'text',
-                "mode": "markers",
-                "textposition": "top center",
-                "y": epoch_start_y,
-                "text": "Epoch: %s<br />  start: %.2f ± %.2f, end: %.2f ± %.2f<br />  min: %.2f, max: %.2f" % (v['note'], epoch_normalized_start_y, epoch_normalized_start_y_std, epoch_normalized_end_y, epoch_normalized_end_y_std, epoch_normalized_min_y, epoch_normalized_max_y),
-                "textfont_color": epoch_color,
-                "cliponaxis": False,
-                "marker": { "size": 10, "color": epoch_color, "symbol": "star-diamond"},
-                "showlegend": False,
-            }})
-
-            # if epoch_name == current_epoch:
-            #     # Epoch min
-            #     traces.append({**history_trace, **{
-            #         "hoverinfo": "text",
-            #         "mode": "markers",
-            #         "textposition": "top center",
-            #         "y": keep_first_in_index_range(y, lambda y: y == epoch_normalized_min_y, v['start_x'], v['end_x'] + 1),
-            #         "text": ["%s: %.2f" % (x, y) if y != 0 else "" for (x, y) in zip(x_labels, y)],
-            #         "textfont_color": "green",
-            #         "cliponaxis": False,
-            #         "marker": { "size": 10, "color": "green", "symbol": "triangle-down" },
-            #         "showlegend": False,
-            #     }})
-            #     # Epoch max
-            #     traces.append({**history_trace, **{
-            #         "hoverinfo": "text",
-            #         "mode": "markers",
-            #         "textposition": "top center",
-            #         "y": keep_first_in_index_range(y, lambda y: y == epoch_normalized_max_y, v['start_x'], v['end_x'] + 1),
-            #         "text": ["%s: %.2f" % (x, y) if y != 0 else "" for (x, y) in zip(x_labels, y)],
-            #         "textfont_color": "red",
-            #         "cliponaxis": False,
-            #         "marker": { "size": 10, "color": "red", "symbol": "triangle-up" },
-            #         "showlegend": False,
-            #     }})
+        def find_first_in_index_range(arr, v, start, end):
+            # Index of the first element equal to v whose position lies in
+            # [start, end); None when no such element exists.
+            for pos, item in enumerate(arr):
+                if pos >= end:
+                    break
+                if pos >= start and item == v:
+                    return pos
+            return None
 
         # labeling
         annotation = {
@@ -308,17 +252,8 @@ def keep_first_in_index_range(arr, f, start, end):
             current_symbol = "~"
         else:
             trend = check_regression(this_y_lower, this_y_lower_std, current, current_std)
-            if trend == "improvment":
-                current_color = "green"
-                current_symbol = "▽"
-            elif trend == "regression":
-                # degradation
-                current_color = "red"
-                current_symbol = "△"
-            else:
-                # neutral
-                current_color = "black"
-                current_symbol = "~"
+            current_color = get_regression_color(trend)
+            current_symbol = get_regression_symbol(trend)
 
         y_last_array = keep_last(y, lambda x: x == current)
         traces.append({**history_trace, **{
@@ -326,7 +261,7 @@ def keep_first_in_index_range(arr, f, start, end):
             "mode": "markers",
             "y": y_last_array,
             "text": ["history current: %s: %.2f" % (x, y) for (x, y) in zip(x_labels, y)],
-            "marker": {"size": 15, "color": "black"},
+            "marker": {"size": CURRENT_POINT_MARKER_SIZE, "color": "black"},
             "showlegend": False,
         }})
         # big number
@@ -402,6 +337,67 @@ def keep_first_in_index_range(arr, f, start, end):
                 "text": ["moving avg + std dev: %s: %s" % (x, "{:.2f}".format(y) if y is not None else "na") for (x, y) in zip(x_labels, variance_up)],
             }})
 
+        # Mark epoch - draw this after stddev. So it will be rendered on top of stddev
+        for epoch_name, v in attributes.items():
+            print(v)
+
+            # Epoch start
+            epoch_start_y = keep_first_in_index_range(y, lambda y: y == v['start_y'] / y_baseline, v['start'], v['end'])
+
+            assert v['start'] <= n_points
+            assert v['end'] <= n_points
+
+            # Normalized y
+            epoch_normalized_start_y = v['start_y'] / y_baseline
+            epoch_normalized_start_y_std = v['start_y_std'] / y_baseline
+            epoch_normalized_end_y = v['end_y'] / y_baseline
+            epoch_normalized_end_y_std = v['end_y_std'] / y_baseline
+
+            # Epoch min/max
+            epoch_normalized_min_y = v['min'] / y_baseline
+            epoch_normalized_max_y = v['max'] / y_baseline
+
+            regress = check_regression(epoch_normalized_start_y, epoch_normalized_start_y_std, epoch_normalized_end_y, epoch_normalized_end_y_std)
+            epoch_color = get_regression_color(regress)
+
+            text = "Epoch: %s<br />  start: %.2f ± %.2f, end: %.2f ± %.2f<br />  min: %.2f, max: %.2f" % (v['note'], epoch_normalized_start_y, epoch_normalized_start_y_std, epoch_normalized_end_y, epoch_normalized_end_y_std, epoch_normalized_min_y, epoch_normalized_max_y)
+
+            traces.append({**history_trace, **{
+                "hoverinfo": "text",
+                "mode": "lines",
+                "line": { "width": 1, "color": epoch_color },
+                "opacity": 0.2 if epoch_color == "black" else 1,
+                "x": [v['start_x'], v['start_x']],
+                "y": [-999, 999],
+                "text": text
+            }})
+
+            if epoch_name == current_epoch:
+                # Epoch min
+                traces.append({**history_trace, **{
+                    "hoverinfo": "text",
+                    "mode": "markers",
+                    "textposition": "top center",
+                    "y": keep_first_in_index_range(y, lambda y: y == epoch_normalized_min_y, v['start'], v['end'] + 1),
+                    "text": ["best: %s: %.2f" % (x, y) if y != 0 else "" for (x, y) in zip(x_labels, y)],
+                    "textfont_color": "green",
+                    "cliponaxis": False,
+                    "marker": { "size": MIN_MAX_MARKER_SIZE, "color": "green", "symbol": "triangle-down" },
+                    "showlegend": False,
+                }})
+                # Epoch max
+                traces.append({**history_trace, **{
+                    "hoverinfo": "text",
+                    "mode": "markers",
+                    "textposition": "top center",
+                    "y": keep_first_in_index_range(y, lambda y: y == epoch_normalized_max_y, v['start'], v['end'] + 1),
+                    "text": ["worst: %s: %.2f" % (x, y) if y != 0 else "" for (x, y) in zip(x_labels, y)],
+                    "textfont_color": "red",
+                    "cliponaxis": False,
+                    "marker": { "size": MIN_MAX_MARKER_SIZE, "color": "red", "symbol": "triangle-up" },
+                    "showlegend": False,
+                }})
+
         # baseline - we will draw one horizontal line per each baseline
         baseline_opacity = 0.6
         baseline_color = "orange"
@@ -480,8 +476,17 @@ def keep_first_in_index_range(arr, f, start, end):
     # This plots a vertical line for each note in the first subgraph.
     # for note in aligned_notes:
     #     fig.add_vline(x = int(note['x']), line_color = 'blue', annotation = { "text": "📓", "hovertext": note['note'] })
+    # for i in range(1, row):
+    #     for e in epochs:
+    #         print("Add Epoch %s for row %d" % (e['epoch'], i))
+    #         fig.add_vline(x = int(e['start_x']), line_color = "gray", annotation = { "text": "x", "hovertext": e['note'] }, yref = "y%d" % i)
+    # for idx, notes in enumerate(epochs_for_rows):
+    #     for n in notes:
+    #         color = get_regression_color(n['regression'])
+    #         fig.add_vline(x = n['x'], line_color = "gray", opacity = 0.2, annotation = { "text": get_regression_symbol(n['regression']) }, yref = "y%d" % (idx + 1), hovertext = n['text'])
 
     fig.update_layout(hovermode='x')
+    fig.update_layout(hoverdistance=1)
     fig.update_layout(margin=dict(l=5, r=5, t=50, b=5))
 
     return fig
@@ -513,6 +518,7 @@ def new_epoch(idx, epoch_name, note = None):
         attrs[epoch_name] = {}
         attrs[epoch_name]['epoch'] = epoch_name
         attrs[epoch_name]['start'] = idx
+        attrs[epoch_name]['start_x'] = x[idx]
         attrs[epoch_name]['start_y'] = y[idx]
         attrs[epoch_name]['start_y_std'] = y_std[idx]
         if note is not None:
@@ -641,6 +647,22 @@ def boundary_regression(r1, std1, r2, std2):
     # Use boundary regression. It is less statistically sound, but more intuitive for people to check the result.
     return boundary_regression(r1, std1, r2, std2)
 
+
+def get_regression_color(regression):  # Map a check_regression() verdict to a plot color; any other value raises Exception
+    match regression:
+        case "regression": return "red"
+        case "improvement": return "green"
+        case "neutral": return "black"
+        case _: raise Exception('Unexpected regression string:' + str(regression))  # str(): non-str input (e.g. None) must not turn into a TypeError
+
+
+def get_regression_symbol(regression):  # Map a check_regression() verdict to its marker glyph; any other value raises
+    match regression:
+        case "regression": return "△"
+        case "improvement": return "▽"
+        case "neutral": return "~"
+        case _: raise Exception('Unexpected regression string:' + regression)
+
 def plot_multi_plans_history(runs, plans, benchmarks, start_date, end_date, data_key):
     # whether we should show legend - only show legend for a plan when it is the first time we add a trace for this plan
     show_legend = {}

From aa8aee3fbf6d57f76a050b0f2659276976e99fea Mon Sep 17 00:00:00 2001
From: Yi Lin <qinsoon@gmail.com>
Date: Thu, 26 Sep 2024 05:03:08 +0000
Subject: [PATCH 9/9] Mark regression points with red

---
 scripts/history_report.py |   3 -
 scripts/plot.py           | 121 ++++++++++++++++++++++++++------------
 2 files changed, 84 insertions(+), 40 deletions(-)

diff --git a/scripts/history_report.py b/scripts/history_report.py
index 7dc786f..20844d4 100644
--- a/scripts/history_report.py
+++ b/scripts/history_report.py
@@ -49,9 +49,6 @@
 excluded_runs = plot.get_excluded_runs_from_env_var('HISTORY_EXCLUDE_RUNS')
 
 for plan in plans:
-    if plan != "semispace":
-        continue
-
     # The path for all logs for the plan, such as /home/yilin/Code/ci-perf-kit/result_repo/openjdk/immix
     plan_path = os.path.join(result_repo_vm_root, plan)
     # Get all the runs for the plan, such as ['rat-2021-08-24-Tue-163625']
diff --git a/scripts/plot.py b/scripts/plot.py
index c51377f..09767da 100644
--- a/scripts/plot.py
+++ b/scripts/plot.py
@@ -32,6 +32,7 @@
 
 # Use the same Y range for all the traces
 SAME_Y_RANGE_IN_ALL_TRACES = True
+Y_RANGE_EXTRA = 0.2
 
 MIN_MAX_MARKER_SIZE = 5
 CURRENT_POINT_MARKER_SIZE = 10
@@ -68,6 +69,8 @@ def plot_history(build_info, runs, plan, benchmarks, start_date, end_date, data_
 
     benchmarks.sort()
 
+    epoch_vlines = []
+
     for bm in benchmarks:
         # extract results
         print("Plotting %s %s..." % (plan, bm))
@@ -119,8 +122,8 @@ def plot_history(build_info, runs, plan, benchmarks, start_date, end_date, data_
         # No min value. There is no value in the plot at all. We just need a reasonable baseline.
         if y_baseline == 0:
             y_baseline = min(nonzero_y)
-        # y_max = max(nonzero_y) / y_baseline
-        # y_min = min(nonzero_y) / y_baseline
+        y_max = max(nonzero_y) / y_baseline
+        y_min = min(nonzero_y) / y_baseline
 
         this_y_upper = attributes[current_epoch]['max'] / y_baseline
         this_y_lower = attributes[current_epoch]['min'] / y_baseline
@@ -155,11 +158,43 @@ def plot_history(build_info, runs, plan, benchmarks, start_date, end_date, data_
         }
 
         # history
+        history_x = x
+        history_y = make_zero_as_none(y)
+        history_colors = []
+        line_colors_info = []
+        for epoch_name in sorted(attributes.keys()):
+            history_colors.extend(attributes[epoch_name]['line_colors'])
+            line_colors_info.extend(attributes[epoch_name]['line_colors_info'])
+
+        assert len(history_colors) == len(history_x)
+        assert len(history_colors) == len(history_y)
+        assert len(history_colors) == len(line_colors_info)
+
+        # Render each segment [i, i+1] as its own trace, colored by the color assigned to its end point.
+        for i in range(len(history_x) - 1):  # n points form n - 1 segments; the newest segment must be drawn too
+            traces.append({**history_trace, **{
+                "x": history_x[i:i+2],
+                "y": history_y[i:i+2],
+                "line": { "width": 3, "color": history_colors[i+1] },
+            }})
+
+        # Hover text lives on one invisible trace: the per-segment traces are so crowded that hovering them would pop up several texts at once.
+        history_hovertext = []
+        for (label, val, _color, color_info) in zip(x_labels, y, history_colors, line_colors_info):
+            if color_info is None:
+                t = get_hover_text("history", label, val)
+            else:
+                l1 = get_hover_text("history", label, val)
+                l2 = "(%s, compared to %s %.2f)" % (color_info['regression'], color_info['label'], color_info['value'] / y_baseline)
+                t = "%s<br />%s" % (l1, l2)
+            history_hovertext.append(t)
         traces.append({**history_trace, **{
-            "line": {"width": 3, "color": "black"},
+            "line": { "color": "black" },
             "y": make_zero_as_none(y),
-            "text": ["history: %s: %.2f" % (x, y) for (x, y) in zip(x_labels, y)],
+            "opacity": 0,
+            "text": history_hovertext,
         }})
+
         layout["xaxis%d" % row] = {
             # attempt to show xticks. Couldn't get this work. Xticks are shown under the first subgraph. 
             # I can't switch it to the last (or it does not show on the last because out of boundary)
@@ -187,7 +222,7 @@ def plot_history(build_info, runs, plan, benchmarks, start_date, end_date, data_
             "showline": True,
             "zeroline": False,
             "showticklabels": False,
-            "range": [this_y_lower - 0.02, this_y_upper + 0.02]
+            "range": [this_y_lower - Y_RANGE_EXTRA, this_y_upper + Y_RANGE_EXTRA]
         }
 
         # highlight max/min
@@ -362,13 +397,13 @@ def find_first_in_index_range(arr, v, start, end):
 
             text = "Epoch: %s<br />  start: %.2f ± %.2f, end: %.2f ± %.2f<br />  min: %.2f, max: %.2f" % (v['note'], epoch_normalized_start_y, epoch_normalized_start_y_std, epoch_normalized_end_y, epoch_normalized_end_y_std, epoch_normalized_min_y, epoch_normalized_max_y)
 
-            traces.append({**history_trace, **{
+            epoch_vlines.append({**history_trace, **{
                 "hoverinfo": "text",
                 "mode": "lines",
                 "line": { "width": 1, "color": epoch_color },
                 "opacity": 0.2 if epoch_color == "black" else 1,
                 "x": [v['start_x'], v['start_x']],
-                "y": [-999, 999],
+                "y": [y_min, y_max],
                 "text": text
             }})
 
@@ -442,48 +477,23 @@ def find_first_in_index_range(arr, v, start, end):
                     #     "text": "%s: %.2f" % (build, hline),
                     # }})
 
-        # Notes
-        # Somehow this line does not show. But it adds a hover text for all the plots.
-        # for note in aligned_notes:
-        #     note_trace = {
-        #         "hoverinfo": "text",
-        #         "mode": "lines",
-        #         "line": {"width": 10, "color": "blue"},
-        #         "x": [note['x']],
-        #         "y": [0, 999],
-        #         "xaxis": x_axis,
-        #         "yaxis": y_axis,
-        #         "showlegend": False,
-        #         "opacity": 0,
-        #         "text": note['note']
-        #     }
-        #     traces.append(note_trace)
-
         row += 1
 
     # fix range for all the traces
     if SAME_Y_RANGE_IN_ALL_TRACES:
-        RANGE_EXTRA = 0.2
-        y_range = [y_range_lower - RANGE_EXTRA, y_range_upper + RANGE_EXTRA]
+        y_range = [y_range_lower - Y_RANGE_EXTRA, y_range_upper + Y_RANGE_EXTRA]
         for i in range(1, row):
             layout["yaxis%d" % i]["range"] = y_range
+        for line in epoch_vlines:
+            line["y"] = y_range
 
     fig = Figure(data = Data(traces), layout = layout)
     for anno in annotations:
         fig.add_annotation(anno)
     for line in baseline_hlines:
         fig.add_shape(line)
-    # This plots a vertical line for each note in the first subgraph.
-    # for note in aligned_notes:
-    #     fig.add_vline(x = int(note['x']), line_color = 'blue', annotation = { "text": "📓", "hovertext": note['note'] })
-    # for i in range(1, row):
-    #     for e in epochs:
-    #         print("Add Epoch %s for row %d" % (e['epoch'], i))
-    #         fig.add_vline(x = int(e['start_x']), line_color = "gray", annotation = { "text": "x", "hovertext": e['note'] }, yref = "y%d" % i)
-    # for idx, notes in enumerate(epochs_for_rows):
-    #     for n in notes:
-    #         color = get_regression_color(n['regression'])
-    #         fig.add_vline(x = n['x'], line_color = "gray", opacity = 0.2, annotation = { "text": get_regression_symbol(n['regression']) }, yref = "y%d" % (idx + 1), hovertext = n['text'])
+    for vline in epoch_vlines:
+        fig.add_trace(vline)
 
     fig.update_layout(hovermode='x')
     fig.update_layout(hoverdistance=1)
@@ -611,6 +621,35 @@ def find_max_with_index(lst, start, end):
             epoch['max'] = epoch['start_y']
             epoch['max_std'] = 0
 
+    # Decide the color for each segment, based on the best value up to that point in the epoch
+    for name, epoch in attrs.items():
+        start = epoch['start']
+        end = epoch['end']
+
+        # The best result in this epoch so far
+        best = -1
+
+        line_colors = []
+        line_colors_info = []
+
+        for i in range(start, end + 1):
+            if y[i] == 0:
+                # No data
+                line_colors.append("black")
+                line_colors_info.append(None)
+                continue
+
+            if best == -1:
+                best = i
+            # Record the verdict against the best point seen BEFORE this one, and only then advance `best`.
+            trend = check_regression(y[best], y_std[best], y[i], y_std[i])
+            line_colors.append(get_regression_color(trend))
+            line_colors_info.append({ "label": x_labels[best], "value": y[best], "regression": trend })
+            if trend == "improvement":
+                best = i
+        epoch['line_colors'] = line_colors
+        epoch['line_colors_info'] = line_colors_info
+
     return attrs
 
 # Return improvement, or regression, or neutral
@@ -663,6 +702,14 @@ def get_regression_symbol(regression):
         case "neutral": return "~"
         case _: raise Exception('Unexpected regression string:' + regression)
 
+
+def get_hover_text(prefix, text, value):
+    # Build the hover label "<prefix>: <text>: <value>". The value is printed
+    # with two decimals; a missing value (None) is printed as "none".
+    shown = "none" if value is None else "%.2f" % (value,)
+    return "%s: %s: %s" % (prefix, text, shown)
+
+
 def plot_multi_plans_history(runs, plans, benchmarks, start_date, end_date, data_key):
     # whether we should show legend - only show legend for a plan when it is the first time we add a trace for this plan
     show_legend = {}