From bd25ec56308b8083158455ac3f92e6266db1ce51 Mon Sep 17 00:00:00 2001 From: Moonzyyy <47296443+Moonzyyy@users.noreply.github.com> Date: Sun, 30 Jun 2024 14:19:06 +0100 Subject: [PATCH 01/17] Initial Commit --- tsml_eval/segmentation/__init__.py | 12 +++++++ tsml_eval/segmentation/_bu.py | 40 ++++++++++++++++++++++ tsml_eval/segmentation/_sw.py | 34 +++++++++++++++++++ tsml_eval/segmentation/_swab.py | 38 +++++++++++++++++++++ tsml_eval/segmentation/_td.py | 48 +++++++++++++++++++++++++++ tsml_eval/segmentation/base.py | 35 +++++++++++++++++++ tsml_eval/segmentation/manual_test.py | 36 ++++++++++++++++++++ 7 files changed, 243 insertions(+) create mode 100644 tsml_eval/segmentation/__init__.py create mode 100644 tsml_eval/segmentation/_bu.py create mode 100644 tsml_eval/segmentation/_sw.py create mode 100644 tsml_eval/segmentation/_swab.py create mode 100644 tsml_eval/segmentation/_td.py create mode 100644 tsml_eval/segmentation/base.py create mode 100644 tsml_eval/segmentation/manual_test.py diff --git a/tsml_eval/segmentation/__init__.py b/tsml_eval/segmentation/__init__.py new file mode 100644 index 000000000..1602ecbef --- /dev/null +++ b/tsml_eval/segmentation/__init__.py @@ -0,0 +1,12 @@ +"""Piecewise Linear Approximation.""" + +__all__ = [ + "BasePLA", + "SlidingWindow", + "TopDown", + "BottomUp" +] +from base import BasePLA +from _sw import SlidingWindow +from _td import TopDown +from _bu import BottomUp \ No newline at end of file diff --git a/tsml_eval/segmentation/_bu.py b/tsml_eval/segmentation/_bu.py new file mode 100644 index 000000000..f3f84d506 --- /dev/null +++ b/tsml_eval/segmentation/_bu.py @@ -0,0 +1,40 @@ +from base import BasePLA +import numpy as np +__maintainer__ = [] +__all__ = ["BottomUp"] + +class BottomUp(BasePLA): + + def __init__(self, max_error): + super().__init__(max_error) + + #clean the code + def bottomUp(self, time_series): + seg_ts = [] + merge_cost = [] + for i in range(0, len(time_series), 2): + 
seg_ts.append(self.create_segment(time_series[i: i + 2])) + for i in range(len(seg_ts) - 1): + merge_cost.append(self.calculate_error(seg_ts[i] + seg_ts[i + 1])) + + merge_cost = np.array(merge_cost) + + while len(merge_cost != 0) and min(merge_cost) < self.max_error: + if(len(merge_cost) == len(seg_ts)): + print("error") + pos = np.argmin(merge_cost) + seg_ts[pos] = np.concatenate((seg_ts[pos], seg_ts[pos + 1])) + seg_ts.pop(pos + 1) + if (pos + 1) < len(merge_cost): + merge_cost = np.delete(merge_cost, pos + 1) + else: + merge_cost= np.delete(merge_cost, pos) + + if pos != 0: + merge_cost[pos - 1] = self.calculate_error(np.concatenate((seg_ts[pos - 1], seg_ts[pos]))) + + if((pos + 1) < len(seg_ts)): + merge_cost[pos] = self.calculate_error(np.concatenate((seg_ts[pos], seg_ts[pos + 1]))) + + + return seg_ts \ No newline at end of file diff --git a/tsml_eval/segmentation/_sw.py b/tsml_eval/segmentation/_sw.py new file mode 100644 index 000000000..781801b67 --- /dev/null +++ b/tsml_eval/segmentation/_sw.py @@ -0,0 +1,34 @@ + +from base import BasePLA +import numpy as np +__maintainer__ = [] +__all__ = ["SlidingWindow"] + +class SlidingWindow(BasePLA): + + def __init__(self, max_error): + super().__init__(max_error) + + """work in progress + def sliding_window(self, time_series): + seg_ts = [] + anchor = 0 + for i in range(1, len(time_series)): + if self.calculate_error(time_series[anchor:i]) > self.max_error: + seg_ts.append(self.create_segment(time_series[anchor: i - 1])) + anchor = i - 1 + if(anchor < i): + seg_ts.append(self.create_segment(time_series[anchor: i - 1])) + return np.concatenate(seg_ts) """ + + #! clean this up, the while loops are not done in a good manner. 
This is from the pseudocode + def sliding_window(self, time_series): + seg_ts = [] + anchor = 0 + while anchor < len(time_series): + i = 2 + while anchor + i -1 < len(time_series) and self.calculate_error(time_series[anchor:anchor + i]) < self.max_error: + i = i + 1 + seg_ts.append(self.create_segment(time_series[anchor:anchor + i - 1])) + anchor = anchor + i - 1 + return seg_ts \ No newline at end of file diff --git a/tsml_eval/segmentation/_swab.py b/tsml_eval/segmentation/_swab.py new file mode 100644 index 000000000..a3f82dd17 --- /dev/null +++ b/tsml_eval/segmentation/_swab.py @@ -0,0 +1,38 @@ +from base import BasePLA +import numpy as np +import sys +import BottomUp + +__maintainer__ = [] +__all__ = ["SWAB"] + +class SWAB(BasePLA): + + def __init__(self, max_error, seg_num = 6): + self.seg_num = seg_num + self.bottomup = BottomUp(max_error) + super().__init__(max_error) + + + def swab(self, time_series): + seg_ts = [] + buffer = np.empty(self.seg_num, dtype=object) + sw_lower_bound = len(buffer) / 2 + sw_upper_bound = len(buffer) * 2 + while len(buffer) < 3: + t = self.bottomup(time_series) + seg_ts.append(t[0]) + buffer = buffer[len(t) - 1:] + return None + + + #finds the next potential segment + def best_line(self, time_series, current_data_point, sw_lower_bound, sw_upper_bound): + seg_ts = [] + error = 0 + while error < self.max_error: + seg_ts.append = time_series[current_data_point] + error = self.calculate_error(seg_ts) + current_data_point = current_data_point + 1 + return seg_ts + \ No newline at end of file diff --git a/tsml_eval/segmentation/_td.py b/tsml_eval/segmentation/_td.py new file mode 100644 index 000000000..cb97d439f --- /dev/null +++ b/tsml_eval/segmentation/_td.py @@ -0,0 +1,48 @@ +from base import BasePLA +import numpy as np +import sys + +__maintainer__ = [] +__all__ = ["TopDown"] + +class TopDown(BasePLA): + + def __init__(self, max_error): + super().__init__(max_error) + + #Implement a cache system for this + def topDown(self, 
time_series): + seg_ts = [] + best_so_far = sys.float_info.max + breakpoint = None + for i in range(2, len(time_series -2)): + improvement_in_approximation = self.improvement_splitting_here(time_series, i) + if(improvement_in_approximation < best_so_far): + breakpoint = i + best_so_far = improvement_in_approximation + + if breakpoint == None: + return [time_series] + + left_segment = time_series[:breakpoint] + right_segment = time_series[breakpoint:] + + if self.calculate_error(left_segment) > self.max_error: + seg_ts.extend(self.topDown(left_segment)) + else: + seg_ts.append(left_segment) + + + if self.calculate_error(right_segment) > self.max_error: + seg_ts.extend(self.topDown(right_segment)) + else: + seg_ts.append(right_segment) + + return seg_ts + + + def improvement_splitting_here(self, time_series, breakpoint): + left_segment = time_series[:breakpoint] + right_segment = time_series[breakpoint:] + return self.calculate_error(left_segment) + self.calculate_error(right_segment) + \ No newline at end of file diff --git a/tsml_eval/segmentation/base.py b/tsml_eval/segmentation/base.py new file mode 100644 index 000000000..fb7578288 --- /dev/null +++ b/tsml_eval/segmentation/base.py @@ -0,0 +1,35 @@ +"""Abstract base class""" + +__maintainer__ = [] +__all__ = ["BasePLA"] + +import numpy as np +import pandas as pd +from sklearn.linear_model import LinearRegression + +class BasePLA(): + "Base class for piecewise linear approximation (PLA)" + + def __init__(self, max_error): + self.max_error = max_error + + def linear_regression(self, time_series, sequence = None): + n = len(time_series) + Y = np.array(time_series) + X = np.arange(n).reshape(-1 , 1) + linearRegression = LinearRegression() + linearRegression.fit(X, Y) + return linearRegression.predict(X) + + def sum_squared_error(self, time_series, linear_regression_time_series): + "formula: sse = the sum of the differences of the original series against the predicted series squared" + error = np.sum((time_series - 
linear_regression_time_series) ** 2) + return error + + def calculate_error(self, time_series): + lrts = self.linear_regression(time_series) + sse = self.sum_squared_error(time_series, lrts) + return sse + + def create_segment(self, time_series): + return self.linear_regression(time_series) \ No newline at end of file diff --git a/tsml_eval/segmentation/manual_test.py b/tsml_eval/segmentation/manual_test.py new file mode 100644 index 000000000..787c3eb26 --- /dev/null +++ b/tsml_eval/segmentation/manual_test.py @@ -0,0 +1,36 @@ +from _sw import SlidingWindow +from _bu import BottomUp +from _td import TopDown +from aeon.datasets import load_electric_devices_segmentation +from aeon.visualisation import plot_series_with_change_points, plot_series_with_profiles +import matplotlib.pyplot as plt +import numpy as np + + +ts, period_size, true_cps = load_electric_devices_segmentation() +ts = ts.values +sw = SlidingWindow(100) +results = sw.sliding_window(ts) +print(len(results)) + +plt.figure() +plt.plot(np.arange(len(ts)), ts) +plt.title('original') +plt.xlabel('x') +plt.ylabel('y') + +flattened_arr = [item for sublist in results for item in sublist] +plt.figure() +plt.plot(np.arange(len(flattened_arr)), flattened_arr) +plt.title('pla') +plt.xlabel('x') +plt.ylabel('y') + +for i in range(len(results)): + plt.figure() + plt.plot(np.arange(len(results[i])), results[i]) + plt.title(i) + plt.xlabel('x') + plt.ylabel('y') + +plt.show() From f37f6d65b558d1cf122c29ca073351ccf22d6fa0 Mon Sep 17 00:00:00 2001 From: Moonzyyy <47296443+Moonzyyy@users.noreply.github.com> Date: Mon, 1 Jul 2024 09:08:48 +0100 Subject: [PATCH 02/17] Changed SW to np.array --- tsml_eval/segmentation/_sw.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsml_eval/segmentation/_sw.py b/tsml_eval/segmentation/_sw.py index 781801b67..aff6a2206 100644 --- a/tsml_eval/segmentation/_sw.py +++ b/tsml_eval/segmentation/_sw.py @@ -23,12 +23,12 @@ def sliding_window(self, time_series): #! 
clean this up, the while loops are not done in a good manner. This is from the pseudocode def sliding_window(self, time_series): - seg_ts = [] + seg_ts = np.array([]) anchor = 0 while anchor < len(time_series): i = 2 while anchor + i -1 < len(time_series) and self.calculate_error(time_series[anchor:anchor + i]) < self.max_error: i = i + 1 - seg_ts.append(self.create_segment(time_series[anchor:anchor + i - 1])) + seg_ts = np.append(seg_ts, self.create_segment(time_series[anchor:anchor + i - 1])) anchor = anchor + i - 1 return seg_ts \ No newline at end of file From b84619fd41e00b9e67c724ecd942eb687bd375b2 Mon Sep 17 00:00:00 2001 From: Moonzyyy <47296443+Moonzyyy@users.noreply.github.com> Date: Mon, 1 Jul 2024 09:22:44 +0100 Subject: [PATCH 03/17] Changed TD to np.array --- tsml_eval/segmentation/_td.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tsml_eval/segmentation/_td.py b/tsml_eval/segmentation/_td.py index cb97d439f..0878066f8 100644 --- a/tsml_eval/segmentation/_td.py +++ b/tsml_eval/segmentation/_td.py @@ -12,7 +12,7 @@ def __init__(self, max_error): #Implement a cache system for this def topDown(self, time_series): - seg_ts = [] + seg_ts = np.array([]) best_so_far = sys.float_info.max breakpoint = None for i in range(2, len(time_series -2)): @@ -22,21 +22,21 @@ def topDown(self, time_series): best_so_far = improvement_in_approximation if breakpoint == None: - return [time_series] + return np.array([time_series]) left_segment = time_series[:breakpoint] right_segment = time_series[breakpoint:] if self.calculate_error(left_segment) > self.max_error: - seg_ts.extend(self.topDown(left_segment)) + seg_ts = np.concatenate(seg_ts, self.topDown(left_segment)) else: - seg_ts.append(left_segment) + seg_ts = np.append(seg_ts, left_segment) if self.calculate_error(right_segment) > self.max_error: - seg_ts.extend(self.topDown(right_segment)) + seg_ts = np.concatenate(seg_ts, self.topDown(right_segment)) else: - 
seg_ts.append(right_segment) + seg_ts = np.append(seg_ts, right_segment) return seg_ts From 7e7f017a60c2ae0a7968461e276afca7db85bfbf Mon Sep 17 00:00:00 2001 From: Moonzyyy <47296443+Moonzyyy@users.noreply.github.com> Date: Mon, 1 Jul 2024 09:36:52 +0100 Subject: [PATCH 04/17] Fixed TD error, changed BU to np.array --- tsml_eval/segmentation/_bu.py | 18 ++++++++---------- tsml_eval/segmentation/_td.py | 4 ++-- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/tsml_eval/segmentation/_bu.py b/tsml_eval/segmentation/_bu.py index f3f84d506..6e508a5a6 100644 --- a/tsml_eval/segmentation/_bu.py +++ b/tsml_eval/segmentation/_bu.py @@ -1,5 +1,6 @@ from base import BasePLA import numpy as np +import math __maintainer__ = [] __all__ = ["BottomUp"] @@ -10,21 +11,18 @@ def __init__(self, max_error): #clean the code def bottomUp(self, time_series): - seg_ts = [] - merge_cost = [] - for i in range(0, len(time_series), 2): - seg_ts.append(self.create_segment(time_series[i: i + 2])) + seg_ts = np.zeros(math.ceil(len(time_series) / 2)) + merge_cost = np.zeros(len(seg_ts) - 1) + for i in range(len(seg_ts)): + current_time_seriex_index = i * 2 + seg_ts[i] = self.create_segment(time_series[current_time_seriex_index: current_time_seriex_index + 2]) for i in range(len(seg_ts) - 1): - merge_cost.append(self.calculate_error(seg_ts[i] + seg_ts[i + 1])) - - merge_cost = np.array(merge_cost) + merge_cost[i] = self.calculate_error(seg_ts[i] + seg_ts[i + 1]) while len(merge_cost != 0) and min(merge_cost) < self.max_error: - if(len(merge_cost) == len(seg_ts)): - print("error") pos = np.argmin(merge_cost) seg_ts[pos] = np.concatenate((seg_ts[pos], seg_ts[pos + 1])) - seg_ts.pop(pos + 1) + seg_ts = np.delete(seg_ts, pos + 1) if (pos + 1) < len(merge_cost): merge_cost = np.delete(merge_cost, pos + 1) else: diff --git a/tsml_eval/segmentation/_td.py b/tsml_eval/segmentation/_td.py index 0878066f8..bb27803d7 100644 --- a/tsml_eval/segmentation/_td.py +++ b/tsml_eval/segmentation/_td.py 
@@ -28,13 +28,13 @@ def topDown(self, time_series): right_segment = time_series[breakpoint:] if self.calculate_error(left_segment) > self.max_error: - seg_ts = np.concatenate(seg_ts, self.topDown(left_segment)) + seg_ts = np.concatenate((seg_ts, self.topDown(left_segment))) else: seg_ts = np.append(seg_ts, left_segment) if self.calculate_error(right_segment) > self.max_error: - seg_ts = np.concatenate(seg_ts, self.topDown(right_segment)) + seg_ts = np.concatenate((seg_ts, self.topDown(right_segment))) else: seg_ts = np.append(seg_ts, right_segment) From 77a4d753ce1366da573f2e2928387502d5308ee7 Mon Sep 17 00:00:00 2001 From: Moonzyyy <47296443+Moonzyyy@users.noreply.github.com> Date: Mon, 1 Jul 2024 10:19:00 +0100 Subject: [PATCH 05/17] Revert some changes due to errors caused from numpy array --- tsml_eval/segmentation/_bu.py | 25 ++++++++++++++----------- tsml_eval/segmentation/_sw.py | 12 +++++++++--- tsml_eval/segmentation/_td.py | 24 ++++++++++++------------ tsml_eval/segmentation/base.py | 3 ++- tsml_eval/segmentation/manual_test.py | 27 ++++----------------------- 5 files changed, 41 insertions(+), 50 deletions(-) diff --git a/tsml_eval/segmentation/_bu.py b/tsml_eval/segmentation/_bu.py index 6e508a5a6..fca63ee8b 100644 --- a/tsml_eval/segmentation/_bu.py +++ b/tsml_eval/segmentation/_bu.py @@ -11,28 +11,31 @@ def __init__(self, max_error): #clean the code def bottomUp(self, time_series): - seg_ts = np.zeros(math.ceil(len(time_series) / 2)) - merge_cost = np.zeros(len(seg_ts) - 1) - for i in range(len(seg_ts)): - current_time_seriex_index = i * 2 - seg_ts[i] = self.create_segment(time_series[current_time_seriex_index: current_time_seriex_index + 2]) + seg_ts = [] + merge_cost = [] + for i in range(0, len(time_series), 2): + seg_ts.append(self.create_segment(time_series[i: i + 2])) for i in range(len(seg_ts) - 1): - merge_cost[i] = self.calculate_error(seg_ts[i] + seg_ts[i + 1]) + merge_cost.append(self.calculate_error(seg_ts[i] + seg_ts[i + 1])) + + 
merge_cost = np.array(merge_cost) while len(merge_cost != 0) and min(merge_cost) < self.max_error: + if(len(merge_cost) == len(seg_ts)): + print("error") pos = np.argmin(merge_cost) seg_ts[pos] = np.concatenate((seg_ts[pos], seg_ts[pos + 1])) - seg_ts = np.delete(seg_ts, pos + 1) + seg_ts.pop(pos + 1) if (pos + 1) < len(merge_cost): merge_cost = np.delete(merge_cost, pos + 1) else: merge_cost= np.delete(merge_cost, pos) - + if pos != 0: merge_cost[pos - 1] = self.calculate_error(np.concatenate((seg_ts[pos - 1], seg_ts[pos]))) - + if((pos + 1) < len(seg_ts)): merge_cost[pos] = self.calculate_error(np.concatenate((seg_ts[pos], seg_ts[pos + 1]))) - - + + return seg_ts \ No newline at end of file diff --git a/tsml_eval/segmentation/_sw.py b/tsml_eval/segmentation/_sw.py index aff6a2206..6a4fa4ffe 100644 --- a/tsml_eval/segmentation/_sw.py +++ b/tsml_eval/segmentation/_sw.py @@ -23,12 +23,18 @@ def sliding_window(self, time_series): #! clean this up, the while loops are not done in a good manner. 
This is from the pseudocode def sliding_window(self, time_series): - seg_ts = np.array([]) + seg_ts = [] anchor = 0 while anchor < len(time_series): i = 2 while anchor + i -1 < len(time_series) and self.calculate_error(time_series[anchor:anchor + i]) < self.max_error: i = i + 1 - seg_ts = np.append(seg_ts, self.create_segment(time_series[anchor:anchor + i - 1])) + seg_ts.append(self.create_segment(time_series[anchor:anchor + i - 1])) anchor = anchor + i - 1 - return seg_ts \ No newline at end of file + return seg_ts + + def segment(time_series): + return None + + def pla(time_series): + return None \ No newline at end of file diff --git a/tsml_eval/segmentation/_td.py b/tsml_eval/segmentation/_td.py index bb27803d7..092d19810 100644 --- a/tsml_eval/segmentation/_td.py +++ b/tsml_eval/segmentation/_td.py @@ -12,7 +12,7 @@ def __init__(self, max_error): #Implement a cache system for this def topDown(self, time_series): - seg_ts = np.array([]) + seg_ts = [] best_so_far = sys.float_info.max breakpoint = None for i in range(2, len(time_series -2)): @@ -20,24 +20,24 @@ def topDown(self, time_series): if(improvement_in_approximation < best_so_far): breakpoint = i best_so_far = improvement_in_approximation - + if breakpoint == None: - return np.array([time_series]) - + return [time_series] + left_segment = time_series[:breakpoint] right_segment = time_series[breakpoint:] - + if self.calculate_error(left_segment) > self.max_error: - seg_ts = np.concatenate((seg_ts, self.topDown(left_segment))) + seg_ts.extend(self.topDown(left_segment)) else: - seg_ts = np.append(seg_ts, left_segment) - - + seg_ts.append(left_segment) + + if self.calculate_error(right_segment) > self.max_error: - seg_ts = np.concatenate((seg_ts, self.topDown(right_segment))) + seg_ts.extend(self.topDown(right_segment)) else: - seg_ts = np.append(seg_ts, right_segment) - + seg_ts.append(right_segment) + return seg_ts diff --git a/tsml_eval/segmentation/base.py b/tsml_eval/segmentation/base.py index 
fb7578288..62c4109a7 100644 --- a/tsml_eval/segmentation/base.py +++ b/tsml_eval/segmentation/base.py @@ -19,7 +19,8 @@ def linear_regression(self, time_series, sequence = None): X = np.arange(n).reshape(-1 , 1) linearRegression = LinearRegression() linearRegression.fit(X, Y) - return linearRegression.predict(X) + regression_line = np.array(linearRegression.predict(X)) + return regression_line def sum_squared_error(self, time_series, linear_regression_time_series): "formula: sse = the sum of the differences of the original series against the predicted series squared" diff --git a/tsml_eval/segmentation/manual_test.py b/tsml_eval/segmentation/manual_test.py index 787c3eb26..683b06c53 100644 --- a/tsml_eval/segmentation/manual_test.py +++ b/tsml_eval/segmentation/manual_test.py @@ -8,29 +8,10 @@ ts, period_size, true_cps = load_electric_devices_segmentation() +ts = ts[0:20] ts = ts.values -sw = SlidingWindow(100) -results = sw.sliding_window(ts) +sw = TopDown(100) +results = sw.topDown(ts) print(len(results)) -plt.figure() -plt.plot(np.arange(len(ts)), ts) -plt.title('original') -plt.xlabel('x') -plt.ylabel('y') - -flattened_arr = [item for sublist in results for item in sublist] -plt.figure() -plt.plot(np.arange(len(flattened_arr)), flattened_arr) -plt.title('pla') -plt.xlabel('x') -plt.ylabel('y') - -for i in range(len(results)): - plt.figure() - plt.plot(np.arange(len(results[i])), results[i]) - plt.title(i) - plt.xlabel('x') - plt.ylabel('y') - -plt.show() +print(results) \ No newline at end of file From 817a3354ea1d17ee35d8c3ed009806a8340d1552 Mon Sep 17 00:00:00 2001 From: Moonzyyy <47296443+Moonzyyy@users.noreply.github.com> Date: Mon, 1 Jul 2024 10:59:32 +0100 Subject: [PATCH 06/17] Added Swab and added dense findings of segmentations --- tsml_eval/segmentation/__init__.py | 6 +++-- tsml_eval/segmentation/_bu.py | 10 +++++++- tsml_eval/segmentation/_sw.py | 13 ++++++---- tsml_eval/segmentation/_swab.py | 36 +++++++++++++++++---------- 
tsml_eval/segmentation/_td.py | 10 ++++++-- tsml_eval/segmentation/manual_test.py | 6 ++--- 6 files changed, 55 insertions(+), 26 deletions(-) diff --git a/tsml_eval/segmentation/__init__.py b/tsml_eval/segmentation/__init__.py index 1602ecbef..d73f49449 100644 --- a/tsml_eval/segmentation/__init__.py +++ b/tsml_eval/segmentation/__init__.py @@ -4,9 +4,11 @@ "BasePLA", "SlidingWindow", "TopDown", - "BottomUp" + "BottomUp", + "SWAB", ] from base import BasePLA from _sw import SlidingWindow from _td import TopDown -from _bu import BottomUp \ No newline at end of file +from _bu import BottomUp +from _swab import SWAB \ No newline at end of file diff --git a/tsml_eval/segmentation/_bu.py b/tsml_eval/segmentation/_bu.py index fca63ee8b..4e32cea7f 100644 --- a/tsml_eval/segmentation/_bu.py +++ b/tsml_eval/segmentation/_bu.py @@ -38,4 +38,12 @@ def bottomUp(self, time_series): merge_cost[pos] = self.calculate_error(np.concatenate((seg_ts[pos], seg_ts[pos + 1]))) - return seg_ts \ No newline at end of file + return seg_ts + + + def dense(self, time_series): + results = self.bottomUp(time_series) + dense_array = np.zeros(len(results) - 1) + for i in range(results - 1): + dense_array[i] = len(results[i]) + return dense_array \ No newline at end of file diff --git a/tsml_eval/segmentation/_sw.py b/tsml_eval/segmentation/_sw.py index 6a4fa4ffe..335c34304 100644 --- a/tsml_eval/segmentation/_sw.py +++ b/tsml_eval/segmentation/_sw.py @@ -33,8 +33,11 @@ def sliding_window(self, time_series): anchor = anchor + i - 1 return seg_ts - def segment(time_series): - return None - - def pla(time_series): - return None \ No newline at end of file + def dense(self, time_series): + results = self.sliding_window(time_series) + dense_array = np.zeros(len(results) - 1) + for i in range(results - 1): + dense_array[i] = len(results[i]) + return dense_array + + \ No newline at end of file diff --git a/tsml_eval/segmentation/_swab.py b/tsml_eval/segmentation/_swab.py index a3f82dd17..50f74593f 
100644 --- a/tsml_eval/segmentation/_swab.py +++ b/tsml_eval/segmentation/_swab.py @@ -1,7 +1,7 @@ from base import BasePLA import numpy as np import sys -import BottomUp +from _bu import BottomUp __maintainer__ = [] __all__ = ["SWAB"] @@ -16,23 +16,33 @@ def __init__(self, max_error, seg_num = 6): def swab(self, time_series): seg_ts = [] - buffer = np.empty(self.seg_num, dtype=object) - sw_lower_bound = len(buffer) / 2 - sw_upper_bound = len(buffer) * 2 - while len(buffer) < 3: - t = self.bottomup(time_series) + seg = self.best_line(time_series, 0) + current_data_point = len(seg) + buffer = np.array(seg) + while len(buffer) > 0: + t = self.bottomup.bottomUp(time_series) seg_ts.append(t[0]) - buffer = buffer[len(t) - 1:] - return None + buffer = buffer[len(t[0]):] + if(current_data_point != len(time_series)): + seg = self.best_line(time_series, current_data_point) + current_data_point = current_data_point + len(seg) + buffer = np.append(buffer, seg) + return seg_ts #finds the next potential segment - def best_line(self, time_series, current_data_point, sw_lower_bound, sw_upper_bound): - seg_ts = [] + def best_line(self, time_series, current_data_point): + seg_ts = np.array([]) error = 0 - while error < self.max_error: - seg_ts.append = time_series[current_data_point] + while current_data_point < len(time_series) and error < self.max_error: + seg_ts = np.append(seg_ts, time_series[current_data_point]) error = self.calculate_error(seg_ts) current_data_point = current_data_point + 1 return seg_ts - \ No newline at end of file + + def dense(self, time_series): + results = self.swab(time_series) + dense_array = np.zeros(len(results) - 1) + for i in range(results - 1): + dense_array[i] = len(results[i]) + return dense_array \ No newline at end of file diff --git a/tsml_eval/segmentation/_td.py b/tsml_eval/segmentation/_td.py index 092d19810..e951cd4b3 100644 --- a/tsml_eval/segmentation/_td.py +++ b/tsml_eval/segmentation/_td.py @@ -12,7 +12,7 @@ def __init__(self, 
max_error): #Implement a cache system for this def topDown(self, time_series): - seg_ts = [] + seg_ts = [] best_so_far = sys.float_info.max breakpoint = None for i in range(2, len(time_series -2)): @@ -45,4 +45,10 @@ def improvement_splitting_here(self, time_series, breakpoint): left_segment = time_series[:breakpoint] right_segment = time_series[breakpoint:] return self.calculate_error(left_segment) + self.calculate_error(right_segment) - \ No newline at end of file + + def dense(self, time_series): + results = self.topDown(time_series) + dense_array = np.zeros(len(results) - 1) + for i in range(results - 1): + dense_array[i] = len(results[i]) + return dense_array \ No newline at end of file diff --git a/tsml_eval/segmentation/manual_test.py b/tsml_eval/segmentation/manual_test.py index 683b06c53..ac4a3eb3a 100644 --- a/tsml_eval/segmentation/manual_test.py +++ b/tsml_eval/segmentation/manual_test.py @@ -1,6 +1,7 @@ from _sw import SlidingWindow from _bu import BottomUp from _td import TopDown +from _swab import SWAB from aeon.datasets import load_electric_devices_segmentation from aeon.visualisation import plot_series_with_change_points, plot_series_with_profiles import matplotlib.pyplot as plt @@ -8,10 +9,9 @@ ts, period_size, true_cps = load_electric_devices_segmentation() -ts = ts[0:20] ts = ts.values -sw = TopDown(100) -results = sw.topDown(ts) +sw = SWAB(100) +results = sw.swab(ts) print(len(results)) print(results) \ No newline at end of file From 736dd6e701e4d13eee00152ee075fc960e1bb9b1 Mon Sep 17 00:00:00 2001 From: Moonzyyy <47296443+Moonzyyy@users.noreply.github.com> Date: Mon, 1 Jul 2024 11:12:26 +0100 Subject: [PATCH 07/17] Fixed dense findings for segmentation forPLA --- tsml_eval/segmentation/_bu.py | 6 ++++-- tsml_eval/segmentation/_sw.py | 8 +++++--- tsml_eval/segmentation/_swab.py | 6 ++++-- tsml_eval/segmentation/_td.py | 8 +++++--- tsml_eval/segmentation/manual_test.py | 6 +++--- 5 files changed, 21 insertions(+), 13 deletions(-) diff --git 
a/tsml_eval/segmentation/_bu.py b/tsml_eval/segmentation/_bu.py index 4e32cea7f..11ded24d8 100644 --- a/tsml_eval/segmentation/_bu.py +++ b/tsml_eval/segmentation/_bu.py @@ -44,6 +44,8 @@ def bottomUp(self, time_series): def dense(self, time_series): results = self.bottomUp(time_series) dense_array = np.zeros(len(results) - 1) - for i in range(results - 1): - dense_array[i] = len(results[i]) + segmentation_point = 0 + for i in range(len(results) - 1): + segmentation_point = segmentation_point + len(results[i]) + dense_array[i] = segmentation_point return dense_array \ No newline at end of file diff --git a/tsml_eval/segmentation/_sw.py b/tsml_eval/segmentation/_sw.py index 335c34304..1a59bc479 100644 --- a/tsml_eval/segmentation/_sw.py +++ b/tsml_eval/segmentation/_sw.py @@ -36,8 +36,10 @@ def sliding_window(self, time_series): def dense(self, time_series): results = self.sliding_window(time_series) dense_array = np.zeros(len(results) - 1) - for i in range(results - 1): - dense_array[i] = len(results[i]) - return dense_array + segmentation_point = 0 + for i in range(len(results) - 1): + segmentation_point = segmentation_point + len(results[i]) + dense_array[i] = segmentation_point + return dense_array \ No newline at end of file diff --git a/tsml_eval/segmentation/_swab.py b/tsml_eval/segmentation/_swab.py index 50f74593f..45e08897e 100644 --- a/tsml_eval/segmentation/_swab.py +++ b/tsml_eval/segmentation/_swab.py @@ -43,6 +43,8 @@ def best_line(self, time_series, current_data_point): def dense(self, time_series): results = self.swab(time_series) dense_array = np.zeros(len(results) - 1) - for i in range(results - 1): - dense_array[i] = len(results[i]) + segmentation_point = 0 + for i in range(len(results) - 1): + segmentation_point = segmentation_point + len(results[i]) + dense_array[i] = segmentation_point return dense_array \ No newline at end of file diff --git a/tsml_eval/segmentation/_td.py b/tsml_eval/segmentation/_td.py index e951cd4b3..45182adb7 100644 --- 
a/tsml_eval/segmentation/_td.py +++ b/tsml_eval/segmentation/_td.py @@ -49,6 +49,8 @@ def improvement_splitting_here(self, time_series, breakpoint): def dense(self, time_series): results = self.topDown(time_series) dense_array = np.zeros(len(results) - 1) - for i in range(results - 1): - dense_array[i] = len(results[i]) - return dense_array \ No newline at end of file + segmentation_point = 0 + for i in range(len(results) - 1): + segmentation_point = segmentation_point + len(results[i]) + dense_array[i] = segmentation_point + return dense_array \ No newline at end of file diff --git a/tsml_eval/segmentation/manual_test.py b/tsml_eval/segmentation/manual_test.py index ac4a3eb3a..89c1dec4d 100644 --- a/tsml_eval/segmentation/manual_test.py +++ b/tsml_eval/segmentation/manual_test.py @@ -9,9 +9,9 @@ ts, period_size, true_cps = load_electric_devices_segmentation() +ts = ts[:4500] ts = ts.values -sw = SWAB(100) -results = sw.swab(ts) +sw = BottomUp(45) +results = sw.dense(ts) print(len(results)) - print(results) \ No newline at end of file From 1cea45fcb9532993a104fc838a3daf5ff063f640 Mon Sep 17 00:00:00 2001 From: Moonzyyy <47296443+Moonzyyy@users.noreply.github.com> Date: Mon, 1 Jul 2024 13:18:43 +0100 Subject: [PATCH 08/17] Added comments to each function and class --- tsml_eval/segmentation/_bu.py | 45 +++++++++++++++- tsml_eval/segmentation/_sw.py | 59 +++++++++++++++------ tsml_eval/segmentation/_swab.py | 65 +++++++++++++++++++++-- tsml_eval/segmentation/_td.py | 76 +++++++++++++++++++++++---- tsml_eval/segmentation/base.py | 67 +++++++++++++++++++++-- tsml_eval/segmentation/manual_test.py | 7 ++- 6 files changed, 279 insertions(+), 40 deletions(-) diff --git a/tsml_eval/segmentation/_bu.py b/tsml_eval/segmentation/_bu.py index 11ded24d8..2a8ea1576 100644 --- a/tsml_eval/segmentation/_bu.py +++ b/tsml_eval/segmentation/_bu.py @@ -5,12 +5,40 @@ __all__ = ["BottomUp"] class BottomUp(BasePLA): + """ + Bottom-Up Segmentation. 
+ + Uses a bottom-up algorithm to traverse the dataset in an online manner. + + Parameters + ---------- + max_error: float + The maximum error valuefor the function to find before segmenting the dataset + + References + ---------- + .. [1] Keogh, E., Chu, S., Hart, D. and Pazzani, M., 2001, November. + An online algorithm for segmenting time series. (pp. 289-296). + """ def __init__(self, max_error): super().__init__(max_error) #clean the code - def bottomUp(self, time_series): + def segment(self, time_series): + """Segment a time series + + Parameters + ---------- + time_series : np.array + 1D time series to be segmented. + + Returns + ------- + list + List of segmentations + """ + seg_ts = [] merge_cost = [] for i in range(0, len(time_series), 2): @@ -42,7 +70,20 @@ def bottomUp(self, time_series): def dense(self, time_series): - results = self.bottomUp(time_series) + """Return the dense values of a segmented time series + + Parameters + ---------- + time_series : np.array + 1D time series to be segmented. + + Returns + ------- + list + dense values of a segmentation + """ + + results = self.segment(time_series) dense_array = np.zeros(len(results) - 1) segmentation_point = 0 for i in range(len(results) - 1): diff --git a/tsml_eval/segmentation/_sw.py b/tsml_eval/segmentation/_sw.py index 1a59bc479..7f1b93aba 100644 --- a/tsml_eval/segmentation/_sw.py +++ b/tsml_eval/segmentation/_sw.py @@ -5,24 +5,39 @@ __all__ = ["SlidingWindow"] class SlidingWindow(BasePLA): + """Sliding Window Segmentation. + + Uses a sliding window algorithm to traverse the dataset in an online manner. + + Parameters + ---------- + max_error: float + The maximum error valuefor the function to find before segmenting the dataset + + References + ---------- + .. [1] Keogh, E., Chu, S., Hart, D. and Pazzani, M., 2001, November. + An online algorithm for segmenting time series. (pp. 289-296). 
+ """ def __init__(self, max_error): super().__init__(max_error) - """work in progress - def sliding_window(self, time_series): - seg_ts = [] - anchor = 0 - for i in range(1, len(time_series)): - if self.calculate_error(time_series[anchor:i]) > self.max_error: - seg_ts.append(self.create_segment(time_series[anchor: i - 1])) - anchor = i - 1 - if(anchor < i): - seg_ts.append(self.create_segment(time_series[anchor: i - 1])) - return np.concatenate(seg_ts) """ - #! clean this up, the while loops are not done in a good manner. This is from the pseudocode - def sliding_window(self, time_series): + def segment(self, time_series): + """Segment a time series + + Parameters + ---------- + time_series : np.array + 1D time series to be segmented. + + Returns + ------- + list + List of segmentations + """ + seg_ts = [] anchor = 0 while anchor < len(time_series): @@ -33,13 +48,27 @@ def sliding_window(self, time_series): anchor = anchor + i - 1 return seg_ts + def dense(self, time_series): - results = self.sliding_window(time_series) + """Return the dense values of a segmented time series + + Parameters + ---------- + time_series : np.array + 1D time series to be segmented. + + Returns + ------- + list + dense values of a segmentation + """ + + results = self.segment(time_series) dense_array = np.zeros(len(results) - 1) segmentation_point = 0 for i in range(len(results) - 1): segmentation_point = segmentation_point + len(results[i]) dense_array[i] = segmentation_point - return dense_array + return dense_array \ No newline at end of file diff --git a/tsml_eval/segmentation/_swab.py b/tsml_eval/segmentation/_swab.py index 45e08897e..268387a8a 100644 --- a/tsml_eval/segmentation/_swab.py +++ b/tsml_eval/segmentation/_swab.py @@ -7,14 +7,41 @@ __all__ = ["SWAB"] class SWAB(BasePLA): + """ + SWAB (Sliding Window And Bottom-Up) Segmentation. + + Uses SWAB algorithm as described in [1] to traverse the dataset in an online manner. 
+ + Parameters + ---------- + max_error: float + The maximum error valuefor the function to find before segmenting the dataset + + References + ---------- + .. [1] Keogh, E., Chu, S., Hart, D. and Pazzani, M., 2001, November. + An online algorithm for segmenting time series. (pp. 289-296). + """ - def __init__(self, max_error, seg_num = 6): - self.seg_num = seg_num + def __init__(self, max_error): self.bottomup = BottomUp(max_error) super().__init__(max_error) - def swab(self, time_series): + def segment(self, time_series): + """Segment a time series + + Parameters + ---------- + time_series : np.array + 1D time series to be segmented. + + Returns + ------- + list + List of segmentations + """ + seg_ts = [] seg = self.best_line(time_series, 0) current_data_point = len(seg) @@ -32,6 +59,21 @@ def swab(self, time_series): #finds the next potential segment def best_line(self, time_series, current_data_point): + """Uses sliding window to find the next best segmentation candidate + + Parameters + ---------- + time_series : np.array + 1D time series to be segmented. + current_data_point : int + the current_data_point we are observing + + Returns + ------- + np.array + new found segmentation candidate + """ + seg_ts = np.array([]) error = 0 while current_data_point < len(time_series) and error < self.max_error: @@ -41,10 +83,23 @@ def best_line(self, time_series, current_data_point): return seg_ts def dense(self, time_series): - results = self.swab(time_series) + """Return the dense values of a segmented time series + + Parameters + ---------- + time_series : np.array + 1D time series to be segmented. 
+ + Returns + ------- + list + dense values of a segmentation + """ + + results = self.segment(time_series) dense_array = np.zeros(len(results) - 1) segmentation_point = 0 for i in range(len(results) - 1): segmentation_point = segmentation_point + len(results[i]) dense_array[i] = segmentation_point - return dense_array \ No newline at end of file + return dense_array \ No newline at end of file diff --git a/tsml_eval/segmentation/_td.py b/tsml_eval/segmentation/_td.py index 45182adb7..dc4c1d799 100644 --- a/tsml_eval/segmentation/_td.py +++ b/tsml_eval/segmentation/_td.py @@ -6,12 +6,40 @@ __all__ = ["TopDown"] class TopDown(BasePLA): + """ + Top-Down Segmentation. + + Uses a top-down algorithm to traverse the dataset in an online manner. + + Parameters + ---------- + max_error: float + The maximum error valuefor the function to find before segmenting the dataset + + References + ---------- + .. [1] Keogh, E., Chu, S., Hart, D. and Pazzani, M., 2001, November. + An online algorithm for segmenting time series. (pp. 289-296). + """ def __init__(self, max_error): super().__init__(max_error) #Implement a cache system for this - def topDown(self, time_series): + def segment(self, time_series): + """Segment a time series + + Parameters + ---------- + time_series : np.array + 1D time series to be segmented. 
+ + Returns + ------- + list + List of segmentations + """ + seg_ts = [] best_so_far = sys.float_info.max breakpoint = None @@ -21,36 +49,62 @@ def topDown(self, time_series): breakpoint = i best_so_far = improvement_in_approximation - if breakpoint == None: - return [time_series] - left_segment = time_series[:breakpoint] right_segment = time_series[breakpoint:] if self.calculate_error(left_segment) > self.max_error: - seg_ts.extend(self.topDown(left_segment)) + seg_ts.append(self.topDown(left_segment)) else: - seg_ts.append(left_segment) - + seg_ts.extend([left_segment]) if self.calculate_error(right_segment) > self.max_error: - seg_ts.extend(self.topDown(right_segment)) + seg_ts.append(self.topDown(right_segment)) else: - seg_ts.append(right_segment) + seg_ts.extend([right_segment]) return seg_ts def improvement_splitting_here(self, time_series, breakpoint): + """Returns the squared sum error of the left and right segment + splitted off at a particual point in a time series + + Parameters + ---------- + time_series : np.array + 1D time series. + breakpoint : int + the break point within the time series array + + Returns + ------- + error + the squared sum error of the split segmentations + """ + left_segment = time_series[:breakpoint] right_segment = time_series[breakpoint:] return self.calculate_error(left_segment) + self.calculate_error(right_segment) def dense(self, time_series): - results = self.topDown(time_series) + """Return the dense values of a segmented time series + + Parameters + ---------- + time_series : np.array + 1D time series to be segmented. 
+ + Returns + ------- + list + dense values of a segmentation + """ + + results = self.segment(time_series) dense_array = np.zeros(len(results) - 1) segmentation_point = 0 for i in range(len(results) - 1): segmentation_point = segmentation_point + len(results[i]) dense_array[i] = segmentation_point - return dense_array \ No newline at end of file + return dense_array + \ No newline at end of file diff --git a/tsml_eval/segmentation/base.py b/tsml_eval/segmentation/base.py index 62c4109a7..d210ac404 100644 --- a/tsml_eval/segmentation/base.py +++ b/tsml_eval/segmentation/base.py @@ -8,12 +8,31 @@ from sklearn.linear_model import LinearRegression class BasePLA(): - "Base class for piecewise linear approximation (PLA)" + """ + Base class for algorithms which use PLA (Piecewise Linear Approximation) for segmentation. + + Parameters + ---------- + max_error: float + The maximum error valuefor the function to find before segmenting the dataset + """ def __init__(self, max_error): self.max_error = max_error - def linear_regression(self, time_series, sequence = None): + def linear_regression(self, time_series): + """Returns the fitted line through a time series. + + Parameters + ---------- + time_series : np.array + 1D time series. + + Returns + ------- + np.array + the fitted line + """ n = len(time_series) Y = np.array(time_series) X = np.arange(n).reshape(-1 , 1) @@ -23,14 +42,56 @@ def linear_regression(self, time_series, sequence = None): return regression_line def sum_squared_error(self, time_series, linear_regression_time_series): - "formula: sse = the sum of the differences of the original series against the predicted series squared" + """Returns the squared sum error time series and its linear regression + + formula: sse = the sum of the differences of the original series + against the predicted series squared + + Parameters + ---------- + time_series : np.array + 1D time series. 
+ linear_regression_time_series: np.array + 1D linear time series formatted using linear regression + + Returns + ------- + error + the squared sum error of the split segmentations + """ + error = np.sum((time_series - linear_regression_time_series) ** 2) return error def calculate_error(self, time_series): + """Returns the squared sum error of a time series and its linear regression + + Parameters + ---------- + time_series : np.array + 1D time series. + + Returns + ------- + error + the squared sum error of a time series and it's linear regression + """ + lrts = self.linear_regression(time_series) sse = self.sum_squared_error(time_series, lrts) return sse def create_segment(self, time_series): + """create a linear segment of a given time series. + + Parameters + ---------- + time_series : np.array + 1D time series. + + Returns + ------- + np.array + the linear regression of the time series. + """ return self.linear_regression(time_series) \ No newline at end of file diff --git a/tsml_eval/segmentation/manual_test.py b/tsml_eval/segmentation/manual_test.py index 89c1dec4d..89cef024b 100644 --- a/tsml_eval/segmentation/manual_test.py +++ b/tsml_eval/segmentation/manual_test.py @@ -9,9 +9,8 @@ ts, period_size, true_cps = load_electric_devices_segmentation() -ts = ts[:4500] -ts = ts.values -sw = BottomUp(45) -results = sw.dense(ts) +ts = ts[:1000] +pla = TopDown(100) +results = pla.dense(ts) print(len(results)) print(results) \ No newline at end of file From 376be9d624f87bdcb4728688587e5a8cd410a160 Mon Sep 17 00:00:00 2001 From: Moonzyyy <47296443+Moonzyyy@users.noreply.github.com> Date: Mon, 1 Jul 2024 16:53:29 +0100 Subject: [PATCH 09/17] Commits for meeting tomorrow --- tsml_eval/segmentation/_bu.py | 2 +- tsml_eval/segmentation/_td.py | 4 ++-- tsml_eval/segmentation/manual_test.py | 13 +++++++++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/tsml_eval/segmentation/_bu.py b/tsml_eval/segmentation/_bu.py index 2a8ea1576..90cb92639 100644 --- 
a/tsml_eval/segmentation/_bu.py +++ b/tsml_eval/segmentation/_bu.py @@ -48,7 +48,7 @@ def segment(self, time_series): merge_cost = np.array(merge_cost) - while len(merge_cost != 0) and min(merge_cost) < self.max_error: + while len(merge_cost) != 0 and min(merge_cost) < self.max_error: if(len(merge_cost) == len(seg_ts)): print("error") pos = np.argmin(merge_cost) diff --git a/tsml_eval/segmentation/_td.py b/tsml_eval/segmentation/_td.py index dc4c1d799..32c66d4f0 100644 --- a/tsml_eval/segmentation/_td.py +++ b/tsml_eval/segmentation/_td.py @@ -53,12 +53,12 @@ def segment(self, time_series): right_segment = time_series[breakpoint:] if self.calculate_error(left_segment) > self.max_error: - seg_ts.append(self.topDown(left_segment)) + seg_ts.append(self.segment(left_segment)) else: seg_ts.extend([left_segment]) if self.calculate_error(right_segment) > self.max_error: - seg_ts.append(self.topDown(right_segment)) + seg_ts.append(self.segment(right_segment)) else: seg_ts.extend([right_segment]) diff --git a/tsml_eval/segmentation/manual_test.py b/tsml_eval/segmentation/manual_test.py index 89cef024b..1759163d4 100644 --- a/tsml_eval/segmentation/manual_test.py +++ b/tsml_eval/segmentation/manual_test.py @@ -6,11 +6,16 @@ from aeon.visualisation import plot_series_with_change_points, plot_series_with_profiles import matplotlib.pyplot as plt import numpy as np +from sklearn.preprocessing import MinMaxScaler ts, period_size, true_cps = load_electric_devices_segmentation() -ts = ts[:1000] -pla = TopDown(100) +ts = ts.values +ts = ts.reshape((len(ts), 1)) +scaler = MinMaxScaler(feature_range=(0, 1)) +scaler = scaler.fit(ts) +ts = scaler.transform(ts) +pla = BottomUp(22) results = pla.dense(ts) -print(len(results)) -print(results) \ No newline at end of file +print(results) +print(true_cps) \ No newline at end of file From 16a17cd0b3b48d39ff84455010535a12f1f869a5 Mon Sep 17 00:00:00 2001 From: Moonzyyy <47296443+Moonzyyy@users.noreply.github.com> Date: Tue, 2 Jul 2024 15:46:38 
+0100 Subject: [PATCH 10/17] Moved to _wip folder, changed from segmentation to series transformer and fixed issue with top down algorithm --- tsml_eval/_wip/series_transformer/__init__.py | 14 +++ tsml_eval/_wip/series_transformer/_bu.py | 87 +++++++++++++ tsml_eval/_wip/series_transformer/_sw.py | 70 +++++++++++ tsml_eval/_wip/series_transformer/_swab.py | 117 ++++++++++++++++++ tsml_eval/_wip/series_transformer/_td.py | 110 ++++++++++++++++ tsml_eval/_wip/series_transformer/base.py | 98 +++++++++++++++ .../_wip/series_transformer/manual_test.py | 42 +++++++ 7 files changed, 538 insertions(+) create mode 100644 tsml_eval/_wip/series_transformer/__init__.py create mode 100644 tsml_eval/_wip/series_transformer/_bu.py create mode 100644 tsml_eval/_wip/series_transformer/_sw.py create mode 100644 tsml_eval/_wip/series_transformer/_swab.py create mode 100644 tsml_eval/_wip/series_transformer/_td.py create mode 100644 tsml_eval/_wip/series_transformer/base.py create mode 100644 tsml_eval/_wip/series_transformer/manual_test.py diff --git a/tsml_eval/_wip/series_transformer/__init__.py b/tsml_eval/_wip/series_transformer/__init__.py new file mode 100644 index 000000000..d73f49449 --- /dev/null +++ b/tsml_eval/_wip/series_transformer/__init__.py @@ -0,0 +1,14 @@ +"""Piecewise Linear Approximation.""" + +__all__ = [ + "BasePLA", + "SlidingWindow", + "TopDown", + "BottomUp", + "SWAB", +] +from base import BasePLA +from _sw import SlidingWindow +from _td import TopDown +from _bu import BottomUp +from _swab import SWAB \ No newline at end of file diff --git a/tsml_eval/_wip/series_transformer/_bu.py b/tsml_eval/_wip/series_transformer/_bu.py new file mode 100644 index 000000000..32c9c970c --- /dev/null +++ b/tsml_eval/_wip/series_transformer/_bu.py @@ -0,0 +1,87 @@ +from base import BasePLA +import numpy as np +import math +__maintainer__ = [] +__all__ = ["BottomUp"] + +class BottomUp(BasePLA): + """ + Piecewise Linear Bottom-Up. 
+ + Uses a bottom-up algorithm to traverse the dataset in an offline manner. + + Parameters + ---------- + max_error: float + The maximum error valuefor the function to find before segmenting the dataset + + References + ---------- + .. [1] Keogh, E., Chu, S., Hart, D. and Pazzani, M., 2001, November. + An online algorithm for segmenting time series. (pp. 289-296). + """ + + def __init__(self, max_error): + super().__init__(max_error) + + #clean the code + def transform(self, time_series): + """Transform a time series + + Parameters + ---------- + time_series : np.array + 1D time series to be transformed. + + Returns + ------- + list + List of transformed segmented time series + """ + + seg_ts = [] + merge_cost = [] + for i in range(0, len(time_series), 2): + seg_ts.append(self.create_segment(time_series[i: i + 2])) + for i in range(len(seg_ts) - 1): + merge_cost.append(self.calculate_error(seg_ts[i] + seg_ts[i + 1])) + + merge_cost = np.array(merge_cost) + + while len(merge_cost) != 0 and min(merge_cost) < self.max_error: + if(len(merge_cost) == len(seg_ts)): + print("error") + pos = np.argmin(merge_cost) + seg_ts[pos] = np.concatenate((seg_ts[pos], seg_ts[pos + 1])) + seg_ts.pop(pos + 1) + if (pos + 1) < len(merge_cost): + merge_cost = np.delete(merge_cost, pos + 1) + else: + merge_cost= np.delete(merge_cost, pos) + + if pos != 0: + merge_cost[pos - 1] = self.calculate_error(np.concatenate((seg_ts[pos - 1], seg_ts[pos]))) + + if((pos + 1) < len(seg_ts)): + merge_cost[pos] = self.calculate_error(np.concatenate((seg_ts[pos], seg_ts[pos + 1]))) + + + return seg_ts + + + def transform_flatten(self, time_series): + """Transform a time series and return a 1d array + + Parameters + ---------- + time_series : np.array + 1D time series to be transformed. 
+ + Returns + ------- + list + List of a flattened transformed time series + """ + + pla_timeseries = self.transform(time_series) + return np.concatenate(pla_timeseries) \ No newline at end of file diff --git a/tsml_eval/_wip/series_transformer/_sw.py b/tsml_eval/_wip/series_transformer/_sw.py new file mode 100644 index 000000000..b64515b2d --- /dev/null +++ b/tsml_eval/_wip/series_transformer/_sw.py @@ -0,0 +1,70 @@ + +from base import BasePLA +import numpy as np +__maintainer__ = [] +__all__ = ["SlidingWindow"] + +class SlidingWindow(BasePLA): + """Piecewise Linear Sliding Window. + + Uses a sliding window algorithm to traverse the dataset in an online manner. + + Parameters + ---------- + max_error: float + The maximum error valuefor the function to find before segmenting the dataset + + References + ---------- + .. [1] Keogh, E., Chu, S., Hart, D. and Pazzani, M., 2001, November. + An online algorithm for segmenting time series. (pp. 289-296). + """ + + def __init__(self, max_error): + super().__init__(max_error) + + #! clean this up, the while loops are not done in a good manner. This is from the pseudocode + def transform(self, time_series): + """Transform a time series + + Parameters + ---------- + time_series : np.array + 1D time series to be transformed. + + Returns + ------- + list + List of transformed segmented time series + """ + + seg_ts = [] + anchor = 0 + while anchor < len(time_series): + i = 2 + while anchor + i -1 < len(time_series) and self.calculate_error(time_series[anchor:anchor + i]) < self.max_error: + i = i + 1 + seg_ts.append(self.create_segment(time_series[anchor:anchor + i - 1])) + anchor = anchor + i - 1 + return seg_ts + + + def transform_flatten(self, time_series): + """Transform a time series and return a 1d array + + Parameters + ---------- + time_series : np.array + 1D time series to be transformed. 
+ + Returns + ------- + list + List of a flattened transformed time series + """ + + pla_timeseries = self.transform(time_series) + print(pla_timeseries) + return np.concatenate(pla_timeseries) + + \ No newline at end of file diff --git a/tsml_eval/_wip/series_transformer/_swab.py b/tsml_eval/_wip/series_transformer/_swab.py new file mode 100644 index 000000000..162df5411 --- /dev/null +++ b/tsml_eval/_wip/series_transformer/_swab.py @@ -0,0 +1,117 @@ +from base import BasePLA +import numpy as np +import sys +from _bu import BottomUp + +__maintainer__ = [] +__all__ = ["SWAB"] + +class SWAB(BasePLA): + """ + SWAB (Sliding Window And Bottom-Up) Segmentation. + + Uses SWAB algorithm as described in [1] to traverse the dataset in an online manner. + + Parameters + ---------- + max_error: float + The maximum error valuefor the function to find before segmenting the dataset + + References + ---------- + .. [1] Keogh, E., Chu, S., Hart, D. and Pazzani, M., 2001, November. + An online algorithm for segmenting time series. (pp. 289-296). + """ + + def __init__(self, max_error, sequence_num): + self.bottomup = BottomUp(max_error) + self.sequence_num = sequence_num + super().__init__(max_error) + + #need to check buffer, i think it does grow exponantionally large + def transform(self, time_series): + """Transform a time series + + Parameters + ---------- + time_series : np.array + 1D time series to be transformed. 
+ + Returns + ------- + list + List of transformed segmented time series + """ + seg_ts = [] + + lower_boundary_window = int(self.sequence_num / 2) + upper_boundary_window = self.sequence_num * 2 + + seg = self.best_line(time_series, 0, lower_boundary_window, upper_boundary_window) + current_data_point = len(seg) + buffer = np.array(seg) + + while len(buffer) > 0: + t = self.bottomup.transform(time_series) + seg_ts.append(t[0]) + buffer = buffer[len(t[0]):] + if(current_data_point >= len(time_series)): + seg = self.best_line(time_series, current_data_point, lower_boundary_window, upper_boundary_window) + current_data_point = current_data_point + len(seg) + buffer = np.append(buffer, seg) + else: + buffer = np.array([]) + t = t[1:] + for i in range(len(t)): + seg_ts.append(t[i]) + return seg_ts + + + #finds the next potential segment + def best_line(self, time_series, current_data_point, lower_boundary_window, upper_boundary_window): + """Uses sliding window to find the next best segmentation candidate + + Parameters + ---------- + time_series : np.array + 1D time series to be segmented. 
+ current_data_point : int + the current_data_point we are observing + lower_boundary_window: int + the lower boundary of the window + upper_boundary_window: int + the uppoer boundary of the window + + Returns + ------- + np.array + new found segmentation candidates + """ + + max_window_length = current_data_point + upper_boundary_window + seg_ts = np.array(time_series[current_data_point: current_data_point + lower_boundary_window]) + current_data_point = current_data_point + lower_boundary_window + error = 0 + while current_data_point < max_window_length and current_data_point < len(time_series) and error < self.max_error: + seg_ts = np.append(seg_ts, time_series[current_data_point]) + error = self.calculate_error(seg_ts) + current_data_point = current_data_point + 1 + return seg_ts + + + def transform_flatten(self, time_series): + """Transform a time series and return a 1d array + + Parameters + ---------- + time_series : np.array + 1D time series to be transformed. + + Returns + ------- + list + List of a flattened transformed time series + """ + + pla_timeseries = self.transform(time_series) + return np.concatenate(pla_timeseries) \ No newline at end of file diff --git a/tsml_eval/_wip/series_transformer/_td.py b/tsml_eval/_wip/series_transformer/_td.py new file mode 100644 index 000000000..47a3d5f59 --- /dev/null +++ b/tsml_eval/_wip/series_transformer/_td.py @@ -0,0 +1,110 @@ +from base import BasePLA +import numpy as np +import sys + +__maintainer__ = [] +__all__ = ["TopDown"] + +class TopDown(BasePLA): + """ + Top-Down Segmentation. + + Uses a top-down algorithm to traverse the dataset in an online manner. + + Parameters + ---------- + max_error: float + The maximum error valuefor the function to find before segmenting the dataset + + References + ---------- + .. [1] Keogh, E., Chu, S., Hart, D. and Pazzani, M., 2001, November. + An online algorithm for segmenting time series. (pp. 289-296). 
+ """ + + def __init__(self, max_error): + super().__init__(max_error) + + #Implement a cache system for this + def transform(self, time_series): + """Transform a time series + + Parameters + ---------- + time_series : np.array + 1D time series to be transformed. + + Returns + ------- + list + List of transformed segmented time series + """ + + best_so_far = sys.float_info.max + breakpoint = None + + for i in range(2, len(time_series -2)): + improvement_in_approximation = self.improvement_splitting_here(time_series, i) + if(improvement_in_approximation < best_so_far): + breakpoint = i + best_so_far = improvement_in_approximation + + left_found_segment = time_series[:breakpoint] + right_found_segment = time_series[breakpoint:] + + left_segment = None + right_segment = None + + if self.calculate_error(left_found_segment) > self.max_error: + left_segment = self.transform(left_found_segment) + else: + left_segment = [self.create_segment(left_found_segment)] + + if self.calculate_error(right_found_segment) > self.max_error: + right_segment = self.transform(right_found_segment) + else: + right_segment = [self.create_segment(right_found_segment)] + + return left_segment + right_segment + + + def improvement_splitting_here(self, time_series, breakpoint): + """Returns the squared sum error of the left and right segment + splitted off at a particual point in a time series + + Parameters + ---------- + time_series : np.array + 1D time series. + breakpoint : int + the break point within the time series array + + Returns + ------- + error + the squared sum error of the split segmentations + """ + + left_segment = time_series[:breakpoint] + right_segment = time_series[breakpoint:] + return self.calculate_error(left_segment) + self.calculate_error(right_segment) + + + def transform_flatten(self, time_series): + """Transform a time series and return a 1d array + + Parameters + ---------- + time_series : np.array + 1D time series to be transformed. 
+ + Returns + ------- + list + List of a flattened transformed time series + """ + + pla_timeseries = self.transform(time_series) + print(pla_timeseries) + return np.concatenate(pla_timeseries) + \ No newline at end of file diff --git a/tsml_eval/_wip/series_transformer/base.py b/tsml_eval/_wip/series_transformer/base.py new file mode 100644 index 000000000..c77ad737f --- /dev/null +++ b/tsml_eval/_wip/series_transformer/base.py @@ -0,0 +1,98 @@ +"""Abstract base class""" + +__maintainer__ = [] +__all__ = ["BasePLA"] + +import numpy as np +import pandas as pd +from sklearn.linear_model import LinearRegression + +class BasePLA(): + """ + Base class for algorithms which use PLA (Piecewise Linear Approximation) for segmentation. + + Parameters + ---------- + max_error: float + The maximum error valuefor the function to find before segmenting the dataset + """ + + def __init__(self, max_error): + self.max_error = max_error + + def linear_regression(self, time_series): + """Transform a time series + + Parameters + ---------- + time_series : np.array + 1D time series to be transformed. + + Returns + ------- + list + List of transformed segmented time series + """ + + n = len(time_series) + Y = np.array(time_series) + X = np.arange(n).reshape(-1 , 1) + linearRegression = LinearRegression() + linearRegression.fit(X, Y) + regression_line = np.array(linearRegression.predict(X)) + return regression_line + + def sum_squared_error(self, time_series, linear_regression_time_series): + """Returns the squared sum error time series and its linear regression + + formula: sse = the sum of the differences of the original series + against the predicted series squared + + Parameters + ---------- + time_series : np.array + 1D time series. 
+ linear_regression_time_series: np.array + 1D linear time series formatted using linear regression + + Returns + ------- + error + the squared sum error of the split segmentations + """ + + error = np.sum((time_series - linear_regression_time_series) ** 2) + return error + + def calculate_error(self, time_series): + """Returns the squared sum error of a time series and its linear regression + + Parameters + ---------- + time_series : np.array + 1D time series. + + Returns + ------- + error + the squared sum error of a time series and it's linear regression + """ + + lrts = self.linear_regression(time_series) + sse = self.sum_squared_error(time_series, lrts) + return sse + + def create_segment(self, time_series): + """create a linear segment of a given time series. + + Parameters + ---------- + time_series : np.array + 1D time series. + + Returns + ------- + np.array + the linear regression of the time series. + """ + return self.linear_regression(time_series) \ No newline at end of file diff --git a/tsml_eval/_wip/series_transformer/manual_test.py b/tsml_eval/_wip/series_transformer/manual_test.py new file mode 100644 index 000000000..ead33d58e --- /dev/null +++ b/tsml_eval/_wip/series_transformer/manual_test.py @@ -0,0 +1,42 @@ +from _sw import SlidingWindow +from _bu import BottomUp +from _td import TopDown +from _swab import SWAB +from aeon.datasets import load_electric_devices_segmentation +from aeon.visualisation import plot_series_with_change_points, plot_series_with_profiles +import matplotlib.pyplot as plt +import numpy as np +from sklearn.preprocessing import MinMaxScaler + + +ts, period_size, true_cps = load_electric_devices_segmentation() +ts = ts[:30] +ts = ts.values + + +ts = np.array([573.0,375.0,301.0,212.0,55.0,34.0,25.0,33.0,113.0,143.0,303.0,615.0,1226.0,1281.0,1221.0,1081.0,866.0,1096.0,1039.0,975.0,746.0,581.0,409.0,182.0]) + +pla = TopDown(40) +results = pla.transform_flatten(ts) + +print("Original: ", ts) +print("PLA : ", results) + 
+plt.subplot(2, 1, 1) # (rows, columns, subplot_number) +plt.plot(np.arange(len(ts)), ts) +plt.title('Original') +plt.xlabel('x') +plt.ylabel('y1') + +# Create the second subplot (lower plot) +plt.subplot(2, 1, 2) # (rows, columns, subplot_number) +plt.plot(np.arange(len(ts)), results) +plt.title('PLA') +plt.xlabel('x') +plt.ylabel('y2') + +# Adjust layout to prevent overlapping +plt.tight_layout() + +# Display the plot +plt.show() \ No newline at end of file From 1a4634c75de90bc96e0c4015a1fbb3d0196f1e2d Mon Sep 17 00:00:00 2001 From: Moonzyyy <47296443+Moonzyyy@users.noreply.github.com> Date: Tue, 2 Jul 2024 16:42:18 +0100 Subject: [PATCH 11/17] Fixed bottom down algorithm for pla --- tsml_eval/_wip/series_transformer/_bu.py | 6 ++---- tsml_eval/_wip/series_transformer/manual_test.py | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tsml_eval/_wip/series_transformer/_bu.py b/tsml_eval/_wip/series_transformer/_bu.py index 32c9c970c..7e7ddd35d 100644 --- a/tsml_eval/_wip/series_transformer/_bu.py +++ b/tsml_eval/_wip/series_transformer/_bu.py @@ -49,10 +49,8 @@ def transform(self, time_series): merge_cost = np.array(merge_cost) while len(merge_cost) != 0 and min(merge_cost) < self.max_error: - if(len(merge_cost) == len(seg_ts)): - print("error") pos = np.argmin(merge_cost) - seg_ts[pos] = np.concatenate((seg_ts[pos], seg_ts[pos + 1])) + seg_ts[pos] = self.create_segment(np.concatenate((seg_ts[pos], seg_ts[pos + 1]))) seg_ts.pop(pos + 1) if (pos + 1) < len(merge_cost): merge_cost = np.delete(merge_cost, pos + 1) @@ -65,7 +63,7 @@ def transform(self, time_series): if((pos + 1) < len(seg_ts)): merge_cost[pos] = self.calculate_error(np.concatenate((seg_ts[pos], seg_ts[pos + 1]))) - + return seg_ts diff --git a/tsml_eval/_wip/series_transformer/manual_test.py b/tsml_eval/_wip/series_transformer/manual_test.py index ead33d58e..8bd813c92 100644 --- a/tsml_eval/_wip/series_transformer/manual_test.py +++ b/tsml_eval/_wip/series_transformer/manual_test.py 
@@ -16,7 +16,7 @@ ts = np.array([573.0,375.0,301.0,212.0,55.0,34.0,25.0,33.0,113.0,143.0,303.0,615.0,1226.0,1281.0,1221.0,1081.0,866.0,1096.0,1039.0,975.0,746.0,581.0,409.0,182.0]) -pla = TopDown(40) +pla = TopDown(1) results = pla.transform_flatten(ts) print("Original: ", ts) From 082f419f2807c0eb395795e061e7f6e6c3aa4fa2 Mon Sep 17 00:00:00 2001 From: Moonzyyy <47296443+Moonzyyy@users.noreply.github.com> Date: Tue, 2 Jul 2024 16:42:42 +0100 Subject: [PATCH 12/17] Deletion of folders --- tsml_eval/segmentation/__init__.py | 14 ---- tsml_eval/segmentation/_bu.py | 92 --------------------- tsml_eval/segmentation/_sw.py | 74 ----------------- tsml_eval/segmentation/_swab.py | 105 ------------------------ tsml_eval/segmentation/_td.py | 110 -------------------------- tsml_eval/segmentation/base.py | 97 ----------------------- tsml_eval/segmentation/manual_test.py | 21 ----- 7 files changed, 513 deletions(-) delete mode 100644 tsml_eval/segmentation/__init__.py delete mode 100644 tsml_eval/segmentation/_bu.py delete mode 100644 tsml_eval/segmentation/_sw.py delete mode 100644 tsml_eval/segmentation/_swab.py delete mode 100644 tsml_eval/segmentation/_td.py delete mode 100644 tsml_eval/segmentation/base.py delete mode 100644 tsml_eval/segmentation/manual_test.py diff --git a/tsml_eval/segmentation/__init__.py b/tsml_eval/segmentation/__init__.py deleted file mode 100644 index d73f49449..000000000 --- a/tsml_eval/segmentation/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -"""Piecewise Linear Approximation.""" - -__all__ = [ - "BasePLA", - "SlidingWindow", - "TopDown", - "BottomUp", - "SWAB", -] -from base import BasePLA -from _sw import SlidingWindow -from _td import TopDown -from _bu import BottomUp -from _swab import SWAB \ No newline at end of file diff --git a/tsml_eval/segmentation/_bu.py b/tsml_eval/segmentation/_bu.py deleted file mode 100644 index 90cb92639..000000000 --- a/tsml_eval/segmentation/_bu.py +++ /dev/null @@ -1,92 +0,0 @@ -from base import BasePLA -import 
numpy as np -import math -__maintainer__ = [] -__all__ = ["BottomUp"] - -class BottomUp(BasePLA): - """ - Bottom-Up Segmentation. - - Uses a bottom-up algorithm to traverse the dataset in an online manner. - - Parameters - ---------- - max_error: float - The maximum error valuefor the function to find before segmenting the dataset - - References - ---------- - .. [1] Keogh, E., Chu, S., Hart, D. and Pazzani, M., 2001, November. - An online algorithm for segmenting time series. (pp. 289-296). - """ - - def __init__(self, max_error): - super().__init__(max_error) - - #clean the code - def segment(self, time_series): - """Segment a time series - - Parameters - ---------- - time_series : np.array - 1D time series to be segmented. - - Returns - ------- - list - List of segmentations - """ - - seg_ts = [] - merge_cost = [] - for i in range(0, len(time_series), 2): - seg_ts.append(self.create_segment(time_series[i: i + 2])) - for i in range(len(seg_ts) - 1): - merge_cost.append(self.calculate_error(seg_ts[i] + seg_ts[i + 1])) - - merge_cost = np.array(merge_cost) - - while len(merge_cost) != 0 and min(merge_cost) < self.max_error: - if(len(merge_cost) == len(seg_ts)): - print("error") - pos = np.argmin(merge_cost) - seg_ts[pos] = np.concatenate((seg_ts[pos], seg_ts[pos + 1])) - seg_ts.pop(pos + 1) - if (pos + 1) < len(merge_cost): - merge_cost = np.delete(merge_cost, pos + 1) - else: - merge_cost= np.delete(merge_cost, pos) - - if pos != 0: - merge_cost[pos - 1] = self.calculate_error(np.concatenate((seg_ts[pos - 1], seg_ts[pos]))) - - if((pos + 1) < len(seg_ts)): - merge_cost[pos] = self.calculate_error(np.concatenate((seg_ts[pos], seg_ts[pos + 1]))) - - - return seg_ts - - - def dense(self, time_series): - """Return the dense values of a segmented time series - - Parameters - ---------- - time_series : np.array - 1D time series to be segmented. 
- - Returns - ------- - list - dense values of a segmentation - """ - - results = self.segment(time_series) - dense_array = np.zeros(len(results) - 1) - segmentation_point = 0 - for i in range(len(results) - 1): - segmentation_point = segmentation_point + len(results[i]) - dense_array[i] = segmentation_point - return dense_array \ No newline at end of file diff --git a/tsml_eval/segmentation/_sw.py b/tsml_eval/segmentation/_sw.py deleted file mode 100644 index 7f1b93aba..000000000 --- a/tsml_eval/segmentation/_sw.py +++ /dev/null @@ -1,74 +0,0 @@ - -from base import BasePLA -import numpy as np -__maintainer__ = [] -__all__ = ["SlidingWindow"] - -class SlidingWindow(BasePLA): - """Sliding Window Segmentation. - - Uses a sliding window algorithm to traverse the dataset in an online manner. - - Parameters - ---------- - max_error: float - The maximum error valuefor the function to find before segmenting the dataset - - References - ---------- - .. [1] Keogh, E., Chu, S., Hart, D. and Pazzani, M., 2001, November. - An online algorithm for segmenting time series. (pp. 289-296). - """ - - def __init__(self, max_error): - super().__init__(max_error) - - #! clean this up, the while loops are not done in a good manner. This is from the pseudocode - def segment(self, time_series): - """Segment a time series - - Parameters - ---------- - time_series : np.array - 1D time series to be segmented. - - Returns - ------- - list - List of segmentations - """ - - seg_ts = [] - anchor = 0 - while anchor < len(time_series): - i = 2 - while anchor + i -1 < len(time_series) and self.calculate_error(time_series[anchor:anchor + i]) < self.max_error: - i = i + 1 - seg_ts.append(self.create_segment(time_series[anchor:anchor + i - 1])) - anchor = anchor + i - 1 - return seg_ts - - - def dense(self, time_series): - """Return the dense values of a segmented time series - - Parameters - ---------- - time_series : np.array - 1D time series to be segmented. 
- - Returns - ------- - list - dense values of a segmentation - """ - - results = self.segment(time_series) - dense_array = np.zeros(len(results) - 1) - segmentation_point = 0 - for i in range(len(results) - 1): - segmentation_point = segmentation_point + len(results[i]) - dense_array[i] = segmentation_point - return dense_array - - \ No newline at end of file diff --git a/tsml_eval/segmentation/_swab.py b/tsml_eval/segmentation/_swab.py deleted file mode 100644 index 268387a8a..000000000 --- a/tsml_eval/segmentation/_swab.py +++ /dev/null @@ -1,105 +0,0 @@ -from base import BasePLA -import numpy as np -import sys -from _bu import BottomUp - -__maintainer__ = [] -__all__ = ["SWAB"] - -class SWAB(BasePLA): - """ - SWAB (Sliding Window And Bottom-Up) Segmentation. - - Uses SWAB algorithm as described in [1] to traverse the dataset in an online manner. - - Parameters - ---------- - max_error: float - The maximum error valuefor the function to find before segmenting the dataset - - References - ---------- - .. [1] Keogh, E., Chu, S., Hart, D. and Pazzani, M., 2001, November. - An online algorithm for segmenting time series. (pp. 289-296). - """ - - def __init__(self, max_error): - self.bottomup = BottomUp(max_error) - super().__init__(max_error) - - - def segment(self, time_series): - """Segment a time series - - Parameters - ---------- - time_series : np.array - 1D time series to be segmented. 
- - Returns - ------- - list - List of segmentations - """ - - seg_ts = [] - seg = self.best_line(time_series, 0) - current_data_point = len(seg) - buffer = np.array(seg) - while len(buffer) > 0: - t = self.bottomup.bottomUp(time_series) - seg_ts.append(t[0]) - buffer = buffer[len(t[0]):] - if(current_data_point != len(time_series)): - seg = self.best_line(time_series, current_data_point) - current_data_point = current_data_point + len(seg) - buffer = np.append(buffer, seg) - return seg_ts - - - #finds the next potential segment - def best_line(self, time_series, current_data_point): - """Uses sliding window to find the next best segmentation candidate - - Parameters - ---------- - time_series : np.array - 1D time series to be segmented. - current_data_point : int - the current_data_point we are observing - - Returns - ------- - np.array - new found segmentation candidate - """ - - seg_ts = np.array([]) - error = 0 - while current_data_point < len(time_series) and error < self.max_error: - seg_ts = np.append(seg_ts, time_series[current_data_point]) - error = self.calculate_error(seg_ts) - current_data_point = current_data_point + 1 - return seg_ts - - def dense(self, time_series): - """Return the dense values of a segmented time series - - Parameters - ---------- - time_series : np.array - 1D time series to be segmented. 
- - Returns - ------- - list - dense values of a segmentation - """ - - results = self.segment(time_series) - dense_array = np.zeros(len(results) - 1) - segmentation_point = 0 - for i in range(len(results) - 1): - segmentation_point = segmentation_point + len(results[i]) - dense_array[i] = segmentation_point - return dense_array \ No newline at end of file diff --git a/tsml_eval/segmentation/_td.py b/tsml_eval/segmentation/_td.py deleted file mode 100644 index 32c66d4f0..000000000 --- a/tsml_eval/segmentation/_td.py +++ /dev/null @@ -1,110 +0,0 @@ -from base import BasePLA -import numpy as np -import sys - -__maintainer__ = [] -__all__ = ["TopDown"] - -class TopDown(BasePLA): - """ - Top-Down Segmentation. - - Uses a top-down algorithm to traverse the dataset in an online manner. - - Parameters - ---------- - max_error: float - The maximum error valuefor the function to find before segmenting the dataset - - References - ---------- - .. [1] Keogh, E., Chu, S., Hart, D. and Pazzani, M., 2001, November. - An online algorithm for segmenting time series. (pp. 289-296). - """ - - def __init__(self, max_error): - super().__init__(max_error) - - #Implement a cache system for this - def segment(self, time_series): - """Segment a time series - - Parameters - ---------- - time_series : np.array - 1D time series to be segmented. 
- - Returns - ------- - list - List of segmentations - """ - - seg_ts = [] - best_so_far = sys.float_info.max - breakpoint = None - for i in range(2, len(time_series -2)): - improvement_in_approximation = self.improvement_splitting_here(time_series, i) - if(improvement_in_approximation < best_so_far): - breakpoint = i - best_so_far = improvement_in_approximation - - left_segment = time_series[:breakpoint] - right_segment = time_series[breakpoint:] - - if self.calculate_error(left_segment) > self.max_error: - seg_ts.append(self.segment(left_segment)) - else: - seg_ts.extend([left_segment]) - - if self.calculate_error(right_segment) > self.max_error: - seg_ts.append(self.segment(right_segment)) - else: - seg_ts.extend([right_segment]) - - return seg_ts - - - def improvement_splitting_here(self, time_series, breakpoint): - """Returns the squared sum error of the left and right segment - splitted off at a particual point in a time series - - Parameters - ---------- - time_series : np.array - 1D time series. - breakpoint : int - the break point within the time series array - - Returns - ------- - error - the squared sum error of the split segmentations - """ - - left_segment = time_series[:breakpoint] - right_segment = time_series[breakpoint:] - return self.calculate_error(left_segment) + self.calculate_error(right_segment) - - def dense(self, time_series): - """Return the dense values of a segmented time series - - Parameters - ---------- - time_series : np.array - 1D time series to be segmented. 
- - Returns - ------- - list - dense values of a segmentation - """ - - results = self.segment(time_series) - dense_array = np.zeros(len(results) - 1) - segmentation_point = 0 - for i in range(len(results) - 1): - segmentation_point = segmentation_point + len(results[i]) - dense_array[i] = segmentation_point - return dense_array - \ No newline at end of file diff --git a/tsml_eval/segmentation/base.py b/tsml_eval/segmentation/base.py deleted file mode 100644 index d210ac404..000000000 --- a/tsml_eval/segmentation/base.py +++ /dev/null @@ -1,97 +0,0 @@ -"""Abstract base class""" - -__maintainer__ = [] -__all__ = ["BasePLA"] - -import numpy as np -import pandas as pd -from sklearn.linear_model import LinearRegression - -class BasePLA(): - """ - Base class for algorithms which use PLA (Piecewise Linear Approximation) for segmentation. - - Parameters - ---------- - max_error: float - The maximum error valuefor the function to find before segmenting the dataset - """ - - def __init__(self, max_error): - self.max_error = max_error - - def linear_regression(self, time_series): - """Returns the fitted line through a time series. - - Parameters - ---------- - time_series : np.array - 1D time series. - - Returns - ------- - np.array - the fitted line - """ - n = len(time_series) - Y = np.array(time_series) - X = np.arange(n).reshape(-1 , 1) - linearRegression = LinearRegression() - linearRegression.fit(X, Y) - regression_line = np.array(linearRegression.predict(X)) - return regression_line - - def sum_squared_error(self, time_series, linear_regression_time_series): - """Returns the squared sum error time series and its linear regression - - formula: sse = the sum of the differences of the original series - against the predicted series squared - - Parameters - ---------- - time_series : np.array - 1D time series. 
- linear_regression_time_series: np.array - 1D linear time series formatted using linear regression - - Returns - ------- - error - the squared sum error of the split segmentations - """ - - error = np.sum((time_series - linear_regression_time_series) ** 2) - return error - - def calculate_error(self, time_series): - """Returns the squared sum error of a time series and its linear regression - - Parameters - ---------- - time_series : np.array - 1D time series. - - Returns - ------- - error - the squared sum error of a time series and it's linear regression - """ - - lrts = self.linear_regression(time_series) - sse = self.sum_squared_error(time_series, lrts) - return sse - - def create_segment(self, time_series): - """create a linear segment of a given time series. - - Parameters - ---------- - time_series : np.array - 1D time series. - - Returns - ------- - np.array - the linear regression of the time series. - """ - return self.linear_regression(time_series) \ No newline at end of file diff --git a/tsml_eval/segmentation/manual_test.py b/tsml_eval/segmentation/manual_test.py deleted file mode 100644 index 1759163d4..000000000 --- a/tsml_eval/segmentation/manual_test.py +++ /dev/null @@ -1,21 +0,0 @@ -from _sw import SlidingWindow -from _bu import BottomUp -from _td import TopDown -from _swab import SWAB -from aeon.datasets import load_electric_devices_segmentation -from aeon.visualisation import plot_series_with_change_points, plot_series_with_profiles -import matplotlib.pyplot as plt -import numpy as np -from sklearn.preprocessing import MinMaxScaler - - -ts, period_size, true_cps = load_electric_devices_segmentation() -ts = ts.values -ts = ts.reshape((len(ts), 1)) -scaler = MinMaxScaler(feature_range=(0, 1)) -scaler = scaler.fit(ts) -ts = scaler.transform(ts) -pla = BottomUp(22) -results = pla.dense(ts) -print(results) -print(true_cps) \ No newline at end of file From 052403b416eff4e84e14b69e03a166fe009a36d2 Mon Sep 17 00:00:00 2001 From: Moonzyyy 
<47296443+Moonzyyy@users.noreply.github.com> Date: Wed, 3 Jul 2024 14:36:34 +0100 Subject: [PATCH 13/17] Implementing PiecewiseLinearApproximation as part of BaseSeriesTransformer --- tsml_eval/_wip/series_transformer/__init__.py | 13 +- .../series_transformer/{_bu.py => _bu_old.py} | 1 + tsml_eval/_wip/series_transformer/_pla.py | 375 ++++++++++++++++++ .../series_transformer/{_sw.py => _sw_old.py} | 0 .../{_swab.py => _swab_old.py} | 0 .../series_transformer/{_td.py => _td_old.py} | 0 tsml_eval/_wip/series_transformer/base.py | 98 ----- 7 files changed, 379 insertions(+), 108 deletions(-) rename tsml_eval/_wip/series_transformer/{_bu.py => _bu_old.py} (99%) create mode 100644 tsml_eval/_wip/series_transformer/_pla.py rename tsml_eval/_wip/series_transformer/{_sw.py => _sw_old.py} (100%) rename tsml_eval/_wip/series_transformer/{_swab.py => _swab_old.py} (100%) rename tsml_eval/_wip/series_transformer/{_td.py => _td_old.py} (100%) delete mode 100644 tsml_eval/_wip/series_transformer/base.py diff --git a/tsml_eval/_wip/series_transformer/__init__.py b/tsml_eval/_wip/series_transformer/__init__.py index d73f49449..a2b8a63c9 100644 --- a/tsml_eval/_wip/series_transformer/__init__.py +++ b/tsml_eval/_wip/series_transformer/__init__.py @@ -1,14 +1,7 @@ """Piecewise Linear Approximation.""" __all__ = [ - "BasePLA", - "SlidingWindow", - "TopDown", - "BottomUp", - "SWAB", + "_pla", ] -from base import BasePLA -from _sw import SlidingWindow -from _td import TopDown -from _bu import BottomUp -from _swab import SWAB \ No newline at end of file + +from _pla import PiecewiseLinearApproximation diff --git a/tsml_eval/_wip/series_transformer/_bu.py b/tsml_eval/_wip/series_transformer/_bu_old.py similarity index 99% rename from tsml_eval/_wip/series_transformer/_bu.py rename to tsml_eval/_wip/series_transformer/_bu_old.py index 7e7ddd35d..dec5cb6a0 100644 --- a/tsml_eval/_wip/series_transformer/_bu.py +++ b/tsml_eval/_wip/series_transformer/_bu_old.py @@ -67,6 +67,7 @@ def 
transform(self, time_series): return seg_ts + def transform_flatten(self, time_series): """Transform a time series and return a 1d array diff --git a/tsml_eval/_wip/series_transformer/_pla.py b/tsml_eval/_wip/series_transformer/_pla.py new file mode 100644 index 000000000..8c9fe1a92 --- /dev/null +++ b/tsml_eval/_wip/series_transformer/_pla.py @@ -0,0 +1,375 @@ +__maintainer__ = [] +__all__ = ["PiecewiseLinearApproximation"] + +from enum import Enum +import numpy as np +from sklearn.linear_model import LinearRegression +from aeon.transformations.series.base import BaseSeriesTransformer + +class PiecewiseLinearApproximation(BaseSeriesTransformer): + """PLA (Piecewise Linear Approximation) for series transformation. + + Parameters + ---------- + transformer: enum + The transformer to be used + max_error: float + The maximum error valuefor the function to find before segmenting the dataset + buffer_size: int + The buffer size, used only for SWAB + """ + + class Transformer(Enum): + SlidingWindow = "SlidingWindow" + TopDown = "TopDown" + BottomUp = "BottomUp" + SWAB = "Swab" + + _tags = { + "fit_is_empty": True, + "python_dependencies": "sklearn", + } + + + def __init__(self, transformer, max_error, buffer_size): + if not isinstance(transformer, self.Transformer): + raise ValueError("Invalid status") + self.transformer = transformer + self.max_error = max_error + self.buffer_size = int(buffer_size) + self.segment_dense = np.array([]) + super().__init__(axis=0) + + + def _transform(self, X, y=None): + """Transform X and return a transformed version. 
+ + private _transform containing the core logic, called from transform + + Parameters + ---------- + X : np.ndarray + 1D time series to be transformed + y : ignored argument for interface compatibility + + Returns + ------- + np.ndarray + 1D transformed version of X + """ + results = None + if(self.transformer == self.Transformer.SlidingWindow): + results = self._sliding_window(X) + elif(self.transformer == self.Transformer.TopDown): + results = self._top_down(X) + elif(self.transformer == self.Transformer.BottomUp): + results = self._bottom_up(X) + elif(self.transformer == self.Transformer.SWAB): + results = self._SWAB(X) + else: + raise RuntimeError("No transformer was called.") + + segment_dense = np.zeros[len(results) - 1] + segment_dense[0] = results[0] + for i in range(1, results): + segment_dense[i] = len(results) + [] + self.segment_dense = segment_dense + + return np.concatenate(results) + + def _sliding_window(self, X): + """Transform a time series + + Parameters + ---------- + time_series : np.array + 1D time series to be transformed. + + Returns + ------- + list + List of transformed segmented time series + """ + seg_ts = [] + anchor = 0 + while anchor < len(X): + i = 2 + while anchor + i -1 < len(X) and self._calculate_error(X[anchor:anchor + i]) < self.max_error: + i = i + 1 + seg_ts.append(self._create_segment(X[anchor:anchor + i - 1])) + anchor = anchor + i - 1 + return seg_ts + + def _top_down(self, X): + """Transform a time series + + Parameters + ---------- + time_series : np.array + 1D time series to be transformed. 
+ + Returns + ------- + list + List of transformed segmented time series + """ + + best_so_far = float("inf") + breakpoint = None + + for i in range(2, len(X -2)): + improvement_in_approximation = self.improvement_splitting_here(X, i) + if(improvement_in_approximation < best_so_far): + breakpoint = i + best_so_far = improvement_in_approximation + + if(breakpoint == None): + return X + + left_found_segment = X[:breakpoint] + right_found_segment = X[breakpoint:] + + left_segment = None + right_segment = None + + if self._calculate_error(left_found_segment) > self.max_error: + left_segment = self.transform(left_found_segment) + else: + left_segment = [self._create_segment(left_found_segment)] + + if self._calculate_error(right_found_segment) > self.max_error: + right_segment = self.transform(right_found_segment) + else: + right_segment = [self._create_segment(right_found_segment)] + + return left_segment + right_segment + + def improvement_splitting_here(self, time_series, breakpoint): + """Returns the squared sum error of the left and right segment + splitted off at a particual point in a time series + + Parameters + ---------- + time_series : np.array + 1D time series. + breakpoint : int + the break point within the time series array + + Returns + ------- + error + the squared sum error of the split segmentations + """ + left_segment = time_series[:breakpoint] + right_segment = time_series[breakpoint:] + return self._calculate_error(left_segment) + self._calculate_error(right_segment) + + def _bottom_up(self, X): + """Transform a time series + + Parameters + ---------- + time_series : np.array + 1D time series to be transformed. 
+ + Returns + ------- + list + List of transformed segmented time series + """ + seg_ts = [] + merge_cost = [] + for i in range(0, len(X), 2): + seg_ts.append(self._create_segment(X[i: i + 2])) + for i in range(len(seg_ts) - 1): + merge_cost.append(self._calculate_error(seg_ts[i] + seg_ts[i + 1])) + + merge_cost = np.array(merge_cost) + + while len(merge_cost) != 0 and min(merge_cost) < self.max_error: + pos = np.argmin(merge_cost) + seg_ts[pos] = self._create_segment(np.concatenate((seg_ts[pos], seg_ts[pos + 1]))) + seg_ts.pop(pos + 1) + if (pos + 1) < len(merge_cost): + merge_cost = np.delete(merge_cost, pos + 1) + else: + merge_cost= np.delete(merge_cost, pos) + + if pos != 0: + merge_cost[pos - 1] = self._calculate_error(np.concatenate((seg_ts[pos - 1], seg_ts[pos]))) + + if((pos + 1) < len(seg_ts)): + merge_cost[pos] = self._calculate_error(np.concatenate((seg_ts[pos], seg_ts[pos + 1]))) + + return seg_ts + + def _SWAB(self, X): + """Transform a time series + + Parameters + ---------- + time_series : np.array + 1D time series to be transformed. 
+ + Returns + ------- + list + List of transformed segmented time series + """ + seg_ts = [] + if(self.buffer_size == None): + self.buffer_size == int(len(X) ** 0.5) + + lower_boundary_window = int(self.buffer_size / 2) + upper_boundary_window = self.buffer_size * 2 + + seg = self._best_line(X, 0, lower_boundary_window, upper_boundary_window) + current_data_point = len(seg) + buffer = np.array(seg) + + while len(buffer) > 0: + t = self._bottom_up(X) + seg_ts.append(t[0]) + buffer = buffer[len(t[0]):] + if(current_data_point >= len(X)): + seg = self._best_line(X, current_data_point, lower_boundary_window, upper_boundary_window) + current_data_point = current_data_point + len(seg) + buffer = np.append(buffer, seg) + else: + buffer = np.array([]) + t = t[1:] + for i in range(len(t)): + seg_ts.append(t[i]) + return seg_ts + + + def _best_line(self, time_series, current_data_point, lower_boundary_window, upper_boundary_window): + """Uses sliding window to find the next best segmentation candidate, used for SWAB + + Parameters + ---------- + time_series : np.array + 1D time series to be segmented. 
+ current_data_point : int + the current_data_point we are observing + lower_boundary_window: int + the lower boundary of the window + upper_boundary_window: int + the uppoer boundary of the window + + Returns + ------- + np.array + new found segmentation candidates + """ + + max_window_length = current_data_point + upper_boundary_window + seg_ts = np.array(time_series[current_data_point: current_data_point + lower_boundary_window]) + current_data_point = current_data_point + lower_boundary_window + error = 0 + while current_data_point < max_window_length and current_data_point < len(time_series) and error < self.max_error: + seg_ts = np.append(seg_ts, time_series[current_data_point]) + error = self._calculate_error(seg_ts) + current_data_point = current_data_point + 1 + return seg_ts + + #Create own linear regression, inefficient to use sklearns + def _linear_regression(self, time_series): + """Transform a time series + + Parameters + ---------- + time_series : np.array + 1D time series to be transformed. + + Returns + ------- + list + List of transformed segmented time series + """ + + n = len(time_series) + Y = np.array(time_series) + X = np.arange(n).reshape(-1 , 1) + linearRegression = LinearRegression() + linearRegression.fit(X, Y) + regression_line = np.array(linearRegression.predict(X)) + return regression_line + + def _sum_squared_error(self, time_series, linear_regression_time_series): + """Returns the squared sum error time series and its linear regression + + formula: sse = the sum of the differences of the original series + against the predicted series squared + + Parameters + ---------- + time_series : np.array + 1D time series. 
+ linear_regression_time_series: np.array + 1D linear time series formatted using linear regression + + Returns + ------- + error + the squared sum error of the split segmentations + """ + + error = np.sum((time_series - linear_regression_time_series) ** 2) + return error + + def _calculate_error(self, time_series): + """Returns the squared sum error of a time series and its linear regression + + Parameters + ---------- + time_series : np.array + 1D time series. + + Returns + ------- + error + the squared sum error of a time series and it's linear regression + """ + + lrts = self._linear_regression(time_series) + sse = self._sum_squared_error(time_series, lrts) + return sse + + def _create_segment(self, time_series): + """create a linear segment of a given time series. + + Parameters + ---------- + time_series : np.array + 1D time series. + + Returns + ------- + np.array + the linear regression of the time series. + """ + return self._linear_regression(time_series) + + @classmethod + def get_test_params(cls, parameter_set="default"): + """ + Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. 
+ `create_test_instance` uses the first (or only) dictionary in `params` + """ + params = { + "max_error": 0.95, + } + + return params \ No newline at end of file diff --git a/tsml_eval/_wip/series_transformer/_sw.py b/tsml_eval/_wip/series_transformer/_sw_old.py similarity index 100% rename from tsml_eval/_wip/series_transformer/_sw.py rename to tsml_eval/_wip/series_transformer/_sw_old.py diff --git a/tsml_eval/_wip/series_transformer/_swab.py b/tsml_eval/_wip/series_transformer/_swab_old.py similarity index 100% rename from tsml_eval/_wip/series_transformer/_swab.py rename to tsml_eval/_wip/series_transformer/_swab_old.py diff --git a/tsml_eval/_wip/series_transformer/_td.py b/tsml_eval/_wip/series_transformer/_td_old.py similarity index 100% rename from tsml_eval/_wip/series_transformer/_td.py rename to tsml_eval/_wip/series_transformer/_td_old.py diff --git a/tsml_eval/_wip/series_transformer/base.py b/tsml_eval/_wip/series_transformer/base.py deleted file mode 100644 index c77ad737f..000000000 --- a/tsml_eval/_wip/series_transformer/base.py +++ /dev/null @@ -1,98 +0,0 @@ -"""Abstract base class""" - -__maintainer__ = [] -__all__ = ["BasePLA"] - -import numpy as np -import pandas as pd -from sklearn.linear_model import LinearRegression - -class BasePLA(): - """ - Base class for algorithms which use PLA (Piecewise Linear Approximation) for segmentation. - - Parameters - ---------- - max_error: float - The maximum error valuefor the function to find before segmenting the dataset - """ - - def __init__(self, max_error): - self.max_error = max_error - - def linear_regression(self, time_series): - """Transform a time series - - Parameters - ---------- - time_series : np.array - 1D time series to be transformed. 
- - Returns - ------- - list - List of transformed segmented time series - """ - - n = len(time_series) - Y = np.array(time_series) - X = np.arange(n).reshape(-1 , 1) - linearRegression = LinearRegression() - linearRegression.fit(X, Y) - regression_line = np.array(linearRegression.predict(X)) - return regression_line - - def sum_squared_error(self, time_series, linear_regression_time_series): - """Returns the squared sum error time series and its linear regression - - formula: sse = the sum of the differences of the original series - against the predicted series squared - - Parameters - ---------- - time_series : np.array - 1D time series. - linear_regression_time_series: np.array - 1D linear time series formatted using linear regression - - Returns - ------- - error - the squared sum error of the split segmentations - """ - - error = np.sum((time_series - linear_regression_time_series) ** 2) - return error - - def calculate_error(self, time_series): - """Returns the squared sum error of a time series and its linear regression - - Parameters - ---------- - time_series : np.array - 1D time series. - - Returns - ------- - error - the squared sum error of a time series and it's linear regression - """ - - lrts = self.linear_regression(time_series) - sse = self.sum_squared_error(time_series, lrts) - return sse - - def create_segment(self, time_series): - """create a linear segment of a given time series. - - Parameters - ---------- - time_series : np.array - 1D time series. - - Returns - ------- - np.array - the linear regression of the time series. 
- """ - return self.linear_regression(time_series) \ No newline at end of file From 7dac80392e43aa837349308c2c4d105be2982248 Mon Sep 17 00:00:00 2001 From: Moonzyyy <47296443+Moonzyyy@users.noreply.github.com> Date: Wed, 3 Jul 2024 14:53:30 +0100 Subject: [PATCH 14/17] Initial finish in implementing PLA and its transformers into base series transformer, some efficiencies can be made --- tsml_eval/_wip/series_transformer/_pla.py | 14 +++++++------- tsml_eval/_wip/series_transformer/manual_test.py | 9 +++------ 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/tsml_eval/_wip/series_transformer/_pla.py b/tsml_eval/_wip/series_transformer/_pla.py index 8c9fe1a92..0cd2ebf8a 100644 --- a/tsml_eval/_wip/series_transformer/_pla.py +++ b/tsml_eval/_wip/series_transformer/_pla.py @@ -31,12 +31,12 @@ class Transformer(Enum): } - def __init__(self, transformer, max_error, buffer_size): + def __init__(self, transformer, max_error, buffer_size = None): if not isinstance(transformer, self.Transformer): raise ValueError("Invalid status") self.transformer = transformer self.max_error = max_error - self.buffer_size = int(buffer_size) + self.buffer_size = buffer_size self.segment_dense = np.array([]) super().__init__(axis=0) @@ -69,10 +69,10 @@ def _transform(self, X, y=None): else: raise RuntimeError("No transformer was called.") - segment_dense = np.zeros[len(results) - 1] - segment_dense[0] = results[0] - for i in range(1, results): - segment_dense[i] = len(results) + [] + segment_dense = np.zeros([len(results) - 1]) + segment_dense[0] = len(results[0]) + for i in range(1, len(results) - 1): + segment_dense[i] = segment_dense[i - 1] + len(results[i]) self.segment_dense = segment_dense return np.concatenate(results) @@ -221,7 +221,7 @@ def _SWAB(self, X): self.buffer_size == int(len(X) ** 0.5) lower_boundary_window = int(self.buffer_size / 2) - upper_boundary_window = self.buffer_size * 2 + upper_boundary_window = int(self.buffer_size * 2) seg = self._best_line(X, 0, 
lower_boundary_window, upper_boundary_window) current_data_point = len(seg) diff --git a/tsml_eval/_wip/series_transformer/manual_test.py b/tsml_eval/_wip/series_transformer/manual_test.py index 8bd813c92..2ab55d173 100644 --- a/tsml_eval/_wip/series_transformer/manual_test.py +++ b/tsml_eval/_wip/series_transformer/manual_test.py @@ -1,7 +1,4 @@ -from _sw import SlidingWindow -from _bu import BottomUp -from _td import TopDown -from _swab import SWAB +from _pla import PiecewiseLinearApproximation from aeon.datasets import load_electric_devices_segmentation from aeon.visualisation import plot_series_with_change_points, plot_series_with_profiles import matplotlib.pyplot as plt @@ -16,8 +13,8 @@ ts = np.array([573.0,375.0,301.0,212.0,55.0,34.0,25.0,33.0,113.0,143.0,303.0,615.0,1226.0,1281.0,1221.0,1081.0,866.0,1096.0,1039.0,975.0,746.0,581.0,409.0,182.0]) -pla = TopDown(1) -results = pla.transform_flatten(ts) +pla = PiecewiseLinearApproximation(PiecewiseLinearApproximation.Transformer.BottomUp, 5) +results = pla.fit_transform(ts) print("Original: ", ts) print("PLA : ", results) From c998b55145f5372d325f0d74cf58319b3d053839 Mon Sep 17 00:00:00 2001 From: Moonzyyy <47296443+Moonzyyy@users.noreply.github.com> Date: Wed, 3 Jul 2024 15:21:43 +0100 Subject: [PATCH 15/17] Fixed Errors --- tsml_eval/_wip/series_transformer/_pla.py | 17 +++++++++-------- .../_wip/series_transformer/manual_test.py | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tsml_eval/_wip/series_transformer/_pla.py b/tsml_eval/_wip/series_transformer/_pla.py index 0cd2ebf8a..7949de277 100644 --- a/tsml_eval/_wip/series_transformer/_pla.py +++ b/tsml_eval/_wip/series_transformer/_pla.py @@ -69,11 +69,12 @@ def _transform(self, X, y=None): else: raise RuntimeError("No transformer was called.") - segment_dense = np.zeros([len(results) - 1]) - segment_dense[0] = len(results[0]) - for i in range(1, len(results) - 1): - segment_dense[i] = segment_dense[i - 1] + len(results[i]) - 
self.segment_dense = segment_dense + if(len(results) > 1): + segment_dense = np.zeros([len(results) - 1]) + segment_dense[0] = len(results[0]) + for i in range(1, len(results) - 1): + segment_dense[i] = segment_dense[i - 1] + len(results[i]) + self.segment_dense = segment_dense return np.concatenate(results) @@ -133,12 +134,12 @@ def _top_down(self, X): right_segment = None if self._calculate_error(left_found_segment) > self.max_error: - left_segment = self.transform(left_found_segment) + left_segment = self._top_down(left_found_segment) else: left_segment = [self._create_segment(left_found_segment)] if self._calculate_error(right_found_segment) > self.max_error: - right_segment = self.transform(right_found_segment) + right_segment = self._top_down(right_found_segment) else: right_segment = [self._create_segment(right_found_segment)] @@ -218,7 +219,7 @@ def _SWAB(self, X): """ seg_ts = [] if(self.buffer_size == None): - self.buffer_size == int(len(X) ** 0.5) + self.buffer_size = int(len(X) ** 0.5) lower_boundary_window = int(self.buffer_size / 2) upper_boundary_window = int(self.buffer_size * 2) diff --git a/tsml_eval/_wip/series_transformer/manual_test.py b/tsml_eval/_wip/series_transformer/manual_test.py index 2ab55d173..93ae09d20 100644 --- a/tsml_eval/_wip/series_transformer/manual_test.py +++ b/tsml_eval/_wip/series_transformer/manual_test.py @@ -13,7 +13,7 @@ ts = np.array([573.0,375.0,301.0,212.0,55.0,34.0,25.0,33.0,113.0,143.0,303.0,615.0,1226.0,1281.0,1221.0,1081.0,866.0,1096.0,1039.0,975.0,746.0,581.0,409.0,182.0]) -pla = PiecewiseLinearApproximation(PiecewiseLinearApproximation.Transformer.BottomUp, 5) +pla = PiecewiseLinearApproximation(PiecewiseLinearApproximation.Transformer.SWAB, 5) results = pla.fit_transform(ts) print("Original: ", ts) From c873928ef437ea4a370f1dd32b18161159aa5379 Mon Sep 17 00:00:00 2001 From: Moonzyyy <47296443+Moonzyyy@users.noreply.github.com> Date: Thu, 4 Jul 2024 14:05:04 +0100 Subject: [PATCH 16/17] Tests added --- 
tsml_eval/_wip/series_transformer/__init__.py | 2 +- tsml_eval/_wip/series_transformer/_pla.py | 7 +- .../_wip/series_transformer/manual_test.py | 2 +- tsml_eval/_wip/series_transformer/test_pla.py | 69 +++++++++++++++++++ 4 files changed, 77 insertions(+), 3 deletions(-) create mode 100644 tsml_eval/_wip/series_transformer/test_pla.py diff --git a/tsml_eval/_wip/series_transformer/__init__.py b/tsml_eval/_wip/series_transformer/__init__.py index a2b8a63c9..6b521d9f8 100644 --- a/tsml_eval/_wip/series_transformer/__init__.py +++ b/tsml_eval/_wip/series_transformer/__init__.py @@ -1,7 +1,7 @@ """Piecewise Linear Approximation.""" __all__ = [ - "_pla", + "PiecewiseLinearApproximation", ] from _pla import PiecewiseLinearApproximation diff --git a/tsml_eval/_wip/series_transformer/_pla.py b/tsml_eval/_wip/series_transformer/_pla.py index 7949de277..2ba61eee5 100644 --- a/tsml_eval/_wip/series_transformer/_pla.py +++ b/tsml_eval/_wip/series_transformer/_pla.py @@ -33,7 +33,11 @@ class Transformer(Enum): def __init__(self, transformer, max_error, buffer_size = None): if not isinstance(transformer, self.Transformer): - raise ValueError("Invalid status") + raise ValueError("Invalid transformer: please use Transformer class.") + if not isinstance(max_error, (int, float, complex)): + raise ValueError("Invalid max_error: it has to be a number.") + if not (buffer_size == None or isinstance(buffer_size, (int, float, complex))): + raise ValueError("Invalid buffer_size: use a number only or keep empty.") self.transformer = transformer self.max_error = max_error self.buffer_size = buffer_size @@ -370,6 +374,7 @@ def get_test_params(cls, parameter_set="default"): `create_test_instance` uses the first (or only) dictionary in `params` """ params = { + "transformer": PiecewiseLinearApproximation.Transformer.SWAB, "max_error": 0.95, } diff --git a/tsml_eval/_wip/series_transformer/manual_test.py b/tsml_eval/_wip/series_transformer/manual_test.py index 93ae09d20..17f03e341 100644 --- 
a/tsml_eval/_wip/series_transformer/manual_test.py +++ b/tsml_eval/_wip/series_transformer/manual_test.py @@ -13,7 +13,7 @@ ts = np.array([573.0,375.0,301.0,212.0,55.0,34.0,25.0,33.0,113.0,143.0,303.0,615.0,1226.0,1281.0,1221.0,1081.0,866.0,1096.0,1039.0,975.0,746.0,581.0,409.0,182.0]) -pla = PiecewiseLinearApproximation(PiecewiseLinearApproximation.Transformer.SWAB, 5) +pla = PiecewiseLinearApproximation(PiecewiseLinearApproximation.Transformer.SlidingWindow, float("inf")) results = pla.fit_transform(ts) print("Original: ", ts) diff --git a/tsml_eval/_wip/series_transformer/test_pla.py b/tsml_eval/_wip/series_transformer/test_pla.py new file mode 100644 index 000000000..4ce411953 --- /dev/null +++ b/tsml_eval/_wip/series_transformer/test_pla.py @@ -0,0 +1,69 @@ +import pytest +import numpy as np +import pandas as pd +from _pla import PiecewiseLinearApproximation + + +@pytest.fixture +def X(): + return np.array([573.0,375.0,301.0,212.0,55.0,34.0,25.0,33.0,113.0,143.0,303.0, + 615.0,1226.0,1281.0,1221.0,1081.0,866.0,1096.0,1039.0,975.0, + 746.0,581.0,409.0,182.0]) + +def test_piecewise_linear_approximation_sliding_window(X): + pla = PiecewiseLinearApproximation(PiecewiseLinearApproximation.Transformer.SlidingWindow, 100) + result = pla.fit_transform(X) + expected = np.array([573., 375., 301., 212., 53., 38., 23., 33., 113., 143., 303., + 615., 1226., 1281., 1221., 1081., 866., 1097.16666667, + 1036.66666667, 976.16666667, 747.16666667, + 578.66666667, 410.16666667, 182.]) + np.testing.assert_array_almost_equal(result, expected) + +def test_piecewise_linear_approximation_top_down(X): + pla = PiecewiseLinearApproximation(PiecewiseLinearApproximation.Transformer.TopDown, 100) + result = pla.fit_transform(X) + expected = np.array([573., 375., 301., 212., 53., 38., 23., 33., 113., 143., 303., + 615., 1226., 1281., 1221., 1081., 866., 1097.16666667, + 1036.66666667, 976.16666667, 746., 581., 409., 182.]) + np.testing.assert_array_almost_equal(result, expected) + +def 
test_piecewise_linear_approximation_bottom_up(X): + result = PiecewiseLinearApproximation(PiecewiseLinearApproximation.Transformer.BottomUp, 5).fit_transform(X) + expected = np.array([538.8, 423.1, 307.4, 191.7, 48., 40.5, 33., 25.5, 43.6, + 210.2,376.8, 543.4, 1276.5, 1227., 1177.5, 1128., 953.5, + 980.5, 1007.5, 1034.5, 759.1, 572.7, 386.3, 199.9]) + np.testing.assert_array_almost_equal(result, expected) + +def test_piecewise_linear_approximation_SWAB(X): + result = PiecewiseLinearApproximation(PiecewiseLinearApproximation.Transformer.SWAB, 5).fit_transform(X) + expected = np.array([538.8, 423.1, 307.4, 191.7, 48., 40.5, 33., 25.5, 43.6, 210.2, + 376.8, 543.4, 1276.5, 1227., 1177.5, 1128., 953.5, 980.5, + 1007.5, 1034.5, 759.1, 572.7, 386.3, 199.9]) + np.testing.assert_array_almost_equal(result, expected) + +def test_piecewise_linear_approximation_check_diff_in_params(X): + transformers = [PiecewiseLinearApproximation.Transformer.SlidingWindow, + PiecewiseLinearApproximation.Transformer.TopDown, + PiecewiseLinearApproximation.Transformer.BottomUp, + PiecewiseLinearApproximation.Transformer.SWAB] + for i in range(len(transformers)): + low_error_pla = PiecewiseLinearApproximation(transformers[i], 1) + high_error_pla = PiecewiseLinearApproximation(transformers[i], float("inf")) + low_error_result = low_error_pla.fit_transform(X) + high_error_result = high_error_pla.fit_transform(X) + assert not np.allclose(low_error_result, high_error_result) + +def test_piecewise_linear_approximation_wrong_parameters(X): + with pytest.raises(ValueError): + PiecewiseLinearApproximation("Fake Transformer", 100) + with pytest.raises(ValueError): + PiecewiseLinearApproximation(PiecewiseLinearApproximation.Transformer.SWAB, "max_error") + with pytest.raises(ValueError): + PiecewiseLinearApproximation(PiecewiseLinearApproximation.Transformer.SWAB, 100, "buffer_size") + +def test_piecewise_linear_approximation_one_segment(X): + X = X[:2] + pla = 
PiecewiseLinearApproximation(PiecewiseLinearApproximation.Transformer.BottomUp, 10) + result = pla.fit_transform(X) + assert 0 == len(pla.segment_dense) + np.testing.assert_array_almost_equal(X, result, decimal=1) \ No newline at end of file From d07e273c8bcdaa24cfaba055ef01e67faed754fd Mon Sep 17 00:00:00 2001 From: Moonzyyy <47296443+Moonzyyy@users.noreply.github.com> Date: Thu, 4 Jul 2024 15:01:48 +0100 Subject: [PATCH 17/17] Finished Progress --- tsml_eval/_wip/series_transformer/_pla.py | 129 ++++++++++++---------- 1 file changed, 73 insertions(+), 56 deletions(-) diff --git a/tsml_eval/_wip/series_transformer/_pla.py b/tsml_eval/_wip/series_transformer/_pla.py index 2ba61eee5..db16d31c1 100644 --- a/tsml_eval/_wip/series_transformer/_pla.py +++ b/tsml_eval/_wip/series_transformer/_pla.py @@ -7,19 +7,36 @@ from aeon.transformations.series.base import BaseSeriesTransformer class PiecewiseLinearApproximation(BaseSeriesTransformer): - """PLA (Piecewise Linear Approximation) for series transformation. - - Parameters - ---------- - transformer: enum - The transformer to be used - max_error: float - The maximum error valuefor the function to find before segmenting the dataset - buffer_size: int - The buffer size, used only for SWAB + """Piecewise Linear Approximation (PLA) for time series transformation. + + Takes a univariate time series as input. Approximates a time series using + linear regression and the sum of squares error (SSE) through an algorithm. + The algorithms available are two offline algorithms: TopDown and BottomUp + and two online algorithms: SlidingWindow and SWAB (Sliding Window and Bottom Up). 
+ + Parameters + ---------- + transformer: enum + The transformer to be used + max_error: float + The maximum error value for the function to find before segmenting the dataset + buffer_size: int + The buffer size, used only for SWAB + + Attributes + ---------- + segment_dense : np.array + The endpoints of each found segment of the series for transformation + + References + ---------- + .. [1] Keogh, E., Chu, S., Hart, D. and Pazzani, M., 2001, November. + An online algorithm for segmenting time series. (pp. 289-296). """ class Transformer(Enum): + """An enum class specifically for PLA.""" + SlidingWindow = "SlidingWindow" TopDown = "TopDown" BottomUp = "BottomUp" @@ -59,7 +76,7 @@ def _transform(self, X, y=None): Returns ------- np.ndarray - 1D transformed version of X + 1D transform of X """ results = None if(self.transformer == self.Transformer.SlidingWindow): @@ -83,11 +100,11 @@ def _transform(self, X, y=None): return np.concatenate(results) def _sliding_window(self, X): - """Transform a time series + """Transform a time series using the sliding window algorithm. (Online) Parameters ---------- - time_series : np.array + X : np.ndarray 1D time series to be transformed. Returns @@ -106,11 +123,11 @@ def _sliding_window(self, X): return seg_ts def _top_down(self, X): - """Transform a time series + """Transform a time series using the top down algorithm (Offline) Parameters ---------- - time_series : np.array + X : np.ndarray 1D time series to be transformed. Returns @@ -149,32 +166,32 @@ def _top_down(self, X): return left_segment + right_segment - def improvement_splitting_here(self, time_series, breakpoint): - """Returns the squared sum error of the left and right segment - splitted off at a particual point in a time series + def improvement_splitting_here(self, X, breakpoint): + """Returns the SSE of the left and right segments split + at a particular point in a time series Parameters ---------- - time_series : np.array + X : np.array 1D time series.
breakpoint : int the break point within the time series array Returns ------- - error + error: float the squared sum error of the split segmentations """ - left_segment = time_series[:breakpoint] - right_segment = time_series[breakpoint:] + left_segment = X[:breakpoint] + right_segment = X[breakpoint:] return self._calculate_error(left_segment) + self._calculate_error(right_segment) def _bottom_up(self, X): - """Transform a time series + """Transform a time series using the bottom up algorithm (Offline) Parameters ---------- - time_series : np.array + X : np.ndarray 1D time series to be transformed. Returns @@ -209,11 +226,11 @@ def _bottom_up(self, X): return seg_ts def _SWAB(self, X): - """Transform a time series + """Transform a time series using the SWAB algorithm (Online) Parameters ---------- - time_series : np.array + X : np.array 1D time series to be transformed. Returns @@ -248,12 +265,13 @@ def _SWAB(self, X): return seg_ts - def _best_line(self, time_series, current_data_point, lower_boundary_window, upper_boundary_window): - """Uses sliding window to find the next best segmentation candidate, used for SWAB + def _best_line(self, X, current_data_point, lower_boundary_window, upper_boundary_window): + """Uses sliding window to find the next best segmentation candidate. + Used inside of the SWAB algorithm. Parameters ---------- - time_series : np.array + X : np.array 1D time series to be segmented. 
current_data_point : int the current_data_point we are observing @@ -269,18 +287,19 @@ def _best_line(self, time_series, current_data_point, lower_boundary_window, upp """ max_window_length = current_data_point + upper_boundary_window - seg_ts = np.array(time_series[current_data_point: current_data_point + lower_boundary_window]) + seg_ts = np.array(X[current_data_point: current_data_point + lower_boundary_window]) current_data_point = current_data_point + lower_boundary_window error = 0 - while current_data_point < max_window_length and current_data_point < len(time_series) and error < self.max_error: - seg_ts = np.append(seg_ts, time_series[current_data_point]) + while current_data_point < max_window_length and current_data_point < len(X) and error < self.max_error: + seg_ts = np.append(seg_ts, X[current_data_point]) error = self._calculate_error(seg_ts) current_data_point = current_data_point + 1 return seg_ts #Create own linear regression, inefficient to use sklearns def _linear_regression(self, time_series): - """Transform a time series + """Creates a new time series using linear regression based + on the given time series. Parameters ---------- @@ -301,52 +320,51 @@ def _linear_regression(self, time_series): regression_line = np.array(linearRegression.predict(X)) return regression_line - def _sum_squared_error(self, time_series, linear_regression_time_series): - """Returns the squared sum error time series and its linear regression + def _sum_squared_error(self, X, p_X): + """Returns the SSE of a value and its predicted value - formula: sse = the sum of the differences of the original series - against the predicted series squared + formula: SSE = ∑i (Xi - p_Xi)^2 Parameters ---------- - time_series : np.array + X : np.array 1D time series. 
- linear_regression_time_series: np.array + p_X: np.array 1D linear time series formatted using linear regression Returns ------- - error - the squared sum error of the split segmentations + error: float + the SSE """ - error = np.sum((time_series - linear_regression_time_series) ** 2) + error = np.sum((X - p_X) ** 2) return error - def _calculate_error(self, time_series): - """Returns the squared sum error of a time series and its linear regression + def _calculate_error(self, X): + """Returns the SSE of a time series and its linear regression Parameters ---------- - time_series : np.array + X : np.array 1D time series. Returns ------- - error - the squared sum error of a time series and it's linear regression + error: float + the SSE """ - lrts = self._linear_regression(time_series) - sse = self._sum_squared_error(time_series, lrts) + lrts = self._linear_regression(X) + sse = self._sum_squared_error(X, lrts) return sse - def _create_segment(self, time_series): - """create a linear segment of a given time series. + def _create_segment(self, X): + """Create a linear segment of a given time series. Parameters ---------- - time_series : np.array + X : np.array 1D time series. Returns @@ -354,12 +372,11 @@ def _create_segment(self, time_series): np.array the linear regression of the time series. """ - return self._linear_regression(time_series) + return self._linear_regression(X) @classmethod def get_test_params(cls, parameter_set="default"): - """ - Return testing parameter settings for the estimator. + """Return testing parameter settings for the estimator. Parameters ---------- @@ -375,7 +392,7 @@ def get_test_params(cls, parameter_set="default"): """ params = { "transformer": PiecewiseLinearApproximation.Transformer.SWAB, - "max_error": 0.95, + "max_error": 5, } return params \ No newline at end of file