diff --git a/nbs/docs/reference/01_nixtla_client.ipynb b/nbs/docs/reference/01_nixtla_client.ipynb index bfb235e38..4b909171b 100644 --- a/nbs/docs/reference/01_nixtla_client.ipynb +++ b/nbs/docs/reference/01_nixtla_client.ipynb @@ -7,6 +7,17 @@ "# SDK Reference" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, { "cell_type": "code", "execution_count": null, @@ -31,15 +42,16 @@ "## NixtlaClient\n", "\n", "> NixtlaClient (api_key:Optional[str]=None, base_url:Optional[str]=None,\n", - "> max_retries:int=6, retry_interval:int=10,\n", + "> timeout:int=60, max_retries:int=6, retry_interval:int=10,\n", "> max_wait_time:int=360)\n", "\n", - "Constructs all the necessary attributes for the NixtlaClient object.\n", + "*Client to interact with the Nixtla API.*\n", "\n", "| | **Type** | **Default** | **Details** |\n", "| -- | -------- | ----------- | ----------- |\n", - "| api_key | Optional | None | The authorization api_key interacts with the Nixtla API.
If not provided, it will be inferred by the NIXTLA_API_KEY environment variable. |\n", -    "| base_url | Optional | None | Custom base_url. Pass only if provided. |\n", +    "| api_key | Optional | None | The api_key used to authenticate with the Nixtla API.<br>If not provided, the NIXTLA_API_KEY environment variable is used. |\n", +    "| base_url | Optional | None | Custom base_url.<br>If not provided, the NIXTLA_BASE_URL environment variable is used. |\n", +    "| timeout | int | 60 | Request timeout in seconds. Set this to `None` to disable it. |\n", "| max_retries | int | 6 | The maximum number of attempts to make when calling the API before giving up.<br>
It defines how many times the client will retry the API call if it fails.
Default value is 6, indicating the client will attempt the API call up to 6 times in total. |\n", "| retry_interval | int | 10 | The interval in seconds between consecutive retry attempts.<br>
This is the waiting period before the client tries to call the API again after a failed attempt.
Default value is 10 seconds, meaning the client waits for 10 seconds between retries. |\n", "| max_wait_time | int | 360 | The maximum total time in seconds that the client will spend on all retry attempts before giving up.
This sets an upper limit on the cumulative waiting time for all retry attempts.
If this time is exceeded, the client will stop retrying and raise an exception.
Default value is 360 seconds, meaning the client will cease retrying if the total time
spent on retries exceeds 360 seconds.
The client throws a ReadTimeout error after 60 seconds of inactivity. If you want to
catch these errors, use max_wait_time >> 60. |" @@ -50,15 +62,16 @@ "## NixtlaClient\n", "\n", "> NixtlaClient (api_key:Optional[str]=None, base_url:Optional[str]=None,\n", - "> max_retries:int=6, retry_interval:int=10,\n", + "> timeout:int=60, max_retries:int=6, retry_interval:int=10,\n", "> max_wait_time:int=360)\n", "\n", - "Constructs all the necessary attributes for the NixtlaClient object.\n", + "*Client to interact with the Nixtla API.*\n", "\n", "| | **Type** | **Default** | **Details** |\n", "| -- | -------- | ----------- | ----------- |\n", - "| api_key | Optional | None | The authorization api_key interacts with the Nixtla API.
If not provided, it will be inferred by the NIXTLA_API_KEY environment variable. |\n", -    "| base_url | Optional | None | Custom base_url. Pass only if provided. |\n", +    "| api_key | Optional | None | The api_key used to authenticate with the Nixtla API.<br>If not provided, the NIXTLA_API_KEY environment variable is used. |\n", +    "| base_url | Optional | None | Custom base_url.<br>If not provided, the NIXTLA_BASE_URL environment variable is used. |\n", +    "| timeout | int | 60 | Request timeout in seconds. Set this to `None` to disable it. |\n", "| max_retries | int | 6 | The maximum number of attempts to make when calling the API before giving up.<br>
It defines how many times the client will retry the API call if it fails.
Default value is 6, indicating the client will attempt the API call up to 6 times in total |\n", "| retry_interval | int | 10 | The interval in seconds between consecutive retry attempts.
This is the waiting period before the client tries to call the API again after a failed attempt.
Default value is 10 seconds, meaning the client waits for 10 seconds between retries. |\n", "| max_wait_time | int | 360 | The maximum total time in seconds that the client will spend on all retry attempts before giving up.
This sets an upper limit on the cumulative waiting time for all retry attempts.
If this time is exceeded, the client will stop retrying and raise an exception.
Default value is 360 seconds, meaning the client will cease retrying if the total time
spent on retries exceeds 360 seconds.
The client throws a ReadTimeout error after 60 seconds of inactivity. If you want to
catch these errors, use max_wait_time >> 60. |" @@ -88,7 +101,7 @@ "\n", "> NixtlaClient.validate_api_key (log:bool=True)\n", "\n", - "Returns True if your api_key is valid." + "*Returns True if your api_key is valid.*" ], "text/plain": [ "---\n", @@ -97,7 +110,7 @@ "\n", "> NixtlaClient.validate_api_key (log:bool=True)\n", "\n", - "Returns True if your api_key is valid." + "*Returns True if your api_key is valid.*" ] }, "execution_count": null, @@ -107,93 +120,7 @@ ], "source": [ "#| echo: false\n", - "show_doc(NixtlaClient.validate_api_key, title_level=2, name='NixtlaClient.validate_api_key')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "## NixtlaClient.plot\n", - "\n", - "> NixtlaClient.plot (df:pandas.core.frame.DataFrame,\n", - "> forecasts_df:Optional[pandas.core.frame.DataFrame]=Non\n", - "> e, id_col:str='unique_id', time_col:str='ds',\n", - "> target_col:str='y', unique_ids:Union[List[str],NoneTyp\n", - "> e,numpy.ndarray]=None, plot_random:bool=True,\n", - "> models:Optional[List[str]]=None,\n", - "> level:Optional[List[float]]=None,\n", - "> max_insample_length:Optional[int]=None,\n", - "> plot_anomalies:bool=False, engine:str='matplotlib',\n", - "> resampler_kwargs:Optional[Dict]=None)\n", - "\n", - "Plot forecasts and insample values.\n", - "\n", - "| | **Type** | **Default** | **Details** |\n", - "| -- | -------- | ----------- | ----------- |\n", - "| df | DataFrame | | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) considering an additional column:
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", - "| forecasts_df | Optional | None | DataFrame with columns [`unique_id`, `ds`] and models. |\n", - "| id_col | str | unique_id | Column that identifies each serie. |\n", - "| time_col | str | ds | Column that identifies each timestep, its values can be timestamps or integers. |\n", - "| target_col | str | y | Column that contains the target. |\n", - "| unique_ids | Union | None | Time Series to plot.
If None, time series are selected randomly. |\n", - "| plot_random | bool | True | Select time series to plot randomly. |\n", - "| models | Optional | None | List of models to plot. |\n", - "| level | Optional | None | List of prediction intervals to plot if paseed. |\n", - "| max_insample_length | Optional | None | Max number of train/insample observations to be plotted. |\n", - "| plot_anomalies | bool | False | Plot anomalies for each prediction interval. |\n", - "| engine | str | matplotlib | Library used to plot. 'plotly', 'plotly-resampler' or 'matplotlib'. |\n", - "| resampler_kwargs | Optional | None | Kwargs to be passed to plotly-resampler constructor.
For further customization (\"show_dash\") call the method,<br>
store the plotting object and add the extra arguments to
its `show_dash` method. |" - ], - "text/plain": [ - "---\n", - "\n", - "## NixtlaClient.plot\n", - "\n", - "> NixtlaClient.plot (df:pandas.core.frame.DataFrame,\n", - "> forecasts_df:Optional[pandas.core.frame.DataFrame]=Non\n", - "> e, id_col:str='unique_id', time_col:str='ds',\n", - "> target_col:str='y', unique_ids:Union[List[str],NoneTyp\n", - "> e,numpy.ndarray]=None, plot_random:bool=True,\n", - "> models:Optional[List[str]]=None,\n", - "> level:Optional[List[float]]=None,\n", - "> max_insample_length:Optional[int]=None,\n", - "> plot_anomalies:bool=False, engine:str='matplotlib',\n", - "> resampler_kwargs:Optional[Dict]=None)\n", - "\n", - "Plot forecasts and insample values.\n", - "\n", - "| | **Type** | **Default** | **Details** |\n", - "| -- | -------- | ----------- | ----------- |\n", - "| df | DataFrame | | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) considering an additional column:
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", - "| forecasts_df | Optional | None | DataFrame with columns [`unique_id`, `ds`] and models. |\n", - "| id_col | str | unique_id | Column that identifies each serie. |\n", - "| time_col | str | ds | Column that identifies each timestep, its values can be timestamps or integers. |\n", - "| target_col | str | y | Column that contains the target. |\n", - "| unique_ids | Union | None | Time Series to plot.
If None, time series are selected randomly. |\n", - "| plot_random | bool | True | Select time series to plot randomly. |\n", - "| models | Optional | None | List of models to plot. |\n", - "| level | Optional | None | List of prediction intervals to plot if paseed. |\n", - "| max_insample_length | Optional | None | Max number of train/insample observations to be plotted. |\n", - "| plot_anomalies | bool | False | Plot anomalies for each prediction interval. |\n", - "| engine | str | matplotlib | Library used to plot. 'plotly', 'plotly-resampler' or 'matplotlib'. |\n", - "| resampler_kwargs | Optional | None | Kwargs to be passed to plotly-resampler constructor.
For further customization (\"show_dash\") call the method,<br>
store the plotting object and add the extra arguments to
its `show_dash` method. |" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#| echo: false\n", - "show_doc(NixtlaClient.plot, name='NixtlaClient.plot', title_level=2)" + "show_doc(NixtlaClient.validate_api_key, title_level=2)" ] }, { @@ -208,88 +135,112 @@ "\n", "## NixtlaClient.forecast\n", "\n", - "> NixtlaClient.forecast (df:pandas.core.frame.DataFrame, h:int,\n", - "> freq:Optional[str]=None, id_col:str='unique_id',\n", + "> NixtlaClient.forecast (df:~AnyDFType, h:typing.Annotated[int,Gt(gt=0)],\n", + "> freq:Union[str,int,pandas._libs.tslibs.offsets.Bas\n", + "> eOffset,NoneType]=None, id_col:str='unique_id',\n", "> time_col:str='ds', target_col:str='y',\n", - "> X_df:Optional[pandas.core.frame.DataFrame]=None,\n", - "> level:Optional[List[Union[int,float]]]=None,\n", - "> quantiles:Optional[List[float]]=None,\n", - "> finetune_steps:int=0, finetune_loss:str='default',\n", + "> X_df:Optional[~AnyDFType]=None,\n", + "> level:Optional[list[Union[int,float]]]=None,\n", + "> quantiles:Optional[list[float]]=None,\n", + "> finetune_steps:typing.Annotated[int,Ge(ge=0)]=0,\n", + "> finetune_depth:Literal[1,2,3,4,5]=1, finetune_loss\n", + "> :Literal['default','mae','mse','rmse','mape','smap\n", + "> e']='default',\n", + "> finetuned_model_id:Optional[str]=None,\n", "> clean_ex_first:bool=True,\n", + "> hist_exog_list:Optional[list[str]]=None,\n", "> validate_api_key:bool=False,\n", - "> add_history:bool=False,\n", - "> date_features:Union[bool,List[str]]=False, date_fe\n", - "> atures_to_one_hot:Union[bool,List[str]]=True,\n", - "> model:str='timegpt-1',\n", - "> num_partitions:Optional[int]=None)\n", + "> add_history:bool=False, date_features:Union[bool,l\n", + "> ist[Union[str,Callable]]]=False, date_features_to_\n", + "> one_hot:Union[bool,list[str]]=False, model:Literal\n", + "> ['azureai','timegpt-1','timegpt-1-long-\n", + "> horizon']='timegpt-1', num_partitions:Optional[Ann\n", + "> otated[int,Gt(gt=0)]]=None,\n", + "> feature_contributions:bool=False)\n", "\n", - "Forecast your time series using TimeGPT.\n", + "*Forecast your time series using TimeGPT.*\n", "\n", "| | **Type** | **Default** | **Details** |\n", "| -- | -------- | ----------- | ----------- |\n", - "| df | DataFrame | | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) considering an additional column:
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", - "| h | int | | Forecast horizon. |\n", - "| freq | Optional | None | Frequency of the data. By default, the freq will be inferred automatically.
See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). |\n", + "| df | AnyDFType | | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) by including an additional column:<br>
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", + "| h | Annotated | | Forecast horizon. |\n", + "| freq | Union | None | Frequency of the timestamps. If `None`, it will be inferred automatically.
See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). |\n", "| id_col | str | unique_id | Column that identifies each serie. |\n", "| time_col | str | ds | Column that identifies each timestep, its values can be timestamps or integers. |\n", "| target_col | str | y | Column that contains the target. |\n", "| X_df | Optional | None | DataFrame with [`unique_id`, `ds`] columns and `df`'s future exogenous. |\n", "| level | Optional | None | Confidence levels between 0 and 100 for prediction intervals. |\n", "| quantiles | Optional | None | Quantiles to forecast, list between (0, 1).
`level` and `quantiles` should not be used simultaneously.
The output dataframe will have the quantile columns
formatted as TimeGPT-q-(100 * q) for each q.
100 * q represents percentiles but we choose this notation
to avoid having dots in column names. |\n", - "| finetune_steps | int | 0 | Number of steps used to finetune learning TimeGPT in the
new data. |\n", - "| finetune_loss | str | default | Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`. |\n", - "| clean_ex_first | bool | True | Clean exogenous signal before making forecasts
using TimeGPT. |\n", - "| validate_api_key | bool | False | If True, validates api_key before
sending requests. |\n", +    "| finetune_steps | Annotated | 0 | Number of steps used to fine-tune TimeGPT on the new data. |\n", +    "| finetune_depth | Literal | 1 | The depth of the finetuning. Uses a scale from 1 to 5, where 1 means little finetuning,<br>
and 5 means that the entire model is finetuned. |\n", + "| finetune_loss | Literal | default | Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`. |\n", + "| finetuned_model_id | Optional | None | ID of previously fine-tuned model to use. |\n", + "| clean_ex_first | bool | True | Clean exogenous signal before making forecasts using TimeGPT. |\n", + "| hist_exog_list | Optional | None | Column names of the historical exogenous features. |\n", + "| validate_api_key | bool | False | If True, validates api_key before sending requests. |\n", "| add_history | bool | False | Return fitted values of the model. |\n", "| date_features | Union | False | Features computed from the dates.
Can be pandas date attributes or functions that will take the dates as input.
If True automatically adds most used date features for the
frequency of `df`. |\n", - "| date_features_to_one_hot | Union | True | Apply one-hot encoding to these date features.
If `date_features=True`, then all date features are
one-hot encoded by default. |\n", - "| model | str | timegpt-1 | Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`.
We recommend using `timegpt-1-long-horizon` for forecasting
if you want to predict more than one seasonal
period given the frequency of your data. |\n", + "| date_features_to_one_hot | Union | False | Apply one-hot encoding to these date features.
If `date_features=True`, then all date features are
one-hot encoded by default. |\n", + "| model | Literal | timegpt-1 | Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`.
We recommend using `timegpt-1-long-horizon` for forecasting
if you want to predict more than one seasonal
period given the frequency of your data. |\n", "| num_partitions | Optional | None | Number of partitions to use.
If None, the number of partitions will be equal
to the available parallel resources in distributed environments. |\n", - "| **Returns** | **pandas.DataFrame** | | **DataFrame with TimeGPT forecasts for point predictions and probabilistic
predictions (if level is not None).** |" + "| feature_contributions | bool | False | |\n", + "| **Returns** | **AnyDFType** | | **DataFrame with TimeGPT forecasts for point predictions and probabilistic
predictions (if level is not None).** |" ], "text/plain": [ "---\n", "\n", "## NixtlaClient.forecast\n", "\n", - "> NixtlaClient.forecast (df:pandas.core.frame.DataFrame, h:int,\n", - "> freq:Optional[str]=None, id_col:str='unique_id',\n", + "> NixtlaClient.forecast (df:~AnyDFType, h:typing.Annotated[int,Gt(gt=0)],\n", + "> freq:Union[str,int,pandas._libs.tslibs.offsets.Bas\n", + "> eOffset,NoneType]=None, id_col:str='unique_id',\n", "> time_col:str='ds', target_col:str='y',\n", - "> X_df:Optional[pandas.core.frame.DataFrame]=None,\n", - "> level:Optional[List[Union[int,float]]]=None,\n", - "> quantiles:Optional[List[float]]=None,\n", - "> finetune_steps:int=0, finetune_loss:str='default',\n", + "> X_df:Optional[~AnyDFType]=None,\n", + "> level:Optional[list[Union[int,float]]]=None,\n", + "> quantiles:Optional[list[float]]=None,\n", + "> finetune_steps:typing.Annotated[int,Ge(ge=0)]=0,\n", + "> finetune_depth:Literal[1,2,3,4,5]=1, finetune_loss\n", + "> :Literal['default','mae','mse','rmse','mape','smap\n", + "> e']='default',\n", + "> finetuned_model_id:Optional[str]=None,\n", "> clean_ex_first:bool=True,\n", + "> hist_exog_list:Optional[list[str]]=None,\n", "> validate_api_key:bool=False,\n", - "> add_history:bool=False,\n", - "> date_features:Union[bool,List[str]]=False, date_fe\n", - "> atures_to_one_hot:Union[bool,List[str]]=True,\n", - "> model:str='timegpt-1',\n", - "> num_partitions:Optional[int]=None)\n", + "> add_history:bool=False, date_features:Union[bool,l\n", + "> ist[Union[str,Callable]]]=False, date_features_to_\n", + "> one_hot:Union[bool,list[str]]=False, model:Literal\n", + "> ['azureai','timegpt-1','timegpt-1-long-\n", + "> horizon']='timegpt-1', num_partitions:Optional[Ann\n", + "> otated[int,Gt(gt=0)]]=None,\n", + "> feature_contributions:bool=False)\n", "\n", - "Forecast your time series using TimeGPT.\n", + "*Forecast your time series using TimeGPT.*\n", "\n", "| | **Type** | **Default** | **Details** |\n", "| -- | -------- | ----------- | ----------- |\n", - "| df | DataFrame | | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) considering an additional column:
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", - "| h | int | | Forecast horizon. |\n", - "| freq | Optional | None | Frequency of the data. By default, the freq will be inferred automatically.
See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). |\n", + "| df | AnyDFType | | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) by including an additional column:<br>
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", + "| h | Annotated | | Forecast horizon. |\n", + "| freq | Union | None | Frequency of the timestamps. If `None`, it will be inferred automatically.
See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). |\n", "| id_col | str | unique_id | Column that identifies each serie. |\n", "| time_col | str | ds | Column that identifies each timestep, its values can be timestamps or integers. |\n", "| target_col | str | y | Column that contains the target. |\n", "| X_df | Optional | None | DataFrame with [`unique_id`, `ds`] columns and `df`'s future exogenous. |\n", "| level | Optional | None | Confidence levels between 0 and 100 for prediction intervals. |\n", "| quantiles | Optional | None | Quantiles to forecast, list between (0, 1).
`level` and `quantiles` should not be used simultaneously.
The output dataframe will have the quantile columns
formatted as TimeGPT-q-(100 * q) for each q.
100 * q represents percentiles but we choose this notation
to avoid having dots in column names. |\n", - "| finetune_steps | int | 0 | Number of steps used to finetune learning TimeGPT in the
new data. |\n", - "| finetune_loss | str | default | Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`. |\n", - "| clean_ex_first | bool | True | Clean exogenous signal before making forecasts
using TimeGPT. |\n", - "| validate_api_key | bool | False | If True, validates api_key before
sending requests. |\n", +    "| finetune_steps | Annotated | 0 | Number of steps used to fine-tune TimeGPT on the new data. |\n", +    "| finetune_depth | Literal | 1 | The depth of the finetuning. Uses a scale from 1 to 5, where 1 means little finetuning,<br>
and 5 means that the entire model is finetuned. |\n", + "| finetune_loss | Literal | default | Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`. |\n", + "| finetuned_model_id | Optional | None | ID of previously fine-tuned model to use. |\n", + "| clean_ex_first | bool | True | Clean exogenous signal before making forecasts using TimeGPT. |\n", + "| hist_exog_list | Optional | None | Column names of the historical exogenous features. |\n", + "| validate_api_key | bool | False | If True, validates api_key before sending requests. |\n", "| add_history | bool | False | Return fitted values of the model. |\n", "| date_features | Union | False | Features computed from the dates.
Can be pandas date attributes or functions that will take the dates as input.
If True automatically adds most used date features for the
frequency of `df`. |\n", - "| date_features_to_one_hot | Union | True | Apply one-hot encoding to these date features.
If `date_features=True`, then all date features are
one-hot encoded by default. |\n", - "| model | str | timegpt-1 | Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`.
We recommend using `timegpt-1-long-horizon` for forecasting
if you want to predict more than one seasonal
period given the frequency of your data. |\n", + "| date_features_to_one_hot | Union | False | Apply one-hot encoding to these date features.
If `date_features=True`, then all date features are
one-hot encoded by default. |\n", + "| model | Literal | timegpt-1 | Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`.
We recommend using `timegpt-1-long-horizon` for forecasting
if you want to predict more than one seasonal
period given the frequency of your data. |\n", "| num_partitions | Optional | None | Number of partitions to use.
If None, the number of partitions will be equal
to the available parallel resources in distributed environments. |\n", - "| **Returns** | **pandas.DataFrame** | | **DataFrame with TimeGPT forecasts for point predictions and probabilistic
predictions (if level is not None).** |" + "| feature_contributions | bool | False | |\n", + "| **Returns** | **AnyDFType** | | **DataFrame with TimeGPT forecasts for point predictions and probabilistic
predictions (if level is not None).** |" ] }, "execution_count": null, @@ -314,92 +265,116 @@ "\n", "## NixtlaClient.cross_validation\n", "\n", - "> NixtlaClient.cross_validation (df:pandas.core.frame.DataFrame, h:int,\n", - "> freq:Optional[str]=None,\n", + "> NixtlaClient.cross_validation (df:~AnyDFType,\n", + "> h:typing.Annotated[int,Gt(gt=0)], freq:Uni\n", + "> on[str,int,pandas._libs.tslibs.offsets.Bas\n", + "> eOffset,NoneType]=None,\n", "> id_col:str='unique_id', time_col:str='ds',\n", - "> target_col:str='y', level:Optional[List[Un\n", + "> target_col:str='y', level:Optional[list[Un\n", "> ion[int,float]]]=None,\n", - "> quantiles:Optional[List[float]]=None,\n", - "> validate_api_key:bool=False,\n", - "> n_windows:int=1,\n", - "> step_size:Optional[int]=None,\n", - "> finetune_steps:int=0,\n", - "> finetune_loss:str='default',\n", - "> clean_ex_first:bool=True,\n", - "> date_features:Union[bool,List[str]]=False,\n", - "> date_features_to_one_hot:Union[bool,List[s\n", - "> tr]]=True, model:str='timegpt-1',\n", - "> num_partitions:Optional[int]=None)\n", - "\n", - "Perform cross validation in your time series using TimeGPT.\n", + "> quantiles:Optional[list[float]]=None,\n", + "> validate_api_key:bool=False, n_windows:typ\n", + "> ing.Annotated[int,Gt(gt=0)]=1, step_size:O\n", + "> ptional[Annotated[int,Gt(gt=0)]]=None, fin\n", + "> etune_steps:typing.Annotated[int,Ge(ge=0)]\n", + "> =0, finetune_depth:Literal[1,2,3,4,5]=1, f\n", + "> inetune_loss:Literal['default','mae','mse'\n", + "> ,'rmse','mape','smape']='default',\n", + "> finetuned_model_id:Optional[str]=None,\n", + "> refit:bool=True, clean_ex_first:bool=True,\n", + "> hist_exog_list:Optional[list[str]]=None,\n", + "> date_features:Union[bool,list[str]]=False,\n", + "> date_features_to_one_hot:Union[bool,list[s\n", + "> tr]]=False, model:Literal['azureai','timeg\n", + "> pt-1','timegpt-1-long-\n", + "> horizon']='timegpt-1', num_partitions:Opti\n", + "> onal[Annotated[int,Gt(gt=0)]]=None)\n", + "\n", + "*Perform cross validation in your time series using TimeGPT.*\n", "\n", "| | **Type** | **Default** | **Details** |\n", "| -- | -------- | ----------- | ----------- |\n", - "| df | DataFrame | | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) considering an additional column:
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", - "| h | int | | Forecast horizon. |\n", - "| freq | Optional | None | Frequency of the data. By default, the freq will be inferred automatically.
See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). |\n", + "| df | AnyDFType | | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) by including an additional column:<br>
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", + "| h | Annotated | | Forecast horizon. |\n", + "| freq | Union | None | Frequency of the timestamps. If `None`, it will be inferred automatically.
See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). |\n", "| id_col | str | unique_id | Column that identifies each serie. |\n", "| time_col | str | ds | Column that identifies each timestep, its values can be timestamps or integers. |\n", "| target_col | str | y | Column that contains the target. |\n", "| level | Optional | None | Confidence level between 0 and 100 for prediction intervals. |\n", - "| quantiles | Optional | None | Quantiles to forecast, list between (0, 1).
`level` and `quantiles` should not be used simultaneously.
The output dataframe will have the quantile columns
formatted as TimeGPT-q-(100 * q) for each q.
100 * q represents percentiles but we choose this notation
to avoid having dots in column names.. |\n", - "| validate_api_key | bool | False | If True, validates api_key before
sending requests. |\n", - "| n_windows | int | 1 | Number of windows to evaluate. |\n", + "| quantiles | Optional | None | Quantiles to forecast, list between (0, 1).
`level` and `quantiles` should not be used simultaneously.
The output dataframe will have the quantile columns
formatted as TimeGPT-q-(100 * q) for each q.
100 * q represents percentiles but we choose this notation
to avoid having dots in column names. |\n", + "| validate_api_key | bool | False | If True, validates api_key before sending requests. |\n", + "| n_windows | Annotated | 1 | Number of windows to evaluate. |\n", "| step_size | Optional | None | Step size between each cross validation window. If None it will be equal to `h`. |\n", - "| finetune_steps | int | 0 | Number of steps used to finetune TimeGPT in the
new data. |\n", - "| finetune_loss | str | default | Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`. |\n", - "| clean_ex_first | bool | True | Clean exogenous signal before making forecasts
using TimeGPT. |\n", + "| finetune_steps | Annotated | 0 | Number of steps used to finetune TimeGPT in the
new data. |\n", + "| finetune_depth | Literal | 1 | The depth of the finetuning. Uses a scale from 1 to 5, where 1 means little finetuning,
and 5 means that the entire model is finetuned. |\n", + "| finetune_loss | Literal | default | Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`. |\n", + "| finetuned_model_id | Optional | None | ID of previously fine-tuned model to use. |\n", + "| refit | bool | True | Fine-tune the model in each window. If `False`, only fine-tunes on the first window.
Only used if `finetune_steps` > 0. |\n", + "| clean_ex_first | bool | True | Clean exogenous signal before making forecasts using TimeGPT. |\n", + "| hist_exog_list | Optional | None | Column names of the historical exogenous features. |\n", "| date_features | Union | False | Features computed from the dates.
Can be pandas date attributes or functions that will take the dates as input.
If True automatically adds most used date features for the
frequency of `df`. |\n", - "| date_features_to_one_hot | Union | True | Apply one-hot encoding to these date features.
If `date_features=True`, then all date features are
one-hot encoded by default. |\n", - "| model | str | timegpt-1 | Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`.
We recommend using `timegpt-1-long-horizon` for forecasting
if you want to predict more than one seasonal
period given the frequency of your data. |\n", + "| date_features_to_one_hot | Union | False | Apply one-hot encoding to these date features.
If `date_features=True`, then all date features are
one-hot encoded by default. |\n", + "| model | Literal | timegpt-1 | Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`.
We recommend using `timegpt-1-long-horizon` for forecasting
if you want to predict more than one seasonal
period given the frequency of your data. |\n", "| num_partitions | Optional | None | Number of partitions to use.
If None, the number of partitions will be equal
to the available parallel resources in distributed environments. |\n", - "| **Returns** | **pandas.DataFrame** | | **DataFrame with cross validation forecasts.** |" + "| **Returns** | **AnyDFType** | | **DataFrame with cross validation forecasts.** |" ], "text/plain": [ "---\n", "\n", "## NixtlaClient.cross_validation\n", "\n", - "> NixtlaClient.cross_validation (df:pandas.core.frame.DataFrame, h:int,\n", - "> freq:Optional[str]=None,\n", + "> NixtlaClient.cross_validation (df:~AnyDFType,\n", + "> h:typing.Annotated[int,Gt(gt=0)], freq:Uni\n", + "> on[str,int,pandas._libs.tslibs.offsets.Bas\n", + "> eOffset,NoneType]=None,\n", "> id_col:str='unique_id', time_col:str='ds',\n", - "> target_col:str='y', level:Optional[List[Un\n", + "> target_col:str='y', level:Optional[list[Un\n", "> ion[int,float]]]=None,\n", - "> quantiles:Optional[List[float]]=None,\n", - "> validate_api_key:bool=False,\n", - "> n_windows:int=1,\n", - "> step_size:Optional[int]=None,\n", - "> finetune_steps:int=0,\n", - "> finetune_loss:str='default',\n", - "> clean_ex_first:bool=True,\n", - "> date_features:Union[bool,List[str]]=False,\n", - "> date_features_to_one_hot:Union[bool,List[s\n", - "> tr]]=True, model:str='timegpt-1',\n", - "> num_partitions:Optional[int]=None)\n", - "\n", - "Perform cross validation in your time series using TimeGPT.\n", + "> quantiles:Optional[list[float]]=None,\n", + "> validate_api_key:bool=False, n_windows:typ\n", + "> ing.Annotated[int,Gt(gt=0)]=1, step_size:O\n", + "> ptional[Annotated[int,Gt(gt=0)]]=None, fin\n", + "> etune_steps:typing.Annotated[int,Ge(ge=0)]\n", + "> =0, finetune_depth:Literal[1,2,3,4,5]=1, f\n", + "> inetune_loss:Literal['default','mae','mse'\n", + "> ,'rmse','mape','smape']='default',\n", + "> finetuned_model_id:Optional[str]=None,\n", + "> refit:bool=True, clean_ex_first:bool=True,\n", + "> hist_exog_list:Optional[list[str]]=None,\n", + "> date_features:Union[bool,list[str]]=False,\n", + "> date_features_to_one_hot:Union[bool,list[s\n", + "> tr]]=False, model:Literal['azureai','timeg\n", + "> pt-1','timegpt-1-long-\n", + "> horizon']='timegpt-1', num_partitions:Opti\n", + "> onal[Annotated[int,Gt(gt=0)]]=None)\n", + "\n", + "*Perform cross validation in your time series using TimeGPT.*\n", "\n", "| | **Type** | **Default** | **Details** |\n", "| -- | -------- | ----------- | ----------- |\n", - "| df | DataFrame | | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) considering an additional column:
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", - "| h | int | | Forecast horizon. |\n", - "| freq | Optional | None | Frequency of the data. By default, the freq will be inferred automatically.
See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). |\n", + "| df | AnyDFType | | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) by including an additional column:<br>
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", + "| h | Annotated | | Forecast horizon. |\n", + "| freq | Union | None | Frequency of the timestamps. If `None`, it will be inferred automatically.
See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). |\n", "| id_col | str | unique_id | Column that identifies each serie. |\n", "| time_col | str | ds | Column that identifies each timestep, its values can be timestamps or integers. |\n", "| target_col | str | y | Column that contains the target. |\n", "| level | Optional | None | Confidence level between 0 and 100 for prediction intervals. |\n", - "| quantiles | Optional | None | Quantiles to forecast, list between (0, 1).
`level` and `quantiles` should not be used simultaneously.
The output dataframe will have the quantile columns
formatted as TimeGPT-q-(100 * q) for each q.
100 * q represents percentiles but we choose this notation
to avoid having dots in column names.. |\n", - "| validate_api_key | bool | False | If True, validates api_key before
sending requests. |\n", - "| n_windows | int | 1 | Number of windows to evaluate. |\n", + "| quantiles | Optional | None | Quantiles to forecast, list between (0, 1).
`level` and `quantiles` should not be used simultaneously.
The output dataframe will have the quantile columns
formatted as TimeGPT-q-(100 * q) for each q.
100 * q represents percentiles but we choose this notation
to avoid having dots in column names. |\n", + "| validate_api_key | bool | False | If True, validates api_key before sending requests. |\n", + "| n_windows | Annotated | 1 | Number of windows to evaluate. |\n", "| step_size | Optional | None | Step size between each cross validation window. If None it will be equal to `h`. |\n", - "| finetune_steps | int | 0 | Number of steps used to finetune TimeGPT in the
new data. |\n", - "| finetune_loss | str | default | Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`. |\n", - "| clean_ex_first | bool | True | Clean exogenous signal before making forecasts
using TimeGPT. |\n", + "| finetune_steps | Annotated | 0 | Number of steps used to finetune TimeGPT in the
new data. |\n", + "| finetune_depth | Literal | 1 | The depth of the finetuning. Uses a scale from 1 to 5, where 1 means little finetuning,
and 5 means that the entire model is finetuned. |\n", + "| finetune_loss | Literal | default | Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`. |\n", + "| finetuned_model_id | Optional | None | ID of previously fine-tuned model to use. |\n", + "| refit | bool | True | Fine-tune the model in each window. If `False`, only fine-tunes on the first window.
Only used if `finetune_steps` > 0. |\n", + "| clean_ex_first | bool | True | Clean exogenous signal before making forecasts using TimeGPT. |\n", + "| hist_exog_list | Optional | None | Column names of the historical exogenous features. |\n", "| date_features | Union | False | Features computed from the dates.
Can be pandas date attributes or functions that will take the dates as input.
If True automatically adds most used date features for the
frequency of `df`. |\n", - "| date_features_to_one_hot | Union | True | Apply one-hot encoding to these date features.
If `date_features=True`, then all date features are
one-hot encoded by default. |\n", - "| model | str | timegpt-1 | Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`.
We recommend using `timegpt-1-long-horizon` for forecasting
if you want to predict more than one seasonal
period given the frequency of your data. |\n", + "| date_features_to_one_hot | Union | False | Apply one-hot encoding to these date features.
If `date_features=True`, then all date features are
one-hot encoded by default. |\n", + "| model | Literal | timegpt-1 | Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`.
We recommend using `timegpt-1-long-horizon` for forecasting
if you want to predict more than one seasonal
period given the frequency of your data. |\n", "| num_partitions | Optional | None | Number of partitions to use.
If None, the number of partitions will be equal
to the available parallel resources in distributed environments. |\n", - "| **Returns** | **pandas.DataFrame** | | **DataFrame with cross validation forecasts.** |" + "| **Returns** | **AnyDFType** | | **DataFrame with cross validation forecasts.** |" ] }, "execution_count": null, @@ -424,70 +399,80 @@ "\n", "## NixtlaClient.detect_anomalies\n", "\n", - "> NixtlaClient.detect_anomalies (df:pandas.core.frame.DataFrame,\n", - "> freq:Optional[str]=None,\n", + "> NixtlaClient.detect_anomalies (df:~AnyDFType,\n", + "> freq:Union[str,int,pandas._libs.tslibs.off\n", + "> sets.BaseOffset,NoneType]=None,\n", "> id_col:str='unique_id', time_col:str='ds',\n", "> target_col:str='y',\n", "> level:Union[int,float]=99,\n", + "> finetuned_model_id:Optional[str]=None,\n", "> clean_ex_first:bool=True,\n", "> validate_api_key:bool=False,\n", - "> date_features:Union[bool,List[str]]=False,\n", - "> date_features_to_one_hot:Union[bool,List[s\n", - "> tr]]=True, model:str='timegpt-1',\n", - "> num_partitions:Optional[int]=None)\n", + "> date_features:Union[bool,list[str]]=False,\n", + "> date_features_to_one_hot:Union[bool,list[s\n", + "> tr]]=False, model:Literal['azureai','timeg\n", + "> pt-1','timegpt-1-long-\n", + "> horizon']='timegpt-1', num_partitions:Opti\n", + "> onal[Annotated[int,Gt(gt=0)]]=None)\n", "\n", - "Detect anomalies in your time series using TimeGPT.\n", + "*Detect anomalies in your time series using TimeGPT.*\n", "\n", "| | **Type** | **Default** | **Details** |\n", "| -- | -------- | ----------- | ----------- |\n", - "| df | DataFrame | | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) considering an additional column:
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", - "| freq | Optional | None | Frequency of the data. By default, the freq will be inferred automatically.
See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). |\n", + "| df | AnyDFType | | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) by including an additional column:<br>
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", + "| freq | Union | None | Frequency of the timestamps. If `None`, it will be inferred automatically.
See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). |\n", "| id_col | str | unique_id | Column that identifies each serie. |\n", "| time_col | str | ds | Column that identifies each timestep, its values can be timestamps or integers. |\n", "| target_col | str | y | Column that contains the target. |\n", "| level | Union | 99 | Confidence level between 0 and 100 for detecting the anomalies. |\n", + "| finetuned_model_id | Optional | None | ID of previously fine-tuned model to use. |\n", "| clean_ex_first | bool | True | Clean exogenous signal before making forecasts
using TimeGPT. |\n", - "| validate_api_key | bool | False | If True, validates api_key before
sending requests. |\n", + "| validate_api_key | bool | False | If True, validates api_key before sending requests. |\n", "| date_features | Union | False | Features computed from the dates.
Can be pandas date attributes or functions that will take the dates as input.
If True automatically adds most used date features for the
frequency of `df`. |\n", - "| date_features_to_one_hot | Union | True | Apply one-hot encoding to these date features.
If `date_features=True`, then all date features are
one-hot encoded by default. |\n", - "| model | str | timegpt-1 | Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`.
We recommend using `timegpt-1-long-horizon` for forecasting
if you want to predict more than one seasonal
period given the frequency of your data. |\n", + "| date_features_to_one_hot | Union | False | Apply one-hot encoding to these date features.
If `date_features=True`, then all date features are
one-hot encoded by default. |\n", + "| model | Literal | timegpt-1 | Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`.
We recommend using `timegpt-1-long-horizon` for forecasting
if you want to predict more than one seasonal
period given the frequency of your data. |\n", "| num_partitions | Optional | None | Number of partitions to use.
If None, the number of partitions will be equal
to the available parallel resources in distributed environments. |\n", - "| **Returns** | **pandas.DataFrame** | | **DataFrame with anomalies flagged with 1 detected by TimeGPT.** |" + "| **Returns** | **AnyDFType** | | **DataFrame with anomalies flagged by TimeGPT.** |" ], "text/plain": [ "---\n", "\n", "## NixtlaClient.detect_anomalies\n", "\n", - "> NixtlaClient.detect_anomalies (df:pandas.core.frame.DataFrame,\n", - "> freq:Optional[str]=None,\n", + "> NixtlaClient.detect_anomalies (df:~AnyDFType,\n", + "> freq:Union[str,int,pandas._libs.tslibs.off\n", + "> sets.BaseOffset,NoneType]=None,\n", "> id_col:str='unique_id', time_col:str='ds',\n", "> target_col:str='y',\n", "> level:Union[int,float]=99,\n", + "> finetuned_model_id:Optional[str]=None,\n", "> clean_ex_first:bool=True,\n", "> validate_api_key:bool=False,\n", - "> date_features:Union[bool,List[str]]=False,\n", - "> date_features_to_one_hot:Union[bool,List[s\n", - "> tr]]=True, model:str='timegpt-1',\n", - "> num_partitions:Optional[int]=None)\n", + "> date_features:Union[bool,list[str]]=False,\n", + "> date_features_to_one_hot:Union[bool,list[s\n", + "> tr]]=False, model:Literal['azureai','timeg\n", + "> pt-1','timegpt-1-long-\n", + "> horizon']='timegpt-1', num_partitions:Opti\n", + "> onal[Annotated[int,Gt(gt=0)]]=None)\n", "\n", - "Detect anomalies in your time series using TimeGPT.\n", + "*Detect anomalies in your time series using TimeGPT.*\n", "\n", "| | **Type** | **Default** | **Details** |\n", "| -- | -------- | ----------- | ----------- |\n", - "| df | DataFrame | | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) considering an additional column:
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", - "| freq | Optional | None | Frequency of the data. By default, the freq will be inferred automatically.
See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). |\n", + "| df | AnyDFType | | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) by including an additional column:<br>
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", + "| freq | Union | None | Frequency of the timestamps. If `None`, it will be inferred automatically.
See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). |\n", "| id_col | str | unique_id | Column that identifies each serie. |\n", "| time_col | str | ds | Column that identifies each timestep, its values can be timestamps or integers. |\n", "| target_col | str | y | Column that contains the target. |\n", "| level | Union | 99 | Confidence level between 0 and 100 for detecting the anomalies. |\n", + "| finetuned_model_id | Optional | None | ID of previously fine-tuned model to use. |\n", "| clean_ex_first | bool | True | Clean exogenous signal before making forecasts
using TimeGPT. |\n", - "| validate_api_key | bool | False | If True, validates api_key before
sending requests. |\n", + "| validate_api_key | bool | False | If True, validates api_key before sending requests. |\n", "| date_features | Union | False | Features computed from the dates.
Can be pandas date attributes or functions that will take the dates as input.
If True automatically adds most used date features for the
frequency of `df`. |\n", - "| date_features_to_one_hot | Union | True | Apply one-hot encoding to these date features.
If `date_features=True`, then all date features are
one-hot encoded by default. |\n", - "| model | str | timegpt-1 | Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`.
We recommend using `timegpt-1-long-horizon` for forecasting
if you want to predict more than one seasonal
period given the frequency of your data. |\n", + "| date_features_to_one_hot | Union | False | Apply one-hot encoding to these date features.
If `date_features=True`, then all date features are
one-hot encoded by default. |\n", + "| model | Literal | timegpt-1 | Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`.
We recommend using `timegpt-1-long-horizon` for forecasting
if you want to predict more than one seasonal
period given the frequency of your data. |\n", "| num_partitions | Optional | None | Number of partitions to use.
If None, the number of partitions will be equal
to the available parallel resources in distributed environments. |\n", - "| **Returns** | **pandas.DataFrame** | | **DataFrame with anomalies flagged with 1 detected by TimeGPT.** |" + "| **Returns** | **AnyDFType** | | **DataFrame with anomalies flagged by TimeGPT.** |" ] }, "execution_count": null, @@ -499,6 +484,316 @@ "#| echo: false\n", "show_doc(NixtlaClient.detect_anomalies, title_level=2)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "## NixtlaClient.usage\n", + "\n", + "> NixtlaClient.usage ()\n", + "\n", + "*Query consumed requests and limits*" + ], + "text/plain": [ + "---\n", + "\n", + "## NixtlaClient.usage\n", + "\n", + "> NixtlaClient.usage ()\n", + "\n", + "*Query consumed requests and limits*" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#| echo: false\n", + "show_doc(NixtlaClient.usage, title_level=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "## NixtlaClient.finetune\n", + "\n", + "> NixtlaClient.finetune\n", + "> (df:Union[pandas.core.frame.DataFrame,polars.dataf\n", + "> rame.frame.DataFrame], freq:Union[str,int,pandas._\n", + "> libs.tslibs.offsets.BaseOffset,NoneType]=None,\n", + "> id_col:str='unique_id', time_col:str='ds',\n", + "> target_col:str='y',\n", + "> finetune_steps:typing.Annotated[int,Ge(ge=0)]=10,\n", + "> finetune_depth:Literal[1,2,3,4,5]=1, finetune_loss\n", + "> :Literal['default','mae','mse','rmse','mape','smap\n", + "> e']='default', output_model_id:Optional[str]=None,\n", + "> finetuned_model_id:Optional[str]=None, model:Liter\n", + "> al['azureai','timegpt-1','timegpt-1-long-\n", + "> horizon']='timegpt-1')\n", + "\n", + "*Fine-tune TimeGPT to your series.*\n", + "\n", + "| | **Type** | **Default** | **Details** |\n", + "| -- | -------- | ----------- | ----------- |\n", + "| df | Union | | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) by including an additional column:<br>
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", + "| freq | Union | None | Frequency of the timestamps. If `None`, it will be inferred automatically.
See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). |\n", "| id_col | str | unique_id | Column that identifies each series. |\n", "| time_col | str | ds | Column that identifies each timestep, its values can be timestamps or integers. |\n", "| target_col | str | y | Column that contains the target. |\n", "| finetune_steps | Annotated | 10 | Number of steps used to fine-tune TimeGPT on the new data. |\n", "| finetune_depth | Literal | 1 | The depth of the finetuning. Uses a scale from 1 to 5, where 1 means little finetuning,<br>
and 5 means that the entire model is finetuned. |\n", "| finetune_loss | Literal | default | Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`. |\n", "| output_model_id | Optional | None | ID to assign to the fine-tuned model. If `None`, a UUID is used. |\n", "| finetuned_model_id | Optional | None | ID of previously fine-tuned model to use as base. |\n", "| model | Literal | timegpt-1 | Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`.<br>
We recommend using `timegpt-1-long-horizon` for forecasting
if you want to predict more than one seasonal
period given the frequency of your data. |\n", + "| **Returns** | **str** | | **ID of the fine-tuned model** |" + ], + "text/plain": [ + "---\n", + "\n", + "## NixtlaClient.finetune\n", + "\n", + "> NixtlaClient.finetune\n", + "> (df:Union[pandas.core.frame.DataFrame,polars.dataf\n", + "> rame.frame.DataFrame], freq:Union[str,int,pandas._\n", + "> libs.tslibs.offsets.BaseOffset,NoneType]=None,\n", + "> id_col:str='unique_id', time_col:str='ds',\n", + "> target_col:str='y',\n", + "> finetune_steps:typing.Annotated[int,Ge(ge=0)]=10,\n", + "> finetune_depth:Literal[1,2,3,4,5]=1, finetune_loss\n", + "> :Literal['default','mae','mse','rmse','mape','smap\n", + "> e']='default', output_model_id:Optional[str]=None,\n", + "> finetuned_model_id:Optional[str]=None, model:Liter\n", + "> al['azureai','timegpt-1','timegpt-1-long-\n", + "> horizon']='timegpt-1')\n", + "\n", + "*Fine-tune TimeGPT to your series.*\n", + "\n", + "| | **Type** | **Default** | **Details** |\n", + "| -- | -------- | ----------- | ----------- |\n", + "| df | Union | | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) by including an additional column:<br>
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", + "| freq | Union | None | Frequency of the timestamps. If `None`, it will be inferred automatically.
See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). |\n", "| id_col | str | unique_id | Column that identifies each series. |\n", "| time_col | str | ds | Column that identifies each timestep; its values can be timestamps or integers. |\n", "| target_col | str | y | Column that contains the target. |\n", "| finetune_steps | Annotated | 10 | Number of steps used to finetune TimeGPT on the new data. |\n", "| finetune_depth | Literal | 1 | The depth of the finetuning. Uses a scale from 1 to 5, where 1 means little finetuning,<br>
and 5 means that the entire model is finetuned. |\n", "| finetune_loss | Literal | default | Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`. |\n", "| output_model_id | Optional | None | ID to assign to the fine-tuned model. If `None`, a UUID is used. |\n", "| finetuned_model_id | Optional | None | ID of previously fine-tuned model to use as base. |\n", "| model | Literal | timegpt-1 | Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`.<br>
We recommend using `timegpt-1-long-horizon` for forecasting
if you want to predict more than one seasonal
period given the frequency of your data. |\n", + "| **Returns** | **str** | | **ID of the fine-tuned model** |" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#| echo: false\n", + "show_doc(NixtlaClient.finetune, title_level=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "## NixtlaClient.finetuned_models\n", + "\n", + "> NixtlaClient.finetuned_models ()\n", + "\n", + "*List fine-tuned models*" + ], + "text/plain": [ + "---\n", + "\n", + "## NixtlaClient.finetuned_models\n", + "\n", + "> NixtlaClient.finetuned_models ()\n", + "\n", + "*List fine-tuned models*" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#| echo: false\n", + "show_doc(NixtlaClient.finetuned_models, title_level=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "## NixtlaClient.delete_finetuned_model\n", + "\n", + "> NixtlaClient.delete_finetuned_model (finetuned_model_id:str)\n", + "\n", + "*Delete a previously fine-tuned model*\n", + "\n", + "| | **Type** | **Details** |\n", + "| -- | -------- | ----------- |\n", + "| finetuned_model_id | str | ID of the fine-tuned model to be deleted. |\n", + "| **Returns** | **bool** | **Whether delete was successful.** |" + ], + "text/plain": [ + "---\n", + "\n", + "## NixtlaClient.delete_finetuned_model\n", + "\n", + "> NixtlaClient.delete_finetuned_model (finetuned_model_id:str)\n", + "\n", + "*Delete a previously fine-tuned model*\n", + "\n", + "| | **Type** | **Details** |\n", + "| -- | -------- | ----------- |\n", + "| finetuned_model_id | str | ID of the fine-tuned model to be deleted. |\n", + "| **Returns** | **bool** | **Whether delete was successful.** |" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#| echo: false\n", + "show_doc(NixtlaClient.delete_finetuned_model, title_level=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "## NixtlaClient.plot\n", + "\n", + "> NixtlaClient.plot (df:Union[pandas.core.frame.DataFrame,polars.dataframe.\n", + "> frame.DataFrame,NoneType]=None, forecasts_df:Union[pan\n", + "> das.core.frame.DataFrame,polars.dataframe.frame.DataFr\n", + "> ame,NoneType]=None, id_col:str='unique_id',\n", + "> time_col:str='ds', target_col:str='y', unique_ids:Unio\n", + "> n[list[str],NoneType,numpy.ndarray]=None,\n", + "> plot_random:bool=True, max_ids:int=8,\n", + "> models:Optional[list[str]]=None,\n", + "> level:Optional[list[Union[int,float]]]=None,\n", + "> max_insample_length:Optional[int]=None,\n", + "> plot_anomalies:bool=False,\n", + "> engine:Literal['matplotlib','plotly','plotly-\n", + "> resampler']='matplotlib',\n", + "> resampler_kwargs:Optional[dict]=None, ax:Union[Forward\n", + "> Ref('plt.Axes'),numpy.ndarray,ForwardRef('plotly.graph\n", + "> _objects.Figure'),NoneType]=None)\n", + "\n", + "*Plot forecasts and insample values.*\n", + "\n", + "| | **Type** | **Default** | **Details** |\n", + "| -- | -------- | ----------- | ----------- |\n", + "| df | Union | None | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) by including an additional column:<br>
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", "| forecasts_df | Union | None | DataFrame with columns [`unique_id`, `ds`] and models. |\n", "| id_col | str | unique_id | Column that identifies each series. |\n", "| time_col | str | ds | Column that identifies each timestep; its values can be timestamps or integers. |\n", "| target_col | str | y | Column that contains the target. |\n", "| unique_ids | Union | None | Time series to plot.<br>
If None, time series are selected randomly. |\n", "| plot_random | bool | True | Select time series to plot randomly. |\n", "| max_ids | int | 8 | Maximum number of ids to plot. |\n", "| models | Optional | None | List of models to plot. |\n", "| level | Optional | None | List of prediction intervals to plot, if passed. |\n", "| max_insample_length | Optional | None | Max number of train/insample observations to be plotted. |\n", "| plot_anomalies | bool | False | Plot anomalies for each prediction interval. |\n", "| engine | Literal | matplotlib | Library used to plot. 'matplotlib', 'plotly' or 'plotly-resampler'. |\n", "| resampler_kwargs | Optional | None | Kwargs to be passed to the plotly-resampler constructor.<br>
For further customization (\"show_dash\"), call the method,<br>
store the plotting object and add the extra arguments to
its `show_dash` method. |\n", + "| ax | Union | None | Object where plots will be added. |" + ], + "text/plain": [ + "---\n", + "\n", + "## NixtlaClient.plot\n", + "\n", + "> NixtlaClient.plot (df:Union[pandas.core.frame.DataFrame,polars.dataframe.\n", + "> frame.DataFrame,NoneType]=None, forecasts_df:Union[pan\n", + "> das.core.frame.DataFrame,polars.dataframe.frame.DataFr\n", + "> ame,NoneType]=None, id_col:str='unique_id',\n", + "> time_col:str='ds', target_col:str='y', unique_ids:Unio\n", + "> n[list[str],NoneType,numpy.ndarray]=None,\n", + "> plot_random:bool=True, max_ids:int=8,\n", + "> models:Optional[list[str]]=None,\n", + "> level:Optional[list[Union[int,float]]]=None,\n", + "> max_insample_length:Optional[int]=None,\n", + "> plot_anomalies:bool=False,\n", + "> engine:Literal['matplotlib','plotly','plotly-\n", + "> resampler']='matplotlib',\n", + "> resampler_kwargs:Optional[dict]=None, ax:Union[Forward\n", + "> Ref('plt.Axes'),numpy.ndarray,ForwardRef('plotly.graph\n", + "> _objects.Figure'),NoneType]=None)\n", + "\n", + "*Plot forecasts and insample values.*\n", + "\n", + "| | **Type** | **Default** | **Details** |\n", + "| -- | -------- | ----------- | ----------- |\n", + "| df | Union | None | The DataFrame on which the function will operate. Expected to contain at least the following columns:
- time_col:
Column name in `df` that contains the time indices of the time series. This is typically a datetime
column with regular intervals, e.g., hourly, daily, monthly data points.
- target_col:
Column name in `df` that contains the target variable of the time series, i.e., the variable we
wish to predict or analyze.
Additionally, you can pass multiple time series (stacked in the dataframe) by including an additional column:<br>
- id_col:
Column name in `df` that identifies unique time series. Each unique value in this column
corresponds to a unique time series. |\n", "| forecasts_df | Union | None | DataFrame with columns [`unique_id`, `ds`] and models. |\n", "| id_col | str | unique_id | Column that identifies each series. |\n", "| time_col | str | ds | Column that identifies each timestep; its values can be timestamps or integers. |\n", "| target_col | str | y | Column that contains the target. |\n", "| unique_ids | Union | None | Time series to plot.<br>
If None, time series are selected randomly. |\n", "| plot_random | bool | True | Select time series to plot randomly. |\n", "| max_ids | int | 8 | Maximum number of ids to plot. |\n", "| models | Optional | None | List of models to plot. |\n", "| level | Optional | None | List of prediction intervals to plot, if passed. |\n", "| max_insample_length | Optional | None | Max number of train/insample observations to be plotted. |\n", "| plot_anomalies | bool | False | Plot anomalies for each prediction interval. |\n", "| engine | Literal | matplotlib | Library used to plot. 'matplotlib', 'plotly' or 'plotly-resampler'. |\n", "| resampler_kwargs | Optional | None | Kwargs to be passed to the plotly-resampler constructor.<br>
For further customization (\"show_dash\"), call the method,<br>
store the plotting object and add the extra arguments to
its `show_dash` method. |\n", + "| ax | Union | None | Object where plots will be added. |" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#| echo: false\n", + "show_doc(NixtlaClient.plot, title_level=2)" + ] } ], "metadata": { diff --git a/nbs/src/nixtla_client.ipynb b/nbs/src/nixtla_client.ipynb index 68ac6d18f..ca7a48459 100644 --- a/nbs/src/nixtla_client.ipynb +++ b/nbs/src/nixtla_client.ipynb @@ -34,6 +34,7 @@ "outputs": [], "source": [ "#| export\n", + "import datetime\n", "import logging\n", "import math\n", "import os\n", @@ -59,6 +60,7 @@ "import pandas as pd\n", "import utilsforecast.processing as ufp\n", "import zstandard as zstd\n", + "from pydantic import BaseModel\n", "from tenacity import (\n", " RetryCallState,\n", " retry,\n", @@ -139,14 +141,17 @@ "outputs": [], "source": [ "#| hide\n", + "import time\n", + "import uuid\n", "from contextlib import contextmanager\n", "from itertools import product\n", - "from time import time, sleep\n", "\n", "from dotenv import load_dotenv\n", "from fastcore.test import test_eq, test_fail\n", "from utilsforecast.data import generate_series\n", + "from utilsforecast.evaluation import evaluate\n", "from utilsforecast.feature_engineering import fourier\n", + "from utilsforecast.losses import rmse\n", "\n", "from nixtla.date_features import SpecialDates" ] @@ -176,6 +181,17 @@ "_Freq = Union[str, int, pd.offsets.BaseOffset]\n", "_FreqType = TypeVar(\"_FreqType\", str, int, pd.offsets.BaseOffset)\n", "\n", + "class FinetunedModel(BaseModel, extra='allow'): # type: ignore\n", + " id: str\n", + " created_at: datetime.datetime\n", + " created_by: str\n", + " base_model_id: str\n", + " steps: int\n", + " depth: int\n", + " loss: _Loss\n", + " model: _Model\n", + " freq: str\n", + "\n", "_date_features_by_freq = {\n", " # Daily frequencies\n", " 'B': ['year', 'month', 'day', 'weekday'],\n", @@ -1000,6 +1016,12 @@ " )\n", "\n", " def usage(self) -> dict[str, dict[str, int]]:\n", + " \"\"\"Query consumed requests and limits\n", + " \n", + " Returns\n", + " -------\n", + " dict\n", + " Consumed requests and limits by minute and month.\"\"\"\n", " if self._is_azure:\n", " raise NotImplementedError('usage is not implemented for Azure deployments')\n", " with httpx.Client(**self._client_kwargs) as client:\n", @@ -1009,6 +1031,148 @@ " raise ApiError(status_code=resp.status_code, body=body)\n", " return body\n", "\n", + " def finetune(\n", + " self,\n", + " df: DataFrame,\n", + " freq: Optional[_Freq] = None, \n", + " id_col: str = 'unique_id',\n", + " time_col: str = 'ds',\n", + " target_col: str = 'y',\n", + " finetune_steps: _NonNegativeInt = 10,\n", + " finetune_depth: _Finetune_Depth = 1,\n", + " finetune_loss: _Loss = 'default',\n", + " output_model_id: Optional[str] = None,\n", + " finetuned_model_id: Optional[str] = None,\n", + " model: _Model = 'timegpt-1',\n", + " ) -> str:\n", + " \"\"\"Fine-tune TimeGPT to your series.\n", + "\n", + " Parameters\n", + " ----------\n", + " df : pandas or polars DataFrame\n", + " The DataFrame on which the function will operate. Expected to contain at least the following columns:\n", + " - time_col:\n", + " Column name in `df` that contains the time indices of the time series. 
This is typically a datetime\n", + " column with regular intervals, e.g., hourly, daily, monthly data points.\n", + " - target_col:\n", + " Column name in `df` that contains the target variable of the time series, i.e., the variable we \n", + " wish to predict or analyze.\n", + " Additionally, you can pass multiple time series (stacked in the dataframe) considering an additional column:\n", + " - id_col:\n", + " Column name in `df` that identifies unique time series. Each unique value in this column\n", + " corresponds to a unique time series.\n", + " freq : str, int or pandas offset, optional (default=None).\n", + " Frequency of the timestamps. If `None`, it will be inferred automatically.\n", + " See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases).\n", + " id_col : str (default='unique_id')\n", + " Column that identifies each serie.\n", + " time_col : str (default='ds')\n", + " Column that identifies each timestep, its values can be timestamps or integers.\n", + " target_col : str (default='y')\n", + " Column that contains the target.\n", + " finetune_steps : int (default=10)\n", + " Number of steps used to finetune learning TimeGPT in the new data.\n", + " finetune_depth : int (default=1)\n", + " The depth of the finetuning. Uses a scale from 1 to 5, where 1 means little finetuning,\n", + " and 5 means that the entire model is finetuned.\n", + " finetune_loss : str (default='default')\n", + " Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`.\n", + " output_model_id : str, optional(default=None)\n", + " ID to assign to the fine-tuned model. If `None`, an UUID is used. \n", + " finetuned_model_id : str, optional(default=None)\n", + " ID of previously fine-tuned model to use as base.\n", + " model : str (default='timegpt-1')\n", + " Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`. 
\n", + " We recommend using `timegpt-1-long-horizon` for forecasting \n", + " if you want to predict more than one seasonal \n", + " period given the frequency of your data.\n", + "\n", + " Returns\n", + " -------\n", + " str\n", + " ID of the fine-tuned model\n", + " \"\"\"\n", + " if not isinstance(df, (pd.DataFrame, pl_DataFrame)):\n", + " raise ValueError(\"Can only fine-tune on pandas or polars dataframes.\")\n", + " model = self._maybe_override_model(model)\n", + " logger.info('Validating inputs...')\n", + " df, X_df, drop_id, freq = self._run_validations(\n", + " df=df,\n", + " X_df=None,\n", + " id_col=id_col,\n", + " time_col=time_col,\n", + " target_col=target_col,\n", + " validate_api_key=False,\n", + " model=model,\n", + " freq=freq,\n", + " )\n", + "\n", + " logger.info('Preprocessing dataframes...')\n", + " processed, *_ = _preprocess(\n", + " df=df,\n", + " X_df=None,\n", + " h=0,\n", + " freq=freq,\n", + " date_features=False,\n", + " date_features_to_one_hot=False,\n", + " id_col=id_col,\n", + " time_col=time_col,\n", + " target_col=target_col,\n", + " )\n", + " standard_freq = _standardize_freq(freq, processed)\n", + " model_input_size, model_horizon = self._get_model_params(model, standard_freq)\n", + " _validate_input_size(processed, model_input_size, model_horizon)\n", + " logger.info('Calling Fine-tune Endpoint...')\n", + " payload = {\n", + " 'series': {\n", + " 'y': processed.data[:, 0],\n", + " 'sizes': np.diff(processed.indptr),\n", + " },\n", + " 'model': model,\n", + " 'freq': standard_freq,\n", + " 'finetune_steps': finetune_steps,\n", + " 'finetune_depth': finetune_depth,\n", + " 'finetune_loss': finetune_loss,\n", + " 'output_model_id': output_model_id,\n", + " 'finetuned_model_id': finetuned_model_id,\n", + " }\n", + " with httpx.Client(**self._client_kwargs) as client:\n", + " resp = self._make_request_with_retries(client, 'v2/finetune', payload)\n", + " return resp['finetuned_model_id']\n", + "\n", + " def finetuned_models(self) -> list[FinetunedModel]:\n", + " \"\"\"List fine-tuned models\n", + " \n", + " Returns\n", + " -------\n", + " list of FinetunedModel\n", + " List of available fine-tuned models.\"\"\"\n", + " with httpx.Client(**self._client_kwargs) as client:\n", + " resp = client.get(\"/v2/finetuned_models\")\n", + " body = resp.json()\n", + " if resp.status_code != 200:\n", + " raise ApiError(status_code=resp.status_code, body=body)\n", + " return [FinetunedModel(**m) for m in body['finetuned_models']]\n", + "\n", + " def delete_finetuned_model(self, finetuned_model_id: str) -> bool:\n", + " \"\"\"Delete a previously fine-tuned model\n", + "\n", + " Parameters\n", + " ----------\n", + " finetuned_model_id : str\n", + " ID of the fine-tuned model to be deleted.\n", + "\n", + " Returns\n", + " -------\n", + " bool\n", + " Whether delete was successful.\"\"\"\n", + " with httpx.Client(**self._client_kwargs) as client:\n", + " resp = client.delete(\n", + " f\"/v2/finetuned_models/{finetuned_model_id}\",\n", + " headers={'accept-encoding': 'identity'},\n", + " )\n", + " return resp.status_code == 204\n", + "\n", " def _distributed_forecast(\n", " self,\n", " df: DistributedDFType,\n", @@ -1023,6 +1187,7 @@ " finetune_steps: _NonNegativeInt,\n", " finetune_depth: _Finetune_Depth,\n", " finetune_loss: _Loss,\n", + " finetuned_model_id: Optional[str],\n", " clean_ex_first: bool,\n", " hist_exog_list: Optional[list[str]],\n", " validate_api_key: bool,\n", @@ -1080,6 +1245,7 @@ " finetune_steps=finetune_steps,\n", " finetune_depth=finetune_depth,\n", " 
finetune_loss=finetune_loss,\n", + " finetuned_model_id=finetuned_model_id,\n", " clean_ex_first=clean_ex_first,\n", " hist_exog_list=hist_exog_list,\n", " validate_api_key=validate_api_key,\n", @@ -1094,7 +1260,7 @@ " as_fugue=True,\n", " )\n", " return fa.get_native_as_df(result_df)\n", - " \n", + "\n", " def forecast(\n", " self,\n", " df: AnyDFType,\n", @@ -1109,6 +1275,7 @@ " finetune_steps: _NonNegativeInt = 0,\n", " finetune_depth: _Finetune_Depth = 1,\n", " finetune_loss: _Loss = 'default',\n", + " finetuned_model_id: Optional[str] = None,\n", " clean_ex_first: bool = True,\n", " hist_exog_list: Optional[list[str]] = None,\n", " validate_api_key: bool = False,\n", @@ -1160,11 +1327,13 @@ " finetune_steps : int (default=0)\n", " Number of steps used to finetune learning TimeGPT in the\n", " new data.\n", - " finetune_depth: int (default=1)\n", + " finetune_depth : int (default=1)\n", " The depth of the finetuning. Uses a scale from 1 to 5, where 1 means little finetuning,\n", " and 5 means that the entire model is finetuned.\n", " finetune_loss : str (default='default')\n", " Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`.\n", + " finetuned_model_id : str, optional(default=None)\n", + " ID of previously fine-tuned model to use.\n", " clean_ex_first : bool (default=True)\n", " Clean exogenous signal before making forecasts using TimeGPT.\n", " hist_exog_list : list of str, optional (default=None)\n", @@ -1216,6 +1385,7 @@ " finetune_steps=finetune_steps,\n", " finetune_depth=finetune_depth,\n", " finetune_loss=finetune_loss,\n", + " finetuned_model_id=finetuned_model_id,\n", " clean_ex_first=clean_ex_first,\n", " hist_exog_list=hist_exog_list,\n", " validate_api_key=validate_api_key,\n", @@ -1307,6 +1477,7 @@ " 'finetune_steps': finetune_steps,\n", " 'finetune_depth': finetune_depth,\n", " 'finetune_loss': finetune_loss,\n", + " 'finetuned_model_id': finetuned_model_id,\n", " 'feature_contributions': feature_contributions and X is not None,\n", " }\n", " with httpx.Client(**self._client_kwargs) as client:\n", @@ -1387,6 +1558,7 @@ " time_col: str,\n", " target_col: str,\n", " level: Union[int, float],\n", + " finetuned_model_id: Optional[str],\n", " clean_ex_first: bool,\n", " validate_api_key: bool,\n", " date_features: Union[bool, list[str]],\n", @@ -1417,6 +1589,7 @@ " time_col=time_col,\n", " target_col=target_col,\n", " level=level,\n", + " finetuned_model_id=finetuned_model_id,\n", " clean_ex_first=clean_ex_first,\n", " validate_api_key=validate_api_key,\n", " date_features=date_features,\n", @@ -1437,6 +1610,7 @@ " time_col: str = 'ds',\n", " target_col: str = 'y',\n", " level: Union[int, float] = 99,\n", + " finetuned_model_id: Optional[str] = None,\n", " clean_ex_first: bool = True,\n", " validate_api_key: bool = False,\n", " date_features: Union[bool, list[str]] = False,\n", @@ -1471,6 +1645,8 @@ " Column that contains the target.\n", " level : float (default=99)\n", " Confidence level between 0 and 100 for detecting the anomalies.\n", + " finetuned_model_id : str, optional(default=None)\n", + " ID of previously fine-tuned model to use.\n", " clean_ex_first : bool (default=True)\n", " Clean exogenous signal before making forecasts\n", " using TimeGPT.\n", @@ -1508,6 +1684,7 @@ " time_col=time_col,\n", " target_col=target_col,\n", " level=level,\n", + " finetuned_model_id=finetuned_model_id,\n", " clean_ex_first=clean_ex_first,\n", " validate_api_key=validate_api_key,\n", " date_features=date_features,\n", @@ -1558,6 
+1735,7 @@ " },\n", " 'model': model,\n", " 'freq': standard_freq,\n", + " 'finetuned_model_id': finetuned_model_id,\n", " 'clean_ex_first': clean_ex_first,\n", " 'level': level,\n", " }\n", @@ -1600,6 +1778,7 @@ " finetune_steps: _NonNegativeInt,\n", " finetune_depth: _Finetune_Depth,\n", " finetune_loss: _Loss,\n", + " finetuned_model_id: Optional[str],\n", " refit: bool,\n", " clean_ex_first: bool,\n", " hist_exog_list: Optional[list[str]],\n", @@ -1639,6 +1818,7 @@ " finetune_steps=finetune_steps,\n", " finetune_depth=finetune_depth,\n", " finetune_loss=finetune_loss,\n", + " finetuned_model_id=finetuned_model_id,\n", " refit=refit,\n", " clean_ex_first=clean_ex_first,\n", " hist_exog_list=hist_exog_list,\n", @@ -1668,6 +1848,7 @@ " finetune_steps: _NonNegativeInt = 0,\n", " finetune_depth: _Finetune_Depth = 1,\n", " finetune_loss: _Loss = 'default',\n", + " finetuned_model_id: Optional[str] = None,\n", " refit: bool = True,\n", " clean_ex_first: bool = True,\n", " hist_exog_list: Optional[list[str]] = None,\n", @@ -1726,6 +1907,8 @@ " and 5 means that the entire model is finetuned.\n", " finetune_loss : str (default='default')\n", " Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`.\n", + " finetuned_model_id : str, optional(default=None)\n", + " ID of previously fine-tuned model to use.\n", " refit : bool (default=True)\n", " Fine-tune the model in each window. If `False`, only fine-tunes on the first window.\n", " Only used if `finetune_steps` > 0.\n", @@ -1773,6 +1956,7 @@ " finetune_steps=finetune_steps,\n", " finetune_depth=finetune_depth,\n", " finetune_loss=finetune_loss,\n", + " finetuned_model_id=finetuned_model_id,\n", " refit=refit,\n", " clean_ex_first=clean_ex_first,\n", " hist_exog_list=hist_exog_list,\n", @@ -1881,6 +2065,7 @@ " 'finetune_steps': finetune_steps,\n", " 'finetune_depth': finetune_depth,\n", " 'finetune_loss': finetune_loss,\n", + " 'finetuned_model_id': finetuned_model_id,\n", " 'refit': refit,\n", " }\n", " with httpx.Client(**self._client_kwargs) as client:\n", @@ -2046,6 +2231,7 @@ " finetune_steps: _NonNegativeInt,\n", " finetune_depth: _Finetune_Depth,\n", " finetune_loss: _Loss,\n", + " finetuned_model_id: Optional[str],\n", " clean_ex_first: bool,\n", " hist_exog_list: Optional[list[str]],\n", " validate_api_key: bool,\n", @@ -2075,6 +2261,7 @@ " finetune_steps=finetune_steps,\n", " finetune_depth=finetune_depth,\n", " finetune_loss=finetune_loss,\n", + " finetuned_model_id=finetuned_model_id,\n", " clean_ex_first=clean_ex_first,\n", " hist_exog_list=hist_exog_list,\n", " validate_api_key=validate_api_key,\n", @@ -2094,6 +2281,7 @@ " time_col: str,\n", " target_col: str,\n", " level: Union[int, float],\n", + " finetuned_model_id: Optional[str],\n", " clean_ex_first: bool,\n", " validate_api_key: bool,\n", " date_features: Union[bool, list[str]],\n", @@ -2108,6 +2296,7 @@ " time_col=time_col,\n", " target_col=target_col,\n", " level=level,\n", + " finetuned_model_id=finetuned_model_id,\n", " clean_ex_first=clean_ex_first,\n", " validate_api_key=validate_api_key,\n", " date_features=date_features,\n", @@ -2132,6 +2321,7 @@ " finetune_steps: _NonNegativeInt,\n", " finetune_depth: _Finetune_Depth,\n", " finetune_loss: _Loss,\n", + " finetuned_model_id: Optional[str],\n", " refit: bool,\n", " clean_ex_first: bool,\n", " hist_exog_list: Optional[list[str]],\n", @@ -2155,6 +2345,7 @@ " finetune_steps=finetune_steps,\n", " finetune_depth=finetune_depth,\n", " finetune_loss=finetune_loss,\n", + " 
finetuned_model_id=finetuned_model_id,\n", " refit=refit,\n", " clean_ex_first=clean_ex_first,\n", " hist_exog_list=hist_exog_list,\n", @@ -2277,6 +2468,147 @@ "nixtla_client.validate_api_key()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# custom client\n", + "custom_client = NixtlaClient(\n", + " base_url=os.environ['NIXTLA_BASE_URL_CUSTOM'],\n", + " api_key=os.environ['NIXTLA_API_KEY_CUSTOM'],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# usage endpoint\n", + "usage = custom_client.usage()\n", + "assert sorted(usage.keys()) == ['minute', 'month']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# finetuning\n", + "h = 5\n", + "series = generate_series(10, equal_ends=True)\n", + "train_end = series['ds'].max() - h * pd.offsets.Day()\n", + "train_mask = series['ds'] <= train_end\n", + "train = series[train_mask]\n", + "valid = series[~train_mask]\n", + "model_id1 = str(uuid.uuid4())\n", + "finetune_resp = custom_client.finetune(train, output_model_id=model_id1)\n", + "assert finetune_resp == model_id1\n", + "model_id2 = custom_client.finetune(train, finetuned_model_id=model_id1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# forecast with fine-tuned models\n", + "fcst_base = custom_client.forecast(train, h=h)\n", + "fcst1 = custom_client.forecast(train, h=h, finetuned_model_id=model_id1)\n", + "fcst2 = custom_client.forecast(train, h=h, finetuned_model_id=model_id2)\n", + "all_fcsts = fcst_base.assign(ten_rounds=fcst1['TimeGPT'], twenty_rounds=fcst2['TimeGPT'])\n", + "fcst_rmse = evaluate(\n", + " all_fcsts.merge(valid),\n", + " metrics=[rmse],\n", + " agg_fn='mean',\n", + ").loc[0]\n", + "# error was reduced over 40% by finetuning\n", + "assert 1 - fcst_rmse['ten_rounds'] / fcst_rmse['TimeGPT'] > 0.4\n", + "# error was reduced over 30% by further finetuning\n", + "assert 1 - fcst_rmse['twenty_rounds'] / fcst_rmse['ten_rounds'] > 0.3\n", + "\n", + "# non-existent model returns 404\n", + "try:\n", + " custom_client.forecast(train, h=5, finetuned_model_id='unexisting')\n", + "except ApiError as e:\n", + " assert e.status_code == 404" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# cv with fine-tuned model\n", + "cv_base = custom_client.cross_validation(series, n_windows=2, h=h)\n", + "cv_finetune = custom_client.cross_validation(series, n_windows=2, h=h, finetuned_model_id=model_id1)\n", + "all_fcsts = fcst_base.assign(ten_rounds=fcst1['TimeGPT'], twenty_rounds=fcst2['TimeGPT'])\n", + "cv_rmse = evaluate(\n", + " cv_base.merge(\n", + " cv_finetune,\n", + " on=['unique_id', 'ds', 'cutoff', 'y'],\n", + " suffixes=('_base', '_finetune')\n", + " ).drop(columns='cutoff'),\n", + " metrics=[rmse],\n", + " agg_fn='mean',\n", + ").loc[0]\n", + "# error was reduced over 40% by finetuning\n", + "assert 1 - cv_rmse['TimeGPT_finetune'] / cv_rmse['TimeGPT_base'] > 0.4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# delete finetuned model\n", + "custom_client.delete_finetuned_model(model_id1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "#| hide\n", + "# anomaly detection with fine-tuned model\n", + "train_anomalies = train.copy()\n", + "anomaly_date = train_end - 2 * pd.offsets.Day()\n", + "train_anomalies.loc[train['ds'] == anomaly_date, 'y'] *= 2\n", + "anomaly_base = custom_client.detect_anomalies(train_anomalies)\n", + "anomaly_finetune = custom_client.detect_anomalies(train_anomalies, finetuned_model_id=model_id2)\n", + "detected_anomalies_base = anomaly_base.set_index('ds').loc[anomaly_date, 'anomaly'].sum()\n", + "detected_anomalies_finetune = anomaly_finetune.set_index('ds').loc[anomaly_date, 'anomaly'].sum()\n", + "assert detected_anomalies_base < detected_anomalies_finetune" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# list finetuned models\n", + "models = custom_client.finetuned_models()\n", + "ids = {m.id for m in models}\n", + "assert model_id1 not in ids and model_id2 in ids" + ] + }, { "cell_type": "code", "execution_count": null, @@ -2319,22 +2651,6 @@ "assert len(zstd.ZstdDecompressor().decompress(content)) > 2**20" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "# usage endpoint\n", - "client2 = NixtlaClient(\n", - " base_url=os.environ['NIXTLA_BASE_URL_CUSTOM'],\n", - " api_key=os.environ['NIXTLA_API_KEY_CUSTOM'],\n", - ")\n", - "usage = client2.usage()\n", - "assert sorted(usage.keys()) == ['minute', 'month']" - ] - }, { "cell_type": "code", "execution_count": null, @@ -2687,11 +3003,11 @@ " max_wait_time=max_wait_time,\n", " )\n", " mock_nixtla_client._make_request = side_effect\n", - " init_time = time()\n", + " init_time = time.time()\n", " test_fail(\n", " lambda: mock_nixtla_client.forecast(df=df, h=12, time_col='timestamp', target_col='value'),\n", " )\n", - " total_mock_time = time() - init_time\n", + " total_mock_time = time.time() - init_time\n", " if should_retry:\n", " approx_expected_time = min((max_retries - 1) * retry_interval, max_wait_time)\n", " upper_expected_time = min(max_retries * retry_interval, max_wait_time)\n", @@ -2756,7 +3072,7 @@ "def raise_read_timeout_error(*args, **kwargs):\n", " sleep_seconds = 5\n", " print(f'raising ReadTimeout error after {sleep_seconds} seconds')\n", - " sleep(sleep_seconds)\n", + " time.sleep(sleep_seconds)\n", " raise httpx.ReadTimeout('Timed out')\n", "\n", "def raise_http_error(*args, **kwargs):\n", @@ -3798,7 +4114,60 @@ " df_qls.apply(lambda x: x.is_monotonic_increasing, axis=1).sum() == len(exp_q_cols)\n", " test_method_qls(nixtla_client.forecast)\n", " test_method_qls(nixtla_client.forecast, add_history=True)\n", - " test_method_qls(nixtla_client.cross_validation)" + " test_method_qls(nixtla_client.cross_validation)\n", + "\n", + "\n", + "def test_finetuned_model(df):\n", + " # fine-tuning on distributed fails\n", + " test_fail(\n", + " lambda: custom_client.finetune(df=df),\n", + " contains='Can only fine-tune on pandas or polars dataframes.'\n", + " )\n", + " \n", + " # forecast\n", + " local_fcst = custom_client.forecast(\n", + " df=fa.as_pandas(df), h=5, finetuned_model_id=model_id2\n", + " )\n", + " distr_fcst = fa.as_pandas(\n", + " custom_client.forecast(df=df, h=5, finetuned_model_id=model_id2)\n", + " ).sort_values(['unique_id', 'ds']).reset_index(drop=True)\n", + " pd.testing.assert_frame_equal(\n", + " local_fcst, \n", + " distr_fcst,\n", + " check_dtype=False,\n", + " atol=1e-4,\n", + " rtol=1e-2,\n", + " )\n", + "\n", + " # cross-validation\n", + " 
local_cv = custom_client.cross_validation(\n", + " df=fa.as_pandas(df), n_windows=2, h=5, finetuned_model_id=model_id2\n", + " )\n", + " distr_cv = fa.as_pandas(\n", + " custom_client.cross_validation(df=df, n_windows=2, h=5, finetuned_model_id=model_id2)\n", + " ).sort_values(['unique_id', 'ds']).reset_index(drop=True)\n", + " pd.testing.assert_frame_equal(\n", + " local_cv,\n", + " distr_cv[local_cv.columns],\n", + " check_dtype=False,\n", + " atol=1e-4,\n", + " rtol=1e-2,\n", + " )\n", + "\n", + " # anomaly detection\n", + " local_anomaly = custom_client.detect_anomalies(\n", + " df=fa.as_pandas(df), finetuned_model_id=model_id2\n", + " )\n", + " distr_anomaly = fa.as_pandas(\n", + " custom_client.detect_anomalies(df=df, finetuned_model_id=model_id2)\n", + " ).sort_values(['unique_id', 'ds']).reset_index(drop=True)\n", + " pd.testing.assert_frame_equal(\n", + " local_anomaly, \n", + " distr_anomaly[local_anomaly.columns],\n", + " check_dtype=False,\n", + " atol=1e-3,\n", + " rtol=1e-2,\n", + " )" ] }, { @@ -3868,6 +4237,9 @@ ").repartition(2)\n", "test_forecast_x_dataframe_diff_cols(spark_df_x_diff_cols, spark_future_ex_vars_df_diff_cols)\n", "\n", + "# test finetuning\n", + "test_finetuned_model(spark_df)\n", + "\n", "spark.stop()" ] }, @@ -3908,6 +4280,9 @@ "dask_future_ex_vars_df_diff_cols = dd.from_pandas(future_ex_vars_df.rename(columns=renamer), npartitions=2)\n", "test_forecast_x_dataframe_diff_cols(dask_df_x_diff_cols, dask_future_ex_vars_df_diff_cols)\n", "\n", + "# test finetuning\n", + "test_finetuned_model(dask_df)\n", + "\n", "client.close()" ] }, @@ -3954,8 +4329,22 @@ "ray_future_ex_vars_df_diff_cols = ray.data.from_pandas(future_ex_vars_df.rename(columns=renamer))\n", "test_forecast_x_dataframe_diff_cols(ray_df_x_diff_cols, ray_future_ex_vars_df_diff_cols)\n", "\n", + "# test finetuning\n", + "test_finetuned_model(ray_df)\n", + "\n", "ray.shutdown()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# cleanup\n", + "custom_client.delete_finetuned_model(model_id2)" + ] } ], "metadata": { diff --git a/nixtla/_modidx.py b/nixtla/_modidx.py index 1481f44a3..fa7ac394f 100644 --- a/nixtla/_modidx.py +++ b/nixtla/_modidx.py @@ -30,6 +30,8 @@ 'nixtla/nixtla_client.py'), 'nixtla.nixtla_client.ApiError.__str__': ( 'src/nixtla_client.html#apierror.__str__', 'nixtla/nixtla_client.py'), + 'nixtla.nixtla_client.FinetunedModel': ( 'src/nixtla_client.html#finetunedmodel', + 'nixtla/nixtla_client.py'), 'nixtla.nixtla_client.NixtlaClient': ( 'src/nixtla_client.html#nixtlaclient', 'nixtla/nixtla_client.py'), 'nixtla.nixtla_client.NixtlaClient.__init__': ( 'src/nixtla_client.html#nixtlaclient.__init__', @@ -58,8 +60,14 @@ 'nixtla/nixtla_client.py'), 'nixtla.nixtla_client.NixtlaClient.cross_validation': ( 'src/nixtla_client.html#nixtlaclient.cross_validation', 'nixtla/nixtla_client.py'), + 'nixtla.nixtla_client.NixtlaClient.delete_finetuned_model': ( 'src/nixtla_client.html#nixtlaclient.delete_finetuned_model', + 'nixtla/nixtla_client.py'), 'nixtla.nixtla_client.NixtlaClient.detect_anomalies': ( 'src/nixtla_client.html#nixtlaclient.detect_anomalies', 'nixtla/nixtla_client.py'), + 'nixtla.nixtla_client.NixtlaClient.finetune': ( 'src/nixtla_client.html#nixtlaclient.finetune', + 'nixtla/nixtla_client.py'), + 'nixtla.nixtla_client.NixtlaClient.finetuned_models': ( 'src/nixtla_client.html#nixtlaclient.finetuned_models', + 'nixtla/nixtla_client.py'), 'nixtla.nixtla_client.NixtlaClient.forecast': ( 
'src/nixtla_client.html#nixtlaclient.forecast', 'nixtla/nixtla_client.py'), 'nixtla.nixtla_client.NixtlaClient.plot': ( 'src/nixtla_client.html#nixtlaclient.plot', diff --git a/nixtla/nixtla_client.py b/nixtla/nixtla_client.py index 4248a6447..6ba9e8db2 100644 --- a/nixtla/nixtla_client.py +++ b/nixtla/nixtla_client.py @@ -4,6 +4,7 @@ __all__ = ['ApiError', 'NixtlaClient'] # %% ../nbs/src/nixtla_client.ipynb 3 +import datetime import logging import math import os @@ -29,6 +30,7 @@ import pandas as pd import utilsforecast.processing as ufp import zstandard as zstd +from pydantic import BaseModel from tenacity import ( RetryCallState, retry, @@ -104,6 +106,19 @@ _Freq = Union[str, int, pd.offsets.BaseOffset] _FreqType = TypeVar("_FreqType", str, int, pd.offsets.BaseOffset) + +class FinetunedModel(BaseModel, extra="allow"): # type: ignore + id: str + created_at: datetime.datetime + created_by: str + base_model_id: str + steps: int + depth: int + loss: _Loss + model: _Model + freq: str + + _date_features_by_freq = { # Daily frequencies "B": ["year", "month", "day", "weekday"], @@ -928,6 +943,12 @@ def validate_api_key(self, log: bool = True) -> bool: ) == "success" or "Forecasting! :)" in validation.get("detail", "") def usage(self) -> dict[str, dict[str, int]]: + """Query consumed requests and limits + + Returns + ------- + dict + Consumed requests and limits by minute and month.""" if self._is_azure: raise NotImplementedError("usage is not implemented for Azure deployments") with httpx.Client(**self._client_kwargs) as client: @@ -937,6 +958,148 @@ def usage(self) -> dict[str, dict[str, int]]: raise ApiError(status_code=resp.status_code, body=body) return body + def finetune( + self, + df: DataFrame, + freq: Optional[_Freq] = None, + id_col: str = "unique_id", + time_col: str = "ds", + target_col: str = "y", + finetune_steps: _NonNegativeInt = 10, + finetune_depth: _Finetune_Depth = 1, + finetune_loss: _Loss = "default", + output_model_id: Optional[str] = None, + finetuned_model_id: Optional[str] = None, + model: _Model = "timegpt-1", + ) -> str: + """Fine-tune TimeGPT to your series. + + Parameters + ---------- + df : pandas or polars DataFrame + The DataFrame on which the function will operate. Expected to contain at least the following columns: + - time_col: + Column name in `df` that contains the time indices of the time series. This is typically a datetime + column with regular intervals, e.g., hourly, daily, monthly data points. + - target_col: + Column name in `df` that contains the target variable of the time series, i.e., the variable we + wish to predict or analyze. + Additionally, you can pass multiple time series (stacked in the dataframe) considering an additional column: + - id_col: + Column name in `df` that identifies unique time series. Each unique value in this column + corresponds to a unique time series. + freq : str, int or pandas offset, optional (default=None). + Frequency of the timestamps. If `None`, it will be inferred automatically. + See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). + id_col : str (default='unique_id') + Column that identifies each serie. + time_col : str (default='ds') + Column that identifies each timestep, its values can be timestamps or integers. + target_col : str (default='y') + Column that contains the target. + finetune_steps : int (default=10) + Number of steps used to finetune learning TimeGPT in the new data. 
+ finetune_depth : int (default=1) + The depth of the finetuning. Uses a scale from 1 to 5, where 1 means little finetuning, + and 5 means that the entire model is finetuned. + finetune_loss : str (default='default') + Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`. + output_model_id : str, optional(default=None) + ID to assign to the fine-tuned model. If `None`, an UUID is used. + finetuned_model_id : str, optional(default=None) + ID of previously fine-tuned model to use as base. + model : str (default='timegpt-1') + Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`. + We recommend using `timegpt-1-long-horizon` for forecasting + if you want to predict more than one seasonal + period given the frequency of your data. + + Returns + ------- + str + ID of the fine-tuned model + """ + if not isinstance(df, (pd.DataFrame, pl_DataFrame)): + raise ValueError("Can only fine-tune on pandas or polars dataframes.") + model = self._maybe_override_model(model) + logger.info("Validating inputs...") + df, X_df, drop_id, freq = self._run_validations( + df=df, + X_df=None, + id_col=id_col, + time_col=time_col, + target_col=target_col, + validate_api_key=False, + model=model, + freq=freq, + ) + + logger.info("Preprocessing dataframes...") + processed, *_ = _preprocess( + df=df, + X_df=None, + h=0, + freq=freq, + date_features=False, + date_features_to_one_hot=False, + id_col=id_col, + time_col=time_col, + target_col=target_col, + ) + standard_freq = _standardize_freq(freq, processed) + model_input_size, model_horizon = self._get_model_params(model, standard_freq) + _validate_input_size(processed, model_input_size, model_horizon) + logger.info("Calling Fine-tune Endpoint...") + payload = { + "series": { + "y": processed.data[:, 0], + "sizes": np.diff(processed.indptr), + }, + "model": model, + "freq": standard_freq, + "finetune_steps": finetune_steps, + "finetune_depth": finetune_depth, + "finetune_loss": finetune_loss, + "output_model_id": output_model_id, + "finetuned_model_id": finetuned_model_id, + } + with httpx.Client(**self._client_kwargs) as client: + resp = self._make_request_with_retries(client, "v2/finetune", payload) + return resp["finetuned_model_id"] + + def finetuned_models(self) -> list[FinetunedModel]: + """List fine-tuned models + + Returns + ------- + list of FinetunedModel + List of available fine-tuned models.""" + with httpx.Client(**self._client_kwargs) as client: + resp = client.get("/v2/finetuned_models") + body = resp.json() + if resp.status_code != 200: + raise ApiError(status_code=resp.status_code, body=body) + return [FinetunedModel(**m) for m in body["finetuned_models"]] + + def delete_finetuned_model(self, finetuned_model_id: str) -> bool: + """Delete a previously fine-tuned model + + Parameters + ---------- + finetuned_model_id : str + ID of the fine-tuned model to be deleted. 
+ + Returns + ------- + bool + Whether delete was successful.""" + with httpx.Client(**self._client_kwargs) as client: + resp = client.delete( + f"/v2/finetuned_models/{finetuned_model_id}", + headers={"accept-encoding": "identity"}, + ) + return resp.status_code == 204 + def _distributed_forecast( self, df: DistributedDFType, @@ -951,6 +1114,7 @@ def _distributed_forecast( finetune_steps: _NonNegativeInt, finetune_depth: _Finetune_Depth, finetune_loss: _Loss, + finetuned_model_id: Optional[str], clean_ex_first: bool, hist_exog_list: Optional[list[str]], validate_api_key: bool, @@ -1009,6 +1173,7 @@ def format_X_df( finetune_steps=finetune_steps, finetune_depth=finetune_depth, finetune_loss=finetune_loss, + finetuned_model_id=finetuned_model_id, clean_ex_first=clean_ex_first, hist_exog_list=hist_exog_list, validate_api_key=validate_api_key, @@ -1038,6 +1203,7 @@ def forecast( finetune_steps: _NonNegativeInt = 0, finetune_depth: _Finetune_Depth = 1, finetune_loss: _Loss = "default", + finetuned_model_id: Optional[str] = None, clean_ex_first: bool = True, hist_exog_list: Optional[list[str]] = None, validate_api_key: bool = False, @@ -1089,11 +1255,13 @@ def forecast( finetune_steps : int (default=0) Number of steps used to finetune learning TimeGPT in the new data. - finetune_depth: int (default=1) + finetune_depth : int (default=1) The depth of the finetuning. Uses a scale from 1 to 5, where 1 means little finetuning, and 5 means that the entire model is finetuned. finetune_loss : str (default='default') Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`. + finetuned_model_id : str, optional(default=None) + ID of previously fine-tuned model to use. clean_ex_first : bool (default=True) Clean exogenous signal before making forecasts using TimeGPT. hist_exog_list : list of str, optional (default=None) @@ -1145,6 +1313,7 @@ def forecast( finetune_steps=finetune_steps, finetune_depth=finetune_depth, finetune_loss=finetune_loss, + finetuned_model_id=finetuned_model_id, clean_ex_first=clean_ex_first, hist_exog_list=hist_exog_list, validate_api_key=validate_api_key, @@ -1236,6 +1405,7 @@ def forecast( "finetune_steps": finetune_steps, "finetune_depth": finetune_depth, "finetune_loss": finetune_loss, + "finetuned_model_id": finetuned_model_id, "feature_contributions": feature_contributions and X is not None, } with httpx.Client(**self._client_kwargs) as client: @@ -1322,6 +1492,7 @@ def _distributed_detect_anomalies( time_col: str, target_col: str, level: Union[int, float], + finetuned_model_id: Optional[str], clean_ex_first: bool, validate_api_key: bool, date_features: Union[bool, list[str]], @@ -1352,6 +1523,7 @@ def _distributed_detect_anomalies( time_col=time_col, target_col=target_col, level=level, + finetuned_model_id=finetuned_model_id, clean_ex_first=clean_ex_first, validate_api_key=validate_api_key, date_features=date_features, @@ -1372,6 +1544,7 @@ def detect_anomalies( time_col: str = "ds", target_col: str = "y", level: Union[int, float] = 99, + finetuned_model_id: Optional[str] = None, clean_ex_first: bool = True, validate_api_key: bool = False, date_features: Union[bool, list[str]] = False, @@ -1406,6 +1579,8 @@ def detect_anomalies( Column that contains the target. level : float (default=99) Confidence level between 0 and 100 for detecting the anomalies. + finetuned_model_id : str, optional(default=None) + ID of previously fine-tuned model to use. 
clean_ex_first : bool (default=True) Clean exogenous signal before making forecasts using TimeGPT. @@ -1443,6 +1618,7 @@ def detect_anomalies( time_col=time_col, target_col=target_col, level=level, + finetuned_model_id=finetuned_model_id, clean_ex_first=clean_ex_first, validate_api_key=validate_api_key, date_features=date_features, @@ -1493,6 +1669,7 @@ def detect_anomalies( }, "model": model, "freq": standard_freq, + "finetuned_model_id": finetuned_model_id, "clean_ex_first": clean_ex_first, "level": level, } @@ -1537,6 +1714,7 @@ def _distributed_cross_validation( finetune_steps: _NonNegativeInt, finetune_depth: _Finetune_Depth, finetune_loss: _Loss, + finetuned_model_id: Optional[str], refit: bool, clean_ex_first: bool, hist_exog_list: Optional[list[str]], @@ -1576,6 +1754,7 @@ def _distributed_cross_validation( finetune_steps=finetune_steps, finetune_depth=finetune_depth, finetune_loss=finetune_loss, + finetuned_model_id=finetuned_model_id, refit=refit, clean_ex_first=clean_ex_first, hist_exog_list=hist_exog_list, @@ -1605,6 +1784,7 @@ def cross_validation( finetune_steps: _NonNegativeInt = 0, finetune_depth: _Finetune_Depth = 1, finetune_loss: _Loss = "default", + finetuned_model_id: Optional[str] = None, refit: bool = True, clean_ex_first: bool = True, hist_exog_list: Optional[list[str]] = None, @@ -1663,6 +1843,8 @@ def cross_validation( and 5 means that the entire model is finetuned. finetune_loss : str (default='default') Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`. + finetuned_model_id : str, optional(default=None) + ID of previously fine-tuned model to use. refit : bool (default=True) Fine-tune the model in each window. If `False`, only fine-tunes on the first window. Only used if `finetune_steps` > 0. 
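The `finetuned_model_id` argument introduced by this diff threads through `forecast`, `detect_anomalies`, and `cross_validation`, so a single fine-tuning job can be reused across every entry point. A minimal sketch of that flow, assuming only the method names and parameters shown in this diff (the client construction and the toy dataframe are placeholders):

```python
import pandas as pd
from nixtla import NixtlaClient

client = NixtlaClient()  # api_key read from the NIXTLA_API_KEY environment variable

# toy single-series frame using the default column names
df = pd.DataFrame({
    "unique_id": "series_1",
    "ds": pd.date_range("2023-01-01", periods=60, freq="D"),
    "y": range(60),
})

# fine-tune once; the returned ID identifies the stored model
model_id = client.finetune(df, finetune_steps=10, finetune_depth=1)

# reuse the same fine-tuned weights in all three entry points
fcst = client.forecast(df, h=7, finetuned_model_id=model_id)
cv = client.cross_validation(df, n_windows=2, h=7, finetuned_model_id=model_id)
anomalies = client.detect_anomalies(df, finetuned_model_id=model_id)
```

Note that `finetune` itself accepts only pandas or polars dataframes (it raises `ValueError` otherwise), while `finetuned_model_id` also works with the distributed Spark, Dask, and Ray entry points, as the `test_finetuned_model` checks above exercise.
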
@@ -1710,6 +1892,7 @@ def cross_validation( finetune_steps=finetune_steps, finetune_depth=finetune_depth, finetune_loss=finetune_loss, + finetuned_model_id=finetuned_model_id, refit=refit, clean_ex_first=clean_ex_first, hist_exog_list=hist_exog_list, @@ -1818,6 +2001,7 @@ def cross_validation( "finetune_steps": finetune_steps, "finetune_depth": finetune_depth, "finetune_loss": finetune_loss, + "finetuned_model_id": finetuned_model_id, "refit": refit, } with httpx.Client(**self._client_kwargs) as client: @@ -1980,6 +2164,7 @@ def _forecast_wrapper( finetune_steps: _NonNegativeInt, finetune_depth: _Finetune_Depth, finetune_loss: _Loss, + finetuned_model_id: Optional[str], clean_ex_first: bool, hist_exog_list: Optional[list[str]], validate_api_key: bool, @@ -2009,6 +2194,7 @@ def _forecast_wrapper( finetune_steps=finetune_steps, finetune_depth=finetune_depth, finetune_loss=finetune_loss, + finetuned_model_id=finetuned_model_id, clean_ex_first=clean_ex_first, hist_exog_list=hist_exog_list, validate_api_key=validate_api_key, @@ -2029,6 +2215,7 @@ def _detect_anomalies_wrapper( time_col: str, target_col: str, level: Union[int, float], + finetuned_model_id: Optional[str], clean_ex_first: bool, validate_api_key: bool, date_features: Union[bool, list[str]], @@ -2043,6 +2230,7 @@ def _detect_anomalies_wrapper( time_col=time_col, target_col=target_col, level=level, + finetuned_model_id=finetuned_model_id, clean_ex_first=clean_ex_first, validate_api_key=validate_api_key, date_features=date_features, @@ -2068,6 +2256,7 @@ def _cross_validation_wrapper( finetune_steps: _NonNegativeInt, finetune_depth: _Finetune_Depth, finetune_loss: _Loss, + finetuned_model_id: Optional[str], refit: bool, clean_ex_first: bool, hist_exog_list: Optional[list[str]], @@ -2091,6 +2280,7 @@ def _cross_validation_wrapper( finetune_steps=finetune_steps, finetune_depth=finetune_depth, finetune_loss=finetune_loss, + finetuned_model_id=finetuned_model_id, refit=refit, clean_ex_first=clean_ex_first, hist_exog_list=hist_exog_list, diff --git a/settings.ini b/settings.ini index ef45b3800..f8c1d8c80 100644 --- a/settings.ini +++ b/settings.ini @@ -15,7 +15,7 @@ language = English custom_sidebar = True license = apache2 status = 4 -requirements = annotated-types httpx[zstd] orjson pandas tenacity tqdm utilsforecast>=0.2.8 +requirements = annotated-types httpx[zstd] orjson pandas pydantic>=1.10 tenacity tqdm utilsforecast>=0.2.8 dev_requirements = black datasetsforecast fire hierarchicalforecast jupyterlab nbdev neuralforecast numpy<2 plotly polars pre-commit pyreadr python-dotenv pyyaml setuptools<70 statsforecast tabulate distributed_requirements = fugue[dask,ray,spark]>=0.8.7 pandas<2.2 ray<2.6.3 plotting_requirements = utilsforecast[plotting]
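
For completeness, a hedged sketch of the model-lifecycle helpers this diff introduces: listing stored fine-tuned models as pydantic `FinetunedModel` records, querying account usage, and deleting a model. Field names follow the `FinetunedModel` definition above; `client` and `model_id` are carried over from the previous sketch:

```python
# list stored fine-tuned models; each entry is a pydantic FinetunedModel
for m in client.finetuned_models():
    print(m.id, m.base_model_id, m.steps, m.depth, m.loss, m.created_at)

# query consumed requests and limits, keyed by 'minute' and 'month'
usage = client.usage()
print(sorted(usage.keys()))  # ['minute', 'month']

# delete the model once it is no longer needed;
# True means the API returned 204 (delete succeeded)
assert client.delete_finetuned_model(model_id)
```

As the hidden test cells verify, `usage` raises `NotImplementedError` on Azure deployments, and passing an unknown `finetuned_model_id` to `forecast` surfaces as an `ApiError` with status code 404.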