Setting default retry strategy in llms and embedders (#9387)

szymondudycz · Manul from Pathway · commit 675f7adbf3b0 · 2025-10-10T12:59:40.000Z
GitOrigin-RevId: 1d851c5b456a54d2ad9de217a253e34578bdf0e7
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 
 ### Changed
 - `pw.io.deltalake.read` now accepts the `start_from_timestamp_ms` parameter for non-append-only tables. In this case, the connector will replay the history of changes in the table version by version starting from the state of the table at the given timestamp. The differences between versions will be applied atomically.
+- Asynchronous UDFs for connecting to API-based LLM and embedding models now use `pw.udfs.ExponentialBackoffRetryStrategy()` as the default retry strategy.
 
 ## [0.26.3] - 2025-10-03
 
diff --git a/python/pathway/xpacks/llm/embedders.py b/python/pathway/xpacks/llm/embedders.py
@@ -96,7 +96,8 @@ class OpenAIEmbedder(BaseEmbedder):
         capacity: Maximum number of concurrent operations allowed.
             Defaults to None, indicating no specific limit.
         retry_strategy: Strategy for handling retries in case of failures.
-            Defaults to None, meaning no retries.
+            Defaults to the `ExponentialBackoffRetryStrategy
+            <https://pathway.com/developers/api-docs/udfs#pathway.udfs.ExponentialBackoffRetryStrategy>`_.
         cache_strategy: Defines the caching mechanism. To enable caching,
             a valid `CacheStrategy` should be provided.
             See `Cache strategy <https://pathway.com/developers/api-docs/udfs#pathway.udfs.CacheStrategy>`_
@@ -153,7 +154,9 @@ def __init__(
         self,
         *,
         capacity: int | None = None,
-        retry_strategy: udfs.AsyncRetryStrategy | None = None,
+        retry_strategy: (
+            udfs.AsyncRetryStrategy | None
+        ) = pw.udfs.ExponentialBackoffRetryStrategy(),
         cache_strategy: udfs.CacheStrategy | None = None,
         model: str | None = "text-embedding-3-small",
         truncation_keep_strategy: Literal["start", "end"] | None = "start",
@@ -259,7 +262,8 @@ class LiteLLMEmbedder(BaseEmbedder):
         capacity: Maximum number of concurrent operations allowed.
             Defaults to None, indicating no specific limit.
         retry_strategy: Strategy for handling retries in case of failures.
-            Defaults to None, meaning no retries.
+            Defaults to the `ExponentialBackoffRetryStrategy
+            <https://pathway.com/developers/api-docs/udfs#pathway.udfs.ExponentialBackoffRetryStrategy>`_.
         cache_strategy: Defines the caching mechanism. To enable caching,
             a valid `CacheStrategy` should be provided.
             See `Cache strategy <https://pathway.com/developers/api-docs/udfs#pathway.udfs.CacheStrategy>`_
@@ -305,7 +309,9 @@ def __init__(
         self,
         *,
         capacity: int | None = None,
-        retry_strategy: udfs.AsyncRetryStrategy | None = None,
+        retry_strategy: (
+            udfs.AsyncRetryStrategy | None
+        ) = pw.udfs.ExponentialBackoffRetryStrategy(),
         cache_strategy: udfs.CacheStrategy | None = None,
         model: str | None = None,
         **llmlite_kwargs,
@@ -454,7 +460,8 @@ class GeminiEmbedder(BaseEmbedder):
         capacity: Maximum number of concurrent operations allowed.
             Defaults to ``None``, indicating no specific limit.
         retry_strategy: Strategy for handling retries in case of failures.
-            Defaults to ``None``, meaning no retries.
+            Defaults to the `ExponentialBackoffRetryStrategy
+            <https://pathway.com/developers/api-docs/udfs#pathway.udfs.ExponentialBackoffRetryStrategy>`_.
         cache_strategy: Defines the caching mechanism. To enable caching,
             a valid ``CacheStrategy`` should be provided.
             See `Cache strategy <https://pathway.com/developers/api-docs/udfs#pathway.udfs.CacheStrategy>`_
@@ -495,7 +502,9 @@ def __init__(
         self,
         *,
         capacity: int | None = None,
-        retry_strategy: udfs.AsyncRetryStrategy | None = None,
+        retry_strategy: (
+            udfs.AsyncRetryStrategy | None
+        ) = pw.udfs.ExponentialBackoffRetryStrategy(),
         cache_strategy: udfs.CacheStrategy | None = None,
         model: str | None = "models/embedding-001",
         api_key: str | None = None,
diff --git a/python/pathway/xpacks/llm/llms.py b/python/pathway/xpacks/llm/llms.py
@@ -103,7 +103,8 @@ class OpenAIChat(BaseChat):
         capacity: Maximum number of concurrent operations allowed.
             Defaults to None, indicating no specific limit.
         retry_strategy: Strategy for handling retries in case of failures.
-            Defaults to None, meaning no retries.
+            Defaults to the `ExponentialBackoffRetryStrategy
+            <https://pathway.com/developers/api-docs/udfs#pathway.udfs.ExponentialBackoffRetryStrategy>`_.
         cache_strategy: Defines the caching mechanism. To enable caching,
             a valid `CacheStrategy` should be provided.
             See `Cache strategy <https://pathway.com/developers/api-docs/udfs#pathway.udfs.CacheStrategy>`_
@@ -241,7 +242,9 @@ class OpenAIChat(BaseChat):
     def __init__(
         self,
         capacity: int | None = None,
-        retry_strategy: udfs.AsyncRetryStrategy | None = None,
+        retry_strategy: (
+            udfs.AsyncRetryStrategy | None
+        ) = pw.udfs.ExponentialBackoffRetryStrategy(),
         cache_strategy: udfs.CacheStrategy | None = None,
         model: str | None = "gpt-3.5-turbo",
         *,
@@ -332,7 +335,8 @@ class LiteLLMChat(BaseChat):
         capacity: Maximum number of concurrent operations allowed.
             Defaults to None, indicating no specific limit.
         retry_strategy: Strategy for handling retries in case of failures.
-            Defaults to None, meaning no retries.
+            Defaults to the `ExponentialBackoffRetryStrategy
+            <https://pathway.com/developers/api-docs/udfs#pathway.udfs.ExponentialBackoffRetryStrategy>`_.
         cache_strategy: Defines the caching mechanism. To enable caching,
             a valid `CacheStrategy` should be provided.
             See `Cache strategy <https://pathway.com/developers/api-docs/udfs#pathway.udfs.CacheStrategy>`_
@@ -371,7 +375,9 @@ class LiteLLMChat(BaseChat):
     def __init__(
         self,
         capacity: int | None = None,
-        retry_strategy: udfs.AsyncRetryStrategy | None = None,
+        retry_strategy: (
+            udfs.AsyncRetryStrategy | None
+        ) = pw.udfs.ExponentialBackoffRetryStrategy(),
         cache_strategy: udfs.CacheStrategy | None = None,
         model: str | None = None,
         *,
@@ -632,7 +638,8 @@ class CohereChat(BaseChat):
         capacity: Maximum number of concurrent operations allowed.
             Defaults to None, indicating no specific limit.
         retry_strategy: Strategy for handling retries in case of failures.
-            Defaults to None, meaning no retries.
+            Defaults to the `ExponentialBackoffRetryStrategy
+            <https://pathway.com/developers/api-docs/udfs#pathway.udfs.ExponentialBackoffRetryStrategy>`_.
         cache_strategy: Defines the caching mechanism. To enable caching,
             a valid `CacheStrategy` should be provided.
             See `Cache strategy <https://pathway.com/developers/api-docs/udfs#pathway.udfs.CacheStrategy>`_
@@ -666,7 +673,9 @@ class CohereChat(BaseChat):
     def __init__(
         self,
         capacity: int | None = None,
-        retry_strategy: udfs.AsyncRetryStrategy | None = None,
+        retry_strategy: (
+            udfs.AsyncRetryStrategy | None
+        ) = pw.udfs.ExponentialBackoffRetryStrategy(),
         cache_strategy: udfs.CacheStrategy | None = None,
         model: str | None = "command",
         **cohere_kwargs,