Skip to content

Commit 4b25933

Browse files
committed
fix(personal): 收口 nodriver 断连并降低异常噪音
1 parent 5702e1e commit 4b25933

File tree

1 file changed

+217
-20
lines changed

1 file changed

+217
-20
lines changed

src/services/browser_captcha_personal.py

Lines changed: 217 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import shutil
1414
import tempfile
1515
import subprocess
16+
import types
1617
from typing import Optional, Dict, Any, Iterable
1718

1819
from ..core.logger import debug_logger
@@ -120,6 +121,7 @@ def _ensure_nodriver_installed() -> bool:
120121
# 尝试导入 nodriver
121122
uc = None
122123
NODRIVER_AVAILABLE = False
124+
_NODRIVER_RUNTIME_PATCHED = False
123125

124126
if DOCKER_HEADED_BLOCKED:
125127
debug_logger.log_warning(
@@ -143,6 +145,184 @@ def _ensure_nodriver_installed() -> bool:
143145
print(f"[BrowserCaptcha] ❌ nodriver 导入失败: {e}")
144146

145147

148+
# Lower-case substrings used to recognise a browser / websocket runtime
# disconnect. They are compared against lower-cased exception text, so every
# entry must itself be lower-case.
_RUNTIME_ERROR_KEYWORDS = (
    # Closed browser / target / connection messages.
    "has been closed",
    "browser has been closed",
    "target closed",
    "connection closed",
    "connection lost",
    "connection refused",
    "connection reset",
    "broken pipe",
    "session closed",
    # Stale page / session / execution-context messages.
    "not attached to an active page",
    "no session with given id",
    "cannot find context with specified id",
    # Websocket / transport level messages.
    "websocket is not open",
    "no close frame received or sent",
    "cannot call write to closing transport",
    "cannot write to closing transport",
    "cannot call send once a close message has been sent",
    # Exception class names (lower-cased) and errno text.
    "connectionclosederror",
    "connectionrefusederror",
    "disconnected",
    "errno 111",
)
171+
172+
173+
def _flatten_exception_text(error: Any) -> str:
    """Flatten an exception and everything reachable from it into one string.

    Starting from ``error`` this walks ``__cause__``, ``__context__`` and, for
    exception groups (any object exposing an ``exceptions`` list/tuple), the
    grouped sub-exceptions. For every object visited it collects the type
    name, ``str(obj)`` and the text of each entry in ``obj.args``.

    Args:
        error: An exception, or any other object (its type name and ``str()``
            are used). ``None`` yields an empty string.

    Returns:
        The collected fragments joined with ``" | "`` and lower-cased, ready
        for case-insensitive substring matching.
    """
    seen: set[int] = set()  # ids already visited; guards against cyclic chains
    stack = [error]
    parts: list[str] = []

    while stack:
        current = stack.pop()
        if current is None:
            continue

        marker = id(current)
        if marker in seen:
            continue
        seen.add(marker)

        parts.append(type(current).__name__)

        # Use str() directly: ``current or ""`` would evaluate the object's
        # truthiness and silently drop falsy-but-meaningful values such as 0.
        message = str(current).strip()
        if message:
            parts.append(message)

        args = getattr(current, "args", None)
        if isinstance(args, tuple):
            for arg in args:
                arg_text = str(arg).strip()
                # Single-argument exceptions stringify to that argument;
                # skip the duplicate fragment.
                if arg_text and arg_text != message:
                    parts.append(arg_text)

        stack.append(getattr(current, "__cause__", None))
        stack.append(getattr(current, "__context__", None))

        # Exception groups do not include their members in str(); descend
        # into them so a wrapped disconnect is still visible to callers.
        grouped = getattr(current, "exceptions", None)
        if isinstance(grouped, (list, tuple)):
            stack.extend(grouped)

    return " | ".join(parts).lower()
206+
207+
208+
def _is_runtime_disconnect_error(error: Any) -> bool:
    """Report whether ``error`` looks like a browser / websocket runtime disconnect.

    The error is flattened with ``_flatten_exception_text`` and the result is
    searched for any entry of ``_RUNTIME_ERROR_KEYWORDS``.

    Returns:
        ``True`` when at least one keyword occurs in the flattened text,
        ``False`` otherwise (including when the flattened text is empty).
    """
    flattened = _flatten_exception_text(error)
    if flattened:
        for keyword in _RUNTIME_ERROR_KEYWORDS:
            if keyword in flattened:
                return True
    return False
214+
215+
216+
def _finalize_nodriver_send_task(connection, transaction, tx_id: int, task: asyncio.Task):
    """Consume the outcome of a background nodriver ``websocket.send`` task.

    Retrieving the result here keeps the event loop from reporting an
    unretrieved task exception.

    Args:
        connection: Object whose ``mapper`` dict tracks pending transactions.
        transaction: The future-like transaction awaiting a reply for ``tx_id``.
        tx_id: Key of ``transaction`` inside ``connection.mapper``.
        task: The finished send task.

    Behaviour:
        * send succeeded -> nothing is changed;
        * send cancelled -> the mapper entry is removed and a still-pending
          transaction is cancelled;
        * send raised    -> the mapper entry is removed, the error is set on a
          still-pending transaction, and a warning is logged.
    """
    cancelled = False
    failure = None
    try:
        task.result()
    except asyncio.CancelledError:
        cancelled = True
    except Exception as exc:
        failure = exc

    if not cancelled and failure is None:
        return

    # Both failure modes drop the bookkeeping entry for this transaction.
    connection.mapper.pop(tx_id, None)

    if cancelled:
        if not transaction.done():
            transaction.cancel()
        return

    if not transaction.done():
        try:
            transaction.set_exception(failure)
        except Exception:
            # Best effort: the transaction may refuse the exception.
            pass

    if _is_runtime_disconnect_error(failure):
        warning = f"[BrowserCaptcha] nodriver websocket 发送在断连后退出: {type(failure).__name__}: {failure}"
    else:
        warning = f"[BrowserCaptcha] nodriver websocket 发送异常: {type(failure).__name__}: {failure}"
    debug_logger.log_warning(warning)
240+
241+
242+
def _patch_nodriver_connection_instance(connection_instance):
    """Replace ``send`` on one nodriver connection so send failures are contained.

    The replacement runs ``websocket.send`` as a background task and attaches
    ``_finalize_nodriver_send_task`` as its done-callback. The instance is
    tagged with ``_flow2api_send_patched`` so it is only patched once.

    Does nothing when ``connection_instance`` is falsy, already tagged, or
    when ``nodriver.core.connection`` cannot be imported (a warning is logged
    in the last case).
    """
    if not connection_instance:
        return
    if getattr(connection_instance, "_flow2api_send_patched", False):
        return

    try:
        from nodriver.core import connection as nodriver_connection_module
    except Exception as e:
        debug_logger.log_warning(f"[BrowserCaptcha] 加载 nodriver.connection 失败,跳过连接补丁: {e}")
        return

    async def patched_send(self, cdp_obj, _is_update=False):
        # Reconnect first if the connection reports itself closed.
        if self.closed:
            await self.connect()
        if not _is_update:
            await self._register_handlers()

        pending = nodriver_connection_module.Transaction(cdp_obj)
        pending_id = next(self.__count__)
        pending.id = pending_id
        self.mapper[pending_id] = pending

        def _reap(finished_task):
            _finalize_nodriver_send_task(self, pending, pending_id, finished_task)

        background_send = asyncio.create_task(self.websocket.send(pending.message))
        background_send.add_done_callback(_reap)
        return await pending

    connection_instance.send = types.MethodType(patched_send, connection_instance)
    connection_instance._flow2api_send_patched = True
273+
274+
275+
def _patch_nodriver_browser_instance(browser_instance):
    """Patch one browser instance: its connections and its ``update_targets``.

    The browser's ``connection`` and every entry of ``targets`` are passed to
    ``_patch_nodriver_connection_instance`` on every call. ``update_targets``
    is wrapped only once (tracked by ``_flow2api_update_targets_patched``).
    The wrapper logs a warning and returns ``[]`` on a runtime-disconnect
    error, re-raises everything else, and after a successful call patches the
    connections again.
    """
    if not browser_instance:
        return

    def _patch_connections(owner):
        # Main connection first, then a snapshot of the current targets.
        _patch_nodriver_connection_instance(getattr(owner, "connection", None))
        for target in list(getattr(owner, "targets", []) or []):
            _patch_nodriver_connection_instance(target)

    _patch_connections(browser_instance)

    if getattr(browser_instance, "_flow2api_update_targets_patched", False):
        return

    # Bound method captured before it is replaced below.
    original_update_targets = browser_instance.update_targets

    async def patched_update_targets(self, *args, **kwargs):
        try:
            result = await original_update_targets(*args, **kwargs)
        except asyncio.CancelledError:
            raise
        except Exception as e:
            if not _is_runtime_disconnect_error(e):
                raise
            debug_logger.log_warning(
                f"[BrowserCaptcha] nodriver.update_targets 在浏览器断连后退出: {type(e).__name__}: {e}"
            )
            return []
        else:
            _patch_connections(self)
            return result

    browser_instance.update_targets = types.MethodType(patched_update_targets, browser_instance)
    browser_instance._flow2api_update_targets_patched = True
309+
310+
311+
def _patch_nodriver_runtime(browser_instance=None):
    """Apply the nodriver disconnect patches to the given browser instance.

    Does nothing unless ``NODRIVER_AVAILABLE`` is true and ``uc`` is set.
    A non-``None`` ``browser_instance`` is handed to
    ``_patch_nodriver_browser_instance``. The first call that gets this far
    flips ``_NODRIVER_RUNTIME_PATCHED`` and logs a one-time info message.
    """
    global _NODRIVER_RUNTIME_PATCHED

    if NODRIVER_AVAILABLE and uc is not None:
        if browser_instance is not None:
            _patch_nodriver_browser_instance(browser_instance)

        if _NODRIVER_RUNTIME_PATCHED:
            return

        _NODRIVER_RUNTIME_PATCHED = True
        debug_logger.log_info("[BrowserCaptcha] 已启用 nodriver 运行态安全补丁")
324+
325+
146326
def _parse_proxy_url(proxy_url: str):
147327
"""Parse a proxy URL into (protocol, host, port, username, password)."""
148328
if not proxy_url:
@@ -351,24 +531,7 @@ async def _wait_for_display_ready(self, display_value: str, timeout_seconds: flo
351531

352532
def _is_browser_runtime_error(self, error: Any) -> bool:
353533
"""识别浏览器运行态已损坏/已关闭的典型异常。"""
354-
error_text = str(error or "").strip().lower()
355-
if not error_text:
356-
return False
357-
358-
runtime_keywords = [
359-
"has been closed",
360-
"browser has been closed",
361-
"target closed",
362-
"connection closed",
363-
"connection lost",
364-
"session closed",
365-
"not attached to an active page",
366-
"no session with given id",
367-
"cannot find context with specified id",
368-
"websocket is not open",
369-
"disconnected",
370-
]
371-
return any(keyword in error_text for keyword in runtime_keywords)
534+
return _is_runtime_disconnect_error(error)
372535

373536
async def _probe_browser_runtime(self) -> bool:
374537
"""轻量探测当前 nodriver 连接是否仍可用。"""
@@ -746,10 +909,42 @@ async def _close_tab_quietly(self, tab):
746909
except Exception:
747910
pass
748911

749-
async def _stop_browser_process(self, browser_instance):
912+
async def _disconnect_browser_connection_quietly(self, browser_instance, reason: str):
    """Close the browser's connection via its ``disconnect`` method, without raising.

    Args:
        browser_instance: Browser object; its ``connection.disconnect`` is
            called when present. Falsy values are ignored.
        reason: Short tag included in the timeout label and in log messages.

    An awaitable result is awaited through ``self._run_with_timeout`` with a
    5 second timeout. Any ``Exception`` is logged as a warning instead of
    being propagated.
    """
    connection = getattr(browser_instance, "connection", None) if browser_instance else None
    closer = getattr(connection, "disconnect", None) if connection else None
    if closer is None:
        return

    try:
        outcome = closer()
        if inspect.isawaitable(outcome):
            await self._run_with_timeout(
                outcome,
                timeout_seconds=5.0,
                label=f"browser.disconnect:{reason}",
            )
        # Yield to the event loop once after disconnecting.
        await asyncio.sleep(0)
    except Exception as e:
        if self._is_browser_runtime_error(e):
            debug_logger.log_warning(
                f"[BrowserCaptcha] 浏览器连接关闭时检测到已断连状态 ({reason}): {e}"
            )
        else:
            debug_logger.log_warning(
                f"[BrowserCaptcha] 浏览器连接关闭异常 ({reason}): {type(e).__name__}: {e}"
            )
940+
941+
async def _stop_browser_process(self, browser_instance, reason: str = "browser_stop"):
750942
"""兼容 nodriver 同步 stop API,安全停止浏览器进程。"""
751943
if not browser_instance:
752944
return
945+
946+
await self._disconnect_browser_connection_quietly(browser_instance, reason=reason)
947+
753948
stop_method = getattr(browser_instance, "stop", None)
754949
if stop_method is None:
755950
return
@@ -800,7 +995,7 @@ async def close_once(tab):
800995

801996
if browser_instance:
802997
try:
803-
await self._stop_browser_process(browser_instance)
998+
await self._stop_browser_process(browser_instance, reason=reason)
804999
except Exception as e:
8051000
debug_logger.log_warning(
8061001
f"[BrowserCaptcha] 停止浏览器实例失败 ({reason}): {e}"
@@ -859,6 +1054,7 @@ async def initialize(self):
8591054
debug_logger.log_warning("[BrowserCaptcha] 浏览器连接已失活,准备重新初始化...")
8601055
browser_needs_restart = True
8611056
else:
1057+
_patch_nodriver_runtime(self.browser)
8621058
if self._idle_reaper_task is None or self._idle_reaper_task.done():
8631059
self._idle_reaper_task = asyncio.create_task(self._idle_tab_reaper_loop())
8641060
return
@@ -1009,6 +1205,7 @@ async def initialize(self):
10091205
label="nodriver.start.retry_no_sandbox",
10101206
)
10111207

1208+
_patch_nodriver_runtime(self.browser)
10121209
self._initialized = True
10131210
if self._idle_reaper_task is None or self._idle_reaper_task.done():
10141211
self._idle_reaper_task = asyncio.create_task(self._idle_tab_reaper_loop())

0 commit comments

Comments
 (0)