1313import shutil
1414import tempfile
1515import subprocess
16+ import types
1617from typing import Optional , Dict , Any , Iterable
1718
1819from ..core .logger import debug_logger
@@ -120,6 +121,7 @@ def _ensure_nodriver_installed() -> bool:
120121# 尝试导入 nodriver
121122uc = None
122123NODRIVER_AVAILABLE = False
124+ _NODRIVER_RUNTIME_PATCHED = False
123125
124126if DOCKER_HEADED_BLOCKED :
125127 debug_logger .log_warning (
@@ -143,6 +145,184 @@ def _ensure_nodriver_installed() -> bool:
143145 print (f"[BrowserCaptcha] ❌ nodriver 导入失败: { e } " )
144146
145147
# Lowercase substrings that mark a browser / websocket runtime disconnect.
# They are matched with a plain ``in`` test against flattened, lowercased
# exception text, so every entry must itself be lowercase.
_RUNTIME_ERROR_KEYWORDS = (
    # Browser / target / session level shutdown.
    "has been closed",
    "browser has been closed",
    "target closed",
    # Transport-level connection failures.
    "connection closed",
    "connection lost",
    "connection refused",
    "connection reset",
    "broken pipe",
    # DevTools session / execution-context lookups that no longer resolve.
    "session closed",
    "not attached to an active page",
    "no session with given id",
    "cannot find context with specified id",
    # Websocket send / close-frame failures.
    "websocket is not open",
    "no close frame received or sent",
    "cannot call write to closing transport",
    "cannot write to closing transport",
    "cannot call send once a close message has been sent",
    # Exception class names (the flattened text also contains type names).
    "connectionclosederror",
    "connectionrefusederror",
    # Generic markers.
    "disconnected",
    "errno 111",
)
171+
172+
def _flatten_exception_text(error: Any) -> str:
    """Flatten an error and everything chained to it into one lowercase string.

    For every visited object the type name, ``str()`` text and each entry of
    ``args`` are collected, then joined with ``" | "`` and lowercased so the
    result can be scanned with simple substring checks.

    The walk follows ``__cause__`` and ``__context__`` and, additionally, the
    members of exception-group style containers (any object exposing an
    ``exceptions`` list/tuple of exceptions). Without the latter, a
    disconnect wrapped in a group would be invisible because the group's own
    message does not contain the inner error text.

    Args:
        error: An exception, or any other object (``None`` yields ``""``).

    Returns:
        The flattened, lowercased text; empty when nothing was collected.
    """
    visited: set[int] = set()
    pending = [error]
    parts: list[str] = []

    while pending:
        current = pending.pop()
        if current is None:
            continue

        # Guard against cycles in the cause/context graph.
        current_id = id(current)
        if current_id in visited:
            continue
        visited.add(current_id)

        parts.append(type(current).__name__)

        message = str(current or "").strip()
        if message:
            parts.append(message)

        args = getattr(current, "args", None)
        if isinstance(args, tuple):
            for arg in args:
                arg_text = str(arg or "").strip()
                if arg_text:
                    parts.append(arg_text)

        pending.append(getattr(current, "__cause__", None))
        pending.append(getattr(current, "__context__", None))

        # Exception groups carry the real failures in ``exceptions``; only
        # genuine exception members are followed so unrelated attributes that
        # happen to share the name are ignored.
        grouped = getattr(current, "exceptions", None)
        if isinstance(grouped, (list, tuple)):
            pending.extend(
                member for member in grouped if isinstance(member, BaseException)
            )

    return " | ".join(parts).lower()
206+
207+
def _is_runtime_disconnect_error(error: Any) -> bool:
    """Report whether ``error`` looks like a browser / websocket disconnect.

    The error is flattened to lowercase text with
    ``_flatten_exception_text`` and checked for any of the substrings in
    ``_RUNTIME_ERROR_KEYWORDS``.
    """
    flattened = _flatten_exception_text(error)
    if flattened:
        for marker in _RUNTIME_ERROR_KEYWORDS:
            if marker in flattened:
                return True
    return False
214+
215+
def _finalize_nodriver_send_task(connection, transaction, tx_id: int, task: asyncio.Task):
    """Done-callback that consumes the outcome of a background websocket send.

    Retrieving the task result here keeps the event loop from reporting an
    unretrieved task exception. On success nothing is changed. On
    cancellation or failure the transaction is removed from
    ``connection.mapper`` and, if still pending, cancelled or failed with the
    send error so that its awaiter is released. Failures are logged as
    warnings.
    """

    def _unregister_and_check_pending() -> bool:
        # Drop the bookkeeping entry first, then report whether the
        # transaction still needs to be resolved.
        connection.mapper.pop(tx_id, None)
        return not transaction.done()

    try:
        task.result()
    except asyncio.CancelledError:
        if _unregister_and_check_pending():
            transaction.cancel()
    except Exception as e:
        if _unregister_and_check_pending():
            try:
                transaction.set_exception(e)
            except Exception:
                # Best effort: the transaction may have been resolved between
                # the done() check and this call.
                pass

        if _is_runtime_disconnect_error(e):
            warning = f"[BrowserCaptcha] nodriver websocket 发送在断连后退出: { type (e ).__name__ } : { e } "
        else:
            warning = f"[BrowserCaptcha] nodriver websocket 发送异常: { type (e ).__name__ } : { e } "
        debug_logger.log_warning(warning)
240+
241+
def _patch_nodriver_connection_instance(connection_instance):
    """Replace ``send`` on one connection object so send failures are contained.

    The replacement registers the transaction in ``mapper``, runs the
    websocket send as a background task and attaches
    ``_finalize_nodriver_send_task`` as its done-callback, then awaits the
    transaction. The patch is applied per instance and only once; the
    ``_flow2api_send_patched`` attribute marks an already patched object.
    Falsy inputs are ignored, and the patch is skipped (with a warning) when
    ``nodriver.core.connection`` cannot be imported.
    """
    if not connection_instance:
        return
    if getattr(connection_instance, "_flow2api_send_patched", False):
        return

    try:
        from nodriver.core import connection as nd_connection
    except Exception as e:
        debug_logger.log_warning(f"[BrowserCaptcha] 加载 nodriver.connection 失败,跳过连接补丁: { e } ")
        return

    async def patched_send(self, cdp_obj, _is_update=False):
        if self.closed:
            await self.connect()
        if not _is_update:
            await self._register_handlers()

        transaction = nd_connection.Transaction(cdp_obj)
        tx_id = next(self.__count__)
        transaction.id = tx_id
        self.mapper[tx_id] = transaction

        def _on_send_finished(finished_task):
            # self / transaction / tx_id are never rebound after this point,
            # so the closure sees exactly the values of this call.
            _finalize_nodriver_send_task(self, transaction, tx_id, finished_task)

        send_task = asyncio.create_task(self.websocket.send(transaction.message))
        send_task.add_done_callback(_on_send_finished)
        return await transaction

    connection_instance.send = types.MethodType(patched_send, connection_instance)
    connection_instance._flow2api_send_patched = True
273+
274+
def _patch_nodriver_browser_instance(browser_instance):
    """Patch one browser object: its connections and its ``update_targets``.

    The browser's ``connection`` and every entry of ``targets`` are passed to
    ``_patch_nodriver_connection_instance``. ``update_targets`` is wrapped
    once (tracked via ``_flow2api_update_targets_patched``) so that a runtime
    disconnect is logged and answered with ``[]`` instead of raising, and so
    that connections present after each successful refresh get patched too.
    Cancellation and non-disconnect errors propagate unchanged.
    """
    if not browser_instance:
        return

    def _patch_known_connections(browser):
        _patch_nodriver_connection_instance(getattr(browser, "connection", None))
        for target in list(getattr(browser, "targets", []) or []):
            _patch_nodriver_connection_instance(target)

    _patch_known_connections(browser_instance)

    if getattr(browser_instance, "_flow2api_update_targets_patched", False):
        return

    original_update_targets = browser_instance.update_targets

    async def patched_update_targets(self, *args, **kwargs):
        try:
            result = await original_update_targets(*args, **kwargs)
        except asyncio.CancelledError:
            raise
        except Exception as e:
            if not _is_runtime_disconnect_error(e):
                raise
            debug_logger.log_warning(
                f"[BrowserCaptcha] nodriver.update_targets 在浏览器断连后退出: { type (e ).__name__ } : { e } "
            )
            return []

        _patch_known_connections(self)
        return result

    browser_instance.update_targets = types.MethodType(patched_update_targets, browser_instance)
    browser_instance._flow2api_update_targets_patched = True
309+
310+
def _patch_nodriver_runtime(browser_instance=None):
    """Apply the runtime safety patches to ``browser_instance``, if possible.

    Does nothing unless nodriver was imported successfully. When a browser
    object is given it is patched via ``_patch_nodriver_browser_instance``.
    The module-level ``_NODRIVER_RUNTIME_PATCHED`` flag only ensures the
    informational log line is written a single time.
    """
    global _NODRIVER_RUNTIME_PATCHED

    if NODRIVER_AVAILABLE and uc is not None:
        if browser_instance is not None:
            _patch_nodriver_browser_instance(browser_instance)

        if _NODRIVER_RUNTIME_PATCHED:
            return
        _NODRIVER_RUNTIME_PATCHED = True
        debug_logger.log_info("[BrowserCaptcha] 已启用 nodriver 运行态安全补丁")
324+
325+
146326def _parse_proxy_url (proxy_url : str ):
147327 """Parse a proxy URL into (protocol, host, port, username, password)."""
148328 if not proxy_url :
@@ -351,24 +531,7 @@ async def _wait_for_display_ready(self, display_value: str, timeout_seconds: flo
351531
def _is_browser_runtime_error(self, error: Any) -> bool:
    """Return True when ``error`` indicates a closed or broken browser runtime.

    Delegates to the module-level ``_is_runtime_disconnect_error`` so that
    this method and the nodriver patches share one keyword list and one way
    of inspecting chained exceptions, instead of keeping a second, shorter
    inline list here.
    """
    return _is_runtime_disconnect_error(error)
372535
373536 async def _probe_browser_runtime (self ) -> bool :
374537 """轻量探测当前 nodriver 连接是否仍可用。"""
@@ -746,10 +909,42 @@ async def _close_tab_quietly(self, tab):
746909 except Exception :
747910 pass
748911
749- async def _stop_browser_process (self , browser_instance ):
async def _disconnect_browser_connection_quietly(self, browser_instance, reason: str):
    """Try to close the browser's connection before the browser is stopped.

    Calls ``browser_instance.connection.disconnect()`` when it exists; an
    awaitable result is awaited through ``self._run_with_timeout`` with a
    5 second limit. Control is then yielded to the event loop once. Any
    ``Exception`` is logged as a warning and never re-raised; ``reason`` is
    only used to label the timeout and the log messages.
    """
    if not browser_instance:
        return

    connection = getattr(browser_instance, "connection", None)
    if not connection:
        return

    disconnect_method = getattr(connection, "disconnect", None)
    if disconnect_method is None:
        return

    try:
        result = disconnect_method()
        if inspect.isawaitable(result):
            await self._run_with_timeout(
                result,
                timeout_seconds=5.0,
                label=f"browser.disconnect:{ reason } ",
            )
        # Yield once so already scheduled callbacks can run.
        await asyncio.sleep(0)
    except Exception as e:
        if self._is_browser_runtime_error(e):
            warning = f"[BrowserCaptcha] 浏览器连接关闭时检测到已断连状态 ({ reason } ): { e } "
        else:
            warning = f"[BrowserCaptcha] 浏览器连接关闭异常 ({ reason } ): { type (e ).__name__ } : { e } "
        debug_logger.log_warning(warning)
940+
941+ async def _stop_browser_process (self , browser_instance , reason : str = "browser_stop" ):
750942 """兼容 nodriver 同步 stop API,安全停止浏览器进程。"""
751943 if not browser_instance :
752944 return
945+
946+ await self ._disconnect_browser_connection_quietly (browser_instance , reason = reason )
947+
753948 stop_method = getattr (browser_instance , "stop" , None )
754949 if stop_method is None :
755950 return
@@ -800,7 +995,7 @@ async def close_once(tab):
800995
801996 if browser_instance :
802997 try :
803- await self ._stop_browser_process (browser_instance )
998+ await self ._stop_browser_process (browser_instance , reason = reason )
804999 except Exception as e :
8051000 debug_logger .log_warning (
8061001 f"[BrowserCaptcha] 停止浏览器实例失败 ({ reason } ): { e } "
@@ -859,6 +1054,7 @@ async def initialize(self):
8591054 debug_logger .log_warning ("[BrowserCaptcha] 浏览器连接已失活,准备重新初始化..." )
8601055 browser_needs_restart = True
8611056 else :
1057+ _patch_nodriver_runtime (self .browser )
8621058 if self ._idle_reaper_task is None or self ._idle_reaper_task .done ():
8631059 self ._idle_reaper_task = asyncio .create_task (self ._idle_tab_reaper_loop ())
8641060 return
@@ -1009,6 +1205,7 @@ async def initialize(self):
10091205 label = "nodriver.start.retry_no_sandbox" ,
10101206 )
10111207
1208+ _patch_nodriver_runtime (self .browser )
10121209 self ._initialized = True
10131210 if self ._idle_reaper_task is None or self ._idle_reaper_task .done ():
10141211 self ._idle_reaper_task = asyncio .create_task (self ._idle_tab_reaper_loop ())
0 commit comments