Skip to content

Commit dcf7f7b

Browse files
authored
refactor(tarko): some enhancement for gui agent (#1198)
1 parent b564062 commit dcf7f7b

File tree

3 files changed

+39
-31
lines changed

3 files changed

+39
-31
lines changed

multimodal/tarko/agent-web-ui/src/standalone/chat/Message/components/ToolCalls.tsx

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -195,6 +195,8 @@ export const ToolCalls: React.FC<ToolCallsProps> = ({
195195
switch (toolName) {
196196
case 'browser_navigate':
197197
return 'Navigate';
198+
case 'browser_vision_control':
199+
return 'Browser';
198200
case 'browser_get_markdown':
199201
return 'Extract Content';
200202
case 'browser_click':

multimodal/tarko/agent-web-ui/src/standalone/workspace/components/WorkspaceHeader.tsx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ export const WorkspaceHeader: React.FC<WorkspaceHeaderProps> = ({
8282
<div className="min-w-0 flex-1">
8383
<div className="flex items-baseline gap-2">
8484
<h2 className="font-medium text-gray-900 dark:text-gray-100 text-base leading-tight truncate">
85-
{panelContent.title}
85+
{panelContent.title === 'browser_vision_control' ? 'Browser' : panelContent.title}
8686
</h2>
8787
<div className="text-xs text-gray-400 dark:text-gray-500 whitespace-nowrap flex-shrink-0 font-mono">
8888
{formatTimestamp(panelContent.timestamp, true)}

multimodal/tarko/agent-web-ui/src/standalone/workspace/renderers/BrowserControlRenderer.tsx

Lines changed: 36 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -73,24 +73,28 @@ export const BrowserControlRenderer: React.FC<BrowserControlRendererProps> = ({
7373

7474
// Find the most recent environment input (screenshot) before this operation
7575
useEffect(() => {
76-
if (!activeSessionId) return;
77-
78-
const sessionMessages = messages[activeSessionId] || [];
76+
// Initialize: clear current screenshot if no direct environment image provided
77+
if (!environmentImage) {
78+
setRelatedImage(null);
79+
}
7980

80-
if (!toolCallId) return;
81+
if (!activeSessionId || !toolCallId) return;
8182

82-
// Get the index of current tool call in messages
83+
const sessionMessages = messages[activeSessionId] || [];
8384
const currentToolCallIndex = sessionMessages.findIndex((msg) =>
8485
msg.toolCalls?.some((tc) => tc.id === toolCallId),
8586
);
8687

87-
if (currentToolCallIndex === -1) return;
88+
if (currentToolCallIndex === -1) {
89+
console.warn(`[BrowserControlRenderer] Tool call ${toolCallId} not found in messages`);
90+
if (!environmentImage) setRelatedImage(null);
91+
return;
92+
}
8893

89-
// Find the environment input closest to the current tool call
9094
let foundImage = false;
9195

92-
// Search forward for environment input, find the most recent screenshot
93-
for (let i = currentToolCallIndex; i >= 0; i--) {
96+
// Only search for screenshots BEFORE the current tool call
97+
for (let i = currentToolCallIndex - 1; i >= 0; i--) {
9498
const msg = sessionMessages[i];
9599
if (msg.role === 'environment' && Array.isArray(msg.content)) {
96100
const imgContent = msg.content.find(
@@ -105,29 +109,14 @@ export const BrowserControlRenderer: React.FC<BrowserControlRendererProps> = ({
105109
}
106110
}
107111

108-
// If no image is found before the current tool call, search all environment messages as fallback
109-
if (!foundImage) {
112+
// If no valid screenshot found before the tool call, clear the display
113+
if (!foundImage && !environmentImage) {
110114
console.warn(
111-
`[BrowserControlRenderer] Could not find preceding screenshot for toolCallId: ${toolCallId}. Falling back to search all environment messages.`,
115+
`[BrowserControlRenderer] No valid screenshot found before toolCallId: ${toolCallId}. Clearing screenshot display.`,
112116
);
113-
const envMessages = sessionMessages.filter(
114-
(msg) => msg.role === 'environment' && Array.isArray(msg.content),
115-
);
116-
117-
// Search backwards to find the most recent screenshot
118-
for (let i = envMessages.length - 1; i >= 0; i--) {
119-
const msg = envMessages[i];
120-
const imgContent = msg.content.find(
121-
(c) => typeof c === 'object' && 'type' in c && c.type === 'image_url',
122-
);
123-
124-
if (imgContent && 'image_url' in imgContent && imgContent.image_url.url) {
125-
setRelatedImage(imgContent.image_url.url);
126-
break; // Stop when the latest one is found
127-
}
128-
}
117+
setRelatedImage(null);
129118
}
130-
}, [activeSessionId, messages, toolCallId]);
119+
}, [activeSessionId, messages, toolCallId, environmentImage]);
131120

132121
// Handler to get image dimensions when loaded
133122
const handleImageLoad = () => {
@@ -142,7 +131,7 @@ export const BrowserControlRenderer: React.FC<BrowserControlRendererProps> = ({
142131
return (
143132
<div className="space-y-4">
144133
{/* Screenshot section - moved to the top */}
145-
{relatedImage && (
134+
{relatedImage ? (
146135
<div>
147136
<BrowserShell className="mb-4">
148137
<div className="relative">
@@ -275,6 +264,23 @@ export const BrowserControlRenderer: React.FC<BrowserControlRendererProps> = ({
275264
</div>
276265
</BrowserShell>
277266
</div>
267+
) : (
268+
// No screenshot available - show placeholder
269+
<div>
270+
<BrowserShell className="mb-4">
271+
<div className="flex items-center justify-center h-64 bg-gray-50 dark:bg-gray-800">
272+
<div className="text-center">
273+
<FiImage className="mx-auto text-gray-400 dark:text-gray-500 mb-2" size={48} />
274+
<p className="text-gray-500 dark:text-gray-400 font-medium">
275+
No Screenshot Available
276+
</p>
277+
<p className="text-sm text-gray-400 dark:text-gray-500 mt-1">
278+
Unable to find environment screenshot for this operation
279+
</p>
280+
</div>
281+
</div>
282+
</BrowserShell>
283+
</div>
278284
)}
279285

280286
{/* Visual operation details card */}

0 commit comments

Comments
 (0)