@@ -13,16 +13,20 @@ import { GUIAgentToolCallEngine } from './ToolCallEngine';
1313import { SYSTEM_PROMPT } from './prompts' ;
1414import { Base64ImageParser } from '@agent-infra/media-utils' ;
1515import { Operator , BaseGUIAgent } from '@gui-agent/shared/base' ;
16- import { GUIAgentConfig , NormalizeCoordinates } from '@gui-agent/shared/types' ;
16+ import {
17+ GUIAgentConfig ,
18+ NormalizeCoordinates ,
19+ ImageDetailCalculator ,
20+ } from '@gui-agent/shared/types' ;
1721import {
1822 assembleSystemPrompt ,
1923 isSystemPromptTemplate ,
20- defaultNormalizeCoords ,
2124 normalizeActionCoords ,
2225 sleep ,
2326} from '@gui-agent/shared/utils' ;
2427import { GUI_ADAPTED_TOOL_NAME } from './constants' ;
2528import { convertToAgentUIAction , createGUIErrorResponse } from './utils' ;
29+ import { defaultNormalizeCoords , defaultDetailCalculator } from './defaultImpls' ;
2630
2731const defaultLogger = new ConsoleLogger ( '[GUIAgent]' , LogLevel . DEBUG ) ;
2832
@@ -31,6 +35,7 @@ export class GUIAgent<T extends Operator> extends BaseGUIAgent {
3135
3236 private operator : Operator | undefined ;
3337 private normalizeCoordinates : NormalizeCoordinates ;
38+ private detailCalculator : ImageDetailCalculator ;
3439 private loopIntervalInMs : number ;
3540
3641 constructor ( config : GUIAgentConfig < T > ) {
@@ -40,6 +45,7 @@ export class GUIAgent<T extends Operator> extends BaseGUIAgent {
4045 systemPrompt,
4146 customeActionParser,
4247 normalizeCoordinates,
48+ detailCalculator,
4349 maxLoopCount,
4450 loopIntervalInMs = 500 ,
4551 } = config ;
@@ -69,6 +75,8 @@ export class GUIAgent<T extends Operator> extends BaseGUIAgent {
6975 } ) ;
7076 this . operator = operator ;
7177 this . normalizeCoordinates = normalizeCoordinates ?? defaultNormalizeCoords ;
78+ // Use the detail calculator supplied in config; fall back to the default implementation when none is provided
79+ this . detailCalculator = detailCalculator ?? defaultDetailCalculator ;
7280 this . loopIntervalInMs = loopIntervalInMs ;
7381 this . logger = this . logger . spawn ( '[GUIAgent]' ) ;
7482 }
@@ -163,11 +171,17 @@ export class GUIAgent<T extends Operator> extends BaseGUIAgent {
163171 return ;
164172 }
165173
174+ const { width : imageWidth , height : imageHeight } = base64Tool . getDimensions ( ) || {
175+ width : - 1 ,
176+ height : - 1 ,
177+ } ;
178+
166179 const content : ChatCompletionContentPart [ ] = [
167180 {
168181 type : 'image_url' ,
169182 image_url : {
170183 url : base64Uri ,
184+ detail : this . detailCalculator ( imageWidth , imageHeight ) ,
171185 } ,
172186 } ,
173187 ] ;
0 commit comments